"""SmolAgent GAIA runner: answers scored questions with GPT + ReACT tools via Gradio."""

import logging
import os
from urllib.parse import quote

import gradio as gr
import openai
import pandas as pd
import requests
from openai import OpenAI
from smolagents import CodeAgent, DuckDuckGoSearchTool, tool

# --- Setup logging ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_PROMPT_LENGTH = 15000  # characters, naive cap to avoid token overflow

# --- Configure OpenAI SDK & Client ---
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Environment variable OPENAI_API_KEY is required.")
openai.api_key = openai_api_key  # legacy module-level config; OpenAI() also reads the env var
client = OpenAI()
OPENAI_MODEL_ID = os.getenv("OPENAI_MODEL_ID", "gpt-4.1")


# --- Adapter so CodeAgent can call the OpenAI client correctly ---
class OpenAIModelWrapper:
    """Callable adapter exposing the OpenAI Responses API to smolagents' CodeAgent.

    CodeAgent expects ``model(prompt) -> str``; this wrapper satisfies that
    contract and converts inference failures into an error string rather than
    raising, so a single bad call does not abort a whole evaluation run.
    """

    def __init__(self, model_id: str, client: OpenAI):
        self.model_id = model_id
        self.client = client

    def __call__(self, prompt: str, **kwargs) -> str:
        """Run one inference call and return the model's text output.

        Args:
            prompt: Full prompt text to send to the model.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The model's text output, or an ``AGENT ERROR (inference): ...``
            string if the API call fails.
        """
        try:
            resp = self.client.responses.create(
                model=self.model_id,
                input=prompt
            )
            # output_text is the SDK convenience accessor; fall back to the
            # response repr if an older SDK object lacks it.
            return getattr(resp, "output_text", str(resp))
        except Exception as e:
            logger.exception("OpenAI inference error")
            return f"AGENT ERROR (inference): {e}"


# --- Tool Definitions ---
@tool
def summarize_query(query: str) -> str:
    """
    Reframes an unclear search query to improve relevance.

    Args:
        query (str): The original search query needing refinement.

    Returns:
        str: A concise, improved version of the query.
    """
    return f"Summarize and reframe: {query}"


@tool
def wikipedia_search(page: str) -> str:
    """
    Fetches the summary extract of an English Wikipedia page via the REST API.

    Args:
        page (str): Title of the Wikipedia page (e.g. 'Mercedes_Sosa_discography').

    Returns:
        str: The page's summary (or an error message).
    """
    try:
        # FIX: percent-encode the title — raw spaces, '?', '#', or non-ASCII
        # characters in a page name would otherwise corrupt the URL path.
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{quote(page)}"
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        extract = resp.json().get("extract", "")
        if not extract:
            logger.warning("Wikipedia returned empty extract for %s", page)
        return extract
    except Exception as e:
        logger.exception("Wikipedia lookup failed")
        return f"Wikipedia error: {e}"


# Instantiate tools
search_tool = DuckDuckGoSearchTool()
wiki_tool = wikipedia_search
summarize_tool = summarize_query

# --- ReACT Prompt ---
instruction_prompt = """
You are a ReACT agent with three tools:
  • DuckDuckGoSearchTool(query: str)
  • wikipedia_search(page: str)
  • summarize_query(query: str)

Internally, for each question:
1. Thought: decide which tool to call.
2. Action: call the chosen tool.
3. Observation: record the result.
4. If empty/irrelevant:
   Thought: retry with summarize_query + DuckDuckGoSearchTool.
   Record new Observation.
5. Thought: integrate observations.

Finally, output your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""

# --- Build CodeAgent ---
llm_wrapper = OpenAIModelWrapper(model_id=OPENAI_MODEL_ID, client=client)
smart_agent = CodeAgent(
    tools=[search_tool, wiki_tool, summarize_tool],
    model=llm_wrapper
)


# --- BasicAgent for Gradio ---
class BasicAgent:
    """Thin per-question front end: builds the ReACT prompt and runs the CodeAgent."""

    def __init__(self):
        logger.info("Initialized SmolAgent (GPT-4.1) with ReACT & tools.")

    def __call__(self, question: str) -> str:
        """Answer one question; never raises — errors come back as strings.

        Args:
            question: The raw question text.

        Returns:
            The agent's answer, or an ``AGENT ERROR: ...`` string.
        """
        # Validate question
        if not question or len(question.strip()) == 0:
            return "AGENT ERROR: Empty question."
        header = instruction_prompt.strip() + "\n\nQUESTION: "
        # FIX: trim only the *question* to fit the cap. The previous naive
        # prompt[:MAX_PROMPT_LENGTH] slice could cut off the instruction
        # template's tail (the FINAL ANSWER contract) for long questions.
        budget = MAX_PROMPT_LENGTH - len(header)
        q = question.strip()
        if len(q) > budget:
            q = q[:budget]  # naive character trim
            logger.warning("Question truncated to %d chars", budget)
        prompt = header + q
        try:
            return smart_agent.run(prompt)
        except Exception as e:
            logger.exception("Agent run error")
            return f"AGENT ERROR (run): {e}"


# --- Submission logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer each with BasicAgent, and submit the results.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio's LoginButton;
            ``None`` when the user is not logged in.

    Returns:
        A ``(status_message, dataframe_or_None)`` pair for the Gradio outputs.
    """
    if not profile:
        return "Please log in to Hugging Face.", None
    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    agent = BasicAgent()

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
        if not isinstance(questions, list):
            raise ValueError("Invalid questions format")
        logger.info("Fetched %d questions", len(questions))
    except Exception as e:
        logger.exception("Failed to fetch questions")
        return f"Error fetching questions: {e}", None

    # Run agent over every well-formed question; count the malformed ones.
    logs, payload, skipped = [], [], 0
    for item in questions:
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or not q:
            skipped += 1
            continue
        # FIX: coerce to str — smart_agent.run's return type is not guaranteed,
        # and the scoring API expects a JSON string field.
        ans = str(agent(q))
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
    if skipped:
        logger.warning("Skipped %d malformed items", skipped)
    if not payload:
        return "Agent did not produce any answers.", pd.DataFrame(logs)

    # Submit answers
    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        post.raise_for_status()
        res = post.json()
        status = (
            f"Submission Successful!\n"
            f"User: {res.get('username')}\n"
            f"Score: {res.get('score','N/A')}% "
            f"({res.get('correct_count','?')}/{res.get('total_attempted','?')})\n"
            f"Message: {res.get('message','')}"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        logger.exception("Submission failed")
        return f"Submission Failed: {e}", pd.DataFrame(logs)


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Runner 🚀")
    gr.Markdown("""
**Instructions:**

1. Clone this space.
2. Add `OPENAI_API_KEY` and optionally `OPENAI_MODEL_ID` in Settings → Secrets.
3. Log in to Hugging Face.
4. Click **Run Evaluation & Submit All Answers**.
""")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Questions & Answers", wrap=True)
    # Gradio auto-injects the OAuthProfile argument for functions annotated
    # with gr.OAuthProfile, so no explicit inputs are wired here.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)