import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Tool Definitions ---
@tool
def summarize_query(query: str) -> str:
    """
    Provides a structured summary to reframe a query if search results are unclear or poor.

    Args:
        query (str): The search query that needs summarization.

    Returns:
        str: A concise summary of key facts about the given query.
    """
    return f"Summarize and reframe: {query}"

search_tool = DuckDuckGoSearchTool()

# --- System Prompt for ReACT + Scratchpad + Auto-Retry ---
system_message = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

For every question:
1. Thought: Think about what is needed.
2. Action: (Optional) Use a tool with a clear query.
3. Observation: Record what the tool returned.

If the first Observation is empty or irrelevant:
4. Thought: The result was unclear. I should reframe and retry.
5. Action: summarize_query with the original query.
6. Action: DuckDuckGoSearchTool with the reframed query.
7. Observation: Record the new result.

Then:
8. Thought: Reflect on all observations.
9. FINAL ANSWER: Provide the answer.

Formatting Rules:
- Begin with FINAL ANSWER: [your answer]
- Numbers: plain (no commas unless part of a list)
- Strings: no articles unless inside proper names
- Lists: comma-separated without extra punctuation

Example scratchpad flow:
Thought: Need fruits from painting.
Action: DuckDuckGoSearchTool('fruits in Embroidery from Uzbekistan painting')
Observation: (empty)
Thought: Unclear result, retry.
Action: summarize_query('fruits in Embroidery painting Uzbekistan')
Observation: pomegranate, apple, grape
Thought: Find breakfast fruits.
Action: DuckDuckGoSearchTool('breakfast menu October 1949 SS Ile de France')
Observation: grapes, apples, oranges
Thought: Overlap is grapes and apples.
FINAL ANSWER: grapes, apples
"""

# --- Build the Smart Agent ---
# NOTE: HfApiModel does not accept a system_message argument. Assuming a recent
# smolagents release, the custom ReACT instructions are prepended to the agent's
# default system prompt template after construction instead.
smart_agent = CodeAgent(
    tools=[search_tool, summarize_query],
    model=HfApiModel(),
)
smart_agent.prompt_templates["system_prompt"] = (
    system_message + "\n\n" + smart_agent.prompt_templates["system_prompt"]
)

# --- Integrate into Gradio App ---
class BasicAgent:
    def __init__(self):
        print("SmolAgent with ReACT, Scratchpad & Retry initialized.")

    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = smart_agent.run(question)
        print(f"Agent returning answer: {answer}")
        return answer


def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please log in to Hugging Face using the button above.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid.", None
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Run the agent on each question
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer
            })
        except Exception as e:
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}"
            })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Submit answers
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload
    }
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', '')}"
        )
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except Exception as e:
        results_df = pd.DataFrame(results_log)
        return f"Submission Failed: {e}", results_df


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and modify it if needed.
        2. Log in to Hugging Face.
        3. Click 'Run Evaluation & Submit All Answers'.

        **Note:** Evaluation can take a few minutes.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # gr.OAuthProfile is injected automatically by Gradio when a LoginButton is
    # present, so run_and_submit_all needs no explicit `inputs` here.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")
    if space_host:
        print(f"SPACE_HOST: {space_host}")
    if space_id:
        print(f"SPACE_ID: {space_id}")
    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=False)