|
import os |
|
import gradio as gr |
|
import requests |
|
import pandas as pd |
|
|
|
from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool |
|
|
|
|
|
# Base URL of the scoring service; the "/questions" and "/submit" endpoints
# used by run_and_submit_all are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
# NOTE: the docstring below is left word-for-word. Per the smolagents docs the
# @tool decorator builds the tool's description and argument schema from the
# docstring and type hints, so its text is part of what the model is shown.
@tool
def summarize_query(query: str) -> str:
    """
    Provides a structured summary to reframe a query if search results are unclear or poor.

    Args:
        query (str): The search query that needs summarization.

    Returns:
        str: A concise summary of key facts about the given query.
    """
    # No summarisation happens here: the query is only wrapped in a fixed
    # instruction prefix and handed back to the agent.
    prefix = "Summarize and reframe: "
    return f"{prefix}{query}"
|
|
|
# Web-search tool handed to the agent next to summarize_query.
search_tool = DuckDuckGoSearchTool()

# Extra behavioural instructions for the agent: a think/act/observe loop with
# one reframe-and-retry round, plus the answer formatting rules.
system_message = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

For every question:
1. Thought: Think what is needed.
2. Action: (Optional) Use a tool with a clear query.
3. Observation: Record what tool returned.

If the first Observation is empty or irrelevant:
4. Thought: The result was unclear. I should reframe and retry.
5. Action: summarize_query with the original query.
6. Action: DuckDuckGoSearchTool with the reframed query.
7. Observation: Record new result.

Then:
8. Thought: Reflect on all observations.
9. FINAL ANSWER: Provide the answer.

Formatting Rules:
- Begin with FINAL ANSWER: [your answer]
- Numbers: plain (no commas unless list)
- Strings: no articles unless inside proper names
- Lists: comma-separated without extra punctuation
"""

# Shared agent instance used by BasicAgent.
#
# CodeAgent does not take a `system_message` keyword (passing one raises
# TypeError while the module is being imported). The smolagents docs describe
# customising the prompt through `prompt_templates["system_prompt"]`, so the
# agent is built first and the instructions are appended afterwards. Appending
# rather than replacing keeps the stock prompt, which carries the tool
# descriptions and code-format instructions the agent relies on.
smart_agent = CodeAgent(
    tools=[search_tool, summarize_query],
    model=HfApiModel(),
)
smart_agent.prompt_templates["system_prompt"] = (
    smart_agent.prompt_templates["system_prompt"] + "\n" + system_message
)
|
|
|
|
|
|
|
class BasicAgent:
    """Callable adapter that forwards each question to the module-level ``smart_agent``."""

    def __init__(self):
        # Startup banner only; the instance keeps no state of its own.
        print("SmolAgent with ReACT, Scratchpad & Retry initialized.")

    def __call__(self, question: str) -> str:
        """Log a short preview of ``question`` and return ``smart_agent.run(question)``."""
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        result = smart_agent.run(question)
        return result
|
|
|
def _fetch_questions():
    """Download and decode the question list from the scoring API.

    Returns:
        The decoded JSON body of ``GET {DEFAULT_API_URL}/questions``.

    Raises:
        Exception: whatever ``requests`` raises for transport, HTTP-status or
            JSON-decoding failures.
        ValueError: if a non-empty body is not a list.
    """
    resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
    resp.raise_for_status()
    questions = resp.json()
    # An empty body is reported by the caller as "No questions fetched.";
    # anything non-empty must be a list so it can be iterated item by item.
    if questions and not isinstance(questions, list):
        raise ValueError(
            f"expected a list of questions, got {type(questions).__name__}"
        )
    return questions


def _answer_questions(agent, questions):
    """Run ``agent`` on every well-formed question.

    Args:
        agent: callable taking the question text and returning an answer.
        questions: iterable of items; only dicts with a truthy ``task_id``
            and a non-None ``question`` are processed.

    Returns:
        ``(logs, payload)``: ``logs`` holds one display row per answered
        question, ``payload`` holds the matching entries for submission.
    """
    logs, payload = [], []
    for item in questions:
        # Skip malformed entries instead of crashing on ``item.get``.
        if not isinstance(item, dict):
            continue
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or q is None:
            continue
        try:
            ans = agent(q)
        except Exception as e:
            # One failing question must not abort the whole run; the error
            # text is submitted in place of an answer.
            ans = f"AGENT ERROR: {e}"
        # The agent may hand back an arbitrary object. Anything that is not a
        # plain JSON scalar is stringified so that a single odd result cannot
        # make the JSON encoding of the whole submission fail.
        if not isinstance(ans, (str, int, float)):
            ans = str(ans)
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
    return logs, payload


def _submit_answers(username, agent_code, payload):
    """POST the answers to the scoring API and return the success status text.

    Raises whatever ``requests`` raises for transport, HTTP-status or
    JSON-decoding failures.
    """
    sub = {"username": username, "agent_code": agent_code, "answers": payload}
    r = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=60)
    r.raise_for_status()
    res = r.json()
    return (
        f"Submission Successful!\n"
        f"User: {res.get('username')}\n"
        f"Score: {res.get('score', 'N/A')}% "
        f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})"
    )


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with ``BasicAgent`` and submit the results.

    Args:
        profile: the logged-in user's profile, or ``None`` when nobody is
            logged in.

    Returns:
        ``(status, table)``: ``status`` is a human-readable message; ``table``
        is a ``pandas.DataFrame`` of the answered questions, or ``None`` when
        the run stopped before any question was processed.
    """
    if not profile:
        return "Please log in to Hugging Face using the button above.", None
    username = profile.username

    agent = BasicAgent()
    # When SPACE_ID is unset, os.getenv returns None and the link contains the
    # literal text "None"; this is kept as-is.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Broad catches below are deliberate: this is the UI boundary and every
    # failure is turned into a status message rather than a traceback.
    try:
        questions = _fetch_questions()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not questions:
        return "No questions fetched.", None

    logs, payload = _answer_questions(agent, questions)
    if not payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(logs)

    try:
        status = _submit_answers(username, agent_code, payload)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(logs)
    return status, pd.DataFrame(logs)
|
|
|
|
|
|
|
# Gradio front-end. Components are created in display order: heading,
# instructions, login button, run button, status box, results table.
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown("1. Clone this space 2. Log in 3. Click **Run Evaluation & Submit All Answers**")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    out_status = gr.Textbox(interactive=False, label="Status", lines=5)
    out_table = gr.DataFrame(label="Results")
    # No inputs are wired explicitly; the handler's `profile` argument is
    # presumably injected by Gradio from its OAuth type hint -- TODO confirm.
    btn.click(
        fn=run_and_submit_all,
        outputs=[out_status, out_table],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=False, debug=True)
|
|