File size: 4,253 Bytes
0412d41
 
 
 
 
98bf598
10e9b7d
0412d41
 
 
 
e80aab9
98bf598
 
31243f4
0412d41
 
 
 
 
 
 
31243f4
98bf598
 
 
 
2ae2c93
98bf598
2ae2c93
98bf598
 
2ae2c93
 
 
98bf598
2ae2c93
 
 
 
0412d41
98bf598
0412d41
2ae2c93
 
 
 
 
 
 
 
98bf598
 
0412d41
98bf598
 
 
2ae2c93
 
98bf598
0412d41
2ae2c93
0412d41
 
 
2ae2c93
0412d41
 
2ae2c93
a058371
0412d41
 
 
2ae2c93
 
 
a058371
0412d41
 
2ae2c93
0412d41
a058371
 
 
 
 
0412d41
 
 
2ae2c93
a058371
 
 
 
 
0412d41
 
a058371
0412d41
a058371
 
 
 
 
2ae2c93
a058371
2ae2c93
a058371
0412d41
2ae2c93
 
 
a058371
0412d41
a058371
 
2ae2c93
 
0412d41
a058371
0412d41
a058371
0412d41
a058371
0412d41
 
 
2ae2c93
0412d41
a058371
2ae2c93
a058371
 
0412d41
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import gradio as gr
import requests
import pandas as pd

from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Tool Definitions ---

@tool
def summarize_query(query: str) -> str:
    """
    Provides a structured summary to reframe a query if search results are unclear or poor.

    Args:
        query (str): The search query that needs summarization.

    Returns:
        str: A concise summary of key facts about the given query.
    """
    # NOTE: the docstring above is kept verbatim on purpose. It is presumably
    # consumed by the `tool` decorator as the description shown to the model,
    # so its wording is runtime text rather than plain documentation.
    # NOTE(review): no summarisation happens here; the function only returns
    # the incoming query behind a fixed instruction prefix.
    reframed = "Summarize and reframe: {}".format(query)
    return reframed

# Search tool instance that is handed to the agent in its `tools` list.
search_tool = DuckDuckGoSearchTool()

# --- ReACT + Scratchpad + Retry Prompt ---

# Custom instruction text for the agent constructed further down in this file.
# It describes a Thought / Action / Observation loop with a single retry that
# goes through `summarize_query`, followed by answer-formatting rules.
# NOTE(review): step 6 names the search tool by its Python class name
# ("DuckDuckGoSearchTool"); the name under which the tool is exposed to the
# model may be different -- confirm against the smolagents default tools docs.
# This string is runtime text: changing any character changes what the model sees.
system_prompt = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

1. Thought: Figure out what's needed.
2. Action: (Optional) Call a tool with a precise query.
3. Observation: Record tool output.

If the first Observation is empty/irrelevant:
4. Thought: Unclear result, reframe and retry.
5. Action: summarize_query(original query).
6. Action: DuckDuckGoSearchTool(reframed query).
7. Observation: Record new result.

Then:
8. Thought: Reflect using all observations.
9. FINAL ANSWER: Provide your answer.

Formatting:
- Start with FINAL ANSWER: [your answer]
- Numbers plain (no commas unless list)
- Strings no articles unless part of proper names
- Lists comma-separated, no extra punctuation
"""

# --- Build the Smart Agent ---

# Build the agent with the library's own CodeAgent system prompt intact.
# NOTE(review): passing `system_prompt=` to the constructor is not part of the
# documented CodeAgent/MultiStepAgent signature in current smolagents releases
# (prompts are configured through `prompt_templates`), and replacing the
# built-in prompt wholesale would also drop the instructions that teach the
# model the code-action format -- confirm against the installed version.
smart_agent = CodeAgent(
    tools=[search_tool, summarize_query],
    model=HfApiModel(),
)

# Layer the custom guidance on top of the default system prompt instead of
# replacing it. The custom text contains no template placeholders, so it is
# appended as plain text.
_prompt_templates = getattr(smart_agent, "prompt_templates", None)
if _prompt_templates is not None and "system_prompt" in _prompt_templates:
    _prompt_templates["system_prompt"] = (
        f"{_prompt_templates['system_prompt']}\n\n"
        f"Additional instructions:\n{system_prompt.strip()}"
    )
# If the installed smolagents version exposes no `prompt_templates`, the agent
# simply runs with its default prompt rather than failing at import time.

# --- Hook into Gradio App ---

class BasicAgent:
    """Callable adapter that answers one question via the module-level ``smart_agent``.

    Instances hold no state; calling one forwards the question to
    ``smart_agent.run`` and returns the result as cleaned-up text.
    """

    def __init__(self):
        print("SmolAgent (ReACT + Scratchpad + Retry) initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        Args:
            question: The task text to hand to the agent.

        Returns:
            The agent's answer, coerced to ``str`` and normalised by
            ``_normalize_answer``.
        """
        print(f"Q: {question[:50]}...")
        return self._normalize_answer(smart_agent.run(question))

    @staticmethod
    def _normalize_answer(raw: object) -> str:
        """Turn a raw agent result into the bare answer text.

        The agent may hand back a non-string object (for example a number),
        while this class promises ``str`` and the caller puts the value into a
        JSON payload, so the result is always converted with ``str()``.

        The system prompt asks the model to start with ``FINAL ANSWER:``; that
        marker is removed here so only the answer itself is returned.
        NOTE(review): this assumes the scoring side compares the bare answer
        -- confirm against the evaluation API's expectations.

        Args:
            raw: Whatever ``smart_agent.run`` returned.

        Returns:
            The stripped answer text without a leading ``FINAL ANSWER:`` marker.
        """
        prefix = "FINAL ANSWER:"
        text = str(raw).strip()
        # Compare only the leading slice so case-folding quirks cannot shift
        # the cut position.
        if text[: len(prefix)].upper() == prefix:
            text = text[len(prefix):].strip()
        return text

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them with the agent, and submit.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is ``None`` when the
        run stops before the answering loop (no login, fetch failure, empty
        question list); otherwise it is a DataFrame built from the per-question
        log rows.
    """
    # Guard: nothing can be submitted without a logged-in user.
    if not profile:
        return "Please log in with Hugging Face.", None

    space_id = os.getenv("SPACE_ID")
    username = profile.username
    agent = BasicAgent()
    # Link to this Space's code, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 1. Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as exc:
        return f"Error fetching questions: {exc}", None
    if not questions:
        return "No questions fetched.", None

    # 2. Run agent
    rows = []      # human-readable log, one dict per answered question
    answers = []   # entries for the submission payload
    for entry in questions:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        # Skip entries lacking an id or a question.
        if question_text is None or not task_id:
            continue
        try:
            answer = agent(question_text)
        except Exception as exc:
            # A failing question is recorded as its error text; the run continues.
            answer = f"AGENT ERROR: {exc}"
        rows.append(
            {"Task ID": task_id, "Question": question_text, "Submitted Answer": answer}
        )
        answers.append({"task_id": task_id, "submitted_answer": answer})

    if not answers:
        return "Agent did not produce any answers.", pd.DataFrame(rows)

    # 3. Submit
    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    submit_url = f"{DEFAULT_API_URL}/submit"
    try:
        reply = requests.post(submit_url, json=submission, timeout=60)
        reply.raise_for_status()
        result = reply.json()
        user_line = f"User: {result.get('username')}"
        score = result.get('score', 'N/A')
        correct = result.get('correct_count', '?')
        attempted = result.get('total_attempted', '?')
        score_line = f"Score: {score}% ({correct}/{attempted})"
        status = "\n".join(["Submission Successful!", user_line, score_line])
        return status, pd.DataFrame(rows)
    except Exception as exc:
        return f"Submission Failed: {exc}", pd.DataFrame(rows)

# --- Gradio UI ---

# Declare the page. Components are created in the order written here, so the
# statement order below is the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown("1) Clone this space  2) Log in  3) Run Evaluation & Submit All Answers")
    # Login control; `run_and_submit_all` refuses to run without a profile.
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    # Receives the first element of the handler's return value (status text).
    out_status = gr.Textbox(label="Run Status", lines=5, interactive=False)
    # Receives the second element of the handler's return value (results table or None).
    out_table  = gr.DataFrame(label="Results")
    # No `inputs` are wired: the handler's `profile` argument is presumably
    # injected by Gradio from its `gr.OAuthProfile` type hint -- confirm
    # against the Gradio OAuth documentation.
    btn.click(fn=run_and_submit_all, outputs=[out_status, out_table])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)