File size: 4,397 Bytes
0412d41
 
 
 
 
98bf598
10e9b7d
0412d41
 
 
 
e80aab9
98bf598
 
31243f4
0412d41
 
 
 
 
 
 
31243f4
98bf598
 
 
 
a058371
98bf598
773749a
98bf598
 
 
 
 
 
 
0412d41
 
 
 
 
98bf598
0412d41
 
 
98bf598
0412d41
 
 
 
 
98bf598
 
0412d41
98bf598
 
 
a058371
 
98bf598
0412d41
 
 
 
 
 
 
 
 
a058371
0412d41
 
 
 
 
 
 
 
a058371
0412d41
 
 
 
a058371
 
 
 
 
0412d41
 
 
a058371
 
 
 
 
 
0412d41
 
a058371
0412d41
a058371
 
 
 
 
 
 
 
 
0412d41
a058371
 
 
 
0412d41
a058371
 
 
0412d41
a058371
0412d41
a058371
0412d41
a058371
0412d41
 
 
a058371
0412d41
a058371
 
 
 
0412d41
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import gradio as gr
import requests
import pandas as pd

from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool

# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" paths are
# appended to it when fetching tasks and posting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Tool Definitions ---

@tool
def summarize_query(query: str) -> str:
    """
    Provides a structured summary to reframe a query if search results are unclear or poor.

    Args:
        query: The search query that needs summarization.

    Returns:
        The original query with a fixed "Summarize and reframe" instruction prefix prepended.
    """
    # NOTE(review): smolagents' @tool is documented to derive the tool description and the
    # argument descriptions from this docstring, using plain `name: description` entries under
    # Args (types come from the signature). Confirm against the installed version before
    # rewording the docstring.
    # No summarization happens here: the function only tags the query with an instruction.
    return f"Summarize and reframe: {query}"

# Single shared search tool instance; it is placed in the agent's tool list further down.
search_tool = DuckDuckGoSearchTool()

# --- System Message for ReACT + Scratchpad + Auto-Retry ---
# Prompt text describing the Thought / Action / Observation loop, the
# reframe-and-retry fallback, and the "FINAL ANSWER:" formatting rules.
# It is consumed when the agent is built further down in this file.
# The literal is runtime text: edits to it change what the model is told.

system_message = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

For every question:
1. Thought: Think what is needed.
2. Action: (Optional) Use a tool with a clear query.
3. Observation: Record what tool returned.

If the first Observation is empty or irrelevant:
4. Thought: The result was unclear. I should reframe and retry.
5. Action: summarize_query with the original query.
6. Action: DuckDuckGoSearchTool with the reframed query.
7. Observation: Record new result.

Then:
8. Thought: Reflect on all observations.
9. FINAL ANSWER: Provide the answer.

Formatting Rules:
- Begin with FINAL ANSWER: [your answer]
- Numbers: plain (no commas unless list)
- Strings: no articles unless inside proper names
- Lists: comma-separated without extra punctuation
"""

# --- Build the Smart Agent ---

# `system_message` is not a keyword that `CodeAgent` accepts, so passing it to the
# constructor fails with TypeError as soon as this module is imported. The agent is
# therefore built with its stock prompt, and the custom ReACT instructions are appended
# to the system-prompt template afterwards. Appending (rather than replacing) keeps
# whatever tool and code-format guidance the stock template already carries.
# NOTE(review): relies on `agent.prompt_templates["system_prompt"]` as described in the
# smolagents docs — confirm against the installed smolagents version.
smart_agent = CodeAgent(
    tools=[search_tool, summarize_query],
    model=HfApiModel(),
)
smart_agent.prompt_templates["system_prompt"] += "\n\n" + system_message

# --- Integrate into Gradio App ---

class BasicAgent:
    """Callable wrapper that forwards each question to the module-level ``smart_agent``."""

    def __init__(self) -> None:
        """Announce on stdout that the wrapper has been created; no state is stored."""
        print("SmolAgent with ReACT, Scratchpad & Retry initialized.")

    def __call__(self, question: str) -> str:
        """Log a short preview of ``question`` and return what ``smart_agent.run`` yields.

        Args:
            question: The question text to hand to the agent.

        Returns:
            Whatever ``smart_agent.run(question)`` returns.
        """
        # Only the first 50 characters are echoed to keep the log readable.
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        answer = smart_agent.run(question)
        return answer

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the question set, answer every question with the agent, and submit the batch.

    Args:
        profile: The logged-in Hugging Face OAuth profile, or None when nobody
            is logged in.

    Returns:
        A ``(status_text, results)`` pair. ``results`` is None when the run stops
        before any question is processed (no login, fetch failure, or an empty
        question list); otherwise it is a pandas DataFrame with one row per
        answered question.
    """
    # Guard clause: a user name is required for the submission payload.
    if not profile:
        return "Please log in to Hugging Face using the button above.", None
    username = profile.username

    # Link to this Space's code, built from the SPACE_ID environment variable.
    space_id = os.getenv("SPACE_ID")
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"
    agent = BasicAgent()

    # Step 1: download the questions.
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as exc:
        return f"Error fetching questions: {exc}", None
    if not questions:
        return "No questions fetched.", None

    # Step 2: answer each question; entries lacking a task id or question are skipped.
    rows = []
    answers = []
    for entry in questions:
        task_id = entry.get("task_id")
        question = entry.get("question")
        if task_id and question is not None:
            try:
                answer = agent(question)
            except Exception as exc:
                # A failing question is recorded as an error string instead of aborting the run.
                answer = f"AGENT ERROR: {exc}"
            rows.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
            answers.append({"task_id": task_id, "submitted_answer": answer})

    if not answers:
        return "Agent did not produce any answers to submit.", pd.DataFrame(rows)

    # Step 3: post all answers in one request and summarize the server's reply.
    submission = {"username": username, "agent_code": code_link, "answers": answers}
    try:
        reply = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        reply.raise_for_status()
        result = reply.json()
        user = result.get("username")
        score = result.get("score", "N/A")
        correct = result.get("correct_count", "?")
        attempted = result.get("total_attempted", "?")
        summary = f"Submission Successful!\nUser: {user}\nScore: {score}% ({correct}/{attempted})"
        return summary, pd.DataFrame(rows)
    except Exception as exc:
        return f"Submission Failed: {exc}", pd.DataFrame(rows)

# --- Gradio UI ---
# Layout, top to bottom: heading, short instructions, Hugging Face login button,
# the run button, a read-only status box, and a results table.

with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown("1. Clone this space  2. Log in  3. Click **Run Evaluation & Submit All Answers**")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    out_status = gr.Textbox(label="Status", lines=5, interactive=False)
    out_table  = gr.DataFrame(label="Results")
    # The handler's two return values map onto the status box and the table, in that order.
    # No `inputs` are listed; the handler's `profile` argument is presumably supplied by
    # Gradio from the login session — confirm against the Gradio OAuth docs.
    btn.click(fn=run_and_submit_all, outputs=[out_status, out_table])

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    demo.launch(debug=True, share=False)