Update app.py
Browse files
app.py
CHANGED
@@ -25,62 +25,59 @@ def summarize_query(query: str) -> str:
|
|
25 |
|
26 |
search_tool = DuckDuckGoSearchTool()
|
27 |
|
28 |
-
# ---
|
29 |
|
30 |
-
|
31 |
You are a ReACT agent with scratchpad memory and a retry mechanism.
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
3. Observation: Record what tool returned.
|
37 |
|
38 |
-
If the first Observation is empty
|
39 |
-
4. Thought:
|
40 |
-
5. Action: summarize_query
|
41 |
-
6. Action: DuckDuckGoSearchTool
|
42 |
7. Observation: Record new result.
|
43 |
|
44 |
Then:
|
45 |
-
8. Thought: Reflect
|
46 |
-
9. FINAL ANSWER: Provide
|
47 |
-
|
48 |
-
Formatting
|
49 |
-
-
|
50 |
-
- Numbers
|
51 |
-
- Strings
|
52 |
-
- Lists
|
53 |
"""
|
54 |
|
55 |
# --- Build the Smart Agent ---
|
56 |
|
57 |
smart_agent = CodeAgent(
|
58 |
tools=[search_tool, summarize_query],
|
59 |
-
model=HfApiModel(),
|
60 |
-
|
61 |
)
|
62 |
|
63 |
-
# ---
|
64 |
|
65 |
class BasicAgent:
|
66 |
def __init__(self):
|
67 |
-
print("SmolAgent
|
68 |
|
69 |
def __call__(self, question: str) -> str:
|
70 |
-
print(f"
|
71 |
return smart_agent.run(question)
|
72 |
|
73 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
74 |
space_id = os.getenv("SPACE_ID")
|
75 |
-
if profile:
|
76 |
-
|
77 |
-
|
78 |
-
return "Please log in to Hugging Face using the button above.", None
|
79 |
-
|
80 |
agent = BasicAgent()
|
81 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
82 |
|
83 |
-
# Fetch questions
|
84 |
try:
|
85 |
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
|
86 |
resp.raise_for_status()
|
@@ -90,7 +87,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
90 |
except Exception as e:
|
91 |
return f"Error fetching questions: {e}", None
|
92 |
|
93 |
-
# Run agent
|
94 |
logs, payload = [], []
|
95 |
for item in questions:
|
96 |
tid = item.get("task_id")
|
@@ -105,19 +102,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
105 |
payload.append({"task_id": tid, "submitted_answer": ans})
|
106 |
|
107 |
if not payload:
|
108 |
-
return "Agent did not produce any answers
|
109 |
|
110 |
-
# Submit
|
111 |
sub = {"username": username, "agent_code": agent_code, "answers": payload}
|
112 |
try:
|
113 |
-
|
114 |
-
|
115 |
-
res =
|
116 |
status = (
|
117 |
f"Submission Successful!\n"
|
118 |
f"User: {res.get('username')}\n"
|
119 |
f"Score: {res.get('score', 'N/A')}% "
|
120 |
-
f"({res.get('correct_count', '?')}/
|
|
|
121 |
)
|
122 |
return status, pd.DataFrame(logs)
|
123 |
except Exception as e:
|
@@ -127,10 +125,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
127 |
|
128 |
with gr.Blocks() as demo:
|
129 |
gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
|
130 |
-
gr.Markdown("1
|
131 |
gr.LoginButton()
|
132 |
btn = gr.Button("Run Evaluation & Submit All Answers")
|
133 |
-
out_status = gr.Textbox(label="Status", lines=5, interactive=False)
|
134 |
out_table = gr.DataFrame(label="Results")
|
135 |
btn.click(fn=run_and_submit_all, outputs=[out_status, out_table])
|
136 |
|
|
|
25 |
|
26 |
search_tool = DuckDuckGoSearchTool()

# --- ReACT + Scratchpad + Retry Prompt ---
# NOTE: this guidance is prepended to every task in BasicAgent.__call__
# instead of being handed to the CodeAgent constructor. CodeAgent has no
# free-form `system_prompt` keyword in current smolagents releases (prompts
# are configured through `prompt_templates`), so passing it there fails when
# the module is imported.
system_prompt = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

1. Thought: Figure out what's needed.
2. Action: (Optional) Call a tool with a precise query.
3. Observation: Record tool output.

If the first Observation is empty/irrelevant:
4. Thought: Unclear result, reframe and retry.
5. Action: summarize_query(original query).
6. Action: DuckDuckGoSearchTool(reframed query).
7. Observation: Record new result.

Then:
8. Thought: Reflect using all observations.
9. FINAL ANSWER: Provide your answer.

Formatting:
- Start with FINAL ANSWER: [your answer]
- Numbers plain (no commas unless list)
- Strings no articles unless part of proper names
- Lists comma-separated, no extra punctuation
"""

# --- Build the Smart Agent ---

# The agent keeps its built-in system prompt, which is what describes the
# registered tools to the model; the custom guidance travels with the task.
smart_agent = CodeAgent(
    tools=[search_tool, summarize_query],
    model=HfApiModel(),
)

# --- Hook into Gradio App ---


class BasicAgent:
    """Callable wrapper that forwards a question to the shared ``smart_agent``."""

    def __init__(self):
        print("SmolAgent (ReACT + Scratchpad + Retry) initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return whatever ``smart_agent.run`` yields.

        Args:
            question: The task text to answer.

        Returns:
            The result of ``smart_agent.run`` for the guided task.
        """
        # Log only a short preview so long questions do not flood the console.
        print(f"Q: {question[:50]}...")
        # Prefix the ReACT/formatting guidance so the model sees it on every run.
        task = f"{system_prompt.strip()}\n\nQuestion: {question}"
        return smart_agent.run(task)
|
71 |
|
72 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
73 |
space_id = os.getenv("SPACE_ID")
|
74 |
+
if not profile:
|
75 |
+
return "Please log in with Hugging Face.", None
|
76 |
+
username = profile.username
|
|
|
|
|
77 |
agent = BasicAgent()
|
78 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
79 |
|
80 |
+
# 1. Fetch questions
|
81 |
try:
|
82 |
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
|
83 |
resp.raise_for_status()
|
|
|
87 |
except Exception as e:
|
88 |
return f"Error fetching questions: {e}", None
|
89 |
|
90 |
+
# 2. Run agent
|
91 |
logs, payload = [], []
|
92 |
for item in questions:
|
93 |
tid = item.get("task_id")
|
|
|
102 |
payload.append({"task_id": tid, "submitted_answer": ans})
|
103 |
|
104 |
if not payload:
|
105 |
+
return "Agent did not produce any answers.", pd.DataFrame(logs)
|
106 |
|
107 |
+
# 3. Submit
|
108 |
sub = {"username": username, "agent_code": agent_code, "answers": payload}
|
109 |
try:
|
110 |
+
post = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=60)
|
111 |
+
post.raise_for_status()
|
112 |
+
res = post.json()
|
113 |
status = (
|
114 |
f"Submission Successful!\n"
|
115 |
f"User: {res.get('username')}\n"
|
116 |
f"Score: {res.get('score', 'N/A')}% "
|
117 |
+
f"({res.get('correct_count', '?')}/"
|
118 |
+
f"{res.get('total_attempted', '?')})"
|
119 |
)
|
120 |
return status, pd.DataFrame(logs)
|
121 |
except Exception as e:
|
|
|
125 |
|
126 |
# --- Gradio UI ---
# Components are created in display order: title, short instructions, login
# button, run button, then the two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown("1) Clone this space 2) Log in 3) Run Evaluation & Submit All Answers")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only status text plus a table for the per-question results.
    out_status = gr.Textbox(
        label="Run Status",
        lines=5,
        interactive=False,
    )
    out_table = gr.DataFrame(label="Results")

    # No `inputs` are wired; the handler's two return values fill the outputs.
    btn.click(
        fn=run_and_submit_all,
        outputs=[out_status, out_table],
    )
|
134 |
|