darrenphodgson76 committed on
Commit
0412d41
·
verified ·
1 Parent(s): f454ba7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -27
app.py CHANGED
@@ -1,18 +1,31 @@
 
 
 
 
 
1
  from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool
2
 
3
- # --- TOOL DEFINITIONS ---
 
 
 
4
 
5
  @tool
6
  def summarize_query(query: str) -> str:
7
  """
8
- Provides a structured summary to reframe a query if the search results are unclear or poor.
 
 
 
 
 
 
9
  """
10
  return f"Summarize and reframe: {query}"
11
 
12
- # Live DuckDuckGo search tool
13
  search_tool = DuckDuckGoSearchTool()
14
 
15
- # --- SYSTEM PROMPT WITH RETRY STRATEGY ---
16
 
17
  system_prompt = """
18
  You are a ReACT agent with scratchpad memory and a retry mechanism.
@@ -22,40 +35,167 @@ For every question:
22
  2. Action: (Optional) Use a tool with a clear query.
23
  3. Observation: Record what tool returned.
24
 
25
- **If the first Observation seems empty, irrelevant, or very weak:**
26
- 4. Thought: "The result was unclear. I should reframe and try again."
27
- 5. Action: Use summarize_query to improve the search query.
28
- 6. Action: Retry DuckDuckGoSearchTool with the improved query.
29
- 7. Observation: Record result.
30
- 8. Thought: Reflect carefully across both Observations.
31
- 9. FINAL ANSWER: Provide the final answer.
32
 
33
- **Final Output Rules:**
34
- - Always start with FINAL ANSWER: [your answer].
35
- - Numbers: plain (no commas unless list).
36
- - Strings: no articles unless inside proper names.
37
- - Lists: comma-separated without extra punctuation.
38
 
39
- **Scratchpad Example:**
 
 
 
 
40
 
41
- Thought: Find fruits in Embroidery painting.
 
42
  Action: DuckDuckGoSearchTool('fruits in Embroidery from Uzbekistan painting')
43
- Observation: (empty result)
44
- Thought: Result was unclear. I should reframe and retry.
45
- Action: summarize_query('fruits in painting Embroidery from Uzbekistan')
46
- Observation: fruits: pomegranate, apple, grape
47
- Thought: Now get the breakfast fruits.
48
  Action: DuckDuckGoSearchTool('breakfast menu October 1949 SS Ile de France')
49
  Observation: grapes, apples, oranges
50
- Thought: Matching fruits are grapes and apples.
51
  FINAL ANSWER: grapes, apples
52
-
53
- Be methodical, careful, and retry only once if needed.
54
  """
55
 
56
- # --- BUILD THE SMART AGENT ---
57
 
58
  smart_agent = CodeAgent(
59
  tools=[search_tool, summarize_query],
60
  model=HfApiModel(system_prompt=system_prompt)
61
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+
6
  from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool
7
 
8
+ # --- Constants ---
9
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
+
11
+ # --- Tool Definitions ---
12
 
@tool
def summarize_query(query: str) -> str:
    """
    Reframes a search query when earlier search results were unclear or poor.

    No summarization is performed here: the query is returned unchanged behind
    a fixed "Summarize and reframe: " prefix, so the caller receives an
    instruction string rather than a summary.

    Args:
        query: The search query that needs to be reframed.

    Returns:
        The text "Summarize and reframe: " followed by the original query.
    """
    # The argument entry above uses the plain "name: description" form because
    # the tool decorator derives the tool's schema from this docstring.
    return f"Summarize and reframe: {query}"
25
 
 
26
  search_tool = DuckDuckGoSearchTool()
27
 
28
+ # --- System Prompt for ReACT + Scratchpad + Auto-Retry ---
29
 
30
  system_prompt = """
31
  You are a ReACT agent with scratchpad memory and a retry mechanism.
 
35
  2. Action: (Optional) Use a tool with a clear query.
36
  3. Observation: Record what tool returned.
37
 
38
+ If the first Observation is empty or irrelevant:
39
+ 4. Thought: The result was unclear. I should reframe and retry.
40
+ 5. Action: summarize_query with the original query.
41
+ 6. Action: DuckDuckGoSearchTool with the reframed query.
42
+ 7. Observation: Record new result.
 
 
43
 
44
+ Then:
45
+ 8. Thought: Reflect on all observations.
46
+ 9. FINAL ANSWER: Provide the answer.
 
 
47
 
48
+ Formatting Rules:
49
+ - Begin with FINAL ANSWER: [your answer]
50
+ - Numbers: plain (no commas unless list)
51
+ - Strings: no articles unless inside proper names
52
+ - Lists: comma-separated without extra punctuation
53
 
54
+ Example scratchpad flow:
55
+ Thought: Need fruits from painting.
56
  Action: DuckDuckGoSearchTool('fruits in Embroidery from Uzbekistan painting')
57
+ Observation: (empty)
58
+ Thought: Unclear result, retry.
59
+ Action: summarize_query('fruits in Embroidery painting Uzbekistan')
60
+ Observation: pomegranate, apple, grape
61
+ Thought: Find breakfast fruits.
62
  Action: DuckDuckGoSearchTool('breakfast menu October 1949 SS Ile de France')
63
  Observation: grapes, apples, oranges
64
+ Thought: Overlap is grapes and apples.
65
  FINAL ANSWER: grapes, apples
 
 
66
  """
67
 
68
+ # --- Build the Smart Agent ---
69
 
70
  # Module-level agent used to answer questions; it is given the DuckDuckGo
  # search tool and the summarize_query tool.
  # NOTE(review): system_prompt is passed to HfApiModel as a keyword argument,
  # not to CodeAgent. Confirm against the installed smolagents version that
  # HfApiModel accepts it and that the prompt actually reaches the agent — TODO verify.
  smart_agent = CodeAgent(
71
  tools=[search_tool, summarize_query],
72
  model=HfApiModel(system_prompt=system_prompt)
73
  )
74
+
75
+ # --- Integrate into Gradio App ---
76
+
class BasicAgent:
    """Callable wrapper that answers one question at a time with an agent.

    Args:
        agent: Optional object exposing ``run(question)``. When omitted, the
            module-level ``smart_agent`` is looked up each time the wrapper
            is called.
    """

    def __init__(self, agent=None):
        self._agent = agent
        print("SmolAgent with ReACT, Scratchpad & Retry initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question to answer; only its first 50 characters
                are echoed to the log.

        Returns:
            The agent's result converted with ``str()``.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        runner = smart_agent if self._agent is None else self._agent
        # Coerce the result so the declared ``-> str`` holds whatever type
        # run() hands back; callers place this value in a JSON payload.
        answer = str(runner.run(question))
        print(f"Agent returning answer: {answer}")
        return answer
86
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        "Task ID", "Question" and "Submitted Answer" columns, or None when the
        run stops before any question is attempted (not logged in, agent
        construction failed, or the questions could not be fetched).
    """
    if not profile:
        print("User not logged in.")
        return "Please log in to Hugging Face using the button above.", None

    username = profile.username
    print(f"User logged in: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # os.getenv returns None when SPACE_ID is unset; the link then contains
    # the literal text "None".
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Reject anything that is not a non-empty list: iterating another JSON
    # shape would fail on item.get() below, outside any error handling.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid.", None

    # Run agent on each question
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed question is logged for display but not submitted.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}"
            })
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer
        })

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    # Submit answers
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload
    }
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        # Built inside the try so an unexpected response shape is reported as
        # a failed submission instead of raising out of the handler.
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', '')}"
        )
    except Exception as e:
        return f"Submission Failed: {e}", results_df
    return final_status, results_df
168
+
169
+ # --- Gradio Interface ---
170
+
171
# Build the page: a title, usage instructions, a login button, the run button,
# and the two output widgets (status text box and results table).
+ with gr.Blocks() as demo:
172
+ gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
173
+ gr.Markdown(
174
+ """
175
+ **Instructions:**
176
+ 1. Clone this space and modify if needed.
177
+ 2. Log in to Hugging Face.
178
+ 3. Click 'Run Evaluation & Submit All Answers'.
179
+ **Note:** Evaluation can take a few minutes.
180
+ """
181
+ )
182
+ gr.LoginButton()
183
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
184
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
185
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
186
+
187
# Clicking the button calls run_and_submit_all; its two return values fill
# status_output and results_table in that order. No inputs are listed here.
+ run_button.click(
188
+ fn=run_and_submit_all,
189
+ outputs=[status_output, results_table]
190
+ )
191
+
if __name__ == "__main__":
    # Startup banner, then report whichever Space variables are set,
    # then start the Gradio app.
    rule = "-"*30
    print("\n" + rule + " App Starting " + rule)
    for env_name in ("SPACE_HOST", "SPACE_ID"):
        env_value = os.getenv(env_name)
        if env_value:
            print(f"{env_name}: {env_value}")
    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=False)