darrenphodgson76 committed on
Commit
aee5bda
·
verified ·
1 Parent(s): f1dea33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -50
app.py CHANGED
@@ -1,106 +1,151 @@
1
  import os
2
- from openai import OpenAI # new client entrypoint
 
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
 
6
 
# --- Configuration ---
# In your Space settings, add OPENAI_API_KEY as a Secret; the client reads it automatically.
# Base URL of the scoring service; "/questions" and "/submit" are appended to it
# by run_and_submit_all().
API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model identifier passed to client.responses.create() by ask_chatgpt_4_1().
MODEL_NAME = "gpt-4.1"

# --- OpenAI Client ---
# Single module-level client instance shared by every request.
client = OpenAI()  # reads OPENAI_API_KEY from env
14
 
# --- GPT-4.1 Caller ---
def ask_chatgpt_4_1(question: str) -> str:
    """Send ``question`` to the configured model and return its text output.

    Args:
        question: Text passed unchanged as the ``input`` of the request.

    Returns:
        The ``output_text`` attribute of the response object.
    """
    request_args = {"model": MODEL_NAME, "input": question}
    reply = client.responses.create(**request_args)
    # The new library exposes the generated text as `output_text`.
    return reply.output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
# --- Agent Class ---
class BasicAgent:
    """Callable wrapper that forwards a question to ``ask_chatgpt_4_1``."""

    def __init__(self):
        print("BasicAgent using OpenAI GPT-4.1 (new client) ready.")

    def __call__(self, question: str) -> str:
        """Answer ``question``; any failure is returned as an error string.

        Args:
            question: The question text to forward.

        Returns:
            The model's reply, or ``"AGENT ERROR: ..."`` if the call raised.
        """
        print(f"Q>> {question}")
        try:
            answer = ask_chatgpt_4_1(question)
        except Exception as e:
            # Best effort: report the failure as the answer instead of raising.
            answer = f"AGENT ERROR: {e}"
        return answer
35
 
# --- Evaluation & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``BasicAgent`` and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` pair: a status message and either ``None`` or a
        ``pandas.DataFrame`` holding one row per answered question.
    """
    if not profile:
        return "Please log in to Hugging Face.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent = BasicAgent()
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 1. Fetch questions
    try:
        fetched = requests.get(f"{API_URL}/questions", timeout=15)
        fetched.raise_for_status()
        questions = fetched.json()
        if not questions:
            return "No questions fetched.", None
    except Exception as err:
        return f"Error fetching questions: {err}", None

    # 2. Answer each
    rows, answers = [], []
    for entry in questions:
        task_id = entry.get("task_id")
        text = entry.get("question")
        if task_id and text is not None:
            reply = agent(text)
            rows.append({"Task ID": task_id, "Question": text, "Answer": reply})
            answers.append({"task_id": task_id, "submitted_answer": reply})

    if not answers:
        return "No answers generated.", pd.DataFrame(rows)

    # 3. Submit
    body = {"username": username, "agent_code": code_link, "answers": answers}
    try:
        posted = requests.post(f"{API_URL}/submit", json=body, timeout=60)
        posted.raise_for_status()
        result = posted.json()
        status = "Success! {} scored {}% ({}/{})".format(
            result.get("username"),
            result.get("score", "N/A"),
            result.get("correct_count"),
            result.get("total_attempted"),
        )
        return status, pd.DataFrame(rows)
    except Exception as err:
        return f"Submission Failed: {err}", pd.DataFrame(rows)
88
 
# --- Gradio Interface ---
# Page layout: title, usage steps, login button, run button and the two
# outputs that run_and_submit_all() fills in.
with gr.Blocks() as demo:
    gr.Markdown(value="# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown(
        value="""
        1. Clone space and modify if needed
        2. Log in to Hugging Face
        3. Click **Run Evaluation & Submit All Answers**
        (May take several minutes)
        """
    )
    gr.LoginButton()
    run_btn = gr.Button(value="Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(
        label="Status",
        lines=5,
        interactive=False,
    )
    table_out = gr.DataFrame(
        label="Q&A Log",
        wrap=True,
    )

    # No explicit inputs are wired; the callback fills both outputs.
    run_btn.click(run_and_submit_all, inputs=None, outputs=[status_out, table_out])
106
 
 
1
  import os
2
+ import openai # official OpenAI client
3
+ from openai import OpenAI
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ from smolagents import DuckDuckGoSearchTool, tool
8
 
# --- Constants ---
# Base URL of the scoring service used for fetching questions and submitting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Configure OpenAI SDK & Client ---

# Fail at import time when the key is missing or empty, before any request is made.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None or openai_api_key == "":
    raise RuntimeError("Set OPENAI_API_KEY in your Space secrets or env!")

openai.api_key = openai_api_key
client = OpenAI()  # new client object
20
+
# --- Tool Definitions ---

# NOTE(review): presumably smolagents' @tool derives the tool's description and
# argument schema from the signature and docstring below — confirm before
# rewording the docstring.
@tool
def summarize_query(query: str) -> str:
    """
    Provides a structured summary to reframe a query if search results are unclear or poor.

    Args:
        query (str): The search query that needs summarization.
    Returns:
        str: A concise summary of key facts about the given query.
    """
    # Performs no summarization itself: returns the query behind a fixed
    # "Summarize and reframe: " prefix.
    return f"Summarize and reframe: {query}"

# Web-search tool instance.
# NOTE(review): neither search_tool nor summarize_query is called by any other
# code visible in this file; they are only named inside the prompt text.
search_tool = DuckDuckGoSearchTool()
36
+
# --- ReACT + Scratchpad + Auto‐Retry Instruction Prompt ---

# Text placed in front of every question by BasicAgent.__call__, which strips
# the surrounding whitespace and appends "QUESTION: <question>".
# NOTE(review): the steps below describe tool calls, but the visible request in
# BasicAgent.__call__ passes only `model` and `input`; no tools are attached.
instruction_prompt = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

For every question:
1. Thought: Figure out what's needed.
2. Action: (Optional) Call a tool with a precise query.
3. Observation: Record tool output.

If the first Observation is empty or irrelevant:
4. Thought: Unclear result; reframe and retry.
5. Action: summarize_query(original question).
6. Action: DuckDuckGoSearchTool(reframed query).
7. Observation: Record new result.

Then:
8. Thought: Reflect on all observations.
9. FINAL ANSWER: Provide your answer.

Formatting rules:
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""
60
+
# --- BasicAgent using the new OpenAI client ---

class BasicAgent:
    """Callable agent that answers a question with one GPT-4.1 request.

    Each call prepends the module-level ``instruction_prompt`` to the
    question, sends the text through the module-level ``client`` and returns
    only the text that follows the last ``FINAL ANSWER:`` marker the prompt
    asks the model to emit.
    """

    def __init__(self):
        print("SmolAgent (GPT-4.1) with ReACT, Scratchpad & Retry initialized.")

    @staticmethod
    def _extract_final_answer(text: str) -> str:
        """Return the answer that follows the last ``FINAL ANSWER:`` marker.

        Args:
            text: Full model output, normally reasoning followed by the marker.

        Returns:
            The stripped text after the last marker (matched case-insensitively),
            or the whole stripped text when no marker is present.
        """
        import re  # local import: the file's import block does not provide ``re``

        markers = list(re.finditer(r"FINAL ANSWER:", text, flags=re.IGNORECASE))
        if not markers:
            # Model ignored the template; fall back to everything it wrote.
            return text.strip()
        # The prompt itself uses the marker as a step label, so a transcript
        # may contain it more than once; the last occurrence is the answer.
        return text[markers[-1].end():].strip()

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return just the final answer text.

        Args:
            question: The question text to append to the instruction prompt.

        Returns:
            The extracted final answer, or ``"AGENT ERROR: ..."`` if building
            the prompt, calling the API or reading the response raised.
        """
        try:
            # Build the full prompt (inside the try so a non-string question
            # is reported as an agent error rather than aborting the run).
            prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
            print(f"Agent prompt (first 150 chars): {prompt[:150]}…")
            # Call GPT-4.1 via the new client.responses.create API
            response = client.responses.create(
                model="gpt-4.1",
                input=prompt,
            )
            # Presumably the scorer compares the submitted string as-is, so the
            # reasoning transcript must not be part of it — confirm with the API.
            return self._extract_final_answer(response.output_text)
        except Exception as e:
            return f"AGENT ERROR: {e}"
80
 
# --- Gradio / HF‐Spaces submission logic ---

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``BasicAgent`` and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is ``None`` when the run stops before any question is
        processed, otherwise a ``pandas.DataFrame`` with one row per answered
        question.
    """
    if not profile:
        return "Please log in to Hugging Face using the login button above.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent = BasicAgent()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 1. Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json() or []
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A non-list payload (e.g. an error object) would otherwise fail with an
    # AttributeError inside the loop below, outside any error handling.
    if not isinstance(questions, list):
        return (
            "Error fetching questions: unexpected response type "
            f"{type(questions).__name__}",
            None,
        )
    # Report an empty question set as such instead of blaming the agent.
    if not questions:
        return "No questions fetched.", None

    # 2. Run agent on each question
    logs, payload = [], []
    for item in questions:
        if not isinstance(item, dict):
            continue  # malformed entry: nothing to answer
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or q is None:
            continue
        ans = agent(q)
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "Agent did not produce any answers.", pd.DataFrame(logs)

    # 3. Submit answers
    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        post.raise_for_status()
        res = post.json()
        status = (
            f"Submission Successful!\n"
            f"User: {res.get('username')}\n"
            f"Overall Score: {res.get('score', 'N/A')}% "
            f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
            f"Message: {res.get('message', '')}"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        # Keep the per-question log visible even when the submission fails.
        return f"Submission Failed: {e}", pd.DataFrame(logs)
130
 
# --- Gradio Interface ---

# Page layout: title, usage steps, login button, run button and the two
# outputs that run_and_submit_all() fills in.
with gr.Blocks() as demo:
    gr.Markdown(value="# SmolAgent GAIA Runner (GPT-4.1) 🚀")
    gr.Markdown(
        value="""
        **Instructions:**
        1. Clone this space.
        2. In Settings Secrets add `OPENAI_API_KEY`.
        3. Log in to Hugging Face.
        4. Click **Run Evaluation & Submit All Answers**.
        **Note:** Evaluation may take several minutes.
        """
    )
    gr.LoginButton()
    run_btn = gr.Button(value="Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(
        label="Status",
        lines=5,
        interactive=False,
    )
    table_out = gr.DataFrame(
        label="Questions & Answers",
        wrap=True,
    )

    # No explicit inputs are wired; the callback fills both outputs.
    run_btn.click(run_and_submit_all, inputs=None, outputs=[status_out, table_out])
151