"""Gradio app that answers evaluation questions with an OpenAI chat model.

The app fetches questions from ``{API_URL}/questions``, asks the model named
by ``MODEL_NAME`` for an answer to each one, and posts all answers to
``{API_URL}/submit`` on behalf of the logged-in Hugging Face user.
"""
import os

import gradio as gr
import openai
import pandas as pd
import requests

# --- Configuration ---
# The key is read from the environment so it never has to live in the code.
openai.api_key = os.getenv("OPENAI_API_KEY")
API_URL = "https://agents-course-unit4-scoring.hf.space"
MODEL_NAME = "gpt-4.1"


# --- ChatGPT-4.1 Caller ---
def ask_chatgpt_4_1(question: str) -> str:
    """Send *question* to the chat model and return the text of its reply.

    Works with both generations of the ``openai`` package: releases >= 1.0
    removed ``openai.ChatCompletion`` in favour of
    ``openai.chat.completions``, so the entry point is picked at call time.

    Args:
        question: The user prompt forwarded verbatim to the model.

    Returns:
        The content of the first choice, or ``""`` when the API returns no
        text content.

    Raises:
        Exception: Whatever the OpenAI client raises (authentication,
            network, rate limit, ...). Callers decide how to handle it.
    """
    request = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        "temperature": 0.7,
        "max_tokens": 1500,
    }
    # `openai.OpenAI` only exists in openai>=1.0; there the legacy
    # `ChatCompletion` name is still present but raises when it is used.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
    else:
        response = openai.ChatCompletion.create(**request)

    content = response.choices[0].message.content
    # The content field may be None; keep the declared `str` return type.
    return content if content is not None else ""


# --- Agent Class ---
class BasicAgent:
    """Callable agent that answers a question with a single model call."""

    def __init__(self):
        print("BasicAgent using OpenAI GPT-4.1 ready.")

    def __call__(self, question: str) -> str:
        """Return the model's answer, or an ``AGENT ERROR`` string on failure.

        Errors are deliberately turned into an answer string so that one
        failing question does not abort the whole evaluation run.
        """
        print(f"Q>> {question}")
        try:
            return ask_chatgpt_4_1(question)
        except Exception as e:
            return f"AGENT ERROR: {e}"


# --- Evaluation & Submission ---
def _collect_answers(agent, questions):
    """Run *agent* on every usable question; return ``(logs, payload)``.

    ``logs`` holds the rows shown in the UI table and ``payload`` the entries
    sent to the scoring API. Items that are not dicts, have no ``task_id``,
    or have no ``question`` are skipped.
    """
    logs = []
    payload = []
    for item in questions:
        if not isinstance(item, dict):
            # Malformed entry: skip it rather than crash on `.get`.
            continue
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or q is None:
            continue
        ans = agent(q)
        logs.append({"Task ID": tid, "Question": q, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
    return logs, payload


def run_and_submit_all(
    profile: gr.OAuthProfile | None,
) -> tuple[str, pd.DataFrame | None]:
    """Fetch all questions, answer them, submit the answers, report the score.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, table)`` pair: a status message for the textbox and a
        DataFrame with one row per answered question, or ``None`` when the
        run stopped before any question was processed.
    """
    if not profile:
        return "Please log in to Hugging Face.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent = BasicAgent()
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 1. Fetch questions
    try:
        resp = requests.get(f"{API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
        if not questions:
            return "No questions fetched.", None
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error fetching questions: {e}", None

    # 2. Answer each
    logs, payload = _collect_answers(agent, questions)
    if not payload:
        return "No answers generated.", pd.DataFrame(logs)

    # 3. Submit
    submission = {
        "username": username,
        "agent_code": code_link,
        "answers": payload,
    }
    try:
        post = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        post.raise_for_status()
        res = post.json()
        status = (
            f"Success! {res.get('username')} scored "
            f"{res.get('score', 'N/A')}% "
            f"({res.get('correct_count')}/{res.get('total_attempted')})"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        # Keep the Q&A log visible even when the submission itself fails.
        return f"Submission Failed: {e}", pd.DataFrame(logs)


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown(
        """
        1. Clone space and modify if needed
        2. Log in to Hugging Face
        3. Click **Run Evaluation & Submit All Answers**

        (May take several minutes)
        """
    )
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Q&A Log", wrap=True)
    # No explicit inputs: the handler's only argument is the OAuth profile.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)