Final_Assignment_Template

Paused

File size: 3,609 Bytes

import os
import openai  # 🍕 https://www.google.com/search?q=openai.ChatCompletion.create
import gradio as gr  # 🍣 https://www.google.com/search?q=Gradio+python
import requests
import pandas as pd

# --- Configuration ---
# Base URL of the scoring service; the "/questions" and "/submit" endpoints
# used below are appended to it.
API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model identifier sent with every chat-completion request.
MODEL_NAME = "gpt-4.1"
# The API key is read from the environment once, at import time; it is None
# when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# --- ChatGPT-4.1 Caller ---
def ask_chatgpt_4_1(question: str) -> str:
    """Send *question* to the configured OpenAI chat model and return the reply.

    The request uses a fixed system prompt, ``temperature=0.7`` and
    ``max_tokens=1500``. Both generations of the ``openai`` package are
    supported: the 1.x module-level client (``openai.chat.completions``) is
    used when available, otherwise the legacy ``openai.ChatCompletion``
    interface is called.

    Args:
        question: The user message to send to the model.

    Returns:
        The text of the first returned choice, or an empty string when the
        model returned no text content.

    Raises:
        Any exception raised by the OpenAI client is propagated unchanged;
        an ``IndexError`` is raised if the response contains no choices.
    """
    request = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        "temperature": 0.7,
        "max_tokens": 1500,
    }

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: `openai.ChatCompletion` only exists as a stub that
        # raises, so the request must go through the new client interface.
        response = openai.chat.completions.create(**request)
    else:
        # openai<1.0: legacy interface.
        response = openai.ChatCompletion.create(**request)

    content = response.choices[0].message.content
    # `content` can be None (e.g. no text returned); honour the `-> str` contract.
    return content if content is not None else ""

# --- Agent Class ---
class BasicAgent:
    """Callable agent that answers a question by delegating to ask_chatgpt_4_1."""

    def __init__(self):
        # Only announces readiness; the agent keeps no state.
        print("BasicAgent using OpenAI GPT-4.1 ready.")

    def __call__(self, question: str) -> str:
        """Return the model's answer to *question*.

        The question is echoed to stdout first. Any exception raised while
        answering is converted into an "AGENT ERROR: ..." string instead of
        propagating, so one failing question does not abort the caller.
        """
        print(f"Q>> {question}")
        try:
            answer = ask_chatgpt_4_1(question)
        except Exception as err:
            answer = f"AGENT ERROR: {err}"
        return answer

# --- Evaluation & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with BasicAgent, and submit the answers.

    Args:
        profile: The logged-in user's profile, or a falsy value when nobody
            is logged in. Only its ``username`` attribute is read.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message
        and ``table`` is a DataFrame of the question/answer log, or ``None``
        when the run stopped before any question was processed.
    """
    if not profile:
        return "Please log in to Hugging Face.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent = BasicAgent()
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Step 1: download the question list.
    try:
        fetched = requests.get(f"{API_URL}/questions", timeout=15)
        fetched.raise_for_status()
        questions = fetched.json()
        if not questions:
            return "No questions fetched.", None
    except Exception as fetch_err:
        return f"Error fetching questions: {fetch_err}", None

    # Step 2: answer every entry that has both a task id and a question.
    answered = []
    for entry in questions:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if task_id and question_text is not None:
            answered.append((task_id, question_text, agent(question_text)))

    logs = [
        {"Task ID": task_id, "Question": question_text, "Answer": answer}
        for task_id, question_text, answer in answered
    ]
    payload = [
        {"task_id": task_id, "submitted_answer": answer}
        for task_id, _, answer in answered
    ]

    if not payload:
        return "No answers generated.", pd.DataFrame(logs)

    # Step 3: submit everything in one request and summarise the result.
    submission = {"username": username, "agent_code": code_link, "answers": payload}
    try:
        reply = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        reply.raise_for_status()
        result = reply.json()
        summary = (
            f"Success! {result.get('username')} scored "
            f"{result.get('score', 'N/A')}% "
            f"({result.get('correct_count')}/{result.get('total_attempted')})"
        )
        return summary, pd.DataFrame(logs)
    except Exception as submit_err:
        return f"Submission Failed: {submit_err}", pd.DataFrame(logs)

# --- Gradio Interface ---
# Build the web UI: a title, usage instructions, a login button, a run button,
# and two output widgets (status text and the question/answer table).
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    # Usage instructions; the trailing double spaces inside the literal are
    # part of the Markdown text and must be preserved.
    gr.Markdown(
        """
        1. Clone space and modify if needed  
        2. Log in to Hugging Face  
        3. Click **Run Evaluation & Submit All Answers**  
        (May take several minutes)
        """
    )
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only outputs filled from the (status, table) pair returned by
    # run_and_submit_all.
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Q&A Log", wrap=True)

    # No `inputs` are wired here. NOTE(review): presumably Gradio supplies the
    # `profile` argument from the handler's `gr.OAuthProfile | None` type hint
    # once the user has logged in — confirm against the Gradio OAuth docs.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)