File size: 3,609 Bytes
0412d41
0af627b
 
0412d41
 
 
0af627b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0412d41
0af627b
0412d41
 
0af627b
0412d41
 
0af627b
63d6133
0af627b
63d6133
 
0412d41
0af627b
0412d41
2ae2c93
0af627b
2ae2c93
63d6133
a058371
0af627b
0412d41
2ae2c93
0412d41
0af627b
a058371
 
 
 
0412d41
 
 
0af627b
63d6133
 
a058371
 
 
 
0412d41
63d6133
0af627b
a058371
 
 
0af627b
 
 
 
 
 
 
 
0412d41
0af627b
2ae2c93
 
a058371
0af627b
 
 
0412d41
a058371
0412d41
a058371
0412d41
63d6133
0412d41
 
63d6133
 
0af627b
 
 
 
63d6133
 
0412d41
63d6133
 
0af627b
63d6133
 
0412d41
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import openai  # 🍕 https://www.google.com/search?q=openai.ChatCompletion.create
import gradio as gr  # 🍣 https://www.google.com/search?q=Gradio+python
import requests
import pandas as pd

# --- Configuration ---
# OpenAI API key, taken from the environment (None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Base URL of the scoring service.
API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model identifier sent with every chat-completion request.
MODEL_NAME = "gpt-4.1"

# --- ChatGPT-4.1 Caller ---
def ask_chatgpt_4_1(question: str) -> str:
    """Send *question* to the configured OpenAI chat model and return the reply.

    Supports both generations of the ``openai`` SDK: the 1.x module-level
    client (``openai.chat.completions.create``) and the legacy pre-1.0
    ``openai.ChatCompletion.create`` entry point, which 1.x removed.

    Args:
        question: The user prompt; sent as one user message after a generic
            system prompt.

    Returns:
        The text of the first completion choice, or an empty string when the
        response carries no text content.

    Raises:
        Exception: Whatever the OpenAI SDK raises for the request is
            propagated unchanged to the caller.
    """
    request = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        "temperature": 0.7,
        "max_tokens": 1500,
    }
    # openai>=1.0 exposes `openai.chat`; there, touching the old
    # `openai.ChatCompletion` name raises instead of performing the request.
    if hasattr(openai, "chat"):
        response = openai.chat.completions.create(**request)
    else:
        response = openai.ChatCompletion.create(**request)

    content = response.choices[0].message.content
    # `content` may be None; keep the declared `str` return type.
    return content if content is not None else ""

# --- Agent Class ---
class BasicAgent:
    """Callable agent that forwards each question to ``ask_chatgpt_4_1``."""

    def __init__(self):
        """Announce on stdout that the agent has been created."""
        print("BasicAgent using OpenAI GPT-4.1 ready.")

    def __call__(self, question: str) -> str:
        """Answer *question*, never raising.

        The question is echoed to stdout first. Any exception raised while
        obtaining the answer is converted into an ``"AGENT ERROR: ..."``
        string, which is returned in place of an answer.
        """
        print(f"Q>> {question}")
        try:
            answer = ask_chatgpt_4_1(question)
        except Exception as exc:
            answer = f"AGENT ERROR: {exc}"
        return answer

# --- Evaluation & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, log_table)`` pair. ``log_table`` is ``None`` when
        the run stops at login or while fetching questions; otherwise it is a
        ``pandas.DataFrame`` with one row per answered question.
    """
    # Nothing can be submitted without a logged-in user.
    if not profile:
        return "Please log in to Hugging Face.", None

    submitter = profile.username
    space = os.getenv("SPACE_ID", "")
    agent = BasicAgent()
    repo_url = f"https://huggingface.co/spaces/{space}/tree/main"

    # 1. Fetch questions
    questions_url = f"{API_URL}/questions"
    try:
        fetched = requests.get(questions_url, timeout=15)
        fetched.raise_for_status()
        questions = fetched.json()
    except Exception as err:
        return f"Error fetching questions: {err}", None
    if not questions:
        return "No questions fetched.", None

    # 2. Answer each; entries lacking a task id or a question are skipped.
    rows, answers = [], []
    for entry in questions:
        task_id = entry.get("task_id")
        prompt = entry.get("question")
        if task_id and prompt is not None:
            reply = agent(prompt)
            rows.append({"Task ID": task_id, "Question": prompt, "Answer": reply})
            answers.append({"task_id": task_id, "submitted_answer": reply})

    if not answers:
        return "No answers generated.", pd.DataFrame(rows)

    # 3. Submit
    body = {"username": submitter, "agent_code": repo_url, "answers": answers}
    try:
        posted = requests.post(f"{API_URL}/submit", json=body, timeout=60)
        posted.raise_for_status()
        result = posted.json()
        scored_user = result.get('username')
        score = result.get('score', 'N/A')
        correct = result.get('correct_count')
        attempted = result.get('total_attempted')
        summary = f"Success! {scored_user} scored {score}% ({correct}/{attempted})"
        return summary, pd.DataFrame(rows)
    except Exception as err:
        return f"Submission Failed: {err}", pd.DataFrame(rows)

# --- Gradio Interface ---
with gr.Blocks() as demo:
    # The rocket emoji is written as an escape so the heading renders
    # correctly even if the file is saved or served with the wrong encoding
    # (the literal had previously been garbled into mojibake).
    gr.Markdown("# SmolAgent GAIA Evaluation Runner \U0001F680")
    gr.Markdown(
        """
        1. Clone space and modify if needed  
        2. Log in to Hugging Face  
        3. Click **Run Evaluation & Submit All Answers**  
        (May take several minutes)
        """
    )
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Q&A Log", wrap=True)

    # No explicit inputs: the handler's only parameter is annotated as
    # `gr.OAuthProfile | None`, which Gradio is expected to fill in from the
    # login state (NOTE(review): confirm against the installed Gradio version).
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)