Commit
·
e3f57bf
1
Parent(s):
e3a5d5a
update Operator model name
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|
8 |
|
9 |
# InfoStrings
|
10 |
from scorer import question_scorer
|
11 |
-
from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT,
|
12 |
|
13 |
TOKEN = os.environ.get("TOKEN", None)
|
14 |
|
|
|
8 |
|
9 |
# InfoStrings
|
10 |
from scorer import question_scorer
|
11 |
+
from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION
|
12 |
|
13 |
TOKEN = os.environ.get("TOKEN", None)
|
14 |
|
auto_Mind2Web-Online - Leaderboard_data.csv
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
|
2 |
-
Operator,
|
3 |
SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,65.1,36.1,18.5,39.8,2025-3-22
|
4 |
Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,58.6,37.5,24.3,40.1,2025-3-22
|
5 |
Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,61.9,28.1,21.2,35.8,2025-3-22
|
|
|
1 |
Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
|
2 |
+
Operator,OpenAI Computer-Using Agent,OpenAI,OSU NLP,80.3,73.4,59,71.8,2025-3-22
|
3 |
SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,65.1,36.1,18.5,39.8,2025-3-22
|
4 |
Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,58.6,37.5,24.3,40.1,2025-3-22
|
5 |
Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,61.9,28.1,21.2,35.8,2025-3-22
|
human_Mind2Web-Online - Leaderboard_data.csv
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
|
2 |
-
Operator,
|
3 |
SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,60.2,25.2,8.1,30.7,2025-3-22
|
4 |
Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,55.4,26.6,8.1,30.0,2025-3-22
|
5 |
Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,56.6,20.3,14.9,29.0,2025-3-22
|
|
|
1 |
Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
|
2 |
+
Operator,OpenAI Computer-Using Agent,OpenAI,OSU NLP,83.1,58.0,43.2,61.3,2025-3-22
|
3 |
SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,60.2,25.2,8.1,30.7,2025-3-22
|
4 |
Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,55.4,26.6,8.1,30.0,2025-3-22
|
5 |
Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,56.6,20.3,14.9,29.0,2025-3-22
|