WeijianQi1999 committed on
Commit
e3f57bf
·
1 Parent(s): e3a5d5a

update Operator model name

Browse files
app.py CHANGED
@@ -8,7 +8,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
8
 
9
  # InfoStrings
10
  from scorer import question_scorer
11
- from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT, LEADERBOARD_HTML, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION
12
 
13
  TOKEN = os.environ.get("TOKEN", None)
14
 
 
8
 
9
  # InfoStrings
10
  from scorer import question_scorer
11
+ from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION
12
 
13
  TOKEN = os.environ.get("TOKEN", None)
14
 
auto_Mind2Web-Online - Leaderboard_data.csv CHANGED
@@ -1,5 +1,5 @@
1
  Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
2
- Operator,Unknown,OpenAI,OSU NLP,80.3,73.4,59,71.8,2025-3-22
3
  SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,65.1,36.1,18.5,39.8,2025-3-22
4
  Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,58.6,37.5,24.3,40.1,2025-3-22
5
  Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,61.9,28.1,21.2,35.8,2025-3-22
 
1
  Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
2
+ Operator,OpenAI Computer-Using Agent,OpenAI,OSU NLP,80.3,73.4,59,71.8,2025-3-22
3
  SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,65.1,36.1,18.5,39.8,2025-3-22
4
  Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,58.6,37.5,24.3,40.1,2025-3-22
5
  Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,61.9,28.1,21.2,35.8,2025-3-22
human_Mind2Web-Online - Leaderboard_data.csv CHANGED
@@ -1,5 +1,5 @@
1
  Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
2
- Operator,Unknown,OpenAI,OSU NLP,83.1,58.0,43.2,61.3,2025-3-22
3
  SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,60.2,25.2,8.1,30.7,2025-3-22
4
  Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,55.4,26.6,8.1,30.0,2025-3-22
5
  Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,56.6,20.3,14.9,29.0,2025-3-22
 
1
  Agent,Model,Organization,Source,Easy,Medium,Hard,Average SR,Date
2
+ Operator,OpenAI Computer-Using Agent,OpenAI,OSU NLP,83.1,58.0,43.2,61.3,2025-3-22
3
  SeeAct,gpt-4o-2024-08-06,OSU,OSU NLP,60.2,25.2,8.1,30.7,2025-3-22
4
  Browser Use,gpt-4o-2024-08-06,Browser Use,OSU NLP,55.4,26.6,8.1,30.0,2025-3-22
5
  Claude Computer Use,claude-3-5-sonnet-20241022,Anthropic,OSU NLP,56.6,20.3,14.9,29.0,2025-3-22