|
import os |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
|
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
|
|
|
|
from scorer import question_scorer |
|
from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION |
|
|
|
# Hugging Face Hub API token for authenticated operations; None when the
# environment variable is not set.
TOKEN = os.environ.get("TOKEN", None)

# Organization / owner name on the Hugging Face Hub.
OWNER="Online-Mind2Web"

# Year label for this leaderboard version.
YEAR_VERSION = "2025"

# Debug toggle for local runs.
# NOTE(review): this flag is never read in the visible code — confirm it is
# consumed elsewhere or remove it.
LOCAL_DEBUG = True
|
|
|
|
|
def get_dataframe_from_results(eval_path, sort_by="Average SR",
                               format_columns=("Easy", "Medium", "Hard", "Average SR")):
    """Load a leaderboard CSV, rank its rows, and format the score columns.

    Args:
        eval_path: Path to the leaderboard CSV file.
        sort_by: Column to rank rows by, descending. Defaults to the overall
            success-rate column, preserving the original behavior.
        format_columns: Columns to render with one decimal place.

    Returns:
        pd.DataFrame: Rows sorted descending by ``sort_by``, with each column
        in ``format_columns`` converted to a one-decimal string.
    """
    df = pd.read_csv(eval_path)
    # Sort first: formatting below turns the score columns into strings,
    # which would break numeric ordering if done beforehand.
    df = df.sort_values(by=[sort_by], ascending=False)
    for column in format_columns:
        df[column] = df[column].map("{:.1f}".format)
    return df
|
|
|
# Load both leaderboards once at startup; the Refresh button re-reads them
# on demand via refresh().
auto_eval_dataframe_test = get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv')

human_eval_dataframe_test = get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv')

# Per-column datatypes passed to the Gradio Dataframe components.
# NOTE(review): the "number" entries correspond to columns that
# get_dataframe_from_results formats into strings — confirm Gradio renders
# them as intended.
TYPES = ["str", "str", "str", "str", "number", "number", "number", "number", "str"]
|
|
|
def refresh():
    """Reload both leaderboard CSVs from disk.

    Returns the (auto, human) dataframes in the order expected by the
    Refresh button's ``outputs`` wiring.
    """
    sources = (
        './auto_Mind2Web-Online - Leaderboard_data.csv',
        './human_Mind2Web-Online - Leaderboard_data.csv',
    )
    return tuple(get_dataframe_from_results(path) for path in sources)
|
|
|
def upload_file(files):
    """Return the filesystem path (.name) of each uploaded file object."""
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths
|
|
|
|
|
# Build the Gradio app. The extra CSS lets the human leaderboard table grow
# slightly wider than its container.
demo = gr.Blocks(css="""
#human-leaderboard-table {
    width: auto; /* allow auto sizing */
    min-width: calc(100% + 20px); /* extend a little beyond the content */
}
""")

with demo:
    # Page header: title, external links, and introduction text.
    gr.HTML(TITLE)
    gr.HTML(LINKS)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        # Collapsible citation box, closed by default.
        # NOTE(review): the "π" in the accordion label looks like a
        # mis-encoded emoji/icon — confirm the intended character.
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
            )
    gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")

    # Tab 1: human-judged leaderboard.
    with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
        human_leaderboard_table_test = gr.components.Dataframe(
            value=human_eval_dataframe_test, datatype=TYPES, interactive=False,
            wrap=False
        )
    # Tab 2: automatically-evaluated leaderboard.
    with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
        auto_leaderboard_table_test = gr.components.Dataframe(
            value=auto_eval_dataframe_test, datatype=TYPES, interactive=False,
            wrap=False
        )

    # Tab 3: static submission instructions.
    with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
        with gr.Row():
            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

    # Manual refresh: re-read both CSVs and push the fresh dataframes into
    # the two tables (order must match refresh()'s return order: auto, human).
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            auto_leaderboard_table_test,
            human_leaderboard_table_test,
        ],
    )

# Background scheduler is started with no jobs registered.
# NOTE(review): nothing is ever scheduled in the visible code — confirm
# whether a periodic job (e.g. auto-refresh or space restart) was intended.
scheduler = BackgroundScheduler()
scheduler.start()

demo.launch(debug=True)
|
|