import os
import gradio as gr
import pandas as pd
import numpy as np
from apscheduler.schedulers.background import BackgroundScheduler
# Scoring helper and static page strings (titles, intro text, citation blocks)
from scorer import question_scorer
from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION
TOKEN = os.environ.get("TOKEN", None)

OWNER = "Online-Mind2Web"
YEAR_VERSION = "2025"
LOCAL_DEBUG = True
# Display the results
def get_dataframe_from_results(eval_path):
    """Load a leaderboard CSV, sort by success rate, and format score columns."""
    df = pd.read_csv(eval_path)
    df = df.sort_values(by=["Average SR"], ascending=False)
    # Render each success-rate column with one decimal place.
    for format_column in ['Easy', 'Medium', 'Hard', 'Average SR']:
        df[format_column] = df[format_column].map('{:.1f}'.format)
    return df
auto_eval_dataframe_test = get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv')
human_eval_dataframe_test = get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv')
# Column datatypes for the leaderboard Dataframe components below.
TYPES = ["str", "str", "str", "str", "number", "number", "number", "number", "str"]
def refresh():
    # Re-read both CSVs so the tables pick up any updated results.
    auto_eval_dataframe_test = get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv')
    human_eval_dataframe_test = get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv')
    return auto_eval_dataframe_test, human_eval_dataframe_test
def upload_file(files):
    # Gradio passes uploaded files as objects whose .name is the local temp path.
    file_paths = [file.name for file in files]
    return file_paths
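# Note: upload_file is defined above but never wired to a component in this
# Space. A minimal sketch of how it could be attached, assuming a hypothetical
# gr.UploadButton placed inside the Blocks context below (not part of the
# original app):
#
#     upload_button = gr.UploadButton("Upload results", file_count="multiple")
#     uploaded_files = gr.File(label="Uploaded files", file_count="multiple")
#     upload_button.upload(upload_file, inputs=upload_button, outputs=uploaded_files)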
demo = gr.Blocks(css="""
#human-leaderboard-table {
    width: auto; /* allow auto sizing */
    min-width: calc(100% + 20px); /* extend a little beyond the content */
}
""")
with demo:
    gr.HTML(TITLE)
    gr.HTML(LINKS)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("πŸ“™ Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
            )

    gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")
with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
human_leaderboard_table_test = gr.components.Dataframe(
value=human_eval_dataframe_test, datatype=TYPES, interactive=False,
# column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
# interactive=False,
# height=700,
# column_widths=[190, 140, 75, 75, 50, 50, 50, 50, 75],
wrap=False
)
with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
auto_leaderboard_table_test = gr.components.Dataframe(
value=auto_eval_dataframe_test, datatype=TYPES, interactive=False,
wrap=False
# column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
)
with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
with gr.Row():
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
refresh_button = gr.Button("Refresh")
refresh_button.click(
refresh,
inputs=[],
outputs=[
auto_leaderboard_table_test,
human_leaderboard_table_test,
],
)
scheduler = BackgroundScheduler()
scheduler.start()
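# No jobs are registered on the scheduler, so it is effectively idle here; the
# tables only refresh via the button above. Leaderboard Spaces often add a
# periodic job at this point, e.g. one that restarts the Space so fresh
# submissions are re-read. A hypothetical sketch (restart_space is not defined
# in this file):
#
#     scheduler.add_job(restart_space, "interval", seconds=3600)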
demo.launch(debug=True)