import os

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler

# Info strings and UI text defined in content.py.
from content import (
    TITLE,
    LINKS,
    INTRODUCTION_TEXT,
    LEADERBOARD_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    SUBMIT_INTRODUCTION,
)

# Configuration; TOKEN is read from the environment. The remaining constants
# are not referenced elsewhere in this file.
TOKEN = os.environ.get("TOKEN", None)
OWNER = "Online Mind2Web"
YEAR_VERSION = "2024"
LOCAL_DEBUG = True

# Display the results: load a leaderboard CSV, sort by average success rate,
# and format the score columns for display.
def get_dataframe_from_results(eval_path):
    df = pd.read_csv(eval_path)
    df = df.sort_values(by=["Average SR"], ascending=False)
    # Render each success-rate column with one decimal place.
    for format_column in ["Easy", "Medium", "Hard", "Average SR"]:
        df[format_column] = df[format_column].map("{:.1f}".format)
    return df
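# Note: the CSVs are assumed to already hold numeric "Easy", "Medium", "Hard",
# and "Average SR" columns; a non-numeric entry would make the "{:.1f}".format
# mapping above raise a ValueError.
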
# Two leaderboards are maintained: one scored by an automatic evaluator and one
# by human annotators.
auto_eval_dataframe_test = get_dataframe_from_results("./auto_Mind2Web-Online - Leaderboard_data.csv")
human_eval_dataframe_test = get_dataframe_from_results("./human_Mind2Web-Online - Leaderboard_data.csv")

# Gradio datatypes for the nine leaderboard columns, in display order.
TYPES = ["str", "str", "str", "str", "number", "number", "number", "number", "str"]

def refresh():
    """Re-read both CSVs so the Refresh button picks up newly added results."""
    auto_eval_dataframe_test = get_dataframe_from_results("./auto_Mind2Web-Online - Leaderboard_data.csv")
    human_eval_dataframe_test = get_dataframe_from_results("./human_Mind2Web-Online - Leaderboard_data.csv")
    return auto_eval_dataframe_test, human_eval_dataframe_test

def upload_file(files):
    # Return the local paths of the uploaded files (not wired to the UI below).
    file_paths = [file.name for file in files]
    return file_paths

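# If a submission upload were added to the layout, upload_file could be attached
# to an upload button; the component names below are hypothetical, not part of
# the original app:
#
#   upload_button = gr.UploadButton("Upload submission", file_count="multiple")
#   upload_button.upload(upload_file, upload_button, gr.Files())
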
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.HTML(LINKS)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
            )

    gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")

with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
human_leaderboard_table_test = gr.components.Dataframe(
value=human_eval_dataframe_test, datatype=TYPES, interactive=False,
column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
)
with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
auto_leaderboard_table_test = gr.components.Dataframe(
value=auto_eval_dataframe_test, datatype=TYPES, interactive=False,
column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
)
with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
with gr.Row():
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
refresh_button = gr.Button("Refresh")
refresh_button.click(
refresh,
inputs=[],
outputs=[
auto_leaderboard_table_test,
human_leaderboard_table_test,
],
)
# Background scheduler; started here, though no periodic jobs are registered.
scheduler = BackgroundScheduler()
scheduler.start()
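# A periodic job could be registered before launch to re-read the CSVs; the
# function and interval below are illustrative assumptions, and a job run this
# way would not by itself push updates into an already-open browser session:
#
#   scheduler.add_job(refresh, "interval", minutes=5)
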
demo.launch(debug=True)