File size: 4,029 Bytes
034ac91
5fc1f4b
 
034ac91
aa92f8e
034ac91
5fc1f4b
 
 
034ac91
79359ac
2c9a73e
79359ac
034ac91
 
 
 
 
 
 
 
 
 
727eb6f
79359ac
2c9a73e
 
727eb6f
79359ac
2c9a73e
727eb6f
 
 
 
aa92f8e
727eb6f
 
79359ac
2c9a73e
727eb6f
 
 
 
aa92f8e
034ac91
2c9a73e
034ac91
 
 
 
 
 
 
 
79359ac
034ac91
 
aa92f8e
 
034ac91
 
 
 
 
 
 
 
 
 
5fc1f4b
034ac91
5fc1f4b
 
034ac91
 
 
 
 
5fc1f4b
034ac91
 
 
 
5fc1f4b
f477fda
 
 
5fc1f4b
 
034ac91
5fc1f4b
034ac91
 
 
 
 
 
 
5fc1f4b
 
 
 
034ac91
7014cfe
034ac91
aa92f8e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import gradio as gr

from apscheduler.schedulers.background import BackgroundScheduler
from dabstep_benchmark.content import TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, VALIDATION_GUIDELINES
from dabstep_benchmark.leaderboard import *


def restart_space():
    """Ask ``HF_API`` to restart the repo identified by ``HF_LEADERBOARD``.

    Takes no arguments and returns ``None``. The ``__main__`` section of this
    script registers it as a recurring background job.

    NOTE(review): ``HF_API`` and ``HF_LEADERBOARD`` are not defined in this
    file; presumably they arrive through the star import from
    ``dabstep_benchmark.leaderboard`` -- confirm.
    """
    target_repo = HF_LEADERBOARD
    HF_API.restart_space(repo_id=target_repo)
    




if __name__ == "__main__":
    # Ensure the local score directory exists, then run a full refresh
    # (only_leaderboard=False) before any UI is built.
    os.makedirs("data/task_scores", exist_ok=True)
    refresh(only_leaderboard=False)

    def _leaderboard_table(frame):
        """Create a read-only, wrapped Dataframe component showing *frame*.

        Both leaderboard tabs use the same column configuration; fresh lists
        are built on every call so the two components share no state.
        """
        return gr.Dataframe(
            value=frame,
            datatype=["markdown", "str", "str", "str", "markdown", "str", "str", "str"],
            interactive=False,
            column_widths=["20%"],
            wrap=True,
        )

    with gr.Blocks() as demo:
        gr.Markdown(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # Initial contents for the two tables.
        validated_lb, unvalidated_lb = generate_leaderboard_df()

        with gr.Tab("Validated"):
            verified_table = _leaderboard_table(validated_lb)

        with gr.Tab("Unvalidated"):
            unverified_table = _leaderboard_table(unvalidated_lb)

        leaderboard_tables = [verified_table, unverified_table]

        # Re-populate both tables every time the page is loaded.
        demo.load(generate_leaderboard_df, inputs=None, outputs=leaderboard_tables)

        refresh_button = gr.Button("Refresh")
        # Invisible checkbox whose constant True value is fed to `refresh`
        # as its single input when the button is clicked.
        hidden_true_flag = gr.Checkbox(value=True, visible=False)
        refresh_button.click(
            refresh,
            inputs=[hidden_true_flag],
            outputs=leaderboard_tables,
        )
        gr.Markdown(VALIDATION_GUIDELINES, elem_classes="markdown-text")

        with gr.Row(), gr.Accordion("📙 Citation", open=False):
            # One textbox row per line of the citation text.
            citation_line_count = CITATION_BUTTON_TEXT.count("\n") + 1
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=citation_line_count,
                elem_id="citation-button",
            )

        with gr.Accordion("Submit new agent answers for evaluation"):
            with gr.Row():
                gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    split = gr.Radio(["all"], value="all", label="Split", visible=False)
                    agent_name_textbox = gr.Textbox(label="Agent name")
                    model_family_textbox = gr.Textbox(label="Model family")
                    # NOTE(review): this field is displayed but is not among
                    # the inputs wired to process_submission below -- confirm
                    # whether that is intentional.
                    system_prompt_textbox = gr.Textbox(label="System prompt example")
                    repo_url_textbox = gr.Textbox(label="Repo URL with agent code")
                with gr.Column():
                    organisation = gr.Textbox(label="Organisation")
                    mail = gr.Textbox(
                        label="Contact email (will be stored privately, & used if there is an issue with your submission)"
                    )
                    file_output = gr.File()

            with gr.Row():
                gr.LoginButton()
                submit_button = gr.Button("Submit answers")
            submission_result = gr.Markdown()

            # The order of this list is the positional order in which the
            # values are handed to process_submission.
            submission_inputs = [
                split,
                agent_name_textbox,
                model_family_textbox,
                repo_url_textbox,
                file_output,
                organisation,
                mail,
            ]
            submit_button.click(
                fn=process_submission,
                inputs=submission_inputs,
                outputs=submission_result,
            )

    # Run restart_space on a fixed interval in a background scheduler.
    restart_interval_seconds = 24 * 3600  # one day
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=restart_interval_seconds)
    scheduler.start()

    demo.launch(debug=True)