Commit 8767411 (verified) · Parent: 28b372f

update app.py

Files changed (1): app.py (+133 -25)
app.py CHANGED
@@ -1,6 +1,8 @@
 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
+import os
+import json
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 
@@ -29,52 +31,152 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
 def restart_space():
-    API.restart_space(repo_id=REPO_ID)
+    try:
+        API.restart_space(repo_id=REPO_ID)
+    except Exception as e:
+        print(f"Error restarting space: {e}")
+
+# Ensure directories exist
+os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
+os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
 
 ### Space initialization
 try:
-    print(EVAL_REQUESTS_PATH)
+    print(f"Downloading evaluation requests from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
+        tqdm_class=None, etag_timeout=30, token=TOKEN
     )
-except Exception:
-    restart_space()
+    print("Successfully downloaded evaluation requests")
+except Exception as e:
+    print(f"Error downloading evaluation requests: {e}")
+    # Don't restart immediately, try to continue
+
 try:
-    print(EVAL_RESULTS_PATH)
+    print(f"Downloading evaluation results from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
     snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
+        tqdm_class=None, etag_timeout=30, token=TOKEN
    )
-except Exception:
-    restart_space()
+    print("Successfully downloaded evaluation results")
+except Exception as e:
+    print(f"Error downloading evaluation results: {e}")
+    # Don't restart immediately, try to continue
+
+# Add fallback data in case the remote fetch fails
+fallback_data = False
+if not os.listdir(EVAL_RESULTS_PATH):
+    print("No evaluation results found. Creating sample data for testing.")
+    fallback_data = True
+    # Create a sample result file for testing
+    sample_data = {
+        "config": {
+            "model_name": "Sample Arabic Model",
+            "submitted_time": "2023-01-01",
+            "base_model": "bert-base-arabic",
+            "revision": "main",
+            "precision": "float16",
+            "weight_type": "Original",
+            "model_type": "Encoder",
+            "license": "Apache-2.0",
+            "params": 110000000,
+            "still_on_hub": True
+        },
+        "results": {
+            "average": 75.5,
+            "abstract_algebra": 70.2,
+            "anatomy": 72.5,
+            "astronomy": 80.1,
+            "business_ethics": 68.3,
+            "clinical_knowledge": 75.0,
+            "college_biology": 77.4,
+            "college_chemistry": 74.2
+        }
+    }
+
+    with open(os.path.join(EVAL_RESULTS_PATH, "sample_result.json"), 'w') as f:
+        json.dump(sample_data, f)
 
 # Load the leaderboard DataFrame
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape) # Debug
-print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist()) # Debug
+try:
+    LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+    print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
+    print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist())
+    print("LEADERBOARD_DF Sample:", LEADERBOARD_DF.head(1).to_dict('records') if not LEADERBOARD_DF.empty else "Empty DataFrame")
+
+    # If DataFrame is empty even with fallback data, create a minimal sample
+    if LEADERBOARD_DF.empty and fallback_data:
+        print("Creating minimal sample data for leaderboard")
+        LEADERBOARD_DF = pd.DataFrame([{
+            "model_name": "Sample Arabic LLM",
+            "submitted_time": "2023-01-01",
+            "base_model": "bert-base-arabic",
+            "revision": "main",
+            "precision": "float16",
+            "weight_type": "Original",
+            "model_type": "Encoder",
+            "license": "Apache-2.0",
+            "params": 110000000,
+            "still_on_hub": True,
+            "average": 75.5,
+            "abstract_algebra": 70.2,
+            "anatomy": 72.5,
+            "astronomy": 80.1,
+            "business_ethics": 68.3,
+            "clinical_knowledge": 75.0,
+            "college_biology": 77.4,
+            "college_chemistry": 74.2
+        }])
+except Exception as e:
+    print(f"Error loading leaderboard data: {e}")
+    # Create a minimal sample DataFrame
+    LEADERBOARD_DF = pd.DataFrame([{
+        "model_name": "Error Loading Data",
+        "average": 0
+    }])
 
 # Load the evaluation queue DataFrames
-finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+try:
+    finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+except Exception as e:
+    print(f"Error loading evaluation queue data: {e}")
+    # Create empty DataFrames
+    finished_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
+    running_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
+    pending_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
 
-demo = gr.Blocks(css=custom_css)
-with demo:
+with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab", id=0):
             if LEADERBOARD_DF.empty:
                 gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
             else:
+                # Debug information as Markdown
+                gr.Markdown("### Leaderboard Data Debug Info")
+                gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
+                gr.Markdown(f"DataFrame Columns: {LEADERBOARD_DF.columns.tolist()}")
+
+                # Get the default columns to display
                 default_selection = [col.name for col in COLUMNS if col.displayed_by_default]
-                print("Default Selection before ensuring 'model_name':", default_selection) # Debug
+                print("Default Selection before ensuring 'model_name':", default_selection)
 
                 # Ensure "model_name" is included
                 if "model_name" not in default_selection:
                     default_selection.insert(0, "model_name")
-                print("Default Selection after ensuring 'model_name':", default_selection) # Debug
+                print("Default Selection after ensuring 'model_name':", default_selection)
 
-                print("LEADERBOARD_DF dtypes:\n", LEADERBOARD_DF.dtypes) # Debug: Check column types
+                # Make sure all columns exist in the DataFrame
+                for col in default_selection:
+                    if col not in LEADERBOARD_DF.columns:
+                        print(f"Warning: Column '{col}' not found in DataFrame. Adding empty column.")
+                        LEADERBOARD_DF[col] = None
+
+                print("LEADERBOARD_DF dtypes:\n", LEADERBOARD_DF.dtypes)
 
+                # Create the leaderboard component
                 leaderboard = Leaderboard(
                     value=LEADERBOARD_DF,
                     datatype=[col.type for col in COLUMNS],
@@ -83,7 +185,7 @@ with demo:
                         cant_deselect=[col.name for col in COLUMNS if col.never_hidden],
                         label="Select Columns to Display:",
                     ),
-                    search_columns=[col.name for col in COLUMNS if col.name in ["model_name", "license"]], # Updated to 'model_name'
+                    search_columns=["model_name", "license"],
                     hide_columns=[col.name for col in COLUMNS if col.hidden],
                     filter_columns=[
                         ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
@@ -93,14 +195,13 @@ with demo:
                         ),
                     ],
                     bool_checkboxgroup_label="Hide models",
-                    interactive=False,
+                    interactive=True, # Change to True to enable interaction
                 )
-                # No need to call leaderboard.render() since it's created within the Gradio context
 
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+        with gr.TabItem("🚀 Submit here!", elem_id="submit-tab", id=2):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
@@ -167,6 +268,13 @@ with demo:
             )
 
 scheduler = BackgroundScheduler()
+# Run every 30 minutes instead of every 30 seconds (1800 seconds = 30 minutes)
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch(debug=True)
+
+# Launch with a more descriptive message
+demo.queue(default_concurrency_limit=40).launch(
+    debug=True,
+    share=False,
+    show_error=True
+)
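
For reference, the core of this change is a defensive initialization pattern: create the local cache directories, attempt each dataset snapshot download, log failures instead of restarting the Space, and write a placeholder result file when nothing could be fetched. Below is a minimal standalone sketch of that pattern, assuming hypothetical repo IDs, local paths, and an HF_TOKEN environment variable; in the Space itself these values come from its configuration module.

import json
import os

from huggingface_hub import snapshot_download

# Hypothetical stand-ins for the Space's real constants (QUEUE_REPO, RESULTS_REPO,
# EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, TOKEN).
QUEUE_REPO = "my-org/requests"        # assumed dataset repo id
RESULTS_REPO = "my-org/results"       # assumed dataset repo id
EVAL_REQUESTS_PATH = "./eval-queue"   # assumed local directory
EVAL_RESULTS_PATH = "./eval-results"  # assumed local directory
TOKEN = os.environ.get("HF_TOKEN")    # assumed token source

# Make sure the local directories exist before anything reads from them.
os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

for repo_id, local_dir in [(QUEUE_REPO, EVAL_REQUESTS_PATH), (RESULTS_REPO, EVAL_RESULTS_PATH)]:
    try:
        # Mirror the dataset repo locally; on failure, log and keep going so the
        # UI can still start, rather than restarting the Space immediately.
        snapshot_download(repo_id=repo_id, local_dir=local_dir,
                          repo_type="dataset", etag_timeout=30, token=TOKEN)
    except Exception as e:
        print(f"Error downloading {repo_id}: {e}")

# If no results were fetched, write a placeholder result file so the
# leaderboard has something to render.
if not os.listdir(EVAL_RESULTS_PATH):
    sample = {"config": {"model_name": "placeholder-model"}, "results": {"average": 0.0}}
    with open(os.path.join(EVAL_RESULTS_PATH, "sample_result.json"), "w") as f:
        json.dump(sample, f)

The same idea carries through the rest of the commit: the leaderboard and evaluation-queue loaders are each wrapped in try/except, falling back to a sample or empty DataFrame so the Gradio app can always launch.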