Spaces:

Omartificial-Intelligence-Space
/

Arabic-MMMLU-Leaderborad

Running

App Files Files Community

Omartificial-Intelligence-Space committed on Feb 27

Commit

8767411

verified ·

1 Parent(s): 28b372f

update app.py

Browse files

Files changed (1) hide show

app.py +133 -25

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
@@ -29,52 +31,152 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 def restart_space():
-    API.restart_space(repo_id=REPO_ID)
 ### Space initialization
 try:
-    print(EVAL_REQUESTS_PATH)
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
-except Exception:
-    restart_space()
 try:
-    print(EVAL_RESULTS_PATH)
     snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
-except Exception:
-    restart_space()
 # Load the leaderboard DataFrame
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)  # Debug
-print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist())  # Debug
 # Load the evaluation queue DataFrames
-finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-demo = gr.Blocks(css=custom_css)
-with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             if LEADERBOARD_DF.empty:
                 gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
             else:
                 default_selection = [col.name for col in COLUMNS if col.displayed_by_default]
-                print("Default Selection before ensuring 'model_name':", default_selection)  # Debug
                 # Ensure "model_name" is included
                 if "model_name" not in default_selection:
                     default_selection.insert(0, "model_name")
-                    print("Default Selection after ensuring 'model_name':", default_selection)  # Debug
-                print("LEADERBOARD_DF dtypes:\n", LEADERBOARD_DF.dtypes)  # Debug: Check column types
                 leaderboard = Leaderboard(
                     value=LEADERBOARD_DF,
                     datatype=[col.type for col in COLUMNS],
@@ -83,7 +185,7 @@ with demo:
                         cant_deselect=[col.name for col in COLUMNS if col.never_hidden],
                         label="Select Columns to Display:",
                     ),
-                    search_columns=[col.name for col in COLUMNS if col.name in ["model_name", "license"]],  # Updated to 'model_name'
                     hide_columns=[col.name for col in COLUMNS if col.hidden],
                     filter_columns=[
                         ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
@@ -93,14 +195,13 @@ with demo:
                         ),
                     ],
                     bool_checkboxgroup_label="Hide models",
-                    interactive=False,
                 )
-                # No need to call leaderboard.render() since it's created within the Gradio context
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
@@ -167,6 +268,13 @@ with demo:
             )
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch(debug=True)

 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
+import os
+import json
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 from src.submission.submit import add_new_eval
 def restart_space():
+    try:
+        API.restart_space(repo_id=REPO_ID)
+    except Exception as e:
+        print(f"Error restarting space: {e}")
+# Ensure directories exist
+os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
+os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
 ### Space initialization
 try:
+    print(f"Downloading evaluation requests from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
     snapshot_download(
+        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
+        tqdm_class=None, etag_timeout=30, token=TOKEN
     )
+    print("Successfully downloaded evaluation requests")
+except Exception as e:
+    print(f"Error downloading evaluation requests: {e}")
+    # Don't restart immediately, try to continue
 try:
+    print(f"Downloading evaluation results from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
     snapshot_download(
+        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
+        tqdm_class=None, etag_timeout=30, token=TOKEN
     )
+    print("Successfully downloaded evaluation results")
+except Exception as e:
+    print(f"Error downloading evaluation results: {e}")
+    # Don't restart immediately, try to continue
+# Add fallback sample data when the results directory is empty (e.g. because the remote fetch failed)
+fallback_data = False
+if not os.listdir(EVAL_RESULTS_PATH):
+    print("No evaluation results found. Creating sample data for testing.")
+    fallback_data = True
+    # Create a sample result file for testing
+    sample_data = {
+        "config": {
+            "model_name": "Sample Arabic Model",
+            "submitted_time": "2023-01-01",
+            "base_model": "bert-base-arabic",
+            "revision": "main",
+            "precision": "float16",
+            "weight_type": "Original",
+            "model_type": "Encoder",
+            "license": "Apache-2.0",
+            "params": 110000000,
+            "still_on_hub": True
+        },
+        "results": {
+            "average": 75.5,
+            "abstract_algebra": 70.2,
+            "anatomy": 72.5,
+            "astronomy": 80.1,
+            "business_ethics": 68.3,
+            "clinical_knowledge": 75.0,
+            "college_biology": 77.4,
+            "college_chemistry": 74.2
+        }
+    }
+    with open(os.path.join(EVAL_RESULTS_PATH, "sample_result.json"), 'w') as f:
+        json.dump(sample_data, f)
 # Load the leaderboard DataFrame
+try:
+    LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+    print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
+    print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist())
+    print("LEADERBOARD_DF Sample:", LEADERBOARD_DF.head(1).to_dict('records') if not LEADERBOARD_DF.empty else "Empty DataFrame")
+    # If DataFrame is empty even with fallback data, create a minimal sample
+    if LEADERBOARD_DF.empty and fallback_data:
+        print("Creating minimal sample data for leaderboard")
+        LEADERBOARD_DF = pd.DataFrame([{
+            "model_name": "Sample Arabic LLM",
+            "submitted_time": "2023-01-01",
+            "base_model": "bert-base-arabic",
+            "revision": "main",
+            "precision": "float16",
+            "weight_type": "Original",
+            "model_type": "Encoder",
+            "license": "Apache-2.0",
+            "params": 110000000,
+            "still_on_hub": True,
+            "average": 75.5,
+            "abstract_algebra": 70.2,
+            "anatomy": 72.5,
+            "astronomy": 80.1,
+            "business_ethics": 68.3,
+            "clinical_knowledge": 75.0,
+            "college_biology": 77.4,
+            "college_chemistry": 74.2
+        }])
+except Exception as e:
+    print(f"Error loading leaderboard data: {e}")
+    # Create a minimal sample DataFrame
+    LEADERBOARD_DF = pd.DataFrame([{
+        "model_name": "Error Loading Data",
+        "average": 0
+    }])
 # Load the evaluation queue DataFrames
+try:
+    finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+except Exception as e:
+    print(f"Error loading evaluation queue data: {e}")
+    # Create empty DataFrames
+    finished_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
+    running_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
+    pending_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
+with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab", id=0):
             if LEADERBOARD_DF.empty:
                 gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
             else:
+                # Debug information as Markdown
+                gr.Markdown("### Leaderboard Data Debug Info")
+                gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
+                gr.Markdown(f"DataFrame Columns: {LEADERBOARD_DF.columns.tolist()}")
+                # Get the default columns to display
                 default_selection = [col.name for col in COLUMNS if col.displayed_by_default]
+                print("Default Selection before ensuring 'model_name':", default_selection)
                 # Ensure "model_name" is included
                 if "model_name" not in default_selection:
                     default_selection.insert(0, "model_name")
+                    print("Default Selection after ensuring 'model_name':", default_selection)
+                # Make sure all columns exist in the DataFrame
+                for col in default_selection:
+                    if col not in LEADERBOARD_DF.columns:
+                        print(f"Warning: Column '{col}' not found in DataFrame. Adding empty column.")
+                        LEADERBOARD_DF[col] = None
+                print("LEADERBOARD_DF dtypes:\n", LEADERBOARD_DF.dtypes)
+                # Create the leaderboard component
                 leaderboard = Leaderboard(
                     value=LEADERBOARD_DF,
                     datatype=[col.type for col in COLUMNS],
                         cant_deselect=[col.name for col in COLUMNS if col.never_hidden],
                         label="Select Columns to Display:",
                     ),
+                    search_columns=["model_name", "license"],
                     hide_columns=[col.name for col in COLUMNS if col.hidden],
                     filter_columns=[
                         ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
                         ),
                     ],
                     bool_checkboxgroup_label="Hide models",
+                    interactive=True,  # Change to True to enable interaction
                 )
+        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("🚀 Submit here!", elem_id="submit-tab", id=2):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
             )
 scheduler = BackgroundScheduler()
+# Restart the Space every 30 minutes (1800 seconds)
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
+# Launch the app with debug mode and error display enabled, without a public share link
+demo.queue(default_concurrency_limit=40).launch(
+    debug=True,
+    share=False,
+    show_error=True
+)