from pathlib import Path

from utils import load_json_results
from leaderboard_tab import search_leaderboard, update_columns_to_show, create_leaderboard_tab

# Constants
RERANKER_ABOUT_SECTION = """
## About Reranking Evaluation

The reranking evaluation assesses a model's ability to improve search quality by reordering initially retrieved results. Models are evaluated across multiple unseen Arabic datasets to ensure robust performance.

### Evaluation Metrics
- **MRR@10 (Mean Reciprocal Rank at 10)**: Measures ranking quality based on the position of the first relevant result in the top 10
- **NDCG@10 (Normalized Discounted Cumulative Gain at 10)**: Evaluates the ranking quality of all relevant results in the top 10
- **MAP (Mean Average Precision)**: Measures the overall precision across all relevant documents

All metrics are averaged across multiple evaluation datasets to provide a comprehensive assessment of model performance.

### Model Requirements
- Must accept query-document pairs as input
- Should output relevance scores for reranking (i.e., have cross-attention or a similar mechanism for query-document matching)
- Must support Arabic text processing

### Evaluation Process
1. Models are tested on multiple unseen Arabic datasets
2. For each dataset:
   - Initial candidate documents are provided
   - The model reranks the candidates
   - MRR@10, NDCG@10, and MAP are calculated
3. Final scores are averaged across all datasets
4. Models are ranked based on overall performance

### How to Prepare Your Model
- The model should be public on the HuggingFace Hub (private models are not supported yet)
- Make sure it works correctly with the `sentence-transformers` library
"""

# Global variables
reranking_df = None


def load_reranking_results(prepare_for_display=False, sort_col=None, drop_cols=None):
    """Load the reranking results JSON into a dataframe."""
    dataframe_path = Path(__file__).parent / "results" / "reranking_results.json"
    return load_json_results(
        dataframe_path,
        prepare_for_display=prepare_for_display,
        sort_col=sort_col,
        drop_cols=drop_cols
    )


def load_reranking_leaderboard():
    """Load and prepare the reranking leaderboard data"""
    global reranking_df

    # Prepare the reranking dataframe, sorted by average score
    reranking_df = load_reranking_results(
        prepare_for_display=True,
        sort_col="Average Score",
        drop_cols=["Revision", "Precision", "Task"],
    )
    reranking_df.insert(0, "Rank", range(1, 1 + len(reranking_df)))

    return reranking_df


def reranking_search_leaderboard(model_name, columns_to_show):
    """Search function for the reranking leaderboard"""
    return search_leaderboard(reranking_df, model_name, columns_to_show)


def update_reranker_columns_to_show(columns_to_show):
    """Update the displayed columns for the reranking leaderboard"""
    return update_columns_to_show(reranking_df, columns_to_show)


def create_reranking_tab():
    """Create the complete reranking leaderboard tab"""
    global reranking_df

    # Load data if not already loaded
    if reranking_df is None:
        reranking_df = load_reranking_leaderboard()

    # Define default columns to show
    default_columns = [
        "Rank", "Model", "Average Score", "Model Size (MB)", "Context Length",
        "Embedding Dimension", "Namaa Global Knowledge", "Navid General Knowledge",
    ]

    # Create and return the tab
    return create_leaderboard_tab(
        df=reranking_df,
        initial_columns_to_show=default_columns,
        search_function=reranking_search_leaderboard,
        update_function=update_reranker_columns_to_show,
        about_section=RERANKER_ABOUT_SECTION,
        task_type="Reranker"
    )
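

# ---------------------------------------------------------------------------
# Illustrative sketch only: the evaluation pipeline itself is not defined in
# this file, so the helpers below are assumptions meant to clarify the metrics
# listed in RERANKER_ABOUT_SECTION (MRR@10, NDCG@10, AP for a single query).
# `relevances` is a hypothetical list of 0/1 relevance labels in the order the
# model ranked the candidate documents. They are not used by the leaderboard.
# ---------------------------------------------------------------------------
import math


def example_mrr_at_10(relevances):
    """Reciprocal rank of the first relevant document within the top 10."""
    for rank, rel in enumerate(relevances[:10], start=1):
        if rel:
            return 1.0 / rank
    return 0.0


def example_ndcg_at_10(relevances):
    """DCG of the top-10 ranking divided by the DCG of an ideal ranking."""
    def dcg(rels):
        return sum(rel / math.log2(rank + 1) for rank, rel in enumerate(rels, start=1))

    ideal_dcg = dcg(sorted(relevances, reverse=True)[:10])
    return dcg(relevances[:10]) / ideal_dcg if ideal_dcg > 0 else 0.0


def example_average_precision(relevances):
    """Mean of precision@k over the ranks of the relevant documents.

    Assumes every relevant document for the query appears in `relevances`.
    """
    hits, precisions = 0, []
    for rank, rel in enumerate(relevances, start=1):
        if rel:
            hits += 1
            precisions.append(hits / rank)
    return sum(precisions) / len(precisions) if precisions else 0.0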
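

# ---------------------------------------------------------------------------
# Illustrative sketch of the compatibility check suggested under "How to
# Prepare Your Model": load a public Hub reranker with the
# `sentence-transformers` CrossEncoder class and score (query, document)
# pairs. The model id below is a placeholder, not a model evaluated by this
# leaderboard, and this function is not called anywhere in the app.
# ---------------------------------------------------------------------------
def example_sentence_transformers_check(model_id="your-org/your-arabic-reranker"):
    """Verify a Hub reranker loads and returns one relevance score per pair."""
    from sentence_transformers import CrossEncoder

    reranker = CrossEncoder(model_id)
    pairs = [
        # "What is the capital of Egypt?" / "Cairo is the capital of Egypt."
        ("ما هي عاصمة مصر؟", "القاهرة هي عاصمة جمهورية مصر العربية."),
        # "What is the capital of Egypt?" / "The Alps are located in Europe."
        ("ما هي عاصمة مصر؟", "تقع جبال الألب في أوروبا."),
    ]
    return reranker.predict(pairs)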