"""Gradio app that renders the Open VLM Retrieval Leaderboard.

Scores are read from ``scores.json`` (model -> sector -> query language ->
score, plus an optional ``origin`` key per model) and shown as one HTML table
per query language and one table averaged across all query languages.

NOTE(review): the HTML/CSS markup in this file was reconstructed from a copy
whose tags had been stripped; class names, colours and layout should be
checked against the deployed version before release.
"""

import html
import json

import numpy as np
import pandas as pd

# Columns carried in the dataframes for styling only; never rendered as cells.
_HIDDEN_COLUMNS = ('origin', 'coming_soon')

# Origin assumed for models whose entry has no 'origin' key.
_DEFAULT_ORIGIN = 'CN'

# Query languages present for every sector in scores.json.
_QUERY_LANGUAGES = {
    'en': 'English',
    'fr': 'French',
    'es': 'Spanish',
    'de': 'German',
    'it': 'Italian',
}

_INTRO_MARKDOWN = """
This leaderboard presents the performance of various visual embedding models across different business sectors and languages.
The evaluation is based on retrieval accuracy for visual search tasks.

## Structure
- **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
- **Models**: Each row shows a different model's performance
- **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
- **Average**: Overall mean performance across all sectors for each model
- **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models

The leaderboard was created in collaboration with the Intelligence Lab of the ECE - Ecole centrale d'électronique.
"""

_HOW_TO_READ_MARKDOWN = """
### How to Read the Results
- Select a language tab to see how models perform with queries in that language
- All scores are normalized retrieval accuracy metrics
- Background colors indicate model origins (Blue = EU, Red = Chinese)
"""

_AVERAGE_TAB_MARKDOWN = """
### Average Performance Across Languages
This table shows the average performance of each model for each sector, averaged across all query languages.
"""

_FOOTER_MARKDOWN = """
---
### Additional Information
- Scores are updated regularly as new models are evaluated
- All evaluations use the same test set for fair comparison
- Models are evaluated on both English and French datasets to assess cross-lingual capabilities
- Color coding indicates model origin (Blue = EU, Red = Chinese)

### Citation
If you use these benchmarks in your research, please cite:
```
@article{visual_embeddings_benchmark_2025,
    title={Cross-lingual Visual Embeddings Benchmark},
    author={racine.ai},
    year={2025}
}
```
"""

# NOTE(review): reconstructed stylesheet -- confirm colours and class names.
_TABLE_CSS = """
<style>
.leaderboard-table { width: 100%; border-collapse: collapse; }
.leaderboard-table th, .leaderboard-table td { padding: 6px 10px; text-align: left; }
.leaderboard-table tr.origin-eu { background-color: rgba(0, 82, 204, 0.25); }
.leaderboard-table tr.origin-cn { background-color: rgba(204, 0, 0, 0.25); }
.leaderboard-legend { margin-top: 12px; }
.leaderboard-legend span { display: inline-block; padding: 2px 10px; margin-left: 8px; }
.leaderboard-legend .origin-eu { background-color: rgba(0, 82, 204, 0.25); }
.leaderboard-legend .origin-cn { background-color: rgba(204, 0, 0, 0.25); }
</style>
"""

# NOTE(review): reconstructed legend markup; the three labels are original.
_LEGEND_HTML = """
<div class="leaderboard-legend">
<strong>Model Origin:</strong>
<span class="origin-eu">European Union</span>
<span class="origin-cn">China</span>
</div>
"""


def load_svg_as_white(path='racine.svg'):
    """Return the logo SVG markup with a white fill applied.

    Args:
        path: Location of the SVG file; defaults to ``racine.svg`` in the
            current working directory.

    Returns:
        The SVG text with an inline white-fill style on the root ``<svg>``
        element, or ``""`` when the file cannot be read (the error is printed
        so a missing logo never prevents the app from starting).
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            svg_content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error loading SVG: {e}")
        return ""
    # NOTE(review): the original replacement string was lost; this styles only
    # the first <svg tag so nested elements keep their structure.
    return svg_content.replace('<svg', '<svg style="fill: white;"', 1)


def load_scores(path='scores.json'):
    """Load the benchmark scores from a JSON file.

    Args:
        path: Location of the scores file; defaults to ``scores.json``.

    Returns:
        The decoded JSON object.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _is_coming_soon(model):
    """Return True for the AMPERE-1 placeholder entry (but not AMPERE-1.1)."""
    return "AMPERE-1" in model and "AMPERE-1.1" not in model


def _sector_columns(scores):
    """Return ``(english_sectors, french_sectors)`` read from the first model.

    Both lists are empty when ``scores`` has no models.
    """
    if not scores:
        return [], []
    first_entry = next(iter(scores.values()))
    sectors_en = [col for col in first_entry if col.endswith('_EN')]
    sectors_fr = [col for col in first_entry if col.endswith('_FR')]
    return sectors_en, sectors_fr


def _new_row(model, entry):
    """Start a table row holding the model name and its styling metadata."""
    return {
        'Model': model,
        'origin': entry.get('origin', _DEFAULT_ORIGIN),
        'coming_soon': _is_coming_soon(model),
    }


def _fill_placeholder(row, sectors):
    """Blank out every score cell of a "coming soon" row, pinning it on top."""
    for sector in sectors:
        row[sector] = ""
    row['Average'] = ""
    # +inf sorts ahead of every real average in the descending sort.
    row['sort_value'] = float('inf')


def _assemble_frame(rows, sectors):
    """Sort rows by average (descending) and put the columns in display order.

    The resulting column order is Model, Average, the sector columns, then the
    hidden styling columns. An empty ``rows`` yields an empty frame with those
    same columns.
    """
    ordered = ['Model', 'Average', *sectors, *_HIDDEN_COLUMNS]
    if not rows:
        return pd.DataFrame(columns=ordered)
    frame = pd.DataFrame(rows).sort_values(
        'sort_value', ascending=False, kind='stable'
    )
    # Selecting the display columns also drops the temporary sort key.
    return frame[ordered]


def create_language_df(scores, language, sector_filter='all'):
    """Build the leaderboard table for one query language.

    Args:
        scores: Mapping of model name to its entry; each sector key maps to a
            dict of per-language scores.
        language: Query-language code used to index each sector's scores.
        sector_filter: ``'en_only'`` keeps ``*_EN`` sectors, ``'fr_only'``
            keeps ``*_FR`` sectors, anything else keeps both.

    Returns:
        A DataFrame sorted by average score (best first, "coming soon" rows on
        top) with scores formatted to three decimals as strings.

    Raises:
        KeyError: If a model lacks a selected sector or the language code.
    """
    sectors_en, sectors_fr = _sector_columns(scores)
    if sector_filter == 'en_only':
        selected_sectors = sectors_en
    elif sector_filter == 'fr_only':
        selected_sectors = sectors_fr
    else:
        selected_sectors = sectors_en + sectors_fr

    rows = []
    for model, entry in scores.items():
        row = _new_row(model, entry)
        if row['coming_soon']:
            _fill_placeholder(row, selected_sectors)
        else:
            values = [float(entry[sector][language]) for sector in selected_sectors]
            for sector, value in zip(selected_sectors, values):
                row[sector] = f"{value:.3f}"
            # Guard the empty selection instead of dividing by zero.
            avg_score = sum(values) / len(values) if values else 0.0
            row['Average'] = f"{avg_score:.3f}"
            row['sort_value'] = avg_score
        rows.append(row)

    return _assemble_frame(rows, selected_sectors)


def create_average_language_df(scores):
    """Build the leaderboard table averaged over all query languages.

    Args:
        scores: Mapping of model name to its entry; each sector key maps to a
            dict holding a score for every code in ``_QUERY_LANGUAGES``.

    Returns:
        A DataFrame sorted by average score (best first, "coming soon" rows on
        top) with scores formatted to three decimals as strings.

    Raises:
        KeyError: If a model lacks a sector or one of the language codes.
    """
    sectors_en, sectors_fr = _sector_columns(scores)
    all_sectors = sectors_en + sectors_fr

    rows = []
    for model, entry in scores.items():
        row = _new_row(model, entry)
        if row['coming_soon']:
            _fill_placeholder(row, all_sectors)
        else:
            for sector in all_sectors:
                per_language = [entry[sector][lang] for lang in _QUERY_LANGUAGES]
                row[sector] = f"{np.mean(per_language):.3f}"
            # The overall average is taken over the rounded, displayed sector
            # values so the Average column matches what the reader sees.
            sector_values = [float(row[sector]) for sector in all_sectors]
            avg_value = np.mean(sector_values) if sector_values else 0
            row['Average'] = f"{avg_value:.3f}"
            row['sort_value'] = avg_value
        rows.append(row)

    return _assemble_frame(rows, all_sectors)


def _render_table_html(df):
    """Render a leaderboard DataFrame as an origin-coloured HTML table.

    Hidden styling columns are not rendered; ``origin`` selects the row's CSS
    class and ``coming_soon`` replaces the score cells with a notice. All cell
    text is HTML-escaped because it originates from the scores file.
    """
    visible = [col for col in df.columns if col not in _HIDDEN_COLUMNS]
    parts = ['<table class="leaderboard-table"><thead><tr>']
    parts.extend(f"<th>{html.escape(str(col))}</th>" for col in visible)
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        origin = str(row.get('origin', _DEFAULT_ORIGIN)).lower()
        coming_soon = bool(row.get('coming_soon', False))
        parts.append(f'<tr class="origin-{html.escape(origin, quote=True)}">')
        for col in visible:
            if coming_soon and col != 'Model':
                # Only the Average cell carries the notice; the rest stay empty.
                parts.append(
                    "<td><em>Coming Soon</em></td>" if col == 'Average' else "<td></td>"
                )
            else:
                parts.append(f"<td>{html.escape(str(row[col]))}</td>")
        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


def create_leaderboard():
    """Build the Gradio Blocks app for the leaderboard.

    Reads the scores and the logo from disk, then lays out the header, the
    explanatory text, one tab averaged across languages, one tab per query
    language, and the footer.

    Returns:
        The ``gr.Blocks`` instance, ready for ``launch()``.
    """
    # Imported here so the data helpers above can be imported and tested
    # without the UI dependency installed.
    import gradio as gr

    scores = load_scores()
    white_svg_logo = load_svg_as_white()

    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
                   theme='argilla/argilla-theme') as demo:
        # Header with the logo linking to racine.ai and a white title.
        # NOTE(review): reconstructed markup -- confirm URL and styling.
        gr.HTML(f"""
<div style="background-color: #1f2937; padding: 20px; text-align: center;">
<a href="https://racine.ai" target="_blank">{white_svg_logo}</a>
<h1 style="color: white;">Open VLM Retrieval Leaderboard</h1>
</div>
""")

        gr.Markdown(_INTRO_MARKDOWN)
        gr.Markdown(_HOW_TO_READ_MARKDOWN)
        gr.HTML(_TABLE_CSS)

        with gr.Tabs():
            with gr.Tab("Average Across Languages"):
                gr.Markdown(_AVERAGE_TAB_MARKDOWN)
                gr.HTML(_render_table_html(create_average_language_df(scores)))
                gr.HTML(_LEGEND_HTML)

            for lang_code, lang_name in _QUERY_LANGUAGES.items():
                with gr.Tab(f"{lang_name} Queries"):
                    gr.Markdown(
                        f"\n### Performance with {lang_name} Queries\n"
                        "The table below shows how each model performs when "
                        f"the search queries are in {lang_name}.\n"
                    )
                    lang_df = create_language_df(scores, lang_code, 'all')
                    gr.HTML(_render_table_html(lang_df))
                    gr.HTML(_LEGEND_HTML)

        gr.Markdown(_FOOTER_MARKDOWN)

    return demo


# Create and launch the interface
if __name__ == "__main__":
    demo = create_leaderboard()
    demo.launch()