|
import gradio as gr |
|
import json |
|
import pandas as pd |
|
import numpy as np |
|
|
|
|
|
def load_svg_as_white(svg_path='racine.svg'):
    """Return the logo SVG markup with an inline style that renders it white.

    The file is read as UTF-8 and a ``style`` attribute (brightness/invert
    filter, absolute positioning, fixed 500px width) is injected into the
    opening ``<svg`` tag.

    Args:
        svg_path: Path of the SVG file to read. Defaults to ``'racine.svg'``.

    Returns:
        The styled SVG markup. If the file is missing or cannot be read, an
        HTML comment placeholder is returned instead so the caller can still
        embed the result in a page.
    """
    try:
        with open(svg_path, 'r', encoding='utf-8') as f:
            svg_content = f.read()
    except FileNotFoundError:
        print(f"Warning: {svg_path} file not found")
        return "<!-- SVG file not found -->"
    except Exception as e:
        # Deliberate best-effort: a broken logo must not prevent the app from starting.
        print(f"Error loading SVG: {e}")
        return "<!-- Error loading SVG -->"

    # Only the root element is styled (count=1); nested <svg> elements must
    # keep their own layout instead of also becoming absolutely positioned.
    return svg_content.replace(
        '<svg',
        '''<svg style="filter: brightness(0) invert(1); left: 33%; position: absolute; margin-top: 0px; width: 500px;"''',
        1,
    )
|
|
|
|
|
def load_scores(path='scores.json'):
    """Load the benchmark scores from a JSON file.

    Args:
        path: Path of the JSON file to read. Defaults to ``'scores.json'``.

    Returns:
        The decoded JSON document. The other functions in this file index it
        as a mapping of model name to that model's entry.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale
    # encoding, which would mis-decode non-ASCII model or sector names.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
|
|
|
|
def create_language_df(scores, language, sector_filter='all'):
    """Build the leaderboard table for queries in a single language.

    Args:
        scores: Mapping of model name to that model's entry. An entry maps
            sector keys (names ending in ``_EN`` or ``_FR``) to a mapping of
            language code to score, and may carry an ``'origin'`` key.
        language: Language code looked up inside every selected sector.
        sector_filter: ``'en_only'`` keeps the ``_EN`` sectors, ``'fr_only'``
            keeps the ``_FR`` sectors; any other value keeps both.

    Returns:
        A ``pandas.DataFrame`` sorted by descending average, with columns
        ``Model``, ``Average``, one column per selected sector, ``origin``
        and ``coming_soon``. Scores are strings formatted to three decimals.
        Models whose name contains ``"AMPERE-1"`` but not ``"AMPERE-1.1"``
        are flagged ``coming_soon``, get empty score cells and are listed
        first. An empty ``scores`` mapping yields an empty frame that still
        has the four fixed columns.

    Raises:
        KeyError: If a non-"coming soon" model lacks one of the selected
            sectors or the requested language.
    """
    models = list(scores.keys())
    if not models:
        # Nothing to rank: return a correctly shaped empty table instead of
        # failing on models[0] / the missing sort column below.
        return pd.DataFrame(columns=['Model', 'Average', 'origin', 'coming_soon'])

    # The sector list is taken from the first model's entry only.
    first_entry = scores[models[0]]
    sectors_en = [col for col in first_entry if col.endswith('_EN')]
    sectors_fr = [col for col in first_entry if col.endswith('_FR')]

    if sector_filter == 'en_only':
        selected_sectors = sectors_en
    elif sector_filter == 'fr_only':
        selected_sectors = sectors_fr
    else:
        selected_sectors = sectors_en + sectors_fr

    data = []
    for model in models:
        entry = scores[model]
        # Models without an explicit origin are treated as 'CN'.
        row = {'Model': model, 'origin': entry.get('origin', 'CN')}

        if "AMPERE-1" in model and "AMPERE-1.1" not in model:
            # Placeholder row: no scores are read for this model.
            row['coming_soon'] = True
            for sector in selected_sectors:
                row[sector] = ""
            row['Average'] = ""
            # +inf puts the placeholder at the top of the descending sort.
            row['sort_value'] = float('inf')
        else:
            row['coming_soon'] = False
            sector_scores = {
                sector: entry[sector][language] for sector in selected_sectors
            }
            for sector, score in sector_scores.items():
                row[sector] = f"{score:.3f}"

            # Guard the empty selection (e.g. 'fr_only' with no _FR sectors),
            # matching create_average_language_df, instead of dividing by zero.
            if sector_scores:
                avg_score = sum(float(v) for v in sector_scores.values()) / len(sector_scores)
            else:
                avg_score = 0
            row['Average'] = f"{avg_score:.3f}"
            row['sort_value'] = avg_score

        data.append(row)

    df = pd.DataFrame(data)
    # Rank on the numeric helper column, then discard it.
    df = df.sort_values('sort_value', ascending=False).drop('sort_value', axis=1)

    # Display order: identity and average first, sectors next, metadata last.
    cols = ['Model', 'Average'] + list(selected_sectors) + ['origin', 'coming_soon']
    return df[cols]
|
|
|
def create_average_language_df(scores, languages=('en', 'fr', 'es', 'de', 'it')):
    """Build the leaderboard table averaged across query languages.

    Args:
        scores: Mapping of model name to that model's entry. An entry maps
            sector keys (names ending in ``_EN`` or ``_FR``) to a mapping of
            language code to score, and may carry an ``'origin'`` key.
        languages: Language codes averaged for every sector. Defaults to
            English, French, Spanish, German and Italian.

    Returns:
        A ``pandas.DataFrame`` sorted by descending average, with columns
        ``Model``, ``Average``, one column per sector (the mean over
        ``languages``), ``origin`` and ``coming_soon``. Scores are strings
        formatted to three decimals. Models whose name contains
        ``"AMPERE-1"`` but not ``"AMPERE-1.1"`` are flagged ``coming_soon``,
        get empty score cells and are listed first. An empty ``scores``
        mapping yields an empty frame that still has the four fixed columns.

    Raises:
        KeyError: If a non-"coming soon" model lacks one of the sectors or
            one of the languages.
    """
    models = list(scores.keys())
    if not models:
        # Nothing to rank: return a correctly shaped empty table instead of
        # failing on models[0] / the missing sort column below.
        return pd.DataFrame(columns=['Model', 'Average', 'origin', 'coming_soon'])

    # The sector list is taken from the first model's entry only.
    first_entry = scores[models[0]]
    sectors_en = [col for col in first_entry if col.endswith('_EN')]
    sectors_fr = [col for col in first_entry if col.endswith('_FR')]
    all_sectors = sectors_en + sectors_fr

    data = []
    for model in models:
        entry = scores[model]
        # Models without an explicit origin are treated as 'CN'.
        row = {'Model': model, 'origin': entry.get('origin', 'CN')}

        if "AMPERE-1" in model and "AMPERE-1.1" not in model:
            # Placeholder row: no scores are read for this model.
            row['coming_soon'] = True
            for sector in all_sectors:
                row[sector] = ""
            row['Average'] = ""
            # +inf puts the placeholder at the top of the descending sort.
            row['sort_value'] = float('inf')
        else:
            row['coming_soon'] = False

            sector_means = []
            for sector in all_sectors:
                sector_avg = np.mean([entry[sector][lang] for lang in languages])
                sector_means.append(sector_avg)
                row[sector] = f"{sector_avg:.3f}"

            # Average the unrounded sector means (as create_language_df does
            # with raw scores); averaging the 3-decimal strings would round
            # twice and could mis-rank models that are close together.
            avg_value = np.mean(sector_means) if sector_means else 0
            row['Average'] = f"{avg_value:.3f}"
            row['sort_value'] = avg_value

        data.append(row)

    df = pd.DataFrame(data)
    # Rank on the numeric helper column, then discard it.
    df = df.sort_values('sort_value', ascending=False).drop('sort_value', axis=1)

    # Display order: identity and average first, sectors next, metadata last.
    cols = ['Model', 'Average'] + all_sectors + ['origin', 'coming_soon']
    return df[cols]
|
|
|
def _build_table_html(df):
    """Render a leaderboard DataFrame as an HTML ``<table>`` string.

    The ``origin`` and ``coming_soon`` columns are not shown as cells:
    ``origin`` becomes the ``data-origin`` attribute of each row (used by the
    page's CSS for row colouring), and a truthy ``coming_soon`` replaces the
    row's score cells with a "Coming Soon" label in the ``Average`` column
    and empty cells elsewhere.
    """
    # NOTE(review): values are interpolated without HTML escaping; this is
    # fine while the score file is trusted - confirm if that ever changes.
    visible_cols = [col for col in df.columns if col not in ('origin', 'coming_soon')]

    parts = ["<table class='gradio-dataframe'><thead><tr>"]
    parts.extend(f"<th>{col}</th>" for col in visible_cols)
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        origin = row.get('origin', 'CN')
        coming_soon = row.get('coming_soon', False)
        parts.append(f"<tr data-origin='{origin}'>")

        for col in visible_cols:
            if coming_soon and col != 'Model':
                if col == 'Average':
                    parts.append("<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>")
                else:
                    parts.append("<td></td>")
            else:
                parts.append(f"<td>{row[col]}</td>")

        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


def _render_origin_legend():
    """Add the colour legend explaining the row background colours."""
    gr.HTML("""
        <div style="margin-top: 20px; margin-bottom: 40px;">
            <div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
            <div style="display: flex; align-items: center; margin-bottom: 8px;">
                <div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
                <div>European Union</div>
            </div>
            <div style="display: flex; align-items: center;">
                <div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
                <div>China</div>
            </div>
        </div>
    """)


def create_leaderboard():
    """Assemble the Gradio app that displays the retrieval leaderboard.

    Loads the scores with ``load_scores()`` and the logo with
    ``load_svg_as_white()``, then builds a ``gr.Blocks`` page made of a
    header, explanatory text, the row-colouring CSS, one tab with scores
    averaged across languages (``create_average_language_df``), one tab per
    query language (``create_language_df``), and a footer.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    scores = load_scores()
    # Query-language code -> label used in tab titles and headings.
    languages = {
        'en': 'English',
        'fr': 'French',
        'es': 'Spanish',
        'de': 'German',
        'it': 'Italian'
    }

    white_svg_logo = load_svg_as_white()

    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
                   theme='argilla/argilla-theme') as demo:

        # Header: linked logo and page title.
        gr.HTML(f"""
            <div style="padding: 2em; margin-bottom: 4em; height: 350px; background-color: transparent;">
                <div style="display: flex; flex-direction: column; align-items: center; justify-content: center;">
                    <div style="width: 50px; margin-bottom: 20px; cursor: pointer;">
                        <a href="https://racine.ai" target="_blank" style="display: block;">
                            {white_svg_logo}
                        </a>
                    </div>
                    <h1 style="font-size: 3em; font-weight: bold; margin: 0.5em 0; color: white; margin-top: 200px;">
                        Open VLM Retrieval Leaderboard
                    </h1>
                </div>
            </div>
        """)

        gr.Markdown("""
        This leaderboard presents the performance of various visual embedding models across different business sectors
        and languages. The evaluation is based on retrieval accuracy for visual search tasks.

        ## Structure
        - **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
        - **Models**: Each row shows a different model's performance
        - **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
        - **Average**: Overall mean performance across all sectors for each model
        - **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models

        The leaderboard was created in collaboration with the <em>Intelligence Lab</em> of the <em>ECE - Ecole centrale d'électronique</em>.
        """)

        gr.Markdown("""
        ### How to Read the Results
        - Select a language tab to see how models perform with queries in that language
        - All scores are normalized retrieval accuracy metrics
        - Background colors indicate model origins (Blue = EU, Red = Chinese)
        """)

        # Row colouring keyed on the data-origin attribute of each table row.
        gr.HTML("""
            <style>
            table.gradio-dataframe tr[data-origin="EU"] {
                background-color: rgba(0, 0, 255, 0.2) !important;
            }
            table.gradio-dataframe tr[data-origin="CN"] {
                background-color: rgba(255, 0, 0, 0.2) !important;
            }
            </style>
        """)

        with gr.Tabs():

            with gr.Tab("Average Across Languages"):
                gr.Markdown("""
                ### Average Performance Across Languages
                This table shows the average performance of each model for each sector,
                averaged across all query languages.
                """)

                avg_df = create_average_language_df(scores)
                gr.HTML(_build_table_html(avg_df))
                _render_origin_legend()

            # One tab per query language, all sectors included.
            for lang_code, lang_name in languages.items():
                with gr.Tab(f"{lang_name} Queries"):
                    gr.Markdown(f"""
                    ### Performance with {lang_name} Queries
                    The table below shows how each model performs when the search queries are in {lang_name}.
                    """)

                    lang_df = create_language_df(scores, lang_code, 'all')
                    gr.HTML(_build_table_html(lang_df))
                    _render_origin_legend()

        gr.Markdown("""
        ---
        ### Additional Information
        - Scores are updated regularly as new models are evaluated
        - All evaluations use the same test set for fair comparison
        - Models are evaluated on both English and French datasets to assess cross-lingual capabilities
        - Color coding indicates model origin (Blue = EU, Red = Chinese)

        ### Citation
        If you use these benchmarks in your research, please cite:
        ```
        @article{visual_embeddings_benchmark_2025,
            title={Cross-lingual Visual Embeddings Benchmark},
            author={racine.ai},
            year={2025}
        }
        ```
        """)

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: build the Gradio page and launch it. The result
    # stays bound to the module-level name `demo`.
    demo = create_leaderboard()
    demo.launch()