# paulml's picture
# Upload app.py
# e3610a1 verified
import gradio as gr
import json
import pandas as pd
import numpy as np
# Function to load and modify SVG for white color
def load_svg_as_white():
try:
with open('racine.svg', 'r', encoding='utf-8') as f:
svg_content = f.read()
# Add CSS to make it white while preserving the SVG structure
white_svg = svg_content.replace('<svg', '''<svg style="filter: brightness(0) invert(1); left: 33%; position: absolute; margin-top: 0px; width: 500px;"''')
return white_svg
except FileNotFoundError:
print("Warning: racine.svg file not found")
return "<!-- SVG file not found -->"
except Exception as e:
print(f"Error loading SVG: {e}")
return "<!-- Error loading SVG -->"
# Load the scores from JSON file
def load_scores():
with open('scores.json', 'r') as f:
return json.load(f)
# Function to create dataframe for a specific language and sector filter
def create_language_df(scores, language, sector_filter='all'):
models = list(scores.keys())
sectors_en = [col for col in scores[models[0]] if col.endswith('_EN') and col != 'origin']
sectors_fr = [col for col in scores[models[0]] if col.endswith('_FR') and col != 'origin']
if sector_filter == 'en_only':
selected_sectors = sectors_en
elif sector_filter == 'fr_only':
selected_sectors = sectors_fr
else: # 'all'
selected_sectors = sectors_en + sectors_fr
data = []
for model in models:
row = {'Model': model}
# Add origin information (for styling)
if 'origin' in scores[model]:
row['origin'] = scores[model]['origin']
else:
row['origin'] = 'CN' # Default to Chinese if not specified
# Special handling for AMPERE-1 model
if "AMPERE-1" in model and "AMPERE-1.1" not in model: # Exclude AMPERE-1.1
row['coming_soon'] = True
# Fill all sector values with empty strings
for sector in selected_sectors:
row[sector] = ""
row['Average'] = ""
# Add sort value for correct ordering
row['sort_value'] = float('inf') # Place at the top when sorting
else:
row['coming_soon'] = False
# Add sector scores
sector_scores = {sector: scores[model][sector][language] for sector in selected_sectors}
row.update({sector: f"{score:.3f}" for sector, score in sector_scores.items()})
# Calculate and add average score
avg_score = sum(float(value) for value in sector_scores.values()) / len(sector_scores)
row['Average'] = f"{avg_score:.3f}"
# Add sort value for correct ordering
row['sort_value'] = avg_score
data.append(row)
df = pd.DataFrame(data)
# Sort by the sort_value
df = df.sort_values('sort_value', ascending=False)
# Remove the sort column
df = df.drop('sort_value', axis=1)
# Move Average column to be the second column (right after Model)
cols = ['Model', 'Average'] + [col for col in df.columns if col not in ['Model', 'Average', 'origin', 'coming_soon']]
# Add hidden columns at the end
if 'origin' in df.columns:
cols.append('origin')
if 'coming_soon' in df.columns:
cols.append('coming_soon')
df = df[cols]
return df
def create_average_language_df(scores):
models = list(scores.keys())
languages = ['en', 'fr', 'es', 'de', 'it']
sectors_en = [col for col in scores[models[0]] if col.endswith('_EN') and col != 'origin']
sectors_fr = [col for col in scores[models[0]] if col.endswith('_FR') and col != 'origin']
all_sectors = sectors_en + sectors_fr
data = []
for model in models:
row = {'Model': model}
# Add origin information (for styling)
if 'origin' in scores[model]:
row['origin'] = scores[model]['origin']
else:
row['origin'] = 'CN' # Default to Chinese if not specified
# Special handling for AMPERE-1 model
if "AMPERE-1" in model and "AMPERE-1.1" not in model: # Exclude AMPERE-1.1
row['coming_soon'] = True
# Fill all sector values with empty strings
for sector in all_sectors:
row[sector] = ""
row['Average'] = ""
# Add sort value for correct ordering
row['sort_value'] = float('inf') # Place at the top when sorting
else:
row['coming_soon'] = False
# Calculate average for each sector across all languages
for sector in all_sectors:
sector_scores = [scores[model][sector][lang] for lang in languages]
sector_avg = np.mean(sector_scores)
row[sector] = f"{sector_avg:.3f}"
# Calculate overall average across all sectors
sector_values = [float(row[sector]) for sector in all_sectors]
avg_value = np.mean(sector_values) if sector_values else 0
row['Average'] = f"{avg_value:.3f}"
# Add sort value for correct ordering
row['sort_value'] = avg_value
data.append(row)
df = pd.DataFrame(data)
# Sort by the sort_value
df = df.sort_values('sort_value', ascending=False)
# Remove the sort column
df = df.drop('sort_value', axis=1)
# Move Average column to be the second column
cols = ['Model', 'Average'] + [col for col in df.columns if col not in ['Model', 'Average', 'origin', 'coming_soon']]
# Add hidden columns at the end
if 'origin' in df.columns:
cols.append('origin')
if 'coming_soon' in df.columns:
cols.append('coming_soon')
df = df[cols]
return df
def create_leaderboard():
scores = load_scores()
languages = {
'en': 'English',
'fr': 'French',
'es': 'Spanish',
'de': 'German',
'it': 'Italian'
}
# Load the SVG content
white_svg_logo = load_svg_as_white()
with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
theme='argilla/argilla-theme') as demo:
# Header section with white title and logo
# Added an anchor tag around the SVG logo with href to racine.ai
gr.HTML(f"""
<div style="padding: 2em; margin-bottom: 4em; height: 350px; background-color: transparent;">
<div style="display: flex; flex-direction: column; align-items: center; justify-content: center;">
<div style="width: 50px; margin-bottom: 20px; cursor: pointer;">
<a href="https://racine.ai" target="_blank" style="display: block;">
{white_svg_logo}
</a>
</div>
<h1 style="font-size: 3em; font-weight: bold; margin: 0.5em 0; color: white; margin-top: 200px;">
Open VLM Retrieval Leaderboard
</h1>
</div>
</div>
""")
gr.Markdown("""
This leaderboard presents the performance of various visual embedding models across different business sectors
and languages. The evaluation is based on retrieval accuracy for visual search tasks.
## Structure
- **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
- **Models**: Each row shows a different model's performance
- **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
- **Average**: Overall mean performance across all sectors for each model
- **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models
The leaderboard was created in collaboration with the <em>Intelligence Lab</em> of the <em>ECE - Ecole centrale d'électronique</em>.
""")
# Info box with custom styling
gr.Markdown("""
### How to Read the Results
- Select a language tab to see how models perform with queries in that language
- All scores are normalized retrieval accuracy metrics
- Background colors indicate model origins (Blue = EU, Red = Chinese)
""")
# Custom CSS for styling tables
gr.HTML("""
<style>
table.gradio-dataframe tr[data-origin="EU"] {
background-color: rgba(0, 0, 255, 0.2) !important;
}
table.gradio-dataframe tr[data-origin="CN"] {
background-color: rgba(255, 0, 0, 0.2) !important;
}
</style>
""")
# Tabs section
with gr.Tabs() as tabs:
# Add Average Languages tab first
with gr.Tab("Average Across Languages"):
gr.Markdown("""
### Average Performance Across Languages
This table shows the average performance of each model for each sector,
averaged across all query languages.
""")
# Get the dataframe for average across languages
avg_df = create_average_language_df(scores)
# Create HTML for the colored table
html_table = "<table class='gradio-dataframe'><thead><tr>"
# Add headers
for col in avg_df.columns:
if col not in ['origin', 'coming_soon']:
html_table += f"<th>{col}</th>"
html_table += "</tr></thead><tbody>"
# Add rows with appropriate background colors
for _, row in avg_df.iterrows():
origin = row['origin'] if 'origin' in row else 'CN'
coming_soon = row.get('coming_soon', False)
html_table += f"<tr data-origin='{origin}'>"
for col in avg_df.columns:
if col not in ['origin', 'coming_soon']:
if coming_soon and col != 'Model':
if col == 'Average':
# Add "Coming Soon" text in italics
html_table += "<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>"
else:
html_table += "<td></td>"
else:
html_table += f"<td>{row[col]}</td>"
html_table += "</tr>"
html_table += "</tbody></table>"
gr.HTML(html_table)
# Add color legend
gr.HTML("""
<div style="margin-top: 20px; margin-bottom: 40px;">
<div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
<div style="display: flex; align-items: center; margin-bottom: 8px;">
<div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
<div>European Union</div>
</div>
<div style="display: flex; align-items: center;">
<div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
<div>China</div>
</div>
</div>
""")
# Individual language tabs
for lang_code, lang_name in languages.items():
with gr.Tab(f"{lang_name} Queries"):
gr.Markdown(f"""
### Performance with {lang_name} Queries
The table below shows how each model performs when the search queries are in {lang_name}.
""")
# Get the dataframe for this language
lang_df = create_language_df(scores, lang_code, 'all')
# Create HTML for the colored table
html_table = "<table class='gradio-dataframe'><thead><tr>"
# Add headers
for col in lang_df.columns:
if col not in ['origin', 'coming_soon']:
html_table += f"<th>{col}</th>"
html_table += "</tr></thead><tbody>"
# Add rows with appropriate background colors
for _, row in lang_df.iterrows():
origin = row['origin'] if 'origin' in row else 'CN'
coming_soon = row.get('coming_soon', False)
html_table += f"<tr data-origin='{origin}'>"
for col in lang_df.columns:
if col not in ['origin', 'coming_soon']:
if coming_soon and col != 'Model':
if col == 'Average':
# Add "Coming Soon" text in italics
html_table += "<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>"
else:
html_table += "<td></td>"
else:
html_table += f"<td>{row[col]}</td>"
html_table += "</tr>"
html_table += "</tbody></table>"
gr.HTML(html_table)
# Add color legend
gr.HTML("""
<div style="margin-top: 20px; margin-bottom: 40px;">
<div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
<div style="display: flex; align-items: center; margin-bottom: 8px;">
<div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
<div>European Union</div>
</div>
<div style="display: flex; align-items: center;">
<div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
<div>China</div>
</div>
</div>
""")
# Footer section
gr.Markdown("""
---
### Additional Information
- Scores are updated regularly as new models are evaluated
- All evaluations use the same test set for fair comparison
- Models are evaluated on both English and French datasets to assess cross-lingual capabilities
- Color coding indicates model origin (Blue = EU, Red = Chinese)
### Citation
If you use these benchmarks in your research, please cite:
```
@article{visual_embeddings_benchmark_2025,
title={Cross-lingual Visual Embeddings Benchmark},
author={racine.ai},
year={2025}
}
```
""")
return demo
# Create and launch the interface
if __name__ == "__main__":
demo = create_leaderboard()
demo.launch()