Spaces:

racineai
/

Open-VLM-Retrieval-Leaderboard

Running

App Files Files Community

Open-VLM-Retrieval-Leaderboard / app.py

paulml

Upload app.py

e3610a1 verified 4 months ago

raw

history blame contribute delete

16 kB

	import gradio as gr
	import json
	import pandas as pd
	import numpy as np

	# Function to load and modify SVG for white color
	def load_svg_as_white(svg_path='racine.svg'):
	    """Read an SVG file and inject an inline style that renders it white.

	    The style attribute (brightness/invert filter plus absolute positioning
	    and a fixed width) is spliced into the first ``<svg`` tag only, so any
	    nested ``<svg>`` elements keep their own layout.

	    Args:
	        svg_path: Path of the SVG file to read. Defaults to ``'racine.svg'``.

	    Returns:
	        The SVG markup with the style injected, or an HTML comment
	        placeholder when the file is missing or cannot be read/decoded.
	    """
	    styled_open_tag = '''<svg style="filter: brightness(0) invert(1); left: 33%; position: absolute; margin-top: 0px; width: 500px;"'''
	    try:
	        with open(svg_path, 'r', encoding='utf-8') as f:
	            svg_content = f.read()
	    except FileNotFoundError:
	        print(f"Warning: {svg_path} file not found")
	        return "<!-- SVG file not found -->"
	    except (OSError, ValueError) as e:
	        # ValueError covers UnicodeDecodeError raised while decoding the file.
	        print(f"Error loading SVG: {e}")
	        return "<!-- Error loading SVG -->"
	    # count=1: only the root tag gets the style, never nested <svg> elements.
	    return svg_content.replace('<svg', styled_open_tag, 1)

	# Load the scores from JSON file
	def load_scores(path='scores.json'):
	    """Load the leaderboard scores from a JSON file.

	    Args:
	        path: Path of the JSON file to read. Defaults to ``'scores.json'``.

	    Returns:
	        The parsed JSON document, exactly as produced by ``json.load``.
	    """
	    # Read as UTF-8 explicitly so non-ASCII content does not depend on the
	    # platform's default locale encoding.
	    with open(path, 'r', encoding='utf-8') as f:
	        return json.load(f)

	# Function to create dataframe for a specific language and sector filter
	def create_language_df(scores, language, sector_filter='all'):
	    """Build the leaderboard table for queries in a single language.

	    Args:
	        scores: Mapping of model name to that model's entry. Each entry maps
	            sector names (keys ending in ``_EN`` or ``_FR``) to a mapping of
	            language code to score, and may carry an ``'origin'`` key.
	        language: Language code used to pick the score inside each sector.
	        sector_filter: ``'en_only'`` keeps the ``_EN`` sectors, ``'fr_only'``
	            keeps the ``_FR`` sectors, anything else keeps both.

	    Returns:
	        A DataFrame sorted by average score (descending) with columns
	        ``Model``, ``Average``, one column per selected sector, then the
	        helper columns ``origin`` and ``coming_soon``. Scores are formatted
	        as 3-decimal strings. An empty ``scores`` mapping yields an empty
	        frame with the fixed columns only.
	    """
	    hidden_cols = ['origin', 'coming_soon']
	    models = list(scores.keys())
	    if not models:
	        return pd.DataFrame(columns=['Model', 'Average'] + hidden_cols)

	    # The sector list is taken from the first model's entry.
	    first_entry = scores[models[0]]
	    sectors_en = [col for col in first_entry if col.endswith('_EN')]
	    sectors_fr = [col for col in first_entry if col.endswith('_FR')]

	    if sector_filter == 'en_only':
	        selected_sectors = sectors_en
	    elif sector_filter == 'fr_only':
	        selected_sectors = sectors_fr
	    else:  # 'all'
	        selected_sectors = sectors_en + sectors_fr

	    data = []
	    for model in models:
	        entry = scores[model]
	        # Origin is only used for row styling; default to Chinese if absent.
	        row = {'Model': model, 'origin': entry.get('origin', 'CN')}

	        # AMPERE-1 (but not AMPERE-1.1) is listed as a "coming soon" placeholder.
	        is_placeholder = "AMPERE-1" in model and "AMPERE-1.1" not in model
	        row['coming_soon'] = is_placeholder

	        if is_placeholder:
	            for sector in selected_sectors:
	                row[sector] = ""
	            row['Average'] = ""
	            # +inf keeps the placeholder at the top of the descending sort.
	            row['sort_value'] = float('inf')
	        else:
	            sector_scores = {sector: entry[sector][language] for sector in selected_sectors}
	            row.update({sector: f"{score:.3f}" for sector, score in sector_scores.items()})

	            # Guard against an empty selection (e.g. 'fr_only' with no _FR
	            # sectors) instead of dividing by zero.
	            if sector_scores:
	                avg_score = sum(float(value) for value in sector_scores.values()) / len(sector_scores)
	            else:
	                avg_score = 0.0
	            row['Average'] = f"{avg_score:.3f}"
	            row['sort_value'] = avg_score

	        data.append(row)

	    df = pd.DataFrame(data)
	    df = df.sort_values('sort_value', ascending=False)
	    df = df.drop('sort_value', axis=1)

	    # Model and Average first, sector columns next, helper columns last.
	    cols = ['Model', 'Average'] + [col for col in df.columns if col not in ['Model', 'Average'] + hidden_cols]
	    cols.extend(col for col in hidden_cols if col in df.columns)

	    return df[cols]

	def create_average_language_df(scores, languages=('en', 'fr', 'es', 'de', 'it')):
	    """Build the leaderboard table averaged across query languages.

	    Args:
	        scores: Mapping of model name to that model's entry. Each entry maps
	            sector names (keys ending in ``_EN`` or ``_FR``) to a mapping of
	            language code to score, and may carry an ``'origin'`` key.
	        languages: Language codes to average over for every sector.
	            Defaults to en, fr, es, de and it.

	    Returns:
	        A DataFrame sorted by overall average (descending) with columns
	        ``Model``, ``Average``, one column per sector, then the helper
	        columns ``origin`` and ``coming_soon``. Values are formatted as
	        3-decimal strings. An empty ``scores`` mapping yields an empty frame
	        with the fixed columns only.
	    """
	    hidden_cols = ['origin', 'coming_soon']
	    models = list(scores.keys())
	    if not models:
	        return pd.DataFrame(columns=['Model', 'Average'] + hidden_cols)

	    # The sector list is taken from the first model's entry.
	    first_entry = scores[models[0]]
	    sectors_en = [col for col in first_entry if col.endswith('_EN')]
	    sectors_fr = [col for col in first_entry if col.endswith('_FR')]
	    all_sectors = sectors_en + sectors_fr

	    data = []
	    for model in models:
	        entry = scores[model]
	        # Origin is only used for row styling; default to Chinese if absent.
	        row = {'Model': model, 'origin': entry.get('origin', 'CN')}

	        # AMPERE-1 (but not AMPERE-1.1) is listed as a "coming soon" placeholder.
	        is_placeholder = "AMPERE-1" in model and "AMPERE-1.1" not in model
	        row['coming_soon'] = is_placeholder

	        if is_placeholder:
	            for sector in all_sectors:
	                row[sector] = ""
	            row['Average'] = ""
	            # +inf keeps the placeholder at the top of the descending sort.
	            row['sort_value'] = float('inf')
	        else:
	            # Per-sector mean over the requested languages.
	            sector_means = [
	                np.mean([entry[sector][lang] for lang in languages])
	                for sector in all_sectors
	            ]
	            for sector, sector_avg in zip(all_sectors, sector_means):
	                row[sector] = f"{sector_avg:.3f}"

	            # Average the raw sector means, not their rounded display
	            # strings, so rounding error does not leak into the overall
	            # score or the ranking.
	            avg_value = np.mean(sector_means) if sector_means else 0
	            row['Average'] = f"{avg_value:.3f}"
	            row['sort_value'] = avg_value

	        data.append(row)

	    df = pd.DataFrame(data)
	    df = df.sort_values('sort_value', ascending=False)
	    df = df.drop('sort_value', axis=1)

	    # Model and Average first, sector columns next, helper columns last.
	    cols = ['Model', 'Average'] + [col for col in df.columns if col not in ['Model', 'Average'] + hidden_cols]
	    cols.extend(col for col in hidden_cols if col in df.columns)

	    return df[cols]

	def _render_leaderboard_table(df):
	    """Render a leaderboard dataframe as an HTML ``<table>`` string.

	    The ``origin`` and ``coming_soon`` columns are not displayed: ``origin``
	    becomes the ``data-origin`` attribute of each row (used by the CSS rules
	    for background colors) and ``coming_soon`` rows show "Coming Soon" in the
	    Average cell with every other score cell left empty.

	    Args:
	        df: DataFrame as produced by ``create_language_df`` or
	            ``create_average_language_df``.

	    Returns:
	        The complete ``<table>...</table>`` markup as one string.
	    """
	    # NOTE(review): cell values are inserted without HTML escaping, as in the
	    # inline code this helper replaces; presumably scores.json is trusted.
	    hidden_cols = ('origin', 'coming_soon')
	    visible_cols = [col for col in df.columns if col not in hidden_cols]

	    parts = ["<table class='gradio-dataframe'><thead><tr>"]
	    parts.extend(f"<th>{col}</th>" for col in visible_cols)
	    parts.append("</tr></thead><tbody>")

	    for _, row in df.iterrows():
	        origin = row['origin'] if 'origin' in row else 'CN'
	        coming_soon = row.get('coming_soon', False)
	        parts.append(f"<tr data-origin='{origin}'>")

	        for col in visible_cols:
	            if coming_soon and col != 'Model':
	                if col == 'Average':
	                    # "Coming Soon" text in italics
	                    parts.append("<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>")
	                else:
	                    parts.append("<td></td>")
	            else:
	                parts.append(f"<td>{row[col]}</td>")

	        parts.append("</tr>")

	    parts.append("</tbody></table>")
	    return "".join(parts)


	def create_leaderboard():
	    """Build the Gradio Blocks app for the leaderboard.

	    Loads the scores and the logo, then lays out a header, explanatory
	    text, the table CSS, one tab with scores averaged across languages, one
	    tab per query language, and a footer. Every tab shows an HTML table
	    followed by the model-origin color legend.

	    Returns:
	        The ``gr.Blocks`` instance; the caller is responsible for launching it.
	    """
	    scores = load_scores()
	    languages = {
	        'en': 'English',
	        'fr': 'French',
	        'es': 'Spanish',
	        'de': 'German',
	        'it': 'Italian'
	    }

	    # Load the SVG content
	    white_svg_logo = load_svg_as_white()

	    # Color legend shown under every table (identical in all tabs).
	    legend_html = """
	<div style="margin-top: 20px; margin-bottom: 40px;">
	<div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
	<div style="display: flex; align-items: center; margin-bottom: 8px;">
	<div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
	<div>European Union</div>
	</div>
	<div style="display: flex; align-items: center;">
	<div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
	<div>China</div>
	</div>
	</div>
	"""

	    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
	                   theme='argilla/argilla-theme') as demo:

	        # Header section with white title and logo; the logo is wrapped in
	        # an anchor linking to racine.ai
	        gr.HTML(f"""
	<div style="padding: 2em; margin-bottom: 4em; height: 350px; background-color: transparent;">
	<div style="display: flex; flex-direction: column; align-items: center; justify-content: center;">
	<div style="width: 50px; margin-bottom: 20px; cursor: pointer;">
	<a href="https://racine.ai" target="_blank" style="display: block;">
	{white_svg_logo}
	</a>
	</div>
	<h1 style="font-size: 3em; font-weight: bold; margin: 0.5em 0; color: white; margin-top: 200px;">
	Open VLM Retrieval Leaderboard
	</h1>
	</div>
	</div>
	""")

	        gr.Markdown("""
	This leaderboard presents the performance of various visual embedding models across different business sectors
	and languages. The evaluation is based on retrieval accuracy for visual search tasks.

	## Structure
	- Sectors: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
	- Models: Each row shows a different model's performance
	- Scores: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
	- Average: Overall mean performance across all sectors for each model
	- Colors: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models

	The leaderboard was created in collaboration with the <em>Intelligence Lab</em> of the <em>ECE - Ecole centrale d'électronique</em>.
	""")

	        # Info box with custom styling
	        gr.Markdown("""
	### How to Read the Results
	- Select a language tab to see how models perform with queries in that language
	- All scores are normalized retrieval accuracy metrics
	- Background colors indicate model origins (Blue = EU, Red = Chinese)
	""")

	        # Custom CSS for styling tables (keyed on the rows' data-origin attribute)
	        gr.HTML("""
	<style>
	table.gradio-dataframe tr[data-origin="EU"] {
	background-color: rgba(0, 0, 255, 0.2) !important;
	}
	table.gradio-dataframe tr[data-origin="CN"] {
	background-color: rgba(255, 0, 0, 0.2) !important;
	}
	</style>
	""")

	        # Tabs section
	        with gr.Tabs():
	            # Average Languages tab comes first
	            with gr.Tab("Average Across Languages"):
	                gr.Markdown("""
	### Average Performance Across Languages
	This table shows the average performance of each model for each sector,
	averaged across all query languages.
	""")

	                avg_df = create_average_language_df(scores)
	                gr.HTML(_render_leaderboard_table(avg_df))
	                gr.HTML(legend_html)

	            # Individual language tabs
	            for lang_code, lang_name in languages.items():
	                with gr.Tab(f"{lang_name} Queries"):
	                    gr.Markdown(f"""
	### Performance with {lang_name} Queries
	The table below shows how each model performs when the search queries are in {lang_name}.
	""")

	                    lang_df = create_language_df(scores, lang_code, 'all')
	                    gr.HTML(_render_leaderboard_table(lang_df))
	                    gr.HTML(legend_html)

	        # Footer section
	        gr.Markdown("""
	---
	### Additional Information
	- Scores are updated regularly as new models are evaluated
	- All evaluations use the same test set for fair comparison
	- Models are evaluated on both English and French datasets to assess cross-lingual capabilities
	- Color coding indicates model origin (Blue = EU, Red = Chinese)

	### Citation
	If you use these benchmarks in your research, please cite:
	```
	@article{visual_embeddings_benchmark_2025,
	title={Cross-lingual Visual Embeddings Benchmark},
	author={racine.ai},
	year={2025}
	}
	```
	""")

	    return demo

	# Create and launch the interface
	if __name__ == "__main__":
	    # Only when run as a script: build the Blocks app and start serving it.
	    demo = create_leaderboard()
	    demo.launch()