Spaces:

Navid-AI
/

The-Arabic-Rag-Leaderboard

Running on CPU Upgrade

App Files Files Community

The-Arabic-Rag-Leaderboard / utils.py

MohamedRashad

Add fuzzywuzzy dependency and update model submission functions in utils.py

3a93505 5 months ago

raw

history blame

10.2 kB

	import gradio as gr
	import pandas as pd
	import json
	import os
	from pathlib import Path
	from huggingface_hub import HfApi, hf_hub_download

	# Module-level Hugging Face Hub client, used by get_model_info and submit_model.
	api = HfApi()

	# Hub namespace that prefixes the repository id below.
	OWNER = "Navid-AI"
	# Dataset repository that submit_model uploads request files to and that
	# load_requests lists and downloads request files from.
	DATASET_REPO_ID = f"{OWNER}/requests-dataset"

	def load_retrieval_results():
	    """Load the retrieval leaderboard table stored next to this module.

	    Reads ``assets/results/retrieval_results.json`` relative to this file.
	    When that file does not exist, an empty frame that only carries the
	    leaderboard column names is returned instead.

	    Returns:
	        pandas.DataFrame: The stored results, or an empty column-only frame.
	    """
	    results_file = Path(__file__).parent / "assets" / "results" / "retrieval_results.json"
	    if results_file.exists():
	        return pd.read_json(results_file)

	    # No results on disk yet: hand back the expected schema with zero rows.
	    leaderboard_columns = [
	        "Model",
	        "Model Size",
	        "Embedding Dimension",
	        "Max Tokens",
	        "Web Search Dataset (MRR)",
	        "Web Search Dataset (nDCG@k=None)",
	    ]
	    return pd.DataFrame(columns=leaderboard_columns)

	def get_model_info(model_id, verbose=False):
	    """Fetch popularity, license and size metadata for a model on the Hub.

	    Args:
	        model_id: Repository id passed to ``api.model_info``.
	        verbose: When true, print a short summary of the fetched values.

	    Returns:
	        tuple: ``(num_downloads, num_likes, license, num_parameters,
	        supported_precisions)``. ``num_parameters`` is the safetensors
	        parameter total in millions (rounded) and ``supported_precisions``
	        lists the keys of the safetensors parameter breakdown. When the model
	        has no card data / license entry, ``license`` is ``None``; when it has
	        no safetensors metadata, ``num_parameters`` is ``None`` and
	        ``supported_precisions`` is empty.

	    Raises:
	        Exception: Whatever ``api.model_info`` raises for this ``model_id``.
	    """
	    model_info = api.model_info(model_id)
	    num_downloads = model_info.downloads
	    num_likes = model_info.likes

	    # Card metadata can be missing entirely (None) or lack a license entry;
	    # neither should abort the whole lookup.
	    try:
	        model_license = model_info.card_data["license"]
	    except (TypeError, KeyError):
	        model_license = None

	    # Models without safetensors weights expose no parameter statistics.
	    safetensors = model_info.safetensors
	    if safetensors is None:
	        num_parameters = None
	        supported_precisions = []
	    else:
	        num_parameters = round(safetensors.total / 1e6)
	        supported_precisions = list(safetensors.parameters.keys())

	    if verbose:
	        print(f"Model '{model_id}' has {num_downloads} downloads, {num_likes} likes, and is licensed under {model_license}.")
	        if num_parameters is None:
	            print("The model does not report its parameter count.")
	        else:
	            # The value was divided by 1e6 above, so the unit is millions.
	            print(f"The model has approximately {num_parameters} million parameters.")
	        print(f"The model supports the following precisions: {supported_precisions}")
	    return num_downloads, num_likes, model_license, num_parameters, supported_precisions

	def fetch_model_information(model_name):
	    """Look up a model on the Hub and pre-fill the submission form fields.

	    Args:
	        model_name: Hub model id typed into the form.

	    Returns:
	        tuple: ``(precision_update, num_parameters, license)`` where
	        ``precision_update`` is a ``gr.update`` that sets the dropdown choices
	        to the model's precisions (first one selected, or ``None`` when the
	        model reports none).

	    Raises:
	        gr.Error: If the model information cannot be fetched. Gradio shows
	            the message to the user.
	    """
	    try:
	        _, _, model_license, num_parameters, supported_precisions = get_model_info(model_name)
	    except Exception as e:
	        # gr.Error is an exception class: it only reaches the UI when raised.
	        # Merely constructing it (and returning None for three outputs) does not
	        # display anything to the user.
	        raise gr.Error(f"Error: Could not fetch model information. {str(e)}") from e

	    if not supported_precisions:
	        supported_precisions = [None]
	    return gr.update(choices=supported_precisions, value=supported_precisions[0]), num_parameters, model_license

	def _request_exists(df, columns, model_name, revision, precision):
	    """Return True if ``df`` has a row for this model/revision/precision triple.

	    ``columns`` names the model, revision and precision columns of ``df`` in
	    that order. A frame that is empty or lacks any of them cannot match.
	    """
	    name_col, revision_col, precision_col = columns
	    if df.empty or not {name_col, revision_col, precision_col}.issubset(df.columns):
	        return False
	    matches = (df[name_col] == model_name) & (df[revision_col] == revision)
	    if precision is None:
	        # Comparing a column with ``== None`` is False for every row in pandas,
	        # so a missing precision has to be matched with isna().
	        matches &= df[precision_col].isna()
	    else:
	        matches &= df[precision_col] == precision
	    return bool(matches.any())


	def submit_model(model_name, revision, precision, params, license):
	    """Validate a submission and queue it as a pending evaluation request.

	    The request is rejected when the retrieval results cannot be loaded, the
	    model name is not of the form ``org/model``, the same
	    model/revision/precision is already evaluated or pending, or the model
	    cannot be found on the Hub. Otherwise a JSON request file is uploaded to
	    ``pending/<org>/`` in the requests dataset repository.

	    Args:
	        model_name: Full Hub model id including organization or username.
	        revision: Model revision to evaluate.
	        precision: Precision label from the form; ``'Missing'`` or ``None``
	            means no precision is recorded.
	        params: Parameter count as entered in the form.
	        license: License string as entered in the form.

	    Returns:
	        str: A message describing the outcome, shown to the user.
	    """
	    # Load existing evaluations
	    df_retrieval = load_retrieval_results()
	    if df_retrieval.empty:
	        return "Error: Could not load the retrieval results."

	    # Validate the name format up front so a malformed name is rejected
	    # before any network round trip is made.
	    org_model = model_name.split('/')
	    if len(org_model) != 2:
	        return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'"
	    org, model_id = org_model

	    # Handle 'Missing' precision (the form may also deliver None)
	    if precision is None or precision == 'Missing':
	        precision = None
	    else:
	        precision = precision.strip().lower()

	    # Load pending and finished requests from the dataset repository
	    df_pending = load_requests('pending')
	    df_finished = load_requests('finished')

	    # Check if model is already evaluated. The results table only takes part
	    # in this check when it carries revision and precision columns; indexing
	    # columns it does not have would raise KeyError. Finished requests are
	    # checked below either way.
	    name_column = "Model Name" if "Model Name" in df_retrieval.columns else "Model"
	    if _request_exists(df_retrieval, (name_column, "Revision", "Precision"), model_name, revision, precision):
	        return f"Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated."

	    request_columns = ('model_name', 'revision', 'precision')

	    # Check if model is in pending requests
	    if _request_exists(df_pending, request_columns, model_name, revision, precision):
	        return f"Model '{model_name}' with revision '{revision}' and precision '{precision}' is already in the pending evaluations."

	    # Check if model is in finished requests
	    if _request_exists(df_finished, request_columns, model_name, revision, precision):
	        return f"Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated."

	    # Check if model exists on HuggingFace Hub
	    try:
	        api.model_info(model_name)
	    except Exception:
	        return f"Error: Could not find model '{model_name}' on HuggingFace Hub. Please ensure the model name is correct and the model is public."

	    # Prepare the submission data and serialize it to JSON
	    submission = {
	        "model_name": model_name,
	        "license": license,
	        "revision": revision,
	        "precision": precision,
	        "status": "PENDING",
	        "params": params,
	    }
	    submission_json = json.dumps(submission, indent=2)

	    # Define the file path in the repository
	    precision_str = precision if precision else 'Missing'
	    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}.json"

	    # Upload the submission to the dataset repository. HF_TOKEN (the variable
	    # load_requests reads) is accepted as a fallback for HF_API_TOKEN.
	    try:
	        hf_api_token = os.environ.get('HF_API_TOKEN') or os.environ.get('HF_TOKEN')
	        api.upload_file(
	            path_or_fileobj=submission_json.encode('utf-8'),
	            path_in_repo=file_path_in_repo,
	            repo_id=DATASET_REPO_ID,
	            repo_type="dataset",
	            token=hf_api_token,
	        )
	    except Exception as e:
	        return f"Error: Could not submit the model. {str(e)}"

	    return f"Model '{model_name}' has been submitted for evaluation."

	def load_requests(status_folder):
	    """Collect the JSON request files stored under one folder of the requests dataset.

	    Args:
	        status_folder: Folder prefix inside the dataset repository:
	            'pending', 'finished', or 'failed'.

	    Returns:
	        pandas.DataFrame: One row per request file that could be downloaded
	        and parsed. Empty when the repository cannot be listed or the folder
	        holds no loadable ``.json`` files.
	    """
	    api = HfApi()

	    # HF_API_TOKEN (the variable submit_model reads) is accepted as a fallback.
	    hf_api_token = os.environ.get('HF_TOKEN') or os.environ.get('HF_API_TOKEN')

	    try:
	        # List files in the dataset repository
	        files_info = api.list_repo_files(
	            repo_id=DATASET_REPO_ID,
	            repo_type="dataset",
	            token=hf_api_token,
	        )
	    except Exception as e:
	        print(f"Error accessing dataset repository: {e}")
	        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible

	    # Filter files in the desired folder
	    folder_prefix = f"{status_folder}/"
	    files_in_folder = [f for f in files_info if f.startswith(folder_prefix) and f.endswith('.json')]

	    requests_data = []
	    for file_path in files_in_folder:
	        try:
	            # Download the JSON file
	            local_file_path = hf_hub_download(
	                repo_id=DATASET_REPO_ID,
	                filename=file_path,
	                repo_type="dataset",
	                token=hf_api_token,
	            )
	            # Request files are uploaded as UTF-8, so decode them explicitly
	            # instead of relying on the platform's default encoding.
	            with open(local_file_path, 'r', encoding='utf-8') as f:
	                request = json.load(f)
	        except Exception as e:
	            print(f"Error loading file {file_path}: {e}")
	            continue  # Skip files that can't be loaded
	        requests_data.append(request)

	    return pd.DataFrame(requests_data)


	def submit_gradio_module(type):
	    """Build the "Submit <type>" tab with the submission form and status tables.

	    The tab holds the model form (name, precision, params, license, revision),
	    a button that pre-fills the form via ``fetch_model_information``, a submit
	    button wired to ``submit_model``, and three collapsed accordions listing
	    the pending, finished and failed requests loaded when the tab is built.

	    Args:
	        type: Label interpolated into the tab title.

	    Returns:
	        The ``gr.Tab`` that was created.
	    """
	    with gr.Tab(f"Submit {type}") as submitter_tab:
	        # --- Form fields -------------------------------------------------
	        with gr.Row(equal_height=True):
	            model_box = gr.Textbox(
	                label="Model",
	                placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
	                scale=4,
	            )
	            fetch_button = gr.Button(value="Auto Fetch Model Info", variant="secondary")

	        with gr.Row():
	            precision_box = gr.Dropdown(
	                choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
	                label="Precision",
	                value="F16",
	            )
	            params_box = gr.Textbox(
	                label="Params (in Millions)",
	                placeholder="Enter the approximate number of parameters as Integer (e.g., 7, 13, 30, 70 ...)",
	            )

	        with gr.Row():
	            license_box = gr.Textbox(
	                label="License",
	                placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
	                value="Open",
	            )
	            revision_box = gr.Textbox(
	                label="Revision",
	                placeholder="main",
	                value="main",
	            )

	        send_button = gr.Button("Submit Model", variant="primary")
	        result_view = gr.Markdown()

	        # --- Event wiring ------------------------------------------------
	        # Both the fetch button and pressing Enter in the model box pre-fill
	        # the precision, params and license fields.
	        prefill_targets = [precision_box, params_box, license_box]
	        for register in (fetch_button.click, model_box.submit):
	            register(
	                fetch_model_information,
	                inputs=[model_box],
	                outputs=prefill_targets,
	            )
	        send_button.click(
	            submit_model,
	            inputs=[model_box, revision_box, precision_box, params_box, license_box],
	            outputs=result_view,
	        )

	        # --- Request status tables -----------------------------------------
	        # All three folders are loaded before any table is rendered.
	        frames = [load_requests(folder) for folder in ('pending', 'finished', 'failed')]
	        titles = ("Pending Evaluations", "Finished Evaluations", "Failed Evaluations")
	        empty_notes = ("No pending evaluations.", "No finished evaluations.", "No failed evaluations.")

	        gr.Markdown("## Evaluation Status")
	        for title, empty_note, frame in zip(titles, empty_notes, frames):
	            with gr.Accordion(f"{title} ({len(frame)})", open=False):
	                if frame.empty:
	                    gr.Markdown(empty_note)
	                else:
	                    gr.Dataframe(frame)

	    return submitter_tab