import gradio as gr
import pandas as pd
import json
import os
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download

api = HfApi()
OWNER = "Navid-AI"
DATASET_REPO_ID = f"{OWNER}/requests-dataset"

results_dir = Path(__file__).parent / "results"
# Read the API token from the environment and warn if it is missing, so that
# operations requiring authentication fail with a clear reason.
HF_TOKEN = os.environ.get('HF_TOKEN')
if not HF_TOKEN:
    print("Warning: HF_TOKEN environment variable not set. API operations requiring authentication will fail.")
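# Locally the token can be exported before launching the app (e.g. `export HF_TOKEN=hf_...`);
# on a Hugging Face Space it is typically provided as a repository secret with the same name.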

# Helper to load JSON results from disk, with optional formatting for display.
def load_json_results(file_path: Path, prepare_for_display=False, sort_col=None, drop_cols=None):
    if not file_path.exists():
        raise FileNotFoundError(f"File '{file_path}' not found.")
    df = pd.read_json(file_path)
    if prepare_for_display:
        # Render model IDs as links to their Hugging Face Hub pages.
        df[["Model"]] = df[["Model"]].map(lambda x: f'<a href="https://huggingface.co/{x}" target="_blank">{x}</a>')
    if drop_cols is not None:
        df.drop(columns=drop_cols, inplace=True)
    if sort_col is not None:
        df.sort_values(sort_col, ascending=False, inplace=True)
    return df
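
# Example usage (the file name and column names below are illustrative only,
# not taken from this repository's actual results files):
# df = load_json_results(
#     results_dir / "retrieval_results.json",
#     prepare_for_display=True,
#     sort_col="Average",
#     drop_cols=["Revision"],
# )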

def get_model_info(model_id, verbose=False):
    model_info = api.model_info(model_id)
    num_downloads = model_info.downloads
    num_likes = model_info.likes
    license = model_info.card_data["license"]
    # safetensors.total is a raw parameter count; convert it to millions.
    num_parameters = round(model_info.safetensors.total / 1e6)
    supported_precisions = list(model_info.safetensors.parameters.keys())
    if verbose:
        print(f"Model '{model_id}' has {num_downloads} downloads, {num_likes} likes, and is licensed under {license}.")
        print(f"The model has approximately {num_parameters} million parameters.")
        print(f"The model supports the following precisions: {supported_precisions}")
    return num_downloads, num_likes, license, num_parameters, supported_precisions
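
# Example (illustrative model id borrowed from the placeholder text further down;
# the returned numbers depend on the Hub at call time):
# downloads, likes, license, params_millions, precisions = get_model_info(
#     "intfloat/multilingual-e5-large-instruct", verbose=True
# )
# `precisions` lists the dtypes recorded in the safetensors metadata, e.g. ["F32"].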

def fetch_model_information(model_name):
    try:
        num_downloads, num_likes, license, num_parameters, supported_precisions = get_model_info(model_name)
        if len(supported_precisions) == 0:
            supported_precisions = [None]
    except Exception as e:
        # gr.Error must be raised (not just instantiated) to show up in the UI.
        raise gr.Error(f"Could not fetch model information. {str(e)}")
    return gr.update(choices=supported_precisions, value=supported_precisions[0]), license, num_parameters, num_downloads, num_likes

def submit_model(model_name, revision, precision, params, license, task, pending_gradio_df):
    try:
        if float(params) > 5000:
            return "Model size should be less than 5000 million parameters (5 billion)", pending_gradio_df
    except ValueError:
        return "The parameter count is missing or is not a number. Please make sure it is available and correct.", pending_gradio_df
    # Handle 'Missing' precision
    if precision == 'Missing':
        precision = None
    else:
        precision = precision.strip().lower()
    # Load pending and finished requests from the dataset repository
    df_pending = load_requests('pending')
    df_finished = load_requests('finished')
    # Check if the model is already in the pending requests
    if not df_pending.empty:
        existing_models_pending = df_pending[['model_name', 'revision', 'precision', 'task']]
        model_exists_in_pending = ((existing_models_pending['model_name'] == model_name) &
                                   (existing_models_pending['revision'] == revision) &
                                   (existing_models_pending['precision'] == precision) &
                                   (existing_models_pending['task'] == task)).any()
        if model_exists_in_pending:
            return f"Model {model_name} is already in the evaluation queue as a {task}", pending_gradio_df
    # Check if the model is already in the finished requests
    if not df_finished.empty:
        existing_models_finished = df_finished[['model_name', 'revision', 'precision', 'task']]
        model_exists_in_finished = ((existing_models_finished['model_name'] == model_name) &
                                    (existing_models_finished['revision'] == revision) &
                                    (existing_models_finished['precision'] == precision) &
                                    (existing_models_finished['task'] == task)).any()
        if model_exists_in_finished:
            return f"Model {model_name} has already been evaluated as a {task}", pending_gradio_df
    # Check that the model exists on the Hugging Face Hub
    try:
        api.model_info(model_name)
    except Exception as e:
        print(f"Error fetching model info: {e}")
        return f"Model {model_name} not found on the Hugging Face Hub 🤷‍♂️", pending_gradio_df
    # Proceed with submission
    status = "PENDING"
    # Prepare the submission data
    submission = {
        "model_name": model_name,
        "license": license,
        "revision": revision,
        "precision": precision,
        "status": status,
        "params": params,
        "task": task
    }
    # Serialize the submission to JSON
    submission_json = json.dumps(submission, indent=2)
    # Define the file path in the dataset repository
    org_model = model_name.split('/')
    if len(org_model) != 2:
        return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct' 🤷‍♂️", pending_gradio_df
    org, model_id = org_model
    precision_str = precision if precision else 'Missing'
    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"
    # Upload the submission to the dataset repository
    try:
        api.upload_file(
            path_or_fileobj=submission_json.encode('utf-8'),
            path_in_repo=file_path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN
        )
    except Exception as e:
        print(f"Error uploading file: {e}")
        return f"Error: Could not submit model '{model_name}' for evaluation.", pending_gradio_df
    return f"Model {model_name} has been submitted successfully as a {task}", pending_gradio_df

def load_requests(status_folder, task_type=None):
    requests_data = []
    folder_path_in_repo = status_folder  # 'pending', 'finished', or 'failed'
    try:
        # Use the cached token
        files_info = api.list_repo_files(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN
        )
    except Exception as e:
        print(f"Error accessing dataset repository: {e}")
        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible
    # Filter files in the desired folder
    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
    for file_path in files_in_folder:
        try:
            # Download the JSON file
            local_file_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=file_path,
                repo_type="dataset",
                token=HF_TOKEN
            )
            # Load JSON data
            with open(local_file_path, 'r') as f:
                request = json.load(f)
            requests_data.append(request)
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")
            continue  # Skip files that can't be loaded
    df = pd.DataFrame(requests_data)
    # Filter by task type
    if task_type and not df.empty:
        df = df[df['task'] == task_type]
    return df
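
# Example usage (folder names mirror the dataset layout written above; the task
# name is hypothetical):
# df_pending_all = load_requests('pending')
# df_failed_retrievers = load_requests('failed', task_type="Retriever")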

def submit_gradio_module(task_type):
    var = gr.State(value=task_type)
    with gr.Row(equal_height=True):
        model_name_input = gr.Textbox(
            label="Model",
            placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
            scale=4,
        )
        fetch_data_button = gr.Button(value="Auto Fetch Model Info", variant="secondary")
    with gr.Row():
        precision_input = gr.Dropdown(
            choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
            label="Precision",
            value="F16"
        )
        license_input = gr.Textbox(
            label="License",
            placeholder="Enter the license type (use 'Open' if no license is provided)",
            value="Open"
        )
        revision_input = gr.Textbox(
            label="Revision",
            placeholder="main",
            value="main"
        )
    with gr.Row():
        params_input = gr.Textbox(
            label="Params (in Millions)",
            interactive=False,
        )
        num_downloads_input = gr.Textbox(
            label="Number of Downloads",
            interactive=False,
        )
        num_likes_input = gr.Textbox(
            label="Number of Likes",
            interactive=False,
        )
    submit_button = gr.Button("Submit Model", variant="primary")
    submission_result = gr.Textbox(label="Submission Result", interactive=False)

    fetch_outputs = [precision_input, license_input, params_input, num_downloads_input, num_likes_input]
    fetch_data_button.click(
        fetch_model_information,
        inputs=[model_name_input],
        outputs=fetch_outputs
    )
    model_name_input.submit(
        fetch_model_information,
        inputs=[model_name_input],
        outputs=fetch_outputs
    )

    # Load pending, finished, and failed requests
    df_pending = load_requests('pending', task_type)
    df_finished = load_requests('finished', task_type)
    df_failed = load_requests('failed', task_type)

    # Display the tables
    gr.Markdown("## Evaluation Status")
    with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
        if df_pending.empty:
            gr.Markdown("No pending evaluations.")
        # Always create the component (hidden when empty) so it can be wired as an
        # input and output of the submit button below.
        pending_gradio_df = gr.Dataframe(df_pending, visible=not df_pending.empty)
    with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
        if not df_finished.empty:
            gr.Dataframe(df_finished)
        else:
            gr.Markdown("No finished evaluations.")
    with gr.Accordion(f"Failed Evaluations ({len(df_failed)})", open=False):
        if not df_failed.empty:
            gr.Dataframe(df_failed)
        else:
            gr.Markdown("No failed evaluations.")

    submit_button.click(
        submit_model,
        inputs=[model_name_input, revision_input, precision_input, params_input, license_input, var, pending_gradio_df],
        outputs=[submission_result, pending_gradio_df],
    )
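
# A minimal sketch (not part of this file) of how this module might be mounted in
# a Blocks app, e.g. one submission tab per leaderboard task. The task names
# "Retriever" and "Reranker" are assumptions for illustration only.
#
# with gr.Blocks() as demo:
#     with gr.Tabs():
#         with gr.Tab("Submit Retriever"):
#             submit_gradio_module("Retriever")
#         with gr.Tab("Submit Reranker"):
#             submit_gradio_module("Reranker")
# demo.launch()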