import os
import zipfile

import numpy as np
import pandas as pd
from huggingface_hub import InferenceClient

from evaluations import documentation, requirements, training, validating, license, weights
from evaluations.utils import *

# Hugging Face Inference API configuration. The access token is read from the
# environment rather than hard-coded ("HF_TOKEN" is an assumed variable name).
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
headers = {"Authorization": f"Bearer {HF_TOKEN}", "x-wait-for-model": "true"}
client = InferenceClient(
    "meta-llama/Llama-3.1-8B-Instruct",
    token=HF_TOKEN,
)


def init_llm(verbose):
    log(verbose, "LOG", "Initializing LLM...")


def evaluate(llm, verbose, repo_url, title=None, year=None):
    repository_zip_name = "data/repo.zip"
    token = os.getenv("githubToken")
    # token = userdata.get('githubToken')

    if (llm):
        init_llm(verbose)
    else:
        log(verbose, "LOG", "No LLM will be used for the evaluation.")

    # Predicted reproducibility fields for the repository.
    results = {
        "pred_live": "Yes",
        "pred_dependencies": None,
        "pred_training": None,
        "pred_evaluation": None,
        "pred_weights": None,
        "pred_readme": None,
        "pred_license": None,
        "pred_stars": None,
        "pred_citations": None,
        "pred_valid": False,
    }
    try:
        if (get_api_link(repo_url) != ""):
            results["pred_valid"] = True
        else:
            results["pred_live"] = "No"
            results["pred_training"] = "No"
            results["pred_evaluation"] = "No"
            results["pred_weights"] = "No"
            results["pred_packages"] = "No"
            return results

        username, repo_name = decompose_url(repo_url)
        log(verbose, "LOG", f"Fetching github repository: https://github.com/{username}/{repo_name}")
        fetch_repo(verbose, repo_url, repository_zip_name, token)

        # Look up the citation count on OpenAlex when a paper title and year are given.
        if ((title != None) & (year != None) & (title != "") & (year != "")):
            res = fetch_openalex(verbose, title, year)
            if (res != None):
                res = res["results"]
                if (len(res) > 0):
                    res = res[0]
                    results["pred_citations"] = res["cited_by_count"]

        if (not(os.path.exists(repository_zip_name))):
            results["pred_live"] = "No"
            return results

        zip = zipfile.ZipFile(repository_zip_name)
        readme = fetch_readme(zip)
        results["pred_stars"] = fetch_repo_stars(verbose, repo_url, token)

        if (len(zip.namelist()) <= 2):
            log(verbose, "LOG", "Empty repository")
            results["pred_live"] = "No"
            results["pred_training"] = "No"
            results["pred_evaluation"] = "No"
            results["pred_weights"] = "No"
            results["pred_packages"] = "No"
        else:
            results["pred_dependencies"] = requirements.evaluate(verbose, llm, zip, readme)
            results["pred_training"] = training.evaluate(verbose, llm, zip, readme)
            results["pred_evaluation"] = validating.evaluate(verbose, llm, zip, readme)
            results["pred_weights"] = weights.evaluate(verbose, llm, zip, readme)
            results["pred_readme"] = documentation.evaluate(verbose, llm, zip, readme)
            results["pred_codetocomment"] = documentation.get_code_to_comment_ratio(zip)
            results["pred_license"] = license.evaluate(verbose, llm, zip, readme)
        return results
    except Exception as e:
        log(verbose, "ERROR", "Evaluating repository failed: " + str(e))
        results["pred_live"] = "No"
        return results
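

# Example usage (a sketch; the repository URL, title and year below are
# hypothetical placeholders, not values from this project):
#
#   results = evaluate(llm=None, verbose=1,
#                      repo_url="https://github.com/example-user/example-repo",
#                      title="Example paper title", year="2023")
#   print(results["pred_training"], results["pred_license"], results["pred_stars"])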


def full_evaluations():
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
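    # Expected columns in data/dump.csv (inferred from how rows are used here
    # and in midl_evaluations): "url", "title", "year", "venue", plus the
    # ground-truth fields "dependencies", "training", "evaluation", "weights",
    # "readme" and "license".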
    full_results = []
    nth = 1  # evaluate every nth row; 1 means every row
    for idx, row in paper_dump.iterrows():
        if (idx % nth != 0):
            continue
        if (row["url"] == ""):
            continue
        print(str(int(100 * idx / paper_dump["title"].count())) + "% done")
        result = evaluate(None, False, row["url"], row["title"], row["year"])
        for column in result.keys():
            row[column] = result[column]
        full_results.append(row)


def midl_evaluations():
    compare_to_gt = True
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
    verbose = 1
    eval_readme = []
    eval_training = []
    eval_evaluating = []
    eval_licensing = []
    eval_weights = []
    eval_dependencies = []
    full_results = []
    for idx, row in paper_dump.iterrows():
        if (row["venue"] != "MIDL"):
            continue
        if (row["year"] == 2024):  # assumed intent: skip the 2024 edition (original checked row["venue"])
            continue
        if (pd.isna(row["url"]) | (row["url"] == "")):
            continue
        print(f"\nEvaluating {idx+1} out of {len(paper_dump.index)} papers...")
        print(f'Paper title - "{row["title"]}" ({row["year"]})')
        print(f'Repository link - {row["url"]}')
        result = evaluate(None, verbose, row["url"])
        for column in result.keys():
            row[column] = result[column]
        full_results.append(row)
        if (compare_to_gt):
            print("\nSummary:")
            if ((row["pred_dependencies"] is not None) & (row["dependencies"] != "")):
                eval_dependencies.append(row["pred_dependencies"] == row["dependencies"])
                print(f"Dependencies acc. - {row['pred_dependencies']} (GT:{row['dependencies']}) / {int(100 * np.mean(eval_dependencies))}%")
            if ((row["pred_training"] is not None) & (row["training"] != "")):
                eval_training.append(row["training"] == row["pred_training"])
                print(f"Training acc. - {row['pred_training']} (GT:{row['training']}) / {int(100 * np.mean(eval_training))}%")
            if ((row["pred_evaluation"] is not None) & (row["evaluation"] != "")):
                eval_evaluating.append(row["evaluation"] == row["pred_evaluation"])
                print(f"Evaluating acc. - {row['pred_evaluation']} (GT:{row['evaluation']}) / {int(100 * np.mean(eval_evaluating))}%")
            if ((row["pred_weights"] is not None) & (row["weights"] != "")):
                eval_weights.append(row["weights"] == row["pred_weights"])
                print(f"Weights acc. - {row['pred_weights']} (GT:{row['weights']}) / {int(100 * np.mean(eval_weights))}%")
            if ((row["pred_readme"] is not None) & (row["readme"] != "")):
                eval_readme.append(row["readme"] == row["pred_readme"])
                print(f"README acc. - {row['pred_readme']} (GT:{row['readme']}) / {int(100 * np.mean(eval_readme))}%")
            if ((row["pred_license"] is not None) & (row["license"] != "")):
                eval_licensing.append(("No" if row["license"] == "No" else "Yes") == row["pred_license"])
                print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")