Commit · 3cfadc8
1 Parent(s): 2188124
new evaluations
- .gitattributes +0 -0
- .gitignore +6 -1
- data/dump.csv +0 -0
- data/fetch_processed.py +1 -0
- data/fetch_zips.py +43 -0
- data/zipfiles.csv +0 -0
- evaluations/repo_evaluations.py +16 -66
- evaluations/utils.py +0 -1
- full_eval.py +0 -7
- midl.py +0 -12
- midl_summary.py +57 -0
.gitattributes
CHANGED
File without changes
.gitignore
CHANGED
@@ -1,5 +1,10 @@
-data
+data/MIDL.csv
+data/MICCAI.csv
+data/arXiv.csv
+data/Nature.csv
+data/results.csv
 data/*.zip
+data/test/*
 *.env
 .env
 evaluations/__pycache__/*
data/dump.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/fetch_processed.py
CHANGED
@@ -9,6 +9,7 @@ custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]
 
 for venue in custom_order:
     df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)
+    df = df.replace('\t', ' ', regex=True)
     df.to_csv(f'data/{venue}.csv', sep="\t")
 
 # Store all evaluations here
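The added replace() call protects the tab-separated output: a literal tab inside a spreadsheet cell would otherwise shift columns when the file is later re-read with sep="\t". A minimal sketch with made-up data (the output file name is illustrative):

import pandas as pd

# Hypothetical cell that contains a literal tab character.
df = pd.DataFrame({"title": ["A paper\twith a tab"], "year": [2023]})

# Same sanitisation as above: swap tabs for spaces before writing a TSV.
df = df.replace('\t', ' ', regex=True)
df.to_csv("data/example.csv", sep="\t")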
data/fetch_zips.py
ADDED
@@ -0,0 +1,43 @@
+import csv
+import numpy as np
+import sys
+import pandas as pd
+import re
+sys.path.append("./")
+from evaluations.utils import *
+
+token = os.getenv("githubToken")
+custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]
+
+for venue in custom_order:
+    df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)
+    df = df.replace('\t', ' ', regex=True)
+    df.to_csv(f'data/{venue}.csv', sep="\t")
+
+# Store all evaluations here
+paper_dump = pd.DataFrame()
+# Official color codes for conferences
+zip_idx = 0
+
+for venue in custom_order:
+    with open(f'data/{venue}.csv') as file:
+        tsv_file = csv.reader(file, delimiter="\t")
+        for row in tsv_file:
+            if (row[0] == ""):
+                continue
+
+            if (row[1] == ""):
+                continue
+
+            repo_url = row[4]
+            username, repo_name = decompose_url(repo_url)
+            repo_save_name = f"repo_{zip_idx}.zip"
+            repository_zip_name = f"data/test/{repo_save_name}"
+            log(0, "LOG", f"Fetching github repository: https://github.com/{username}/{repo_name}")
+            fetch_repo(0, repo_url, repository_zip_name, token)
+
+            if (os.path.exists(repository_zip_name)):
+                paper_dump = pd.concat([paper_dump, pd.DataFrame({"venue": venue, "title": [row[1]], "year": [row[2]], "pdf": [row[3]], "url": [row[4]], "public": [row[5]], "dependencies": [row[6]], "training": [row[7]], "evaluation": [row[8]], "weights": [row[9]], "readme": [row[10]], "license": [row[11]], "zip_idx": [ repository_zip_name ]})], ignore_index=True)
+            zip_idx += 1
+
+paper_dump.to_csv(f'data/zipfiles.csv', sep="\t")
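data/fetch_zips.py downloads each repository once into data/test/ and records the archive path in the zip_idx column of data/zipfiles.csv. A minimal sketch of how that dump can be read back (mirroring what full_evaluation() does further down; column names are taken from this commit):

import pandas as pd
import zipfile

# Index written by data/fetch_zips.py (tab-separated).
paper_dump = pd.read_csv("data/zipfiles.csv", sep="\t")

for _, row in paper_dump.iterrows():
    # "zip_idx" holds the path of the pre-fetched archive, e.g. data/test/repo_0.zip.
    repo_zip = zipfile.ZipFile(row["zip_idx"])
    print(row["title"], "-", len(repo_zip.namelist()), "files in archive")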
data/zipfiles.csv
ADDED
The diff for this file is too large to render.
See raw diff
evaluations/repo_evaluations.py
CHANGED
@@ -7,10 +7,7 @@ import os
 import numpy as np
 from huggingface_hub import InferenceClient
 
-def evaluate(llm, verbose, repo_url, title=None, year=None):
-    repository_zip_name = "data/repo.zip"
-    token = os.getenv("githubToken")
-
+def evaluate(llm, verbose, repo_url, title=None, year=None, zip=None):
     try:
         if (not(llm)):
             log(verbose, "LOG", "No LLM will be used for the evaluation.")
@@ -31,20 +28,26 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
         return results
 
     username, repo_name = decompose_url(repo_url)
-    log(verbose, "LOG", f"Fetching github repository: https://github.com/{username}/{repo_name}")
 
-    fetch_repo(verbose, repo_url, repository_zip_name, token)
+    # If you don't provide a zip file, it will be fetched from github. For this, you need to provide a github token.
+    if (zip is None):
+        token = os.getenv("githubToken")
+        repository_zip_name = "data/repo.zip"
+        log(verbose, "LOG", f"Fetching github repository: https://github.com/{username}/{repo_name}")
 
+        fetch_repo(verbose, repo_url, repository_zip_name, token)
 
-    if (not(os.path.exists(repository_zip_name))):
-        results["pred_live"] = "No"
-        return results
+        if (not(os.path.exists(repository_zip_name))):
+            results["pred_live"] = "No"
+            return results
+
+        results["pred_stars"] = fetch_repo_stars(verbose, repo_url, token)
+
+        zip = zipfile.ZipFile(repository_zip_name)
 
-    zip = zipfile.ZipFile(repository_zip_name)
     readme = fetch_readme(zip)
     results["NA"] = documentation.is_applicable(verbose, llm, readme)
 
-    results["pred_stars"] = fetch_repo_stars(verbose, repo_url, token)
     results["pred_license"] = license.evaluate(verbose, llm, zip, readme)
 
     if (len(zip.namelist()) <= 2):
@@ -65,7 +68,7 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
     return results
 
 def full_evaluation():
-    paper_dump = pd.read_csv("data/
+    paper_dump = pd.read_csv("data/zipfiles.csv", sep="\t")
     full_results = []
 
     for idx, row in paper_dump.iterrows():
@@ -74,62 +77,9 @@ def full_evaluation():
             continue
 
         print(str(int(100 * idx / paper_dump["title"].count())) + "% done")
-        result = evaluate(None, False, row["url"], row["title"], row["year"])
+        result = evaluate(None, False, row["url"], row["title"], row["year"], zip=zipfile.ZipFile(row["zip_idx"]))
         for column in result.keys():
             row[column] = result[column]
 
         full_results.append(row)
     return pd.DataFrame(full_results)
-
-def midl_evaluations(model):
-    compare_to_gt = True
-    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
-    verbose = 0
-
-    eval_readme = []
-    eval_training = []
-    eval_evaluating = []
-    eval_licensing = []
-    eval_weights = []
-    eval_dependencies = []
-    full_results = []
-    for idx, row in paper_dump.iterrows():
-        if (row["venue"] != "MIDL"):
-            continue
-
-        if (row["venue"] == 2024):
-            continue
-
-        if (pd.isna(row["url"]) | (row["url"] == "")):
-            continue
-
-        print(f"\nEvaluating {idx+1} out of {len(paper_dump.index)} papers...")
-        print(f'Paper title - "{row["title"]}" ({row["year"]})')
-        print(f'Repository link - {row["url"]}')
-        result = evaluate(model, verbose, row["url"])
-        for column in result.keys():
-            row[column] = result[column]
-        full_results.append(row)
-        if (compare_to_gt):
-            print("\nSummary:")
-            print(row["NA"])
-            if ((~pd.isna(row["dependencies"])) & (row["pred_dependencies"] is not None)):
-                eval_dependencies.append(row["pred_dependencies"] == row["dependencies"])
-                print(f"Dependencies acc. - {row['pred_dependencies']} (GT:{row['dependencies']}) / {int(100 * np.mean(eval_dependencies))}%")
-            if ((~pd.isna(row["training"])) & (row["pred_dependencies"] is not None)):
-                eval_training.append(row["training"] == row["pred_training"])
-                print(f"Training acc. -{row['pred_training']} (GT:{row['training']}) / {int(100 * np.mean(eval_training))}%")
-            if ((~pd.isna(row["evaluation"])) & (row["pred_dependencies"] is not None)):
-                eval_evaluating.append(row["evaluation"] == row["pred_evaluation"])
-                print(f"Evaluating acc. - {row['pred_evaluation']} (GT:{row['evaluation']}) / {int(100 * np.mean(eval_evaluating))}%")
-            if ((~pd.isna(row["weights"])) & (row["pred_dependencies"] is not None)):
-                eval_weights.append(row["weights"] == row["pred_weights"])
-                print(f"Weights acc. - {row['pred_weights']} (GT:{row['weights']}) / {int(100 * np.mean(eval_weights))}%")
-            if ((~pd.isna(row["readme"])) & (row["pred_dependencies"] is not None)):
-                eval_readme.append(row["readme"] == row["pred_readme"])
-                print(f"README acc. - {row['pred_readme']} (GT:{row['readme']}) / {int(100 * np.mean(eval_readme))}%")
-            if ((~pd.isna(row["license"])) & (row["pred_dependencies"] is not None)):
-                eval_licensing.append(("No" if row["license"] == "No" else "Yes") == row["pred_license"])
-                print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
-
-    return pd.DataFrame(full_results)
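The main change here is the new zip parameter on evaluate(): when a pre-opened zipfile.ZipFile is passed in, the GitHub download and the githubToken lookup are skipped (pred_live and pred_stars are only filled in when the function fetches the repository itself). A hedged sketch of the two call styles; the URL, token, and archive path are placeholders:

import os
import zipfile
from evaluations.repo_evaluations import evaluate

# Variant 1: let evaluate() fetch the repository; needs the githubToken environment variable.
os.environ.setdefault("githubToken", "<your-token>")
results = evaluate(None, 0, "https://github.com/<username>/<repo_name>")

# Variant 2: reuse an archive pre-fetched by data/fetch_zips.py; no GitHub call is made.
results = evaluate(None, 0, "https://github.com/<username>/<repo_name>",
                   zip=zipfile.ZipFile("data/test/repo_0.zip"))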
evaluations/utils.py
CHANGED
@@ -51,7 +51,6 @@ def fetch_repo(verbose, repo_url, repo_name, token):
     if (os.path.exists(repo_name)):
         os.remove(repo_name)
 
-
     if ("github.com" not in repo_url):
         log(verbose, "ERROR", f"URL not for github repo, please evaluate manually ({repo_url}).")
         return
full_eval.py
CHANGED
@@ -1,11 +1,4 @@
 from evaluations.repo_evaluations import full_evaluation
-# importing os module for environment variables
-import os
-# importing necessary functions from dotenv library
-from dotenv import load_dotenv
-# loading variables from .env file
-load_dotenv()
-token = os.getenv("githubToken")
 
 res = full_evaluation()
 res.to_csv("data/results.csv", sep="\t", index=False)
midl.py
DELETED
@@ -1,12 +0,0 @@
-from evaluations.repo_evaluations import midl_evaluations
-from evaluations.models import LocalLLM
-import os
-from dotenv import load_dotenv
-load_dotenv()
-token = os.getenv("githubToken")
-
-# Load model directly
-model = LocalLLM("meta-llama/Llama-3.2-3B-Instruct")
-
-res = midl_evaluations(model)
-res.to_csv("results_midl.csv", sep="\t", index=False)
midl_summary.py
ADDED
@@ -0,0 +1,57 @@
+import os
+import pandas as pd
+import numpy as np
+
+
+compare_to_gt = True
+ground_truth = pd.read_csv("data/zipfiles.csv", sep="\t")
+results = pd.read_csv("data/results.csv", sep="\t")
+verbose = 0
+
+eval_readme = []
+eval_training = []
+eval_evaluating = []
+eval_licensing = []
+eval_weights = []
+eval_dependencies = []
+full_results = []
+for (index1, row1), (index2, row2) in zip(ground_truth.iterrows(), results.iterrows()):
+    if (pd.isna(row1["training"])):
+        continue
+
+    print(f"\nEvaluating {index1+1} out of {len(ground_truth.index)} papers...")
+    print(f'Paper title - "{row1["title"]}" ({row1["year"]})')
+    print(f'Repository link - {row1["url"]}')
+    if ((not(pd.isna(row1["dependencies"]))) & (row2["pred_dependencies"] is not None)):
+        eval_dependencies.append(row2["pred_dependencies"] == row1["dependencies"])
+        if (row2["pred_dependencies"] != row1["dependencies"]):
+            print(f"Dependencies acc. - {row2['pred_dependencies']} (GT:{row1['dependencies']})")
+    if ((not(pd.isna(row1["training"]))) & (row2["pred_dependencies"] is not None)):
+        eval_training.append(row1["training"] == row2["pred_training"])
+        if (row1["training"] != row2["pred_training"]):
+            print(f"Training acc. -{row2['pred_training']} (GT:{row1['training']})")
+    if ((not(pd.isna(row1["evaluation"]))) & (row2["pred_dependencies"] is not None)):
+        eval_evaluating.append(row1["evaluation"] == row2["pred_evaluation"])
+        if (row1["evaluation"] != row2["pred_evaluation"]):
+            print(f"Evaluating acc. - {row2['pred_evaluation']} (GT:{row1['evaluation']})")
+    if ((not(pd.isna(row1["weights"]))) & (row2["pred_dependencies"] is not None)):
+        eval_weights.append(row1["weights"] == row2["pred_weights"])
+        if (row1["weights"] != row2["pred_weights"]):
+            print(f"Weights acc. - {row2['pred_weights']} (GT:{row1['weights']})")
+    if ((not(pd.isna(row1["readme"]))) & (row2["pred_dependencies"] is not None)):
+        eval_readme.append(row1["readme"] == row2["pred_readme"])
+        if (row1["readme"] != row2["pred_readme"]):
+            print(f"README acc. - {row2['pred_readme']} (GT:{row1['readme']})")
+    if ((not(pd.isna(row1["license"]))) & (row2["pred_dependencies"] is not None)):
+        eval_licensing.append(("No" if row1["license"] == "No" else "Yes") == row2["pred_license"])
+        if (("No" if row1["license"] == "No" else "Yes") != row2["pred_license"]):
+            print(f"LICENSE acc. - {row2['pred_license']} (GT:{row1['license']})")
+
+
+print("\nSummary:")
+print(f"Dependencies acc. - {int(100 * np.mean(eval_dependencies))}%")
+print(f"Training acc. - {int(100 * np.mean(eval_training))}%")
+print(f"Evaluating acc. - {int(100 * np.mean(eval_evaluating))}%")
+print(f"Weights acc. - {int(100 * np.mean(eval_weights))}%")
+print(f"README acc. - {int(100 * np.mean(eval_readme))}%")
+print(f"LICENSE acc. - {int(100 * np.mean(eval_licensing))}%")