# reproduce/evaluations/repo_evaluations.py
import pandas as pd
import os
from evaluations import documentation, requirements, training, validating, license, weights, pitfalls
from evaluations.utils import *
from core.conversion import fetch_repo, decompose_url
import zipfile
import csv
import numpy as np
from huggingface_hub import InferenceClient
from concurrent.futures import ThreadPoolExecutor
from core.conversion import noop_logger
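
# Personal access token for the GitHub API (only needed by the
# commented-out fetch_repo_stars call below).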
token = os.getenv("githubToken")


def evaluate(llm, paper, log_fn=noop_logger):
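    """Run the automated reproducibility checks against the paper's
    repository archive, filling paper.code_repro_auto with the results."""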
repo_url = paper.main_repo_url
title = paper.title
year = paper.year
    zip_file = zipfile.ZipFile(paper.zip_path)
try:
        if not llm:
            log_fn("LOG", "No LLM will be used for the evaluation.")
        paper.code_repro_auto = {
            "live": "Yes",
            "dependencies": None,
            "training": None,
            "evaluation": None,
            "weights": None,
            "readme": None,
            "license": None,
            "stars": None,
            "citations": None,
            "valid": False,
        }
# if ((title != None) & (year != None) & (title != "") & (year != "")):
# res = fetch_openalex(title, year, log_fn=log_fn)
# if ((res != None)):
# res = res["results"]
# if (len(res) > 0):
# res = res[0]
# paper.code_repro_auto["citations"] = res["cited_by_count"]
# if (get_api_link(repo_url) != ""):
# paper.code_repro_auto["valid"] = True
# else:
# return paper.code_repro_auto
# paper.code_repro_auto["stars"] = fetch_repo_stars(repo_url, token, log_fn)
        readmes = fetch_readmes(zip_file)
        paper.code_repro_auto["NA"] = documentation.is_applicable(llm, readmes, log_fn)
        paper.code_repro_auto["license"] = license.evaluate(llm, zip_file, readmes, log_fn)
        # An archive with at most two entries holds no real content; flag it but continue.
        if len(zip_file.namelist()) <= 2:
            log_fn("LOG", "The repository is empty.")
        paper.code_repro_auto["dependencies"] = requirements.evaluate(llm, zip_file, readmes, log_fn)
        paper.code_repro_auto["training"] = training.evaluate(llm, zip_file, readmes, log_fn)
        paper.code_repro_auto["evaluation"] = validating.evaluate(llm, zip_file, readmes, log_fn)
        paper.code_repro_auto["weights"] = weights.evaluate(llm, zip_file, readmes, log_fn)
        paper.code_repro_auto["readme"] = documentation.evaluate(llm, zip_file, readmes, log_fn)
        paper.code_repro_auto["codetocomment"] = documentation.get_code_to_comment_ratio(zip_file)
        # The pitfalls check's return value is not stored; findings surface through log_fn.
        pitfalls.evaluate(llm, zip_file, readmes, log_fn)
return paper
except Exception as e:
log_fn("ERROR", "Evaluating repository failed: " + str(e))
paper.code_repro_auto["live"] = "No"
return paper


def process_row(paper):
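    """Evaluate one paper without an LLM (llm=None), after verifying that
    its repository zip exists on disk."""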
    if paper.zip_path is None or not os.path.exists(paper.zip_path):
paper.log("ERROR", "Zip file doesn't exist")
return paper
paper = evaluate(None, paper, paper.log)
return paper
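

# A minimal sketch of how process_row could be fanned out across papers with
# the ThreadPoolExecutor imported above. evaluate_all and max_workers are
# hypothetical names, not part of the original pipeline.
def evaluate_all(papers, max_workers=8):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(process_row, papers))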