import os
import zipfile

from evaluations import documentation, requirements, training, validating, license, weights, pitfalls
from evaluations.utils import *
from core.conversion import noop_logger

token = os.getenv("githubToken")


def evaluate(llm, paper, log_fn=noop_logger):
    """Run the automated reproducibility checks on a paper's code archive.

    Results are stored on paper.code_repro_auto. llm may be None, in which
    case the evaluation modules are invoked without an LLM.
    """
    # Used only by the disabled OpenAlex / GitHub lookups below.
    repo_url = paper.main_repo_url
    title = paper.title
    year = paper.year
    repo_zip = zipfile.ZipFile(paper.zip_path)
    try:
        if not llm:
            log_fn("LOG", "No LLM will be used for the evaluation.")
        paper.code_repro_auto = {
            "live": "Yes",
            "dependencies": None,
            "training": None,
            "evaluation": None,
            "weights": None,
            "readme": None,
            "license": None,
            "stars": None,
            "citations": None,
            "valid": False,
        }
        # Citation and star counts are currently disabled:
        # if title is not None and year is not None and title != "" and year != "":
        #     res = fetch_openalex(title, year, log_fn=log_fn)
        #     if res is not None:
        #         res = res["results"]
        #         if len(res) > 0:
        #             res = res[0]
        #             paper.code_repro_auto["citations"] = res["cited_by_count"]
        # if get_api_link(repo_url) != "":
        #     paper.code_repro_auto["valid"] = True
        # else:
        #     return paper.code_repro_auto
        # paper.code_repro_auto["stars"] = fetch_repo_stars(repo_url, token, log_fn)
        readmes = fetch_readmes(repo_zip)
        paper.code_repro_auto["NA"] = documentation.is_applicable(llm, readmes, log_fn)
        paper.code_repro_auto["license"] = license.evaluate(llm, repo_zip, readmes, log_fn)
        if len(repo_zip.namelist()) <= 2:
            # An archive with at most two entries is treated as effectively
            # empty; this is only logged, and the checks below still run.
            log_fn("LOG", "The repository is empty.")
        paper.code_repro_auto["dependencies"] = requirements.evaluate(llm, repo_zip, readmes, log_fn)
        paper.code_repro_auto["training"] = training.evaluate(llm, repo_zip, readmes, log_fn)
        paper.code_repro_auto["evaluation"] = validating.evaluate(llm, repo_zip, readmes, log_fn)
        paper.code_repro_auto["weights"] = weights.evaluate(llm, repo_zip, readmes, log_fn)
        paper.code_repro_auto["readme"] = documentation.evaluate(llm, repo_zip, readmes, log_fn)
        paper.code_repro_auto["codetocomment"] = documentation.get_code_to_comment_ratio(repo_zip)
        # Pitfall findings are reported through log_fn; the return value is
        # intentionally discarded.
        pitfalls.evaluate(llm, repo_zip, readmes, log_fn)
        return paper
    except Exception as e:
        log_fn("ERROR", "Evaluating repository failed: " + str(e))
        paper.code_repro_auto["live"] = "No"
        return paper
    finally:
        # Close the archive even on the early returns above.
        repo_zip.close()


def process_row(paper):
    """Evaluate a single paper, skipping it if its zip archive is missing."""
    if paper.zip_path is None or not os.path.exists(paper.zip_path):
        paper.log("ERROR", "Zip file doesn't exist")
        return paper
    return evaluate(None, paper, paper.log)
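
# --- Usage sketch (added for illustration; not part of the original module) ---
# A minimal, hypothetical driver showing how process_row could be mapped over
# a CSV of papers in parallel. The "papers.csv" filename, its column names,
# and the SimpleNamespace stand-in for the project's paper object are all
# assumptions; the real pipeline presumably constructs richer paper objects
# elsewhere.
if __name__ == "__main__":
    from concurrent.futures import ThreadPoolExecutor
    from types import SimpleNamespace

    import pandas as pd

    df = pd.read_csv("papers.csv")  # assumed columns: title, year, main_repo_url, zip_path
    papers = [
        SimpleNamespace(
            title=row.get("title"),
            year=row.get("year"),
            main_repo_url=row.get("main_repo_url"),
            zip_path=row.get("zip_path"),
            log=noop_logger,  # matches the (level, message) call signature used above
        )
        for _, row in df.iterrows()
    ]
    # Zip inspection is I/O-bound, so a small thread pool is a reasonable fit.
    with ThreadPoolExecutor(max_workers=4) as pool:
        results = list(pool.map(process_row, papers))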