"""Script entry point: evaluate the repository of every paper in the URL dump.

Reads ``data/urls.csv`` (tab-separated), turns each row into a ``Paper``,
runs ``process_row`` on every paper in a pool of worker processes, and
writes the ``to_dict()`` form of the results to ``data/results.csv``
(tab-separated, without the index column).
"""

import pandas as pd
from evaluations.repo_evaluations import process_row
from evaluations.utils import *
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
from core.paper import Paper


def main():
    """Load the papers, evaluate them in parallel, and save the results."""
    url_table = pd.read_csv("data/urls.csv", sep="\t")
    worker_count = 12

    # One Paper per row of the input table, in table order.
    pending = []
    for _, row in url_table.iterrows():
        pending.append(Paper.from_row(row))
    # Debugging aid: uncomment to restrict the run to a single repository.
    # pending = [paper for paper in pending if paper.main_repo_url == "https://github.com/AsukaDaisuki/MAT"]

    with ProcessPoolExecutor(max_workers=worker_count) as pool:
        # Executor.map yields results in input order; tqdm only wraps the
        # iterator to display progress while the list is being collected.
        progress = tqdm(
            pool.map(process_row, pending),
            total=len(pending),
            desc="Running repo evaluations",
        )
        evaluated = list(progress)

    rows = [paper.to_dict() for paper in evaluated]
    pd.DataFrame(rows).to_csv("data/results.csv", sep="\t", index=False)


if __name__ == "__main__":
    main()