Spaces:

attilasimko
/

reproduce

Running on CPU Upgrade

App Files Community

Attila Simkó committed about 23 hours ago

Commit

c76b324

1 Parent(s): c307e6e

not force

Browse files

Files changed (3) hide show

core/conversion.py +3 -6
data_generation/fetch_processed.py +1 -1
plotting/midl_summary.py +12 -12

core/conversion.py CHANGED Viewed

@@ -74,11 +74,8 @@ def decompose_url(url):
         return None, None
 def fetch_repo(repo_url, repo_name, token, force_download=False):
-    if (os.path.exists(repo_name)):
-        if (force_download):
-            os.remove(repo_name)
-        else:
-            return
     if ("github.com" not in repo_url):
         return ValueError(f"URL not for github repo, please evaluate manually ({repo_url}).")
@@ -101,7 +98,7 @@ def fetch_repo(repo_url, repo_name, token, force_download=False):
 def download_repo(paper):
     try:
         if (paper.main_repo_url is None):
-            return
         fetch_repo(0, paper.main_repo_url, paper.zip_path, token)
     except Exception as e:

         return None, None
 def fetch_repo(repo_url, repo_name, token, force_download=False):
+    if (os.path.exists(repo_name) & (not force_download)):
+        return
     if ("github.com" not in repo_url):
         return ValueError(f"URL not for github repo, please evaluate manually ({repo_url}).")
 def download_repo(paper):
     try:
         if (paper.main_repo_url is None):
+            return paper
         fetch_repo(0, paper.main_repo_url, paper.zip_path, token)
     except Exception as e:

data_generation/fetch_processed.py CHANGED Viewed

@@ -86,7 +86,7 @@ def download_xml(paper):
     return paper
-max_workers = 6
 if __name__ == "__main__":
     for venue in VENUE_ORDER:
         df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)

     return paper
+max_workers = 4
 if __name__ == "__main__":
     for venue in VENUE_ORDER:
         df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)

plotting/midl_summary.py CHANGED Viewed

@@ -7,7 +7,7 @@ import pandas as pd
 import numpy as np
 from core.paper import Paper
-def compare(ground_truth, automated_truth, key, verbose=False):
     if key not in ground_truth.keys() or key not in automated_truth.keys():
         return np.nan
     if (pd.isna(ground_truth[key]) or pd.isna(automated_truth[key])):
@@ -17,7 +17,7 @@ def compare(ground_truth, automated_truth, key, verbose=False):
         ground_truth[key] = "No" if ground_truth[key] == "No" else "Yes"
     res = ground_truth[key] == automated_truth[key]
     if verbose and res == False:
-        print(f"{key} acc. - {automated_truth[key]} (GT:{ground_truth[key]})")
     return res
 max_workers = 6
@@ -39,16 +39,16 @@ for idx, paper in enumerate(papers):
     if paper.venue != "MIDL" or paper.main_repo_url is None or (int(paper.year) >= 2024 if training else int(paper.year) < 2024):
         continue
-    if (verbose):
-        print(f"\nEvaluating {idx} out of {len(papers)} papers...")
-        print(f'Paper title - "{paper.title}" ({paper.year})')
-        print(f'Repository link - {paper.main_repo_url}')
-    eval_dependencies.append(compare(paper.code_repro_manual, paper.code_repro_auto, "dependencies", verbose))
-    eval_training.append(compare(paper.code_repro_manual, paper.code_repro_auto, "training", verbose))
-    eval_evaluating.append(compare(paper.code_repro_manual, paper.code_repro_auto, "evaluation", verbose))
-    eval_weights.append(compare(paper.code_repro_manual, paper.code_repro_auto, "weights", verbose))
-    eval_readme.append(compare(paper.code_repro_manual, paper.code_repro_auto, "readme", verbose))
-    eval_licensing.append(compare(paper.code_repro_manual, paper.code_repro_auto, "license", verbose))
 print("\nSummary:")
 print(f"Dependencies acc. - {int(100 * np.nanmean(eval_dependencies))}%")

 import numpy as np
 from core.paper import Paper
+def compare(ground_truth, automated_truth, key, verbose, url):
     if key not in ground_truth.keys() or key not in automated_truth.keys():
         return np.nan
     if (pd.isna(ground_truth[key]) or pd.isna(automated_truth[key])):
         ground_truth[key] = "No" if ground_truth[key] == "No" else "Yes"
     res = ground_truth[key] == automated_truth[key]
     if verbose and res == False:
+        print(f"{key} acc. - {automated_truth[key]} (GT:{ground_truth[key]}) ({url})")
     return res
 max_workers = 6
     if paper.venue != "MIDL" or paper.main_repo_url is None or (int(paper.year) >= 2024 if training else int(paper.year) < 2024):
         continue
+    # if (verbose):
+    #     print(f"\nEvaluating {idx} out of {len(papers)} papers...")
+    #     print(f'Paper title - "{paper.title}" ({paper.year})')
+    #     print(f'Repository link - {paper.main_repo_url}')
+    eval_dependencies.append(compare(paper.code_repro_manual, paper.code_repro_auto, "dependencies", verbose, paper.main_repo_url))
+    eval_training.append(compare(paper.code_repro_manual, paper.code_repro_auto, "training", verbose, paper.main_repo_url))
+    eval_evaluating.append(compare(paper.code_repro_manual, paper.code_repro_auto, "evaluation", verbose, paper.main_repo_url))
+    eval_weights.append(compare(paper.code_repro_manual, paper.code_repro_auto, "weights", verbose, paper.main_repo_url))
+    eval_readme.append(compare(paper.code_repro_manual, paper.code_repro_auto, "readme", verbose, paper.main_repo_url))
+    eval_licensing.append(compare(paper.code_repro_manual, paper.code_repro_auto, "license", verbose, paper.main_repo_url))
 print("\nSummary:")
 print(f"Dependencies acc. - {int(100 * np.nanmean(eval_dependencies))}%")