Attila Simkó committed on
Commit
c76b324
·
1 Parent(s): c307e6e
core/conversion.py CHANGED
@@ -74,11 +74,8 @@ def decompose_url(url):
74
  return None, None
75
 
76
  def fetch_repo(repo_url, repo_name, token, force_download=False):
77
- if (os.path.exists(repo_name)):
78
- if (force_download):
79
- os.remove(repo_name)
80
- else:
81
- return
82
 
83
  if ("github.com" not in repo_url):
84
  return ValueError(f"URL not for github repo, please evaluate manually ({repo_url}).")
@@ -101,7 +98,7 @@ def fetch_repo(repo_url, repo_name, token, force_download=False):
101
  def download_repo(paper):
102
  try:
103
  if (paper.main_repo_url is None):
104
- return
105
 
106
  fetch_repo(0, paper.main_repo_url, paper.zip_path, token)
107
  except Exception as e:
 
74
  return None, None
75
 
76
  def fetch_repo(repo_url, repo_name, token, force_download=False):
77
+ if (os.path.exists(repo_name) & (not force_download)):
78
+ return
 
 
 
79
 
80
  if ("github.com" not in repo_url):
81
  return ValueError(f"URL not for github repo, please evaluate manually ({repo_url}).")
 
98
  def download_repo(paper):
99
  try:
100
  if (paper.main_repo_url is None):
101
+ return paper
102
 
103
  fetch_repo(0, paper.main_repo_url, paper.zip_path, token)
104
  except Exception as e:
data_generation/fetch_processed.py CHANGED
@@ -86,7 +86,7 @@ def download_xml(paper):
86
  return paper
87
 
88
 
89
- max_workers = 6
90
  if __name__ == "__main__":
91
  for venue in VENUE_ORDER:
92
  df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)
 
86
  return paper
87
 
88
 
89
+ max_workers = 4
90
  if __name__ == "__main__":
91
  for venue in VENUE_ORDER:
92
  df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)
plotting/midl_summary.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
7
  import numpy as np
8
  from core.paper import Paper
9
 
10
- def compare(ground_truth, automated_truth, key, verbose=False):
11
  if key not in ground_truth.keys() or key not in automated_truth.keys():
12
  return np.nan
13
  if (pd.isna(ground_truth[key]) or pd.isna(automated_truth[key])):
@@ -17,7 +17,7 @@ def compare(ground_truth, automated_truth, key, verbose=False):
17
  ground_truth[key] = "No" if ground_truth[key] == "No" else "Yes"
18
  res = ground_truth[key] == automated_truth[key]
19
  if verbose and res == False:
20
- print(f"{key} acc. - {automated_truth[key]} (GT:{ground_truth[key]})")
21
  return res
22
 
23
  max_workers = 6
@@ -39,16 +39,16 @@ for idx, paper in enumerate(papers):
39
  if paper.venue != "MIDL" or paper.main_repo_url is None or (int(paper.year) >= 2024 if training else int(paper.year) < 2024):
40
  continue
41
 
42
- if (verbose):
43
- print(f"\nEvaluating {idx} out of {len(papers)} papers...")
44
- print(f'Paper title - "{paper.title}" ({paper.year})')
45
- print(f'Repository link - {paper.main_repo_url}')
46
- eval_dependencies.append(compare(paper.code_repro_manual, paper.code_repro_auto, "dependencies", verbose))
47
- eval_training.append(compare(paper.code_repro_manual, paper.code_repro_auto, "training", verbose))
48
- eval_evaluating.append(compare(paper.code_repro_manual, paper.code_repro_auto, "evaluation", verbose))
49
- eval_weights.append(compare(paper.code_repro_manual, paper.code_repro_auto, "weights", verbose))
50
- eval_readme.append(compare(paper.code_repro_manual, paper.code_repro_auto, "readme", verbose))
51
- eval_licensing.append(compare(paper.code_repro_manual, paper.code_repro_auto, "license", verbose))
52
 
53
  print("\nSummary:")
54
  print(f"Dependencies acc. - {int(100 * np.nanmean(eval_dependencies))}%")
 
7
  import numpy as np
8
  from core.paper import Paper
9
 
10
+ def compare(ground_truth, automated_truth, key, verbose, url):
11
  if key not in ground_truth.keys() or key not in automated_truth.keys():
12
  return np.nan
13
  if (pd.isna(ground_truth[key]) or pd.isna(automated_truth[key])):
 
17
  ground_truth[key] = "No" if ground_truth[key] == "No" else "Yes"
18
  res = ground_truth[key] == automated_truth[key]
19
  if verbose and res == False:
20
+ print(f"{key} acc. - {automated_truth[key]} (GT:{ground_truth[key]}) ({url})")
21
  return res
22
 
23
  max_workers = 6
 
39
  if paper.venue != "MIDL" or paper.main_repo_url is None or (int(paper.year) >= 2024 if training else int(paper.year) < 2024):
40
  continue
41
 
42
+ # if (verbose):
43
+ # print(f"\nEvaluating {idx} out of {len(papers)} papers...")
44
+ # print(f'Paper title - "{paper.title}" ({paper.year})')
45
+ # print(f'Repository link - {paper.main_repo_url}')
46
+ eval_dependencies.append(compare(paper.code_repro_manual, paper.code_repro_auto, "dependencies", verbose, paper.main_repo_url))
47
+ eval_training.append(compare(paper.code_repro_manual, paper.code_repro_auto, "training", verbose, paper.main_repo_url))
48
+ eval_evaluating.append(compare(paper.code_repro_manual, paper.code_repro_auto, "evaluation", verbose, paper.main_repo_url))
49
+ eval_weights.append(compare(paper.code_repro_manual, paper.code_repro_auto, "weights", verbose, paper.main_repo_url))
50
+ eval_readme.append(compare(paper.code_repro_manual, paper.code_repro_auto, "readme", verbose, paper.main_repo_url))
51
+ eval_licensing.append(compare(paper.code_repro_manual, paper.code_repro_auto, "license", verbose, paper.main_repo_url))
52
 
53
  print("\nSummary:")
54
  print(f"Dependencies acc. - {int(100 * np.nanmean(eval_dependencies))}%")