attilasimko committed
Commit 08a78c1 · 1 Parent(s): 0dc48b3

let's see...

evaluations/models.py CHANGED
@@ -8,7 +8,8 @@ system_messages = { "STRICT": """You are a chatbot evaluating github repositorie
 "HELP": """You are a chatbot evaluating github repositories, their python codes and corresponding readme files.
 Please help me answer the following question.
 Keep your answers short, and informative.
-Your answer should be a single paragraph.""" }
+Your answer should be a single paragraph.
+If you can't find any issues with the code, return an empty string.""" }
 
 class LocalLLM():
     def __init__(self, model_name):
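For context, the "HELP" entry in system_messages is presumably paired with a user prompt inside LocalLLM.predict, which this diff does not show. A minimal sketch of how that pairing might look; the chat-style message format and the build_messages helper are assumptions, not part of this commit:

# Illustrative sketch only: LocalLLM.predict is not shown in this diff, so the
# message format and the build_messages helper below are assumptions.
system_messages = {
    "HELP": (
        "You are a chatbot evaluating github repositories, their python codes "
        "and corresponding readme files.\n"
        "Please help me answer the following question.\n"
        "Keep your answers short, and informative.\n"
        "Your answer should be a single paragraph.\n"
        "If you can't find any issues with the code, return an empty string."
    ),
}

def build_messages(system_key, prompt):
    # Assemble a standard system/user message pair; the actual model call is omitted.
    return [
        {"role": "system", "content": system_messages[system_key]},
        {"role": "user", "content": prompt},
    ]

messages = build_messages("HELP", "Does this repository document its dependencies?")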
evaluations/pitfalls.py CHANGED
@@ -1,6 +1,46 @@
-from .utils import log, model_predict
+from .utils import log, fetch_code
 import re
 
 def evaluate(verbose, llm, zip, readme):
-    log(verbose, "TITLE", "\nLooking for common pitfalls...")
-
+    log(verbose, "TITLE", "\nLooking for common pitfalls (in development)...")
+    code = fetch_code(zip)
+
+    if (llm):
+        # Pitfall #1
+        llm.predict("HELP", f"{code}\n Please explain if you find any design-flaws with regards to the data collection in the code.")
+
+        # Pitfall #2
+        llm.predict("HELP", f"{code}\n Please explain if you find signs of dataset shift in the code (e.g. sampling bias, imbalanced populations, imbalanced labels, non-stationary environments).")
+
+        # Pitfall #3
+        llm.predict("HELP", f"{code}\n Please explain if you find any confounders in the code.")
+
+        # Pitfall #4
+        llm.predict("HELP", f"{code}\n Please explain if you find any measurement errors in the code (labelling mistakes, noisy measurements, inappropriate proxies)")
+
+        # Pitfall #5
+        llm.predict("HELP", f"{code}\n Please explain if you find signs of historical biases in the data used.")
+
+        # Pitfall #6
+        llm.predict("HELP", f"{code}\n Please explain if you find signs of information leaking between the training and testing data.")
+
+        # Pitfall #7
+        llm.predict("HELP", f"{code}\n Please explain if you find a model-problem mismatch (e.g. over-complicated/simplistic model, computational challenges)")
+
+        # Pitfall #8
+        llm.predict("HELP", f"{code}\n Please explain if you find any signs of overfitting in the code (e.g. high variance, high complexity, low bias).")
+
+        # Pitfall #9
+        llm.predict("HELP", f"{code}\n Please explain if you find any misused metrics in the code (e.g. poor metric selection, poor implementations)")
+
+        # Pitfall #10
+        llm.predict("HELP", f"{code}\n Please explain if you find any signs of black box models in the code (e.g. lack of interpretability, lack of transparency)")
+
+        # Pitfall #11
+        llm.predict("HELP", f"{code}\n Please explain if you find any signs of baseline comparison issues in the code (e.g. if the testing data does not fit the training data)")
+
+        # Pitfall #12
+        llm.predict("HELP", f"{code}\n Please explain if you find any signs of insufficient reporting in the code (e.g. missing hyperparameters, missing evaluation metrics)")
+
+        # Pitfall #13
+        llm.predict("HELP", f"{code}\n Please explain if you find signs of faulty interpretations of the reported results.")
evaluations/repo_evaluations.py CHANGED
@@ -1,6 +1,6 @@
 import pandas as pd
 import os
-from evaluations import documentation, requirements, training, validating, license, weights
+from evaluations import documentation, requirements, training, validating, license, weights, pitfalls
 from evaluations.utils import *
 import zipfile
 import os
@@ -52,18 +52,14 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
         results["pred_license"] = license.evaluate(verbose, llm, zip, readme)
         if (len(zip.namelist()) <= 2):
             log(verbose, "LOG", "The repository is empty.")
-            results["pred_live"] = "No"
-            results["pred_training"] = "No"
-            results["pred_evaluation"] = "No"
-            results["pred_weights"] = "No"
-            results["pred_packages"] = "No"
-        else:
-            results["pred_dependencies"] = requirements.evaluate(verbose, llm, zip, readme)
-            results["pred_training"] = training.evaluate(verbose, llm, zip, readme)
-            results["pred_evaluation"] = validating.evaluate(verbose, llm, zip, readme)
-            results["pred_weights"] = weights.evaluate(verbose, llm, zip, readme)
-            results["pred_readme"] = documentation.evaluate(verbose, llm, zip, readme)
-            results["pred_codetocomment"] = documentation.get_code_to_comment_ratio(zip)
+
+        results["pred_dependencies"] = requirements.evaluate(verbose, llm, zip, readme)
+        results["pred_training"] = training.evaluate(verbose, llm, zip, readme)
+        results["pred_evaluation"] = validating.evaluate(verbose, llm, zip, readme)
+        results["pred_weights"] = weights.evaluate(verbose, llm, zip, readme)
+        results["pred_readme"] = documentation.evaluate(verbose, llm, zip, readme)
+        results["pred_codetocomment"] = documentation.get_code_to_comment_ratio(zip)
+        pitfalls.evaluate(verbose, llm, zip, readme)
 
         return results
     except Exception as e:
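With pitfalls wired in, a driver for this pipeline presumably looks roughly like the sketch below; the model name, repository URL, and verbosity value are placeholders, since only the function signatures appear in this diff:

# Hypothetical driver; only evaluate(llm, verbose, repo_url, ...) and
# LocalLLM(model_name) are visible in this commit, so the argument values
# below are placeholders.
from evaluations.models import LocalLLM
from evaluations.repo_evaluations import evaluate

llm = LocalLLM("some-local-model")  # placeholder model name
results = evaluate(llm, verbose=True, repo_url="https://github.com/user/repo")  # placeholder URL
print(results)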
evaluations/utils.py CHANGED
@@ -2,10 +2,20 @@ import time
 import requests
 import time
 import os
+import zipfile
 import json
 import streamlit as st
 
-
+def fetch_code(path):
+    # Collect the text of Python, notebook, markdown and text files from the repository archive.
+    zip_content_dict = {}
+    with zipfile.ZipFile(path, 'r') as zip_ref:
+        for file_name in zip_ref.namelist():
+            if file_name.lower().endswith((".py", ".ipynb", ".md", ".txt")):
+                with zip_ref.open(file_name) as file:
+                    file_content = file.read().decode('utf-8')
+                    zip_content_dict[file_name] = file_content
+    return zip_content_dict
 
 def get_api_link(url):
     username, repo_name = decompose_url(url)
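fetch_code returns a dict mapping file names to file contents, which pitfalls.evaluate then interpolates into its prompts. A small usage sketch, with the archive path as a placeholder:

# Usage sketch; "repo.zip" is a placeholder for the downloaded repository archive.
from evaluations.utils import fetch_code

code = fetch_code("repo.zip")
for file_name, content in code.items():
    print(file_name, len(content))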