File size: 5,263 Bytes
82dc28f
77f290b
 
 
8ac76ef
2188124
 
 
 
8ac76ef
 
 
 
 
 
 
 
 
77f290b
69cbe77
77f290b
 
 
 
 
 
 
 
 
 
69cbe77
3abd747
 
 
 
 
 
77f290b
 
 
 
 
 
82dc28f
77f290b
 
 
 
 
3abd747
ccf0698
77f290b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccf0698
77f290b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599cf9b
77f290b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from .utils import log,fetch_code
import re
import numpy as np

def is_applicable(verbose, llm, readme):
    """Ask the LLM which parts of the evaluation apply to this repository.

    Three questions are asked about the readme, in order: whether the
    repository should contain training code, evaluation code, and code for
    loading pre-trained weights.

    Args:
        verbose: Forwarded unchanged to ``log``.
        llm: Object exposing ``predict(mode, prompt)``; when falsy no
            question is asked.
        readme: Readme text placed at the top of every prompt.

    Returns:
        A string of the form ``"<training>/<evaluation>/<weights>"`` built
        from the three ``llm.predict`` results, or ``"NA/NA/NA"`` when no
        ``llm`` is given.
    """
    # Without a model there is nothing to ask: every part is "NA".
    if not llm:
        return "NA/NA/NA"

    log(verbose, "TITLE", "\nChecking what parts of the evaluations are applicable...")

    questions = (
        "Based on the readme above, should the repository contain code for training a model?",
        "Based on the readme above, should the repository contain code for evaluating a model?",
        "Based on the readme above, should the repository contain code for loading pre-trained weights?",
    )
    # The generator is consumed left to right, so the calls happen in the
    # order training -> evaluation -> weights.
    answer_training, answer_evaluation, answer_weights = (
        llm.predict("STRICT", f"{readme}\n{question}") for question in questions
    )
    return f"{answer_training}/{answer_evaluation}/{answer_weights}"

def evaluate(verbose, llm, zip, readme):
    """Grade the repository's documentation and return "Yes" or "No".

    The comment-to-code percentage of the archive is always logged. The
    readme is then judged as follows:

    * no readme, or fewer than 5 non-empty lines -> "No" (with an LLM, a
      suggested summary generated from the code is logged first);
    * ``count_code_lines`` reports more than 5 for the readme -> "Yes";
    * otherwise keyword checks run in order: missing training or testing
      keywords are logged, a mention of "example" sets "Yes", and a readme
      that mentions none of "package", "dependenc", "requirement" ends as
      "No".

    Args:
        verbose: Forwarded unchanged to ``log``.
        llm: Object exposing ``predict(mode, prompt)``, or a falsy value to
            skip all LLM calls.
        zip: Archive passed to ``get_code_to_comment_ratio`` and
            ``fetch_code`` (presumably a ``zipfile.ZipFile`` - confirm
            against the caller).
        readme: Readme text, or a falsy value when there is none.

    Returns:
        "Yes" or "No".
    """
    log(verbose, "TITLE", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if not readme:
        return overall

    non_empty_rows = [row for row in readme.split("\n") if row != ""]
    if len(non_empty_rows) < 5:
        log(verbose, "ERROR", "Readme file has very few lines")
        if llm:
            code = fetch_code(zip)
            # The text placed above the question is the code, not the readme,
            # so the prompt refers to "the code above".
            summary = llm.predict("HELP", f"{code}\nBased on the code above can you give a quick summary of this repository? Please use references to file names on the repository.")
            log(verbose, "LOG", f"Based on the code, your readme file could be something like...\n{summary}")
        return overall

    if count_code_lines(non_empty_rows) > 5:
        log(verbose, "LOG", "Readme file contains python examples.")
        return "Yes"

    if llm:
        # The leading whitespace of the continuation lines is part of the
        # string literal; it is kept as-is so the prompt text is unchanged.
        prompt = f'{readme}\n \
          Is this README file is enough to find what \
          package dependencies you need to install and how to train \
          and evaluate the proposed model?'
        # NOTE(review): the return value is not used here; presumably
        # predict() reports its answer itself - confirm.
        llm.predict("HELP", prompt)

    def mentions(keyword):
        # Case-insensitive substring/regex presence test on the readme.
        return re.search(keyword, readme, re.IGNORECASE) is not None

    if not mentions("train"):
        log(verbose, "ERROR", "Readme file missing training information")
        overall = "No"

    # "demo" and "evaluat" are alternative signs of testing instructions, so
    # the error is raised only when BOTH are absent (same rule as the
    # dependency synonyms below).
    if not mentions("demo") and not mentions("evaluat"):
        log(verbose, "ERROR", "Readme file missing testing information")
        overall = "No"

    # Order matters: a mention of examples overrides the two checks above,
    # while the dependency check below can still turn the result back to "No".
    if mentions("example"):
        log(verbose, "LOG", "Readme file contains links to examples")
        overall = "Yes"

    if not any(mentions(word) for word in ("package", "dependenc", "requirement")):
        log(verbose, "ERROR", "Readme file missing information about package dependencies")
        overall = "No"

    return overall

def count_comment_lines(lines):
    """Count comment lines and triple-quoted block lines in Python source.

    A line counts as a single-line comment when, after stripping, it starts
    with ``#`` and is not inside a triple-quoted block. A triple-quoted block
    starts on a line whose stripped text begins with three double or three
    single quotes, and ends on the first line (possibly the same one) that
    contains the same delimiter again. Every line of the block, delimiters
    included, counts as a multi-line comment line.

    Args:
        lines: Iterable of source lines (strings).

    Returns:
        Tuple ``(single_line_comments, multi_line_comments)``.
    """
    single_line_comments = 0
    multi_line_comments = 0
    # Delimiter of the block currently open, or None when outside a block.
    open_delimiter = None

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Inside a block every line belongs to it, even one that starts
            # with '#', so it is counted once as a multi-line comment line.
            multi_line_comments += 1
            # The block may close anywhere on the line, e.g. `text"""`.
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith('#'):
            single_line_comments += 1
            continue

        for delimiter in ('"""', "'''"):
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # A one-line docstring closes on its opening line; only keep
                # the block open when no closing delimiter follows.
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments

def get_code_to_comment_ratio(zip):
    """Return comment lines as a percentage of code lines in an archive.

    Every archive member whose name ends in ``.py`` or ``.ipynb`` is read as
    text and split on newlines; notebooks are scanned as raw text, their
    cells are not parsed. Lines reported by ``count_comment_lines`` are
    comments, all remaining lines are counted as code.

    Args:
        zip: Archive object providing ``namelist()`` and ``open(name)``
            (presumably a ``zipfile.ZipFile`` - confirm against the caller).

    Returns:
        ``100 * comment_lines / code_lines`` as a float. Despite the function
        name, the value is comments relative to code.
    """
    python_files = [
        file_path for file_path in zip.namelist()
        if file_path.endswith((".py", ".ipynb"))
    ]

    # Starting at 1 keeps the denominator non-zero when no file matches.
    code_line_count = 1
    comment_line_count = 0
    for file in python_files:
        # Close each member handle as soon as it has been read.
        with zip.open(file) as handle:
            # Undecodable bytes are replaced instead of aborting the whole
            # evaluation on a single non-UTF-8 file.
            file_lines = handle.read().decode("utf-8", errors="replace").split('\n')
        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_line_count += sl_comm + ml_comm
        code_line_count += len(file_lines) - (sl_comm + ml_comm)

    return 100 * comment_line_count / code_line_count

def count_code_lines(lines):
    """Return half the number of lines that belong to fenced code blocks.

    A line whose stripped text starts with three backticks toggles the
    "inside a fence" state. Fence lines and every line between an opening
    fence and the next fence are tallied; the tally is then halved and
    truncated to an int.

    Args:
        lines: Iterable of text lines (strings).

    Returns:
        ``int(tally / 2)``.
    """
    inside_fence = False
    fenced_total = 0

    for raw in lines:
        is_fence = raw.strip().startswith('```')
        if is_fence:
            # Opening and closing fences both flip the state.
            inside_fence = not inside_fence
        # Fence lines are tallied along with the lines they enclose.
        if is_fence or inside_fence:
            fenced_total += 1

    return int(fenced_total / 2)