Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 5,263 Bytes
82dc28f 77f290b 8ac76ef 2188124 8ac76ef 77f290b 69cbe77 77f290b 69cbe77 3abd747 77f290b 82dc28f 77f290b 3abd747 ccf0698 77f290b ccf0698 77f290b 599cf9b 77f290b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
from .utils import log,fetch_code
import re
import numpy as np
def is_applicable(verbose, llm, readme):
    """Ask the LLM which evaluation parts the readme implies should exist.

    Three questions are asked, in order: training code, evaluation code,
    and code for loading pre-trained weights.

    Args:
        verbose: Passed through unchanged to ``log``.
        llm: Object exposing ``predict(mode, prompt)``; when falsy no
            question is asked.
        readme: Readme text placed at the top of every prompt.

    Returns:
        The three answers joined as ``"training/evaluation/weights"``;
        ``"NA/NA/NA"`` when no ``llm`` is supplied.
    """
    # Without a model there is nothing to ask: every part stays "NA".
    if not llm:
        return "NA/NA/NA"

    log(verbose, "TITLE", "\nChecking what parts of the evaluations are applicable...")

    questions = (
        "Based on the readme above, should the repository contain code for training a model?",
        "Based on the readme above, should the repository contain code for evaluating a model?",
        "Based on the readme above, should the repository contain code for loading pre-trained weights?",
    )
    answers = []
    for question in questions:
        answers.append(llm.predict("STRICT", f"{readme}\n{question}"))

    training, evaluation, weights = answers
    return f"{training}/{evaluation}/{weights}"
def evaluate(verbose, llm, zip, readme):
    """Judge whether a repository is adequately documented.

    The comment-to-code ratio of the archive is always logged. The verdict
    itself comes from the readme only:

    * no readme                         -> "No"
    * fewer than 5 non-empty lines      -> "No" (an LLM-written summary of
      the code is logged as a suggestion when ``llm`` is available)
    * more than 5 fenced code lines     -> "Yes"
    * otherwise a sequence of keyword checks, where each later check
      overrides the verdict of the earlier ones.

    Args:
        verbose: Passed through unchanged to ``log``.
        llm: Optional object exposing ``predict(mode, prompt)``; LLM steps
            are skipped when it is falsy.
        zip: Archive handed to ``get_code_to_comment_ratio`` and
            ``fetch_code`` (presumably a ``zipfile.ZipFile`` -- confirm
            against the caller).
        readme: Readme text, or a falsy value when there is none.

    Returns:
        ``"Yes"`` or ``"No"``.
    """
    log(verbose, "TITLE", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if not readme:
        return overall

    non_empty_rows = [row for row in readme.split("\n") if row != ""]
    if len(non_empty_rows) < 5:
        log(verbose, "ERROR", "Readme file has very few lines")
        if llm:
            # Offer a draft readme generated from the code itself.
            code = fetch_code(zip)
            summary = llm.predict("HELP", f"{code}\nBased on the readme file above can you give a quick summary of this repository? Please use references to file names on the repository.")
            log(verbose, "LOG", f"Based on the code, your readme file could be something like...\n{summary}")
        return overall

    if count_code_lines(non_empty_rows) > 5:
        log(verbose, "LOG", "Readme file contains python examples.")
        return "Yes"

    if llm:
        prompt = (
            f"{readme}\n "
            "Is this README file is enough to find what "
            "package dependencies you need to install and how to train "
            "and evaluate the proposed model?"
        )
        # The return value is not used here.
        llm.predict("HELP", prompt)

    def _mentions(keyword):
        # Case-insensitive substring/regex presence test on the readme.
        return re.search(keyword, readme, re.IGNORECASE) is not None

    if not _mentions("train"):
        log(verbose, "ERROR", "Readme file missing training information")
        overall = "No"

    # NOTE(review): this fires when EITHER "demo" or "evaluat" is absent,
    # unlike the dependency check below which requires all keywords to be
    # absent -- confirm that the asymmetry is intended.
    if not _mentions("demo") or not _mentions("evaluat"):
        log(verbose, "ERROR", "Readme file missing testing information")
        overall = "No"

    if _mentions("example"):
        log(verbose, "LOG", "Readme file contains links to examples")
        overall = "Yes"

    # Evaluated last, so missing dependency information overrides a "Yes"
    # earned from the "example" keyword.
    if not (_mentions("package") or _mentions("dependenc") or _mentions("requirement")):
        log(verbose, "ERROR", "Readme file missing information about package dependencies")
        overall = "No"

    return overall
def count_comment_lines(lines):
    """Count ``#`` comment lines and triple-quoted string lines.

    A line belongs to exactly one category:

    * inside an open triple-quoted block (including the opening and closing
      lines) -> multi-line count;
    * otherwise, starting with ``#`` after stripping -> single-line count.

    A triple-quoted block opens on a line that starts with three double
    quotes or three single quotes and closes at the next occurrence of the
    same delimiter, which may be on the opening line itself (one-line
    docstring) or at the end of a later text line.

    Args:
        lines: Iterable of source lines (strings).

    Returns:
        Tuple ``(single_line_comments, multi_line_comments)``.
    """
    single_line_comments = 0
    multi_line_comments = 0
    open_delimiter = None  # the triple quote currently open, if any

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Everything up to and including the closing line is comment
            # text; a '#' in here is not a separate comment.
            multi_line_comments += 1
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith('#'):
            single_line_comments += 1
            continue

        for delimiter in ('"""', "'''"):
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # Stay open only if the same delimiter does not also close
                # the block later on this line.
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments
def get_code_to_comment_ratio(zip):
    """Return the comment-to-code percentage of an archive's Python sources.

    Despite the function name, the value is
    ``100 * comment_lines / code_lines``. Every archive member whose name
    ends in ``.py`` or ``.ipynb`` is read, decoded as UTF-8, split on
    newlines and passed to ``count_comment_lines``; lines not reported as
    comments count as code.

    Args:
        zip: Archive object providing ``namelist()`` and ``open(name)``
            (presumably a ``zipfile.ZipFile`` -- confirm against the caller).

    Returns:
        The percentage as a float; ``0.0`` when no matching file exists.

    Raises:
        UnicodeDecodeError: If a matching member is not valid UTF-8.
    """
    python_files = [
        file_path for file_path in zip.namelist()
        if file_path.endswith((".py", ".ipynb"))
    ]

    # Starts at 1, presumably so an archive without code lines cannot
    # cause a division by zero below.
    code_line_count = 1
    comment_line_count = 0

    for file in python_files:
        # Close each member handle as soon as its content has been read.
        with zip.open(file) as handle:
            file_lines = handle.read().decode("utf-8").split('\n')

        sl_comm, ml_comm = count_comment_lines(file_lines)
        comments = sl_comm + ml_comm
        comment_line_count += comments
        # Clamp at zero: if the counter ever reports more comment lines
        # than the file has, the denominator must not shrink (it could
        # otherwise reach 0 and raise ZeroDivisionError).
        code_line_count += max(len(file_lines) - comments, 0)

    return 100 * comment_line_count / code_line_count
def count_code_lines(lines):
    """Return half the number of lines that belong to fenced code blocks.

    A fence is any line that starts with three backticks once surrounding
    whitespace is stripped. Both fence lines and every line between an
    opening and a closing fence are tallied; the tally is then divided by
    two and truncated to an integer.

    Args:
        lines: Iterable of readme lines (strings).

    Returns:
        ``int(tally / 2)``.
    """
    inside_fence = False
    tally = 0

    for raw_line in lines:
        is_fence = raw_line.strip().startswith('```')

        # Fence markers count too, as does anything between them.
        if is_fence or inside_fence:
            tally += 1

        # A fence line flips the state: opening when outside, closing
        # when inside.
        if is_fence:
            inside_fence = not inside_fence

    return int(tally / 2)
|