Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
from .utils import log,fetch_code | |
import re | |
import numpy as np | |
def is_applicable(verbose, llm, readme):
    """Ask the LLM which evaluation areas apply to the repository.

    Three questions are asked about the readme, in this order: training
    code, evaluation code, and pre-trained weight loading.

    Args:
        verbose: Forwarded unchanged to ``log``.
        llm: Object exposing ``predict(mode, prompt)``; when falsy no
            question is asked.
        readme: Readme text placed at the top of each prompt.

    Returns:
        The three answers joined as ``"training/evaluation/weights"``;
        every slot is ``"NA"`` when no LLM is available.
    """
    if not llm:
        # Without a model there is nothing to ask: all three slots stay "NA".
        return "NA/NA/NA"

    log(verbose, "TITLE", "\nChecking what parts of the evaluations are applicable...")

    topics = ("training a model", "evaluating a model", "loading pre-trained weights")
    answers = [
        llm.predict(
            "STRICT",
            f"{readme}\nBased on the readme above, should the repository contain code for {topic}?",
        )
        for topic in topics
    ]
    return "/".join(f"{answer}" for answer in answers)
def evaluate(verbose, llm, zip, readme):
    """Rate the repository's documentation as ``"Yes"`` or ``"No"``.

    Steps, in order:

    1. Log the comment-to-code ratio of the archive's python sources.
    2. With no readme, return ``"No"``.
    3. With fewer than 5 non-empty readme lines, log an error, optionally
       let the LLM draft a summary from the code, and return ``"No"``.
    4. If ``count_code_lines`` reports more than 5 for the readme, return
       ``"Yes"``.
    5. Otherwise run keyword checks. Missing training or testing keywords
       are logged as errors; the verdict ends up ``"Yes"`` only when the
       readme mentions "example" and at least one of "package",
       "dependenc" or "requirement".

    Args:
        verbose: Forwarded unchanged to ``log``.
        llm: Object exposing ``predict(mode, prompt)``; LLM steps are
            skipped when falsy.
        zip: Archive object handed to ``get_code_to_comment_ratio`` and
            ``fetch_code``.
        readme: Readme text, or a falsy value when there is none.

    Returns:
        ``"Yes"`` or ``"No"``.
    """
    log(verbose, "TITLE", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if not readme:
        return overall

    non_empty_rows = [row for row in readme.split("\n") if row != ""]
    if len(non_empty_rows) < 5:
        log(verbose, "ERROR", "Readme file has very few lines")
        if llm:
            # The prompt is built from the source code, so it refers to the
            # code (not the readme) as the material "above".
            code = fetch_code(zip)
            summary = llm.predict("HELP", f"{code}\nBased on the code above can you give a quick summary of this repository? Please use references to file names on the repository.")
            log(verbose, "LOG", f"Based on the code, your readme file could be something like...\n{summary}")
        return overall

    if count_code_lines(non_empty_rows) > 5:
        log(verbose, "LOG", "Readme file contains python examples.")
        return "Yes"

    if llm:
        prompt = (
            f"{readme}\n"
            "Is this README file enough to find what package dependencies "
            "you need to install and how to train and evaluate the proposed model?"
        )
        # The return value is not used here.
        llm.predict("HELP", prompt)

    def mentions(keyword):
        """Return True when ``keyword`` occurs in the readme, ignoring case."""
        return re.search(keyword, readme, re.IGNORECASE) is not None

    if not mentions("train"):
        log(verbose, "ERROR", "Readme file missing training information")
        overall = "No"

    # "demo" and "evaluat" are alternative signs of testing instructions, so
    # the error is raised only when neither appears (same rule as the
    # dependency keywords below).
    if not mentions("demo") and not mentions("evaluat"):
        log(verbose, "ERROR", "Readme file missing testing information")
        overall = "No"

    if mentions("example"):
        log(verbose, "LOG", "Readme file contains links to examples")
        overall = "Yes"

    # Checked last on purpose: missing dependency information overrides a
    # "Yes" earned through the examples check.
    if not (mentions("package") or mentions("dependenc") or mentions("requirement")):
        log(verbose, "ERROR", "Readme file missing information about package dependencies")
        overall = "No"

    return overall
def count_comment_lines(lines):
    """Count comment lines in a sequence of python source lines.

    A line is a single-line comment when, after stripping whitespace, it
    starts with ``#`` and is not inside a triple-quoted block. A
    triple-quoted block starts on a line that begins with ``\"\"\"`` or
    ``'''`` and ends on the first line containing the same delimiter again;
    this may be the opening line itself (one-line docstring). Every line of
    such a block, including the delimiter lines, counts as a multi-line
    comment line.

    This is a line-based heuristic, not a parser: triple-quoted strings that
    do not start their line (for example after ``x = ``) are not detected.

    Args:
        lines: Iterable of source lines (strings).

    Returns:
        Tuple ``(single_line_comments, multi_line_comments)``.
    """
    single_line_comments = 0
    multi_line_comments = 0
    open_delimiter = None  # delimiter of the block we are inside, if any

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Inside a block every line counts exactly once, even if it
            # happens to start with '#'.
            multi_line_comments += 1
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith('#'):
            single_line_comments += 1
            continue

        for delimiter in ('"""', "'''"):
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # Only stay "inside" when the block is not closed on the
                # same line (one-line docstrings close immediately).
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments
def get_code_to_comment_ratio(zip):
    """Return the comment-to-code line ratio, in percent, for an archive.

    Every archive member whose name ends in ``.py`` or ``.ipynb`` is read as
    text and split on newlines; ``count_comment_lines`` classifies the
    lines. Notebook files are treated as plain text, exactly like scripts.

    Args:
        zip: Archive object providing ``namelist()`` and ``open(name)``
            (for example a ``zipfile.ZipFile``).

    Returns:
        ``100 * comment_lines / code_lines``. The code-line counter starts
        at 1, so an archive without matching files yields ``0.0`` instead of
        dividing by zero.
    """
    python_files = [
        file_path for file_path in zip.namelist()
        if file_path.endswith((".py", ".ipynb"))
    ]

    code_line_count = 1  # starts at 1 so the final division never hits zero
    comment_line_count = 0
    for file_path in python_files:
        # Close each member handle promptly, and tolerate stray non-UTF-8
        # bytes rather than aborting the whole evaluation.
        with zip.open(file_path) as member:
            file_lines = member.read().decode("utf-8", errors="replace").split('\n')

        sl_comm, ml_comm = count_comment_lines(file_lines)
        comments = sl_comm + ml_comm
        comment_line_count += comments
        # Clamp so an over-count of comments can never drive the
        # denominator to zero or below.
        code_line_count += max(len(file_lines) - comments, 0)

    return 100 * comment_line_count / code_line_count
def count_code_lines(lines):
    """Return half the number of lines that belong to fenced code blocks.

    A line whose stripped text starts with three backticks toggles the
    "inside a fence" state and is itself counted; every line seen while
    inside a fence is counted too. The total is halved and truncated to an
    integer before being returned.

    Args:
        lines: Iterable of text lines (strings).

    Returns:
        Integer: counted lines divided by two, rounded down.
    """
    inside_fence = False
    fenced_total = 0
    for raw_line in lines:
        if raw_line.strip().startswith('```'):
            # Opening and closing fences are both counted and flip the state.
            inside_fence = not inside_fence
            fenced_total += 1
        elif inside_fence:
            fenced_total += 1
    return fenced_total // 2