from .utils import log, fetch_code
import re


def evaluate(verbose, llm, zip, readme):
    """Scan the submission's code for common ML pitfalls by sending the LLM
    one targeted prompt per pitfall (skipped when no LLM client is given)."""
    log(verbose, "TITLE", "\nLooking for common pitfalls (in development)...")
    code = fetch_code(zip)
    if llm:
        # Pitfall #1: flawed data collection
        llm.predict("HELP", f"{code}\n Please explain if you find any design flaws with regards to the data collection in the code.")
        # Pitfall #2: dataset shift
        llm.predict("HELP", f"{code}\n Please explain if you find signs of dataset shift in the code (e.g. sampling bias, imbalanced populations, imbalanced labels, non-stationary environments).")
        # Pitfall #3: confounders
        llm.predict("HELP", f"{code}\n Please explain if you find any confounders in the code.")
        # Pitfall #4: measurement errors
        llm.predict("HELP", f"{code}\n Please explain if you find any measurement errors in the code (e.g. labelling mistakes, noisy measurements, inappropriate proxies).")
        # Pitfall #5: historical bias
        llm.predict("HELP", f"{code}\n Please explain if you find signs of historical biases in the data used.")
        # Pitfall #6: information leakage
        llm.predict("HELP", f"{code}\n Please explain if you find signs of information leaking between the training and testing data.")
        # Pitfall #7: model-problem mismatch
        llm.predict("HELP", f"{code}\n Please explain if you find a model-problem mismatch (e.g. an over-complicated or overly simplistic model, computational challenges).")
        # Pitfall #8: overfitting
        llm.predict("HELP", f"{code}\n Please explain if you find any signs of overfitting in the code (e.g. high variance, high complexity, low bias).")
        # Pitfall #9: misused metrics
        llm.predict("HELP", f"{code}\n Please explain if you find any misused metrics in the code (e.g. poor metric selection, poor implementations).")
        # Pitfall #10: black-box models
        llm.predict("HELP", f"{code}\n Please explain if you find any signs of black box models in the code (e.g. lack of interpretability, lack of transparency).")
        # Pitfall #11: baseline comparison issues
        llm.predict("HELP", f"{code}\n Please explain if you find any signs of baseline comparison issues in the code (e.g. if the testing data does not fit the training data).")
        # Pitfall #12: insufficient reporting
        llm.predict("HELP", f"{code}\n Please explain if you find any signs of insufficient reporting in the code (e.g. missing hyperparameters, missing evaluation metrics).")
        # Pitfall #13: faulty interpretation of results
        llm.predict("HELP", f"{code}\n Please explain if you find signs of faulty interpretations of the reported results.")