from transformers import pipeline
from huggingface_hub import InferenceClient
import os
system_messages = {
    "STRICT": """You are a chatbot evaluating github repositories, their python codes and corresponding readme files.
Strictly answer the questions with "Yes" or "No".
Don't use any punctuation either.""",
    "HELP": """You are a chatbot evaluating github repositories, their python codes and corresponding readme files.
Please help me answer the following question.
Keep your answers short, and informative.
Your answer should be a single paragraph.""",
    "PITFALL": """You are a chatbot evaluating github repositories, their python codes and corresponding readme files.
You are looking for common pitfalls in the code. More specifically, please consider the following pitfalls:
Please explain if you find any design flaws with regards to the data collection in the code.
Please explain if you find signs of dataset shift in the code (e.g. sampling bias, imbalanced populations, imbalanced labels, non-stationary environments).
Please explain if you find any confounders in the code.
Please explain if you find any measurement errors in the code (labelling mistakes, noisy measurements, inappropriate proxies).
Please explain if you find signs of historical biases in the data used.
Please explain if you find signs of information leaking between the training and testing data.
Please explain if you find a model-problem mismatch (e.g. over-complicated/simplistic model, computational challenges).
Please explain if you find any signs of overfitting in the code (e.g. high variance, high complexity, low bias).
Please explain if you find any misused metrics in the code (e.g. poor metric selection, poor implementations).
Please explain if you find any signs of black box models in the code (e.g. lack of interpretability, lack of transparency).
Please explain if you find any signs of baseline comparison issues in the code (e.g. if the testing data does not fit the training data).
Please explain if you find any signs of insufficient reporting in the code (e.g. missing hyperparameters, missing evaluation metrics).
Please explain if you find signs of faulty interpretations of the reported results.
If you don't find anything concerning, please return an empty string.""",
}
class LocalLLM:
    """Runs a chat model locally via the transformers text-generation pipeline."""

    def __init__(self, model_name):
        # device_map="auto" lets accelerate place the model on the available
        # device(s); the original {0: 0} is not a valid device_map value.
        self.pipe = pipeline("text-generation", model=model_name,
                             max_new_tokens=1000, device_map="auto")

    def predict(self, response_type, prompt):
        messages = [
            {"role": "system", "content": system_messages[response_type]},
            {"role": "user", "content": prompt},
        ]
        res = self.pipe(messages)
        # The pipeline returns the full chat history; keep only the assistant's reply.
        res = res[0]["generated_text"]
        res = [response for response in res if response["role"] == "assistant"][0]["content"]
        return res.strip()
class RemoteLLM:
    """Calls a hosted chat model through the Hugging Face Inference API."""

    def __init__(self, model_name):
        token = os.getenv("hfToken")  # Hugging Face API token, read from the environment
        self.model_name = model_name
        self.client = InferenceClient(api_key=token)

    def predict(self, response_type, prompt):
        message = self.client.chat_completion(
            model=self.model_name, max_tokens=500, stream=False,
            messages=[{"role": "system", "content": system_messages[response_type]},
                      {"role": "user", "content": prompt}])
        return message['choices'][0]['message']['content']
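
# Minimal usage sketch. Assumptions not in the original code: the model id
# "HuggingFaceH4/zephyr-7b-beta" is only a placeholder, and the hfToken
# environment variable must hold a valid Hugging Face API token. LocalLLM
# takes the same (response_type, prompt) arguments if running locally instead.
if __name__ == "__main__":
    llm = RemoteLLM("HuggingFaceH4/zephyr-7b-beta")
    answer = llm.predict(
        "STRICT",
        "Does this repository contain a readme file?\n\n# Example Repo\nSee main.py.",
    )
    print(answer)  # expected: "Yes" or "No", per the STRICT system prompt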