from transformers import pipeline
from huggingface_hub import InferenceClient
import os

system_messages = {
    "STRICT": """You are a chatbot evaluating GitHub repositories, their Python code and corresponding README files. Strictly answer the questions with "Yes" or "No". Don't use any punctuation either.""",
    "HELP": """You are a chatbot evaluating GitHub repositories, their Python code and corresponding README files. Please help me answer the following question. Keep your answers short and informative. Your answer should be a single paragraph. If you can't find any issues with the code, return an empty string.""",
}


class LocalLLM:
    """Runs a chat model locally via the transformers text-generation pipeline."""

    def __init__(self, model_name):
        # device_map={"": 0} places the entire model on GPU 0; the original
        # value {0: 0} is not a valid module-to-device mapping.
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            max_new_tokens=1000,
            device_map={"": 0},
        )

    def predict(self, response_type, prompt):
        messages = [
            {"role": "system", "content": system_messages[response_type]},
            {"role": "user", "content": prompt},
        ]
        res = self.pipe(messages)
        # With chat-style input, the pipeline returns the whole conversation
        # under "generated_text"; keep only the assistant's reply.
        res = res[0]["generated_text"]
        res = [msg for msg in res if msg["role"] == "assistant"][0]["content"]
        return res.strip()


class RemoteLLM:
    """Queries a hosted model through the Hugging Face Inference API."""

    def __init__(self, model_name):
        token = os.getenv("hfToken")
        self.model_name = model_name
        self.client = InferenceClient(api_key=token)

    def predict(self, response_type, prompt):
        message = self.client.chat_completion(
            model=self.model_name,
            max_tokens=500,
            stream=False,
            messages=[
                {"role": "system", "content": system_messages[response_type]},
                {"role": "user", "content": prompt},
            ],
        )
        return message.choices[0].message.content
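

# A minimal usage sketch (illustrative, not part of the original module):
# the model IDs below are assumptions, and RemoteLLM expects the hfToken
# environment variable to hold a valid Hugging Face access token.
if __name__ == "__main__":
    local = LocalLLM("meta-llama/Llama-3.2-1B-Instruct")  # hypothetical model id
    print(local.predict("STRICT", "Does this repository contain a README file?"))

    remote = RemoteLLM("meta-llama/Llama-3.1-8B-Instruct")  # hypothetical model id
    print(remote.predict("HELP", "What issues do you see in this code snippet?"))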