from transformers import pipeline
from huggingface_hub import InferenceClient
import os

system_messages = {
    "STRICT": (
        "You are a chatbot evaluating GitHub repositories, their Python code and "
        "corresponding README files. Strictly answer the questions with Yes or No."
    ),
    "HELP": (
        "You are a chatbot evaluating GitHub repositories, their Python code and "
        "corresponding README files. Please help me answer the following question."
    ),
}


class LocalLLM:
    """Runs a chat model locally through the transformers text-generation pipeline."""

    def __init__(self, model_name):
        # Place the whole model on GPU 0. device_map keys must be module names,
        # so the root module "" is used rather than an integer key.
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            max_new_tokens=1000,
            device_map={"": 0},
        )

    def predict(self, response_type, prompt):
        messages = [
            {"role": "system", "content": system_messages[response_type]},
            {"role": "user", "content": prompt},
        ]
        res = self.pipe(messages)
        # The pipeline returns the full conversation; keep only the assistant reply.
        res = res[0]["generated_text"]
        res = [message for message in res if message["role"] == "assistant"][0]["content"]
        return res.strip()


class RemoteLLM:
    """Calls a hosted chat model via the Hugging Face Inference API."""

    def __init__(self):
        token = os.getenv("hfToken")
        self.client = InferenceClient(
            "meta-llama/Llama-3.1-8B-Instruct",
            token=token,
            # Wait for the model to load instead of failing on a cold start.
            headers={"x-wait-for-model": "true"},
        )

    def predict(self, response_type, prompt):
        # Accumulate the streamed chunks; returning inside the loop would
        # discard everything after the first token.
        chunks = []
        for message in self.client.chat_completion(
            messages=[
                {"role": "system", "content": system_messages[response_type]},
                {"role": "user", "content": prompt},
            ],
            max_tokens=500,
            stream=True,
        ):
            delta = message.choices[0].delta.content
            if delta:
                chunks.append(delta)
        return "".join(chunks)
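

# A minimal usage sketch, assuming the checkpoint name, environment variable, and
# prompts below are illustrative placeholders rather than part of the repository:
if __name__ == "__main__":
    local = LocalLLM("meta-llama/Llama-3.2-1B-Instruct")  # hypothetical model choice
    print(local.predict("STRICT", "Does the README describe how to install the project?"))

    remote = RemoteLLM()  # assumes the hfToken environment variable is set
    print(remote.predict("HELP", "Summarise what the main Python script does."))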