from transformers import pipeline
from huggingface_hub import InferenceClient
import os

system_messages = {
    "STRICT": (
        "You are a chatbot evaluating GitHub repositories, their Python code and "
        "corresponding README files. Strictly answer the questions with Yes or No."
    ),
    "HELP": (
        "You are a chatbot evaluating GitHub repositories, their Python code and "
        "corresponding README files. Please help me answer the following question."
    ),
}


class LocalLLM:
    """Runs a chat model locally through the transformers text-generation pipeline."""

    def __init__(self, model_name):
        # Place the whole model on GPU 0. device_map keys must be module names,
        # so the root module "" is used rather than an integer key.
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            max_new_tokens=1000,
            device_map={"": 0},
        )

    def predict(self, response_type, prompt):
        messages = [
            {"role": "system", "content": system_messages[response_type]},
            {"role": "user", "content": prompt},
        ]
        res = self.pipe(messages)
        # The pipeline returns the full conversation; keep only the assistant reply.
        res = res[0]["generated_text"]
        res = [message for message in res if message["role"] == "assistant"][0]["content"]
        return res.strip()


class RemoteLLM:
    """Calls a hosted chat model via the Hugging Face Inference API."""

    def __init__(self):
        token = os.getenv("hfToken")
        self.client = InferenceClient(
            "meta-llama/Llama-3.1-8B-Instruct",
            token=token,
            # Wait for the model to load instead of failing on a cold start.
            headers={"x-wait-for-model": "true"},
        )

    def predict(self, response_type, prompt):
        # Accumulate the streamed chunks; returning inside the loop would
        # discard everything after the first token.
        chunks = []
        for message in self.client.chat_completion(
            messages=[
                {"role": "system", "content": system_messages[response_type]},
                {"role": "user", "content": prompt},
            ],
            max_tokens=500,
            stream=True,
        ):
            delta = message.choices[0].delta.content
            if delta:
                chunks.append(delta)
        return "".join(chunks)
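

# A minimal usage sketch, assuming the checkpoint name, environment variable, and
# prompts below are illustrative placeholders rather than part of the repository:
if __name__ == "__main__":
    local = LocalLLM("meta-llama/Llama-3.2-1B-Instruct")  # hypothetical model choice
    print(local.predict("STRICT", "Does the README describe how to install the project?"))

    remote = RemoteLLM()  # assumes the hfToken environment variable is set
    print(remote.predict("HELP", "Summarise what the main Python script does."))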