Spaces:

lakshyaag
/

w3d1-legal-summarizer-app

Paused

App Files Files Community

lakshyaag committed on Jun 15, 2024

Commit

b644180

1 Parent(s): a2ae02a

update chainlit app

Browse files

Files changed (2) hide show

app.py +32 -41
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -4,10 +4,11 @@ import chainlit as cl  # importing chainlit for our app
 import torch
 from transformers import (
     AutoTokenizer,
-    AutoConfig,
     AutoModelForCausalLM,
     BitsAndBytesConfig,
 )
 import bitsandbytes as bnb
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
@@ -15,9 +16,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 # Prompt Templates
 INSTRUCTION_PROMPT_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 Please convert the following legal content into a human-readable summary<|eot_id|><|start_header_id|>user<|end_header_id|>
 [LEGAL_DOC]
 {input}
 [END_LEGAL_DOC]<|eot_id|><|start_header_id|>assistant<|end_header_id|>
@@ -27,19 +26,42 @@ RESPONSE_TEMPLATE = """
 {summary}<|eot_id|>
 """
 def create_prompt(sample, include_response=False):
     """
     Parameters:
       - sample: dict representing row of dataset
       - include_response: bool
     Functionality:
       This function should build the Python str `full_prompt`.
       If `include_response` is true, it should include the summary;
       otherwise it should omit the summary (useful for prompting and testing).
     Returns:
       - full_prompt: str
     """
@@ -54,46 +76,15 @@ def create_prompt(sample, include_response=False):
     return full_prompt
-@cl.on_chat_start
-async def start_chat():
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_compute_dtype=torch.float16,
-    )
-    model_id = "lakshyaag/llama38binstruct_summarize"
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        quantization_config=bnb_config,
-        device_map="auto",
-    )
-    # Move model to GPU if available
-    if torch.cuda.is_available():
-        model = model.to("cuda")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "right"
-    cl.user_session.set("model", model)
-    cl.user_session.set("tokenizer", tokenizer)
 @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
-    model = cl.user_session.get("model")
-    tokenizer = cl.user_session.get("tokenizer")
     # convert str input into tokenized input
-    encoded_input = tokenizer(message, return_tensors="pt")
     # send the tokenized inputs to our GPU
-    model_inputs = encoded_input.to("cuda")
     # generate response and set desired generation parameters
     generated_ids = model.generate(
@@ -104,7 +95,7 @@ async def main(message: cl.Message):
     )
     # decode output from tokenized output to str output
-    decoded_output = tokenizer.batch_decode(generated_ids)
     # return only the generated response (not the prompt) as output
     response = decoded_output[0].split("<|end_header_id|>")[-1]

 import torch
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     BitsAndBytesConfig,
 )
+from peft import PeftModel, PeftConfig
 import bitsandbytes as bnb
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 # Prompt Templates
 INSTRUCTION_PROMPT_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 Please convert the following legal content into a human-readable summary<|eot_id|><|start_header_id|>user<|end_header_id|>
 [LEGAL_DOC]
 {input}
 [END_LEGAL_DOC]<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 {summary}<|eot_id|>
 """
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.float16,
+)
+config = PeftConfig.from_pretrained("lakshyaag/llama38binstruct_summarize")
+base_model = AutoModelForCausalLM.from_pretrained(
+    "NousResearch/Meta-Llama-3-8B-Instruct",
+    quantization_config=bnb_config,
+    device_map="auto",
+)
+model = PeftModel.from_pretrained(base_model, "lakshyaag/llama38binstruct_summarize")
+# Move model to GPU if available
+if torch.cuda.is_available():
+    model = model.to("cuda")
+tokenizer = AutoTokenizer.from_pretrained("lakshyaag/llama38binstruct_summarize")
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"
 def create_prompt(sample, include_response=False):
     """
     Parameters:
       - sample: dict representing row of dataset
       - include_response: bool
     Functionality:
       This function should build the Python str `full_prompt`.
       If `include_response` is true, it should include the summary;
       otherwise it should omit the summary (useful for prompting and testing).
     Returns:
       - full_prompt: str
     """
     return full_prompt
 @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
+    prompt = create_prompt({"original_text": message.content}, include_response=False)
     # convert str input into tokenized input
+    encoded_input = tokenizer(prompt, return_tensors="pt")
     # send the tokenized inputs to the GPU if one is available, otherwise keep them on the CPU
+    model_inputs = encoded_input.to("cuda" if torch.cuda.is_available() else "cpu")
     # generate response and set desired generation parameters
     generated_ids = model.generate(
     )
     # decode output from tokenized output to str output
+    decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     # return only the generated response (not the prompt) as output
     response = decoded_output[0].split("<|end_header_id|>")[-1]

requirements.txt CHANGED Viewed

@@ -3,4 +3,7 @@ transformers==4.41.2
 bitsandbytes==0.43.1
 accelerate==0.31.0
 tiktoken==0.5.1
-python-dotenv==1.0.0

 bitsandbytes==0.43.1
 accelerate==0.31.0
 tiktoken==0.5.1
+python-dotenv==1.0.0
+peft==0.11.1
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch