Commit b644180 (1 parent: a2ae02a) by lakshyaag

update chainlit app

Files changed (2)
  1. app.py +32 -41
  2. requirements.txt +4 -1
app.py CHANGED
@@ -4,10 +4,11 @@ import chainlit as cl # importing chainlit for our app
 import torch
 from transformers import (
     AutoTokenizer,
-    AutoConfig,
     AutoModelForCausalLM,
     BitsAndBytesConfig,
 )
+from peft import PeftModel, PeftConfig
+
 import bitsandbytes as bnb
 
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
@@ -15,9 +16,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
 # Prompt Templates
 INSTRUCTION_PROMPT_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
 Please convert the following legal content into a human-readable summary<|eot_id|><|start_header_id|>user<|end_header_id|>
-
 [LEGAL_DOC]
 {input}
 [END_LEGAL_DOC]<|eot_id|><|start_header_id|>assistant<|end_header_id|>
@@ -27,19 +26,42 @@ RESPONSE_TEMPLATE = """
 {summary}<|eot_id|>
 """
 
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.float16,
+)
+
+config = PeftConfig.from_pretrained("lakshyaag/llama38binstruct_summarize")
+
+base_model = AutoModelForCausalLM.from_pretrained(
+    "NousResearch/Meta-Llama-3-8B-Instruct",
+    quantization_config=bnb_config,
+    device_map="auto",
+)
+
+model = PeftModel.from_pretrained(base_model, "lakshyaag/llama38binstruct_summarize")
+
+# Move model to GPU if available
+if torch.cuda.is_available():
+    model = model.to("cuda")
+
+tokenizer = AutoTokenizer.from_pretrained("lakshyaag/llama38binstruct_summarize")
+
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"
+
 
 def create_prompt(sample, include_response=False):
     """
     Parameters:
     - sample: dict representing row of dataset
    - include_response: bool
-
     Functionality:
     This function should build the Python str `full_prompt`.
-
     If `include_response` is true, it should include the summary -
     else it should not contain the summary (useful for prompting) and testing
-
     Returns:
     - full_prompt: str
     """
@@ -54,46 +76,15 @@ def create_prompt(sample, include_response=False):
     return full_prompt
 
 
-@cl.on_chat_start
-async def start_chat():
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_compute_dtype=torch.float16,
-    )
-
-    model_id = "lakshyaag/llama38binstruct_summarize"
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        quantization_config=bnb_config,
-        device_map="auto",
-    )
-
-    # Move model to GPU if available
-    if torch.cuda.is_available():
-        model = model.to("cuda")
-
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "right"
-
-    cl.user_session.set("model", model)
-    cl.user_session.set("tokenizer", tokenizer)
-
-
 @cl.on_message # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
-    model = cl.user_session.get("model")
-    tokenizer = cl.user_session.get("tokenizer")
+    prompt = create_prompt({"original_text": message.content}, include_response=False)
 
     # convert str input into tokenized input
-    encoded_input = tokenizer(message, return_tensors="pt")
+    encoded_input = tokenizer(prompt, return_tensors="pt")
 
     # send the tokenized inputs to our GPU
-    model_inputs = encoded_input.to("cuda")
+    model_inputs = encoded_input.to("cuda" if torch.cuda.is_available() else "cpu")
 
     # generate response and set desired generation parameters
     generated_ids = model.generate(
@@ -104,7 +95,7 @@ async def main(message: cl.Message):
     )
 
     # decode output from tokenized output to str output
-    decoded_output = tokenizer.batch_decode(generated_ids)
+    decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
 
     # return only the generated response (not the prompt) as output
     response = decoded_output[0].split("<|end_header_id|>")[-1]
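
For readability, a minimal sketch of how the reworked on_message handler reads once these hunks are applied. The generation parameters and the final reply call are outside the visible hunks, so the max_new_tokens/do_sample values and the cl.Message(...).send() line below are assumptions, not the committed code:

@cl.on_message
async def main(message: cl.Message):
    # build the instruction prompt from the user's raw text
    prompt = create_prompt({"original_text": message.content}, include_response=False)

    # tokenize the prompt and move the tensors to the available device
    encoded_input = tokenizer(prompt, return_tensors="pt")
    model_inputs = encoded_input.to("cuda" if torch.cuda.is_available() else "cpu")

    # generation settings here are illustrative; the actual values are not shown in the diff
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=256,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # decode and keep only the assistant portion of the output
    decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    response = decoded_output[0].split("<|end_header_id|>")[-1]

    # returning the reply is assumed to follow the standard Chainlit pattern
    await cl.Message(content=response).send()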
requirements.txt CHANGED
@@ -3,4 +3,7 @@ transformers==4.41.2
 bitsandbytes==0.43.1
 accelerate==0.31.0
 tiktoken==0.5.1
-python-dotenv==1.0.0
+python-dotenv==1.0.0
+peft==0.11.1
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
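
With the extra index URL declared inside requirements.txt, a plain `pip install -r requirements.txt` should resolve torch against the cu113 wheel index while installing the other pinned packages.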