Ubuntu committed
Commit b2000f8 · 1 Parent(s): dc7ac95

fixed memory issues

Files changed (2)
  1. model_handler.py +12 -3
  2. requirements.txt +1 -0
model_handler.py CHANGED
@@ -1,6 +1,7 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
+import os
 
 def load_model_and_tokenizer():
     """Load the fine-tuned model and tokenizer, ensuring CPU compatibility."""
@@ -9,18 +10,26 @@ def load_model_and_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
     tokenizer.pad_token = tokenizer.eos_token
 
-    # Load base model on CPU
+    # Create offload directory if it doesn't exist
+    offload_dir = "offload_dir"
+    os.makedirs(offload_dir, exist_ok=True)
+
+    # Load base model with 8-bit quantization to reduce memory usage
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         torch_dtype=torch.float32,  # Use float32 for CPU
-        device_map="auto"
+        device_map="auto",
+        offload_folder=offload_dir,  # Add offload directory
+        load_in_8bit=True,  # Use 8-bit quantization
+        low_cpu_mem_usage=True  # Optimize for low memory
     )
 
     # Load adapter weights
     model = PeftModel.from_pretrained(
         base_model,
         "phi2-grpo-qlora-final",
-        device_map="auto"
+        device_map="auto",
+        offload_folder=offload_dir  # Add offload directory
     )
 
     # Set to evaluation mode
requirements.txt CHANGED
@@ -3,3 +3,4 @@ torch>=2.0.0
 transformers>=4.30.0
 peft>=0.4.0
 accelerate>=0.20.0
+bitsandbytes>=0.39.0
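
A minimal smoke-test sketch of how the updated loader could be exercised after this change. It assumes load_model_and_tokenizer() returns a (model, tokenizer) pair, that the "phi2-grpo-qlora-final" adapter and offload_dir from the diff are present, and that bitsandbytes is installed for the load_in_8bit path; the prompt and generation settings are illustrative only.

import torch
from model_handler import load_model_and_tokenizer

# Hypothetical usage; the return signature (model, tokenizer) is assumed,
# not confirmed by the truncated diff.
model, tokenizer = load_model_and_tokenizer()

prompt = "Summarize why 8-bit quantization lowers memory use."
inputs = tokenizer(prompt, return_tensors="pt")

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))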