Ubuntu committed
Commit b2000f8 · Parent: dc7ac95
fixed memory issues
Files changed:
- model_handler.py +12 -3
- requirements.txt +1 -0
model_handler.py
CHANGED
@@ -1,6 +1,7 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
+import os
 
 def load_model_and_tokenizer():
     """Load the fine-tuned model and tokenizer, ensuring CPU compatibility."""
@@ -9,18 +10,26 @@ def load_model_and_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
     tokenizer.pad_token = tokenizer.eos_token
 
-    #
+    # Create offload directory if it doesn't exist
+    offload_dir = "offload_dir"
+    os.makedirs(offload_dir, exist_ok=True)
+
+    # Load base model with 8-bit quantization to reduce memory usage
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         torch_dtype=torch.float32,  # Use float32 for CPU
-        device_map="auto"
+        device_map="auto",
+        offload_folder=offload_dir,  # Add offload directory
+        load_in_8bit=True,  # Use 8-bit quantization
+        low_cpu_mem_usage=True  # Optimize for low memory
     )
 
     # Load adapter weights
     model = PeftModel.from_pretrained(
         base_model,
         "phi2-grpo-qlora-final",
-        device_map="auto"
+        device_map="auto",
+        offload_folder=offload_dir  # Add offload directory
     )
 
     # Set to evaluation mode
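For context, a minimal sketch of calling the patched loader (an illustration, not part of the commit; it assumes load_model_and_tokenizer() returns a (model, tokenizer) pair, which the truncated diff suggests but does not show):

import torch
from model_handler import load_model_and_tokenizer

# Hypothetical caller: run one prompt through the 8-bit, offload-enabled model.
model, tokenizer = load_model_and_tokenizer()
inputs = tokenizer("What is 2 + 2?", return_tensors="pt")
with torch.no_grad():  # inference only; the handler already sets eval mode
    output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))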
requirements.txt
CHANGED
@@ -3,3 +3,4 @@ torch>=2.0.0
 transformers>=4.30.0
 peft>=0.4.0
 accelerate>=0.20.0
+bitsandbytes>=0.39.0
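The new bitsandbytes pin backs the load_in_8bit=True flag added in model_handler.py; transformers delegates 8-bit weight loading to that library. One caveat worth noting: bitsandbytes in the 0.39.x line performs 8-bit quantization on CUDA GPUs, so the flag may fail on a CPU-only host despite the float32 comment in the loader. A minimal pre-flight guard (an assumption for illustration, not part of the commit):

import importlib.util

# Hypothetical check: fail fast if the quantization backend is missing,
# since load_in_8bit=True in model_handler.py depends on bitsandbytes.
if importlib.util.find_spec("bitsandbytes") is None:
    raise RuntimeError(
        "bitsandbytes>=0.39.0 is required for 8-bit loading; "
        "install it with the pinned requirements.txt"
    )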