Update train.py
train.py
CHANGED
@@ -1,91 +1,87 @@
-
-
+# === Required early import ===
+import unsloth
 from unsloth import FastLanguageModel
 from trl import SFTTrainer
-from peft import LoraConfig
 from transformers import TrainingArguments
+from datasets import load_dataset
 import torch
+from shutil import copytree
+import os
 
-#
-
-
-
-
-
-
+# === Model and training config ===
+MODEL_NAME = "unsloth/SmolLM2-1.7B-Instruct"  # Change if using another model
+BATCH_SIZE = 2
+EPOCHS = 3
+LR = 2e-4
+MAX_SEQ_LENGTH = 2048
+USE_4BIT = True
 
-
+# === Load model ===
 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=
-    max_seq_length=
-    dtype=
-    load_in_4bit=
+    model_name = MODEL_NAME,
+    max_seq_length = MAX_SEQ_LENGTH,
+    dtype = torch.float16,
+    load_in_4bit = USE_4BIT,
 )
 
-#
-model = FastLanguageModel.get_peft_model(
-    model,
-    r=16,
-    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-    lora_alpha=16,
-    lora_dropout=0,
-    bias="none",
-    use_gradient_checkpointing=True,
-    random_state=42,
-    use_rslora=False,
-    loftq_config=None,
-)
+# === Load dataset ===
+dataset = load_dataset("csv", data_files="data.csv")["train"]  # Replace with your CSV if needed
 
-#
-def
-
+# === Create 'text' column from instruction + response ===
+def add_text_column(example):
+    example["text"] = example["instruction"] + "\n" + example["response"]
+    return example
+
+dataset = dataset.map(add_text_column)
 
-
-
+# === Formatting function for trainer ===
+def formatting_func(example):
+    return [example["text"]]
 
-#
+# === TrainingArguments ===
 training_args = TrainingArguments(
-    output_dir="output",
-    num_train_epochs=
-    per_device_train_batch_size=
-    gradient_accumulation_steps=1,
-
-
-
-
-
-
-
-
-
-    lr_scheduler_type="
-
-
-    seed=42,
+    output_dir = "output",
+    num_train_epochs = EPOCHS,
+    per_device_train_batch_size = BATCH_SIZE,
+    gradient_accumulation_steps = 1,
+    gradient_checkpointing = True,
+    optim = "paged_adamw_8bit",
+    logging_steps = 10,
+    save_strategy = "epoch",
+    learning_rate = LR,
+    bf16 = False,
+    fp16 = True,
+    max_grad_norm = 1.0,
+    warmup_ratio = 0.03,
+    lr_scheduler_type = "linear",
+    disable_tqdm = False,
+    report_to = "none",
 )
 
-#
+# === Trainer setup ===
 trainer = SFTTrainer(
-    model=model,
-    tokenizer=tokenizer,
-    train_dataset=dataset,
-    dataset_text_field="
-
-
-    args=training_args,
+    model = model,
+    tokenizer = tokenizer,
+    train_dataset = dataset,
+    dataset_text_field = "text",
+    formatting_func = formatting_func,
+    args = training_args,
 )
 
+# === Train ===
+model = FastLanguageModel.prepare_for_training(model)
 trainer.train()
 
-#
-
+# === Save model ===
+save_dir = "output"
+final_dir = "/home/user/app/final_model"
 
-
-
-import os
+model.save_pretrained(save_dir, safe_serialization=True)
+tokenizer.save_pretrained(save_dir)
 
+# === Copy to visible directory ===
 try:
-    copytree(
+    copytree(save_dir, final_dir, dirs_exist_ok=True)
     print("✅ Model saved to /home/user/app/final_model for download in UI.")
 except Exception as e:
     print("⚠️ Failed to copy model to visible folder:", str(e))
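
The updated script assumes data.csv already provides instruction and response columns for add_text_column to join. A minimal sketch for checking that pipeline before a full training run; the two sample rows are hypothetical stand-ins for the real data.csv:

# Sanity-check the CSV -> "text" pipeline used by train.py.
# The sample rows below are hypothetical, not from the real dataset.
import csv
from datasets import load_dataset

rows = [
    {"instruction": "Summarize: the sky is blue.", "response": "The sky is blue."},
    {"instruction": "Translate 'hello' to French.", "response": "Bonjour."},
]
with open("data.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["instruction", "response"])
    writer.writeheader()
    writer.writerows(rows)

dataset = load_dataset("csv", data_files="data.csv")["train"]

def add_text_column(example):
    example["text"] = example["instruction"] + "\n" + example["response"]
    return example

dataset = dataset.map(add_text_column)
print(dataset[0]["text"])  # instruction and response joined by a newline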