# Provenance (Hugging Face file-viewer export): "Update train.py" by
# darrenphodgson76 — commit e747724 (verified), 2.56 kB.
# βœ… Final train.py with JSONL input and EOS-as-pad (no stop_sequences)
# βœ… Final train.py with JSONL input and EOS-as-pad (no stop_sequences)
import unsloth # must be first
import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from unsloth import FastLanguageModel
from trl import SFTTrainer
import os
import zipfile
# 1) Load the Unsloth model + tokenizer (4-bit weights, fp16 compute).
_load_kwargs = dict(
    model_name="HuggingFaceTB/SmolLM2-1.7B",
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_load_kwargs)

# 2) Reuse the existing EOS token as the pad token (avoids growing the vocab).
# NOTE(review): with pad == eos, a collator that masks pad labels will also
# mask EOS targets — confirm the model still learns to emit EOS as intended.
eos = tokenizer.eos_token  # expected: "<|endoftext|>"
tokenizer.pad_token = eos
model.config.pad_token_id = tokenizer.eos_token_id
# 3) Load the dataset from JSONL (one object per line with "instruction"
#    and "response" keys) and render each record as a single training text
#    that always terminates with the EOS token.
def _to_prompt(row):
    """Render one record as an instruction/response prompt ending in EOS."""
    instruction = row["instruction"].strip()
    response = row["response"].strip()
    return f"### Instruction:\n{instruction}\n\n### Response:\n{response}{eos}"


df = pd.read_json("data.jsonl", lines=True)
df["text"] = df.apply(_to_prompt, axis=1)
dataset = Dataset.from_pandas(df[["text"]])
# 4) Wrap the base model with LoRA adapters; only adapter weights train.
_lora_cfg = {
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
}
model = FastLanguageModel.get_peft_model(model, **_lora_cfg)
# 5) Tokenize every example into a fixed 512-token window.
def tokenize(example):
    """Tokenize a batch of "text" entries, truncating and padding to 512."""
    # NOTE(review): max_length=512 is tighter than the model's 2048 window —
    # confirm longer samples are meant to be truncated here.
    encode_kwargs = dict(truncation=True, padding="max_length", max_length=512)
    return tokenizer(example["text"], **encode_kwargs)


tokenized_dataset = dataset.map(tokenize, batched=True)
# 6) Trainer hyperparameters (fp16 to match the fp16-loaded model).
_hparams = dict(
    output_dir="./output_model",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_steps=10,
    save_steps=100,
    fp16=True,
)
training_args = TrainingArguments(**_hparams)
# 7) Assemble the supervised fine-tuning trainer. No stop_sequences are
#    passed: responses already end with the EOS token baked into "text".
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# 8) Run the fine-tuning loop.
trainer.train()
# 9) Persist the LoRA adapter, then package it as a single downloadable zip.
def _zip_dir(src_dir, dest_zip):
    """Recursively zip every file under *src_dir* into *dest_zip*.

    Archive member names are stored relative to *src_dir* so the zip
    unpacks without the leading output directory.
    """
    with zipfile.ZipFile(dest_zip, "w", zipfile.ZIP_DEFLATED) as archive:
        for root, _, files in os.walk(src_dir):
            for fname in files:
                full = os.path.join(root, fname)
                archive.write(full, os.path.relpath(full, src_dir))


output_dir = "./output_model"
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)

# πŸ”§ Zip for download (best-effort: a failed zip must not undo the save).
zip_path = "/home/user/app/model.zip"
try:
    _zip_dir(output_dir, zip_path)
    print(f"βœ… Zipped model to {zip_path}")
except Exception as e:
    print(f"❌ Failed to zip model: {e}")