import unsloth  # must be imported first so Unsloth can patch transformers/trl
import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from unsloth import FastLanguageModel
from trl import SFTTrainer  # requires the 'trl' package to be installed

# Load and format your dataset
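# Expected data.csv layout -- 'instruction' and 'response' columns (hypothetical example rows):
#   instruction,response
#   "Translate 'hello' to French","Bonjour"
#   "What is 2 + 2?","4"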
df = pd.read_csv("data.csv")
df["text"] = df.apply(lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']}\n", axis=1)
dataset = Dataset.from_pandas(df[["text"]])

# Load Unsloth model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/SmolLM2-1.7B-Instruct",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)

# Fall back to EOS if the tokenizer has no pad token (padding="max_length" below needs one)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Apply LoRA adapters (no task_type argument is passed here)
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,               # LoRA rank
    lora_alpha = 32,     # LoRA scaling factor
    lora_dropout = 0.05,
    bias = "none",
)

# Tokenize text
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)

# Set up training
training_args = TrainingArguments(
    output_dir = "./lora-finetuned",
    per_device_train_batch_size = 2,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    logging_steps = 10,
    save_steps = 100,
    fp16 = True,
)

# Train
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = tokenized_dataset,
)
trainer.train()

# Save the fine-tuned LoRA adapter (and the tokenizer, so the folder can be reloaded on its own)
model.save_pretrained("./lora-finetuned")
tokenizer.save_pretrained("./lora-finetuned")
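
# Optional: quick generation check with the freshly trained adapter.
# A minimal sketch -- the prompt below is a made-up example; replace it with one from your data.
FastLanguageModel.for_inference(model)  # switch Unsloth to its inference mode
prompt = "### Instruction:\nSummarize what this dataset teaches the model.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))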