import unsloth  # Unsloth must be imported first so its patches apply before transformers/trl
import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from unsloth import FastLanguageModel
from trl import SFTTrainer  # requires the 'trl' package to be installed


# Load and format your dataset
df = pd.read_csv("data.csv")
df["text"] = df.apply(
    lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']}\n",
    axis=1,
)
dataset = Dataset.from_pandas(df[["text"]])
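# Illustrative example of the expected layout (data.csv is assumed to contain
# "instruction" and "response" columns, matching the apply() above):
#
#   instruction,response
#   "What is LoRA?","LoRA adds small trainable low-rank matrices to a frozen model."
#
# which produces one training row in the "text" column like:
#
#   ### Instruction:
#   What is LoRA?
#
#   ### Response:
#   LoRA adds small trainable low-rank matrices to a frozen model.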

# Load Unsloth model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/SmolLM2-1.7B-Instruct",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)

# Apply LoRA adapters (task_type is not needed with Unsloth's get_peft_model)
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],  # standard attention/MLP projection layers
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
)

# Tokenize the formatted text; drop the raw "text" column so only model inputs remain
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])

# Set up training
training_args = TrainingArguments(
    output_dir = "./lora-finetuned",
    per_device_train_batch_size = 2,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    logging_steps = 10,
    save_steps = 100,
    fp16 = True,
)

# Train
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = tokenized_dataset,
)

trainer.train()

# Save the fine-tuned LoRA adapter and the tokenizer so the output directory is self-contained
model.save_pretrained("./lora-finetuned")
tokenizer.save_pretrained("./lora-finetuned")
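
# Optional sanity check (illustrative sketch, not part of the original script):
# switch the model into Unsloth's inference mode and generate one completion
# for a hypothetical prompt to confirm the adapter produces sensible output.
FastLanguageModel.for_inference(model)

prompt = "### Instruction:\nSummarize what LoRA fine-tuning does.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))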