# Unsloth must be imported before transformers/trl so its patches apply.
import unsloth

import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from unsloth import FastLanguageModel
from trl import SFTTrainer
from shutil import copytree
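
# Build one Alpaca-style "text" prompt per instruction/response row in data.csv.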
df = pd.read_csv("data.csv")
df["text"] = df.apply(
    lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']}\n",
    axis=1,
)
dataset = Dataset.from_pandas(df[["text"]])
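
# Load the base model in 4-bit (QLoRA-style) with a 2048-token context window.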
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)
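
# Attach LoRA adapters. target_modules is the standard Llama attention/MLP
# projection set used throughout Unsloth's examples.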
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
)
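
# Pre-tokenize the prompts. The EOS token is appended so the model learns to
# stop after a response; padding is left to the trainer's collator, and
# max_length=512 caps examples well below the 2048-token context (raise it if
# your pairs run longer).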
def tokenize(example):
    texts = [t + tokenizer.eos_token for t in example["text"]]
    return tokenizer(texts, truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
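
# Standard HF hyperparameters; fp16 matches the float16 dtype used at load time.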
training_args = TrainingArguments(
    output_dir = "./lora-finetuned",
    per_device_train_batch_size = 2,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    logging_steps = 10,
    save_steps = 100,
    fp16 = True,
)
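
# The dataset already contains input_ids, so recent TRL versions treat it as
# pre-tokenized and skip SFTTrainer's own tokenization pass.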
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = tokenized_dataset,
)

trainer.train()
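
# save_pretrained on a PEFT model writes only the LoRA adapter weights; saving
# the tokenizer alongside makes the output folder self-contained.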
model.save_pretrained("./lora-finetuned")
tokenizer.save_pretrained("./lora-finetuned")
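
# Copy the result into the folder the app's UI exposes for download.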
try:
    copytree("./lora-finetuned", "/home/user/app/final_model", dirs_exist_ok=True)
    print("✅ Model saved to /home/user/app/final_model for download in UI.")
except Exception as e:
    print("⚠️ Failed to copy model to visible folder:", str(e))
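
# Optional smoke test: a minimal generation sketch using the same Alpaca
# prompt format as training. The instruction below is only a placeholder;
# for_inference() switches Unsloth's patched model into generation mode.
FastLanguageModel.for_inference(model)
prompt = "### Instruction:\nSay hello.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=64)[0], skip_special_tokens=True))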