# ✅ Final train.py with ZIP logic added

import unsloth  # must be imported first so it can patch transformers
import os
import zipfile

import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

# Load the CSV and format each row into an instruction/response prompt
df = pd.read_csv("data.csv")
df["text"] = df.apply(
    lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']}\n",
    axis=1,
)
dataset = Dataset.from_pandas(df[["text"]])

# Load the base model in 4-bit with Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)

# Apply LoRA (no task_type needed with Unsloth)
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],  # standard Llama attention + MLP projections
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
)

# Tokenize the formatted text
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)

# Set up training
training_args = TrainingArguments(
    output_dir = "./output_model",
    per_device_train_batch_size = 2,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    logging_steps = 10,
    save_steps = 100,
    fp16 = True,
)

# Train
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = tokenized_dataset,
)
trainer.train()

# Save the fine-tuned LoRA adapter (plus the tokenizer, so the zip is self-contained)
output_dir = "./output_model"
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# ✅ Zip it for download
zip_path = "/home/user/app/model.zip"
try:
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(output_dir):
            for file in files:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, output_dir)
                zipf.write(full_path, rel_path)
    print(f"✅ Zipped model to {zip_path}")
except Exception as e:
    print(f"❌ Failed to zip model: {e}")
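
# Optional post-training sanity check — a minimal sketch, assuming the `model`
# and `tokenizer` objects above are still in memory and a GPU is available.
# FastLanguageModel.for_inference() switches Unsloth into its faster inference
# mode; the prompt text below is only an illustrative placeholder.
FastLanguageModel.for_inference(model)
prompt = "### Instruction:\nSummarize what this fine-tuned model was trained to do.\n\n### Response:\n"
inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])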