Update train.py
train.py
CHANGED
@@ -1,91 +1,87 @@
-
-
+# === Required early import ===
+import unsloth
 from unsloth import FastLanguageModel
 from trl import SFTTrainer
-from peft import LoraConfig
 from transformers import TrainingArguments
+from datasets import load_dataset
 import torch
+from shutil import copytree
+import os
 
-#
-
-
-
-
-
-
+# === Model and training config ===
+MODEL_NAME = "unsloth/SmolLM2-1.7B-Instruct"  # Change if using another model
+BATCH_SIZE = 2
+EPOCHS = 3
+LR = 2e-4
+MAX_SEQ_LENGTH = 2048
+USE_4BIT = True
 
-
+# === Load model ===
 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=
-    max_seq_length=
-    dtype=
-    load_in_4bit=
+    model_name = MODEL_NAME,
+    max_seq_length = MAX_SEQ_LENGTH,
+    dtype = torch.float16,
+    load_in_4bit = USE_4BIT,
 )
 
-#
-model = FastLanguageModel.get_peft_model(
-    model,
-    r=16,
-    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-    lora_alpha=16,
-    lora_dropout=0,
-    bias="none",
-    use_gradient_checkpointing=True,
-    random_state=42,
-    use_rslora=False,
-    loftq_config=None,
-)
+# === Load dataset ===
+dataset = load_dataset("csv", data_files="data.csv")["train"]  # Replace with your CSV if needed
 
-#
-def
-
+# === Create 'text' column from instruction + response ===
+def add_text_column(example):
+    example["text"] = example["instruction"] + "\n" + example["response"]
+    return example
+
+dataset = dataset.map(add_text_column)
 
-
-
+# === Formatting function for trainer ===
+def formatting_func(example):
+    return [example["text"]]
 
-#
+# === TrainingArguments ===
 training_args = TrainingArguments(
-    output_dir="output",
-    num_train_epochs=
-    per_device_train_batch_size=
-    gradient_accumulation_steps=1,
-
-
-
-
-
-
-
-
-
-    lr_scheduler_type="
-
-
-    seed=42,
+    output_dir = "output",
+    num_train_epochs = EPOCHS,
+    per_device_train_batch_size = BATCH_SIZE,
+    gradient_accumulation_steps = 1,
+    gradient_checkpointing = True,
+    optim = "paged_adamw_8bit",
+    logging_steps = 10,
+    save_strategy = "epoch",
+    learning_rate = LR,
+    bf16 = False,
+    fp16 = True,
+    max_grad_norm = 1.0,
+    warmup_ratio = 0.03,
+    lr_scheduler_type = "linear",
+    disable_tqdm = False,
+    report_to = "none",
 )
 
-#
+# === Trainer setup ===
 trainer = SFTTrainer(
-    model=model,
-    tokenizer=tokenizer,
-    train_dataset=dataset,
-    dataset_text_field="
-
-
-    args=training_args,
+    model = model,
+    tokenizer = tokenizer,
+    train_dataset = dataset,
+    dataset_text_field = "text",
+    formatting_func = formatting_func,
+    args = training_args,
 )
 
+# === Train ===
+model = FastLanguageModel.prepare_for_training(model)
 trainer.train()
 
-#
-
+# === Save model ===
+save_dir = "output"
+final_dir = "/home/user/app/final_model"
 
-
-
-import os
+model.save_pretrained(save_dir, safe_serialization=True)
+tokenizer.save_pretrained(save_dir)
 
+# === Copy to visible directory ===
 try:
-    copytree(
+    copytree(save_dir, final_dir, dirs_exist_ok=True)
     print("✅ Model saved to /home/user/app/final_model for download in UI.")
 except Exception as e:
     print("⚠️ Failed to copy model to visible folder:", str(e))
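
The updated script assumes data.csv already provides instruction and response columns for add_text_column to join. A minimal sketch for checking that pipeline before a full training run; the two sample rows are hypothetical stand-ins for the real data.csv:

# Sanity-check the CSV -> "text" pipeline used by train.py.
# The sample rows below are hypothetical, not from the real dataset.
import csv
from datasets import load_dataset

rows = [
    {"instruction": "Summarize: the sky is blue.", "response": "The sky is blue."},
    {"instruction": "Translate 'hello' to French.", "response": "Bonjour."},
]
with open("data.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["instruction", "response"])
    writer.writeheader()
    writer.writerows(rows)

dataset = load_dataset("csv", data_files="data.csv")["train"]

def add_text_column(example):
    example["text"] = example["instruction"] + "\n" + example["response"]
    return example

dataset = dataset.map(add_text_column)
print(dataset[0]["text"])  # instruction and response joined by a newline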