darrenphodgson76 committed
Commit 12a4ede · verified · 1 Parent(s): ee5d7fb

Update train.py

Files changed (1)
train.py +61 -65
train.py CHANGED
@@ -1,91 +1,87 @@
- from datasets import load_dataset
- from transformers import AutoTokenizer
  from unsloth import FastLanguageModel
  from trl import SFTTrainer
- from peft import LoraConfig
  from transformers import TrainingArguments
  import torch

- # ✅ Load dataset
- dataset = load_dataset("csv", data_files="data.csv", split="train")
-
- # ✅ Load tokenizer and model
- model_name = "unsloth/SmolLM2-1.7B-Instruct"
- max_seq_length = 2048
- dtype = torch.float16  # Automatically uses bfloat16 if supported

- FastLanguageModel.for_inference = False
  model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name=model_name,
-     max_seq_length=max_seq_length,
-     dtype=dtype,
-     load_in_4bit=True,
  )

- # ✅ Apply LoRA
- model = FastLanguageModel.get_peft_model(
-     model,
-     r=16,
-     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-     lora_alpha=16,
-     lora_dropout=0,
-     bias="none",
-     use_gradient_checkpointing=True,
-     random_state=42,
-     use_rslora=False,
-     loftq_config=None,
- )

- # ✅ Format prompt
- def formatting_func(example):
-     return [f"<|user|>\n{example['prompt']}\n<|assistant|>\n{example['completion']}"]

- tokenizer.pad_token = tokenizer.eos_token
- tokenizer.padding_side = "right"

- # ✅ Training args
  training_args = TrainingArguments(
-     output_dir="output",
-     num_train_epochs=3,
-     per_device_train_batch_size=2,
-     gradient_accumulation_steps=1,
-     warmup_steps=5,
-     logging_steps=10,
-     save_strategy="no",
-     learning_rate=2e-4,
-     bf16=False,  # Use True if bfloat16 is supported
-     fp16=True,  # Enable float16
-     max_grad_norm=1.0,
-     max_steps=-1,
-     gradient_checkpointing=True,
-     lr_scheduler_type="cosine",
-     optim="adamw_8bit",
-     weight_decay=0.01,
-     seed=42,
  )

- # ✅ Trainer
  trainer = SFTTrainer(
-     model=model,
-     tokenizer=tokenizer,
-     train_dataset=dataset,
-     dataset_text_field="prompt",
-     max_seq_length=max_seq_length,
-     formatting_func=formatting_func,
-     args=training_args,
  )

  trainer.train()

- # ✅ Save LoRA adapter
- trainer.model.save_pretrained("output")

- # ✅ 👇 Copy output folder to visible file space so it appears in Hugging Face UI
- from shutil import copytree
- import os

  try:
-     copytree("output", "/home/user/app/final_model", dirs_exist_ok=True)
      print("✅ Model saved to /home/user/app/final_model for download in UI.")
  except Exception as e:
      print("⚠️ Failed to copy model to visible folder:", str(e))
 
+ # === Required early import ===
+ import unsloth
  from unsloth import FastLanguageModel
  from trl import SFTTrainer
  from transformers import TrainingArguments
+ from datasets import load_dataset
  import torch
+ from shutil import copytree
+ import os

+ # === Model and training config ===
+ MODEL_NAME = "unsloth/SmolLM2-1.7B-Instruct"  # Change if using another model
+ BATCH_SIZE = 2
+ EPOCHS = 3
+ LR = 2e-4
+ MAX_SEQ_LENGTH = 2048
+ USE_4BIT = True

+ # === Load model ===
  model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = MODEL_NAME,
+     max_seq_length = MAX_SEQ_LENGTH,
+     dtype = torch.float16,
+     load_in_4bit = USE_4BIT,
  )

+ # === Attach LoRA adapters (4-bit base weights are frozen, so trainable
+ # adapters are needed; settings carried over from the previous version) ===
+ model = FastLanguageModel.get_peft_model(
+     model,
+     r = 16,
+     target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+     lora_alpha = 16,
+     lora_dropout = 0,
+     bias = "none",
+     use_gradient_checkpointing = True,
+     random_state = 42,
+ )

+ # === Load dataset ===
+ dataset = load_dataset("csv", data_files="data.csv")["train"]  # Replace with your CSV if needed

+ # === Create 'text' column from instruction + response ===
+ def add_text_column(example):
+     example["text"] = example["instruction"] + "\n" + example["response"]
+     return example
+
+ dataset = dataset.map(add_text_column)

+ # === Formatting function for trainer ===
+ def formatting_func(example):
+     return [example["text"]]
 
41
+ # === TrainingArguments ===
42
  training_args = TrainingArguments(
43
+ output_dir = "output",
44
+ num_train_epochs = EPOCHS,
45
+ per_device_train_batch_size = BATCH_SIZE,
46
+ gradient_accumulation_steps = 1,
47
+ gradient_checkpointing = True,
48
+ optim = "paged_adamw_8bit",
49
+ logging_steps = 10,
50
+ save_strategy = "epoch",
51
+ learning_rate = LR,
52
+ bf16 = False,
53
+ fp16 = True,
54
+ max_grad_norm = 1.0,
55
+ warmup_ratio = 0.03,
56
+ lr_scheduler_type = "linear",
57
+ disable_tqdm = False,
58
+ report_to = "none",
 
59
  )
60
 
61
+ # === Trainer setup ===
62
  trainer = SFTTrainer(
63
+ model = model,
64
+ tokenizer = tokenizer,
65
+ train_dataset = dataset,
66
+ dataset_text_field = "text",
67
+ formatting_func = formatting_func,
68
+ args = training_args,
 
69
  )

+ # === Train ===
+ FastLanguageModel.for_training(model)  # put the model in training mode
  trainer.train()

+ # === Save model ===
+ save_dir = "output"
+ final_dir = "/home/user/app/final_model"
+
+ model.save_pretrained(save_dir, safe_serialization=True)
+ tokenizer.save_pretrained(save_dir)

+ # === Copy to visible directory ===
  try:
+     copytree(save_dir, final_dir, dirs_exist_ok=True)
      print("✅ Model saved to /home/user/app/final_model for download in UI.")
  except Exception as e:
      print("⚠️ Failed to copy model to visible folder:", str(e))