darrenphodgson76 committed on
Commit
b6a51b0
·
verified ·
1 Parent(s): adfdbe2

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +8 -6
train.py CHANGED
@@ -10,12 +10,6 @@ import os
10
  import shutil
11
  import zipfile
12
 
13
- # Load and format your dataset
14
- df = pd.read_csv("data.csv")
15
- eos_token = tokenizer.eos_token or "</s>" # Fallback if eos_token is None
16
- df["text"] = df.apply(lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']} {eos_token}", axis=1)
17
- dataset = Dataset.from_pandas(df[["text"]])
18
-
19
  # Load Unsloth model
20
  model, tokenizer = FastLanguageModel.from_pretrained(
21
  model_name = "HuggingFaceTB/SmolLM2-1.7B",
@@ -24,6 +18,14 @@ model, tokenizer = FastLanguageModel.from_pretrained(
24
  load_in_4bit = True,
25
  )
26
 
 
 
 
 
 
 
 
 
27
  # Apply LoRA without task_type
28
  model = FastLanguageModel.get_peft_model(
29
  model,
 
10
  import shutil
11
  import zipfile
12
 
 
 
 
 
 
 
13
  # Load Unsloth model
14
  model, tokenizer = FastLanguageModel.from_pretrained(
15
  model_name = "HuggingFaceTB/SmolLM2-1.7B",
 
18
  load_in_4bit = True,
19
  )
20
 
21
+ # Get eos_token after tokenizer is loaded
22
+ eos_token = tokenizer.eos_token or "</s>"# Fallback if eos_token is None
23
+
24
+ # Load and format your dataset
25
+ df = pd.read_csv("data.csv")
26
+ df["text"] = df.apply(lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']} {eos_token}", axis=1)
27
+ dataset = Dataset.from_pandas(df[["text"]])
28
+
29
  # Apply LoRA without task_type
30
  model = FastLanguageModel.get_peft_model(
31
  model,