Update train.py
Browse files
train.py
CHANGED
@@ -10,12 +10,6 @@ import os
|
|
10 |
import shutil
|
11 |
import zipfile
|
12 |
|
13 |
-
# Load and format your dataset
|
14 |
-
df = pd.read_csv("data.csv")
|
15 |
-
eos_token = tokenizer.eos_token or "</s>" # Fallback if eos_token is None
|
16 |
-
df["text"] = df.apply(lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']} {eos_token}", axis=1)
|
17 |
-
dataset = Dataset.from_pandas(df[["text"]])
|
18 |
-
|
19 |
# Load Unsloth model
|
20 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
21 |
model_name = "HuggingFaceTB/SmolLM2-1.7B",
|
@@ -24,6 +18,14 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
24 |
load_in_4bit = True,
|
25 |
)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Apply LoRA without task_type
|
28 |
model = FastLanguageModel.get_peft_model(
|
29 |
model,
|
|
|
10 |
import shutil
|
11 |
import zipfile
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
# Load Unsloth model
|
14 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
15 |
model_name = "HuggingFaceTB/SmolLM2-1.7B",
|
|
|
18 |
load_in_4bit = True,
|
19 |
)
|
20 |
|
21 |
+
# Get eos_token after tokenizer is loaded
|
22 |
+
eos_token = tokenizer.eos_token or "</s>"# Fallback if eos_token is None
|
23 |
+
|
24 |
+
# Load and format your dataset
|
25 |
+
df = pd.read_csv("data.csv")
|
26 |
+
df["text"] = df.apply(lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']} {eos_token}", axis=1)
|
27 |
+
dataset = Dataset.from_pandas(df[["text"]])
|
28 |
+
|
29 |
# Apply LoRA without task_type
|
30 |
model = FastLanguageModel.get_peft_model(
|
31 |
model,
|