Update train.py
train.py
CHANGED
@@ -1,4 +1,4 @@
-# ✅ Final train.py with EOS-as-pad (no stop_sequences)
+# ✅ Final train.py with JSONL input and EOS-as-pad (no stop_sequences)
 import unsloth  # must be first
 import pandas as pd
 import torch
@@ -22,8 +22,9 @@ eos = tokenizer.eos_token  # should be "<|endoftext|>"
 tokenizer.pad_token = eos
 model.config.pad_token_id = tokenizer.eos_token_id
 
-# 3) Load & format your dataset, always ending responses with EOS
-
+# 3) Load & format your dataset from JSONL, always ending responses with EOS
+# Read the JSONL we generated (one JSON object per line with "instruction" & "response")
+df = pd.read_json("data.jsonl", lines=True)
 df["text"] = df.apply(
     lambda row: (
         "### Instruction:\n"
@@ -94,4 +95,4 @@ try:
         z.write(full, rel)
     print(f"✅ Zipped model to {zip_path}")
 except Exception as e:
-    print(f"❌ Failed to zip model: {e}")
+    print(f"❌ Failed to zip model: {e}")
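Likewise, a sketch of the zipping block that the last hunk touches, assuming the usual os.walk pattern; only z.write(full, rel) and the two print messages appear in the hunk, and out_dir / zip_path are made-up names.

import os
import zipfile

out_dir = "outputs"     # assumed: directory holding the trained model
zip_path = "model.zip"  # assumed: name of the archive to produce
try:
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
        # walk the output directory and store each file under a relative path
        for root, _, files in os.walk(out_dir):
            for name in files:
                full = os.path.join(root, name)
                rel = os.path.relpath(full, out_dir)
                z.write(full, rel)
    print(f"✅ Zipped model to {zip_path}")
except Exception as e:
    print(f"❌ Failed to zip model: {e}")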