Update train.py
Browse files
train.py
CHANGED
@@ -40,7 +40,7 @@ def main():
|
|
40 |
model.config.pad_token_id = tokenizer.pad_token_id
|
41 |
|
42 |
# 3) Load chat-formatted dataset
|
43 |
-
df = pd.read_json("
|
44 |
# Confirm each sample ends with the end-of-turn token
|
45 |
assert df['text'].str.endswith("<|im_end|>").all(), "Some samples missing end-of-turn token"
|
46 |
|
|
|
40 |
model.config.pad_token_id = tokenizer.pad_token_id
|
41 |
|
42 |
# 3) Load chat-formatted dataset
|
43 |
+
df = pd.read_json("data.jsonl", lines=True)
|
44 |
# Confirm each sample ends with the end-of-turn token
|
45 |
assert df['text'].str.endswith("<|im_end|>").all(), "Some samples missing end-of-turn token"
|
46 |
|