darrenphodgson76 committed on
Commit
3c0a089
·
verified ·
1 Parent(s): 2efa3f3

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +1 -1
train.py CHANGED
@@ -40,7 +40,7 @@ def main():
40
  model.config.pad_token_id = tokenizer.pad_token_id
41
 
42
  # 3) Load chat-formatted dataset
43
- df = pd.read_json("chat_formatted_data.jsonl", lines=True)
44
  # Confirm each sample ends with the end-of-turn token
45
  assert df['text'].str.endswith("<|im_end|>").all(), "Some samples missing end-of-turn token"
46
 
 
40
  model.config.pad_token_id = tokenizer.pad_token_id
41
 
42
  # 3) Load chat-formatted dataset
43
+ df = pd.read_json("data.jsonl", lines=True)
44
  # Confirm each sample ends with the end-of-turn token
45
  assert df['text'].str.endswith("<|im_end|>").all(), "Some samples missing end-of-turn token"
46