Spaces:

darrenphodgson76
/

SmolLM2-1.7B-Instruct-Bussiness-Analysis

Paused

App Files Files Community

darrenphodgson76 committed on Apr 25

Commit

3c0a089

·

verified ·

1 Parent(s): 2efa3f3

Update train.py

Files changed (1) hide show

train.py +1 -1

train.py CHANGED Viewed

@@ -40,7 +40,7 @@ def main():
     model.config.pad_token_id = tokenizer.pad_token_id
     # 3) Load chat-formatted dataset
-    df = pd.read_json("chat_formatted_data.jsonl", lines=True)
     # Confirm each sample ends with the end-of-turn token
     assert df['text'].str.endswith("<|im_end|>").all(), "Some samples missing end-of-turn token"

     model.config.pad_token_id = tokenizer.pad_token_id
     # 3) Load chat-formatted dataset
+    df = pd.read_json("data.jsonl", lines=True)
     # Confirm each sample ends with the end-of-turn token
     assert df['text'].str.endswith("<|im_end|>").all(), "Some samples missing end-of-turn token"