Spaces:

darrenphodgson76
/

SmolLM2-1.7B-Instruct-Bussiness-Analysis

Paused

App Files Files Community

darrenphodgson76 committed on 5 days ago

Commit

f2467aa

·

verified ·

1 Parent(s): 3c0a089

Update train.py

Files changed (1) hide show

train.py +2 -15

train.py CHANGED Viewed

@@ -11,7 +11,7 @@ from trl import SFTTrainer
 def main():
-    # 1) Load 4-bit model + tokenizer
     model_name = "HuggingFaceTB/SmolLM2-1.7B"
     max_seq_length = 768
     dtype = torch.float16
@@ -24,20 +24,7 @@ def main():
         load_in_4bit=load_in_4bit,
     )
-    # 2) Configure special tokens for chat format
-    special_tokens = {
-        "bos_token": "<|im_start|>user\n",
-        "eos_token": "<|im_end|>",
-        "pad_token": "<|im_end|>",
-        "additional_special_tokens": [
-            "<|im_start|>assistant\n"
-        ]
-    }
-    tokenizer.add_special_tokens(special_tokens)
-    model.resize_token_embeddings(len(tokenizer))
-    model.config.bos_token_id = tokenizer.bos_token_id
-    model.config.eos_token_id = tokenizer.eos_token_id
-    model.config.pad_token_id = tokenizer.pad_token_id
     # 3) Load chat-formatted dataset
     df = pd.read_json("data.jsonl", lines=True)

 def main():
+    # 1) Load 4-bit model + tokenizer (SmolLM already chat-formatted)
     model_name = "HuggingFaceTB/SmolLM2-1.7B"
     max_seq_length = 768
     dtype = torch.float16
         load_in_4bit=load_in_4bit,
     )
+    # 2) NO manual special-tokens injection or resizing — base model tokenizer already includes chat markers
     # 3) Load chat-formatted dataset
     df = pd.read_json("data.jsonl", lines=True)