Model save

Browse files

Files changed (5) hide show

README.md +2 -4
all_results.json +5 -5
model.safetensors +1 -1
train_results.json +5 -5
trainer_state.json +212 -9

README.md CHANGED Viewed

@@ -1,11 +1,9 @@
 ---
 base_model: Qwen/Qwen2.5-1.5B-Instruct
-datasets: simplescaling/s1K-1.1
 library_name: transformers
 model_name: Qwen2.5-1.5B-Open-R1-Distill
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - sft
 licence: license
@@ -13,7 +11,7 @@ licence: license
 # Model Card for Qwen2.5-1.5B-Open-R1-Distill
-This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the [simplescaling/s1K-1.1](https://huggingface.co/datasets/simplescaling/s1K-1.1) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/224015062-chinese-university-of-hong-kong-shenzhen/huggingface/runs/qy2yg8g6)
 This model was trained with SFT.

 ---
 base_model: Qwen/Qwen2.5-1.5B-Instruct
 library_name: transformers
 model_name: Qwen2.5-1.5B-Open-R1-Distill
 tags:
 - generated_from_trainer
 - trl
 - sft
 licence: license
 # Model Card for Qwen2.5-1.5B-Open-R1-Distill
+This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/224015062-chinese-university-of-hong-kong-shenzhen/huggingface/runs/j2a2ufcx)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 170926276608.0,
-    "train_loss": 1.3959611256917317,
-    "train_runtime": 49.4165,
     "train_samples": 1000,
-    "train_samples_per_second": 3.076,
-    "train_steps_per_second": 0.061
 }

 {
+    "total_flos": 8261436702720.0,
+    "train_loss": 1.3481066539369781,
+    "train_runtime": 309.1787,
     "train_samples": 1000,
+    "train_samples_per_second": 29.921,
+    "train_steps_per_second": 0.469
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:228fa871601f6e46f5b0793d77eb47fcfc36df302cdced250fefaa2f0aff6bb0
 size 3087467144

 version https://git-lfs.github.com/spec/v1
+oid sha256:2548c868e72401f8e2a03c6add0193e3bc58c081776254ce0741725677d479fa
 size 3087467144

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 170926276608.0,
-    "train_loss": 1.3959611256917317,
-    "train_runtime": 49.4165,
     "train_samples": 1000,
-    "train_samples_per_second": 3.076,
-    "train_steps_per_second": 0.061
 }

 {
+    "total_flos": 8261436702720.0,
+    "train_loss": 1.3481066539369781,
+    "train_runtime": 309.1787,
     "train_samples": 1000,
+    "train_samples_per_second": 29.921,
+    "train_steps_per_second": 0.469
 }

trainer_state.json CHANGED Viewed

@@ -3,23 +3,226 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 3,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 1.0,
-      "step": 3,
-      "total_flos": 170926276608.0,
-      "train_loss": 1.3959611256917317,
-      "train_runtime": 49.4165,
-      "train_samples_per_second": 3.076,
-      "train_steps_per_second": 0.061
     }
   ],
   "logging_steps": 5,
-  "max_steps": 3,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
@@ -35,7 +238,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 170926276608.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 145,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.034482758620689655,
+      "grad_norm": 2.223879312740725,
+      "learning_rate": 3.125e-05,
+      "loss": 1.6124,
+      "step": 5
+    },
+    {
+      "epoch": 0.06896551724137931,
+      "grad_norm": 1.8593462844920996,
+      "learning_rate": 4.997634107543713e-05,
+      "loss": 1.483,
+      "step": 10
+    },
+    {
+      "epoch": 0.10344827586206896,
+      "grad_norm": 1.2190914332382117,
+      "learning_rate": 4.971074924974395e-05,
+      "loss": 1.4303,
+      "step": 15
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 1.0812975621686058,
+      "learning_rate": 4.915349129238729e-05,
+      "loss": 1.4119,
+      "step": 20
+    },
+    {
+      "epoch": 0.1724137931034483,
+      "grad_norm": 1.0386501468781904,
+      "learning_rate": 4.8311884978689945e-05,
+      "loss": 1.3904,
+      "step": 25
+    },
+    {
+      "epoch": 0.20689655172413793,
+      "grad_norm": 0.8981832087913736,
+      "learning_rate": 4.719698207710602e-05,
+      "loss": 1.3854,
+      "step": 30
+    },
+    {
+      "epoch": 0.2413793103448276,
+      "grad_norm": 0.9863245505204763,
+      "learning_rate": 4.582342322009812e-05,
+      "loss": 1.3706,
+      "step": 35
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 0.8723743249631993,
+      "learning_rate": 4.4209245646929606e-05,
+      "loss": 1.377,
+      "step": 40
+    },
+    {
+      "epoch": 0.3103448275862069,
+      "grad_norm": 0.9389609365886089,
+      "learning_rate": 4.2375646343046135e-05,
+      "loss": 1.354,
+      "step": 45
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.9078155340095996,
+      "learning_rate": 4.034670368644256e-05,
+      "loss": 1.37,
+      "step": 50
+    },
+    {
+      "epoch": 0.3793103448275862,
+      "grad_norm": 0.8030698620747992,
+      "learning_rate": 3.8149061256287007e-05,
+      "loss": 1.374,
+      "step": 55
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.8467334234236363,
+      "learning_rate": 3.581157795594989e-05,
+      "loss": 1.3178,
+      "step": 60
+    },
+    {
+      "epoch": 0.4482758620689655,
+      "grad_norm": 0.8223111597770548,
+      "learning_rate": 3.3364949044936924e-05,
+      "loss": 1.3572,
+      "step": 65
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.8325338686878566,
+      "learning_rate": 3.084130305624209e-05,
+      "loss": 1.3393,
+      "step": 70
+    },
+    {
+      "epoch": 0.5172413793103449,
+      "grad_norm": 0.8266000883686835,
+      "learning_rate": 2.8273779892303337e-05,
+      "loss": 1.2754,
+      "step": 75
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.7826202562945184,
+      "learning_rate": 2.5696095639901996e-05,
+      "loss": 1.3357,
+      "step": 80
+    },
+    {
+      "epoch": 0.5862068965517241,
+      "grad_norm": 0.7673219689160695,
+      "learning_rate": 2.314209981875008e-05,
+      "loss": 1.349,
+      "step": 85
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.7415260497592828,
+      "learning_rate": 2.0645330877869e-05,
+      "loss": 1.3149,
+      "step": 90
+    },
+    {
+      "epoch": 0.6551724137931034,
+      "grad_norm": 0.743723421645456,
+      "learning_rate": 1.8238575776872595e-05,
+      "loss": 1.2909,
+      "step": 95
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.7636915735540463,
+      "learning_rate": 1.5953439435625384e-05,
+      "loss": 1.311,
+      "step": 100
+    },
+    {
+      "epoch": 0.7241379310344828,
+      "grad_norm": 0.7633473936950439,
+      "learning_rate": 1.3819929706157959e-05,
+      "loss": 1.3251,
+      "step": 105
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.7474290479644277,
+      "learning_rate": 1.1866063316886964e-05,
+      "loss": 1.3102,
+      "step": 110
+    },
+    {
+      "epoch": 0.7931034482758621,
+      "grad_norm": 0.7520390376697367,
+      "learning_rate": 1.0117497963783762e-05,
+      "loss": 1.2939,
+      "step": 115
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.7145745995851329,
+      "learning_rate": 8.597195379780726e-06,
+      "loss": 1.3224,
+      "step": 120
+    },
+    {
+      "epoch": 0.8620689655172413,
+      "grad_norm": 0.6935384011256254,
+      "learning_rate": 7.3251198069053465e-06,
+      "loss": 1.2729,
+      "step": 125
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.68511709825691,
+      "learning_rate": 6.317975830732497e-06,
+      "loss": 1.2847,
+      "step": 130
+    },
+    {
+      "epoch": 0.9310344827586207,
+      "grad_norm": 0.6717353850255624,
+      "learning_rate": 5.588989019848609e-06,
+      "loss": 1.2687,
+      "step": 135
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 0.7112591296225297,
+      "learning_rate": 5.147732250916841e-06,
+      "loss": 1.3043,
+      "step": 140
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.7123736228791405,
+      "learning_rate": 5e-06,
+      "loss": 1.2627,
+      "step": 145
+    },
     {
       "epoch": 1.0,
+      "step": 145,
+      "total_flos": 8261436702720.0,
+      "train_loss": 1.3481066539369781,
+      "train_runtime": 309.1787,
+      "train_samples_per_second": 29.921,
+      "train_steps_per_second": 0.469
     }
   ],
   "logging_steps": 5,
+  "max_steps": 145,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 8261436702720.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null