Training in progress, step 30, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +49 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:257e824b7f312932cbaba48c576ec017f3e34949732e0318db181ec0e9033eb8
 size 3152296

 version https://git-lfs.github.com/spec/v1
+oid sha256:a55d1d7a6866d3e288d048821f190cd0fce01f30911bb9938596fb0ed9f8f91d
 size 3152296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fd25a243c5c10da794b54c5569edc75ecd66df85121d20f5d6a2b56abb689a5
 size 6332154

 version https://git-lfs.github.com/spec/v1
+oid sha256:dda9e920297dcca2d65596a842b36bcb3be2dc47d68a837fef22fea7ae421aff
 size 6332154

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5a23cf47757f164f2cd7e8f37453fda648bb8645ab8bab7ab72c15aa0a34a05
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:60ef68c51e946e416e5a7596a5efe8023d080de3e0ed1b22446604496dbf1271
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:015707cb16790250630febca682498cb5d3456d5a13443b953687f19dc7d59ed
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2def2cd24154d8cecbaa07c36ae27e5ebb9b7273a78abfea27aa67c480e4ae2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 9.052206039428711,
   "best_model_checkpoint": "miner_id_24/checkpoint-20",
-  "epoch": 0.14678899082568808,
   "eval_steps": 5,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -89,6 +89,50 @@
       "eval_samples_per_second": 284.524,
       "eval_steps_per_second": 71.75,
       "step": 20
     }
   ],
   "logging_steps": 3,
@@ -103,7 +147,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -112,12 +156,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 97193029533696.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 9.052206039428711,
   "best_model_checkpoint": "miner_id_24/checkpoint-20",
+  "epoch": 0.22018348623853212,
   "eval_steps": 5,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 284.524,
       "eval_steps_per_second": 71.75,
       "step": 20
+    },
+    {
+      "epoch": 0.15412844036697249,
+      "grad_norm": 717.9278564453125,
+      "learning_rate": 5.7422070843492734e-05,
+      "loss": 36.0957,
+      "step": 21
+    },
+    {
+      "epoch": 0.1761467889908257,
+      "grad_norm": 787.7925415039062,
+      "learning_rate": 2.7103137257858868e-05,
+      "loss": 38.3509,
+      "step": 24
+    },
+    {
+      "epoch": 0.1834862385321101,
+      "eval_loss": 8.969137191772461,
+      "eval_runtime": 0.4186,
+      "eval_samples_per_second": 274.758,
+      "eval_steps_per_second": 69.287,
+      "step": 25
+    },
+    {
+      "epoch": 0.1981651376146789,
+      "grad_norm": 1338.9857177734375,
+      "learning_rate": 7.022351411174866e-06,
+      "loss": 40.5705,
+      "step": 27
+    },
+    {
+      "epoch": 0.22018348623853212,
+      "grad_norm": 1031.021728515625,
+      "learning_rate": 0.0,
+      "loss": 46.5563,
+      "step": 30
+    },
+    {
+      "epoch": 0.22018348623853212,
+      "eval_loss": 9.053560256958008,
+      "eval_runtime": 0.4208,
+      "eval_samples_per_second": 273.271,
+      "eval_steps_per_second": 68.912,
+      "step": 30
     }
   ],
   "logging_steps": 3,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 141879479894016.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null