CocoRoF committed (verified)
Commit 41c18d7 · 1 Parent(s): 4100cb7

Training in progress, step 162, checkpoint

last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ca06cfe440d39a1e1fb871aaf6fc6d59c82997564474bfffd9e950e458625b3
+oid sha256:567f3dbf4ab3bb972f02a97f4434faa92ca020703b61912d50a8d17145bcb3bb
 size 2362528
last-checkpoint/README.md CHANGED
@@ -524,6 +524,18 @@ You can finetune this model on your own dataset.
 | 2.7407 | 148 | 0.2397 | - | - |
 | 2.7593 | 149 | 0.2448 | - | - |
 | 2.7778 | 150 | 0.2431 | 0.0084 | 0.9791 |
+| 2.7963 | 151 | 0.2408 | - | - |
+| 2.8148 | 152 | 0.2394 | - | - |
+| 2.8333 | 153 | 0.2317 | - | - |
+| 2.8519 | 154 | 0.2367 | - | - |
+| 2.8704 | 155 | 0.2375 | - | - |
+| 2.8889 | 156 | 0.2351 | - | - |
+| 2.9074 | 157 | 0.2448 | - | - |
+| 2.9259 | 158 | 0.229 | - | - |
+| 2.9444 | 159 | 0.2274 | - | - |
+| 2.9630 | 160 | 0.2313 | - | - |
+| 2.9815 | 161 | 0.2269 | - | - |
+| 3.0 | 162 | 0.2298 | - | - |
 
 </details>
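The epoch column in the rows added above is simply the global step divided by the roughly 54 optimizer steps per epoch implied by this run (150 / 54 ≈ 2.7778, 162 / 54 = 3.0), which is why this checkpoint lands exactly at the end of epoch 3. A minimal sketch of that sanity check; the 54 steps-per-epoch figure is inferred from the logged values, not stated anywhere in the files:

```python
# Hypothetical check: epoch values in the table equal step / steps_per_epoch.
steps_per_epoch = 54  # inferred from 150 / 2.7778 and 162 / 3.0

for step in (148, 149, 150, 151, 162):
    print(step, round(step / steps_per_epoch, 4))
# 162 / 54 == 3.0, so step 162 closes out the third and final epoch.
```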
 
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c3d6db61b0b7d9d1bae9c4b714748a0c0adac2ef774f2a74f674fcd39f83698
+oid sha256:e4e0b4ac780e12a3493da36fc06dc5bc0e1804d77f6fc3177c7591fbd03d4924
 size 735216376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2807a9acc950ff1e7baa887af2e4f7a2e9ba0cf01a47099d2a1023bfc494023a
+oid sha256:58dcd91962a421b3bd1f895143c1cf93caf62cf37d057c669274d148c8f016a2
 size 1475248442
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b074fcbd4de7291eb93634857c7ea7aadd27bba5e33cd735740b9c7250484019
+oid sha256:18047793e069e796780116e68c886aefe3991c2b5d10deed7f9aaac307ff5268
 size 1000
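Each of the pointer files above follows the Git LFS pointer format (version, oid sha256, size), so a downloaded checkpoint file can be checked against the pointer this commit writes. A minimal sketch, assuming the real binaries have already been fetched (e.g. via `git lfs pull`); the path and expected values are taken from the model.safetensors pointer in this commit and would need to be swapped for the other files:

```python
# Minimal sketch: verify a pulled LFS object against the oid/size in its pointer.
import hashlib
import os

path = "last-checkpoint/model.safetensors"
expected_oid = "e4e0b4ac780e12a3493da36fc06dc5bc0e1804d77f6fc3177c7591fbd03d4924"
expected_size = 735216376

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches downloaded file")
```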
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.7777777777777777,
+  "epoch": 3.0,
   "eval_steps": 25,
-  "global_step": 150,
+  "global_step": 162,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1111,6 +1111,90 @@
       "eval_steps_per_second": 1.088,
       "eval_test_triplet_cosine_accuracy": 0.9791250228881836,
       "step": 150
+    },
+    {
+      "epoch": 2.7962962962962963,
+      "grad_norm": 0.24518300592899323,
+      "learning_rate": 5.741444866920153e-06,
+      "loss": 0.2408,
+      "step": 151
+    },
+    {
+      "epoch": 2.814814814814815,
+      "grad_norm": 0.3211422264575958,
+      "learning_rate": 5.779467680608365e-06,
+      "loss": 0.2394,
+      "step": 152
+    },
+    {
+      "epoch": 2.8333333333333335,
+      "grad_norm": 0.2589081823825836,
+      "learning_rate": 5.817490494296578e-06,
+      "loss": 0.2317,
+      "step": 153
+    },
+    {
+      "epoch": 2.851851851851852,
+      "grad_norm": 0.26596707105636597,
+      "learning_rate": 5.855513307984791e-06,
+      "loss": 0.2367,
+      "step": 154
+    },
+    {
+      "epoch": 2.8703703703703702,
+      "grad_norm": 0.25816991925239563,
+      "learning_rate": 5.8935361216730046e-06,
+      "loss": 0.2375,
+      "step": 155
+    },
+    {
+      "epoch": 2.888888888888889,
+      "grad_norm": 0.24328885972499847,
+      "learning_rate": 5.931558935361217e-06,
+      "loss": 0.2351,
+      "step": 156
+    },
+    {
+      "epoch": 2.9074074074074074,
+      "grad_norm": 0.2682211995124817,
+      "learning_rate": 5.9695817490494305e-06,
+      "loss": 0.2448,
+      "step": 157
+    },
+    {
+      "epoch": 2.925925925925926,
+      "grad_norm": 0.23873576521873474,
+      "learning_rate": 6.007604562737643e-06,
+      "loss": 0.229,
+      "step": 158
+    },
+    {
+      "epoch": 2.9444444444444446,
+      "grad_norm": 0.23763084411621094,
+      "learning_rate": 6.0456273764258555e-06,
+      "loss": 0.2274,
+      "step": 159
+    },
+    {
+      "epoch": 2.962962962962963,
+      "grad_norm": 0.24440748989582062,
+      "learning_rate": 6.083650190114069e-06,
+      "loss": 0.2313,
+      "step": 160
+    },
+    {
+      "epoch": 2.9814814814814814,
+      "grad_norm": 0.24925145506858826,
+      "learning_rate": 6.121673003802282e-06,
+      "loss": 0.2269,
+      "step": 161
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.2385304570198059,
+      "learning_rate": 6.159695817490496e-06,
+      "loss": 0.2298,
+      "step": 162
     }
   ],
   "logging_steps": 1.0,
@@ -1125,7 +1209,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
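The new log entries and the should_training_stop flag flipping to true can be read straight out of the checkpoint. A minimal sketch, assuming the standard Hugging Face Trainer layout in which the per-step records shown above sit under a "log_history" list:

```python
# Minimal sketch: inspect the checkpoint's trainer state and report where training ended.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"])              # 3.0 in this commit
print("global_step:", state["global_step"])  # 162 in this commit

# Last training log entry (step 162): loss 0.2298, lr ~6.16e-06, grad_norm ~0.239
train_logs = [e for e in state.get("log_history", []) if "loss" in e]
if train_logs:
    last = train_logs[-1]
    print(last["step"], last["loss"], last["learning_rate"])
```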