CocoRoF commited on
Commit
4d8e572
·
verified ·
1 Parent(s): 3b7db0f

Training in progress, step 12495, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f65db42286c6828403e19bd03efcefb25250e1efec73d5d62df88d031877370
3
  size 368988278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b8b80d9f328d9b46b771e57b9c11a0174556d172f3cffdd9f8feecd8a89c97
3
  size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa947f13cc7ec696d218b55e64c458be90c9bb7d6f39b0a47978915ec3764eac
3
  size 1107079290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bee36ad6289662fa604c98e4b82098bdb5f4ece0f63f769701b009cbd2467264
3
  size 1107079290
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2000996a6fbcd6fb689041f1ea8cf89a2645e18896cfa2a47f022b7761d5c9e
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3fd064071c0ab1e33cfa9ed205d6f6c83d6ee49646339c32b79ed5829a3484
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8003091193973673,
5
  "eval_steps": 2500,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7039,6 +7039,1749 @@
7039
  "eval_samples_per_second": 1574.473,
7040
  "eval_steps_per_second": 49.202,
7041
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7042
  }
7043
  ],
7044
  "logging_steps": 10,
@@ -7053,12 +8796,12 @@
7053
  "should_evaluate": false,
7054
  "should_log": false,
7055
  "should_save": true,
7056
- "should_training_stop": false
7057
  },
7058
  "attributes": {}
7059
  }
7060
  },
7061
- "total_flos": 2.761116577746125e+19,
7062
  "train_batch_size": 4,
7063
  "trial_name": null,
7064
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9999862446870104,
5
  "eval_steps": 2500,
6
+ "global_step": 12495,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7039
  "eval_samples_per_second": 1574.473,
7040
  "eval_steps_per_second": 49.202,
7041
  "step": 10000
7042
+ },
7043
+ {
7044
+ "epoch": 0.8011094285167646,
7045
+ "grad_norm": 77.1875,
7046
+ "learning_rate": 9.984353338811487e-07,
7047
+ "loss": 130.3064,
7048
+ "step": 10010
7049
+ },
7050
+ {
7051
+ "epoch": 0.8019097376361619,
7052
+ "grad_norm": 74.0,
7053
+ "learning_rate": 9.98433770778133e-07,
7054
+ "loss": 132.0653,
7055
+ "step": 10020
7056
+ },
7057
+ {
7058
+ "epoch": 0.8027100467555593,
7059
+ "grad_norm": 84.875,
7060
+ "learning_rate": 9.984322076751171e-07,
7061
+ "loss": 132.2227,
7062
+ "step": 10030
7063
+ },
7064
+ {
7065
+ "epoch": 0.8035103558749567,
7066
+ "grad_norm": 76.625,
7067
+ "learning_rate": 9.984306445721013e-07,
7068
+ "loss": 130.7247,
7069
+ "step": 10040
7070
+ },
7071
+ {
7072
+ "epoch": 0.8043106649943541,
7073
+ "grad_norm": 75.8125,
7074
+ "learning_rate": 9.984290814690853e-07,
7075
+ "loss": 131.7206,
7076
+ "step": 10050
7077
+ },
7078
+ {
7079
+ "epoch": 0.8051109741137514,
7080
+ "grad_norm": 73.6875,
7081
+ "learning_rate": 9.984275183660696e-07,
7082
+ "loss": 131.013,
7083
+ "step": 10060
7084
+ },
7085
+ {
7086
+ "epoch": 0.8059112832331488,
7087
+ "grad_norm": 72.625,
7088
+ "learning_rate": 9.984259552630538e-07,
7089
+ "loss": 132.8077,
7090
+ "step": 10070
7091
+ },
7092
+ {
7093
+ "epoch": 0.8067115923525462,
7094
+ "grad_norm": 72.375,
7095
+ "learning_rate": 9.98424392160038e-07,
7096
+ "loss": 132.2377,
7097
+ "step": 10080
7098
+ },
7099
+ {
7100
+ "epoch": 0.8075119014719435,
7101
+ "grad_norm": 82.5625,
7102
+ "learning_rate": 9.984228290570222e-07,
7103
+ "loss": 131.6139,
7104
+ "step": 10090
7105
+ },
7106
+ {
7107
+ "epoch": 0.808312210591341,
7108
+ "grad_norm": 75.4375,
7109
+ "learning_rate": 9.984212659540062e-07,
7110
+ "loss": 130.3295,
7111
+ "step": 10100
7112
+ },
7113
+ {
7114
+ "epoch": 0.8091125197107383,
7115
+ "grad_norm": 75.5,
7116
+ "learning_rate": 9.984197028509904e-07,
7117
+ "loss": 131.7549,
7118
+ "step": 10110
7119
+ },
7120
+ {
7121
+ "epoch": 0.8099128288301356,
7122
+ "grad_norm": 77.875,
7123
+ "learning_rate": 9.984181397479747e-07,
7124
+ "loss": 131.4637,
7125
+ "step": 10120
7126
+ },
7127
+ {
7128
+ "epoch": 0.810713137949533,
7129
+ "grad_norm": 74.8125,
7130
+ "learning_rate": 9.984165766449589e-07,
7131
+ "loss": 131.4822,
7132
+ "step": 10130
7133
+ },
7134
+ {
7135
+ "epoch": 0.8115134470689304,
7136
+ "grad_norm": 78.6875,
7137
+ "learning_rate": 9.984150135419429e-07,
7138
+ "loss": 131.1819,
7139
+ "step": 10140
7140
+ },
7141
+ {
7142
+ "epoch": 0.8123137561883278,
7143
+ "grad_norm": 78.0,
7144
+ "learning_rate": 9.98413450438927e-07,
7145
+ "loss": 131.0785,
7146
+ "step": 10150
7147
+ },
7148
+ {
7149
+ "epoch": 0.8131140653077251,
7150
+ "grad_norm": 78.9375,
7151
+ "learning_rate": 9.984118873359113e-07,
7152
+ "loss": 130.286,
7153
+ "step": 10160
7154
+ },
7155
+ {
7156
+ "epoch": 0.8139143744271224,
7157
+ "grad_norm": 78.0625,
7158
+ "learning_rate": 9.984103242328953e-07,
7159
+ "loss": 131.6022,
7160
+ "step": 10170
7161
+ },
7162
+ {
7163
+ "epoch": 0.8147146835465199,
7164
+ "grad_norm": 78.375,
7165
+ "learning_rate": 9.984087611298795e-07,
7166
+ "loss": 131.5716,
7167
+ "step": 10180
7168
+ },
7169
+ {
7170
+ "epoch": 0.8155149926659172,
7171
+ "grad_norm": 74.875,
7172
+ "learning_rate": 9.984071980268638e-07,
7173
+ "loss": 131.0095,
7174
+ "step": 10190
7175
+ },
7176
+ {
7177
+ "epoch": 0.8163153017853145,
7178
+ "grad_norm": 77.875,
7179
+ "learning_rate": 9.98405634923848e-07,
7180
+ "loss": 131.2664,
7181
+ "step": 10200
7182
+ },
7183
+ {
7184
+ "epoch": 0.817115610904712,
7185
+ "grad_norm": 73.5625,
7186
+ "learning_rate": 9.98404071820832e-07,
7187
+ "loss": 131.8767,
7188
+ "step": 10210
7189
+ },
7190
+ {
7191
+ "epoch": 0.8179159200241093,
7192
+ "grad_norm": 79.8125,
7193
+ "learning_rate": 9.984025087178162e-07,
7194
+ "loss": 128.864,
7195
+ "step": 10220
7196
+ },
7197
+ {
7198
+ "epoch": 0.8187162291435067,
7199
+ "grad_norm": 77.375,
7200
+ "learning_rate": 9.984009456148004e-07,
7201
+ "loss": 130.4429,
7202
+ "step": 10230
7203
+ },
7204
+ {
7205
+ "epoch": 0.819516538262904,
7206
+ "grad_norm": 70.8125,
7207
+ "learning_rate": 9.983993825117846e-07,
7208
+ "loss": 128.6023,
7209
+ "step": 10240
7210
+ },
7211
+ {
7212
+ "epoch": 0.8203168473823014,
7213
+ "grad_norm": 74.4375,
7214
+ "learning_rate": 9.983978194087689e-07,
7215
+ "loss": 130.9503,
7216
+ "step": 10250
7217
+ },
7218
+ {
7219
+ "epoch": 0.8211171565016988,
7220
+ "grad_norm": 80.125,
7221
+ "learning_rate": 9.983962563057529e-07,
7222
+ "loss": 132.8031,
7223
+ "step": 10260
7224
+ },
7225
+ {
7226
+ "epoch": 0.8219174656210961,
7227
+ "grad_norm": 73.8125,
7228
+ "learning_rate": 9.98394693202737e-07,
7229
+ "loss": 130.115,
7230
+ "step": 10270
7231
+ },
7232
+ {
7233
+ "epoch": 0.8227177747404936,
7234
+ "grad_norm": 73.75,
7235
+ "learning_rate": 9.983931300997213e-07,
7236
+ "loss": 130.5055,
7237
+ "step": 10280
7238
+ },
7239
+ {
7240
+ "epoch": 0.8235180838598909,
7241
+ "grad_norm": 74.0,
7242
+ "learning_rate": 9.983915669967053e-07,
7243
+ "loss": 130.6606,
7244
+ "step": 10290
7245
+ },
7246
+ {
7247
+ "epoch": 0.8243183929792882,
7248
+ "grad_norm": 78.6875,
7249
+ "learning_rate": 9.983900038936895e-07,
7250
+ "loss": 130.8658,
7251
+ "step": 10300
7252
+ },
7253
+ {
7254
+ "epoch": 0.8251187020986857,
7255
+ "grad_norm": 78.0,
7256
+ "learning_rate": 9.983884407906737e-07,
7257
+ "loss": 131.8639,
7258
+ "step": 10310
7259
+ },
7260
+ {
7261
+ "epoch": 0.825919011218083,
7262
+ "grad_norm": 74.0,
7263
+ "learning_rate": 9.98386877687658e-07,
7264
+ "loss": 131.2057,
7265
+ "step": 10320
7266
+ },
7267
+ {
7268
+ "epoch": 0.8267193203374803,
7269
+ "grad_norm": 78.0625,
7270
+ "learning_rate": 9.98385314584642e-07,
7271
+ "loss": 131.9255,
7272
+ "step": 10330
7273
+ },
7274
+ {
7275
+ "epoch": 0.8275196294568777,
7276
+ "grad_norm": 78.75,
7277
+ "learning_rate": 9.983837514816262e-07,
7278
+ "loss": 131.7983,
7279
+ "step": 10340
7280
+ },
7281
+ {
7282
+ "epoch": 0.8283199385762751,
7283
+ "grad_norm": 81.9375,
7284
+ "learning_rate": 9.983821883786104e-07,
7285
+ "loss": 132.316,
7286
+ "step": 10350
7287
+ },
7288
+ {
7289
+ "epoch": 0.8291202476956725,
7290
+ "grad_norm": 80.875,
7291
+ "learning_rate": 9.983806252755946e-07,
7292
+ "loss": 131.1004,
7293
+ "step": 10360
7294
+ },
7295
+ {
7296
+ "epoch": 0.8299205568150698,
7297
+ "grad_norm": 71.0,
7298
+ "learning_rate": 9.983790621725786e-07,
7299
+ "loss": 132.1643,
7300
+ "step": 10370
7301
+ },
7302
+ {
7303
+ "epoch": 0.8307208659344671,
7304
+ "grad_norm": 74.375,
7305
+ "learning_rate": 9.983774990695628e-07,
7306
+ "loss": 130.8654,
7307
+ "step": 10380
7308
+ },
7309
+ {
7310
+ "epoch": 0.8315211750538646,
7311
+ "grad_norm": 74.875,
7312
+ "learning_rate": 9.98375935966547e-07,
7313
+ "loss": 130.205,
7314
+ "step": 10390
7315
+ },
7316
+ {
7317
+ "epoch": 0.8323214841732619,
7318
+ "grad_norm": 73.375,
7319
+ "learning_rate": 9.983743728635313e-07,
7320
+ "loss": 130.6661,
7321
+ "step": 10400
7322
+ },
7323
+ {
7324
+ "epoch": 0.8331217932926593,
7325
+ "grad_norm": 73.4375,
7326
+ "learning_rate": 9.983728097605155e-07,
7327
+ "loss": 131.2944,
7328
+ "step": 10410
7329
+ },
7330
+ {
7331
+ "epoch": 0.8339221024120567,
7332
+ "grad_norm": 75.875,
7333
+ "learning_rate": 9.983712466574995e-07,
7334
+ "loss": 132.2854,
7335
+ "step": 10420
7336
+ },
7337
+ {
7338
+ "epoch": 0.834722411531454,
7339
+ "grad_norm": 80.625,
7340
+ "learning_rate": 9.983696835544837e-07,
7341
+ "loss": 130.7651,
7342
+ "step": 10430
7343
+ },
7344
+ {
7345
+ "epoch": 0.8355227206508514,
7346
+ "grad_norm": 71.6875,
7347
+ "learning_rate": 9.98368120451468e-07,
7348
+ "loss": 130.8648,
7349
+ "step": 10440
7350
+ },
7351
+ {
7352
+ "epoch": 0.8363230297702487,
7353
+ "grad_norm": 77.125,
7354
+ "learning_rate": 9.98366557348452e-07,
7355
+ "loss": 130.8581,
7356
+ "step": 10450
7357
+ },
7358
+ {
7359
+ "epoch": 0.8371233388896461,
7360
+ "grad_norm": 73.625,
7361
+ "learning_rate": 9.983649942454362e-07,
7362
+ "loss": 130.3991,
7363
+ "step": 10460
7364
+ },
7365
+ {
7366
+ "epoch": 0.8379236480090435,
7367
+ "grad_norm": 79.375,
7368
+ "learning_rate": 9.983634311424204e-07,
7369
+ "loss": 131.2294,
7370
+ "step": 10470
7371
+ },
7372
+ {
7373
+ "epoch": 0.8387239571284408,
7374
+ "grad_norm": 78.3125,
7375
+ "learning_rate": 9.983618680394046e-07,
7376
+ "loss": 129.3754,
7377
+ "step": 10480
7378
+ },
7379
+ {
7380
+ "epoch": 0.8395242662478383,
7381
+ "grad_norm": 81.75,
7382
+ "learning_rate": 9.983603049363886e-07,
7383
+ "loss": 131.7653,
7384
+ "step": 10490
7385
+ },
7386
+ {
7387
+ "epoch": 0.8403245753672356,
7388
+ "grad_norm": 74.9375,
7389
+ "learning_rate": 9.983587418333728e-07,
7390
+ "loss": 131.5391,
7391
+ "step": 10500
7392
+ },
7393
+ {
7394
+ "epoch": 0.8411248844866329,
7395
+ "grad_norm": 76.75,
7396
+ "learning_rate": 9.98357178730357e-07,
7397
+ "loss": 131.4889,
7398
+ "step": 10510
7399
+ },
7400
+ {
7401
+ "epoch": 0.8419251936060304,
7402
+ "grad_norm": 72.1875,
7403
+ "learning_rate": 9.98355615627341e-07,
7404
+ "loss": 133.0458,
7405
+ "step": 10520
7406
+ },
7407
+ {
7408
+ "epoch": 0.8427255027254277,
7409
+ "grad_norm": 81.3125,
7410
+ "learning_rate": 9.983540525243253e-07,
7411
+ "loss": 130.8461,
7412
+ "step": 10530
7413
+ },
7414
+ {
7415
+ "epoch": 0.8435258118448251,
7416
+ "grad_norm": 74.75,
7417
+ "learning_rate": 9.983524894213095e-07,
7418
+ "loss": 131.7515,
7419
+ "step": 10540
7420
+ },
7421
+ {
7422
+ "epoch": 0.8443261209642224,
7423
+ "grad_norm": 70.8125,
7424
+ "learning_rate": 9.983509263182937e-07,
7425
+ "loss": 132.1545,
7426
+ "step": 10550
7427
+ },
7428
+ {
7429
+ "epoch": 0.8451264300836198,
7430
+ "grad_norm": 76.9375,
7431
+ "learning_rate": 9.98349363215278e-07,
7432
+ "loss": 130.9946,
7433
+ "step": 10560
7434
+ },
7435
+ {
7436
+ "epoch": 0.8459267392030172,
7437
+ "grad_norm": 72.25,
7438
+ "learning_rate": 9.983478001122621e-07,
7439
+ "loss": 130.338,
7440
+ "step": 10570
7441
+ },
7442
+ {
7443
+ "epoch": 0.8467270483224145,
7444
+ "grad_norm": 75.1875,
7445
+ "learning_rate": 9.983462370092462e-07,
7446
+ "loss": 131.5349,
7447
+ "step": 10580
7448
+ },
7449
+ {
7450
+ "epoch": 0.8475273574418118,
7451
+ "grad_norm": 70.125,
7452
+ "learning_rate": 9.983446739062304e-07,
7453
+ "loss": 130.6219,
7454
+ "step": 10590
7455
+ },
7456
+ {
7457
+ "epoch": 0.8483276665612093,
7458
+ "grad_norm": 78.0625,
7459
+ "learning_rate": 9.983431108032146e-07,
7460
+ "loss": 131.3493,
7461
+ "step": 10600
7462
+ },
7463
+ {
7464
+ "epoch": 0.8491279756806066,
7465
+ "grad_norm": 74.0,
7466
+ "learning_rate": 9.983415477001986e-07,
7467
+ "loss": 131.0083,
7468
+ "step": 10610
7469
+ },
7470
+ {
7471
+ "epoch": 0.849928284800004,
7472
+ "grad_norm": 80.75,
7473
+ "learning_rate": 9.983399845971828e-07,
7474
+ "loss": 131.9277,
7475
+ "step": 10620
7476
+ },
7477
+ {
7478
+ "epoch": 0.8507285939194014,
7479
+ "grad_norm": 79.1875,
7480
+ "learning_rate": 9.98338421494167e-07,
7481
+ "loss": 130.602,
7482
+ "step": 10630
7483
+ },
7484
+ {
7485
+ "epoch": 0.8515289030387987,
7486
+ "grad_norm": 77.5625,
7487
+ "learning_rate": 9.98336858391151e-07,
7488
+ "loss": 130.4636,
7489
+ "step": 10640
7490
+ },
7491
+ {
7492
+ "epoch": 0.8523292121581961,
7493
+ "grad_norm": 77.75,
7494
+ "learning_rate": 9.983352952881353e-07,
7495
+ "loss": 131.2724,
7496
+ "step": 10650
7497
+ },
7498
+ {
7499
+ "epoch": 0.8531295212775935,
7500
+ "grad_norm": 78.875,
7501
+ "learning_rate": 9.983337321851195e-07,
7502
+ "loss": 130.8466,
7503
+ "step": 10660
7504
+ },
7505
+ {
7506
+ "epoch": 0.8539298303969909,
7507
+ "grad_norm": 77.375,
7508
+ "learning_rate": 9.983321690821037e-07,
7509
+ "loss": 130.6045,
7510
+ "step": 10670
7511
+ },
7512
+ {
7513
+ "epoch": 0.8547301395163882,
7514
+ "grad_norm": 77.875,
7515
+ "learning_rate": 9.983306059790877e-07,
7516
+ "loss": 131.8468,
7517
+ "step": 10680
7518
+ },
7519
+ {
7520
+ "epoch": 0.8555304486357855,
7521
+ "grad_norm": 75.25,
7522
+ "learning_rate": 9.98329042876072e-07,
7523
+ "loss": 130.7249,
7524
+ "step": 10690
7525
+ },
7526
+ {
7527
+ "epoch": 0.856330757755183,
7528
+ "grad_norm": 79.25,
7529
+ "learning_rate": 9.983274797730561e-07,
7530
+ "loss": 130.1633,
7531
+ "step": 10700
7532
+ },
7533
+ {
7534
+ "epoch": 0.8571310668745803,
7535
+ "grad_norm": 74.625,
7536
+ "learning_rate": 9.983259166700404e-07,
7537
+ "loss": 130.6425,
7538
+ "step": 10710
7539
+ },
7540
+ {
7541
+ "epoch": 0.8579313759939776,
7542
+ "grad_norm": 75.25,
7543
+ "learning_rate": 9.983243535670246e-07,
7544
+ "loss": 130.9092,
7545
+ "step": 10720
7546
+ },
7547
+ {
7548
+ "epoch": 0.858731685113375,
7549
+ "grad_norm": 77.625,
7550
+ "learning_rate": 9.983227904640088e-07,
7551
+ "loss": 129.9958,
7552
+ "step": 10730
7553
+ },
7554
+ {
7555
+ "epoch": 0.8595319942327724,
7556
+ "grad_norm": 79.5,
7557
+ "learning_rate": 9.983212273609928e-07,
7558
+ "loss": 132.0485,
7559
+ "step": 10740
7560
+ },
7561
+ {
7562
+ "epoch": 0.8603323033521698,
7563
+ "grad_norm": 77.0625,
7564
+ "learning_rate": 9.98319664257977e-07,
7565
+ "loss": 131.1308,
7566
+ "step": 10750
7567
+ },
7568
+ {
7569
+ "epoch": 0.8611326124715671,
7570
+ "grad_norm": 82.4375,
7571
+ "learning_rate": 9.983181011549612e-07,
7572
+ "loss": 131.665,
7573
+ "step": 10760
7574
+ },
7575
+ {
7576
+ "epoch": 0.8619329215909645,
7577
+ "grad_norm": 74.375,
7578
+ "learning_rate": 9.983165380519452e-07,
7579
+ "loss": 131.7367,
7580
+ "step": 10770
7581
+ },
7582
+ {
7583
+ "epoch": 0.8627332307103619,
7584
+ "grad_norm": 77.9375,
7585
+ "learning_rate": 9.983149749489295e-07,
7586
+ "loss": 130.5749,
7587
+ "step": 10780
7588
+ },
7589
+ {
7590
+ "epoch": 0.8635335398297592,
7591
+ "grad_norm": 72.1875,
7592
+ "learning_rate": 9.983134118459137e-07,
7593
+ "loss": 129.4679,
7594
+ "step": 10790
7595
+ },
7596
+ {
7597
+ "epoch": 0.8643338489491567,
7598
+ "grad_norm": 79.1875,
7599
+ "learning_rate": 9.983118487428977e-07,
7600
+ "loss": 132.4236,
7601
+ "step": 10800
7602
+ },
7603
+ {
7604
+ "epoch": 0.865134158068554,
7605
+ "grad_norm": 72.75,
7606
+ "learning_rate": 9.98310285639882e-07,
7607
+ "loss": 129.7827,
7608
+ "step": 10810
7609
+ },
7610
+ {
7611
+ "epoch": 0.8659344671879513,
7612
+ "grad_norm": 73.5625,
7613
+ "learning_rate": 9.983087225368661e-07,
7614
+ "loss": 131.0587,
7615
+ "step": 10820
7616
+ },
7617
+ {
7618
+ "epoch": 0.8667347763073487,
7619
+ "grad_norm": 75.375,
7620
+ "learning_rate": 9.983071594338503e-07,
7621
+ "loss": 131.5654,
7622
+ "step": 10830
7623
+ },
7624
+ {
7625
+ "epoch": 0.8675350854267461,
7626
+ "grad_norm": 79.875,
7627
+ "learning_rate": 9.983055963308343e-07,
7628
+ "loss": 131.1106,
7629
+ "step": 10840
7630
+ },
7631
+ {
7632
+ "epoch": 0.8683353945461435,
7633
+ "grad_norm": 74.0625,
7634
+ "learning_rate": 9.983040332278186e-07,
7635
+ "loss": 131.5627,
7636
+ "step": 10850
7637
+ },
7638
+ {
7639
+ "epoch": 0.8691357036655408,
7640
+ "grad_norm": 75.0,
7641
+ "learning_rate": 9.983024701248028e-07,
7642
+ "loss": 130.7058,
7643
+ "step": 10860
7644
+ },
7645
+ {
7646
+ "epoch": 0.8699360127849382,
7647
+ "grad_norm": 77.125,
7648
+ "learning_rate": 9.98300907021787e-07,
7649
+ "loss": 131.1618,
7650
+ "step": 10870
7651
+ },
7652
+ {
7653
+ "epoch": 0.8707363219043356,
7654
+ "grad_norm": 78.5,
7655
+ "learning_rate": 9.982993439187712e-07,
7656
+ "loss": 131.6918,
7657
+ "step": 10880
7658
+ },
7659
+ {
7660
+ "epoch": 0.8715366310237329,
7661
+ "grad_norm": 77.0,
7662
+ "learning_rate": 9.982977808157552e-07,
7663
+ "loss": 130.2754,
7664
+ "step": 10890
7665
+ },
7666
+ {
7667
+ "epoch": 0.8723369401431302,
7668
+ "grad_norm": 76.375,
7669
+ "learning_rate": 9.982962177127394e-07,
7670
+ "loss": 130.6358,
7671
+ "step": 10900
7672
+ },
7673
+ {
7674
+ "epoch": 0.8731372492625277,
7675
+ "grad_norm": 76.1875,
7676
+ "learning_rate": 9.982946546097237e-07,
7677
+ "loss": 131.4433,
7678
+ "step": 10910
7679
+ },
7680
+ {
7681
+ "epoch": 0.873937558381925,
7682
+ "grad_norm": 76.8125,
7683
+ "learning_rate": 9.982930915067079e-07,
7684
+ "loss": 131.1567,
7685
+ "step": 10920
7686
+ },
7687
+ {
7688
+ "epoch": 0.8747378675013224,
7689
+ "grad_norm": 75.0625,
7690
+ "learning_rate": 9.982915284036919e-07,
7691
+ "loss": 130.6782,
7692
+ "step": 10930
7693
+ },
7694
+ {
7695
+ "epoch": 0.8755381766207198,
7696
+ "grad_norm": 73.4375,
7697
+ "learning_rate": 9.982899653006761e-07,
7698
+ "loss": 130.3121,
7699
+ "step": 10940
7700
+ },
7701
+ {
7702
+ "epoch": 0.8763384857401171,
7703
+ "grad_norm": 75.25,
7704
+ "learning_rate": 9.982884021976603e-07,
7705
+ "loss": 131.139,
7706
+ "step": 10950
7707
+ },
7708
+ {
7709
+ "epoch": 0.8771387948595145,
7710
+ "grad_norm": 73.4375,
7711
+ "learning_rate": 9.982868390946443e-07,
7712
+ "loss": 130.9144,
7713
+ "step": 10960
7714
+ },
7715
+ {
7716
+ "epoch": 0.8779391039789118,
7717
+ "grad_norm": 79.5,
7718
+ "learning_rate": 9.982852759916286e-07,
7719
+ "loss": 129.6913,
7720
+ "step": 10970
7721
+ },
7722
+ {
7723
+ "epoch": 0.8787394130983093,
7724
+ "grad_norm": 73.3125,
7725
+ "learning_rate": 9.982837128886128e-07,
7726
+ "loss": 129.9289,
7727
+ "step": 10980
7728
+ },
7729
+ {
7730
+ "epoch": 0.8795397222177066,
7731
+ "grad_norm": 73.8125,
7732
+ "learning_rate": 9.98282149785597e-07,
7733
+ "loss": 132.2781,
7734
+ "step": 10990
7735
+ },
7736
+ {
7737
+ "epoch": 0.8803400313371039,
7738
+ "grad_norm": 75.9375,
7739
+ "learning_rate": 9.98280586682581e-07,
7740
+ "loss": 130.8845,
7741
+ "step": 11000
7742
+ },
7743
+ {
7744
+ "epoch": 0.8811403404565014,
7745
+ "grad_norm": 76.25,
7746
+ "learning_rate": 9.982790235795652e-07,
7747
+ "loss": 129.7611,
7748
+ "step": 11010
7749
+ },
7750
+ {
7751
+ "epoch": 0.8819406495758987,
7752
+ "grad_norm": 83.375,
7753
+ "learning_rate": 9.982774604765494e-07,
7754
+ "loss": 129.9167,
7755
+ "step": 11020
7756
+ },
7757
+ {
7758
+ "epoch": 0.882740958695296,
7759
+ "grad_norm": 76.3125,
7760
+ "learning_rate": 9.982758973735337e-07,
7761
+ "loss": 130.6375,
7762
+ "step": 11030
7763
+ },
7764
+ {
7765
+ "epoch": 0.8835412678146934,
7766
+ "grad_norm": 77.4375,
7767
+ "learning_rate": 9.982743342705179e-07,
7768
+ "loss": 130.627,
7769
+ "step": 11040
7770
+ },
7771
+ {
7772
+ "epoch": 0.8843415769340908,
7773
+ "grad_norm": 71.3125,
7774
+ "learning_rate": 9.982727711675019e-07,
7775
+ "loss": 130.6759,
7776
+ "step": 11050
7777
+ },
7778
+ {
7779
+ "epoch": 0.8851418860534882,
7780
+ "grad_norm": 77.875,
7781
+ "learning_rate": 9.98271208064486e-07,
7782
+ "loss": 131.0551,
7783
+ "step": 11060
7784
+ },
7785
+ {
7786
+ "epoch": 0.8859421951728855,
7787
+ "grad_norm": 73.8125,
7788
+ "learning_rate": 9.982696449614703e-07,
7789
+ "loss": 130.5227,
7790
+ "step": 11070
7791
+ },
7792
+ {
7793
+ "epoch": 0.8867425042922829,
7794
+ "grad_norm": 77.25,
7795
+ "learning_rate": 9.982680818584545e-07,
7796
+ "loss": 132.4977,
7797
+ "step": 11080
7798
+ },
7799
+ {
7800
+ "epoch": 0.8875428134116803,
7801
+ "grad_norm": 81.375,
7802
+ "learning_rate": 9.982665187554385e-07,
7803
+ "loss": 131.0946,
7804
+ "step": 11090
7805
+ },
7806
+ {
7807
+ "epoch": 0.8883431225310776,
7808
+ "grad_norm": 74.375,
7809
+ "learning_rate": 9.982649556524228e-07,
7810
+ "loss": 130.3095,
7811
+ "step": 11100
7812
+ },
7813
+ {
7814
+ "epoch": 0.889143431650475,
7815
+ "grad_norm": 76.3125,
7816
+ "learning_rate": 9.98263392549407e-07,
7817
+ "loss": 131.8633,
7818
+ "step": 11110
7819
+ },
7820
+ {
7821
+ "epoch": 0.8899437407698724,
7822
+ "grad_norm": 83.125,
7823
+ "learning_rate": 9.98261829446391e-07,
7824
+ "loss": 131.3789,
7825
+ "step": 11120
7826
+ },
7827
+ {
7828
+ "epoch": 0.8907440498892697,
7829
+ "grad_norm": 71.375,
7830
+ "learning_rate": 9.982602663433752e-07,
7831
+ "loss": 131.0635,
7832
+ "step": 11130
7833
+ },
7834
+ {
7835
+ "epoch": 0.8915443590086671,
7836
+ "grad_norm": 76.0625,
7837
+ "learning_rate": 9.982587032403594e-07,
7838
+ "loss": 131.6485,
7839
+ "step": 11140
7840
+ },
7841
+ {
7842
+ "epoch": 0.8923446681280645,
7843
+ "grad_norm": 84.5625,
7844
+ "learning_rate": 9.982571401373434e-07,
7845
+ "loss": 131.5727,
7846
+ "step": 11150
7847
+ },
7848
+ {
7849
+ "epoch": 0.8931449772474618,
7850
+ "grad_norm": 76.3125,
7851
+ "learning_rate": 9.982555770343276e-07,
7852
+ "loss": 131.7047,
7853
+ "step": 11160
7854
+ },
7855
+ {
7856
+ "epoch": 0.8939452863668592,
7857
+ "grad_norm": 78.25,
7858
+ "learning_rate": 9.982540139313119e-07,
7859
+ "loss": 132.1118,
7860
+ "step": 11170
7861
+ },
7862
+ {
7863
+ "epoch": 0.8947455954862565,
7864
+ "grad_norm": 77.5,
7865
+ "learning_rate": 9.98252450828296e-07,
7866
+ "loss": 130.6895,
7867
+ "step": 11180
7868
+ },
7869
+ {
7870
+ "epoch": 0.895545904605654,
7871
+ "grad_norm": 74.75,
7872
+ "learning_rate": 9.982508877252803e-07,
7873
+ "loss": 130.8122,
7874
+ "step": 11190
7875
+ },
7876
+ {
7877
+ "epoch": 0.8963462137250513,
7878
+ "grad_norm": 75.0,
7879
+ "learning_rate": 9.982493246222645e-07,
7880
+ "loss": 130.9431,
7881
+ "step": 11200
7882
+ },
7883
+ {
7884
+ "epoch": 0.8971465228444486,
7885
+ "grad_norm": 75.375,
7886
+ "learning_rate": 9.982477615192485e-07,
7887
+ "loss": 131.6024,
7888
+ "step": 11210
7889
+ },
7890
+ {
7891
+ "epoch": 0.8979468319638461,
7892
+ "grad_norm": 75.25,
7893
+ "learning_rate": 9.982461984162327e-07,
7894
+ "loss": 130.6127,
7895
+ "step": 11220
7896
+ },
7897
+ {
7898
+ "epoch": 0.8987471410832434,
7899
+ "grad_norm": 84.375,
7900
+ "learning_rate": 9.98244635313217e-07,
7901
+ "loss": 132.7165,
7902
+ "step": 11230
7903
+ },
7904
+ {
7905
+ "epoch": 0.8995474502026408,
7906
+ "grad_norm": 76.75,
7907
+ "learning_rate": 9.982430722102012e-07,
7908
+ "loss": 129.617,
7909
+ "step": 11240
7910
+ },
7911
+ {
7912
+ "epoch": 0.9003477593220381,
7913
+ "grad_norm": 84.6875,
7914
+ "learning_rate": 9.982415091071852e-07,
7915
+ "loss": 130.9968,
7916
+ "step": 11250
7917
+ },
7918
+ {
7919
+ "epoch": 0.9011480684414355,
7920
+ "grad_norm": 74.1875,
7921
+ "learning_rate": 9.982399460041694e-07,
7922
+ "loss": 130.6432,
7923
+ "step": 11260
7924
+ },
7925
+ {
7926
+ "epoch": 0.9019483775608329,
7927
+ "grad_norm": 77.9375,
7928
+ "learning_rate": 9.982383829011536e-07,
7929
+ "loss": 131.2904,
7930
+ "step": 11270
7931
+ },
7932
+ {
7933
+ "epoch": 0.9027486866802302,
7934
+ "grad_norm": 71.9375,
7935
+ "learning_rate": 9.982368197981376e-07,
7936
+ "loss": 131.8605,
7937
+ "step": 11280
7938
+ },
7939
+ {
7940
+ "epoch": 0.9035489957996276,
7941
+ "grad_norm": 76.75,
7942
+ "learning_rate": 9.982352566951218e-07,
7943
+ "loss": 132.6155,
7944
+ "step": 11290
7945
+ },
7946
+ {
7947
+ "epoch": 0.904349304919025,
7948
+ "grad_norm": 74.8125,
7949
+ "learning_rate": 9.98233693592106e-07,
7950
+ "loss": 130.2238,
7951
+ "step": 11300
7952
+ },
7953
+ {
7954
+ "epoch": 0.9051496140384223,
7955
+ "grad_norm": 78.4375,
7956
+ "learning_rate": 9.9823213048909e-07,
7957
+ "loss": 131.5677,
7958
+ "step": 11310
7959
+ },
7960
+ {
7961
+ "epoch": 0.9059499231578197,
7962
+ "grad_norm": 72.0,
7963
+ "learning_rate": 9.982305673860743e-07,
7964
+ "loss": 130.6105,
7965
+ "step": 11320
7966
+ },
7967
+ {
7968
+ "epoch": 0.9067502322772171,
7969
+ "grad_norm": 74.375,
7970
+ "learning_rate": 9.982290042830585e-07,
7971
+ "loss": 130.9643,
7972
+ "step": 11330
7973
+ },
7974
+ {
7975
+ "epoch": 0.9075505413966144,
7976
+ "grad_norm": 80.125,
7977
+ "learning_rate": 9.982274411800427e-07,
7978
+ "loss": 131.6026,
7979
+ "step": 11340
7980
+ },
7981
+ {
7982
+ "epoch": 0.9083508505160118,
7983
+ "grad_norm": 79.1875,
7984
+ "learning_rate": 9.98225878077027e-07,
7985
+ "loss": 131.4425,
7986
+ "step": 11350
7987
+ },
7988
+ {
7989
+ "epoch": 0.9091511596354092,
7990
+ "grad_norm": 76.875,
7991
+ "learning_rate": 9.98224314974011e-07,
7992
+ "loss": 131.8976,
7993
+ "step": 11360
7994
+ },
7995
+ {
7996
+ "epoch": 0.9099514687548066,
7997
+ "grad_norm": 80.0625,
7998
+ "learning_rate": 9.982227518709952e-07,
7999
+ "loss": 131.6046,
8000
+ "step": 11370
8001
+ },
8002
+ {
8003
+ "epoch": 0.9107517778742039,
8004
+ "grad_norm": 80.0625,
8005
+ "learning_rate": 9.982211887679794e-07,
8006
+ "loss": 130.2627,
8007
+ "step": 11380
8008
+ },
8009
+ {
8010
+ "epoch": 0.9115520869936012,
8011
+ "grad_norm": 73.6875,
8012
+ "learning_rate": 9.982196256649636e-07,
8013
+ "loss": 131.1418,
8014
+ "step": 11390
8015
+ },
8016
+ {
8017
+ "epoch": 0.9123523961129987,
8018
+ "grad_norm": 75.3125,
8019
+ "learning_rate": 9.982180625619476e-07,
8020
+ "loss": 130.4075,
8021
+ "step": 11400
8022
+ },
8023
+ {
8024
+ "epoch": 0.913152705232396,
8025
+ "grad_norm": 81.3125,
8026
+ "learning_rate": 9.982164994589318e-07,
8027
+ "loss": 131.0209,
8028
+ "step": 11410
8029
+ },
8030
+ {
8031
+ "epoch": 0.9139530143517933,
8032
+ "grad_norm": 76.0625,
8033
+ "learning_rate": 9.98214936355916e-07,
8034
+ "loss": 130.3062,
8035
+ "step": 11420
8036
+ },
8037
+ {
8038
+ "epoch": 0.9147533234711908,
8039
+ "grad_norm": 73.0,
8040
+ "learning_rate": 9.982133732529003e-07,
8041
+ "loss": 131.0498,
8042
+ "step": 11430
8043
+ },
8044
+ {
8045
+ "epoch": 0.9155536325905881,
8046
+ "grad_norm": 80.6875,
8047
+ "learning_rate": 9.982118101498843e-07,
8048
+ "loss": 130.2114,
8049
+ "step": 11440
8050
+ },
8051
+ {
8052
+ "epoch": 0.9163539417099855,
8053
+ "grad_norm": 75.25,
8054
+ "learning_rate": 9.982102470468685e-07,
8055
+ "loss": 130.6461,
8056
+ "step": 11450
8057
+ },
8058
+ {
8059
+ "epoch": 0.9171542508293828,
8060
+ "grad_norm": 82.5,
8061
+ "learning_rate": 9.982086839438527e-07,
8062
+ "loss": 131.5808,
8063
+ "step": 11460
8064
+ },
8065
+ {
8066
+ "epoch": 0.9179545599487802,
8067
+ "grad_norm": 75.0625,
8068
+ "learning_rate": 9.982071208408367e-07,
8069
+ "loss": 131.6145,
8070
+ "step": 11470
8071
+ },
8072
+ {
8073
+ "epoch": 0.9187548690681776,
8074
+ "grad_norm": 77.375,
8075
+ "learning_rate": 9.98205557737821e-07,
8076
+ "loss": 131.2061,
8077
+ "step": 11480
8078
+ },
8079
+ {
8080
+ "epoch": 0.9195551781875749,
8081
+ "grad_norm": 82.75,
8082
+ "learning_rate": 9.982039946348052e-07,
8083
+ "loss": 130.0431,
8084
+ "step": 11490
8085
+ },
8086
+ {
8087
+ "epoch": 0.9203554873069724,
8088
+ "grad_norm": 73.8125,
8089
+ "learning_rate": 9.982024315317894e-07,
8090
+ "loss": 131.8762,
8091
+ "step": 11500
8092
+ },
8093
+ {
8094
+ "epoch": 0.9211557964263697,
8095
+ "grad_norm": 71.25,
8096
+ "learning_rate": 9.982008684287736e-07,
8097
+ "loss": 131.6012,
8098
+ "step": 11510
8099
+ },
8100
+ {
8101
+ "epoch": 0.921956105545767,
8102
+ "grad_norm": 77.5,
8103
+ "learning_rate": 9.981993053257576e-07,
8104
+ "loss": 131.3987,
8105
+ "step": 11520
8106
+ },
8107
+ {
8108
+ "epoch": 0.9227564146651644,
8109
+ "grad_norm": 79.3125,
8110
+ "learning_rate": 9.981977422227418e-07,
8111
+ "loss": 132.0234,
8112
+ "step": 11530
8113
+ },
8114
+ {
8115
+ "epoch": 0.9235567237845618,
8116
+ "grad_norm": 81.0,
8117
+ "learning_rate": 9.98196179119726e-07,
8118
+ "loss": 132.7055,
8119
+ "step": 11540
8120
+ },
8121
+ {
8122
+ "epoch": 0.9243570329039592,
8123
+ "grad_norm": 81.5,
8124
+ "learning_rate": 9.981946160167102e-07,
8125
+ "loss": 131.5491,
8126
+ "step": 11550
8127
+ },
8128
+ {
8129
+ "epoch": 0.9251573420233565,
8130
+ "grad_norm": 81.3125,
8131
+ "learning_rate": 9.981930529136943e-07,
8132
+ "loss": 129.9155,
8133
+ "step": 11560
8134
+ },
8135
+ {
8136
+ "epoch": 0.9259576511427539,
8137
+ "grad_norm": 77.5,
8138
+ "learning_rate": 9.981914898106785e-07,
8139
+ "loss": 131.0387,
8140
+ "step": 11570
8141
+ },
8142
+ {
8143
+ "epoch": 0.9267579602621513,
8144
+ "grad_norm": 74.5625,
8145
+ "learning_rate": 9.981899267076627e-07,
8146
+ "loss": 129.5762,
8147
+ "step": 11580
8148
+ },
8149
+ {
8150
+ "epoch": 0.9275582693815486,
8151
+ "grad_norm": 72.5,
8152
+ "learning_rate": 9.981883636046467e-07,
8153
+ "loss": 129.8288,
8154
+ "step": 11590
8155
+ },
8156
+ {
8157
+ "epoch": 0.9283585785009459,
8158
+ "grad_norm": 74.6875,
8159
+ "learning_rate": 9.98186800501631e-07,
8160
+ "loss": 130.526,
8161
+ "step": 11600
8162
+ },
8163
+ {
8164
+ "epoch": 0.9291588876203434,
8165
+ "grad_norm": 79.3125,
8166
+ "learning_rate": 9.981852373986151e-07,
8167
+ "loss": 130.7928,
8168
+ "step": 11610
8169
+ },
8170
+ {
8171
+ "epoch": 0.9299591967397407,
8172
+ "grad_norm": 74.5625,
8173
+ "learning_rate": 9.981836742955994e-07,
8174
+ "loss": 130.5095,
8175
+ "step": 11620
8176
+ },
8177
+ {
8178
+ "epoch": 0.9307595058591381,
8179
+ "grad_norm": 78.6875,
8180
+ "learning_rate": 9.981821111925834e-07,
8181
+ "loss": 130.1216,
8182
+ "step": 11630
8183
+ },
8184
+ {
8185
+ "epoch": 0.9315598149785355,
8186
+ "grad_norm": 73.9375,
8187
+ "learning_rate": 9.981805480895676e-07,
8188
+ "loss": 130.3142,
8189
+ "step": 11640
8190
+ },
8191
+ {
8192
+ "epoch": 0.9323601240979328,
8193
+ "grad_norm": 80.25,
8194
+ "learning_rate": 9.981789849865518e-07,
8195
+ "loss": 129.8451,
8196
+ "step": 11650
8197
+ },
8198
+ {
8199
+ "epoch": 0.9331604332173302,
8200
+ "grad_norm": 77.75,
8201
+ "learning_rate": 9.98177421883536e-07,
8202
+ "loss": 131.5875,
8203
+ "step": 11660
8204
+ },
8205
+ {
8206
+ "epoch": 0.9339607423367275,
8207
+ "grad_norm": 75.9375,
8208
+ "learning_rate": 9.981758587805202e-07,
8209
+ "loss": 131.1091,
8210
+ "step": 11670
8211
+ },
8212
+ {
8213
+ "epoch": 0.934761051456125,
8214
+ "grad_norm": 79.4375,
8215
+ "learning_rate": 9.981742956775042e-07,
8216
+ "loss": 129.5825,
8217
+ "step": 11680
8218
+ },
8219
+ {
8220
+ "epoch": 0.9355613605755223,
8221
+ "grad_norm": 78.375,
8222
+ "learning_rate": 9.981727325744885e-07,
8223
+ "loss": 131.5865,
8224
+ "step": 11690
8225
+ },
8226
+ {
8227
+ "epoch": 0.9363616696949196,
8228
+ "grad_norm": 74.25,
8229
+ "learning_rate": 9.981711694714727e-07,
8230
+ "loss": 131.3969,
8231
+ "step": 11700
8232
+ },
8233
+ {
8234
+ "epoch": 0.9371619788143171,
8235
+ "grad_norm": 82.5625,
8236
+ "learning_rate": 9.98169606368457e-07,
8237
+ "loss": 131.5427,
8238
+ "step": 11710
8239
+ },
8240
+ {
8241
+ "epoch": 0.9379622879337144,
8242
+ "grad_norm": 81.125,
8243
+ "learning_rate": 9.98168043265441e-07,
8244
+ "loss": 131.2085,
8245
+ "step": 11720
8246
+ },
8247
+ {
8248
+ "epoch": 0.9387625970531117,
8249
+ "grad_norm": 76.25,
8250
+ "learning_rate": 9.981664801624251e-07,
8251
+ "loss": 130.8618,
8252
+ "step": 11730
8253
+ },
8254
+ {
8255
+ "epoch": 0.9395629061725091,
8256
+ "grad_norm": 75.0625,
8257
+ "learning_rate": 9.981649170594093e-07,
8258
+ "loss": 130.0715,
8259
+ "step": 11740
8260
+ },
8261
+ {
8262
+ "epoch": 0.9403632152919065,
8263
+ "grad_norm": 76.4375,
8264
+ "learning_rate": 9.981633539563933e-07,
8265
+ "loss": 131.8718,
8266
+ "step": 11750
8267
+ },
8268
+ {
8269
+ "epoch": 0.9411635244113039,
8270
+ "grad_norm": 78.1875,
8271
+ "learning_rate": 9.981617908533776e-07,
8272
+ "loss": 131.3843,
8273
+ "step": 11760
8274
+ },
8275
+ {
8276
+ "epoch": 0.9419638335307012,
8277
+ "grad_norm": 75.3125,
8278
+ "learning_rate": 9.981602277503618e-07,
8279
+ "loss": 130.1011,
8280
+ "step": 11770
8281
+ },
8282
+ {
8283
+ "epoch": 0.9427641426500986,
8284
+ "grad_norm": 78.0625,
8285
+ "learning_rate": 9.98158664647346e-07,
8286
+ "loss": 131.4609,
8287
+ "step": 11780
8288
+ },
8289
+ {
8290
+ "epoch": 0.943564451769496,
8291
+ "grad_norm": 73.75,
8292
+ "learning_rate": 9.9815710154433e-07,
8293
+ "loss": 130.214,
8294
+ "step": 11790
8295
+ },
8296
+ {
8297
+ "epoch": 0.9443647608888933,
8298
+ "grad_norm": 81.3125,
8299
+ "learning_rate": 9.981555384413142e-07,
8300
+ "loss": 132.272,
8301
+ "step": 11800
8302
+ },
8303
+ {
8304
+ "epoch": 0.9451650700082908,
8305
+ "grad_norm": 76.0625,
8306
+ "learning_rate": 9.981539753382984e-07,
8307
+ "loss": 130.5801,
8308
+ "step": 11810
8309
+ },
8310
+ {
8311
+ "epoch": 0.9459653791276881,
8312
+ "grad_norm": 71.3125,
8313
+ "learning_rate": 9.981524122352827e-07,
8314
+ "loss": 130.7302,
8315
+ "step": 11820
8316
+ },
8317
+ {
8318
+ "epoch": 0.9467656882470854,
8319
+ "grad_norm": 84.0625,
8320
+ "learning_rate": 9.981508491322667e-07,
8321
+ "loss": 130.839,
8322
+ "step": 11830
8323
+ },
8324
+ {
8325
+ "epoch": 0.9475659973664828,
8326
+ "grad_norm": 78.5,
8327
+ "learning_rate": 9.981492860292509e-07,
8328
+ "loss": 130.5891,
8329
+ "step": 11840
8330
+ },
8331
+ {
8332
+ "epoch": 0.9483663064858802,
8333
+ "grad_norm": 78.4375,
8334
+ "learning_rate": 9.98147722926235e-07,
8335
+ "loss": 130.0701,
8336
+ "step": 11850
8337
+ },
8338
+ {
8339
+ "epoch": 0.9491666156052775,
8340
+ "grad_norm": 76.1875,
8341
+ "learning_rate": 9.981461598232193e-07,
8342
+ "loss": 130.6237,
8343
+ "step": 11860
8344
+ },
8345
+ {
8346
+ "epoch": 0.9499669247246749,
8347
+ "grad_norm": 79.625,
8348
+ "learning_rate": 9.981445967202035e-07,
8349
+ "loss": 131.0294,
8350
+ "step": 11870
8351
+ },
8352
+ {
8353
+ "epoch": 0.9507672338440722,
8354
+ "grad_norm": 79.3125,
8355
+ "learning_rate": 9.981430336171875e-07,
8356
+ "loss": 132.2408,
8357
+ "step": 11880
8358
+ },
8359
+ {
8360
+ "epoch": 0.9515675429634697,
8361
+ "grad_norm": 72.0625,
8362
+ "learning_rate": 9.981414705141718e-07,
8363
+ "loss": 131.3878,
8364
+ "step": 11890
8365
+ },
8366
+ {
8367
+ "epoch": 0.952367852082867,
8368
+ "grad_norm": 75.5625,
8369
+ "learning_rate": 9.98139907411156e-07,
8370
+ "loss": 131.9279,
8371
+ "step": 11900
8372
+ },
8373
+ {
8374
+ "epoch": 0.9531681612022643,
8375
+ "grad_norm": 86.5,
8376
+ "learning_rate": 9.9813834430814e-07,
8377
+ "loss": 130.3583,
8378
+ "step": 11910
8379
+ },
8380
+ {
8381
+ "epoch": 0.9539684703216618,
8382
+ "grad_norm": 74.4375,
8383
+ "learning_rate": 9.981367812051242e-07,
8384
+ "loss": 130.4393,
8385
+ "step": 11920
8386
+ },
8387
+ {
8388
+ "epoch": 0.9547687794410591,
8389
+ "grad_norm": 74.6875,
8390
+ "learning_rate": 9.981352181021084e-07,
8391
+ "loss": 129.8773,
8392
+ "step": 11930
8393
+ },
8394
+ {
8395
+ "epoch": 0.9555690885604565,
8396
+ "grad_norm": 77.6875,
8397
+ "learning_rate": 9.981336549990924e-07,
8398
+ "loss": 130.8676,
8399
+ "step": 11940
8400
+ },
8401
+ {
8402
+ "epoch": 0.9563693976798538,
8403
+ "grad_norm": 80.5,
8404
+ "learning_rate": 9.981320918960767e-07,
8405
+ "loss": 131.1644,
8406
+ "step": 11950
8407
+ },
8408
+ {
8409
+ "epoch": 0.9571697067992512,
8410
+ "grad_norm": 81.125,
8411
+ "learning_rate": 9.981305287930609e-07,
8412
+ "loss": 131.0869,
8413
+ "step": 11960
8414
+ },
8415
+ {
8416
+ "epoch": 0.9579700159186486,
8417
+ "grad_norm": 85.0,
8418
+ "learning_rate": 9.98128965690045e-07,
8419
+ "loss": 130.5896,
8420
+ "step": 11970
8421
+ },
8422
+ {
8423
+ "epoch": 0.9587703250380459,
8424
+ "grad_norm": 72.625,
8425
+ "learning_rate": 9.981274025870293e-07,
8426
+ "loss": 131.133,
8427
+ "step": 11980
8428
+ },
8429
+ {
8430
+ "epoch": 0.9595706341574433,
8431
+ "grad_norm": 77.625,
8432
+ "learning_rate": 9.981258394840133e-07,
8433
+ "loss": 131.9948,
8434
+ "step": 11990
8435
+ },
8436
+ {
8437
+ "epoch": 0.9603709432768407,
8438
+ "grad_norm": 78.25,
8439
+ "learning_rate": 9.981242763809975e-07,
8440
+ "loss": 130.3504,
8441
+ "step": 12000
8442
+ },
8443
+ {
8444
+ "epoch": 0.961171252396238,
8445
+ "grad_norm": 69.0625,
8446
+ "learning_rate": 9.981227132779817e-07,
8447
+ "loss": 131.8812,
8448
+ "step": 12010
8449
+ },
8450
+ {
8451
+ "epoch": 0.9619715615156355,
8452
+ "grad_norm": 76.125,
8453
+ "learning_rate": 9.98121150174966e-07,
8454
+ "loss": 131.7717,
8455
+ "step": 12020
8456
+ },
8457
+ {
8458
+ "epoch": 0.9627718706350328,
8459
+ "grad_norm": 76.6875,
8460
+ "learning_rate": 9.981195870719502e-07,
8461
+ "loss": 131.0739,
8462
+ "step": 12030
8463
+ },
8464
+ {
8465
+ "epoch": 0.9635721797544301,
8466
+ "grad_norm": 79.5,
8467
+ "learning_rate": 9.981180239689342e-07,
8468
+ "loss": 132.3484,
8469
+ "step": 12040
8470
+ },
8471
+ {
8472
+ "epoch": 0.9643724888738275,
8473
+ "grad_norm": 81.25,
8474
+ "learning_rate": 9.981164608659184e-07,
8475
+ "loss": 131.6032,
8476
+ "step": 12050
8477
+ },
8478
+ {
8479
+ "epoch": 0.9651727979932249,
8480
+ "grad_norm": 80.375,
8481
+ "learning_rate": 9.981148977629026e-07,
8482
+ "loss": 131.8261,
8483
+ "step": 12060
8484
+ },
8485
+ {
8486
+ "epoch": 0.9659731071126223,
8487
+ "grad_norm": 80.9375,
8488
+ "learning_rate": 9.981133346598866e-07,
8489
+ "loss": 129.7895,
8490
+ "step": 12070
8491
+ },
8492
+ {
8493
+ "epoch": 0.9667734162320196,
8494
+ "grad_norm": 77.75,
8495
+ "learning_rate": 9.981117715568709e-07,
8496
+ "loss": 129.8964,
8497
+ "step": 12080
8498
+ },
8499
+ {
8500
+ "epoch": 0.967573725351417,
8501
+ "grad_norm": 72.5,
8502
+ "learning_rate": 9.98110208453855e-07,
8503
+ "loss": 130.2092,
8504
+ "step": 12090
8505
+ },
8506
+ {
8507
+ "epoch": 0.9683740344708144,
8508
+ "grad_norm": 70.75,
8509
+ "learning_rate": 9.98108645350839e-07,
8510
+ "loss": 130.064,
8511
+ "step": 12100
8512
+ },
8513
+ {
8514
+ "epoch": 0.9691743435902117,
8515
+ "grad_norm": 82.3125,
8516
+ "learning_rate": 9.981070822478233e-07,
8517
+ "loss": 133.1195,
8518
+ "step": 12110
8519
+ },
8520
+ {
8521
+ "epoch": 0.9699746527096091,
8522
+ "grad_norm": 73.375,
8523
+ "learning_rate": 9.981055191448075e-07,
8524
+ "loss": 131.6047,
8525
+ "step": 12120
8526
+ },
8527
+ {
8528
+ "epoch": 0.9707749618290065,
8529
+ "grad_norm": 78.0625,
8530
+ "learning_rate": 9.981039560417917e-07,
8531
+ "loss": 131.0189,
8532
+ "step": 12130
8533
+ },
8534
+ {
8535
+ "epoch": 0.9715752709484038,
8536
+ "grad_norm": 80.375,
8537
+ "learning_rate": 9.98102392938776e-07,
8538
+ "loss": 131.1126,
8539
+ "step": 12140
8540
+ },
8541
+ {
8542
+ "epoch": 0.9723755800678012,
8543
+ "grad_norm": 74.4375,
8544
+ "learning_rate": 9.9810082983576e-07,
8545
+ "loss": 131.885,
8546
+ "step": 12150
8547
+ },
8548
+ {
8549
+ "epoch": 0.9731758891871986,
8550
+ "grad_norm": 78.25,
8551
+ "learning_rate": 9.980992667327442e-07,
8552
+ "loss": 130.9126,
8553
+ "step": 12160
8554
+ },
8555
+ {
8556
+ "epoch": 0.9739761983065959,
8557
+ "grad_norm": 83.875,
8558
+ "learning_rate": 9.980977036297284e-07,
8559
+ "loss": 131.2764,
8560
+ "step": 12170
8561
+ },
8562
+ {
8563
+ "epoch": 0.9747765074259933,
8564
+ "grad_norm": 72.875,
8565
+ "learning_rate": 9.980961405267126e-07,
8566
+ "loss": 132.7158,
8567
+ "step": 12180
8568
+ },
8569
+ {
8570
+ "epoch": 0.9755768165453906,
8571
+ "grad_norm": 77.25,
8572
+ "learning_rate": 9.980945774236968e-07,
8573
+ "loss": 131.1642,
8574
+ "step": 12190
8575
+ },
8576
+ {
8577
+ "epoch": 0.9763771256647881,
8578
+ "grad_norm": 77.125,
8579
+ "learning_rate": 9.980930143206808e-07,
8580
+ "loss": 130.3661,
8581
+ "step": 12200
8582
+ },
8583
+ {
8584
+ "epoch": 0.9771774347841854,
8585
+ "grad_norm": 81.25,
8586
+ "learning_rate": 9.98091451217665e-07,
8587
+ "loss": 132.4058,
8588
+ "step": 12210
8589
+ },
8590
+ {
8591
+ "epoch": 0.9779777439035827,
8592
+ "grad_norm": 77.1875,
8593
+ "learning_rate": 9.980898881146493e-07,
8594
+ "loss": 131.1993,
8595
+ "step": 12220
8596
+ },
8597
+ {
8598
+ "epoch": 0.9787780530229802,
8599
+ "grad_norm": 78.375,
8600
+ "learning_rate": 9.980883250116333e-07,
8601
+ "loss": 129.8341,
8602
+ "step": 12230
8603
+ },
8604
+ {
8605
+ "epoch": 0.9795783621423775,
8606
+ "grad_norm": 82.3125,
8607
+ "learning_rate": 9.980867619086175e-07,
8608
+ "loss": 130.408,
8609
+ "step": 12240
8610
+ },
8611
+ {
8612
+ "epoch": 0.9803786712617749,
8613
+ "grad_norm": 79.375,
8614
+ "learning_rate": 9.980851988056017e-07,
8615
+ "loss": 132.0676,
8616
+ "step": 12250
8617
+ },
8618
+ {
8619
+ "epoch": 0.9811789803811722,
8620
+ "grad_norm": 75.4375,
8621
+ "learning_rate": 9.980836357025857e-07,
8622
+ "loss": 130.5135,
8623
+ "step": 12260
8624
+ },
8625
+ {
8626
+ "epoch": 0.9819792895005696,
8627
+ "grad_norm": 79.75,
8628
+ "learning_rate": 9.9808207259957e-07,
8629
+ "loss": 129.7958,
8630
+ "step": 12270
8631
+ },
8632
+ {
8633
+ "epoch": 0.982779598619967,
8634
+ "grad_norm": 78.8125,
8635
+ "learning_rate": 9.980805094965542e-07,
8636
+ "loss": 131.8359,
8637
+ "step": 12280
8638
+ },
8639
+ {
8640
+ "epoch": 0.9835799077393643,
8641
+ "grad_norm": 73.75,
8642
+ "learning_rate": 9.980789463935384e-07,
8643
+ "loss": 130.6134,
8644
+ "step": 12290
8645
+ },
8646
+ {
8647
+ "epoch": 0.9843802168587616,
8648
+ "grad_norm": 79.0,
8649
+ "learning_rate": 9.980773832905224e-07,
8650
+ "loss": 130.2782,
8651
+ "step": 12300
8652
+ },
8653
+ {
8654
+ "epoch": 0.9851805259781591,
8655
+ "grad_norm": 74.0,
8656
+ "learning_rate": 9.980758201875066e-07,
8657
+ "loss": 130.808,
8658
+ "step": 12310
8659
+ },
8660
+ {
8661
+ "epoch": 0.9859808350975564,
8662
+ "grad_norm": 73.0,
8663
+ "learning_rate": 9.980742570844908e-07,
8664
+ "loss": 130.4844,
8665
+ "step": 12320
8666
+ },
8667
+ {
8668
+ "epoch": 0.9867811442169538,
8669
+ "grad_norm": 72.4375,
8670
+ "learning_rate": 9.98072693981475e-07,
8671
+ "loss": 131.225,
8672
+ "step": 12330
8673
+ },
8674
+ {
8675
+ "epoch": 0.9875814533363512,
8676
+ "grad_norm": 78.875,
8677
+ "learning_rate": 9.980711308784593e-07,
8678
+ "loss": 132.9991,
8679
+ "step": 12340
8680
+ },
8681
+ {
8682
+ "epoch": 0.9883817624557485,
8683
+ "grad_norm": 75.9375,
8684
+ "learning_rate": 9.980695677754435e-07,
8685
+ "loss": 131.7026,
8686
+ "step": 12350
8687
+ },
8688
+ {
8689
+ "epoch": 0.9891820715751459,
8690
+ "grad_norm": 74.125,
8691
+ "learning_rate": 9.980680046724275e-07,
8692
+ "loss": 130.6302,
8693
+ "step": 12360
8694
+ },
8695
+ {
8696
+ "epoch": 0.9899823806945433,
8697
+ "grad_norm": 80.75,
8698
+ "learning_rate": 9.980664415694117e-07,
8699
+ "loss": 129.955,
8700
+ "step": 12370
8701
+ },
8702
+ {
8703
+ "epoch": 0.9907826898139407,
8704
+ "grad_norm": 70.125,
8705
+ "learning_rate": 9.98064878466396e-07,
8706
+ "loss": 130.9879,
8707
+ "step": 12380
8708
+ },
8709
+ {
8710
+ "epoch": 0.991582998933338,
8711
+ "grad_norm": 79.75,
8712
+ "learning_rate": 9.9806331536338e-07,
8713
+ "loss": 130.061,
8714
+ "step": 12390
8715
+ },
8716
+ {
8717
+ "epoch": 0.9923833080527353,
8718
+ "grad_norm": 75.375,
8719
+ "learning_rate": 9.980617522603641e-07,
8720
+ "loss": 129.8558,
8721
+ "step": 12400
8722
+ },
8723
+ {
8724
+ "epoch": 0.9931836171721328,
8725
+ "grad_norm": 78.4375,
8726
+ "learning_rate": 9.980601891573484e-07,
8727
+ "loss": 129.8251,
8728
+ "step": 12410
8729
+ },
8730
+ {
8731
+ "epoch": 0.9939839262915301,
8732
+ "grad_norm": 70.1875,
8733
+ "learning_rate": 9.980586260543324e-07,
8734
+ "loss": 130.4507,
8735
+ "step": 12420
8736
+ },
8737
+ {
8738
+ "epoch": 0.9947842354109274,
8739
+ "grad_norm": 74.4375,
8740
+ "learning_rate": 9.980570629513166e-07,
8741
+ "loss": 131.6167,
8742
+ "step": 12430
8743
+ },
8744
+ {
8745
+ "epoch": 0.9955845445303249,
8746
+ "grad_norm": 76.1875,
8747
+ "learning_rate": 9.980554998483008e-07,
8748
+ "loss": 132.0188,
8749
+ "step": 12440
8750
+ },
8751
+ {
8752
+ "epoch": 0.9963848536497222,
8753
+ "grad_norm": 79.6875,
8754
+ "learning_rate": 9.98053936745285e-07,
8755
+ "loss": 130.7771,
8756
+ "step": 12450
8757
+ },
8758
+ {
8759
+ "epoch": 0.9971851627691196,
8760
+ "grad_norm": 74.25,
8761
+ "learning_rate": 9.98052373642269e-07,
8762
+ "loss": 130.4612,
8763
+ "step": 12460
8764
+ },
8765
+ {
8766
+ "epoch": 0.9979854718885169,
8767
+ "grad_norm": 78.0625,
8768
+ "learning_rate": 9.980508105392532e-07,
8769
+ "loss": 129.6428,
8770
+ "step": 12470
8771
+ },
8772
+ {
8773
+ "epoch": 0.9987857810079143,
8774
+ "grad_norm": 77.6875,
8775
+ "learning_rate": 9.980492474362375e-07,
8776
+ "loss": 130.6133,
8777
+ "step": 12480
8778
+ },
8779
+ {
8780
+ "epoch": 0.9995860901273117,
8781
+ "grad_norm": 77.75,
8782
+ "learning_rate": 9.980476843332217e-07,
8783
+ "loss": 131.8606,
8784
+ "step": 12490
8785
  }
8786
  ],
8787
  "logging_steps": 10,
 
8796
  "should_evaluate": false,
8797
  "should_log": false,
8798
  "should_save": true,
8799
+ "should_training_stop": true
8800
  },
8801
  "attributes": {}
8802
  }
8803
  },
8804
+ "total_flos": 3.450015163893783e+19,
8805
  "train_batch_size": 4,
8806
  "trial_name": null,
8807
  "trial_params": null