Training in progress, step 1600, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:77e67eae521c3cba24d0ae9eb0d44447bec89794066751cc7d707f2fdc5c29bf
 size 891644712
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:42e0c34297a9f3d72c5022f0b3dc2e519698ad0189be335ae6d8648524d74bb2
 size 1783444794
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8dee4a2b51470c2e565b08aae8a4e5156e5c34e8c236adf6153eeb283fa560ec
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1affb368014b9c4895e09d750783801a20ec7c8f622ab5a02e1bce055904fb15
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.8874098724348308,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4487,6 +4487,1126 @@
       "learning_rate": 0.00014958448930255265,
       "loss": 0.5353,
       "step": 1280
+    },
+    {
+      "epoch": 0.7110371602884082,
+      "grad_norm": 0.29970696568489075,
+      "learning_rate": 0.00014942837881378465,
+      "loss": 0.4261,
+      "step": 1282
+    },
+    {
+      "epoch": 0.7121464226289518,
+      "grad_norm": 0.36919617652893066,
+      "learning_rate": 0.00014927210875741347,
+      "loss": 0.4935,
+      "step": 1284
+    },
+    {
+      "epoch": 0.7132556849694953,
+      "grad_norm": 0.3045351505279541,
+      "learning_rate": 0.00014911567963791928,
+      "loss": 0.4191,
+      "step": 1286
+    },
+    {
+      "epoch": 0.7143649473100389,
+      "grad_norm": 0.30813243985176086,
+      "learning_rate": 0.00014895909196029585,
+      "loss": 0.3992,
+      "step": 1288
+    },
+    {
+      "epoch": 0.7154742096505824,
+      "grad_norm": 0.3444893956184387,
+      "learning_rate": 0.00014880234623004866,
+      "loss": 0.4351,
+      "step": 1290
+    },
+    {
+      "epoch": 0.7165834719911259,
+      "grad_norm": 0.29215720295906067,
+      "learning_rate": 0.00014864544295319356,
+      "loss": 0.4323,
+      "step": 1292
+    },
+    {
+      "epoch": 0.7176927343316695,
+      "grad_norm": 0.37148571014404297,
+      "learning_rate": 0.00014848838263625496,
+      "loss": 0.4463,
+      "step": 1294
+    },
+    {
+      "epoch": 0.718801996672213,
+      "grad_norm": 0.38779592514038086,
+      "learning_rate": 0.00014833116578626417,
+      "loss": 0.5293,
+      "step": 1296
+    },
+    {
+      "epoch": 0.7199112590127565,
+      "grad_norm": 0.31231850385665894,
+      "learning_rate": 0.00014817379291075792,
+      "loss": 0.4575,
+      "step": 1298
+    },
+    {
+      "epoch": 0.7210205213533001,
+      "grad_norm": 0.2966194450855255,
+      "learning_rate": 0.00014801626451777658,
+      "loss": 0.4337,
+      "step": 1300
+    },
+    {
+      "epoch": 0.7221297836938436,
+      "grad_norm": 0.2993515133857727,
+      "learning_rate": 0.00014785858111586258,
+      "loss": 0.3994,
+      "step": 1302
+    },
+    {
+      "epoch": 0.7232390460343872,
+      "grad_norm": 0.27827030420303345,
+      "learning_rate": 0.00014770074321405878,
+      "loss": 0.4079,
+      "step": 1304
+    },
+    {
+      "epoch": 0.7243483083749307,
+      "grad_norm": 0.2576323449611664,
+      "learning_rate": 0.00014754275132190678,
+      "loss": 0.3798,
+      "step": 1306
+    },
+    {
+      "epoch": 0.7254575707154742,
+      "grad_norm": 0.24843810498714447,
+      "learning_rate": 0.0001473846059494453,
+      "loss": 0.4564,
+      "step": 1308
+    },
+    {
+      "epoch": 0.7265668330560178,
+      "grad_norm": 0.2799087166786194,
+      "learning_rate": 0.00014722630760720856,
+      "loss": 0.3906,
+      "step": 1310
+    },
+    {
+      "epoch": 0.7276760953965613,
+      "grad_norm": 0.3635210394859314,
+      "learning_rate": 0.00014706785680622462,
+      "loss": 0.4409,
+      "step": 1312
+    },
+    {
+      "epoch": 0.7287853577371048,
+      "grad_norm": 0.30838248133659363,
+      "learning_rate": 0.0001469092540580136,
+      "loss": 0.4268,
+      "step": 1314
+    },
+    {
+      "epoch": 0.7298946200776484,
+      "grad_norm": 0.2162405252456665,
+      "learning_rate": 0.0001467504998745863,
+      "loss": 0.4085,
+      "step": 1316
+    },
+    {
+      "epoch": 0.7310038824181919,
+      "grad_norm": 0.30635640025138855,
+      "learning_rate": 0.00014659159476844232,
+      "loss": 0.4089,
+      "step": 1318
+    },
+    {
+      "epoch": 0.7321131447587355,
+      "grad_norm": 0.4048236012458801,
+      "learning_rate": 0.00014643253925256846,
+      "loss": 0.5283,
+      "step": 1320
+    },
+    {
+      "epoch": 0.733222407099279,
+      "grad_norm": 0.4575350284576416,
+      "learning_rate": 0.00014627333384043713,
+      "loss": 0.4675,
+      "step": 1322
+    },
+    {
+      "epoch": 0.7343316694398225,
+      "grad_norm": 0.3289128243923187,
+      "learning_rate": 0.00014611397904600458,
+      "loss": 0.4777,
+      "step": 1324
+    },
+    {
+      "epoch": 0.7354409317803661,
+      "grad_norm": 0.29231977462768555,
+      "learning_rate": 0.00014595447538370935,
+      "loss": 0.3851,
+      "step": 1326
+    },
+    {
+      "epoch": 0.7365501941209096,
+      "grad_norm": 0.33795663714408875,
+      "learning_rate": 0.00014579482336847058,
+      "loss": 0.475,
+      "step": 1328
+    },
+    {
+      "epoch": 0.7376594564614531,
+      "grad_norm": 0.20978425443172455,
+      "learning_rate": 0.00014563502351568625,
+      "loss": 0.3455,
+      "step": 1330
+    },
+    {
+      "epoch": 0.7387687188019967,
+      "grad_norm": 0.4113425016403198,
+      "learning_rate": 0.00014547507634123176,
+      "loss": 0.3741,
+      "step": 1332
+    },
+    {
+      "epoch": 0.7398779811425402,
+      "grad_norm": 0.38009563088417053,
+      "learning_rate": 0.0001453149823614579,
+      "loss": 0.53,
+      "step": 1334
+    },
+    {
+      "epoch": 0.7409872434830838,
+      "grad_norm": 0.2306888997554779,
+      "learning_rate": 0.00014515474209318948,
+      "loss": 0.4229,
+      "step": 1336
+    },
+    {
+      "epoch": 0.7420965058236273,
+      "grad_norm": 0.326107382774353,
+      "learning_rate": 0.00014499435605372366,
+      "loss": 0.3731,
+      "step": 1338
+    },
+    {
+      "epoch": 0.7432057681641708,
+      "grad_norm": 0.26597580313682556,
+      "learning_rate": 0.00014483382476082802,
+      "loss": 0.3841,
+      "step": 1340
+    },
+    {
+      "epoch": 0.7443150305047144,
+      "grad_norm": 0.3690161406993866,
+      "learning_rate": 0.00014467314873273918,
+      "loss": 0.4814,
+      "step": 1342
+    },
+    {
+      "epoch": 0.7454242928452579,
+      "grad_norm": 0.30208808183670044,
+      "learning_rate": 0.0001445123284881609,
+      "loss": 0.4522,
+      "step": 1344
+    },
+    {
+      "epoch": 0.7465335551858014,
+      "grad_norm": 0.28505003452301025,
+      "learning_rate": 0.00014435136454626264,
+      "loss": 0.4283,
+      "step": 1346
+    },
+    {
+      "epoch": 0.747642817526345,
+      "grad_norm": 0.2772189974784851,
+      "learning_rate": 0.0001441902574266776,
+      "loss": 0.3964,
+      "step": 1348
+    },
+    {
+      "epoch": 0.7487520798668885,
+      "grad_norm": 0.4741387665271759,
+      "learning_rate": 0.0001440290076495013,
+      "loss": 0.4577,
+      "step": 1350
+    },
+    {
+      "epoch": 0.7498613422074321,
+      "grad_norm": 0.3234037160873413,
+      "learning_rate": 0.00014386761573528976,
+      "loss": 0.4542,
+      "step": 1352
+    },
+    {
+      "epoch": 0.7509706045479756,
+      "grad_norm": 0.3096826374530792,
+      "learning_rate": 0.0001437060822050579,
+      "loss": 0.3892,
+      "step": 1354
+    },
+    {
+      "epoch": 0.7520798668885191,
+      "grad_norm": 0.2626829743385315,
+      "learning_rate": 0.00014354440758027772,
+      "loss": 0.4441,
+      "step": 1356
+    },
+    {
+      "epoch": 0.7531891292290627,
+      "grad_norm": 0.2835614085197449,
+      "learning_rate": 0.00014338259238287678,
+      "loss": 0.4905,
+      "step": 1358
+    },
+    {
+      "epoch": 0.7542983915696062,
+      "grad_norm": 0.23285934329032898,
+      "learning_rate": 0.00014322063713523647,
+      "loss": 0.3911,
+      "step": 1360
+    },
+    {
+      "epoch": 0.7554076539101497,
+      "grad_norm": 0.2746225595474243,
+      "learning_rate": 0.00014305854236019018,
+      "loss": 0.4681,
+      "step": 1362
+    },
+    {
+      "epoch": 0.7565169162506933,
+      "grad_norm": 0.30551669001579285,
+      "learning_rate": 0.0001428963085810219,
+      "loss": 0.3883,
+      "step": 1364
+    },
+    {
+      "epoch": 0.7576261785912368,
+      "grad_norm": 0.3372795283794403,
+      "learning_rate": 0.0001427339363214642,
+      "loss": 0.4452,
+      "step": 1366
+    },
+    {
+      "epoch": 0.7587354409317804,
+      "grad_norm": 0.2697629928588867,
+      "learning_rate": 0.00014257142610569682,
+      "loss": 0.3853,
+      "step": 1368
+    },
+    {
+      "epoch": 0.7598447032723239,
+      "grad_norm": 0.29290249943733215,
+      "learning_rate": 0.00014240877845834472,
+      "loss": 0.4694,
+      "step": 1370
+    },
+    {
+      "epoch": 0.7609539656128674,
+      "grad_norm": 0.298874169588089,
+      "learning_rate": 0.00014224599390447672,
+      "loss": 0.4644,
+      "step": 1372
+    },
+    {
+      "epoch": 0.762063227953411,
+      "grad_norm": 0.23659248650074005,
+      "learning_rate": 0.00014208307296960344,
+      "loss": 0.395,
+      "step": 1374
+    },
+    {
+      "epoch": 0.7631724902939545,
+      "grad_norm": 0.2965550124645233,
+      "learning_rate": 0.00014192001617967587,
+      "loss": 0.5332,
+      "step": 1376
+    },
+    {
+      "epoch": 0.764281752634498,
+      "grad_norm": 0.3338853716850281,
+      "learning_rate": 0.00014175682406108352,
+      "loss": 0.5176,
+      "step": 1378
+    },
+    {
+      "epoch": 0.7653910149750416,
+      "grad_norm": 0.24134789407253265,
+      "learning_rate": 0.0001415934971406528,
+      "loss": 0.4224,
+      "step": 1380
+    },
+    {
+      "epoch": 0.7665002773155851,
+      "grad_norm": 0.3920575678348541,
+      "learning_rate": 0.00014143003594564528,
+      "loss": 0.4627,
+      "step": 1382
+    },
+    {
+      "epoch": 0.7676095396561287,
+      "grad_norm": 0.3521714508533478,
+      "learning_rate": 0.00014126644100375603,
+      "loss": 0.446,
+      "step": 1384
+    },
+    {
+      "epoch": 0.7687188019966722,
+      "grad_norm": 0.2819899022579193,
+      "learning_rate": 0.0001411027128431119,
+      "loss": 0.3637,
+      "step": 1386
+    },
+    {
+      "epoch": 0.7698280643372157,
+      "grad_norm": 0.1896730363368988,
+      "learning_rate": 0.00014093885199226972,
+      "loss": 0.3206,
+      "step": 1388
+    },
+    {
+      "epoch": 0.7709373266777593,
+      "grad_norm": 0.21066512167453766,
+      "learning_rate": 0.0001407748589802148,
+      "loss": 0.3081,
+      "step": 1390
+    },
+    {
+      "epoch": 0.7720465890183028,
+      "grad_norm": 0.2417469471693039,
+      "learning_rate": 0.000140610734336359,
+      "loss": 0.3954,
+      "step": 1392
+    },
+    {
+      "epoch": 0.7731558513588463,
+      "grad_norm": 0.41810843348503113,
+      "learning_rate": 0.00014044647859053915,
+      "loss": 0.521,
+      "step": 1394
+    },
+    {
+      "epoch": 0.7742651136993899,
+      "grad_norm": 0.21894732117652893,
+      "learning_rate": 0.00014028209227301533,
+      "loss": 0.342,
+      "step": 1396
+    },
+    {
+      "epoch": 0.7753743760399334,
+      "grad_norm": 0.31191563606262207,
+      "learning_rate": 0.00014011757591446918,
+      "loss": 0.4173,
+      "step": 1398
+    },
+    {
+      "epoch": 0.776483638380477,
+      "grad_norm": 0.34966176748275757,
+      "learning_rate": 0.00013995293004600206,
+      "loss": 0.4993,
+      "step": 1400
+    },
+    {
+      "epoch": 0.7775929007210205,
+      "grad_norm": 0.3630419671535492,
+      "learning_rate": 0.00013978815519913345,
+      "loss": 0.4331,
+      "step": 1402
+    },
+    {
+      "epoch": 0.778702163061564,
+      "grad_norm": 0.2934836447238922,
+      "learning_rate": 0.00013962325190579919,
+      "loss": 0.4618,
+      "step": 1404
+    },
+    {
+      "epoch": 0.7798114254021076,
+      "grad_norm": 0.330842524766922,
+      "learning_rate": 0.00013945822069834983,
+      "loss": 0.4437,
+      "step": 1406
+    },
+    {
+      "epoch": 0.7809206877426511,
+      "grad_norm": 0.24101948738098145,
+      "learning_rate": 0.0001392930621095489,
+      "loss": 0.4065,
+      "step": 1408
+    },
+    {
+      "epoch": 0.7820299500831946,
+      "grad_norm": 0.5105953216552734,
+      "learning_rate": 0.00013912777667257094,
+      "loss": 0.5135,
+      "step": 1410
+    },
+    {
+      "epoch": 0.7831392124237382,
+      "grad_norm": 0.2571849226951599,
+      "learning_rate": 0.00013896236492100025,
+      "loss": 0.4153,
+      "step": 1412
+    },
+    {
+      "epoch": 0.7842484747642817,
+      "grad_norm": 0.27226725220680237,
+      "learning_rate": 0.00013879682738882873,
+      "loss": 0.3522,
+      "step": 1414
+    },
+    {
+      "epoch": 0.7853577371048253,
+      "grad_norm": 0.2498832792043686,
+      "learning_rate": 0.0001386311646104544,
+      "loss": 0.4186,
+      "step": 1416
+    },
+    {
+      "epoch": 0.7864669994453688,
+      "grad_norm": 0.18799692392349243,
+      "learning_rate": 0.00013846537712067962,
+      "loss": 0.3278,
+      "step": 1418
+    },
+    {
+      "epoch": 0.7875762617859123,
+      "grad_norm": 0.3741225600242615,
+      "learning_rate": 0.0001382994654547093,
+      "loss": 0.4816,
+      "step": 1420
+    },
+    {
+      "epoch": 0.7886855241264559,
+      "grad_norm": 0.34847941994667053,
+      "learning_rate": 0.00013813343014814925,
+      "loss": 0.5673,
+      "step": 1422
+    },
+    {
+      "epoch": 0.7897947864669994,
+      "grad_norm": 0.38350898027420044,
+      "learning_rate": 0.00013796727173700444,
+      "loss": 0.5009,
+      "step": 1424
+    },
+    {
+      "epoch": 0.790904048807543,
+      "grad_norm": 0.2870427668094635,
+      "learning_rate": 0.0001378009907576772,
+      "loss": 0.4333,
+      "step": 1426
+    },
+    {
+      "epoch": 0.7920133111480865,
+      "grad_norm": 0.2194262146949768,
+      "learning_rate": 0.00013763458774696563,
+      "loss": 0.3433,
+      "step": 1428
+    },
+    {
+      "epoch": 0.79312257348863,
+      "grad_norm": 0.26900714635849,
+      "learning_rate": 0.00013746806324206173,
+      "loss": 0.5099,
+      "step": 1430
+    },
+    {
+      "epoch": 0.7942318358291736,
+      "grad_norm": 0.2423945814371109,
+      "learning_rate": 0.00013730141778054962,
+      "loss": 0.333,
+      "step": 1432
+    },
+    {
+      "epoch": 0.7953410981697171,
+      "grad_norm": 0.31677019596099854,
+      "learning_rate": 0.00013713465190040415,
+      "loss": 0.4285,
+      "step": 1434
+    },
+    {
+      "epoch": 0.7964503605102606,
+      "grad_norm": 0.33169832825660706,
+      "learning_rate": 0.0001369677661399886,
+      "loss": 0.4058,
+      "step": 1436
+    },
+    {
+      "epoch": 0.7975596228508042,
+      "grad_norm": 0.3489621579647064,
+      "learning_rate": 0.0001368007610380535,
+      "loss": 0.4153,
+      "step": 1438
+    },
+    {
+      "epoch": 0.7986688851913477,
+      "grad_norm": 0.2639998495578766,
+      "learning_rate": 0.00013663363713373454,
+      "loss": 0.3959,
+      "step": 1440
+    },
+    {
+      "epoch": 0.7997781475318912,
+      "grad_norm": 0.27134037017822266,
+      "learning_rate": 0.0001364663949665509,
+      "loss": 0.4331,
+      "step": 1442
+    },
+    {
+      "epoch": 0.8008874098724348,
+      "grad_norm": 0.28077998757362366,
+      "learning_rate": 0.00013629903507640369,
+      "loss": 0.5282,
+      "step": 1444
+    },
+    {
+      "epoch": 0.8019966722129783,
+      "grad_norm": 0.2948680818080902,
+      "learning_rate": 0.00013613155800357385,
+      "loss": 0.3951,
+      "step": 1446
+    },
+    {
+      "epoch": 0.8031059345535219,
+      "grad_norm": 0.3196534812450409,
+      "learning_rate": 0.0001359639642887208,
+      "loss": 0.4191,
+      "step": 1448
+    },
+    {
+      "epoch": 0.8042151968940654,
+      "grad_norm": 0.26202476024627686,
+      "learning_rate": 0.00013579625447288044,
+      "loss": 0.352,
+      "step": 1450
+    },
+    {
+      "epoch": 0.8053244592346089,
+      "grad_norm": 0.24866290390491486,
+      "learning_rate": 0.00013562842909746342,
+      "loss": 0.3203,
+      "step": 1452
+    },
+    {
+      "epoch": 0.8064337215751525,
+      "grad_norm": 0.41027557849884033,
+      "learning_rate": 0.00013546048870425356,
+      "loss": 0.4187,
+      "step": 1454
+    },
+    {
+      "epoch": 0.807542983915696,
+      "grad_norm": 0.34640318155288696,
+      "learning_rate": 0.0001352924338354059,
+      "loss": 0.3204,
+      "step": 1456
+    },
+    {
+      "epoch": 0.8086522462562395,
+      "grad_norm": 0.3971330523490906,
+      "learning_rate": 0.0001351242650334451,
+      "loss": 0.4598,
+      "step": 1458
+    },
+    {
+      "epoch": 0.8097615085967831,
+      "grad_norm": 0.3868078887462616,
+      "learning_rate": 0.0001349559828412635,
+      "loss": 0.3641,
+      "step": 1460
+    },
+    {
+      "epoch": 0.8108707709373266,
+      "grad_norm": 0.26136353611946106,
+      "learning_rate": 0.00013478758780211965,
+      "loss": 0.4286,
+      "step": 1462
+    },
+    {
+      "epoch": 0.8119800332778702,
+      "grad_norm": 0.2987557351589203,
+      "learning_rate": 0.00013461908045963634,
+      "loss": 0.4286,
+      "step": 1464
+    },
+    {
+      "epoch": 0.8130892956184138,
+      "grad_norm": 0.33275967836380005,
+      "learning_rate": 0.00013445046135779885,
+      "loss": 0.3616,
+      "step": 1466
+    },
+    {
+      "epoch": 0.8141985579589573,
+      "grad_norm": 0.2795950770378113,
+      "learning_rate": 0.00013428173104095331,
+      "loss": 0.5246,
+      "step": 1468
+    },
+    {
+      "epoch": 0.8153078202995009,
+      "grad_norm": 0.3052369952201843,
+      "learning_rate": 0.00013411289005380494,
+      "loss": 0.3672,
+      "step": 1470
+    },
+    {
+      "epoch": 0.8164170826400444,
+      "grad_norm": 0.2728108763694763,
+      "learning_rate": 0.00013394393894141605,
+      "loss": 0.4897,
+      "step": 1472
+    },
+    {
+      "epoch": 0.817526344980588,
+      "grad_norm": 0.3310782015323639,
+      "learning_rate": 0.00013377487824920459,
+      "loss": 0.5144,
+      "step": 1474
+    },
+    {
+      "epoch": 0.8186356073211315,
+      "grad_norm": 0.4153352677822113,
+      "learning_rate": 0.00013360570852294227,
+      "loss": 0.5313,
+      "step": 1476
+    },
+    {
+      "epoch": 0.819744869661675,
+      "grad_norm": 0.4107600450515747,
+      "learning_rate": 0.00013343643030875276,
+      "loss": 0.4873,
+      "step": 1478
+    },
+    {
+      "epoch": 0.8208541320022186,
+      "grad_norm": 0.2750249207019806,
+      "learning_rate": 0.00013326704415311,
+      "loss": 0.4373,
+      "step": 1480
+    },
+    {
+      "epoch": 0.8219633943427621,
+      "grad_norm": 0.2961103618144989,
+      "learning_rate": 0.00013309755060283626,
+      "loss": 0.4252,
+      "step": 1482
+    },
+    {
+      "epoch": 0.8230726566833056,
+      "grad_norm": 0.25156864523887634,
+      "learning_rate": 0.00013292795020510066,
+      "loss": 0.307,
+      "step": 1484
+    },
+    {
+      "epoch": 0.8241819190238492,
+      "grad_norm": 0.2984169125556946,
+      "learning_rate": 0.00013275824350741716,
+      "loss": 0.4416,
+      "step": 1486
+    },
+    {
+      "epoch": 0.8252911813643927,
+      "grad_norm": 0.27711641788482666,
+      "learning_rate": 0.00013258843105764297,
+      "loss": 0.3382,
+      "step": 1488
+    },
+    {
+      "epoch": 0.8264004437049363,
+      "grad_norm": 0.3105831742286682,
+      "learning_rate": 0.00013241851340397656,
+      "loss": 0.4276,
+      "step": 1490
+    },
+    {
+      "epoch": 0.8275097060454798,
+      "grad_norm": 0.3303448557853699,
+      "learning_rate": 0.00013224849109495622,
+      "loss": 0.4679,
+      "step": 1492
+    },
+    {
+      "epoch": 0.8286189683860233,
+      "grad_norm": 0.3527246415615082,
+      "learning_rate": 0.00013207836467945785,
+      "loss": 0.5059,
+      "step": 1494
+    },
+    {
+      "epoch": 0.8297282307265669,
+      "grad_norm": 0.41127604246139526,
+      "learning_rate": 0.00013190813470669363,
+      "loss": 0.5412,
+      "step": 1496
+    },
+    {
+      "epoch": 0.8308374930671104,
+      "grad_norm": 0.24502909183502197,
+      "learning_rate": 0.00013173780172620999,
+      "loss": 0.3072,
+      "step": 1498
+    },
+    {
+      "epoch": 0.831946755407654,
+      "grad_norm": 0.21016305685043335,
+      "learning_rate": 0.00013156736628788584,
+      "loss": 0.3889,
+      "step": 1500
+    },
+    {
+      "epoch": 0.8330560177481975,
+      "grad_norm": 0.5300863981246948,
+      "learning_rate": 0.000131396828941931,
+      "loss": 0.5858,
+      "step": 1502
+    },
+    {
+      "epoch": 0.834165280088741,
+      "grad_norm": 0.2194632738828659,
+      "learning_rate": 0.00013122619023888402,
+      "loss": 0.3433,
+      "step": 1504
+    },
+    {
+      "epoch": 0.8352745424292846,
+      "grad_norm": 0.7686997056007385,
+      "learning_rate": 0.00013105545072961093,
+      "loss": 0.5005,
+      "step": 1506
+    },
+    {
+      "epoch": 0.8363838047698281,
+      "grad_norm": 0.2888992428779602,
+      "learning_rate": 0.00013088461096530304,
+      "loss": 0.3987,
+      "step": 1508
+    },
+    {
+      "epoch": 0.8374930671103716,
+      "grad_norm": 0.2540237009525299,
+      "learning_rate": 0.00013071367149747535,
+      "loss": 0.4531,
+      "step": 1510
+    },
+    {
+      "epoch": 0.8386023294509152,
+      "grad_norm": 0.3219870924949646,
+      "learning_rate": 0.00013054263287796465,
+      "loss": 0.561,
+      "step": 1512
+    },
+    {
+      "epoch": 0.8397115917914587,
+      "grad_norm": 0.24539148807525635,
+      "learning_rate": 0.00013037149565892794,
+      "loss": 0.3225,
+      "step": 1514
+    },
+    {
+      "epoch": 0.8408208541320022,
+      "grad_norm": 0.23382632434368134,
+      "learning_rate": 0.00013020026039284045,
+      "loss": 0.3557,
+      "step": 1516
+    },
+    {
+      "epoch": 0.8419301164725458,
+      "grad_norm": 0.2739519476890564,
+      "learning_rate": 0.00013002892763249398,
+      "loss": 0.3635,
+      "step": 1518
+    },
+    {
+      "epoch": 0.8430393788130893,
+      "grad_norm": 0.23864848911762238,
+      "learning_rate": 0.000129857497930995,
+      "loss": 0.393,
+      "step": 1520
+    },
+    {
+      "epoch": 0.8441486411536329,
+      "grad_norm": 0.3150426149368286,
+      "learning_rate": 0.00012968597184176298,
+      "loss": 0.4337,
+      "step": 1522
+    },
+    {
+      "epoch": 0.8452579034941764,
+      "grad_norm": 0.25435131788253784,
+      "learning_rate": 0.00012951434991852857,
+      "loss": 0.3681,
+      "step": 1524
+    },
+    {
+      "epoch": 0.8463671658347199,
+      "grad_norm": 0.3123234808444977,
+      "learning_rate": 0.0001293426327153317,
+      "loss": 0.4089,
+      "step": 1526
+    },
+    {
+      "epoch": 0.8474764281752635,
+      "grad_norm": 0.52870112657547,
+      "learning_rate": 0.00012917082078652,
+      "loss": 0.4779,
+      "step": 1528
+    },
+    {
+      "epoch": 0.848585690515807,
+      "grad_norm": 0.2784457206726074,
+      "learning_rate": 0.00012899891468674688,
+      "loss": 0.4163,
+      "step": 1530
+    },
+    {
+      "epoch": 0.8496949528563505,
+      "grad_norm": 0.26158082485198975,
+      "learning_rate": 0.0001288269149709697,
+      "loss": 0.464,
+      "step": 1532
+    },
+    {
+      "epoch": 0.8508042151968941,
+      "grad_norm": 0.2595478594303131,
+      "learning_rate": 0.00012865482219444804,
+      "loss": 0.3473,
+      "step": 1534
+    },
+    {
+      "epoch": 0.8519134775374376,
+      "grad_norm": 0.28629809617996216,
+      "learning_rate": 0.000128482636912742,
+      "loss": 0.4626,
+      "step": 1536
+    },
+    {
+      "epoch": 0.8530227398779812,
+      "grad_norm": 0.2769101560115814,
+      "learning_rate": 0.00012831035968171025,
+      "loss": 0.5318,
+      "step": 1538
+    },
+    {
+      "epoch": 0.8541320022185247,
+      "grad_norm": 0.32585474848747253,
+      "learning_rate": 0.00012813799105750823,
+      "loss": 0.4428,
+      "step": 1540
+    },
+    {
+      "epoch": 0.8552412645590682,
+      "grad_norm": 0.2759438157081604,
+      "learning_rate": 0.00012796553159658653,
+      "loss": 0.3853,
+      "step": 1542
+    },
+    {
+      "epoch": 0.8563505268996118,
+      "grad_norm": 0.34193405508995056,
+      "learning_rate": 0.0001277929818556889,
+      "loss": 0.5144,
+      "step": 1544
+    },
+    {
+      "epoch": 0.8574597892401553,
+      "grad_norm": 0.3385421335697174,
+      "learning_rate": 0.00012762034239185063,
+      "loss": 0.4466,
+      "step": 1546
+    },
+    {
+      "epoch": 0.8585690515806988,
+      "grad_norm": 0.3747256398200989,
+      "learning_rate": 0.00012744761376239655,
+      "loss": 0.517,
+      "step": 1548
+    },
+    {
+      "epoch": 0.8596783139212424,
+      "grad_norm": 0.31231391429901123,
+      "learning_rate": 0.00012727479652493943,
+      "loss": 0.3797,
+      "step": 1550
+    },
+    {
+      "epoch": 0.8607875762617859,
+      "grad_norm": 0.2660077214241028,
+      "learning_rate": 0.00012710189123737802,
+      "loss": 0.3481,
+      "step": 1552
+    },
+    {
+      "epoch": 0.8618968386023295,
+      "grad_norm": 0.34666526317596436,
+      "learning_rate": 0.00012692889845789538,
+      "loss": 0.4369,
+      "step": 1554
+    },
+    {
+      "epoch": 0.863006100942873,
+      "grad_norm": 0.26898959279060364,
+      "learning_rate": 0.00012675581874495697,
+      "loss": 0.4428,
+      "step": 1556
+    },
+    {
+      "epoch": 0.8641153632834165,
+      "grad_norm": 0.378214567899704,
+      "learning_rate": 0.0001265826526573089,
+      "loss": 0.5425,
+      "step": 1558
+    },
+    {
+      "epoch": 0.8652246256239601,
+      "grad_norm": 0.3740502595901489,
+      "learning_rate": 0.0001264094007539762,
+      "loss": 0.5318,
+      "step": 1560
+    },
+    {
+      "epoch": 0.8663338879645036,
+      "grad_norm": 0.25904256105422974,
+      "learning_rate": 0.00012623606359426077,
+      "loss": 0.4032,
+      "step": 1562
+    },
+    {
+      "epoch": 0.8674431503050472,
+      "grad_norm": 0.29247143864631653,
+      "learning_rate": 0.00012606264173773988,
+      "loss": 0.4446,
+      "step": 1564
+    },
+    {
+      "epoch": 0.8685524126455907,
+      "grad_norm": 0.2748325765132904,
+      "learning_rate": 0.0001258891357442642,
+      "loss": 0.4435,
+      "step": 1566
+    },
+    {
+      "epoch": 0.8696616749861342,
+      "grad_norm": 0.4215485155582428,
+      "learning_rate": 0.00012571554617395598,
+      "loss": 0.4285,
+      "step": 1568
+    },
+    {
+      "epoch": 0.8707709373266778,
+      "grad_norm": 0.2875049412250519,
+      "learning_rate": 0.00012554187358720725,
+      "loss": 0.4743,
+      "step": 1570
+    },
+    {
+      "epoch": 0.8718801996672213,
+      "grad_norm": 0.3484318256378174,
+      "learning_rate": 0.00012536811854467817,
+      "loss": 0.4997,
+      "step": 1572
+    },
+    {
+      "epoch": 0.8729894620077648,
+      "grad_norm": 0.33628854155540466,
+      "learning_rate": 0.0001251942816072949,
+      "loss": 0.5044,
+      "step": 1574
+    },
+    {
+      "epoch": 0.8740987243483084,
+      "grad_norm": 0.28208592534065247,
+      "learning_rate": 0.00012502036333624815,
+      "loss": 0.3405,
+      "step": 1576
+    },
+    {
+      "epoch": 0.8752079866888519,
+      "grad_norm": 0.2113669067621231,
+      "learning_rate": 0.00012484636429299114,
+      "loss": 0.3996,
+      "step": 1578
+    },
+    {
+      "epoch": 0.8763172490293955,
+      "grad_norm": 0.2464189976453781,
+      "learning_rate": 0.00012467228503923773,
+      "loss": 0.3572,
+      "step": 1580
+    },
+    {
+      "epoch": 0.877426511369939,
+      "grad_norm": 0.391923189163208,
+      "learning_rate": 0.00012449812613696094,
+      "loss": 0.6324,
+      "step": 1582
+    },
+    {
+      "epoch": 0.8785357737104825,
+      "grad_norm": 0.2782968282699585,
+      "learning_rate": 0.0001243238881483907,
+      "loss": 0.352,
+      "step": 1584
+    },
+    {
+      "epoch": 0.8796450360510261,
+      "grad_norm": 0.30209866166114807,
+      "learning_rate": 0.00012414957163601236,
+      "loss": 0.4611,
+      "step": 1586
+    },
+    {
+      "epoch": 0.8807542983915696,
+      "grad_norm": 0.2990707457065582,
+      "learning_rate": 0.0001239751771625648,
+      "loss": 0.5419,
+      "step": 1588
+    },
+    {
+      "epoch": 0.8818635607321131,
+      "grad_norm": 0.25346022844314575,
+      "learning_rate": 0.00012380070529103852,
+      "loss": 0.4205,
+      "step": 1590
+    },
+    {
+      "epoch": 0.8829728230726567,
+      "grad_norm": 0.33787423372268677,
+      "learning_rate": 0.00012362615658467377,
+      "loss": 0.4186,
+      "step": 1592
+    },
+    {
+      "epoch": 0.8840820854132002,
+      "grad_norm": 0.23708541691303253,
+      "learning_rate": 0.00012345153160695917,
+      "loss": 0.3261,
+      "step": 1594
+    },
+    {
+      "epoch": 0.8851913477537438,
+      "grad_norm": 0.22642351686954498,
+      "learning_rate": 0.00012327683092162918,
+      "loss": 0.3415,
+      "step": 1596
+    },
+    {
+      "epoch": 0.8863006100942873,
+      "grad_norm": 0.27935388684272766,
+      "learning_rate": 0.00012310205509266292,
+      "loss": 0.3471,
+      "step": 1598
+    },
+    {
+      "epoch": 0.8874098724348308,
+      "grad_norm": 0.23772071301937103,
+      "learning_rate": 0.000122927204684282,
+      "loss": 0.3386,
+      "step": 1600
     }
   ],
   "logging_steps": 2,
@@ -4506,7 +5626,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 3897330499584000.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null