googlefan commited on
Commit
eb652e8
·
verified ·
1 Parent(s): 53760f5

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +703 -3
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c58976b502792127695bd0ffc1d164a730a91adbfc49daa7a4af6c9dc5f4e27
3
  size 2815117504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce622b98726b39ea3525b3a60f165f4a2e245c6f303bb285dc9a30d63fae39d
3
  size 2815117504
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2aaa9d63074b75e27ef4e54bfa37f7a6ae7f6fba877724cdc31576ed5f3640ca
3
  size 2486357050
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7e8c9cebcc9825d94ce1a61b80717541b152fc6070b98f7b5289160d3393d52
3
  size 2486357050
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6246754fb8f452326ac23bbc1d9ccf2ab04d37a4184e397bc00a914c5ad489c4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8238cb53c30f0afbee1f016fc637546ca2ff4b464b4241ee444a3083fdd4a52
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1488750598372426,
5
  "eval_steps": 500,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4207,6 +4207,706 @@
4207
  "learning_rate": 8.374629289717937e-06,
4208
  "loss": 1.7362,
4209
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4210
  }
4211
  ],
4212
  "logging_steps": 1,
@@ -4226,7 +4926,7 @@
4226
  "attributes": {}
4227
  }
4228
  },
4229
- "total_flos": 3.6087930691466035e+17,
4230
  "train_batch_size": 10,
4231
  "trial_name": null,
4232
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.340354236476783,
5
  "eval_steps": 500,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4207
  "learning_rate": 8.374629289717937e-06,
4208
  "loss": 1.7362,
4209
  "step": 600
4210
+ },
4211
+ {
4212
+ "epoch": 1.1507898516036381,
4213
+ "grad_norm": 1.765625,
4214
+ "learning_rate": 8.343357820842829e-06,
4215
+ "loss": 1.6909,
4216
+ "step": 601
4217
+ },
4218
+ {
4219
+ "epoch": 1.1527046433700334,
4220
+ "grad_norm": 1.671875,
4221
+ "learning_rate": 8.31210300068534e-06,
4222
+ "loss": 1.641,
4223
+ "step": 602
4224
+ },
4225
+ {
4226
+ "epoch": 1.154619435136429,
4227
+ "grad_norm": 1.78125,
4228
+ "learning_rate": 8.280865143346301e-06,
4229
+ "loss": 1.7413,
4230
+ "step": 603
4231
+ },
4232
+ {
4233
+ "epoch": 1.1565342269028243,
4234
+ "grad_norm": 1.7265625,
4235
+ "learning_rate": 8.249644562756056e-06,
4236
+ "loss": 1.6597,
4237
+ "step": 604
4238
+ },
4239
+ {
4240
+ "epoch": 1.1584490186692198,
4241
+ "grad_norm": 1.7890625,
4242
+ "learning_rate": 8.218441572671343e-06,
4243
+ "loss": 1.6769,
4244
+ "step": 605
4245
+ },
4246
+ {
4247
+ "epoch": 1.160363810435615,
4248
+ "grad_norm": 1.8671875,
4249
+ "learning_rate": 8.187256486672106e-06,
4250
+ "loss": 1.6976,
4251
+ "step": 606
4252
+ },
4253
+ {
4254
+ "epoch": 1.1622786022020106,
4255
+ "grad_norm": 1.640625,
4256
+ "learning_rate": 8.156089618158366e-06,
4257
+ "loss": 1.6065,
4258
+ "step": 607
4259
+ },
4260
+ {
4261
+ "epoch": 1.164193393968406,
4262
+ "grad_norm": 1.75,
4263
+ "learning_rate": 8.12494128034706e-06,
4264
+ "loss": 1.7632,
4265
+ "step": 608
4266
+ },
4267
+ {
4268
+ "epoch": 1.1661081857348012,
4269
+ "grad_norm": 1.7421875,
4270
+ "learning_rate": 8.093811786268905e-06,
4271
+ "loss": 1.6849,
4272
+ "step": 609
4273
+ },
4274
+ {
4275
+ "epoch": 1.1680229775011968,
4276
+ "grad_norm": 1.796875,
4277
+ "learning_rate": 8.06270144876523e-06,
4278
+ "loss": 1.7192,
4279
+ "step": 610
4280
+ },
4281
+ {
4282
+ "epoch": 1.169937769267592,
4283
+ "grad_norm": 1.6796875,
4284
+ "learning_rate": 8.03161058048487e-06,
4285
+ "loss": 1.6864,
4286
+ "step": 611
4287
+ },
4288
+ {
4289
+ "epoch": 1.1718525610339876,
4290
+ "grad_norm": 1.703125,
4291
+ "learning_rate": 8.000539493880972e-06,
4292
+ "loss": 1.704,
4293
+ "step": 612
4294
+ },
4295
+ {
4296
+ "epoch": 1.173767352800383,
4297
+ "grad_norm": 1.765625,
4298
+ "learning_rate": 7.969488501207909e-06,
4299
+ "loss": 1.6947,
4300
+ "step": 613
4301
+ },
4302
+ {
4303
+ "epoch": 1.1756821445667784,
4304
+ "grad_norm": 1.703125,
4305
+ "learning_rate": 7.938457914518098e-06,
4306
+ "loss": 1.7126,
4307
+ "step": 614
4308
+ },
4309
+ {
4310
+ "epoch": 1.1775969363331737,
4311
+ "grad_norm": 1.765625,
4312
+ "learning_rate": 7.907448045658899e-06,
4313
+ "loss": 1.7502,
4314
+ "step": 615
4315
+ },
4316
+ {
4317
+ "epoch": 1.1795117280995693,
4318
+ "grad_norm": 1.7578125,
4319
+ "learning_rate": 7.876459206269446e-06,
4320
+ "loss": 1.7348,
4321
+ "step": 616
4322
+ },
4323
+ {
4324
+ "epoch": 1.1814265198659646,
4325
+ "grad_norm": 1.7109375,
4326
+ "learning_rate": 7.845491707777551e-06,
4327
+ "loss": 1.6578,
4328
+ "step": 617
4329
+ },
4330
+ {
4331
+ "epoch": 1.18334131163236,
4332
+ "grad_norm": 1.734375,
4333
+ "learning_rate": 7.814545861396543e-06,
4334
+ "loss": 1.7479,
4335
+ "step": 618
4336
+ },
4337
+ {
4338
+ "epoch": 1.1852561033987554,
4339
+ "grad_norm": 1.7421875,
4340
+ "learning_rate": 7.783621978122167e-06,
4341
+ "loss": 1.7027,
4342
+ "step": 619
4343
+ },
4344
+ {
4345
+ "epoch": 1.1871708951651507,
4346
+ "grad_norm": 1.7265625,
4347
+ "learning_rate": 7.752720368729436e-06,
4348
+ "loss": 1.6828,
4349
+ "step": 620
4350
+ },
4351
+ {
4352
+ "epoch": 1.1890856869315463,
4353
+ "grad_norm": 1.703125,
4354
+ "learning_rate": 7.721841343769518e-06,
4355
+ "loss": 1.689,
4356
+ "step": 621
4357
+ },
4358
+ {
4359
+ "epoch": 1.1910004786979416,
4360
+ "grad_norm": 1.6796875,
4361
+ "learning_rate": 7.69098521356662e-06,
4362
+ "loss": 1.6498,
4363
+ "step": 622
4364
+ },
4365
+ {
4366
+ "epoch": 1.192915270464337,
4367
+ "grad_norm": 1.6796875,
4368
+ "learning_rate": 7.660152288214865e-06,
4369
+ "loss": 1.6855,
4370
+ "step": 623
4371
+ },
4372
+ {
4373
+ "epoch": 1.1948300622307324,
4374
+ "grad_norm": 1.7109375,
4375
+ "learning_rate": 7.629342877575169e-06,
4376
+ "loss": 1.7234,
4377
+ "step": 624
4378
+ },
4379
+ {
4380
+ "epoch": 1.196744853997128,
4381
+ "grad_norm": 1.7734375,
4382
+ "learning_rate": 7.598557291272133e-06,
4383
+ "loss": 1.7746,
4384
+ "step": 625
4385
+ },
4386
+ {
4387
+ "epoch": 1.1986596457635232,
4388
+ "grad_norm": 1.765625,
4389
+ "learning_rate": 7.567795838690941e-06,
4390
+ "loss": 1.7283,
4391
+ "step": 626
4392
+ },
4393
+ {
4394
+ "epoch": 1.2005744375299185,
4395
+ "grad_norm": 1.6953125,
4396
+ "learning_rate": 7.537058828974226e-06,
4397
+ "loss": 1.7204,
4398
+ "step": 627
4399
+ },
4400
+ {
4401
+ "epoch": 1.202489229296314,
4402
+ "grad_norm": 1.7109375,
4403
+ "learning_rate": 7.506346571018992e-06,
4404
+ "loss": 1.7417,
4405
+ "step": 628
4406
+ },
4407
+ {
4408
+ "epoch": 1.2044040210627094,
4409
+ "grad_norm": 1.7109375,
4410
+ "learning_rate": 7.475659373473481e-06,
4411
+ "loss": 1.6638,
4412
+ "step": 629
4413
+ },
4414
+ {
4415
+ "epoch": 1.206318812829105,
4416
+ "grad_norm": 1.6796875,
4417
+ "learning_rate": 7.444997544734105e-06,
4418
+ "loss": 1.7001,
4419
+ "step": 630
4420
+ },
4421
+ {
4422
+ "epoch": 1.2082336045955002,
4423
+ "grad_norm": 1.6953125,
4424
+ "learning_rate": 7.414361392942307e-06,
4425
+ "loss": 1.7401,
4426
+ "step": 631
4427
+ },
4428
+ {
4429
+ "epoch": 1.2101483963618955,
4430
+ "grad_norm": 1.765625,
4431
+ "learning_rate": 7.383751225981503e-06,
4432
+ "loss": 1.7466,
4433
+ "step": 632
4434
+ },
4435
+ {
4436
+ "epoch": 1.212063188128291,
4437
+ "grad_norm": 1.875,
4438
+ "learning_rate": 7.353167351473955e-06,
4439
+ "loss": 1.6681,
4440
+ "step": 633
4441
+ },
4442
+ {
4443
+ "epoch": 1.2139779798946864,
4444
+ "grad_norm": 1.71875,
4445
+ "learning_rate": 7.322610076777707e-06,
4446
+ "loss": 1.7014,
4447
+ "step": 634
4448
+ },
4449
+ {
4450
+ "epoch": 1.215892771661082,
4451
+ "grad_norm": 1.734375,
4452
+ "learning_rate": 7.292079708983475e-06,
4453
+ "loss": 1.7218,
4454
+ "step": 635
4455
+ },
4456
+ {
4457
+ "epoch": 1.2178075634274772,
4458
+ "grad_norm": 1.7578125,
4459
+ "learning_rate": 7.261576554911575e-06,
4460
+ "loss": 1.7206,
4461
+ "step": 636
4462
+ },
4463
+ {
4464
+ "epoch": 1.2197223551938727,
4465
+ "grad_norm": 1.7421875,
4466
+ "learning_rate": 7.2311009211088255e-06,
4467
+ "loss": 1.6895,
4468
+ "step": 637
4469
+ },
4470
+ {
4471
+ "epoch": 1.221637146960268,
4472
+ "grad_norm": 1.734375,
4473
+ "learning_rate": 7.20065311384549e-06,
4474
+ "loss": 1.744,
4475
+ "step": 638
4476
+ },
4477
+ {
4478
+ "epoch": 1.2235519387266636,
4479
+ "grad_norm": 1.703125,
4480
+ "learning_rate": 7.170233439112164e-06,
4481
+ "loss": 1.7217,
4482
+ "step": 639
4483
+ },
4484
+ {
4485
+ "epoch": 1.2254667304930589,
4486
+ "grad_norm": 1.6875,
4487
+ "learning_rate": 7.139842202616741e-06,
4488
+ "loss": 1.6799,
4489
+ "step": 640
4490
+ },
4491
+ {
4492
+ "epoch": 1.2273815222594542,
4493
+ "grad_norm": 1.71875,
4494
+ "learning_rate": 7.109479709781302e-06,
4495
+ "loss": 1.7117,
4496
+ "step": 641
4497
+ },
4498
+ {
4499
+ "epoch": 1.2292963140258497,
4500
+ "grad_norm": 1.734375,
4501
+ "learning_rate": 7.079146265739079e-06,
4502
+ "loss": 1.6948,
4503
+ "step": 642
4504
+ },
4505
+ {
4506
+ "epoch": 1.231211105792245,
4507
+ "grad_norm": 1.78125,
4508
+ "learning_rate": 7.048842175331356e-06,
4509
+ "loss": 1.7343,
4510
+ "step": 643
4511
+ },
4512
+ {
4513
+ "epoch": 1.2331258975586405,
4514
+ "grad_norm": 1.7578125,
4515
+ "learning_rate": 7.0185677431044404e-06,
4516
+ "loss": 1.7078,
4517
+ "step": 644
4518
+ },
4519
+ {
4520
+ "epoch": 1.2350406893250359,
4521
+ "grad_norm": 1.671875,
4522
+ "learning_rate": 6.988323273306569e-06,
4523
+ "loss": 1.7168,
4524
+ "step": 645
4525
+ },
4526
+ {
4527
+ "epoch": 1.2369554810914314,
4528
+ "grad_norm": 1.6875,
4529
+ "learning_rate": 6.958109069884879e-06,
4530
+ "loss": 1.6997,
4531
+ "step": 646
4532
+ },
4533
+ {
4534
+ "epoch": 1.2388702728578267,
4535
+ "grad_norm": 1.6640625,
4536
+ "learning_rate": 6.9279254364823265e-06,
4537
+ "loss": 1.6204,
4538
+ "step": 647
4539
+ },
4540
+ {
4541
+ "epoch": 1.2407850646242222,
4542
+ "grad_norm": 1.7265625,
4543
+ "learning_rate": 6.897772676434663e-06,
4544
+ "loss": 1.727,
4545
+ "step": 648
4546
+ },
4547
+ {
4548
+ "epoch": 1.2426998563906175,
4549
+ "grad_norm": 1.71875,
4550
+ "learning_rate": 6.867651092767366e-06,
4551
+ "loss": 1.7278,
4552
+ "step": 649
4553
+ },
4554
+ {
4555
+ "epoch": 1.2446146481570128,
4556
+ "grad_norm": 1.703125,
4557
+ "learning_rate": 6.837560988192593e-06,
4558
+ "loss": 1.7087,
4559
+ "step": 650
4560
+ },
4561
+ {
4562
+ "epoch": 1.2465294399234084,
4563
+ "grad_norm": 1.65625,
4564
+ "learning_rate": 6.807502665106164e-06,
4565
+ "loss": 1.6614,
4566
+ "step": 651
4567
+ },
4568
+ {
4569
+ "epoch": 1.2484442316898037,
4570
+ "grad_norm": 1.7578125,
4571
+ "learning_rate": 6.777476425584486e-06,
4572
+ "loss": 1.7264,
4573
+ "step": 652
4574
+ },
4575
+ {
4576
+ "epoch": 1.2503590234561992,
4577
+ "grad_norm": 1.7109375,
4578
+ "learning_rate": 6.747482571381556e-06,
4579
+ "loss": 1.701,
4580
+ "step": 653
4581
+ },
4582
+ {
4583
+ "epoch": 1.2522738152225945,
4584
+ "grad_norm": 1.6875,
4585
+ "learning_rate": 6.717521403925892e-06,
4586
+ "loss": 1.7061,
4587
+ "step": 654
4588
+ },
4589
+ {
4590
+ "epoch": 1.2541886069889898,
4591
+ "grad_norm": 1.7890625,
4592
+ "learning_rate": 6.687593224317533e-06,
4593
+ "loss": 1.6551,
4594
+ "step": 655
4595
+ },
4596
+ {
4597
+ "epoch": 1.2561033987553853,
4598
+ "grad_norm": 1.6796875,
4599
+ "learning_rate": 6.657698333324991e-06,
4600
+ "loss": 1.731,
4601
+ "step": 656
4602
+ },
4603
+ {
4604
+ "epoch": 1.2580181905217809,
4605
+ "grad_norm": 1.65625,
4606
+ "learning_rate": 6.627837031382246e-06,
4607
+ "loss": 1.7075,
4608
+ "step": 657
4609
+ },
4610
+ {
4611
+ "epoch": 1.2599329822881762,
4612
+ "grad_norm": 1.65625,
4613
+ "learning_rate": 6.598009618585717e-06,
4614
+ "loss": 1.6765,
4615
+ "step": 658
4616
+ },
4617
+ {
4618
+ "epoch": 1.2618477740545715,
4619
+ "grad_norm": 1.765625,
4620
+ "learning_rate": 6.568216394691245e-06,
4621
+ "loss": 1.7378,
4622
+ "step": 659
4623
+ },
4624
+ {
4625
+ "epoch": 1.263762565820967,
4626
+ "grad_norm": 1.78125,
4627
+ "learning_rate": 6.538457659111084e-06,
4628
+ "loss": 1.7609,
4629
+ "step": 660
4630
+ },
4631
+ {
4632
+ "epoch": 1.2656773575873623,
4633
+ "grad_norm": 1.6953125,
4634
+ "learning_rate": 6.5087337109109e-06,
4635
+ "loss": 1.7036,
4636
+ "step": 661
4637
+ },
4638
+ {
4639
+ "epoch": 1.2675921493537579,
4640
+ "grad_norm": 1.6875,
4641
+ "learning_rate": 6.479044848806739e-06,
4642
+ "loss": 1.6546,
4643
+ "step": 662
4644
+ },
4645
+ {
4646
+ "epoch": 1.2695069411201532,
4647
+ "grad_norm": 1.7421875,
4648
+ "learning_rate": 6.4493913711620685e-06,
4649
+ "loss": 1.7018,
4650
+ "step": 663
4651
+ },
4652
+ {
4653
+ "epoch": 1.2714217328865485,
4654
+ "grad_norm": 1.703125,
4655
+ "learning_rate": 6.419773575984727e-06,
4656
+ "loss": 1.7357,
4657
+ "step": 664
4658
+ },
4659
+ {
4660
+ "epoch": 1.273336524652944,
4661
+ "grad_norm": 1.6875,
4662
+ "learning_rate": 6.390191760923978e-06,
4663
+ "loss": 1.6928,
4664
+ "step": 665
4665
+ },
4666
+ {
4667
+ "epoch": 1.2752513164193395,
4668
+ "grad_norm": 1.7734375,
4669
+ "learning_rate": 6.360646223267477e-06,
4670
+ "loss": 1.6623,
4671
+ "step": 666
4672
+ },
4673
+ {
4674
+ "epoch": 1.2771661081857348,
4675
+ "grad_norm": 1.84375,
4676
+ "learning_rate": 6.3311372599383245e-06,
4677
+ "loss": 1.6921,
4678
+ "step": 667
4679
+ },
4680
+ {
4681
+ "epoch": 1.2790808999521301,
4682
+ "grad_norm": 1.78125,
4683
+ "learning_rate": 6.301665167492037e-06,
4684
+ "loss": 1.7036,
4685
+ "step": 668
4686
+ },
4687
+ {
4688
+ "epoch": 1.2809956917185257,
4689
+ "grad_norm": 1.7578125,
4690
+ "learning_rate": 6.272230242113613e-06,
4691
+ "loss": 1.7099,
4692
+ "step": 669
4693
+ },
4694
+ {
4695
+ "epoch": 1.282910483484921,
4696
+ "grad_norm": 1.6875,
4697
+ "learning_rate": 6.242832779614521e-06,
4698
+ "loss": 1.6826,
4699
+ "step": 670
4700
+ },
4701
+ {
4702
+ "epoch": 1.2848252752513165,
4703
+ "grad_norm": 1.8046875,
4704
+ "learning_rate": 6.213473075429741e-06,
4705
+ "loss": 1.7058,
4706
+ "step": 671
4707
+ },
4708
+ {
4709
+ "epoch": 1.2867400670177118,
4710
+ "grad_norm": 1.8515625,
4711
+ "learning_rate": 6.184151424614795e-06,
4712
+ "loss": 1.6677,
4713
+ "step": 672
4714
+ },
4715
+ {
4716
+ "epoch": 1.2886548587841071,
4717
+ "grad_norm": 1.7578125,
4718
+ "learning_rate": 6.154868121842788e-06,
4719
+ "loss": 1.7125,
4720
+ "step": 673
4721
+ },
4722
+ {
4723
+ "epoch": 1.2905696505505027,
4724
+ "grad_norm": 1.7734375,
4725
+ "learning_rate": 6.1256234614014256e-06,
4726
+ "loss": 1.6581,
4727
+ "step": 674
4728
+ },
4729
+ {
4730
+ "epoch": 1.292484442316898,
4731
+ "grad_norm": 1.78125,
4732
+ "learning_rate": 6.096417737190085e-06,
4733
+ "loss": 1.7104,
4734
+ "step": 675
4735
+ },
4736
+ {
4737
+ "epoch": 1.2943992340832935,
4738
+ "grad_norm": 1.671875,
4739
+ "learning_rate": 6.067251242716838e-06,
4740
+ "loss": 1.6612,
4741
+ "step": 676
4742
+ },
4743
+ {
4744
+ "epoch": 1.2963140258496888,
4745
+ "grad_norm": 1.75,
4746
+ "learning_rate": 6.038124271095507e-06,
4747
+ "loss": 1.6501,
4748
+ "step": 677
4749
+ },
4750
+ {
4751
+ "epoch": 1.2982288176160843,
4752
+ "grad_norm": 1.734375,
4753
+ "learning_rate": 6.0090371150427375e-06,
4754
+ "loss": 1.7283,
4755
+ "step": 678
4756
+ },
4757
+ {
4758
+ "epoch": 1.3001436093824796,
4759
+ "grad_norm": 1.7265625,
4760
+ "learning_rate": 5.979990066875022e-06,
4761
+ "loss": 1.699,
4762
+ "step": 679
4763
+ },
4764
+ {
4765
+ "epoch": 1.3020584011488752,
4766
+ "grad_norm": 1.84375,
4767
+ "learning_rate": 5.950983418505799e-06,
4768
+ "loss": 1.7458,
4769
+ "step": 680
4770
+ },
4771
+ {
4772
+ "epoch": 1.3039731929152705,
4773
+ "grad_norm": 1.703125,
4774
+ "learning_rate": 5.922017461442492e-06,
4775
+ "loss": 1.6889,
4776
+ "step": 681
4777
+ },
4778
+ {
4779
+ "epoch": 1.3058879846816658,
4780
+ "grad_norm": 1.7265625,
4781
+ "learning_rate": 5.893092486783594e-06,
4782
+ "loss": 1.6935,
4783
+ "step": 682
4784
+ },
4785
+ {
4786
+ "epoch": 1.3078027764480613,
4787
+ "grad_norm": 1.7109375,
4788
+ "learning_rate": 5.864208785215732e-06,
4789
+ "loss": 1.6641,
4790
+ "step": 683
4791
+ },
4792
+ {
4793
+ "epoch": 1.3097175682144566,
4794
+ "grad_norm": 1.734375,
4795
+ "learning_rate": 5.835366647010767e-06,
4796
+ "loss": 1.7062,
4797
+ "step": 684
4798
+ },
4799
+ {
4800
+ "epoch": 1.3116323599808521,
4801
+ "grad_norm": 1.6796875,
4802
+ "learning_rate": 5.8065663620228404e-06,
4803
+ "loss": 1.7008,
4804
+ "step": 685
4805
+ },
4806
+ {
4807
+ "epoch": 1.3135471517472475,
4808
+ "grad_norm": 1.7265625,
4809
+ "learning_rate": 5.777808219685496e-06,
4810
+ "loss": 1.7002,
4811
+ "step": 686
4812
+ },
4813
+ {
4814
+ "epoch": 1.3154619435136428,
4815
+ "grad_norm": 1.7265625,
4816
+ "learning_rate": 5.749092509008761e-06,
4817
+ "loss": 1.6896,
4818
+ "step": 687
4819
+ },
4820
+ {
4821
+ "epoch": 1.3173767352800383,
4822
+ "grad_norm": 1.7578125,
4823
+ "learning_rate": 5.720419518576223e-06,
4824
+ "loss": 1.7014,
4825
+ "step": 688
4826
+ },
4827
+ {
4828
+ "epoch": 1.3192915270464338,
4829
+ "grad_norm": 1.75,
4830
+ "learning_rate": 5.691789536542161e-06,
4831
+ "loss": 1.6799,
4832
+ "step": 689
4833
+ },
4834
+ {
4835
+ "epoch": 1.3212063188128291,
4836
+ "grad_norm": 1.7265625,
4837
+ "learning_rate": 5.6632028506286266e-06,
4838
+ "loss": 1.6558,
4839
+ "step": 690
4840
+ },
4841
+ {
4842
+ "epoch": 1.3231211105792244,
4843
+ "grad_norm": 1.6640625,
4844
+ "learning_rate": 5.634659748122552e-06,
4845
+ "loss": 1.6286,
4846
+ "step": 691
4847
+ },
4848
+ {
4849
+ "epoch": 1.32503590234562,
4850
+ "grad_norm": 1.6796875,
4851
+ "learning_rate": 5.606160515872886e-06,
4852
+ "loss": 1.6983,
4853
+ "step": 692
4854
+ },
4855
+ {
4856
+ "epoch": 1.3269506941120153,
4857
+ "grad_norm": 1.8359375,
4858
+ "learning_rate": 5.57770544028768e-06,
4859
+ "loss": 1.7115,
4860
+ "step": 693
4861
+ },
4862
+ {
4863
+ "epoch": 1.3288654858784108,
4864
+ "grad_norm": 1.7265625,
4865
+ "learning_rate": 5.5492948073312406e-06,
4866
+ "loss": 1.719,
4867
+ "step": 694
4868
+ },
4869
+ {
4870
+ "epoch": 1.330780277644806,
4871
+ "grad_norm": 1.7421875,
4872
+ "learning_rate": 5.520928902521221e-06,
4873
+ "loss": 1.7074,
4874
+ "step": 695
4875
+ },
4876
+ {
4877
+ "epoch": 1.3326950694112014,
4878
+ "grad_norm": 1.65625,
4879
+ "learning_rate": 5.492608010925793e-06,
4880
+ "loss": 1.7135,
4881
+ "step": 696
4882
+ },
4883
+ {
4884
+ "epoch": 1.334609861177597,
4885
+ "grad_norm": 1.7109375,
4886
+ "learning_rate": 5.46433241716074e-06,
4887
+ "loss": 1.6395,
4888
+ "step": 697
4889
+ },
4890
+ {
4891
+ "epoch": 1.3365246529439925,
4892
+ "grad_norm": 1.90625,
4893
+ "learning_rate": 5.436102405386636e-06,
4894
+ "loss": 1.7543,
4895
+ "step": 698
4896
+ },
4897
+ {
4898
+ "epoch": 1.3384394447103878,
4899
+ "grad_norm": 1.78125,
4900
+ "learning_rate": 5.407918259305951e-06,
4901
+ "loss": 1.6431,
4902
+ "step": 699
4903
+ },
4904
+ {
4905
+ "epoch": 1.340354236476783,
4906
+ "grad_norm": 1.7109375,
4907
+ "learning_rate": 5.379780262160237e-06,
4908
+ "loss": 1.7222,
4909
+ "step": 700
4910
  }
4911
  ],
4912
  "logging_steps": 1,
 
4926
  "attributes": {}
4927
  }
4928
  },
4929
+ "total_flos": 4.2104340592931635e+17,
4930
  "train_batch_size": 10,
4931
  "trial_name": null,
4932
  "trial_params": null