File size: 25,579 Bytes
64834d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.970940170940171,
  "eval_steps": 500,
  "global_step": 219,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06837606837606838,
      "grad_norm": 192.408203125,
      "learning_rate": 1.1363636363636365e-05,
      "log_odds_chosen": 0.022557739168405533,
      "log_odds_ratio": -0.775595486164093,
      "logps/chosen": -1.23601233959198,
      "logps/rejected": -1.2647087574005127,
      "loss": 64.8127,
      "nll_loss": 1.6612399816513062,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": -0.61800616979599,
      "rewards/margins": 0.014348246157169342,
      "rewards/rejected": -0.6323543787002563,
      "step": 5
    },
    {
      "epoch": 0.13675213675213677,
      "grad_norm": 87.27720642089844,
      "learning_rate": 2.272727272727273e-05,
      "log_odds_chosen": 0.01737348921597004,
      "log_odds_ratio": -0.7335888743400574,
      "logps/chosen": -0.9425485730171204,
      "logps/rejected": -0.9615429043769836,
      "loss": 54.8539,
      "nll_loss": 1.3627849817276,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": -0.4712742865085602,
      "rewards/margins": 0.009497147053480148,
      "rewards/rejected": -0.4807714521884918,
      "step": 10
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 50.64696502685547,
      "learning_rate": 3.409090909090909e-05,
      "log_odds_chosen": 0.15462855994701385,
      "log_odds_ratio": -0.6995453834533691,
      "logps/chosen": -0.799379289150238,
      "logps/rejected": -0.8898841738700867,
      "loss": 51.0896,
      "nll_loss": 1.258803367614746,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.399689644575119,
      "rewards/margins": 0.04525243118405342,
      "rewards/rejected": -0.44494208693504333,
      "step": 15
    },
    {
      "epoch": 0.27350427350427353,
      "grad_norm": 54.63002014160156,
      "learning_rate": 4.545454545454546e-05,
      "log_odds_chosen": 0.23047371208667755,
      "log_odds_ratio": -0.6775897741317749,
      "logps/chosen": -0.7368494272232056,
      "logps/rejected": -0.8401222229003906,
      "loss": 48.2524,
      "nll_loss": 1.1753368377685547,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": -0.3684247136116028,
      "rewards/margins": 0.05163642019033432,
      "rewards/rejected": -0.4200611114501953,
      "step": 20
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 75.0562973022461,
      "learning_rate": 4.9971395327545466e-05,
      "log_odds_chosen": 0.16501149535179138,
      "log_odds_ratio": -0.6983749270439148,
      "logps/chosen": -0.7642364501953125,
      "logps/rejected": -0.8525689244270325,
      "loss": 47.8234,
      "nll_loss": 1.1575247049331665,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.38211822509765625,
      "rewards/margins": 0.04416622966527939,
      "rewards/rejected": -0.42628446221351624,
      "step": 25
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 50.43643569946289,
      "learning_rate": 4.979682598982912e-05,
      "log_odds_chosen": 0.28536584973335266,
      "log_odds_ratio": -0.6665671467781067,
      "logps/chosen": -0.7366295456886292,
      "logps/rejected": -0.8632861971855164,
      "loss": 46.5783,
      "nll_loss": 1.1268101930618286,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.3683147728443146,
      "rewards/margins": 0.0633283406496048,
      "rewards/rejected": -0.4316430985927582,
      "step": 30
    },
    {
      "epoch": 0.47863247863247865,
      "grad_norm": 44.052860260009766,
      "learning_rate": 4.9464686742003006e-05,
      "log_odds_chosen": 0.11640346050262451,
      "log_odds_ratio": -0.7353156208992004,
      "logps/chosen": -0.7775823473930359,
      "logps/rejected": -0.8460026979446411,
      "loss": 46.9736,
      "nll_loss": 1.1240406036376953,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": -0.38879117369651794,
      "rewards/margins": 0.03421013802289963,
      "rewards/rejected": -0.42300134897232056,
      "step": 35
    },
    {
      "epoch": 0.5470085470085471,
      "grad_norm": 47.66566848754883,
      "learning_rate": 4.8977088142549285e-05,
      "log_odds_chosen": 0.22347350418567657,
      "log_odds_ratio": -0.6904687285423279,
      "logps/chosen": -0.7437100410461426,
      "logps/rejected": -0.8720178604125977,
      "loss": 45.6756,
      "nll_loss": 1.09463632106781,
      "rewards/accuracies": 0.5718749761581421,
      "rewards/chosen": -0.3718550205230713,
      "rewards/margins": 0.06415387988090515,
      "rewards/rejected": -0.43600893020629883,
      "step": 40
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 44.04689025878906,
      "learning_rate": 4.833712860686666e-05,
      "log_odds_chosen": 0.3655330538749695,
      "log_odds_ratio": -0.6214441657066345,
      "logps/chosen": -0.7377647161483765,
      "logps/rejected": -0.9412744641304016,
      "loss": 43.8278,
      "nll_loss": 1.0561692714691162,
      "rewards/accuracies": 0.6343749761581421,
      "rewards/chosen": -0.36888235807418823,
      "rewards/margins": 0.10175484418869019,
      "rewards/rejected": -0.4706372320652008,
      "step": 45
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 45.08797073364258,
      "learning_rate": 4.754887471857969e-05,
      "log_odds_chosen": 0.44758883118629456,
      "log_odds_ratio": -0.6216556429862976,
      "logps/chosen": -0.6983757615089417,
      "logps/rejected": -0.9704947471618652,
      "loss": 42.6963,
      "nll_loss": 1.0264930725097656,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.3491878807544708,
      "rewards/margins": 0.13605953752994537,
      "rewards/rejected": -0.4852473735809326,
      "step": 50
    },
    {
      "epoch": 0.7521367521367521,
      "grad_norm": 42.02785110473633,
      "learning_rate": 4.6617335388682556e-05,
      "log_odds_chosen": 0.3934350609779358,
      "log_odds_ratio": -0.6445830464363098,
      "logps/chosen": -0.7269451022148132,
      "logps/rejected": -0.9580278396606445,
      "loss": 43.8584,
      "nll_loss": 1.0543291568756104,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.3634725511074066,
      "rewards/margins": 0.11554142087697983,
      "rewards/rejected": -0.47901391983032227,
      "step": 55
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 40.826419830322266,
      "learning_rate": 4.554843002672129e-05,
      "log_odds_chosen": 0.5472866296768188,
      "log_odds_ratio": -0.6086785793304443,
      "logps/chosen": -0.7054045796394348,
      "logps/rejected": -1.0688399076461792,
      "loss": 43.2701,
      "nll_loss": 1.048005223274231,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.3527022898197174,
      "rewards/margins": 0.1817176640033722,
      "rewards/rejected": -0.5344199538230896,
      "step": 60
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 39.6722297668457,
      "learning_rate": 4.434895092626883e-05,
      "log_odds_chosen": 0.5648136138916016,
      "log_odds_ratio": -0.6339768171310425,
      "logps/chosen": -0.7455043792724609,
      "logps/rejected": -1.1616737842559814,
      "loss": 44.6575,
      "nll_loss": 1.0870110988616943,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": -0.37275218963623047,
      "rewards/margins": 0.20808465778827667,
      "rewards/rejected": -0.5808368921279907,
      "step": 65
    },
    {
      "epoch": 0.9572649572649573,
      "grad_norm": 39.50532913208008,
      "learning_rate": 4.302652010371205e-05,
      "log_odds_chosen": 0.5741121172904968,
      "log_odds_ratio": -0.6454743146896362,
      "logps/chosen": -0.7433961033821106,
      "logps/rejected": -1.129797339439392,
      "loss": 44.1232,
      "nll_loss": 1.0643303394317627,
      "rewards/accuracies": 0.6343749761581421,
      "rewards/chosen": -0.3716980516910553,
      "rewards/margins": 0.19320069253444672,
      "rewards/rejected": -0.564898669719696,
      "step": 70
    },
    {
      "epoch": 1.0136752136752136,
      "grad_norm": 55.45820236206055,
      "learning_rate": 4.1589540864616025e-05,
      "log_odds_chosen": 0.7051900625228882,
      "log_odds_ratio": -0.5978847742080688,
      "logps/chosen": -0.6891235113143921,
      "logps/rejected": -1.1345970630645752,
      "loss": 35.3591,
      "nll_loss": 1.039401650428772,
      "rewards/accuracies": 0.6174242496490479,
      "rewards/chosen": -0.34456175565719604,
      "rewards/margins": 0.22273679077625275,
      "rewards/rejected": -0.5672985315322876,
      "step": 75
    },
    {
      "epoch": 1.082051282051282,
      "grad_norm": 43.05059814453125,
      "learning_rate": 4.0047144405434175e-05,
      "log_odds_chosen": 0.9981684684753418,
      "log_odds_ratio": -0.4477527141571045,
      "logps/chosen": -0.561476469039917,
      "logps/rejected": -1.0778883695602417,
      "loss": 36.5516,
      "nll_loss": 0.8772686719894409,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.2807382345199585,
      "rewards/margins": 0.25820592045783997,
      "rewards/rejected": -0.5389441847801208,
      "step": 80
    },
    {
      "epoch": 1.1504273504273503,
      "grad_norm": 38.42478942871094,
      "learning_rate": 3.84091317898803e-05,
      "log_odds_chosen": 1.1495044231414795,
      "log_odds_ratio": -0.4064292907714844,
      "logps/chosen": -0.5493744611740112,
      "logps/rejected": -1.1405811309814453,
      "loss": 35.8963,
      "nll_loss": 0.8741697072982788,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.2746872305870056,
      "rewards/margins": 0.29560327529907227,
      "rewards/rejected": -0.5702905654907227,
      "step": 85
    },
    {
      "epoch": 1.218803418803419,
      "grad_norm": 44.85431671142578,
      "learning_rate": 3.668591166867035e-05,
      "log_odds_chosen": 1.2424638271331787,
      "log_odds_ratio": -0.3852007985115051,
      "logps/chosen": -0.5860949754714966,
      "logps/rejected": -1.2700952291488647,
      "loss": 34.9224,
      "nll_loss": 0.8511130213737488,
      "rewards/accuracies": 0.840624988079071,
      "rewards/chosen": -0.2930474877357483,
      "rewards/margins": 0.34200018644332886,
      "rewards/rejected": -0.6350476145744324,
      "step": 90
    },
    {
      "epoch": 1.287179487179487,
      "grad_norm": 44.10654067993164,
      "learning_rate": 3.488843413838963e-05,
      "log_odds_chosen": 1.3683379888534546,
      "log_odds_ratio": -0.3600567579269409,
      "logps/chosen": -0.5340205430984497,
      "logps/rejected": -1.239029884338379,
      "loss": 34.0177,
      "nll_loss": 0.8299106359481812,
      "rewards/accuracies": 0.878125011920929,
      "rewards/chosen": -0.26701027154922485,
      "rewards/margins": 0.35250476002693176,
      "rewards/rejected": -0.6195149421691895,
      "step": 95
    },
    {
      "epoch": 1.3555555555555556,
      "grad_norm": 38.597747802734375,
      "learning_rate": 3.3028121159775656e-05,
      "log_odds_chosen": 1.4798239469528198,
      "log_odds_ratio": -0.3565274178981781,
      "logps/chosen": -0.5620280504226685,
      "logps/rejected": -1.3859326839447021,
      "loss": 34.4413,
      "nll_loss": 0.8532856702804565,
      "rewards/accuracies": 0.8343750238418579,
      "rewards/chosen": -0.28101402521133423,
      "rewards/margins": 0.41195231676101685,
      "rewards/rejected": -0.6929663419723511,
      "step": 100
    },
    {
      "epoch": 1.423931623931624,
      "grad_norm": 51.27384948730469,
      "learning_rate": 3.111679397756906e-05,
      "log_odds_chosen": 1.3512942790985107,
      "log_odds_ratio": -0.3769288659095764,
      "logps/chosen": -0.6085891127586365,
      "logps/rejected": -1.3841993808746338,
      "loss": 34.4354,
      "nll_loss": 0.8462675213813782,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.30429455637931824,
      "rewards/margins": 0.38780516386032104,
      "rewards/rejected": -0.6920996904373169,
      "step": 105
    },
    {
      "epoch": 1.4923076923076923,
      "grad_norm": 43.76097106933594,
      "learning_rate": 2.9166598003138766e-05,
      "log_odds_chosen": 1.6075998544692993,
      "log_odds_ratio": -0.3306867480278015,
      "logps/chosen": -0.5251437425613403,
      "logps/rejected": -1.3855717182159424,
      "loss": 33.6202,
      "nll_loss": 0.8343909978866577,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -0.26257187128067017,
      "rewards/margins": 0.43021392822265625,
      "rewards/rejected": -0.6927858591079712,
      "step": 110
    },
    {
      "epoch": 1.5606837606837607,
      "grad_norm": 47.68281555175781,
      "learning_rate": 2.7189925637210323e-05,
      "log_odds_chosen": 1.4052133560180664,
      "log_odds_ratio": -0.36428430676460266,
      "logps/chosen": -0.5953341722488403,
      "logps/rejected": -1.3725194931030273,
      "loss": 34.6962,
      "nll_loss": 0.8555153012275696,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.29766708612442017,
      "rewards/margins": 0.3885926604270935,
      "rewards/rejected": -0.6862597465515137,
      "step": 115
    },
    {
      "epoch": 1.629059829059829,
      "grad_norm": 44.739234924316406,
      "learning_rate": 2.5199337523115418e-05,
      "log_odds_chosen": 1.314573884010315,
      "log_odds_ratio": -0.3843991756439209,
      "logps/chosen": -0.5602730512619019,
      "logps/rejected": -1.2206534147262573,
      "loss": 34.2171,
      "nll_loss": 0.8327986001968384,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.2801365256309509,
      "rewards/margins": 0.3301902115345001,
      "rewards/rejected": -0.6103267073631287,
      "step": 120
    },
    {
      "epoch": 1.6974358974358974,
      "grad_norm": 48.93033218383789,
      "learning_rate": 2.3207482730954063e-05,
      "log_odds_chosen": 1.6598823070526123,
      "log_odds_ratio": -0.31844857335090637,
      "logps/chosen": -0.5364278554916382,
      "logps/rejected": -1.481249213218689,
      "loss": 33.4162,
      "nll_loss": 0.8371628522872925,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.2682139277458191,
      "rewards/margins": 0.4724105894565582,
      "rewards/rejected": -0.7406246066093445,
      "step": 125
    },
    {
      "epoch": 1.7658119658119658,
      "grad_norm": 47.49737548828125,
      "learning_rate": 2.1227018379854383e-05,
      "log_odds_chosen": 1.7132043838500977,
      "log_odds_ratio": -0.309685617685318,
      "logps/chosen": -0.5513170957565308,
      "logps/rejected": -1.5208324193954468,
      "loss": 33.881,
      "nll_loss": 0.8536204099655151,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.2756585478782654,
      "rewards/margins": 0.4847577214241028,
      "rewards/rejected": -0.7604162096977234,
      "step": 130
    },
    {
      "epoch": 1.8341880341880343,
      "grad_norm": 53.4583740234375,
      "learning_rate": 1.927052920908528e-05,
      "log_odds_chosen": 1.623297929763794,
      "log_odds_ratio": -0.3428027033805847,
      "logps/chosen": -0.5809999704360962,
      "logps/rejected": -1.4746825695037842,
      "loss": 35.7338,
      "nll_loss": 0.8969090580940247,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.2904999852180481,
      "rewards/margins": 0.4468413293361664,
      "rewards/rejected": -0.7373412847518921,
      "step": 135
    },
    {
      "epoch": 1.9025641025641025,
      "grad_norm": 44.477012634277344,
      "learning_rate": 1.735044760910251e-05,
      "log_odds_chosen": 1.4694030284881592,
      "log_odds_ratio": -0.36373740434646606,
      "logps/chosen": -0.5533854365348816,
      "logps/rejected": -1.3526192903518677,
      "loss": 33.8163,
      "nll_loss": 0.8294602632522583,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.2766927182674408,
      "rewards/margins": 0.39961689710617065,
      "rewards/rejected": -0.6763096451759338,
      "step": 140
    },
    {
      "epoch": 1.970940170940171,
      "grad_norm": 62.9003791809082,
      "learning_rate": 1.547897462068592e-05,
      "log_odds_chosen": 1.5487511157989502,
      "log_odds_ratio": -0.33681467175483704,
      "logps/chosen": -0.5517206192016602,
      "logps/rejected": -1.4492119550704956,
      "loss": 34.3693,
      "nll_loss": 0.8605127334594727,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.2758603096008301,
      "rewards/margins": 0.4487456679344177,
      "rewards/rejected": -0.7246059775352478,
      "step": 145
    },
    {
      "epoch": 2.0273504273504273,
      "grad_norm": 42.154563903808594,
      "learning_rate": 1.3668002404174047e-05,
      "log_odds_chosen": 2.2226734161376953,
      "log_odds_ratio": -0.2391778826713562,
      "logps/chosen": -0.4616817235946655,
      "logps/rejected": -1.7343417406082153,
      "loss": 23.7203,
      "nll_loss": 0.7272225618362427,
      "rewards/accuracies": 0.9280303120613098,
      "rewards/chosen": -0.23084086179733276,
      "rewards/margins": 0.6363300085067749,
      "rewards/rejected": -0.8671708703041077,
      "step": 150
    },
    {
      "epoch": 2.095726495726496,
      "grad_norm": 80.82354736328125,
      "learning_rate": 1.1929038671460486e-05,
      "log_odds_chosen": 3.258676052093506,
      "log_odds_ratio": -0.14076226949691772,
      "logps/chosen": -0.4106404781341553,
      "logps/rejected": -2.355543375015259,
      "loss": 26.2695,
      "nll_loss": 0.7021722197532654,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -0.20532023906707764,
      "rewards/margins": 0.972451388835907,
      "rewards/rejected": -1.1777716875076294,
      "step": 155
    },
    {
      "epoch": 2.164102564102564,
      "grad_norm": 49.468326568603516,
      "learning_rate": 1.027313356094443e-05,
      "log_odds_chosen": 3.117168426513672,
      "log_odds_ratio": -0.13647082448005676,
      "logps/chosen": -0.4620683789253235,
      "logps/rejected": -2.3730220794677734,
      "loss": 26.6823,
      "nll_loss": 0.7180877327919006,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.23103418946266174,
      "rewards/margins": 0.9554769396781921,
      "rewards/rejected": -1.1865110397338867,
      "step": 160
    },
    {
      "epoch": 2.2324786324786325,
      "grad_norm": 48.83137893676758,
      "learning_rate": 8.710809420103789e-06,
      "log_odds_chosen": 3.0069823265075684,
      "log_odds_ratio": -0.1508590579032898,
      "logps/chosen": -0.3987279534339905,
      "logps/rejected": -2.156508684158325,
      "loss": 25.979,
      "nll_loss": 0.6839076280593872,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.19936397671699524,
      "rewards/margins": 0.8788902163505554,
      "rewards/rejected": -1.0782543420791626,
      "step": 165
    },
    {
      "epoch": 2.3008547008547007,
      "grad_norm": 59.909706115722656,
      "learning_rate": 7.251993941883428e-06,
      "log_odds_chosen": 3.3163504600524902,
      "log_odds_ratio": -0.13661204278469086,
      "logps/chosen": -0.40769442915916443,
      "logps/rejected": -2.3925185203552246,
      "loss": 24.6734,
      "nll_loss": 0.6533316373825073,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": -0.20384721457958221,
      "rewards/margins": 0.9924120903015137,
      "rewards/rejected": -1.1962592601776123,
      "step": 170
    },
    {
      "epoch": 2.3692307692307693,
      "grad_norm": 52.876243591308594,
      "learning_rate": 5.905957079779187e-06,
      "log_odds_chosen": 3.2027995586395264,
      "log_odds_ratio": -0.14145739376544952,
      "logps/chosen": -0.4457460343837738,
      "logps/rejected": -2.37770414352417,
      "loss": 25.9488,
      "nll_loss": 0.6930140256881714,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": -0.2228730171918869,
      "rewards/margins": 0.9659790992736816,
      "rewards/rejected": -1.188852071762085,
      "step": 175
    },
    {
      "epoch": 2.437606837606838,
      "grad_norm": 58.3582763671875,
      "learning_rate": 4.681252142486841e-06,
      "log_odds_chosen": 3.392512559890747,
      "log_odds_ratio": -0.13067595660686493,
      "logps/chosen": -0.4224206507205963,
      "logps/rejected": -2.505432605743408,
      "loss": 24.921,
      "nll_loss": 0.6670365333557129,
      "rewards/accuracies": 0.9781249761581421,
      "rewards/chosen": -0.21121032536029816,
      "rewards/margins": 1.0415061712265015,
      "rewards/rejected": -1.252716302871704,
      "step": 180
    },
    {
      "epoch": 2.505982905982906,
      "grad_norm": 72.88838195800781,
      "learning_rate": 3.585661442426494e-06,
      "log_odds_chosen": 3.295175552368164,
      "log_odds_ratio": -0.14244017004966736,
      "logps/chosen": -0.46335524320602417,
      "logps/rejected": -2.5462584495544434,
      "loss": 27.0982,
      "nll_loss": 0.7326253056526184,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -0.23167762160301208,
      "rewards/margins": 1.0414518117904663,
      "rewards/rejected": -1.2731292247772217,
      "step": 185
    },
    {
      "epoch": 2.574358974358974,
      "grad_norm": 53.10452651977539,
      "learning_rate": 2.6261468435155978e-06,
      "log_odds_chosen": 3.560279369354248,
      "log_odds_ratio": -0.1270817220211029,
      "logps/chosen": -0.3945046365261078,
      "logps/rejected": -2.5356345176696777,
      "loss": 24.711,
      "nll_loss": 0.6610320806503296,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.1972523182630539,
      "rewards/margins": 1.070564866065979,
      "rewards/rejected": -1.2678172588348389,
      "step": 190
    },
    {
      "epoch": 2.6427350427350427,
      "grad_norm": 60.35097885131836,
      "learning_rate": 1.8088055224315697e-06,
      "log_odds_chosen": 3.335855007171631,
      "log_odds_ratio": -0.1391274482011795,
      "logps/chosen": -0.4181729853153229,
      "logps/rejected": -2.4335877895355225,
      "loss": 25.7787,
      "nll_loss": 0.6885385513305664,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.20908649265766144,
      "rewards/margins": 1.0077073574066162,
      "rewards/rejected": -1.2167938947677612,
      "step": 195
    },
    {
      "epoch": 2.7111111111111112,
      "grad_norm": 78.32765197753906,
      "learning_rate": 1.138831224476533e-06,
      "log_odds_chosen": 3.4376022815704346,
      "log_odds_ratio": -0.1307111382484436,
      "logps/chosen": -0.44153517484664917,
      "logps/rejected": -2.567284107208252,
      "loss": 25.5218,
      "nll_loss": 0.6872699856758118,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.22076758742332458,
      "rewards/margins": 1.062874436378479,
      "rewards/rejected": -1.283642053604126,
      "step": 200
    },
    {
      "epoch": 2.7794871794871794,
      "grad_norm": 72.06491088867188,
      "learning_rate": 6.204812602412902e-07,
      "log_odds_chosen": 3.316706895828247,
      "log_odds_ratio": -0.12540897727012634,
      "logps/chosen": -0.4180404543876648,
      "logps/rejected": -2.4521901607513428,
      "loss": 25.071,
      "nll_loss": 0.6733669638633728,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -0.2090202271938324,
      "rewards/margins": 1.0170748233795166,
      "rewards/rejected": -1.2260950803756714,
      "step": 205
    },
    {
      "epoch": 2.847863247863248,
      "grad_norm": 59.340885162353516,
      "learning_rate": 2.5704945278623436e-07,
      "log_odds_chosen": 3.219451904296875,
      "log_odds_ratio": -0.13932213187217712,
      "logps/chosen": -0.44474905729293823,
      "logps/rejected": -2.4706287384033203,
      "loss": 25.9818,
      "nll_loss": 0.6953645944595337,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.22237452864646912,
      "rewards/margins": 1.012939691543579,
      "rewards/rejected": -1.2353143692016602,
      "step": 210
    },
    {
      "epoch": 2.916239316239316,
      "grad_norm": 90.88463592529297,
      "learning_rate": 5.0845207244715196e-08,
      "log_odds_chosen": 3.495487689971924,
      "log_odds_ratio": -0.12796048820018768,
      "logps/chosen": -0.4196755290031433,
      "logps/rejected": -2.5507898330688477,
      "loss": 24.7788,
      "nll_loss": 0.6663211584091187,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -0.20983776450157166,
      "rewards/margins": 1.0655572414398193,
      "rewards/rejected": -1.2753949165344238,
      "step": 215
    },
    {
      "epoch": 2.970940170940171,
      "step": 219,
      "total_flos": 0.0,
      "train_loss": 35.7124394107627,
      "train_runtime": 3905.4181,
      "train_samples_per_second": 3.594,
      "train_steps_per_second": 0.056
    }
  ],
  "logging_steps": 5,
  "max_steps": 219,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}