|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3356, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.488095238095238e-09, |
|
"logits/chosen": -2.6795692443847656, |
|
"logits/rejected": -2.624149799346924, |
|
"logps/chosen": -54.570396423339844, |
|
"logps/rejected": -74.21392822265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4880952380952379e-08, |
|
"logits/chosen": -2.7060725688934326, |
|
"logits/rejected": -2.6765432357788086, |
|
"logps/chosen": -95.24983978271484, |
|
"logps/rejected": -91.18234252929688, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 0.0005662046023644507, |
|
"rewards/margins": -0.006994906347244978, |
|
"rewards/rejected": 0.007561111822724342, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9761904761904758e-08, |
|
"logits/chosen": -2.5795836448669434, |
|
"logits/rejected": -2.592409133911133, |
|
"logps/chosen": -124.33586120605469, |
|
"logps/rejected": -103.54573822021484, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0065773227252066135, |
|
"rewards/margins": -0.0029559016693383455, |
|
"rewards/rejected": 0.009533221833407879, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.4642857142857145e-08, |
|
"logits/chosen": -2.579939126968384, |
|
"logits/rejected": -2.5497870445251465, |
|
"logps/chosen": -68.13322448730469, |
|
"logps/rejected": -66.37541961669922, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.01673651486635208, |
|
"rewards/margins": 0.00222357758320868, |
|
"rewards/rejected": 0.01451293658465147, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.9523809523809515e-08, |
|
"logits/chosen": -2.6564245223999023, |
|
"logits/rejected": -2.608503818511963, |
|
"logps/chosen": -83.7612533569336, |
|
"logps/rejected": -79.3699951171875, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.050556618720293045, |
|
"rewards/margins": 0.005645673722028732, |
|
"rewards/rejected": 0.04491094499826431, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.44047619047619e-08, |
|
"logits/chosen": -2.752234935760498, |
|
"logits/rejected": -2.6355555057525635, |
|
"logps/chosen": -127.2625503540039, |
|
"logps/rejected": -114.26876068115234, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.09844812005758286, |
|
"rewards/margins": 0.0011480912799015641, |
|
"rewards/rejected": 0.09730003774166107, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.928571428571429e-08, |
|
"logits/chosen": -2.669374704360962, |
|
"logits/rejected": -2.652597188949585, |
|
"logps/chosen": -103.32049560546875, |
|
"logps/rejected": -105.29325103759766, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.19618754088878632, |
|
"rewards/margins": 0.012378268875181675, |
|
"rewards/rejected": 0.18380926549434662, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.7530548572540283, |
|
"logits/rejected": -2.680541753768921, |
|
"logps/chosen": -84.53085327148438, |
|
"logps/rejected": -84.82635498046875, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.33352726697921753, |
|
"rewards/margins": 0.020224859938025475, |
|
"rewards/rejected": 0.3133023679256439, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1904761904761903e-07, |
|
"logits/chosen": -2.572601079940796, |
|
"logits/rejected": -2.5415000915527344, |
|
"logps/chosen": -96.4114761352539, |
|
"logps/rejected": -84.30821228027344, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4342936873435974, |
|
"rewards/margins": 0.0613841637969017, |
|
"rewards/rejected": 0.3729095458984375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3392857142857142e-07, |
|
"logits/chosen": -2.7009196281433105, |
|
"logits/rejected": -2.698122262954712, |
|
"logps/chosen": -78.68132781982422, |
|
"logps/rejected": -81.79669189453125, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5974748730659485, |
|
"rewards/margins": 0.08051940053701401, |
|
"rewards/rejected": 0.5169554948806763, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.488095238095238e-07, |
|
"logits/chosen": -2.5833797454833984, |
|
"logits/rejected": -2.624276876449585, |
|
"logps/chosen": -77.67559814453125, |
|
"logps/rejected": -90.95040130615234, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5890167355537415, |
|
"rewards/margins": 0.06459061056375504, |
|
"rewards/rejected": 0.5244261026382446, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6369047619047617e-07, |
|
"logits/chosen": -2.5440800189971924, |
|
"logits/rejected": -2.536761522293091, |
|
"logps/chosen": -79.65280151367188, |
|
"logps/rejected": -77.1148681640625, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.48702484369277954, |
|
"rewards/margins": 0.02558879181742668, |
|
"rewards/rejected": 0.46143603324890137, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7857142857142858e-07, |
|
"logits/chosen": -2.59000301361084, |
|
"logits/rejected": -2.6294052600860596, |
|
"logps/chosen": -98.95535278320312, |
|
"logps/rejected": -93.15876770019531, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.44851523637771606, |
|
"rewards/margins": 0.04791822284460068, |
|
"rewards/rejected": 0.4005970060825348, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9345238095238096e-07, |
|
"logits/chosen": -2.5660836696624756, |
|
"logits/rejected": -2.532435894012451, |
|
"logps/chosen": -81.32213592529297, |
|
"logps/rejected": -86.37200927734375, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6584704518318176, |
|
"rewards/margins": 0.1672821044921875, |
|
"rewards/rejected": 0.4911883771419525, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.657209873199463, |
|
"logits/rejected": -2.620845079421997, |
|
"logps/chosen": -98.81898498535156, |
|
"logps/rejected": -91.02754974365234, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8377985954284668, |
|
"rewards/margins": 0.049154218286275864, |
|
"rewards/rejected": 0.7886443138122559, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.232142857142857e-07, |
|
"logits/chosen": -2.594756603240967, |
|
"logits/rejected": -2.5098514556884766, |
|
"logps/chosen": -108.9326171875, |
|
"logps/rejected": -124.50955963134766, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.6377179622650146, |
|
"rewards/margins": 0.7904380559921265, |
|
"rewards/rejected": -0.15272006392478943, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -2.5515310764312744, |
|
"logits/rejected": -2.4522361755371094, |
|
"logps/chosen": -90.93934631347656, |
|
"logps/rejected": -106.53071594238281, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8264306783676147, |
|
"rewards/margins": 0.5799387097358704, |
|
"rewards/rejected": 0.246491938829422, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5297619047619046e-07, |
|
"logits/chosen": -2.511021137237549, |
|
"logits/rejected": -2.5456349849700928, |
|
"logps/chosen": -91.14982604980469, |
|
"logps/rejected": -99.70429992675781, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7046107649803162, |
|
"rewards/margins": 0.36221450567245483, |
|
"rewards/rejected": 0.34239625930786133, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6785714285714284e-07, |
|
"logits/chosen": -2.520282030105591, |
|
"logits/rejected": -2.503950595855713, |
|
"logps/chosen": -79.16224670410156, |
|
"logps/rejected": -89.08283233642578, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.8210590481758118, |
|
"rewards/margins": 0.16991613805294037, |
|
"rewards/rejected": 0.651142954826355, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.827380952380952e-07, |
|
"logits/chosen": -2.6823697090148926, |
|
"logits/rejected": -2.633678674697876, |
|
"logps/chosen": -104.0126724243164, |
|
"logps/rejected": -103.51971435546875, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1408202648162842, |
|
"rewards/margins": 0.2863886058330536, |
|
"rewards/rejected": 0.8544318079948425, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.976190476190476e-07, |
|
"logits/chosen": -2.530428409576416, |
|
"logits/rejected": -2.50227689743042, |
|
"logps/chosen": -100.63572692871094, |
|
"logps/rejected": -94.46806335449219, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.7896903157234192, |
|
"rewards/margins": 0.45959681272506714, |
|
"rewards/rejected": 0.33009350299835205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.4940271377563477, |
|
"logits/rejected": -2.5085806846618652, |
|
"logps/chosen": -92.1917724609375, |
|
"logps/rejected": -107.3184585571289, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7482628226280212, |
|
"rewards/margins": 0.49973025918006897, |
|
"rewards/rejected": 0.24853253364562988, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.2738095238095235e-07, |
|
"logits/chosen": -2.5470972061157227, |
|
"logits/rejected": -2.5241191387176514, |
|
"logps/chosen": -113.54488372802734, |
|
"logps/rejected": -129.91867065429688, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4822530746459961, |
|
"rewards/margins": 0.5863619446754456, |
|
"rewards/rejected": -0.10410883277654648, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4226190476190473e-07, |
|
"logits/chosen": -2.5854454040527344, |
|
"logits/rejected": -2.427126169204712, |
|
"logps/chosen": -95.35980987548828, |
|
"logps/rejected": -81.82037353515625, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8959482908248901, |
|
"rewards/margins": 0.8998041152954102, |
|
"rewards/rejected": -0.0038558482192456722, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": -2.5749735832214355, |
|
"logits/rejected": -2.58799409866333, |
|
"logps/chosen": -76.01658630371094, |
|
"logps/rejected": -77.50577545166016, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.28254395723342896, |
|
"rewards/margins": 0.4179397523403168, |
|
"rewards/rejected": -0.13539579510688782, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7202380952380953e-07, |
|
"logits/chosen": -2.655733823776245, |
|
"logits/rejected": -2.6001226902008057, |
|
"logps/chosen": -112.2961654663086, |
|
"logps/rejected": -124.30081939697266, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.081606388092041, |
|
"rewards/margins": 0.3873857855796814, |
|
"rewards/rejected": 0.6942206025123596, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.869047619047619e-07, |
|
"logits/chosen": -2.3797781467437744, |
|
"logits/rejected": -2.3257176876068115, |
|
"logps/chosen": -100.49422454833984, |
|
"logps/rejected": -116.31571197509766, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.26938995718955994, |
|
"rewards/margins": 0.5422745943069458, |
|
"rewards/rejected": -0.27288463711738586, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.017857142857143e-07, |
|
"logits/chosen": -2.506838321685791, |
|
"logits/rejected": -2.5618858337402344, |
|
"logps/chosen": -103.68598937988281, |
|
"logps/rejected": -116.80242919921875, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9570896029472351, |
|
"rewards/margins": 0.3445149064064026, |
|
"rewards/rejected": 0.6125746965408325, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.5634925365448, |
|
"logits/rejected": -2.520244836807251, |
|
"logps/chosen": -102.6960678100586, |
|
"logps/rejected": -90.80632019042969, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.0112148523330688, |
|
"rewards/margins": 0.3889988362789154, |
|
"rewards/rejected": 0.6222161054611206, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3154761904761904e-07, |
|
"logits/chosen": -2.569206714630127, |
|
"logits/rejected": -2.5652623176574707, |
|
"logps/chosen": -85.24828338623047, |
|
"logps/rejected": -93.45872497558594, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3659771978855133, |
|
"rewards/margins": 0.7742798924446106, |
|
"rewards/rejected": -0.4083026945590973, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.464285714285714e-07, |
|
"logits/chosen": -2.363185167312622, |
|
"logits/rejected": -2.371516227722168, |
|
"logps/chosen": -99.2336654663086, |
|
"logps/rejected": -92.32693481445312, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7069708704948425, |
|
"rewards/margins": 0.8502944111824036, |
|
"rewards/rejected": -0.1433234065771103, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.613095238095238e-07, |
|
"logits/chosen": -2.412259578704834, |
|
"logits/rejected": -2.4086456298828125, |
|
"logps/chosen": -96.43733978271484, |
|
"logps/rejected": -120.0870590209961, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.19554999470710754, |
|
"rewards/margins": 1.321014404296875, |
|
"rewards/rejected": -1.1254642009735107, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -2.6165080070495605, |
|
"logits/rejected": -2.6191306114196777, |
|
"logps/chosen": -117.46064758300781, |
|
"logps/rejected": -122.75732421875, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7428444623947144, |
|
"rewards/margins": 0.3669503331184387, |
|
"rewards/rejected": -1.1097948551177979, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.910714285714285e-07, |
|
"logits/chosen": -2.493110179901123, |
|
"logits/rejected": -2.4452643394470215, |
|
"logps/chosen": -91.34004211425781, |
|
"logps/rejected": -103.17684173583984, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6157582402229309, |
|
"rewards/margins": 0.7595478892326355, |
|
"rewards/rejected": -1.375306248664856, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.993377483443708e-07, |
|
"logits/chosen": -2.547645092010498, |
|
"logits/rejected": -2.4399895668029785, |
|
"logps/chosen": -106.4365005493164, |
|
"logps/rejected": -109.07222747802734, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.36611366271972656, |
|
"rewards/margins": 0.6897183060646057, |
|
"rewards/rejected": -1.0558319091796875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.97682119205298e-07, |
|
"logits/chosen": -2.5453834533691406, |
|
"logits/rejected": -2.5119881629943848, |
|
"logps/chosen": -108.45722961425781, |
|
"logps/rejected": -105.61241149902344, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.21297264099121094, |
|
"rewards/margins": 0.35165560245513916, |
|
"rewards/rejected": -0.13868291676044464, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.960264900662251e-07, |
|
"logits/chosen": -2.568861484527588, |
|
"logits/rejected": -2.552140712738037, |
|
"logps/chosen": -99.7040786743164, |
|
"logps/rejected": -109.383544921875, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2864856421947479, |
|
"rewards/margins": 0.8699267506599426, |
|
"rewards/rejected": -1.1564123630523682, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.943708609271523e-07, |
|
"logits/chosen": -2.584989070892334, |
|
"logits/rejected": -2.524940013885498, |
|
"logps/chosen": -116.22591400146484, |
|
"logps/rejected": -132.27352905273438, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3508208990097046, |
|
"rewards/margins": 0.757738471031189, |
|
"rewards/rejected": -1.1085593700408936, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.927152317880794e-07, |
|
"logits/chosen": -2.5064499378204346, |
|
"logits/rejected": -2.520719528198242, |
|
"logps/chosen": -105.9725570678711, |
|
"logps/rejected": -106.05126953125, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.491422414779663, |
|
"rewards/margins": 0.45032089948654175, |
|
"rewards/rejected": -1.9417431354522705, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.910596026490066e-07, |
|
"logits/chosen": -2.4913430213928223, |
|
"logits/rejected": -2.5125203132629395, |
|
"logps/chosen": -124.0137710571289, |
|
"logps/rejected": -119.0078353881836, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6193113327026367, |
|
"rewards/margins": 0.656644880771637, |
|
"rewards/rejected": -2.275956392288208, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.894039735099338e-07, |
|
"logits/chosen": -2.5196266174316406, |
|
"logits/rejected": -2.492640256881714, |
|
"logps/chosen": -108.40077209472656, |
|
"logps/rejected": -106.96036529541016, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8650729060173035, |
|
"rewards/margins": 0.4100722372531891, |
|
"rewards/rejected": -1.275145173072815, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.877483443708609e-07, |
|
"logits/chosen": -2.3113367557525635, |
|
"logits/rejected": -2.363025426864624, |
|
"logps/chosen": -108.32320404052734, |
|
"logps/rejected": -96.14768981933594, |
|
"loss": 1.0008, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.168811559677124, |
|
"rewards/margins": -1.5380103588104248, |
|
"rewards/rejected": -0.6308012008666992, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.860927152317881e-07, |
|
"logits/chosen": -2.2521350383758545, |
|
"logits/rejected": -2.2686538696289062, |
|
"logps/chosen": -78.05595397949219, |
|
"logps/rejected": -93.2776107788086, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5212607979774475, |
|
"rewards/margins": 0.686114490032196, |
|
"rewards/rejected": -1.207375407218933, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.844370860927152e-07, |
|
"logits/chosen": -2.2812628746032715, |
|
"logits/rejected": -2.29258394241333, |
|
"logps/chosen": -128.2143096923828, |
|
"logps/rejected": -135.92117309570312, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.4639101028442383, |
|
"rewards/margins": 1.0309460163116455, |
|
"rewards/rejected": -3.494856595993042, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.827814569536423e-07, |
|
"logits/chosen": -2.3497612476348877, |
|
"logits/rejected": -2.259904384613037, |
|
"logps/chosen": -126.2747802734375, |
|
"logps/rejected": -132.0948944091797, |
|
"loss": 0.5087, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5596282482147217, |
|
"rewards/margins": 1.1223429441452026, |
|
"rewards/rejected": -2.681971549987793, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.811258278145695e-07, |
|
"logits/chosen": -2.3696093559265137, |
|
"logits/rejected": -2.355694055557251, |
|
"logps/chosen": -113.27628326416016, |
|
"logps/rejected": -120.5525131225586, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5899262428283691, |
|
"rewards/margins": 0.7907289266586304, |
|
"rewards/rejected": -1.380655288696289, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.794701986754966e-07, |
|
"logits/chosen": -2.4090988636016846, |
|
"logits/rejected": -2.4314303398132324, |
|
"logps/chosen": -119.7711410522461, |
|
"logps/rejected": -138.52122497558594, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2273411750793457, |
|
"rewards/margins": 1.0350992679595947, |
|
"rewards/rejected": -2.2624402046203613, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.778145695364238e-07, |
|
"logits/chosen": -2.414658546447754, |
|
"logits/rejected": -2.4013447761535645, |
|
"logps/chosen": -101.0434799194336, |
|
"logps/rejected": -102.90351867675781, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9459658861160278, |
|
"rewards/margins": 0.6103629469871521, |
|
"rewards/rejected": -1.5563287734985352, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.76158940397351e-07, |
|
"logits/chosen": -2.42374849319458, |
|
"logits/rejected": -2.4381699562072754, |
|
"logps/chosen": -113.9575424194336, |
|
"logps/rejected": -121.03520202636719, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7829158902168274, |
|
"rewards/margins": 1.2523859739303589, |
|
"rewards/rejected": -2.035301923751831, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7450331125827815e-07, |
|
"logits/chosen": -2.4486849308013916, |
|
"logits/rejected": -2.4538803100585938, |
|
"logps/chosen": -97.44860076904297, |
|
"logps/rejected": -100.29484558105469, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2252352237701416, |
|
"rewards/margins": 0.4965124726295471, |
|
"rewards/rejected": -0.7217476963996887, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.728476821192053e-07, |
|
"logits/chosen": -2.4106860160827637, |
|
"logits/rejected": -2.477334499359131, |
|
"logps/chosen": -87.63328552246094, |
|
"logps/rejected": -96.80977630615234, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.022742483764886856, |
|
"rewards/margins": 0.6748077273368835, |
|
"rewards/rejected": -0.6520652174949646, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -2.339754343032837, |
|
"eval_logits/rejected": -2.2989299297332764, |
|
"eval_logps/chosen": -104.51243591308594, |
|
"eval_logps/rejected": -112.7801513671875, |
|
"eval_loss": 0.6348409652709961, |
|
"eval_rewards/accuracies": 0.7120535969734192, |
|
"eval_rewards/chosen": -0.46458080410957336, |
|
"eval_rewards/margins": 1.1086541414260864, |
|
"eval_rewards/rejected": -1.5732349157333374, |
|
"eval_runtime": 528.3305, |
|
"eval_samples_per_second": 3.38, |
|
"eval_steps_per_second": 0.106, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7119205298013243e-07, |
|
"logits/chosen": -2.3279285430908203, |
|
"logits/rejected": -2.2698190212249756, |
|
"logps/chosen": -91.65203094482422, |
|
"logps/rejected": -111.75373840332031, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5981258153915405, |
|
"rewards/margins": 1.0291321277618408, |
|
"rewards/rejected": -1.6272579431533813, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.6953642384105957e-07, |
|
"logits/chosen": -2.432509183883667, |
|
"logits/rejected": -2.4649786949157715, |
|
"logps/chosen": -113.98470306396484, |
|
"logps/rejected": -131.01609802246094, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3087894320487976, |
|
"rewards/margins": 1.156360387802124, |
|
"rewards/rejected": -1.4651498794555664, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.678807947019867e-07, |
|
"logits/chosen": -2.4623587131500244, |
|
"logits/rejected": -2.406970500946045, |
|
"logps/chosen": -111.83284759521484, |
|
"logps/rejected": -117.27791595458984, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3450089991092682, |
|
"rewards/margins": 1.3268024921417236, |
|
"rewards/rejected": -1.6718114614486694, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.662251655629139e-07, |
|
"logits/chosen": -2.434732675552368, |
|
"logits/rejected": -2.482849597930908, |
|
"logps/chosen": -82.09310150146484, |
|
"logps/rejected": -113.52314758300781, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10573047399520874, |
|
"rewards/margins": 1.0775012969970703, |
|
"rewards/rejected": -1.1832319498062134, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6456953642384104e-07, |
|
"logits/chosen": -2.495922565460205, |
|
"logits/rejected": -2.432220935821533, |
|
"logps/chosen": -123.1400375366211, |
|
"logps/rejected": -111.23506164550781, |
|
"loss": 1.2805, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4564870297908783, |
|
"rewards/margins": 0.5551273226737976, |
|
"rewards/rejected": -1.0116143226623535, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.629139072847682e-07, |
|
"logits/chosen": -2.3615145683288574, |
|
"logits/rejected": -2.3742241859436035, |
|
"logps/chosen": -128.84971618652344, |
|
"logps/rejected": -140.29312133789062, |
|
"loss": 1.2493, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.166237235069275, |
|
"rewards/margins": 1.4496667385101318, |
|
"rewards/rejected": -2.615903854370117, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.612582781456953e-07, |
|
"logits/chosen": -2.471628189086914, |
|
"logits/rejected": -2.407003164291382, |
|
"logps/chosen": -106.4498291015625, |
|
"logps/rejected": -119.580078125, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.251348614692688, |
|
"rewards/margins": 1.0506912469863892, |
|
"rewards/rejected": -2.302039623260498, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.596026490066225e-07, |
|
"logits/chosen": -2.3577880859375, |
|
"logits/rejected": -2.3710594177246094, |
|
"logps/chosen": -109.6436996459961, |
|
"logps/rejected": -111.36781311035156, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1875368356704712, |
|
"rewards/margins": 0.8005573153495789, |
|
"rewards/rejected": -1.9880939722061157, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5794701986754965e-07, |
|
"logits/chosen": -2.3025927543640137, |
|
"logits/rejected": -2.412416934967041, |
|
"logps/chosen": -92.57754516601562, |
|
"logps/rejected": -125.9276123046875, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1066559553146362, |
|
"rewards/margins": 0.5373567938804626, |
|
"rewards/rejected": -1.6440128087997437, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.562913907284768e-07, |
|
"logits/chosen": -2.315936326980591, |
|
"logits/rejected": -2.264455556869507, |
|
"logps/chosen": -111.17767333984375, |
|
"logps/rejected": -124.8282699584961, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9577595591545105, |
|
"rewards/margins": 1.1823832988739014, |
|
"rewards/rejected": -2.1401429176330566, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.54635761589404e-07, |
|
"logits/chosen": -2.4507012367248535, |
|
"logits/rejected": -2.402617931365967, |
|
"logps/chosen": -112.44432067871094, |
|
"logps/rejected": -117.4054946899414, |
|
"loss": 0.7353, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0404579639434814, |
|
"rewards/margins": 0.9835718870162964, |
|
"rewards/rejected": -2.0240299701690674, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5298013245033113e-07, |
|
"logits/chosen": -2.410632610321045, |
|
"logits/rejected": -2.4103057384490967, |
|
"logps/chosen": -102.71327209472656, |
|
"logps/rejected": -118.40677642822266, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.740358829498291, |
|
"rewards/margins": 1.1352561712265015, |
|
"rewards/rejected": -1.875615119934082, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5132450331125827e-07, |
|
"logits/chosen": -2.2251460552215576, |
|
"logits/rejected": -2.2362751960754395, |
|
"logps/chosen": -106.41926574707031, |
|
"logps/rejected": -108.68377685546875, |
|
"loss": 0.9923, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0975613594055176, |
|
"rewards/margins": 1.4764889478683472, |
|
"rewards/rejected": -2.5740504264831543, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.496688741721854e-07, |
|
"logits/chosen": -2.2884135246276855, |
|
"logits/rejected": -2.3148610591888428, |
|
"logps/chosen": -95.29539489746094, |
|
"logps/rejected": -102.16908264160156, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0151549577713013, |
|
"rewards/margins": 0.7811011075973511, |
|
"rewards/rejected": -1.7962560653686523, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4801324503311255e-07, |
|
"logits/chosen": -2.266324520111084, |
|
"logits/rejected": -2.1928133964538574, |
|
"logps/chosen": -94.09014892578125, |
|
"logps/rejected": -102.40970611572266, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.572268009185791, |
|
"rewards/margins": 0.8905662298202515, |
|
"rewards/rejected": -1.462834358215332, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.463576158940397e-07, |
|
"logits/chosen": -2.2293038368225098, |
|
"logits/rejected": -2.1520204544067383, |
|
"logps/chosen": -119.3239974975586, |
|
"logps/rejected": -125.38603210449219, |
|
"loss": 0.7517, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5861669182777405, |
|
"rewards/margins": 1.747180700302124, |
|
"rewards/rejected": -2.333347797393799, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4470198675496683e-07, |
|
"logits/chosen": -2.3797130584716797, |
|
"logits/rejected": -2.3224523067474365, |
|
"logps/chosen": -103.2835922241211, |
|
"logps/rejected": -110.06852722167969, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6205762028694153, |
|
"rewards/margins": 0.7353760600090027, |
|
"rewards/rejected": -1.355952262878418, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.43046357615894e-07, |
|
"logits/chosen": -2.286005973815918, |
|
"logits/rejected": -2.243605375289917, |
|
"logps/chosen": -122.601806640625, |
|
"logps/rejected": -152.8876190185547, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.38620439171791077, |
|
"rewards/margins": 1.5684607028961182, |
|
"rewards/rejected": -1.9546654224395752, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4139072847682116e-07, |
|
"logits/chosen": -2.2788243293762207, |
|
"logits/rejected": -2.3132455348968506, |
|
"logps/chosen": -109.7626724243164, |
|
"logps/rejected": -121.210693359375, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1420578509569168, |
|
"rewards/margins": 1.0154675245285034, |
|
"rewards/rejected": -1.1575253009796143, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.397350993377483e-07, |
|
"logits/chosen": -2.1815805435180664, |
|
"logits/rejected": -2.2088842391967773, |
|
"logps/chosen": -97.82100677490234, |
|
"logps/rejected": -110.3985595703125, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8147605657577515, |
|
"rewards/margins": 1.0990091562271118, |
|
"rewards/rejected": -1.9137697219848633, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.380794701986755e-07, |
|
"logits/chosen": -2.1567564010620117, |
|
"logits/rejected": -2.213163375854492, |
|
"logps/chosen": -88.54952239990234, |
|
"logps/rejected": -115.46138000488281, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8950099945068359, |
|
"rewards/margins": 0.986484169960022, |
|
"rewards/rejected": -1.881494164466858, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3642384105960263e-07, |
|
"logits/chosen": -2.195145845413208, |
|
"logits/rejected": -2.1583914756774902, |
|
"logps/chosen": -89.95973205566406, |
|
"logps/rejected": -90.32757568359375, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1613867282867432, |
|
"rewards/margins": 1.2194865942001343, |
|
"rewards/rejected": -2.380873203277588, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.347682119205298e-07, |
|
"logits/chosen": -2.157541513442993, |
|
"logits/rejected": -2.0622384548187256, |
|
"logps/chosen": -122.03218078613281, |
|
"logps/rejected": -133.05084228515625, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2253652811050415, |
|
"rewards/margins": 1.0726807117462158, |
|
"rewards/rejected": -2.2980456352233887, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3311258278145697e-07, |
|
"logits/chosen": -2.2362678050994873, |
|
"logits/rejected": -2.2174267768859863, |
|
"logps/chosen": -104.6390151977539, |
|
"logps/rejected": -108.64559173583984, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9815710186958313, |
|
"rewards/margins": 0.686564564704895, |
|
"rewards/rejected": -1.6681352853775024, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.314569536423841e-07, |
|
"logits/chosen": -2.3051602840423584, |
|
"logits/rejected": -2.205004930496216, |
|
"logps/chosen": -112.1572494506836, |
|
"logps/rejected": -115.80439758300781, |
|
"loss": 3.3956, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1993415355682373, |
|
"rewards/margins": 0.9256394505500793, |
|
"rewards/rejected": -2.124980926513672, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2980132450331125e-07, |
|
"logits/chosen": -2.1093502044677734, |
|
"logits/rejected": -2.1304099559783936, |
|
"logps/chosen": -101.59135437011719, |
|
"logps/rejected": -121.8282241821289, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.4122694730758667, |
|
"rewards/margins": 1.0248304605484009, |
|
"rewards/rejected": -2.4371001720428467, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.281456953642384e-07, |
|
"logits/chosen": -2.1925549507141113, |
|
"logits/rejected": -2.2341551780700684, |
|
"logps/chosen": -125.73774719238281, |
|
"logps/rejected": -137.68995666503906, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3582961559295654, |
|
"rewards/margins": 1.1268060207366943, |
|
"rewards/rejected": -2.4851021766662598, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2649006622516553e-07, |
|
"logits/chosen": -2.1826648712158203, |
|
"logits/rejected": -2.0866520404815674, |
|
"logps/chosen": -112.77205657958984, |
|
"logps/rejected": -135.73634338378906, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9496241807937622, |
|
"rewards/margins": 2.8248558044433594, |
|
"rewards/rejected": -3.774479627609253, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2483443708609267e-07, |
|
"logits/chosen": -2.2106716632843018, |
|
"logits/rejected": -2.2418696880340576, |
|
"logps/chosen": -100.58189392089844, |
|
"logps/rejected": -122.01805114746094, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5530904531478882, |
|
"rewards/margins": 1.249182939529419, |
|
"rewards/rejected": -2.802273750305176, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.231788079470198e-07, |
|
"logits/chosen": -2.1691789627075195, |
|
"logits/rejected": -2.082367181777954, |
|
"logps/chosen": -100.97856903076172, |
|
"logps/rejected": -102.23161315917969, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1863991022109985, |
|
"rewards/margins": 1.5210940837860107, |
|
"rewards/rejected": -2.707493305206299, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.21523178807947e-07, |
|
"logits/chosen": -2.321969985961914, |
|
"logits/rejected": -2.2945773601531982, |
|
"logps/chosen": -95.80015563964844, |
|
"logps/rejected": -103.98514556884766, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6277263760566711, |
|
"rewards/margins": 1.0614575147628784, |
|
"rewards/rejected": -1.6891838312149048, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.1986754966887414e-07, |
|
"logits/chosen": -2.16201114654541, |
|
"logits/rejected": -2.100698471069336, |
|
"logps/chosen": -107.64762878417969, |
|
"logps/rejected": -114.20783996582031, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0765411853790283, |
|
"rewards/margins": 0.9841095209121704, |
|
"rewards/rejected": -2.060650587081909, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1821192052980133e-07, |
|
"logits/chosen": -2.15731143951416, |
|
"logits/rejected": -2.1200685501098633, |
|
"logps/chosen": -94.93736267089844, |
|
"logps/rejected": -108.20992279052734, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5728567838668823, |
|
"rewards/margins": 1.2192682027816772, |
|
"rewards/rejected": -1.7921253442764282, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.165562913907285e-07, |
|
"logits/chosen": -2.228494644165039, |
|
"logits/rejected": -2.199162006378174, |
|
"logps/chosen": -119.44285583496094, |
|
"logps/rejected": -124.89945220947266, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5043415427207947, |
|
"rewards/margins": 1.4671887159347534, |
|
"rewards/rejected": -1.9715303182601929, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.149006622516556e-07, |
|
"logits/chosen": -2.242833137512207, |
|
"logits/rejected": -2.193368673324585, |
|
"logps/chosen": -106.42388916015625, |
|
"logps/rejected": -115.7519302368164, |
|
"loss": 0.5458, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2504803538322449, |
|
"rewards/margins": 1.4816687107086182, |
|
"rewards/rejected": -1.7321488857269287, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1324503311258276e-07, |
|
"logits/chosen": -2.296274185180664, |
|
"logits/rejected": -2.233081340789795, |
|
"logps/chosen": -97.89036560058594, |
|
"logps/rejected": -118.38981628417969, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.856580376625061, |
|
"rewards/margins": 1.5226026773452759, |
|
"rewards/rejected": -2.379183053970337, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1158940397350995e-07, |
|
"logits/chosen": -2.2974660396575928, |
|
"logits/rejected": -2.1640889644622803, |
|
"logps/chosen": -111.53731536865234, |
|
"logps/rejected": -109.1888656616211, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9736050367355347, |
|
"rewards/margins": 1.2244486808776855, |
|
"rewards/rejected": -2.1980538368225098, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.099337748344371e-07, |
|
"logits/chosen": -2.130094289779663, |
|
"logits/rejected": -2.0237298011779785, |
|
"logps/chosen": -116.61064147949219, |
|
"logps/rejected": -123.98744201660156, |
|
"loss": 0.9585, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.352774143218994, |
|
"rewards/margins": 1.3028422594070435, |
|
"rewards/rejected": -3.6556167602539062, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0827814569536423e-07, |
|
"logits/chosen": -2.1122946739196777, |
|
"logits/rejected": -2.1758933067321777, |
|
"logps/chosen": -92.36100769042969, |
|
"logps/rejected": -117.257080078125, |
|
"loss": 0.5243, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1455414295196533, |
|
"rewards/margins": 1.1456917524337769, |
|
"rewards/rejected": -2.291233539581299, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0662251655629137e-07, |
|
"logits/chosen": -2.1967172622680664, |
|
"logits/rejected": -2.163334369659424, |
|
"logps/chosen": -94.69267272949219, |
|
"logps/rejected": -106.30582427978516, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9426735043525696, |
|
"rewards/margins": 0.8261833190917969, |
|
"rewards/rejected": -1.7688567638397217, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.049668874172185e-07, |
|
"logits/chosen": -2.2559750080108643, |
|
"logits/rejected": -2.2480287551879883, |
|
"logps/chosen": -114.98677825927734, |
|
"logps/rejected": -118.00787353515625, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.6318261623382568, |
|
"rewards/margins": 0.813581645488739, |
|
"rewards/rejected": -2.4454076290130615, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0331125827814565e-07, |
|
"logits/chosen": -2.277569055557251, |
|
"logits/rejected": -2.2428252696990967, |
|
"logps/chosen": -106.87760162353516, |
|
"logps/rejected": -107.15045166015625, |
|
"loss": 0.7337, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2901384830474854, |
|
"rewards/margins": 0.8376191854476929, |
|
"rewards/rejected": -2.1277577877044678, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.016556291390728e-07, |
|
"logits/chosen": -2.2305266857147217, |
|
"logits/rejected": -2.2446939945220947, |
|
"logps/chosen": -115.1706314086914, |
|
"logps/rejected": -132.69129943847656, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4605052471160889, |
|
"rewards/margins": 1.3340156078338623, |
|
"rewards/rejected": -2.794520854949951, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": -2.3378500938415527, |
|
"logits/rejected": -2.1980865001678467, |
|
"logps/chosen": -124.11688232421875, |
|
"logps/rejected": -121.2197494506836, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1835664510726929, |
|
"rewards/margins": 1.0963947772979736, |
|
"rewards/rejected": -2.279961109161377, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.983443708609271e-07, |
|
"logits/chosen": -2.2236156463623047, |
|
"logits/rejected": -2.2054903507232666, |
|
"logps/chosen": -122.0257568359375, |
|
"logps/rejected": -125.471923828125, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3710222244262695, |
|
"rewards/margins": 1.238471269607544, |
|
"rewards/rejected": -2.6094937324523926, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.966887417218543e-07, |
|
"logits/chosen": -2.2760846614837646, |
|
"logits/rejected": -2.2383294105529785, |
|
"logps/chosen": -104.09146881103516, |
|
"logps/rejected": -120.87336730957031, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4301923513412476, |
|
"rewards/margins": 1.626868486404419, |
|
"rewards/rejected": -3.0570602416992188, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9503311258278146e-07, |
|
"logits/chosen": -2.304551839828491, |
|
"logits/rejected": -2.3333609104156494, |
|
"logps/chosen": -119.12831115722656, |
|
"logps/rejected": -128.80160522460938, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5498030185699463, |
|
"rewards/margins": 0.8728634715080261, |
|
"rewards/rejected": -3.422666072845459, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.933774834437086e-07, |
|
"logits/chosen": -2.2905325889587402, |
|
"logits/rejected": -2.175750255584717, |
|
"logps/chosen": -111.89952087402344, |
|
"logps/rejected": -112.72969055175781, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.818068742752075, |
|
"rewards/margins": 0.5174419283866882, |
|
"rewards/rejected": -3.335510730743408, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9172185430463574e-07, |
|
"logits/chosen": -2.3529715538024902, |
|
"logits/rejected": -2.2983202934265137, |
|
"logps/chosen": -136.7278594970703, |
|
"logps/rejected": -129.16085815429688, |
|
"loss": 0.5891, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.4263627529144287, |
|
"rewards/margins": 0.8253445625305176, |
|
"rewards/rejected": -3.2517075538635254, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.9006622516556293e-07, |
|
"logits/chosen": -2.2374019622802734, |
|
"logits/rejected": -2.2284903526306152, |
|
"logps/chosen": -114.3366470336914, |
|
"logps/rejected": -110.65074157714844, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.3343544006347656, |
|
"rewards/margins": 1.0057871341705322, |
|
"rewards/rejected": -3.340141773223877, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_logits/chosen": -2.2622616291046143, |
|
"eval_logits/rejected": -2.215507745742798, |
|
"eval_logps/chosen": -119.37470245361328, |
|
"eval_logps/rejected": -125.0894546508789, |
|
"eval_loss": 0.5807133316993713, |
|
"eval_rewards/accuracies": 0.6830357313156128, |
|
"eval_rewards/chosen": -1.950809121131897, |
|
"eval_rewards/margins": 0.8533560633659363, |
|
"eval_rewards/rejected": -2.8041651248931885, |
|
"eval_runtime": 520.9457, |
|
"eval_samples_per_second": 3.428, |
|
"eval_steps_per_second": 0.107, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8841059602649007e-07, |
|
"logits/chosen": -2.4846906661987305, |
|
"logits/rejected": -2.38966703414917, |
|
"logps/chosen": -122.82658386230469, |
|
"logps/rejected": -122.37986755371094, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.5582962036132812, |
|
"rewards/margins": 0.7036358118057251, |
|
"rewards/rejected": -2.261931896209717, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.867549668874172e-07, |
|
"logits/chosen": -2.413020133972168, |
|
"logits/rejected": -2.348389148712158, |
|
"logps/chosen": -146.7459716796875, |
|
"logps/rejected": -152.81591796875, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9320647716522217, |
|
"rewards/margins": 1.321993112564087, |
|
"rewards/rejected": -3.2540581226348877, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8509933774834435e-07, |
|
"logits/chosen": -2.335376262664795, |
|
"logits/rejected": -2.3727335929870605, |
|
"logps/chosen": -96.5339584350586, |
|
"logps/rejected": -102.97718811035156, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.347673773765564, |
|
"rewards/margins": 0.7935667037963867, |
|
"rewards/rejected": -2.141240358352661, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.834437086092715e-07, |
|
"logits/chosen": -2.474375009536743, |
|
"logits/rejected": -2.457411527633667, |
|
"logps/chosen": -100.12342071533203, |
|
"logps/rejected": -98.28324890136719, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6612989902496338, |
|
"rewards/margins": 1.0141090154647827, |
|
"rewards/rejected": -1.675408124923706, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8178807947019863e-07, |
|
"logits/chosen": -2.4102184772491455, |
|
"logits/rejected": -2.366565704345703, |
|
"logps/chosen": -94.41053771972656, |
|
"logps/rejected": -106.40338134765625, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1945335865020752, |
|
"rewards/margins": 1.7779722213745117, |
|
"rewards/rejected": -2.972505807876587, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.8013245033112577e-07, |
|
"logits/chosen": -2.324781656265259, |
|
"logits/rejected": -2.265265703201294, |
|
"logps/chosen": -113.0925064086914, |
|
"logps/rejected": -116.36458587646484, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6215614080429077, |
|
"rewards/margins": 0.970362663269043, |
|
"rewards/rejected": -1.5919239521026611, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7847682119205296e-07, |
|
"logits/chosen": -2.4248404502868652, |
|
"logits/rejected": -2.3727540969848633, |
|
"logps/chosen": -112.99056243896484, |
|
"logps/rejected": -124.29933166503906, |
|
"loss": 1.2347, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.332558035850525, |
|
"rewards/margins": 1.7028182744979858, |
|
"rewards/rejected": -3.0353763103485107, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7682119205298016e-07, |
|
"logits/chosen": -2.510585308074951, |
|
"logits/rejected": -2.4303643703460693, |
|
"logps/chosen": -122.59515380859375, |
|
"logps/rejected": -119.64692687988281, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8100630044937134, |
|
"rewards/margins": 0.8500891923904419, |
|
"rewards/rejected": -2.660151958465576, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.751655629139073e-07, |
|
"logits/chosen": -2.48645281791687, |
|
"logits/rejected": -2.433279037475586, |
|
"logps/chosen": -131.58583068847656, |
|
"logps/rejected": -139.4903106689453, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.738173484802246, |
|
"rewards/margins": 0.9940687417984009, |
|
"rewards/rejected": -2.7322418689727783, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7350993377483444e-07, |
|
"logits/chosen": -2.2750325202941895, |
|
"logits/rejected": -2.214141845703125, |
|
"logps/chosen": -92.43232727050781, |
|
"logps/rejected": -118.48176574707031, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.566083312034607, |
|
"rewards/margins": 1.913888931274414, |
|
"rewards/rejected": -3.4799721240997314, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.718543046357616e-07, |
|
"logits/chosen": -2.3589186668395996, |
|
"logits/rejected": -2.289020538330078, |
|
"logps/chosen": -116.14213562011719, |
|
"logps/rejected": -115.25, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8134400844573975, |
|
"rewards/margins": 1.0903173685073853, |
|
"rewards/rejected": -2.9037575721740723, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.701986754966887e-07, |
|
"logits/chosen": -2.4153926372528076, |
|
"logits/rejected": -2.38564133644104, |
|
"logps/chosen": -198.99185180664062, |
|
"logps/rejected": -211.7269744873047, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -8.85645580291748, |
|
"rewards/margins": 1.3634490966796875, |
|
"rewards/rejected": -10.219904899597168, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6854304635761586e-07, |
|
"logits/chosen": -2.3718574047088623, |
|
"logits/rejected": -2.323935031890869, |
|
"logps/chosen": -114.41487121582031, |
|
"logps/rejected": -115.03157806396484, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.747230887413025, |
|
"rewards/margins": 0.9782400131225586, |
|
"rewards/rejected": -2.725471019744873, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6688741721854305e-07, |
|
"logits/chosen": -2.266796350479126, |
|
"logits/rejected": -2.279444456100464, |
|
"logps/chosen": -125.43962097167969, |
|
"logps/rejected": -138.60568237304688, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7848097085952759, |
|
"rewards/margins": 0.5874557495117188, |
|
"rewards/rejected": -2.372265577316284, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.652317880794702e-07, |
|
"logits/chosen": -2.3460640907287598, |
|
"logits/rejected": -2.2017135620117188, |
|
"logps/chosen": -173.7471923828125, |
|
"logps/rejected": -175.39913940429688, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -8.320539474487305, |
|
"rewards/margins": 0.5577089190483093, |
|
"rewards/rejected": -8.87824821472168, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6357615894039733e-07, |
|
"logits/chosen": -2.3931944370269775, |
|
"logits/rejected": -2.295135498046875, |
|
"logps/chosen": -117.7610855102539, |
|
"logps/rejected": -131.86878967285156, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.364652395248413, |
|
"rewards/margins": 0.9879738092422485, |
|
"rewards/rejected": -2.352626323699951, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6192052980132447e-07, |
|
"logits/chosen": -2.3470005989074707, |
|
"logits/rejected": -2.314392328262329, |
|
"logps/chosen": -116.54869079589844, |
|
"logps/rejected": -121.33402252197266, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6547797918319702, |
|
"rewards/margins": 1.0188862085342407, |
|
"rewards/rejected": -2.6736655235290527, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.602649006622516e-07, |
|
"logits/chosen": -2.3665614128112793, |
|
"logits/rejected": -2.2760112285614014, |
|
"logps/chosen": -116.99346923828125, |
|
"logps/rejected": -194.17459106445312, |
|
"loss": 0.4616, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1822216510772705, |
|
"rewards/margins": 7.195115089416504, |
|
"rewards/rejected": -8.377335548400879, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.5860927152317875e-07, |
|
"logits/chosen": -2.378209114074707, |
|
"logits/rejected": -2.3278615474700928, |
|
"logps/chosen": -119.82206726074219, |
|
"logps/rejected": -126.67927551269531, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.758419394493103, |
|
"rewards/margins": 1.255906343460083, |
|
"rewards/rejected": -3.0143258571624756, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5695364238410594e-07, |
|
"logits/chosen": -2.376044750213623, |
|
"logits/rejected": -2.308166265487671, |
|
"logps/chosen": -113.3560562133789, |
|
"logps/rejected": -114.37657165527344, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4774434566497803, |
|
"rewards/margins": 0.5793313384056091, |
|
"rewards/rejected": -2.056774616241455, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5529801324503314e-07, |
|
"logits/chosen": -2.374824285507202, |
|
"logits/rejected": -2.3935980796813965, |
|
"logps/chosen": -108.83685302734375, |
|
"logps/rejected": -115.5961685180664, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5476689338684082, |
|
"rewards/margins": 0.7745328545570374, |
|
"rewards/rejected": -2.322201728820801, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.536423841059603e-07, |
|
"logits/chosen": -2.339582681655884, |
|
"logits/rejected": -2.361855983734131, |
|
"logps/chosen": -121.9773941040039, |
|
"logps/rejected": -133.71356201171875, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1847705841064453, |
|
"rewards/margins": 0.9190909266471863, |
|
"rewards/rejected": -3.1038613319396973, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.519867549668874e-07, |
|
"logits/chosen": -2.2750911712646484, |
|
"logits/rejected": -2.235349416732788, |
|
"logps/chosen": -83.35279846191406, |
|
"logps/rejected": -102.2212905883789, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9985214471817017, |
|
"rewards/margins": 1.0258718729019165, |
|
"rewards/rejected": -3.024393081665039, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5033112582781456e-07, |
|
"logits/chosen": -2.3950631618499756, |
|
"logits/rejected": -2.286043643951416, |
|
"logps/chosen": -112.0318603515625, |
|
"logps/rejected": -116.00035095214844, |
|
"loss": 0.9739, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5054577589035034, |
|
"rewards/margins": 0.963812530040741, |
|
"rewards/rejected": -2.4692704677581787, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.486754966887417e-07, |
|
"logits/chosen": -2.2539525032043457, |
|
"logits/rejected": -2.280163526535034, |
|
"logps/chosen": -90.12135314941406, |
|
"logps/rejected": -103.78663635253906, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0227272510528564, |
|
"rewards/margins": 0.8646720051765442, |
|
"rewards/rejected": -1.8873993158340454, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4701986754966884e-07, |
|
"logits/chosen": -2.3163156509399414, |
|
"logits/rejected": -2.324432849884033, |
|
"logps/chosen": -91.22362518310547, |
|
"logps/rejected": -103.0667953491211, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4652012288570404, |
|
"rewards/margins": 1.1732677221298218, |
|
"rewards/rejected": -1.6384689807891846, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4536423841059603e-07, |
|
"logits/chosen": -2.3076674938201904, |
|
"logits/rejected": -2.3175816535949707, |
|
"logps/chosen": -100.41036224365234, |
|
"logps/rejected": -117.35456848144531, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2293812036514282, |
|
"rewards/margins": 0.8102920651435852, |
|
"rewards/rejected": -2.039673089981079, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4370860927152317e-07, |
|
"logits/chosen": -2.3699378967285156, |
|
"logits/rejected": -2.3562657833099365, |
|
"logps/chosen": -119.7326889038086, |
|
"logps/rejected": -131.7585906982422, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6823489665985107, |
|
"rewards/margins": 1.3262075185775757, |
|
"rewards/rejected": -2.008556604385376, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.420529801324503e-07, |
|
"logits/chosen": -2.1104941368103027, |
|
"logits/rejected": -2.0905330181121826, |
|
"logps/chosen": -171.0330810546875, |
|
"logps/rejected": -175.89886474609375, |
|
"loss": 0.9012, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -7.170141696929932, |
|
"rewards/margins": -0.5844208002090454, |
|
"rewards/rejected": -6.585721015930176, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4039735099337745e-07, |
|
"logits/chosen": -2.3445873260498047, |
|
"logits/rejected": -2.2650883197784424, |
|
"logps/chosen": -130.222900390625, |
|
"logps/rejected": -132.6783447265625, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4724981784820557, |
|
"rewards/margins": 0.9613865613937378, |
|
"rewards/rejected": -2.433884859085083, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.387417218543046e-07, |
|
"logits/chosen": -2.457414150238037, |
|
"logits/rejected": -2.5287423133850098, |
|
"logps/chosen": -135.46902465820312, |
|
"logps/rejected": -159.89739990234375, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7360942363739014, |
|
"rewards/margins": 1.1658858060836792, |
|
"rewards/rejected": -2.901979923248291, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3708609271523173e-07, |
|
"logits/chosen": -2.221667766571045, |
|
"logits/rejected": -2.208982467651367, |
|
"logps/chosen": -106.48121643066406, |
|
"logps/rejected": -104.82066345214844, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4603312015533447, |
|
"rewards/margins": 0.8812816739082336, |
|
"rewards/rejected": -2.3416128158569336, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.35430463576159e-07, |
|
"logits/chosen": -2.2299439907073975, |
|
"logits/rejected": -2.225663423538208, |
|
"logps/chosen": -94.22245025634766, |
|
"logps/rejected": -100.85789489746094, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.32827529311180115, |
|
"rewards/margins": 1.4746736288070679, |
|
"rewards/rejected": -1.8029489517211914, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.337748344370861e-07, |
|
"logits/chosen": -2.2380738258361816, |
|
"logits/rejected": -2.2997546195983887, |
|
"logps/chosen": -98.22574615478516, |
|
"logps/rejected": -112.7544937133789, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4563646912574768, |
|
"rewards/margins": 0.6618258953094482, |
|
"rewards/rejected": -1.1181905269622803, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3211920529801326e-07, |
|
"logits/chosen": -2.405059814453125, |
|
"logits/rejected": -2.429863452911377, |
|
"logps/chosen": -107.7689437866211, |
|
"logps/rejected": -119.28629302978516, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2023572474718094, |
|
"rewards/margins": 0.8103917241096497, |
|
"rewards/rejected": -1.0127489566802979, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.304635761589404e-07, |
|
"logits/chosen": -2.3326334953308105, |
|
"logits/rejected": -2.256371259689331, |
|
"logps/chosen": -111.0186767578125, |
|
"logps/rejected": -101.33964538574219, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1809541881084442, |
|
"rewards/margins": 0.9960860013961792, |
|
"rewards/rejected": -1.177040457725525, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2880794701986754e-07, |
|
"logits/chosen": -2.228715181350708, |
|
"logits/rejected": -2.2780203819274902, |
|
"logps/chosen": -82.96192932128906, |
|
"logps/rejected": -106.3135757446289, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4178234040737152, |
|
"rewards/margins": 1.3403428792953491, |
|
"rewards/rejected": -1.7581663131713867, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.271523178807947e-07, |
|
"logits/chosen": -2.3673007488250732, |
|
"logits/rejected": -2.361161947250366, |
|
"logps/chosen": -110.33650970458984, |
|
"logps/rejected": -118.94313049316406, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7891864776611328, |
|
"rewards/margins": 0.4793139100074768, |
|
"rewards/rejected": -1.2685004472732544, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.254966887417218e-07, |
|
"logits/chosen": -2.270993947982788, |
|
"logits/rejected": -2.3422646522521973, |
|
"logps/chosen": -110.1440200805664, |
|
"logps/rejected": -123.39158630371094, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0290793180465698, |
|
"rewards/margins": 0.8790245056152344, |
|
"rewards/rejected": -1.9081039428710938, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.23841059602649e-07, |
|
"logits/chosen": -2.3637521266937256, |
|
"logits/rejected": -2.3246121406555176, |
|
"logps/chosen": -123.53662109375, |
|
"logps/rejected": -130.99119567871094, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9286755323410034, |
|
"rewards/margins": 1.2442817687988281, |
|
"rewards/rejected": -2.172957181930542, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2218543046357615e-07, |
|
"logits/chosen": -2.1791653633117676, |
|
"logits/rejected": -2.18937349319458, |
|
"logps/chosen": -166.4168243408203, |
|
"logps/rejected": -188.746826171875, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -5.239639759063721, |
|
"rewards/margins": 1.5334604978561401, |
|
"rewards/rejected": -6.77310037612915, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.205298013245033e-07, |
|
"logits/chosen": -2.234860897064209, |
|
"logits/rejected": -2.235252857208252, |
|
"logps/chosen": -131.88731384277344, |
|
"logps/rejected": -147.35777282714844, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8779850006103516, |
|
"rewards/margins": 1.1691521406173706, |
|
"rewards/rejected": -4.0471367835998535, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1887417218543043e-07, |
|
"logits/chosen": -2.3836143016815186, |
|
"logits/rejected": -2.35886287689209, |
|
"logps/chosen": -122.30987548828125, |
|
"logps/rejected": -125.3285903930664, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5127007961273193, |
|
"rewards/margins": 1.0224969387054443, |
|
"rewards/rejected": -3.5351977348327637, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1721854304635757e-07, |
|
"logits/chosen": -2.280726194381714, |
|
"logits/rejected": -2.2161917686462402, |
|
"logps/chosen": -118.85569763183594, |
|
"logps/rejected": -122.44981384277344, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2102978229522705, |
|
"rewards/margins": 1.2170623540878296, |
|
"rewards/rejected": -3.4273605346679688, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.155629139072847e-07, |
|
"logits/chosen": -2.445349931716919, |
|
"logits/rejected": -2.4110920429229736, |
|
"logps/chosen": -110.351806640625, |
|
"logps/rejected": -118.65000915527344, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.997372031211853, |
|
"rewards/margins": 0.7109335660934448, |
|
"rewards/rejected": -2.708305835723877, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1390728476821196e-07, |
|
"logits/chosen": -2.3366105556488037, |
|
"logits/rejected": -2.3466391563415527, |
|
"logps/chosen": -127.3154067993164, |
|
"logps/rejected": -124.52156829833984, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.18947172164917, |
|
"rewards/margins": 0.6396933794021606, |
|
"rewards/rejected": -2.829165458679199, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.122516556291391e-07, |
|
"logits/chosen": -2.260577917098999, |
|
"logits/rejected": -2.2590389251708984, |
|
"logps/chosen": -113.4861831665039, |
|
"logps/rejected": -108.08863830566406, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.5557265281677246, |
|
"rewards/margins": 0.4942797124385834, |
|
"rewards/rejected": -3.050006151199341, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1059602649006624e-07, |
|
"logits/chosen": -2.3765158653259277, |
|
"logits/rejected": -2.3215491771698, |
|
"logps/chosen": -123.18157958984375, |
|
"logps/rejected": -119.86373138427734, |
|
"loss": 0.4739, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5957121849060059, |
|
"rewards/margins": 0.9963384866714478, |
|
"rewards/rejected": -2.592050552368164, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.089403973509934e-07, |
|
"logits/chosen": -2.3044986724853516, |
|
"logits/rejected": -2.3184516429901123, |
|
"logps/chosen": -120.02888488769531, |
|
"logps/rejected": -125.82350158691406, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5062299966812134, |
|
"rewards/margins": 1.3234798908233643, |
|
"rewards/rejected": -2.829709529876709, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.072847682119205e-07, |
|
"logits/chosen": -2.315985918045044, |
|
"logits/rejected": -2.262968063354492, |
|
"logps/chosen": -114.96397399902344, |
|
"logps/rejected": -129.48318481445312, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5051807165145874, |
|
"rewards/margins": 1.5209523439407349, |
|
"rewards/rejected": -3.0261335372924805, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -2.1823971271514893, |
|
"eval_logits/rejected": -2.1383469104766846, |
|
"eval_logps/chosen": -114.31800079345703, |
|
"eval_logps/rejected": -123.8126220703125, |
|
"eval_loss": 0.5244069695472717, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -1.4451391696929932, |
|
"eval_rewards/margins": 1.2313430309295654, |
|
"eval_rewards/rejected": -2.6764819622039795, |
|
"eval_runtime": 522.8803, |
|
"eval_samples_per_second": 3.416, |
|
"eval_steps_per_second": 0.107, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0562913907284766e-07, |
|
"logits/chosen": -2.313927173614502, |
|
"logits/rejected": -2.33535099029541, |
|
"logps/chosen": -118.8395767211914, |
|
"logps/rejected": -128.5199737548828, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8590469360351562, |
|
"rewards/margins": 0.7332299947738647, |
|
"rewards/rejected": -2.5922768115997314, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.039735099337748e-07, |
|
"logits/chosen": -2.3866069316864014, |
|
"logits/rejected": -2.3465638160705566, |
|
"logps/chosen": -120.46064758300781, |
|
"logps/rejected": -116.50871276855469, |
|
"loss": 0.8307, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8921232223510742, |
|
"rewards/margins": 0.763845682144165, |
|
"rewards/rejected": -2.6559691429138184, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.02317880794702e-07, |
|
"logits/chosen": -2.421853542327881, |
|
"logits/rejected": -2.318270206451416, |
|
"logps/chosen": -137.11500549316406, |
|
"logps/rejected": -134.6293487548828, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.621045708656311, |
|
"rewards/margins": 0.8223884701728821, |
|
"rewards/rejected": -2.443434238433838, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0066225165562913e-07, |
|
"logits/chosen": -2.35496187210083, |
|
"logits/rejected": -2.2371764183044434, |
|
"logps/chosen": -111.7890853881836, |
|
"logps/rejected": -106.77983093261719, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0801293849945068, |
|
"rewards/margins": 1.046350121498108, |
|
"rewards/rejected": -2.1264796257019043, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9900662251655627e-07, |
|
"logits/chosen": -2.329745054244995, |
|
"logits/rejected": -2.2365243434906006, |
|
"logps/chosen": -112.671875, |
|
"logps/rejected": -102.41822814941406, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2923063039779663, |
|
"rewards/margins": 1.320711612701416, |
|
"rewards/rejected": -2.613018035888672, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.973509933774834e-07, |
|
"logits/chosen": -2.2645044326782227, |
|
"logits/rejected": -2.1956381797790527, |
|
"logps/chosen": -104.55106353759766, |
|
"logps/rejected": -106.2977294921875, |
|
"loss": 0.7244, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3651814460754395, |
|
"rewards/margins": 0.206166073679924, |
|
"rewards/rejected": -1.5713475942611694, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9569536423841055e-07, |
|
"logits/chosen": -2.1943066120147705, |
|
"logits/rejected": -2.24649977684021, |
|
"logps/chosen": -89.5943374633789, |
|
"logps/rejected": -106.08259582519531, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.288975477218628, |
|
"rewards/margins": 0.5116127133369446, |
|
"rewards/rejected": -1.8005882501602173, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.940397350993377e-07, |
|
"logits/chosen": -2.607445240020752, |
|
"logits/rejected": -2.4970269203186035, |
|
"logps/chosen": -146.52468872070312, |
|
"logps/rejected": -140.1497344970703, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5630671977996826, |
|
"rewards/margins": 0.6673210263252258, |
|
"rewards/rejected": -2.2303881645202637, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9238410596026494e-07, |
|
"logits/chosen": -2.238361358642578, |
|
"logits/rejected": -2.1506645679473877, |
|
"logps/chosen": -99.36707305908203, |
|
"logps/rejected": -105.86885070800781, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7212435007095337, |
|
"rewards/margins": 1.0883468389511108, |
|
"rewards/rejected": -2.8095905780792236, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.907284768211921e-07, |
|
"logits/chosen": -2.3251490592956543, |
|
"logits/rejected": -2.307288408279419, |
|
"logps/chosen": -115.70127868652344, |
|
"logps/rejected": -124.7564926147461, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.711801290512085, |
|
"rewards/margins": 1.0508122444152832, |
|
"rewards/rejected": -2.7626137733459473, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.890728476821192e-07, |
|
"logits/chosen": -2.2478513717651367, |
|
"logits/rejected": -2.2882168292999268, |
|
"logps/chosen": -104.28340911865234, |
|
"logps/rejected": -121.9701156616211, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.887414574623108, |
|
"rewards/margins": 0.925916850566864, |
|
"rewards/rejected": -2.8133316040039062, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8741721854304636e-07, |
|
"logits/chosen": -2.296663999557495, |
|
"logits/rejected": -2.3100745677948, |
|
"logps/chosen": -158.80392456054688, |
|
"logps/rejected": -124.94380950927734, |
|
"loss": 1.3257, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.360524654388428, |
|
"rewards/margins": -2.369443655014038, |
|
"rewards/rejected": -1.9910815954208374, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.857615894039735e-07, |
|
"logits/chosen": -2.2816338539123535, |
|
"logits/rejected": -2.2055506706237793, |
|
"logps/chosen": -116.35295104980469, |
|
"logps/rejected": -125.78230285644531, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8872613906860352, |
|
"rewards/margins": 1.402567982673645, |
|
"rewards/rejected": -3.2898292541503906, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8410596026490064e-07, |
|
"logits/chosen": -2.3136837482452393, |
|
"logits/rejected": -2.327634334564209, |
|
"logps/chosen": -118.3811264038086, |
|
"logps/rejected": -131.34634399414062, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.959405243396759, |
|
"rewards/margins": 1.3146027326583862, |
|
"rewards/rejected": -2.27400803565979, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.824503311258278e-07, |
|
"logits/chosen": -2.2207038402557373, |
|
"logits/rejected": -2.2655978202819824, |
|
"logps/chosen": -122.19026184082031, |
|
"logps/rejected": -130.85289001464844, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.631255865097046, |
|
"rewards/margins": 1.0431182384490967, |
|
"rewards/rejected": -2.6743741035461426, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8079470198675497e-07, |
|
"logits/chosen": -2.3095381259918213, |
|
"logits/rejected": -2.2248189449310303, |
|
"logps/chosen": -103.5934066772461, |
|
"logps/rejected": -116.8990249633789, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2814357280731201, |
|
"rewards/margins": 1.4714066982269287, |
|
"rewards/rejected": -2.752842426300049, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.791390728476821e-07, |
|
"logits/chosen": -2.2992262840270996, |
|
"logits/rejected": -2.3474018573760986, |
|
"logps/chosen": -140.76292419433594, |
|
"logps/rejected": -168.6060333251953, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.161952018737793, |
|
"rewards/margins": 1.1705152988433838, |
|
"rewards/rejected": -3.3324673175811768, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7748344370860925e-07, |
|
"logits/chosen": -2.1538851261138916, |
|
"logits/rejected": -2.1492209434509277, |
|
"logps/chosen": -85.61529541015625, |
|
"logps/rejected": -105.35960388183594, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2468538284301758, |
|
"rewards/margins": 1.524840235710144, |
|
"rewards/rejected": -2.7716941833496094, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.758278145695364e-07, |
|
"logits/chosen": -2.220313549041748, |
|
"logits/rejected": -2.270676612854004, |
|
"logps/chosen": -116.72190856933594, |
|
"logps/rejected": -132.87937927246094, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.662656545639038, |
|
"rewards/margins": 1.4583295583724976, |
|
"rewards/rejected": -3.1209864616394043, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7417218543046353e-07, |
|
"logits/chosen": -2.3201870918273926, |
|
"logits/rejected": -2.287921667098999, |
|
"logps/chosen": -119.7146987915039, |
|
"logps/rejected": -135.27894592285156, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.569067358970642, |
|
"rewards/margins": 1.5324440002441406, |
|
"rewards/rejected": -3.1015114784240723, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.725165562913907e-07, |
|
"logits/chosen": -2.2404065132141113, |
|
"logits/rejected": -2.2358851432800293, |
|
"logps/chosen": -134.36831665039062, |
|
"logps/rejected": -137.21890258789062, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.740677833557129, |
|
"rewards/margins": 1.5694725513458252, |
|
"rewards/rejected": -3.310150623321533, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7086092715231786e-07, |
|
"logits/chosen": -2.3186452388763428, |
|
"logits/rejected": -2.2739059925079346, |
|
"logps/chosen": -118.93257141113281, |
|
"logps/rejected": -117.28021240234375, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.951216459274292, |
|
"rewards/margins": 1.0923653841018677, |
|
"rewards/rejected": -3.04358172416687, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6920529801324506e-07, |
|
"logits/chosen": -2.230313777923584, |
|
"logits/rejected": -2.269009828567505, |
|
"logps/chosen": -152.952880859375, |
|
"logps/rejected": -145.59732055664062, |
|
"loss": 0.8317, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.0952677726745605, |
|
"rewards/margins": -0.3405976891517639, |
|
"rewards/rejected": -3.7546706199645996, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.675496688741722e-07, |
|
"logits/chosen": -2.276404857635498, |
|
"logits/rejected": -2.223013162612915, |
|
"logps/chosen": -126.49522399902344, |
|
"logps/rejected": -123.188720703125, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3149405717849731, |
|
"rewards/margins": 0.8885973691940308, |
|
"rewards/rejected": -2.203538179397583, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6589403973509934e-07, |
|
"logits/chosen": -2.1905181407928467, |
|
"logits/rejected": -2.179508924484253, |
|
"logps/chosen": -105.01606750488281, |
|
"logps/rejected": -133.09783935546875, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6002556085586548, |
|
"rewards/margins": 2.0444486141204834, |
|
"rewards/rejected": -2.6447041034698486, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.642384105960265e-07, |
|
"logits/chosen": -2.2174530029296875, |
|
"logits/rejected": -2.250398635864258, |
|
"logps/chosen": -101.74955749511719, |
|
"logps/rejected": -133.28713989257812, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5314977765083313, |
|
"rewards/margins": 1.0308630466461182, |
|
"rewards/rejected": -1.5623606443405151, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.625827814569536e-07, |
|
"logits/chosen": -2.2077157497406006, |
|
"logits/rejected": -2.1879453659057617, |
|
"logps/chosen": -106.83088684082031, |
|
"logps/rejected": -117.6878662109375, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.922218918800354, |
|
"rewards/margins": 1.4254719018936157, |
|
"rewards/rejected": -3.3476905822753906, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6092715231788076e-07, |
|
"logits/chosen": -2.236419677734375, |
|
"logits/rejected": -2.1945042610168457, |
|
"logps/chosen": -112.48036193847656, |
|
"logps/rejected": -125.71522521972656, |
|
"loss": 0.8311, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3717401027679443, |
|
"rewards/margins": 1.444071888923645, |
|
"rewards/rejected": -2.8158118724823, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.5927152317880795e-07, |
|
"logits/chosen": -2.3207204341888428, |
|
"logits/rejected": -2.2542405128479004, |
|
"logps/chosen": -112.21119689941406, |
|
"logps/rejected": -117.85597229003906, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3228784799575806, |
|
"rewards/margins": 0.836434543132782, |
|
"rewards/rejected": -2.159313201904297, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.576158940397351e-07, |
|
"logits/chosen": -2.1415367126464844, |
|
"logits/rejected": -2.173337936401367, |
|
"logps/chosen": -111.6198501586914, |
|
"logps/rejected": -131.82977294921875, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.362797498703003, |
|
"rewards/margins": 0.8809803128242493, |
|
"rewards/rejected": -2.2437777519226074, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5596026490066223e-07, |
|
"logits/chosen": -2.169029951095581, |
|
"logits/rejected": -2.145346164703369, |
|
"logps/chosen": -102.84260559082031, |
|
"logps/rejected": -115.5724105834961, |
|
"loss": 0.778, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.428421974182129, |
|
"rewards/margins": 0.6596145629882812, |
|
"rewards/rejected": -2.08803653717041, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5430463576158937e-07, |
|
"logits/chosen": -2.3364787101745605, |
|
"logits/rejected": -2.2011687755584717, |
|
"logps/chosen": -109.7977066040039, |
|
"logps/rejected": -112.79130554199219, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1568695306777954, |
|
"rewards/margins": 1.5236353874206543, |
|
"rewards/rejected": -2.6805050373077393, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.526490066225165e-07, |
|
"logits/chosen": -2.270590305328369, |
|
"logits/rejected": -2.317115306854248, |
|
"logps/chosen": -131.15716552734375, |
|
"logps/rejected": -127.9232406616211, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5472099781036377, |
|
"rewards/margins": 0.4068627953529358, |
|
"rewards/rejected": -1.9540729522705078, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.509933774834437e-07, |
|
"logits/chosen": -2.258516550064087, |
|
"logits/rejected": -2.215508222579956, |
|
"logps/chosen": -116.99686431884766, |
|
"logps/rejected": -119.55728912353516, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1407909393310547, |
|
"rewards/margins": 1.8011624813079834, |
|
"rewards/rejected": -2.941953659057617, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4933774834437084e-07, |
|
"logits/chosen": -2.301156997680664, |
|
"logits/rejected": -2.282895565032959, |
|
"logps/chosen": -134.67526245117188, |
|
"logps/rejected": -115.00638580322266, |
|
"loss": 0.9478, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.1763339042663574, |
|
"rewards/margins": -0.813465416431427, |
|
"rewards/rejected": -2.362868309020996, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.47682119205298e-07, |
|
"logits/chosen": -2.1597695350646973, |
|
"logits/rejected": -2.2584662437438965, |
|
"logps/chosen": -90.55691528320312, |
|
"logps/rejected": -123.76590728759766, |
|
"loss": 0.408, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2247810363769531, |
|
"rewards/margins": 1.5576727390289307, |
|
"rewards/rejected": -2.782454013824463, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.460264900662252e-07, |
|
"logits/chosen": -2.3151791095733643, |
|
"logits/rejected": -2.3073203563690186, |
|
"logps/chosen": -110.79292297363281, |
|
"logps/rejected": -124.72319030761719, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4885177612304688, |
|
"rewards/margins": 0.9933377504348755, |
|
"rewards/rejected": -2.481855630874634, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.443708609271523e-07, |
|
"logits/chosen": -2.3785767555236816, |
|
"logits/rejected": -2.32625150680542, |
|
"logps/chosen": -135.62266540527344, |
|
"logps/rejected": -147.58059692382812, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6126123666763306, |
|
"rewards/margins": 1.325919508934021, |
|
"rewards/rejected": -2.9385318756103516, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4271523178807946e-07, |
|
"logits/chosen": -2.067755937576294, |
|
"logits/rejected": -2.157957077026367, |
|
"logps/chosen": -100.20467376708984, |
|
"logps/rejected": -139.4010009765625, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7971267700195312, |
|
"rewards/margins": 2.0710580348968506, |
|
"rewards/rejected": -3.8681845664978027, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.410596026490066e-07, |
|
"logits/chosen": -2.26438307762146, |
|
"logits/rejected": -2.1824748516082764, |
|
"logps/chosen": -107.44325256347656, |
|
"logps/rejected": -120.7808609008789, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0230767726898193, |
|
"rewards/margins": 1.2027556896209717, |
|
"rewards/rejected": -2.22583270072937, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.394039735099338e-07, |
|
"logits/chosen": -2.202819585800171, |
|
"logits/rejected": -2.1675939559936523, |
|
"logps/chosen": -118.07059478759766, |
|
"logps/rejected": -127.72599792480469, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6664397716522217, |
|
"rewards/margins": 0.9765909314155579, |
|
"rewards/rejected": -2.6430306434631348, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.377483443708609e-07, |
|
"logits/chosen": -2.2761118412017822, |
|
"logits/rejected": -2.2829902172088623, |
|
"logps/chosen": -125.0960464477539, |
|
"logps/rejected": -151.57437133789062, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6365854740142822, |
|
"rewards/margins": 1.4987179040908813, |
|
"rewards/rejected": -3.135303497314453, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3609271523178807e-07, |
|
"logits/chosen": -2.317108154296875, |
|
"logits/rejected": -2.367867946624756, |
|
"logps/chosen": -110.93936920166016, |
|
"logps/rejected": -124.30006408691406, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5488382577896118, |
|
"rewards/margins": 1.2520344257354736, |
|
"rewards/rejected": -2.800872564315796, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3443708609271524e-07, |
|
"logits/chosen": -2.283686399459839, |
|
"logits/rejected": -2.2000420093536377, |
|
"logps/chosen": -99.95622253417969, |
|
"logps/rejected": -107.75992584228516, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1963920593261719, |
|
"rewards/margins": 1.1246505975723267, |
|
"rewards/rejected": -2.321042537689209, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3278145695364238e-07, |
|
"logits/chosen": -2.3099443912506104, |
|
"logits/rejected": -2.293797016143799, |
|
"logps/chosen": -143.53994750976562, |
|
"logps/rejected": -156.98973083496094, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4224615097045898, |
|
"rewards/margins": 2.5084261894226074, |
|
"rewards/rejected": -3.9308879375457764, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3112582781456952e-07, |
|
"logits/chosen": -2.2922520637512207, |
|
"logits/rejected": -2.3070366382598877, |
|
"logps/chosen": -105.562744140625, |
|
"logps/rejected": -115.12934875488281, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.130995512008667, |
|
"rewards/margins": 1.4186890125274658, |
|
"rewards/rejected": -2.549685001373291, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2947019867549669e-07, |
|
"logits/chosen": -2.32692289352417, |
|
"logits/rejected": -2.284797191619873, |
|
"logps/chosen": -110.09574127197266, |
|
"logps/rejected": -141.3235321044922, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.62274169921875, |
|
"rewards/margins": 2.5739684104919434, |
|
"rewards/rejected": -4.196709632873535, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2781456953642383e-07, |
|
"logits/chosen": -2.2572226524353027, |
|
"logits/rejected": -2.3021931648254395, |
|
"logps/chosen": -103.7448501586914, |
|
"logps/rejected": -124.720947265625, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1002720594406128, |
|
"rewards/margins": 1.6038618087768555, |
|
"rewards/rejected": -2.704134225845337, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2615894039735097e-07, |
|
"logits/chosen": -2.1988630294799805, |
|
"logits/rejected": -2.1840052604675293, |
|
"logps/chosen": -97.76619720458984, |
|
"logps/rejected": -114.77903747558594, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6038196086883545, |
|
"rewards/margins": 1.3313450813293457, |
|
"rewards/rejected": -2.9351646900177, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2450331125827813e-07, |
|
"logits/chosen": -2.2937569618225098, |
|
"logits/rejected": -2.1716551780700684, |
|
"logps/chosen": -122.71870422363281, |
|
"logps/rejected": -126.25750732421875, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7580636739730835, |
|
"rewards/margins": 1.4850653409957886, |
|
"rewards/rejected": -3.243128538131714, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -2.1208481788635254, |
|
"eval_logits/rejected": -2.0760180950164795, |
|
"eval_logps/chosen": -117.771728515625, |
|
"eval_logps/rejected": -125.91642761230469, |
|
"eval_loss": 0.5643959641456604, |
|
"eval_rewards/accuracies": 0.6785714030265808, |
|
"eval_rewards/chosen": -1.7905113697052002, |
|
"eval_rewards/margins": 1.0963507890701294, |
|
"eval_rewards/rejected": -2.886862277984619, |
|
"eval_runtime": 519.2917, |
|
"eval_samples_per_second": 3.439, |
|
"eval_steps_per_second": 0.108, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.228476821192053e-07, |
|
"logits/chosen": -2.2932658195495605, |
|
"logits/rejected": -2.2196624279022217, |
|
"logps/chosen": -121.21055603027344, |
|
"logps/rejected": -114.63111877441406, |
|
"loss": 0.7063, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.775307059288025, |
|
"rewards/margins": 0.6449312567710876, |
|
"rewards/rejected": -2.420238494873047, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2119205298013244e-07, |
|
"logits/chosen": -2.137760877609253, |
|
"logits/rejected": -2.1844496726989746, |
|
"logps/chosen": -138.58255004882812, |
|
"logps/rejected": -127.23612213134766, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9617927074432373, |
|
"rewards/margins": 0.6043619513511658, |
|
"rewards/rejected": -2.5661544799804688, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1953642384105958e-07, |
|
"logits/chosen": -2.2938625812530518, |
|
"logits/rejected": -2.268752336502075, |
|
"logps/chosen": -113.85087585449219, |
|
"logps/rejected": -149.66539001464844, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5664665699005127, |
|
"rewards/margins": 0.8736650347709656, |
|
"rewards/rejected": -2.440131664276123, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1788079470198675e-07, |
|
"logits/chosen": -2.3219776153564453, |
|
"logits/rejected": -2.350645065307617, |
|
"logps/chosen": -94.68901824951172, |
|
"logps/rejected": -106.27494812011719, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4543777704238892, |
|
"rewards/margins": 0.8551589846611023, |
|
"rewards/rejected": -2.309536933898926, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1622516556291389e-07, |
|
"logits/chosen": -2.2941012382507324, |
|
"logits/rejected": -2.2624030113220215, |
|
"logps/chosen": -125.8183822631836, |
|
"logps/rejected": -133.0880584716797, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4116047620773315, |
|
"rewards/margins": 1.2253597974777222, |
|
"rewards/rejected": -2.6369645595550537, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1456953642384105e-07, |
|
"logits/chosen": -2.176222562789917, |
|
"logits/rejected": -2.0717849731445312, |
|
"logps/chosen": -104.74139404296875, |
|
"logps/rejected": -124.4080581665039, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.621763825416565, |
|
"rewards/margins": 1.3861135244369507, |
|
"rewards/rejected": -3.0078773498535156, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1291390728476822e-07, |
|
"logits/chosen": -2.244816541671753, |
|
"logits/rejected": -2.217611074447632, |
|
"logps/chosen": -126.18927001953125, |
|
"logps/rejected": -129.03915405273438, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9465411901474, |
|
"rewards/margins": 0.7713083028793335, |
|
"rewards/rejected": -2.7178492546081543, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1125827814569536e-07, |
|
"logits/chosen": -2.331676721572876, |
|
"logits/rejected": -2.249488353729248, |
|
"logps/chosen": -115.93875885009766, |
|
"logps/rejected": -122.89765930175781, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3803373575210571, |
|
"rewards/margins": 0.6847006678581238, |
|
"rewards/rejected": -2.065037965774536, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.096026490066225e-07, |
|
"logits/chosen": -2.2945587635040283, |
|
"logits/rejected": -2.2709438800811768, |
|
"logps/chosen": -102.5355453491211, |
|
"logps/rejected": -105.96309661865234, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1539552211761475, |
|
"rewards/margins": 1.233269453048706, |
|
"rewards/rejected": -2.3872246742248535, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0794701986754967e-07, |
|
"logits/chosen": -2.260633945465088, |
|
"logits/rejected": -2.257582187652588, |
|
"logps/chosen": -123.7904052734375, |
|
"logps/rejected": -139.2223663330078, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6961309909820557, |
|
"rewards/margins": 1.7095321416854858, |
|
"rewards/rejected": -3.405663251876831, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.062913907284768e-07, |
|
"logits/chosen": -2.4550869464874268, |
|
"logits/rejected": -2.369741439819336, |
|
"logps/chosen": -110.0873031616211, |
|
"logps/rejected": -110.52005767822266, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.5832288265228271, |
|
"rewards/margins": 0.5315386652946472, |
|
"rewards/rejected": -2.114767551422119, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0463576158940397e-07, |
|
"logits/chosen": -2.1035804748535156, |
|
"logits/rejected": -2.0870535373687744, |
|
"logps/chosen": -108.03116607666016, |
|
"logps/rejected": -193.72422790527344, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8296096324920654, |
|
"rewards/margins": 8.629142761230469, |
|
"rewards/rejected": -10.45875072479248, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.029801324503311e-07, |
|
"logits/chosen": -2.3085687160491943, |
|
"logits/rejected": -2.3340985774993896, |
|
"logps/chosen": -117.62290954589844, |
|
"logps/rejected": -126.44264221191406, |
|
"loss": 2.8703, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7038648128509521, |
|
"rewards/margins": 1.1059377193450928, |
|
"rewards/rejected": -2.809802770614624, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0132450331125828e-07, |
|
"logits/chosen": -2.4316937923431396, |
|
"logits/rejected": -2.3887412548065186, |
|
"logps/chosen": -138.1640625, |
|
"logps/rejected": -139.81886291503906, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.448891520500183, |
|
"rewards/margins": 0.9665302038192749, |
|
"rewards/rejected": -2.415421962738037, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9966887417218542e-07, |
|
"logits/chosen": -2.5827786922454834, |
|
"logits/rejected": -2.53794002532959, |
|
"logps/chosen": -126.8006362915039, |
|
"logps/rejected": -126.6136474609375, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2876603603363037, |
|
"rewards/margins": 0.7639477849006653, |
|
"rewards/rejected": -2.051608085632324, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9801324503311256e-07, |
|
"logits/chosen": -2.3453097343444824, |
|
"logits/rejected": -2.4177701473236084, |
|
"logps/chosen": -111.456787109375, |
|
"logps/rejected": -121.5312728881836, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4392328262329102, |
|
"rewards/margins": 1.1764917373657227, |
|
"rewards/rejected": -2.615724563598633, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9635761589403973e-07, |
|
"logits/chosen": -2.2641375064849854, |
|
"logits/rejected": -2.3049521446228027, |
|
"logps/chosen": -95.91242980957031, |
|
"logps/rejected": -111.0653305053711, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.325178861618042, |
|
"rewards/margins": 1.0794451236724854, |
|
"rewards/rejected": -2.4046239852905273, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.947019867549669e-07, |
|
"logits/chosen": -2.3387389183044434, |
|
"logits/rejected": -2.2360782623291016, |
|
"logps/chosen": -100.87395477294922, |
|
"logps/rejected": -111.1401138305664, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7546203136444092, |
|
"rewards/margins": 1.1646721363067627, |
|
"rewards/rejected": -2.919292449951172, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9304635761589403e-07, |
|
"logits/chosen": -2.3461403846740723, |
|
"logits/rejected": -2.3300156593322754, |
|
"logps/chosen": -103.99101257324219, |
|
"logps/rejected": -122.7502212524414, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1400959491729736, |
|
"rewards/margins": 1.375199556350708, |
|
"rewards/rejected": -2.5152957439422607, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.913907284768212e-07, |
|
"logits/chosen": -2.353959560394287, |
|
"logits/rejected": -2.2813894748687744, |
|
"logps/chosen": -110.76973724365234, |
|
"logps/rejected": -133.5187225341797, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2461332082748413, |
|
"rewards/margins": 2.200319766998291, |
|
"rewards/rejected": -3.446453094482422, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8973509933774834e-07, |
|
"logits/chosen": -2.483916759490967, |
|
"logits/rejected": -2.3655548095703125, |
|
"logps/chosen": -108.8669662475586, |
|
"logps/rejected": -116.64085388183594, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9422400593757629, |
|
"rewards/margins": 0.9399551153182983, |
|
"rewards/rejected": -1.8821951150894165, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8807947019867548e-07, |
|
"logits/chosen": -2.3532841205596924, |
|
"logits/rejected": -2.3442747592926025, |
|
"logps/chosen": -99.94935607910156, |
|
"logps/rejected": -115.6842269897461, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1935023069381714, |
|
"rewards/margins": 1.022131323814392, |
|
"rewards/rejected": -2.2156338691711426, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8642384105960262e-07, |
|
"logits/chosen": -2.473654270172119, |
|
"logits/rejected": -2.456444263458252, |
|
"logps/chosen": -128.30955505371094, |
|
"logps/rejected": -135.67520141601562, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0379096269607544, |
|
"rewards/margins": 0.6246587038040161, |
|
"rewards/rejected": -1.6625683307647705, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8476821192052979e-07, |
|
"logits/chosen": -2.4869556427001953, |
|
"logits/rejected": -2.449312686920166, |
|
"logps/chosen": -116.06538391113281, |
|
"logps/rejected": -120.8796615600586, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9571272730827332, |
|
"rewards/margins": 1.021393060684204, |
|
"rewards/rejected": -1.978520154953003, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8311258278145695e-07, |
|
"logits/chosen": -2.4670963287353516, |
|
"logits/rejected": -2.382390260696411, |
|
"logps/chosen": -123.17083740234375, |
|
"logps/rejected": -109.2813949584961, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2968004941940308, |
|
"rewards/margins": 0.7737834453582764, |
|
"rewards/rejected": -2.0705838203430176, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.814569536423841e-07, |
|
"logits/chosen": -2.345423460006714, |
|
"logits/rejected": -2.3300931453704834, |
|
"logps/chosen": -92.75725555419922, |
|
"logps/rejected": -107.68856048583984, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8894661068916321, |
|
"rewards/margins": 1.360033392906189, |
|
"rewards/rejected": -2.249499559402466, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7980132450331126e-07, |
|
"logits/chosen": -2.2602181434631348, |
|
"logits/rejected": -2.1599280834198, |
|
"logps/chosen": -105.10438537597656, |
|
"logps/rejected": -131.7586669921875, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.113909125328064, |
|
"rewards/margins": 1.5821037292480469, |
|
"rewards/rejected": -2.6960129737854004, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.781456953642384e-07, |
|
"logits/chosen": -2.3455591201782227, |
|
"logits/rejected": -2.364595413208008, |
|
"logps/chosen": -130.79019165039062, |
|
"logps/rejected": -147.9118194580078, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7606639862060547, |
|
"rewards/margins": 1.2344610691070557, |
|
"rewards/rejected": -2.9951250553131104, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7649006622516554e-07, |
|
"logits/chosen": -2.167285442352295, |
|
"logits/rejected": -2.247238874435425, |
|
"logps/chosen": -139.6284942626953, |
|
"logps/rejected": -158.96109008789062, |
|
"loss": 0.7002, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.767961263656616, |
|
"rewards/margins": 1.04625403881073, |
|
"rewards/rejected": -3.8142154216766357, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.748344370860927e-07, |
|
"logits/chosen": -2.3983192443847656, |
|
"logits/rejected": -2.4019296169281006, |
|
"logps/chosen": -115.08357238769531, |
|
"logps/rejected": -129.3126220703125, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6475099325180054, |
|
"rewards/margins": 1.2640306949615479, |
|
"rewards/rejected": -2.9115407466888428, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7317880794701987e-07, |
|
"logits/chosen": -2.3797879219055176, |
|
"logits/rejected": -2.324965715408325, |
|
"logps/chosen": -116.2055435180664, |
|
"logps/rejected": -131.01705932617188, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.186680555343628, |
|
"rewards/margins": 1.4000349044799805, |
|
"rewards/rejected": -2.5867154598236084, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.71523178807947e-07, |
|
"logits/chosen": -2.227961778640747, |
|
"logits/rejected": -2.197960615158081, |
|
"logps/chosen": -106.0052490234375, |
|
"logps/rejected": -126.49810791015625, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.7543351650238037, |
|
"rewards/margins": 1.1439127922058105, |
|
"rewards/rejected": -2.8982481956481934, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6986754966887418e-07, |
|
"logits/chosen": -2.34653902053833, |
|
"logits/rejected": -2.361428737640381, |
|
"logps/chosen": -93.7066879272461, |
|
"logps/rejected": -115.24361419677734, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.030475378036499, |
|
"rewards/margins": 1.2473831176757812, |
|
"rewards/rejected": -2.2778584957122803, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6821192052980132e-07, |
|
"logits/chosen": -2.3806099891662598, |
|
"logits/rejected": -2.404531955718994, |
|
"logps/chosen": -108.77107238769531, |
|
"logps/rejected": -124.2442398071289, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4443012475967407, |
|
"rewards/margins": 0.9878839254379272, |
|
"rewards/rejected": -2.432185411453247, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6655629139072846e-07, |
|
"logits/chosen": -2.447935104370117, |
|
"logits/rejected": -2.4319026470184326, |
|
"logps/chosen": -116.2603988647461, |
|
"logps/rejected": -123.78592681884766, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3556338548660278, |
|
"rewards/margins": 1.9813722372055054, |
|
"rewards/rejected": -3.337006092071533, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.649006622516556e-07, |
|
"logits/chosen": -2.3291103839874268, |
|
"logits/rejected": -2.3166141510009766, |
|
"logps/chosen": -111.0199203491211, |
|
"logps/rejected": -117.00514221191406, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5363482236862183, |
|
"rewards/margins": 0.870397686958313, |
|
"rewards/rejected": -2.4067459106445312, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.632450331125828e-07, |
|
"logits/chosen": -2.2856314182281494, |
|
"logits/rejected": -2.264632225036621, |
|
"logps/chosen": -102.96492004394531, |
|
"logps/rejected": -126.04007720947266, |
|
"loss": 0.456, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1887879371643066, |
|
"rewards/margins": 1.522206425666809, |
|
"rewards/rejected": -2.7109944820404053, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6158940397350993e-07, |
|
"logits/chosen": -2.2702925205230713, |
|
"logits/rejected": -2.237971782684326, |
|
"logps/chosen": -99.81072998046875, |
|
"logps/rejected": -124.4561996459961, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.322842001914978, |
|
"rewards/margins": 1.6832103729248047, |
|
"rewards/rejected": -3.0060524940490723, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5993377483443707e-07, |
|
"logits/chosen": -2.265434741973877, |
|
"logits/rejected": -2.2928626537323, |
|
"logps/chosen": -83.43587493896484, |
|
"logps/rejected": -114.23832702636719, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8695703744888306, |
|
"rewards/margins": 2.3257930278778076, |
|
"rewards/rejected": -3.1953632831573486, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5827814569536424e-07, |
|
"logits/chosen": -2.575456142425537, |
|
"logits/rejected": -2.395871162414551, |
|
"logps/chosen": -105.5760498046875, |
|
"logps/rejected": -97.21208190917969, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1773121356964111, |
|
"rewards/margins": 0.5559796690940857, |
|
"rewards/rejected": -1.7332916259765625, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5662251655629138e-07, |
|
"logits/chosen": -2.4753453731536865, |
|
"logits/rejected": -2.414577007293701, |
|
"logps/chosen": -143.94302368164062, |
|
"logps/rejected": -137.99838256835938, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1706478595733643, |
|
"rewards/margins": 1.126072883605957, |
|
"rewards/rejected": -2.2967207431793213, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5496688741721852e-07, |
|
"logits/chosen": -2.412086009979248, |
|
"logits/rejected": -2.3731260299682617, |
|
"logps/chosen": -106.2443618774414, |
|
"logps/rejected": -112.93099212646484, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2554187774658203, |
|
"rewards/margins": 1.3224613666534424, |
|
"rewards/rejected": -2.5778801441192627, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.533112582781457e-07, |
|
"logits/chosen": -2.2778310775756836, |
|
"logits/rejected": -2.256308078765869, |
|
"logps/chosen": -120.01104736328125, |
|
"logps/rejected": -123.55589294433594, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.085076093673706, |
|
"rewards/margins": 1.26901376247406, |
|
"rewards/rejected": -2.3540899753570557, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5165562913907285e-07, |
|
"logits/chosen": -2.393749475479126, |
|
"logits/rejected": -2.3263931274414062, |
|
"logps/chosen": -117.22212219238281, |
|
"logps/rejected": -138.7658233642578, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8770434856414795, |
|
"rewards/margins": 1.216582179069519, |
|
"rewards/rejected": -3.0936264991760254, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -2.4931600093841553, |
|
"logits/rejected": -2.4225075244903564, |
|
"logps/chosen": -125.9542236328125, |
|
"logps/rejected": -141.84219360351562, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3538328409194946, |
|
"rewards/margins": 1.1864392757415771, |
|
"rewards/rejected": -2.5402722358703613, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4834437086092716e-07, |
|
"logits/chosen": -2.3211989402770996, |
|
"logits/rejected": -2.3927392959594727, |
|
"logps/chosen": -94.21218872070312, |
|
"logps/rejected": -111.05567932128906, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.555248737335205, |
|
"rewards/margins": 0.7776703834533691, |
|
"rewards/rejected": -2.332918882369995, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.466887417218543e-07, |
|
"logits/chosen": -2.4537739753723145, |
|
"logits/rejected": -2.3887171745300293, |
|
"logps/chosen": -104.2787857055664, |
|
"logps/rejected": -113.91764831542969, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.352063775062561, |
|
"rewards/margins": 0.9023284912109375, |
|
"rewards/rejected": -2.254392147064209, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4503311258278144e-07, |
|
"logits/chosen": -2.4015471935272217, |
|
"logits/rejected": -2.42402720451355, |
|
"logps/chosen": -114.39097595214844, |
|
"logps/rejected": -132.64340209960938, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2820857763290405, |
|
"rewards/margins": 1.0415351390838623, |
|
"rewards/rejected": -2.323620557785034, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4337748344370858e-07, |
|
"logits/chosen": -2.355255126953125, |
|
"logits/rejected": -2.277355909347534, |
|
"logps/chosen": -102.35648345947266, |
|
"logps/rejected": -109.78977966308594, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6282755136489868, |
|
"rewards/margins": 1.255506992340088, |
|
"rewards/rejected": -2.8837826251983643, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4172185430463577e-07, |
|
"logits/chosen": -2.5044684410095215, |
|
"logits/rejected": -2.3574650287628174, |
|
"logps/chosen": -130.39955139160156, |
|
"logps/rejected": -128.08071899414062, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2382522821426392, |
|
"rewards/margins": 1.3744902610778809, |
|
"rewards/rejected": -2.6127424240112305, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/chosen": -2.261568784713745, |
|
"eval_logits/rejected": -2.2171857357025146, |
|
"eval_logps/chosen": -113.09461975097656, |
|
"eval_logps/rejected": -122.51795959472656, |
|
"eval_loss": 0.5183302164077759, |
|
"eval_rewards/accuracies": 0.703125, |
|
"eval_rewards/chosen": -1.3228007555007935, |
|
"eval_rewards/margins": 1.2242145538330078, |
|
"eval_rewards/rejected": -2.54701566696167, |
|
"eval_runtime": 523.6034, |
|
"eval_samples_per_second": 3.411, |
|
"eval_steps_per_second": 0.107, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4006622516556291e-07, |
|
"logits/chosen": -2.4229695796966553, |
|
"logits/rejected": -2.3659071922302246, |
|
"logps/chosen": -97.41563415527344, |
|
"logps/rejected": -107.28167724609375, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0089452266693115, |
|
"rewards/margins": 1.1100685596466064, |
|
"rewards/rejected": -2.119013786315918, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3841059602649005e-07, |
|
"logits/chosen": -2.368020534515381, |
|
"logits/rejected": -2.266580820083618, |
|
"logps/chosen": -107.78886413574219, |
|
"logps/rejected": -124.20140075683594, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1261156797409058, |
|
"rewards/margins": 1.54592764377594, |
|
"rewards/rejected": -2.6720430850982666, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3675496688741722e-07, |
|
"logits/chosen": -2.3915557861328125, |
|
"logits/rejected": -2.3538260459899902, |
|
"logps/chosen": -96.66950988769531, |
|
"logps/rejected": -107.39601135253906, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2402719259262085, |
|
"rewards/margins": 1.5004689693450928, |
|
"rewards/rejected": -2.7407407760620117, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3509933774834436e-07, |
|
"logits/chosen": -2.3380367755889893, |
|
"logits/rejected": -2.2895896434783936, |
|
"logps/chosen": -122.12117767333984, |
|
"logps/rejected": -122.6964111328125, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7105709314346313, |
|
"rewards/margins": 1.117949366569519, |
|
"rewards/rejected": -2.8285202980041504, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.334437086092715e-07, |
|
"logits/chosen": -2.650242567062378, |
|
"logits/rejected": -2.575338840484619, |
|
"logps/chosen": -116.67132568359375, |
|
"logps/rejected": -121.65495300292969, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0724276304244995, |
|
"rewards/margins": 0.7939808964729309, |
|
"rewards/rejected": -1.8664085865020752, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.317880794701987e-07, |
|
"logits/chosen": -2.4393889904022217, |
|
"logits/rejected": -2.356849431991577, |
|
"logps/chosen": -108.2217788696289, |
|
"logps/rejected": -105.802734375, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9382781982421875, |
|
"rewards/margins": 0.9701493978500366, |
|
"rewards/rejected": -1.9084275960922241, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3013245033112583e-07, |
|
"logits/chosen": -2.27262544631958, |
|
"logits/rejected": -2.2650160789489746, |
|
"logps/chosen": -82.49347686767578, |
|
"logps/rejected": -105.01361083984375, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5115716457366943, |
|
"rewards/margins": 1.9998562335968018, |
|
"rewards/rejected": -2.511428117752075, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2847682119205297e-07, |
|
"logits/chosen": -2.3641304969787598, |
|
"logits/rejected": -2.400428533554077, |
|
"logps/chosen": -95.62802124023438, |
|
"logps/rejected": -105.62736511230469, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2022227048873901, |
|
"rewards/margins": 1.1635633707046509, |
|
"rewards/rejected": -2.365786075592041, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2682119205298011e-07, |
|
"logits/chosen": -2.2362232208251953, |
|
"logits/rejected": -2.294517993927002, |
|
"logps/chosen": -111.7828140258789, |
|
"logps/rejected": -107.35648345947266, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3542122840881348, |
|
"rewards/margins": 0.9707919359207153, |
|
"rewards/rejected": -2.3250043392181396, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2516556291390728e-07, |
|
"logits/chosen": -2.4351532459259033, |
|
"logits/rejected": -2.3938307762145996, |
|
"logps/chosen": -116.37557220458984, |
|
"logps/rejected": -142.02877807617188, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.059444546699524, |
|
"rewards/margins": 1.6750872135162354, |
|
"rewards/rejected": -2.734531879425049, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2350993377483442e-07, |
|
"logits/chosen": -2.130566358566284, |
|
"logits/rejected": -2.1427571773529053, |
|
"logps/chosen": -98.26994323730469, |
|
"logps/rejected": -125.13362121582031, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1679749488830566, |
|
"rewards/margins": 2.0151760578155518, |
|
"rewards/rejected": -4.183150768280029, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.218543046357616e-07, |
|
"logits/chosen": -2.3847343921661377, |
|
"logits/rejected": -2.3289005756378174, |
|
"logps/chosen": -103.18563079833984, |
|
"logps/rejected": -106.60140228271484, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5415022373199463, |
|
"rewards/margins": 1.246010184288025, |
|
"rewards/rejected": -2.7875125408172607, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2019867549668873e-07, |
|
"logits/chosen": -2.344989776611328, |
|
"logits/rejected": -2.2486376762390137, |
|
"logps/chosen": -111.1012954711914, |
|
"logps/rejected": -114.76014709472656, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4294898509979248, |
|
"rewards/margins": 1.2511804103851318, |
|
"rewards/rejected": -2.6806702613830566, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.185430463576159e-07, |
|
"logits/chosen": -2.342101573944092, |
|
"logits/rejected": -2.3254072666168213, |
|
"logps/chosen": -114.9495620727539, |
|
"logps/rejected": -122.08809661865234, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4265601634979248, |
|
"rewards/margins": 1.6178033351898193, |
|
"rewards/rejected": -3.0443637371063232, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1688741721854305e-07, |
|
"logits/chosen": -2.329153537750244, |
|
"logits/rejected": -2.2368149757385254, |
|
"logps/chosen": -123.8796157836914, |
|
"logps/rejected": -119.62074279785156, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5358905792236328, |
|
"rewards/margins": 1.2361419200897217, |
|
"rewards/rejected": -2.7720324993133545, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1523178807947019e-07, |
|
"logits/chosen": -2.4591078758239746, |
|
"logits/rejected": -2.454157829284668, |
|
"logps/chosen": -116.4410629272461, |
|
"logps/rejected": -129.07809448242188, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5216033458709717, |
|
"rewards/margins": 1.3263527154922485, |
|
"rewards/rejected": -2.8479561805725098, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1357615894039735e-07, |
|
"logits/chosen": -2.287152051925659, |
|
"logits/rejected": -2.2752058506011963, |
|
"logps/chosen": -128.70211791992188, |
|
"logps/rejected": -141.4760284423828, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9110784530639648, |
|
"rewards/margins": 1.6628735065460205, |
|
"rewards/rejected": -3.5739517211914062, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.119205298013245e-07, |
|
"logits/chosen": -2.3498637676239014, |
|
"logits/rejected": -2.3876309394836426, |
|
"logps/chosen": -121.30989074707031, |
|
"logps/rejected": -129.5779571533203, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.256763219833374, |
|
"rewards/margins": 1.8756353855133057, |
|
"rewards/rejected": -3.1323981285095215, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1026490066225165e-07, |
|
"logits/chosen": -2.2559609413146973, |
|
"logits/rejected": -2.2900869846343994, |
|
"logps/chosen": -89.24148559570312, |
|
"logps/rejected": -104.9818115234375, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5417016744613647, |
|
"rewards/margins": 1.3289363384246826, |
|
"rewards/rejected": -2.870638132095337, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0860927152317881e-07, |
|
"logits/chosen": -2.3323373794555664, |
|
"logits/rejected": -2.4132628440856934, |
|
"logps/chosen": -112.27374267578125, |
|
"logps/rejected": -132.1646728515625, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.039150595664978, |
|
"rewards/margins": 1.5584369897842407, |
|
"rewards/rejected": -2.597587823867798, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0695364238410595e-07, |
|
"logits/chosen": -2.2826695442199707, |
|
"logits/rejected": -2.232888698577881, |
|
"logps/chosen": -107.91890716552734, |
|
"logps/rejected": -114.87126159667969, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3879740238189697, |
|
"rewards/margins": 1.4284284114837646, |
|
"rewards/rejected": -2.8164026737213135, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0529801324503311e-07, |
|
"logits/chosen": -2.433330535888672, |
|
"logits/rejected": -2.3720269203186035, |
|
"logps/chosen": -122.9089584350586, |
|
"logps/rejected": -130.165771484375, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7988684177398682, |
|
"rewards/margins": 1.0453321933746338, |
|
"rewards/rejected": -2.844200611114502, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0364238410596025e-07, |
|
"logits/chosen": -2.432610511779785, |
|
"logits/rejected": -2.3609492778778076, |
|
"logps/chosen": -126.18135070800781, |
|
"logps/rejected": -137.95114135742188, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1943086385726929, |
|
"rewards/margins": 1.0217430591583252, |
|
"rewards/rejected": -2.2160518169403076, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0198675496688741e-07, |
|
"logits/chosen": -2.40020489692688, |
|
"logits/rejected": -2.333512783050537, |
|
"logps/chosen": -120.67720794677734, |
|
"logps/rejected": -123.46641540527344, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2497332096099854, |
|
"rewards/margins": 1.2155460119247437, |
|
"rewards/rejected": -2.4652791023254395, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0033112582781457e-07, |
|
"logits/chosen": -2.4432952404022217, |
|
"logits/rejected": -2.3959970474243164, |
|
"logps/chosen": -131.6014862060547, |
|
"logps/rejected": -145.7483673095703, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9097809791564941, |
|
"rewards/margins": 1.4488131999969482, |
|
"rewards/rejected": -2.3585941791534424, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.867549668874171e-08, |
|
"logits/chosen": -2.2430427074432373, |
|
"logits/rejected": -2.2248117923736572, |
|
"logps/chosen": -99.05213928222656, |
|
"logps/rejected": -118.5693130493164, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.898768424987793, |
|
"rewards/margins": 1.2684627771377563, |
|
"rewards/rejected": -2.1672310829162598, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.701986754966887e-08, |
|
"logits/chosen": -2.4450364112854004, |
|
"logits/rejected": -2.3705830574035645, |
|
"logps/chosen": -107.76090240478516, |
|
"logps/rejected": -112.4260482788086, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4488307237625122, |
|
"rewards/margins": 0.7578933835029602, |
|
"rewards/rejected": -2.206723928451538, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.536423841059603e-08, |
|
"logits/chosen": -2.4003443717956543, |
|
"logits/rejected": -2.348435878753662, |
|
"logps/chosen": -98.28638458251953, |
|
"logps/rejected": -100.79689025878906, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.41732919216156, |
|
"rewards/margins": 1.0273702144622803, |
|
"rewards/rejected": -2.444699764251709, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.370860927152317e-08, |
|
"logits/chosen": -2.3565890789031982, |
|
"logits/rejected": -2.3134591579437256, |
|
"logps/chosen": -122.64701080322266, |
|
"logps/rejected": -140.7588348388672, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.8713737726211548, |
|
"rewards/margins": 0.9109483957290649, |
|
"rewards/rejected": -2.7823221683502197, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.205298013245033e-08, |
|
"logits/chosen": -2.4065792560577393, |
|
"logits/rejected": -2.343113422393799, |
|
"logps/chosen": -113.3506088256836, |
|
"logps/rejected": -118.6836166381836, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.640981912612915, |
|
"rewards/margins": 1.4465951919555664, |
|
"rewards/rejected": -3.0875768661499023, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.039735099337747e-08, |
|
"logits/chosen": -2.280989170074463, |
|
"logits/rejected": -2.2906501293182373, |
|
"logps/chosen": -108.36322021484375, |
|
"logps/rejected": -118.99311828613281, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3466839790344238, |
|
"rewards/margins": 1.234621524810791, |
|
"rewards/rejected": -2.581305503845215, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.874172185430463e-08, |
|
"logits/chosen": -2.3098435401916504, |
|
"logits/rejected": -2.365722179412842, |
|
"logps/chosen": -142.2515411376953, |
|
"logps/rejected": -136.40847778320312, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9295507669448853, |
|
"rewards/margins": 0.7886122465133667, |
|
"rewards/rejected": -2.718163013458252, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.70860927152318e-08, |
|
"logits/chosen": -2.4758474826812744, |
|
"logits/rejected": -2.4529106616973877, |
|
"logps/chosen": -102.67512512207031, |
|
"logps/rejected": -108.22530364990234, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4004428386688232, |
|
"rewards/margins": 0.7858734130859375, |
|
"rewards/rejected": -2.1863162517547607, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.543046357615893e-08, |
|
"logits/chosen": -2.4003779888153076, |
|
"logits/rejected": -2.3763396739959717, |
|
"logps/chosen": -104.71977233886719, |
|
"logps/rejected": -117.16717529296875, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1805320978164673, |
|
"rewards/margins": 1.6279674768447876, |
|
"rewards/rejected": -2.808499336242676, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.377483443708609e-08, |
|
"logits/chosen": -2.4015908241271973, |
|
"logits/rejected": -2.3405182361602783, |
|
"logps/chosen": -117.36273193359375, |
|
"logps/rejected": -124.22319030761719, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3869013786315918, |
|
"rewards/margins": 0.9574357867240906, |
|
"rewards/rejected": -2.344337224960327, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.211920529801324e-08, |
|
"logits/chosen": -2.4349982738494873, |
|
"logits/rejected": -2.4097352027893066, |
|
"logps/chosen": -125.55684661865234, |
|
"logps/rejected": -132.021484375, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4107874631881714, |
|
"rewards/margins": 0.8303612470626831, |
|
"rewards/rejected": -2.2411487102508545, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.04635761589404e-08, |
|
"logits/chosen": -2.265141248703003, |
|
"logits/rejected": -2.169220447540283, |
|
"logps/chosen": -102.09349060058594, |
|
"logps/rejected": -119.7810287475586, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4435564279556274, |
|
"rewards/margins": 1.294883131980896, |
|
"rewards/rejected": -2.7384393215179443, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.880794701986755e-08, |
|
"logits/chosen": -2.4385974407196045, |
|
"logits/rejected": -2.3579273223876953, |
|
"logps/chosen": -93.9774169921875, |
|
"logps/rejected": -96.58930969238281, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5919442772865295, |
|
"rewards/margins": 0.9432849884033203, |
|
"rewards/rejected": -1.5352293252944946, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.71523178807947e-08, |
|
"logits/chosen": -2.4252941608428955, |
|
"logits/rejected": -2.308663845062256, |
|
"logps/chosen": -139.50485229492188, |
|
"logps/rejected": -134.99417114257812, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1931045055389404, |
|
"rewards/margins": 1.3365012407302856, |
|
"rewards/rejected": -2.5296058654785156, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.549668874172185e-08, |
|
"logits/chosen": -2.3252806663513184, |
|
"logits/rejected": -2.2149767875671387, |
|
"logps/chosen": -119.28135681152344, |
|
"logps/rejected": -126.89034271240234, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3043967485427856, |
|
"rewards/margins": 1.297178030014038, |
|
"rewards/rejected": -2.601574420928955, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.3841059602649e-08, |
|
"logits/chosen": -2.4337799549102783, |
|
"logits/rejected": -2.408616065979004, |
|
"logps/chosen": -105.0708236694336, |
|
"logps/rejected": -112.90872955322266, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.739833652973175, |
|
"rewards/margins": 1.0932036638259888, |
|
"rewards/rejected": -1.8330373764038086, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.218543046357616e-08, |
|
"logits/chosen": -2.474499225616455, |
|
"logits/rejected": -2.3793933391571045, |
|
"logps/chosen": -115.8188247680664, |
|
"logps/rejected": -119.8792953491211, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7591004371643066, |
|
"rewards/margins": 1.390928864479065, |
|
"rewards/rejected": -2.150029182434082, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.052980132450331e-08, |
|
"logits/chosen": -2.342878580093384, |
|
"logits/rejected": -2.2635059356689453, |
|
"logps/chosen": -112.3121566772461, |
|
"logps/rejected": -118.00971984863281, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1263339519500732, |
|
"rewards/margins": 0.7218903303146362, |
|
"rewards/rejected": -1.848224401473999, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.887417218543045e-08, |
|
"logits/chosen": -2.4378771781921387, |
|
"logits/rejected": -2.493478775024414, |
|
"logps/chosen": -101.32011413574219, |
|
"logps/rejected": -126.55435943603516, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0950850248336792, |
|
"rewards/margins": 1.0815422534942627, |
|
"rewards/rejected": -2.1766273975372314, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.721854304635762e-08, |
|
"logits/chosen": -2.395272731781006, |
|
"logits/rejected": -2.352908134460449, |
|
"logps/chosen": -115.22686767578125, |
|
"logps/rejected": -114.85673522949219, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.910413384437561, |
|
"rewards/margins": 0.9333620071411133, |
|
"rewards/rejected": -1.8437751531600952, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.556291390728476e-08, |
|
"logits/chosen": -2.4603307247161865, |
|
"logits/rejected": -2.4367270469665527, |
|
"logps/chosen": -111.51399993896484, |
|
"logps/rejected": -120.80682373046875, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1758167743682861, |
|
"rewards/margins": 0.8748563528060913, |
|
"rewards/rejected": -2.050673007965088, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.390728476821191e-08, |
|
"logits/chosen": -2.3244917392730713, |
|
"logits/rejected": -2.253732919692993, |
|
"logps/chosen": -108.8800277709961, |
|
"logps/rejected": -125.33662414550781, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7894026637077332, |
|
"rewards/margins": 1.8209375143051147, |
|
"rewards/rejected": -2.610340118408203, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.225165562913907e-08, |
|
"logits/chosen": -2.387305974960327, |
|
"logits/rejected": -2.387345552444458, |
|
"logps/chosen": -107.43021392822266, |
|
"logps/rejected": -118.97044372558594, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9636829495429993, |
|
"rewards/margins": 0.8673983812332153, |
|
"rewards/rejected": -1.8310810327529907, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.059602649006622e-08, |
|
"logits/chosen": -2.3770089149475098, |
|
"logits/rejected": -2.371371269226074, |
|
"logps/chosen": -123.25062561035156, |
|
"logps/rejected": -140.9857635498047, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.802879810333252, |
|
"rewards/margins": 1.1967840194702148, |
|
"rewards/rejected": -1.9996639490127563, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.8940397350993375e-08, |
|
"logits/chosen": -2.3844501972198486, |
|
"logits/rejected": -2.415923595428467, |
|
"logps/chosen": -96.17528533935547, |
|
"logps/rejected": -111.2402114868164, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7955904006958008, |
|
"rewards/margins": 1.3297996520996094, |
|
"rewards/rejected": -2.125389814376831, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -2.2567203044891357, |
|
"eval_logits/rejected": -2.214937925338745, |
|
"eval_logps/chosen": -110.69182586669922, |
|
"eval_logps/rejected": -120.59849548339844, |
|
"eval_loss": 0.5078982710838318, |
|
"eval_rewards/accuracies": 0.7120535969734192, |
|
"eval_rewards/chosen": -1.0825201272964478, |
|
"eval_rewards/margins": 1.2725489139556885, |
|
"eval_rewards/rejected": -2.3550689220428467, |
|
"eval_runtime": 502.7018, |
|
"eval_samples_per_second": 3.553, |
|
"eval_steps_per_second": 0.111, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.728476821192053e-08, |
|
"logits/chosen": -2.398317575454712, |
|
"logits/rejected": -2.4122400283813477, |
|
"logps/chosen": -93.20875549316406, |
|
"logps/rejected": -113.88653564453125, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8295547366142273, |
|
"rewards/margins": 1.022578239440918, |
|
"rewards/rejected": -1.852132797241211, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.5629139072847675e-08, |
|
"logits/chosen": -2.414301633834839, |
|
"logits/rejected": -2.3872337341308594, |
|
"logps/chosen": -129.2257080078125, |
|
"logps/rejected": -136.29031372070312, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3618860244750977, |
|
"rewards/margins": 1.9172807931900024, |
|
"rewards/rejected": -3.2791664600372314, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.397350993377483e-08, |
|
"logits/chosen": -2.446453809738159, |
|
"logits/rejected": -2.384152889251709, |
|
"logps/chosen": -120.69456481933594, |
|
"logps/rejected": -128.5080108642578, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.820598304271698, |
|
"rewards/margins": 1.557586908340454, |
|
"rewards/rejected": -2.3781850337982178, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.231788079470199e-08, |
|
"logits/chosen": -2.416982889175415, |
|
"logits/rejected": -2.296403646469116, |
|
"logps/chosen": -110.80255126953125, |
|
"logps/rejected": -113.04368591308594, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1277140378952026, |
|
"rewards/margins": 1.060675859451294, |
|
"rewards/rejected": -2.188389778137207, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.0662251655629135e-08, |
|
"logits/chosen": -2.355494976043701, |
|
"logits/rejected": -2.2958462238311768, |
|
"logps/chosen": -113.16410064697266, |
|
"logps/rejected": -119.9725112915039, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6746724247932434, |
|
"rewards/margins": 1.8009824752807617, |
|
"rewards/rejected": -2.4756548404693604, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.900662251655629e-08, |
|
"logits/chosen": -2.4485743045806885, |
|
"logits/rejected": -2.426466703414917, |
|
"logps/chosen": -110.64210510253906, |
|
"logps/rejected": -122.92867279052734, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9479681849479675, |
|
"rewards/margins": 1.6344906091690063, |
|
"rewards/rejected": -2.582458972930908, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.735099337748344e-08, |
|
"logits/chosen": -2.279062509536743, |
|
"logits/rejected": -2.2378296852111816, |
|
"logps/chosen": -117.4856185913086, |
|
"logps/rejected": -126.33473205566406, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9676671028137207, |
|
"rewards/margins": 1.4139858484268188, |
|
"rewards/rejected": -2.381652593612671, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.5695364238410595e-08, |
|
"logits/chosen": -2.27183198928833, |
|
"logits/rejected": -2.2195851802825928, |
|
"logps/chosen": -99.91886138916016, |
|
"logps/rejected": -139.50657653808594, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.060943603515625, |
|
"rewards/margins": 2.9951958656311035, |
|
"rewards/rejected": -4.056139945983887, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.403973509933775e-08, |
|
"logits/chosen": -2.413677215576172, |
|
"logits/rejected": -2.440647602081299, |
|
"logps/chosen": -118.7281723022461, |
|
"logps/rejected": -134.04771423339844, |
|
"loss": 0.5028, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.119231939315796, |
|
"rewards/margins": 1.4490314722061157, |
|
"rewards/rejected": -2.568263530731201, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.23841059602649e-08, |
|
"logits/chosen": -2.3565783500671387, |
|
"logits/rejected": -2.4461493492126465, |
|
"logps/chosen": -108.08616638183594, |
|
"logps/rejected": -132.34011840820312, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1898248195648193, |
|
"rewards/margins": 1.3966195583343506, |
|
"rewards/rejected": -2.58644437789917, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.072847682119205e-08, |
|
"logits/chosen": -2.396179437637329, |
|
"logits/rejected": -2.4256176948547363, |
|
"logps/chosen": -96.67437744140625, |
|
"logps/rejected": -101.86246490478516, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4870302081108093, |
|
"rewards/margins": 1.127990484237671, |
|
"rewards/rejected": -1.615020751953125, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.90728476821192e-08, |
|
"logits/chosen": -2.3725028038024902, |
|
"logits/rejected": -2.322782039642334, |
|
"logps/chosen": -128.52896118164062, |
|
"logps/rejected": -129.73118591308594, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3587214946746826, |
|
"rewards/margins": 1.5958476066589355, |
|
"rewards/rejected": -2.954568862915039, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7417218543046355e-08, |
|
"logits/chosen": -2.378821611404419, |
|
"logits/rejected": -2.277832269668579, |
|
"logps/chosen": -87.0296630859375, |
|
"logps/rejected": -106.12138366699219, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8327251672744751, |
|
"rewards/margins": 1.2228658199310303, |
|
"rewards/rejected": -2.055591106414795, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.576158940397351e-08, |
|
"logits/chosen": -2.549872398376465, |
|
"logits/rejected": -2.4757115840911865, |
|
"logps/chosen": -114.14369201660156, |
|
"logps/rejected": -116.66259765625, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9791749119758606, |
|
"rewards/margins": 0.8427003026008606, |
|
"rewards/rejected": -1.821874976158142, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.410596026490066e-08, |
|
"logits/chosen": -2.433527708053589, |
|
"logits/rejected": -2.371525764465332, |
|
"logps/chosen": -103.0054931640625, |
|
"logps/rejected": -103.31925964355469, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8271923065185547, |
|
"rewards/margins": 1.119652509689331, |
|
"rewards/rejected": -1.9468450546264648, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.245033112582781e-08, |
|
"logits/chosen": -2.337153434753418, |
|
"logits/rejected": -2.2308475971221924, |
|
"logps/chosen": -129.55728149414062, |
|
"logps/rejected": -122.7024917602539, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9213937520980835, |
|
"rewards/margins": 1.0446635484695435, |
|
"rewards/rejected": -1.9660571813583374, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.079470198675496e-08, |
|
"logits/chosen": -2.2858211994171143, |
|
"logits/rejected": -2.313380002975464, |
|
"logps/chosen": -107.20402526855469, |
|
"logps/rejected": -136.98562622070312, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9993183016777039, |
|
"rewards/margins": 1.509690284729004, |
|
"rewards/rejected": -2.5090086460113525, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.913907284768212e-08, |
|
"logits/chosen": -2.3693079948425293, |
|
"logits/rejected": -2.284874677658081, |
|
"logps/chosen": -106.6112289428711, |
|
"logps/rejected": -126.05074310302734, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7449665665626526, |
|
"rewards/margins": 1.826768159866333, |
|
"rewards/rejected": -2.571734666824341, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.748344370860927e-08, |
|
"logits/chosen": -2.2911553382873535, |
|
"logits/rejected": -2.380384922027588, |
|
"logps/chosen": -102.5718765258789, |
|
"logps/rejected": -124.40003967285156, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7534275650978088, |
|
"rewards/margins": 1.1661580801010132, |
|
"rewards/rejected": -1.9195858240127563, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.5827814569536422e-08, |
|
"logits/chosen": -2.4230473041534424, |
|
"logits/rejected": -2.4315543174743652, |
|
"logps/chosen": -117.46553802490234, |
|
"logps/rejected": -130.05776977539062, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8374800682067871, |
|
"rewards/margins": 1.2378642559051514, |
|
"rewards/rejected": -2.0753445625305176, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4172185430463576e-08, |
|
"logits/chosen": -2.417757034301758, |
|
"logits/rejected": -2.2985901832580566, |
|
"logps/chosen": -132.27774047851562, |
|
"logps/rejected": -133.81459045410156, |
|
"loss": 0.5058, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.378666877746582, |
|
"rewards/margins": 0.97132807970047, |
|
"rewards/rejected": -2.3499951362609863, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2516556291390726e-08, |
|
"logits/chosen": -2.327725887298584, |
|
"logits/rejected": -2.290168046951294, |
|
"logps/chosen": -118.74835205078125, |
|
"logps/rejected": -132.76882934570312, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3064758777618408, |
|
"rewards/margins": 1.0742686986923218, |
|
"rewards/rejected": -2.380744457244873, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0860927152317882e-08, |
|
"logits/chosen": -2.3731508255004883, |
|
"logits/rejected": -2.367323398590088, |
|
"logps/chosen": -126.88232421875, |
|
"logps/rejected": -135.72384643554688, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.890730082988739, |
|
"rewards/margins": 1.7571513652801514, |
|
"rewards/rejected": -2.6478817462921143, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.9205298013245032e-08, |
|
"logits/chosen": -2.4219555854797363, |
|
"logits/rejected": -2.4555513858795166, |
|
"logps/chosen": -96.6889419555664, |
|
"logps/rejected": -114.50843811035156, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1261937618255615, |
|
"rewards/margins": 1.0691124200820923, |
|
"rewards/rejected": -2.1953060626983643, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7549668874172186e-08, |
|
"logits/chosen": -2.3101606369018555, |
|
"logits/rejected": -2.3013217449188232, |
|
"logps/chosen": -95.89967346191406, |
|
"logps/rejected": -99.94120025634766, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2933688163757324, |
|
"rewards/margins": 0.8693240880966187, |
|
"rewards/rejected": -2.1626930236816406, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5894039735099336e-08, |
|
"logits/chosen": -2.22920823097229, |
|
"logits/rejected": -2.2497153282165527, |
|
"logps/chosen": -83.50569152832031, |
|
"logps/rejected": -98.3634033203125, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0309844017028809, |
|
"rewards/margins": 1.4945679903030396, |
|
"rewards/rejected": -2.525552272796631, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4238410596026489e-08, |
|
"logits/chosen": -2.220327854156494, |
|
"logits/rejected": -2.2442502975463867, |
|
"logps/chosen": -105.8703842163086, |
|
"logps/rejected": -126.78196716308594, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.513962984085083, |
|
"rewards/margins": 1.4240639209747314, |
|
"rewards/rejected": -2.9380269050598145, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2582781456953642e-08, |
|
"logits/chosen": -2.417300224304199, |
|
"logits/rejected": -2.3726484775543213, |
|
"logps/chosen": -126.7840576171875, |
|
"logps/rejected": -133.47689819335938, |
|
"loss": 0.4275, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0579255819320679, |
|
"rewards/margins": 1.5306205749511719, |
|
"rewards/rejected": -2.58854603767395, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0927152317880794e-08, |
|
"logits/chosen": -2.4346401691436768, |
|
"logits/rejected": -2.4542853832244873, |
|
"logps/chosen": -119.21122741699219, |
|
"logps/rejected": -128.86886596679688, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2019822597503662, |
|
"rewards/margins": 1.384701132774353, |
|
"rewards/rejected": -2.5866830348968506, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.271523178807947e-09, |
|
"logits/chosen": -2.4030935764312744, |
|
"logits/rejected": -2.3885276317596436, |
|
"logps/chosen": -111.55142974853516, |
|
"logps/rejected": -113.03800964355469, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.222048282623291, |
|
"rewards/margins": 0.9595780372619629, |
|
"rewards/rejected": -2.181626558303833, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.6158940397351e-09, |
|
"logits/chosen": -2.2258238792419434, |
|
"logits/rejected": -2.1862361431121826, |
|
"logps/chosen": -92.22099304199219, |
|
"logps/rejected": -98.86279296875, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.713568925857544, |
|
"rewards/margins": 0.8357810974121094, |
|
"rewards/rejected": -2.5493500232696533, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.960264900662252e-09, |
|
"logits/chosen": -2.317258358001709, |
|
"logits/rejected": -2.3031933307647705, |
|
"logps/chosen": -109.45621490478516, |
|
"logps/rejected": -111.22418212890625, |
|
"loss": 0.8281, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2969977855682373, |
|
"rewards/margins": 1.3577762842178345, |
|
"rewards/rejected": -2.6547741889953613, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.3046357615894034e-09, |
|
"logits/chosen": -2.2622385025024414, |
|
"logits/rejected": -2.2199172973632812, |
|
"logps/chosen": -98.4054946899414, |
|
"logps/rejected": -112.76808166503906, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9782658815383911, |
|
"rewards/margins": 1.7501733303070068, |
|
"rewards/rejected": -2.7284390926361084, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.6490066225165564e-09, |
|
"logits/chosen": -2.3729500770568848, |
|
"logits/rejected": -2.432080030441284, |
|
"logps/chosen": -101.60713195800781, |
|
"logps/rejected": -131.0595703125, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8371769189834595, |
|
"rewards/margins": 0.8700039982795715, |
|
"rewards/rejected": -1.7071807384490967, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.933774834437085e-10, |
|
"logits/chosen": -2.2028284072875977, |
|
"logits/rejected": -2.2098453044891357, |
|
"logps/chosen": -109.49913024902344, |
|
"logps/rejected": -121.43013763427734, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7948501110076904, |
|
"rewards/margins": 2.1088409423828125, |
|
"rewards/rejected": -2.903691291809082, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3356, |
|
"total_flos": 0.0, |
|
"train_loss": 0.58384587518933, |
|
"train_runtime": 30698.0699, |
|
"train_samples_per_second": 1.749, |
|
"train_steps_per_second": 0.109 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3356, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|