silviasapora's picture
Model save
64834d7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.970940170940171,
"eval_steps": 500,
"global_step": 219,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06837606837606838,
"grad_norm": 192.408203125,
"learning_rate": 1.1363636363636365e-05,
"log_odds_chosen": 0.022557739168405533,
"log_odds_ratio": -0.775595486164093,
"logps/chosen": -1.23601233959198,
"logps/rejected": -1.2647087574005127,
"loss": 64.8127,
"nll_loss": 1.6612399816513062,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.61800616979599,
"rewards/margins": 0.014348246157169342,
"rewards/rejected": -0.6323543787002563,
"step": 5
},
{
"epoch": 0.13675213675213677,
"grad_norm": 87.27720642089844,
"learning_rate": 2.272727272727273e-05,
"log_odds_chosen": 0.01737348921597004,
"log_odds_ratio": -0.7335888743400574,
"logps/chosen": -0.9425485730171204,
"logps/rejected": -0.9615429043769836,
"loss": 54.8539,
"nll_loss": 1.3627849817276,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.4712742865085602,
"rewards/margins": 0.009497147053480148,
"rewards/rejected": -0.4807714521884918,
"step": 10
},
{
"epoch": 0.20512820512820512,
"grad_norm": 50.64696502685547,
"learning_rate": 3.409090909090909e-05,
"log_odds_chosen": 0.15462855994701385,
"log_odds_ratio": -0.6995453834533691,
"logps/chosen": -0.799379289150238,
"logps/rejected": -0.8898841738700867,
"loss": 51.0896,
"nll_loss": 1.258803367614746,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.399689644575119,
"rewards/margins": 0.04525243118405342,
"rewards/rejected": -0.44494208693504333,
"step": 15
},
{
"epoch": 0.27350427350427353,
"grad_norm": 54.63002014160156,
"learning_rate": 4.545454545454546e-05,
"log_odds_chosen": 0.23047371208667755,
"log_odds_ratio": -0.6775897741317749,
"logps/chosen": -0.7368494272232056,
"logps/rejected": -0.8401222229003906,
"loss": 48.2524,
"nll_loss": 1.1753368377685547,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.3684247136116028,
"rewards/margins": 0.05163642019033432,
"rewards/rejected": -0.4200611114501953,
"step": 20
},
{
"epoch": 0.3418803418803419,
"grad_norm": 75.0562973022461,
"learning_rate": 4.9971395327545466e-05,
"log_odds_chosen": 0.16501149535179138,
"log_odds_ratio": -0.6983749270439148,
"logps/chosen": -0.7642364501953125,
"logps/rejected": -0.8525689244270325,
"loss": 47.8234,
"nll_loss": 1.1575247049331665,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.38211822509765625,
"rewards/margins": 0.04416622966527939,
"rewards/rejected": -0.42628446221351624,
"step": 25
},
{
"epoch": 0.41025641025641024,
"grad_norm": 50.43643569946289,
"learning_rate": 4.979682598982912e-05,
"log_odds_chosen": 0.28536584973335266,
"log_odds_ratio": -0.6665671467781067,
"logps/chosen": -0.7366295456886292,
"logps/rejected": -0.8632861971855164,
"loss": 46.5783,
"nll_loss": 1.1268101930618286,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.3683147728443146,
"rewards/margins": 0.0633283406496048,
"rewards/rejected": -0.4316430985927582,
"step": 30
},
{
"epoch": 0.47863247863247865,
"grad_norm": 44.052860260009766,
"learning_rate": 4.9464686742003006e-05,
"log_odds_chosen": 0.11640346050262451,
"log_odds_ratio": -0.7353156208992004,
"logps/chosen": -0.7775823473930359,
"logps/rejected": -0.8460026979446411,
"loss": 46.9736,
"nll_loss": 1.1240406036376953,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.38879117369651794,
"rewards/margins": 0.03421013802289963,
"rewards/rejected": -0.42300134897232056,
"step": 35
},
{
"epoch": 0.5470085470085471,
"grad_norm": 47.66566848754883,
"learning_rate": 4.8977088142549285e-05,
"log_odds_chosen": 0.22347350418567657,
"log_odds_ratio": -0.6904687285423279,
"logps/chosen": -0.7437100410461426,
"logps/rejected": -0.8720178604125977,
"loss": 45.6756,
"nll_loss": 1.09463632106781,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.3718550205230713,
"rewards/margins": 0.06415387988090515,
"rewards/rejected": -0.43600893020629883,
"step": 40
},
{
"epoch": 0.6153846153846154,
"grad_norm": 44.04689025878906,
"learning_rate": 4.833712860686666e-05,
"log_odds_chosen": 0.3655330538749695,
"log_odds_ratio": -0.6214441657066345,
"logps/chosen": -0.7377647161483765,
"logps/rejected": -0.9412744641304016,
"loss": 43.8278,
"nll_loss": 1.0561692714691162,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.36888235807418823,
"rewards/margins": 0.10175484418869019,
"rewards/rejected": -0.4706372320652008,
"step": 45
},
{
"epoch": 0.6837606837606838,
"grad_norm": 45.08797073364258,
"learning_rate": 4.754887471857969e-05,
"log_odds_chosen": 0.44758883118629456,
"log_odds_ratio": -0.6216556429862976,
"logps/chosen": -0.6983757615089417,
"logps/rejected": -0.9704947471618652,
"loss": 42.6963,
"nll_loss": 1.0264930725097656,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.3491878807544708,
"rewards/margins": 0.13605953752994537,
"rewards/rejected": -0.4852473735809326,
"step": 50
},
{
"epoch": 0.7521367521367521,
"grad_norm": 42.02785110473633,
"learning_rate": 4.6617335388682556e-05,
"log_odds_chosen": 0.3934350609779358,
"log_odds_ratio": -0.6445830464363098,
"logps/chosen": -0.7269451022148132,
"logps/rejected": -0.9580278396606445,
"loss": 43.8584,
"nll_loss": 1.0543291568756104,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.3634725511074066,
"rewards/margins": 0.11554142087697983,
"rewards/rejected": -0.47901391983032227,
"step": 55
},
{
"epoch": 0.8205128205128205,
"grad_norm": 40.826419830322266,
"learning_rate": 4.554843002672129e-05,
"log_odds_chosen": 0.5472866296768188,
"log_odds_ratio": -0.6086785793304443,
"logps/chosen": -0.7054045796394348,
"logps/rejected": -1.0688399076461792,
"loss": 43.2701,
"nll_loss": 1.048005223274231,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.3527022898197174,
"rewards/margins": 0.1817176640033722,
"rewards/rejected": -0.5344199538230896,
"step": 60
},
{
"epoch": 0.8888888888888888,
"grad_norm": 39.6722297668457,
"learning_rate": 4.434895092626883e-05,
"log_odds_chosen": 0.5648136138916016,
"log_odds_ratio": -0.6339768171310425,
"logps/chosen": -0.7455043792724609,
"logps/rejected": -1.1616737842559814,
"loss": 44.6575,
"nll_loss": 1.0870110988616943,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -0.37275218963623047,
"rewards/margins": 0.20808465778827667,
"rewards/rejected": -0.5808368921279907,
"step": 65
},
{
"epoch": 0.9572649572649573,
"grad_norm": 39.50532913208008,
"learning_rate": 4.302652010371205e-05,
"log_odds_chosen": 0.5741121172904968,
"log_odds_ratio": -0.6454743146896362,
"logps/chosen": -0.7433961033821106,
"logps/rejected": -1.129797339439392,
"loss": 44.1232,
"nll_loss": 1.0643303394317627,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.3716980516910553,
"rewards/margins": 0.19320069253444672,
"rewards/rejected": -0.564898669719696,
"step": 70
},
{
"epoch": 1.0136752136752136,
"grad_norm": 55.45820236206055,
"learning_rate": 4.1589540864616025e-05,
"log_odds_chosen": 0.7051900625228882,
"log_odds_ratio": -0.5978847742080688,
"logps/chosen": -0.6891235113143921,
"logps/rejected": -1.1345970630645752,
"loss": 35.3591,
"nll_loss": 1.039401650428772,
"rewards/accuracies": 0.6174242496490479,
"rewards/chosen": -0.34456175565719604,
"rewards/margins": 0.22273679077625275,
"rewards/rejected": -0.5672985315322876,
"step": 75
},
{
"epoch": 1.082051282051282,
"grad_norm": 43.05059814453125,
"learning_rate": 4.0047144405434175e-05,
"log_odds_chosen": 0.9981684684753418,
"log_odds_ratio": -0.4477527141571045,
"logps/chosen": -0.561476469039917,
"logps/rejected": -1.0778883695602417,
"loss": 36.5516,
"nll_loss": 0.8772686719894409,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.2807382345199585,
"rewards/margins": 0.25820592045783997,
"rewards/rejected": -0.5389441847801208,
"step": 80
},
{
"epoch": 1.1504273504273503,
"grad_norm": 38.42478942871094,
"learning_rate": 3.84091317898803e-05,
"log_odds_chosen": 1.1495044231414795,
"log_odds_ratio": -0.4064292907714844,
"logps/chosen": -0.5493744611740112,
"logps/rejected": -1.1405811309814453,
"loss": 35.8963,
"nll_loss": 0.8741697072982788,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2746872305870056,
"rewards/margins": 0.29560327529907227,
"rewards/rejected": -0.5702905654907227,
"step": 85
},
{
"epoch": 1.218803418803419,
"grad_norm": 44.85431671142578,
"learning_rate": 3.668591166867035e-05,
"log_odds_chosen": 1.2424638271331787,
"log_odds_ratio": -0.3852007985115051,
"logps/chosen": -0.5860949754714966,
"logps/rejected": -1.2700952291488647,
"loss": 34.9224,
"nll_loss": 0.8511130213737488,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.2930474877357483,
"rewards/margins": 0.34200018644332886,
"rewards/rejected": -0.6350476145744324,
"step": 90
},
{
"epoch": 1.287179487179487,
"grad_norm": 44.10654067993164,
"learning_rate": 3.488843413838963e-05,
"log_odds_chosen": 1.3683379888534546,
"log_odds_ratio": -0.3600567579269409,
"logps/chosen": -0.5340205430984497,
"logps/rejected": -1.239029884338379,
"loss": 34.0177,
"nll_loss": 0.8299106359481812,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.26701027154922485,
"rewards/margins": 0.35250476002693176,
"rewards/rejected": -0.6195149421691895,
"step": 95
},
{
"epoch": 1.3555555555555556,
"grad_norm": 38.597747802734375,
"learning_rate": 3.3028121159775656e-05,
"log_odds_chosen": 1.4798239469528198,
"log_odds_ratio": -0.3565274178981781,
"logps/chosen": -0.5620280504226685,
"logps/rejected": -1.3859326839447021,
"loss": 34.4413,
"nll_loss": 0.8532856702804565,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.28101402521133423,
"rewards/margins": 0.41195231676101685,
"rewards/rejected": -0.6929663419723511,
"step": 100
},
{
"epoch": 1.423931623931624,
"grad_norm": 51.27384948730469,
"learning_rate": 3.111679397756906e-05,
"log_odds_chosen": 1.3512942790985107,
"log_odds_ratio": -0.3769288659095764,
"logps/chosen": -0.6085891127586365,
"logps/rejected": -1.3841993808746338,
"loss": 34.4354,
"nll_loss": 0.8462675213813782,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.30429455637931824,
"rewards/margins": 0.38780516386032104,
"rewards/rejected": -0.6920996904373169,
"step": 105
},
{
"epoch": 1.4923076923076923,
"grad_norm": 43.76097106933594,
"learning_rate": 2.9166598003138766e-05,
"log_odds_chosen": 1.6075998544692993,
"log_odds_ratio": -0.3306867480278015,
"logps/chosen": -0.5251437425613403,
"logps/rejected": -1.3855717182159424,
"loss": 33.6202,
"nll_loss": 0.8343909978866577,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.26257187128067017,
"rewards/margins": 0.43021392822265625,
"rewards/rejected": -0.6927858591079712,
"step": 110
},
{
"epoch": 1.5606837606837607,
"grad_norm": 47.68281555175781,
"learning_rate": 2.7189925637210323e-05,
"log_odds_chosen": 1.4052133560180664,
"log_odds_ratio": -0.36428430676460266,
"logps/chosen": -0.5953341722488403,
"logps/rejected": -1.3725194931030273,
"loss": 34.6962,
"nll_loss": 0.8555153012275696,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.29766708612442017,
"rewards/margins": 0.3885926604270935,
"rewards/rejected": -0.6862597465515137,
"step": 115
},
{
"epoch": 1.629059829059829,
"grad_norm": 44.739234924316406,
"learning_rate": 2.5199337523115418e-05,
"log_odds_chosen": 1.314573884010315,
"log_odds_ratio": -0.3843991756439209,
"logps/chosen": -0.5602730512619019,
"logps/rejected": -1.2206534147262573,
"loss": 34.2171,
"nll_loss": 0.8327986001968384,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2801365256309509,
"rewards/margins": 0.3301902115345001,
"rewards/rejected": -0.6103267073631287,
"step": 120
},
{
"epoch": 1.6974358974358974,
"grad_norm": 48.93033218383789,
"learning_rate": 2.3207482730954063e-05,
"log_odds_chosen": 1.6598823070526123,
"log_odds_ratio": -0.31844857335090637,
"logps/chosen": -0.5364278554916382,
"logps/rejected": -1.481249213218689,
"loss": 33.4162,
"nll_loss": 0.8371628522872925,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.2682139277458191,
"rewards/margins": 0.4724105894565582,
"rewards/rejected": -0.7406246066093445,
"step": 125
},
{
"epoch": 1.7658119658119658,
"grad_norm": 47.49737548828125,
"learning_rate": 2.1227018379854383e-05,
"log_odds_chosen": 1.7132043838500977,
"log_odds_ratio": -0.309685617685318,
"logps/chosen": -0.5513170957565308,
"logps/rejected": -1.5208324193954468,
"loss": 33.881,
"nll_loss": 0.8536204099655151,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.2756585478782654,
"rewards/margins": 0.4847577214241028,
"rewards/rejected": -0.7604162096977234,
"step": 130
},
{
"epoch": 1.8341880341880343,
"grad_norm": 53.4583740234375,
"learning_rate": 1.927052920908528e-05,
"log_odds_chosen": 1.623297929763794,
"log_odds_ratio": -0.3428027033805847,
"logps/chosen": -0.5809999704360962,
"logps/rejected": -1.4746825695037842,
"loss": 35.7338,
"nll_loss": 0.8969090580940247,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.2904999852180481,
"rewards/margins": 0.4468413293361664,
"rewards/rejected": -0.7373412847518921,
"step": 135
},
{
"epoch": 1.9025641025641025,
"grad_norm": 44.477012634277344,
"learning_rate": 1.735044760910251e-05,
"log_odds_chosen": 1.4694030284881592,
"log_odds_ratio": -0.36373740434646606,
"logps/chosen": -0.5533854365348816,
"logps/rejected": -1.3526192903518677,
"loss": 33.8163,
"nll_loss": 0.8294602632522583,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2766927182674408,
"rewards/margins": 0.39961689710617065,
"rewards/rejected": -0.6763096451759338,
"step": 140
},
{
"epoch": 1.970940170940171,
"grad_norm": 62.9003791809082,
"learning_rate": 1.547897462068592e-05,
"log_odds_chosen": 1.5487511157989502,
"log_odds_ratio": -0.33681467175483704,
"logps/chosen": -0.5517206192016602,
"logps/rejected": -1.4492119550704956,
"loss": 34.3693,
"nll_loss": 0.8605127334594727,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.2758603096008301,
"rewards/margins": 0.4487456679344177,
"rewards/rejected": -0.7246059775352478,
"step": 145
},
{
"epoch": 2.0273504273504273,
"grad_norm": 42.154563903808594,
"learning_rate": 1.3668002404174047e-05,
"log_odds_chosen": 2.2226734161376953,
"log_odds_ratio": -0.2391778826713562,
"logps/chosen": -0.4616817235946655,
"logps/rejected": -1.7343417406082153,
"loss": 23.7203,
"nll_loss": 0.7272225618362427,
"rewards/accuracies": 0.9280303120613098,
"rewards/chosen": -0.23084086179733276,
"rewards/margins": 0.6363300085067749,
"rewards/rejected": -0.8671708703041077,
"step": 150
},
{
"epoch": 2.095726495726496,
"grad_norm": 80.82354736328125,
"learning_rate": 1.1929038671460486e-05,
"log_odds_chosen": 3.258676052093506,
"log_odds_ratio": -0.14076226949691772,
"logps/chosen": -0.4106404781341553,
"logps/rejected": -2.355543375015259,
"loss": 26.2695,
"nll_loss": 0.7021722197532654,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.20532023906707764,
"rewards/margins": 0.972451388835907,
"rewards/rejected": -1.1777716875076294,
"step": 155
},
{
"epoch": 2.164102564102564,
"grad_norm": 49.468326568603516,
"learning_rate": 1.027313356094443e-05,
"log_odds_chosen": 3.117168426513672,
"log_odds_ratio": -0.13647082448005676,
"logps/chosen": -0.4620683789253235,
"logps/rejected": -2.3730220794677734,
"loss": 26.6823,
"nll_loss": 0.7180877327919006,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": -0.23103418946266174,
"rewards/margins": 0.9554769396781921,
"rewards/rejected": -1.1865110397338867,
"step": 160
},
{
"epoch": 2.2324786324786325,
"grad_norm": 48.83137893676758,
"learning_rate": 8.710809420103789e-06,
"log_odds_chosen": 3.0069823265075684,
"log_odds_ratio": -0.1508590579032898,
"logps/chosen": -0.3987279534339905,
"logps/rejected": -2.156508684158325,
"loss": 25.979,
"nll_loss": 0.6839076280593872,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.19936397671699524,
"rewards/margins": 0.8788902163505554,
"rewards/rejected": -1.0782543420791626,
"step": 165
},
{
"epoch": 2.3008547008547007,
"grad_norm": 59.909706115722656,
"learning_rate": 7.251993941883428e-06,
"log_odds_chosen": 3.3163504600524902,
"log_odds_ratio": -0.13661204278469086,
"logps/chosen": -0.40769442915916443,
"logps/rejected": -2.3925185203552246,
"loss": 24.6734,
"nll_loss": 0.6533316373825073,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.20384721457958221,
"rewards/margins": 0.9924120903015137,
"rewards/rejected": -1.1962592601776123,
"step": 170
},
{
"epoch": 2.3692307692307693,
"grad_norm": 52.876243591308594,
"learning_rate": 5.905957079779187e-06,
"log_odds_chosen": 3.2027995586395264,
"log_odds_ratio": -0.14145739376544952,
"logps/chosen": -0.4457460343837738,
"logps/rejected": -2.37770414352417,
"loss": 25.9488,
"nll_loss": 0.6930140256881714,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.2228730171918869,
"rewards/margins": 0.9659790992736816,
"rewards/rejected": -1.188852071762085,
"step": 175
},
{
"epoch": 2.437606837606838,
"grad_norm": 58.3582763671875,
"learning_rate": 4.681252142486841e-06,
"log_odds_chosen": 3.392512559890747,
"log_odds_ratio": -0.13067595660686493,
"logps/chosen": -0.4224206507205963,
"logps/rejected": -2.505432605743408,
"loss": 24.921,
"nll_loss": 0.6670365333557129,
"rewards/accuracies": 0.9781249761581421,
"rewards/chosen": -0.21121032536029816,
"rewards/margins": 1.0415061712265015,
"rewards/rejected": -1.252716302871704,
"step": 180
},
{
"epoch": 2.505982905982906,
"grad_norm": 72.88838195800781,
"learning_rate": 3.585661442426494e-06,
"log_odds_chosen": 3.295175552368164,
"log_odds_ratio": -0.14244017004966736,
"logps/chosen": -0.46335524320602417,
"logps/rejected": -2.5462584495544434,
"loss": 27.0982,
"nll_loss": 0.7326253056526184,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.23167762160301208,
"rewards/margins": 1.0414518117904663,
"rewards/rejected": -1.2731292247772217,
"step": 185
},
{
"epoch": 2.574358974358974,
"grad_norm": 53.10452651977539,
"learning_rate": 2.6261468435155978e-06,
"log_odds_chosen": 3.560279369354248,
"log_odds_ratio": -0.1270817220211029,
"logps/chosen": -0.3945046365261078,
"logps/rejected": -2.5356345176696777,
"loss": 24.711,
"nll_loss": 0.6610320806503296,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.1972523182630539,
"rewards/margins": 1.070564866065979,
"rewards/rejected": -1.2678172588348389,
"step": 190
},
{
"epoch": 2.6427350427350427,
"grad_norm": 60.35097885131836,
"learning_rate": 1.8088055224315697e-06,
"log_odds_chosen": 3.335855007171631,
"log_odds_ratio": -0.1391274482011795,
"logps/chosen": -0.4181729853153229,
"logps/rejected": -2.4335877895355225,
"loss": 25.7787,
"nll_loss": 0.6885385513305664,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.20908649265766144,
"rewards/margins": 1.0077073574066162,
"rewards/rejected": -1.2167938947677612,
"step": 195
},
{
"epoch": 2.7111111111111112,
"grad_norm": 78.32765197753906,
"learning_rate": 1.138831224476533e-06,
"log_odds_chosen": 3.4376022815704346,
"log_odds_ratio": -0.1307111382484436,
"logps/chosen": -0.44153517484664917,
"logps/rejected": -2.567284107208252,
"loss": 25.5218,
"nll_loss": 0.6872699856758118,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.22076758742332458,
"rewards/margins": 1.062874436378479,
"rewards/rejected": -1.283642053604126,
"step": 200
},
{
"epoch": 2.7794871794871794,
"grad_norm": 72.06491088867188,
"learning_rate": 6.204812602412902e-07,
"log_odds_chosen": 3.316706895828247,
"log_odds_ratio": -0.12540897727012634,
"logps/chosen": -0.4180404543876648,
"logps/rejected": -2.4521901607513428,
"loss": 25.071,
"nll_loss": 0.6733669638633728,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.2090202271938324,
"rewards/margins": 1.0170748233795166,
"rewards/rejected": -1.2260950803756714,
"step": 205
},
{
"epoch": 2.847863247863248,
"grad_norm": 59.340885162353516,
"learning_rate": 2.5704945278623436e-07,
"log_odds_chosen": 3.219451904296875,
"log_odds_ratio": -0.13932213187217712,
"logps/chosen": -0.44474905729293823,
"logps/rejected": -2.4706287384033203,
"loss": 25.9818,
"nll_loss": 0.6953645944595337,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": -0.22237452864646912,
"rewards/margins": 1.012939691543579,
"rewards/rejected": -1.2353143692016602,
"step": 210
},
{
"epoch": 2.916239316239316,
"grad_norm": 90.88463592529297,
"learning_rate": 5.0845207244715196e-08,
"log_odds_chosen": 3.495487689971924,
"log_odds_ratio": -0.12796048820018768,
"logps/chosen": -0.4196755290031433,
"logps/rejected": -2.5507898330688477,
"loss": 24.7788,
"nll_loss": 0.6663211584091187,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.20983776450157166,
"rewards/margins": 1.0655572414398193,
"rewards/rejected": -1.2753949165344238,
"step": 215
},
{
"epoch": 2.970940170940171,
"step": 219,
"total_flos": 0.0,
"train_loss": 35.7124394107627,
"train_runtime": 3905.4181,
"train_samples_per_second": 3.594,
"train_steps_per_second": 0.056
}
],
"logging_steps": 5,
"max_steps": 219,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}