|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.970940170940171, |
|
"eval_steps": 500, |
|
"global_step": 219, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06837606837606838, |
|
"grad_norm": 192.408203125, |
|
"learning_rate": 1.1363636363636365e-05, |
|
"log_odds_chosen": 0.022557739168405533, |
|
"log_odds_ratio": -0.775595486164093, |
|
"logps/chosen": -1.23601233959198, |
|
"logps/rejected": -1.2647087574005127, |
|
"loss": 64.8127, |
|
"nll_loss": 1.6612399816513062, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.61800616979599, |
|
"rewards/margins": 0.014348246157169342, |
|
"rewards/rejected": -0.6323543787002563, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.13675213675213677, |
|
"grad_norm": 87.27720642089844, |
|
"learning_rate": 2.272727272727273e-05, |
|
"log_odds_chosen": 0.01737348921597004, |
|
"log_odds_ratio": -0.7335888743400574, |
|
"logps/chosen": -0.9425485730171204, |
|
"logps/rejected": -0.9615429043769836, |
|
"loss": 54.8539, |
|
"nll_loss": 1.3627849817276, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.4712742865085602, |
|
"rewards/margins": 0.009497147053480148, |
|
"rewards/rejected": -0.4807714521884918, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 50.64696502685547, |
|
"learning_rate": 3.409090909090909e-05, |
|
"log_odds_chosen": 0.15462855994701385, |
|
"log_odds_ratio": -0.6995453834533691, |
|
"logps/chosen": -0.799379289150238, |
|
"logps/rejected": -0.8898841738700867, |
|
"loss": 51.0896, |
|
"nll_loss": 1.258803367614746, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.399689644575119, |
|
"rewards/margins": 0.04525243118405342, |
|
"rewards/rejected": -0.44494208693504333, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.27350427350427353, |
|
"grad_norm": 54.63002014160156, |
|
"learning_rate": 4.545454545454546e-05, |
|
"log_odds_chosen": 0.23047371208667755, |
|
"log_odds_ratio": -0.6775897741317749, |
|
"logps/chosen": -0.7368494272232056, |
|
"logps/rejected": -0.8401222229003906, |
|
"loss": 48.2524, |
|
"nll_loss": 1.1753368377685547, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.3684247136116028, |
|
"rewards/margins": 0.05163642019033432, |
|
"rewards/rejected": -0.4200611114501953, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 75.0562973022461, |
|
"learning_rate": 4.9971395327545466e-05, |
|
"log_odds_chosen": 0.16501149535179138, |
|
"log_odds_ratio": -0.6983749270439148, |
|
"logps/chosen": -0.7642364501953125, |
|
"logps/rejected": -0.8525689244270325, |
|
"loss": 47.8234, |
|
"nll_loss": 1.1575247049331665, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.38211822509765625, |
|
"rewards/margins": 0.04416622966527939, |
|
"rewards/rejected": -0.42628446221351624, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 50.43643569946289, |
|
"learning_rate": 4.979682598982912e-05, |
|
"log_odds_chosen": 0.28536584973335266, |
|
"log_odds_ratio": -0.6665671467781067, |
|
"logps/chosen": -0.7366295456886292, |
|
"logps/rejected": -0.8632861971855164, |
|
"loss": 46.5783, |
|
"nll_loss": 1.1268101930618286, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3683147728443146, |
|
"rewards/margins": 0.0633283406496048, |
|
"rewards/rejected": -0.4316430985927582, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.47863247863247865, |
|
"grad_norm": 44.052860260009766, |
|
"learning_rate": 4.9464686742003006e-05, |
|
"log_odds_chosen": 0.11640346050262451, |
|
"log_odds_ratio": -0.7353156208992004, |
|
"logps/chosen": -0.7775823473930359, |
|
"logps/rejected": -0.8460026979446411, |
|
"loss": 46.9736, |
|
"nll_loss": 1.1240406036376953, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.38879117369651794, |
|
"rewards/margins": 0.03421013802289963, |
|
"rewards/rejected": -0.42300134897232056, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5470085470085471, |
|
"grad_norm": 47.66566848754883, |
|
"learning_rate": 4.8977088142549285e-05, |
|
"log_odds_chosen": 0.22347350418567657, |
|
"log_odds_ratio": -0.6904687285423279, |
|
"logps/chosen": -0.7437100410461426, |
|
"logps/rejected": -0.8720178604125977, |
|
"loss": 45.6756, |
|
"nll_loss": 1.09463632106781, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.3718550205230713, |
|
"rewards/margins": 0.06415387988090515, |
|
"rewards/rejected": -0.43600893020629883, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 44.04689025878906, |
|
"learning_rate": 4.833712860686666e-05, |
|
"log_odds_chosen": 0.3655330538749695, |
|
"log_odds_ratio": -0.6214441657066345, |
|
"logps/chosen": -0.7377647161483765, |
|
"logps/rejected": -0.9412744641304016, |
|
"loss": 43.8278, |
|
"nll_loss": 1.0561692714691162, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.36888235807418823, |
|
"rewards/margins": 0.10175484418869019, |
|
"rewards/rejected": -0.4706372320652008, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 45.08797073364258, |
|
"learning_rate": 4.754887471857969e-05, |
|
"log_odds_chosen": 0.44758883118629456, |
|
"log_odds_ratio": -0.6216556429862976, |
|
"logps/chosen": -0.6983757615089417, |
|
"logps/rejected": -0.9704947471618652, |
|
"loss": 42.6963, |
|
"nll_loss": 1.0264930725097656, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3491878807544708, |
|
"rewards/margins": 0.13605953752994537, |
|
"rewards/rejected": -0.4852473735809326, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7521367521367521, |
|
"grad_norm": 42.02785110473633, |
|
"learning_rate": 4.6617335388682556e-05, |
|
"log_odds_chosen": 0.3934350609779358, |
|
"log_odds_ratio": -0.6445830464363098, |
|
"logps/chosen": -0.7269451022148132, |
|
"logps/rejected": -0.9580278396606445, |
|
"loss": 43.8584, |
|
"nll_loss": 1.0543291568756104, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3634725511074066, |
|
"rewards/margins": 0.11554142087697983, |
|
"rewards/rejected": -0.47901391983032227, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"grad_norm": 40.826419830322266, |
|
"learning_rate": 4.554843002672129e-05, |
|
"log_odds_chosen": 0.5472866296768188, |
|
"log_odds_ratio": -0.6086785793304443, |
|
"logps/chosen": -0.7054045796394348, |
|
"logps/rejected": -1.0688399076461792, |
|
"loss": 43.2701, |
|
"nll_loss": 1.048005223274231, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3527022898197174, |
|
"rewards/margins": 0.1817176640033722, |
|
"rewards/rejected": -0.5344199538230896, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 39.6722297668457, |
|
"learning_rate": 4.434895092626883e-05, |
|
"log_odds_chosen": 0.5648136138916016, |
|
"log_odds_ratio": -0.6339768171310425, |
|
"logps/chosen": -0.7455043792724609, |
|
"logps/rejected": -1.1616737842559814, |
|
"loss": 44.6575, |
|
"nll_loss": 1.0870110988616943, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.37275218963623047, |
|
"rewards/margins": 0.20808465778827667, |
|
"rewards/rejected": -0.5808368921279907, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9572649572649573, |
|
"grad_norm": 39.50532913208008, |
|
"learning_rate": 4.302652010371205e-05, |
|
"log_odds_chosen": 0.5741121172904968, |
|
"log_odds_ratio": -0.6454743146896362, |
|
"logps/chosen": -0.7433961033821106, |
|
"logps/rejected": -1.129797339439392, |
|
"loss": 44.1232, |
|
"nll_loss": 1.0643303394317627, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.3716980516910553, |
|
"rewards/margins": 0.19320069253444672, |
|
"rewards/rejected": -0.564898669719696, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0136752136752136, |
|
"grad_norm": 55.45820236206055, |
|
"learning_rate": 4.1589540864616025e-05, |
|
"log_odds_chosen": 0.7051900625228882, |
|
"log_odds_ratio": -0.5978847742080688, |
|
"logps/chosen": -0.6891235113143921, |
|
"logps/rejected": -1.1345970630645752, |
|
"loss": 35.3591, |
|
"nll_loss": 1.039401650428772, |
|
"rewards/accuracies": 0.6174242496490479, |
|
"rewards/chosen": -0.34456175565719604, |
|
"rewards/margins": 0.22273679077625275, |
|
"rewards/rejected": -0.5672985315322876, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.082051282051282, |
|
"grad_norm": 43.05059814453125, |
|
"learning_rate": 4.0047144405434175e-05, |
|
"log_odds_chosen": 0.9981684684753418, |
|
"log_odds_ratio": -0.4477527141571045, |
|
"logps/chosen": -0.561476469039917, |
|
"logps/rejected": -1.0778883695602417, |
|
"loss": 36.5516, |
|
"nll_loss": 0.8772686719894409, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2807382345199585, |
|
"rewards/margins": 0.25820592045783997, |
|
"rewards/rejected": -0.5389441847801208, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1504273504273503, |
|
"grad_norm": 38.42478942871094, |
|
"learning_rate": 3.84091317898803e-05, |
|
"log_odds_chosen": 1.1495044231414795, |
|
"log_odds_ratio": -0.4064292907714844, |
|
"logps/chosen": -0.5493744611740112, |
|
"logps/rejected": -1.1405811309814453, |
|
"loss": 35.8963, |
|
"nll_loss": 0.8741697072982788, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2746872305870056, |
|
"rewards/margins": 0.29560327529907227, |
|
"rewards/rejected": -0.5702905654907227, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.218803418803419, |
|
"grad_norm": 44.85431671142578, |
|
"learning_rate": 3.668591166867035e-05, |
|
"log_odds_chosen": 1.2424638271331787, |
|
"log_odds_ratio": -0.3852007985115051, |
|
"logps/chosen": -0.5860949754714966, |
|
"logps/rejected": -1.2700952291488647, |
|
"loss": 34.9224, |
|
"nll_loss": 0.8511130213737488, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.2930474877357483, |
|
"rewards/margins": 0.34200018644332886, |
|
"rewards/rejected": -0.6350476145744324, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.287179487179487, |
|
"grad_norm": 44.10654067993164, |
|
"learning_rate": 3.488843413838963e-05, |
|
"log_odds_chosen": 1.3683379888534546, |
|
"log_odds_ratio": -0.3600567579269409, |
|
"logps/chosen": -0.5340205430984497, |
|
"logps/rejected": -1.239029884338379, |
|
"loss": 34.0177, |
|
"nll_loss": 0.8299106359481812, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.26701027154922485, |
|
"rewards/margins": 0.35250476002693176, |
|
"rewards/rejected": -0.6195149421691895, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.3555555555555556, |
|
"grad_norm": 38.597747802734375, |
|
"learning_rate": 3.3028121159775656e-05, |
|
"log_odds_chosen": 1.4798239469528198, |
|
"log_odds_ratio": -0.3565274178981781, |
|
"logps/chosen": -0.5620280504226685, |
|
"logps/rejected": -1.3859326839447021, |
|
"loss": 34.4413, |
|
"nll_loss": 0.8532856702804565, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.28101402521133423, |
|
"rewards/margins": 0.41195231676101685, |
|
"rewards/rejected": -0.6929663419723511, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.423931623931624, |
|
"grad_norm": 51.27384948730469, |
|
"learning_rate": 3.111679397756906e-05, |
|
"log_odds_chosen": 1.3512942790985107, |
|
"log_odds_ratio": -0.3769288659095764, |
|
"logps/chosen": -0.6085891127586365, |
|
"logps/rejected": -1.3841993808746338, |
|
"loss": 34.4354, |
|
"nll_loss": 0.8462675213813782, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.30429455637931824, |
|
"rewards/margins": 0.38780516386032104, |
|
"rewards/rejected": -0.6920996904373169, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.4923076923076923, |
|
"grad_norm": 43.76097106933594, |
|
"learning_rate": 2.9166598003138766e-05, |
|
"log_odds_chosen": 1.6075998544692993, |
|
"log_odds_ratio": -0.3306867480278015, |
|
"logps/chosen": -0.5251437425613403, |
|
"logps/rejected": -1.3855717182159424, |
|
"loss": 33.6202, |
|
"nll_loss": 0.8343909978866577, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.26257187128067017, |
|
"rewards/margins": 0.43021392822265625, |
|
"rewards/rejected": -0.6927858591079712, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5606837606837607, |
|
"grad_norm": 47.68281555175781, |
|
"learning_rate": 2.7189925637210323e-05, |
|
"log_odds_chosen": 1.4052133560180664, |
|
"log_odds_ratio": -0.36428430676460266, |
|
"logps/chosen": -0.5953341722488403, |
|
"logps/rejected": -1.3725194931030273, |
|
"loss": 34.6962, |
|
"nll_loss": 0.8555153012275696, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.29766708612442017, |
|
"rewards/margins": 0.3885926604270935, |
|
"rewards/rejected": -0.6862597465515137, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.629059829059829, |
|
"grad_norm": 44.739234924316406, |
|
"learning_rate": 2.5199337523115418e-05, |
|
"log_odds_chosen": 1.314573884010315, |
|
"log_odds_ratio": -0.3843991756439209, |
|
"logps/chosen": -0.5602730512619019, |
|
"logps/rejected": -1.2206534147262573, |
|
"loss": 34.2171, |
|
"nll_loss": 0.8327986001968384, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2801365256309509, |
|
"rewards/margins": 0.3301902115345001, |
|
"rewards/rejected": -0.6103267073631287, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.6974358974358974, |
|
"grad_norm": 48.93033218383789, |
|
"learning_rate": 2.3207482730954063e-05, |
|
"log_odds_chosen": 1.6598823070526123, |
|
"log_odds_ratio": -0.31844857335090637, |
|
"logps/chosen": -0.5364278554916382, |
|
"logps/rejected": -1.481249213218689, |
|
"loss": 33.4162, |
|
"nll_loss": 0.8371628522872925, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2682139277458191, |
|
"rewards/margins": 0.4724105894565582, |
|
"rewards/rejected": -0.7406246066093445, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.7658119658119658, |
|
"grad_norm": 47.49737548828125, |
|
"learning_rate": 2.1227018379854383e-05, |
|
"log_odds_chosen": 1.7132043838500977, |
|
"log_odds_ratio": -0.309685617685318, |
|
"logps/chosen": -0.5513170957565308, |
|
"logps/rejected": -1.5208324193954468, |
|
"loss": 33.881, |
|
"nll_loss": 0.8536204099655151, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2756585478782654, |
|
"rewards/margins": 0.4847577214241028, |
|
"rewards/rejected": -0.7604162096977234, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.8341880341880343, |
|
"grad_norm": 53.4583740234375, |
|
"learning_rate": 1.927052920908528e-05, |
|
"log_odds_chosen": 1.623297929763794, |
|
"log_odds_ratio": -0.3428027033805847, |
|
"logps/chosen": -0.5809999704360962, |
|
"logps/rejected": -1.4746825695037842, |
|
"loss": 35.7338, |
|
"nll_loss": 0.8969090580940247, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.2904999852180481, |
|
"rewards/margins": 0.4468413293361664, |
|
"rewards/rejected": -0.7373412847518921, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.9025641025641025, |
|
"grad_norm": 44.477012634277344, |
|
"learning_rate": 1.735044760910251e-05, |
|
"log_odds_chosen": 1.4694030284881592, |
|
"log_odds_ratio": -0.36373740434646606, |
|
"logps/chosen": -0.5533854365348816, |
|
"logps/rejected": -1.3526192903518677, |
|
"loss": 33.8163, |
|
"nll_loss": 0.8294602632522583, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2766927182674408, |
|
"rewards/margins": 0.39961689710617065, |
|
"rewards/rejected": -0.6763096451759338, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.970940170940171, |
|
"grad_norm": 62.9003791809082, |
|
"learning_rate": 1.547897462068592e-05, |
|
"log_odds_chosen": 1.5487511157989502, |
|
"log_odds_ratio": -0.33681467175483704, |
|
"logps/chosen": -0.5517206192016602, |
|
"logps/rejected": -1.4492119550704956, |
|
"loss": 34.3693, |
|
"nll_loss": 0.8605127334594727, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.2758603096008301, |
|
"rewards/margins": 0.4487456679344177, |
|
"rewards/rejected": -0.7246059775352478, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.0273504273504273, |
|
"grad_norm": 42.154563903808594, |
|
"learning_rate": 1.3668002404174047e-05, |
|
"log_odds_chosen": 2.2226734161376953, |
|
"log_odds_ratio": -0.2391778826713562, |
|
"logps/chosen": -0.4616817235946655, |
|
"logps/rejected": -1.7343417406082153, |
|
"loss": 23.7203, |
|
"nll_loss": 0.7272225618362427, |
|
"rewards/accuracies": 0.9280303120613098, |
|
"rewards/chosen": -0.23084086179733276, |
|
"rewards/margins": 0.6363300085067749, |
|
"rewards/rejected": -0.8671708703041077, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.095726495726496, |
|
"grad_norm": 80.82354736328125, |
|
"learning_rate": 1.1929038671460486e-05, |
|
"log_odds_chosen": 3.258676052093506, |
|
"log_odds_ratio": -0.14076226949691772, |
|
"logps/chosen": -0.4106404781341553, |
|
"logps/rejected": -2.355543375015259, |
|
"loss": 26.2695, |
|
"nll_loss": 0.7021722197532654, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.20532023906707764, |
|
"rewards/margins": 0.972451388835907, |
|
"rewards/rejected": -1.1777716875076294, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.164102564102564, |
|
"grad_norm": 49.468326568603516, |
|
"learning_rate": 1.027313356094443e-05, |
|
"log_odds_chosen": 3.117168426513672, |
|
"log_odds_ratio": -0.13647082448005676, |
|
"logps/chosen": -0.4620683789253235, |
|
"logps/rejected": -2.3730220794677734, |
|
"loss": 26.6823, |
|
"nll_loss": 0.7180877327919006, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.23103418946266174, |
|
"rewards/margins": 0.9554769396781921, |
|
"rewards/rejected": -1.1865110397338867, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.2324786324786325, |
|
"grad_norm": 48.83137893676758, |
|
"learning_rate": 8.710809420103789e-06, |
|
"log_odds_chosen": 3.0069823265075684, |
|
"log_odds_ratio": -0.1508590579032898, |
|
"logps/chosen": -0.3987279534339905, |
|
"logps/rejected": -2.156508684158325, |
|
"loss": 25.979, |
|
"nll_loss": 0.6839076280593872, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.19936397671699524, |
|
"rewards/margins": 0.8788902163505554, |
|
"rewards/rejected": -1.0782543420791626, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.3008547008547007, |
|
"grad_norm": 59.909706115722656, |
|
"learning_rate": 7.251993941883428e-06, |
|
"log_odds_chosen": 3.3163504600524902, |
|
"log_odds_ratio": -0.13661204278469086, |
|
"logps/chosen": -0.40769442915916443, |
|
"logps/rejected": -2.3925185203552246, |
|
"loss": 24.6734, |
|
"nll_loss": 0.6533316373825073, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.20384721457958221, |
|
"rewards/margins": 0.9924120903015137, |
|
"rewards/rejected": -1.1962592601776123, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.3692307692307693, |
|
"grad_norm": 52.876243591308594, |
|
"learning_rate": 5.905957079779187e-06, |
|
"log_odds_chosen": 3.2027995586395264, |
|
"log_odds_ratio": -0.14145739376544952, |
|
"logps/chosen": -0.4457460343837738, |
|
"logps/rejected": -2.37770414352417, |
|
"loss": 25.9488, |
|
"nll_loss": 0.6930140256881714, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2228730171918869, |
|
"rewards/margins": 0.9659790992736816, |
|
"rewards/rejected": -1.188852071762085, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.437606837606838, |
|
"grad_norm": 58.3582763671875, |
|
"learning_rate": 4.681252142486841e-06, |
|
"log_odds_chosen": 3.392512559890747, |
|
"log_odds_ratio": -0.13067595660686493, |
|
"logps/chosen": -0.4224206507205963, |
|
"logps/rejected": -2.505432605743408, |
|
"loss": 24.921, |
|
"nll_loss": 0.6670365333557129, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -0.21121032536029816, |
|
"rewards/margins": 1.0415061712265015, |
|
"rewards/rejected": -1.252716302871704, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.505982905982906, |
|
"grad_norm": 72.88838195800781, |
|
"learning_rate": 3.585661442426494e-06, |
|
"log_odds_chosen": 3.295175552368164, |
|
"log_odds_ratio": -0.14244017004966736, |
|
"logps/chosen": -0.46335524320602417, |
|
"logps/rejected": -2.5462584495544434, |
|
"loss": 27.0982, |
|
"nll_loss": 0.7326253056526184, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.23167762160301208, |
|
"rewards/margins": 1.0414518117904663, |
|
"rewards/rejected": -1.2731292247772217, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.574358974358974, |
|
"grad_norm": 53.10452651977539, |
|
"learning_rate": 2.6261468435155978e-06, |
|
"log_odds_chosen": 3.560279369354248, |
|
"log_odds_ratio": -0.1270817220211029, |
|
"logps/chosen": -0.3945046365261078, |
|
"logps/rejected": -2.5356345176696777, |
|
"loss": 24.711, |
|
"nll_loss": 0.6610320806503296, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1972523182630539, |
|
"rewards/margins": 1.070564866065979, |
|
"rewards/rejected": -1.2678172588348389, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.6427350427350427, |
|
"grad_norm": 60.35097885131836, |
|
"learning_rate": 1.8088055224315697e-06, |
|
"log_odds_chosen": 3.335855007171631, |
|
"log_odds_ratio": -0.1391274482011795, |
|
"logps/chosen": -0.4181729853153229, |
|
"logps/rejected": -2.4335877895355225, |
|
"loss": 25.7787, |
|
"nll_loss": 0.6885385513305664, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.20908649265766144, |
|
"rewards/margins": 1.0077073574066162, |
|
"rewards/rejected": -1.2167938947677612, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.7111111111111112, |
|
"grad_norm": 78.32765197753906, |
|
"learning_rate": 1.138831224476533e-06, |
|
"log_odds_chosen": 3.4376022815704346, |
|
"log_odds_ratio": -0.1307111382484436, |
|
"logps/chosen": -0.44153517484664917, |
|
"logps/rejected": -2.567284107208252, |
|
"loss": 25.5218, |
|
"nll_loss": 0.6872699856758118, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.22076758742332458, |
|
"rewards/margins": 1.062874436378479, |
|
"rewards/rejected": -1.283642053604126, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.7794871794871794, |
|
"grad_norm": 72.06491088867188, |
|
"learning_rate": 6.204812602412902e-07, |
|
"log_odds_chosen": 3.316706895828247, |
|
"log_odds_ratio": -0.12540897727012634, |
|
"logps/chosen": -0.4180404543876648, |
|
"logps/rejected": -2.4521901607513428, |
|
"loss": 25.071, |
|
"nll_loss": 0.6733669638633728, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.2090202271938324, |
|
"rewards/margins": 1.0170748233795166, |
|
"rewards/rejected": -1.2260950803756714, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.847863247863248, |
|
"grad_norm": 59.340885162353516, |
|
"learning_rate": 2.5704945278623436e-07, |
|
"log_odds_chosen": 3.219451904296875, |
|
"log_odds_ratio": -0.13932213187217712, |
|
"logps/chosen": -0.44474905729293823, |
|
"logps/rejected": -2.4706287384033203, |
|
"loss": 25.9818, |
|
"nll_loss": 0.6953645944595337, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.22237452864646912, |
|
"rewards/margins": 1.012939691543579, |
|
"rewards/rejected": -1.2353143692016602, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.916239316239316, |
|
"grad_norm": 90.88463592529297, |
|
"learning_rate": 5.0845207244715196e-08, |
|
"log_odds_chosen": 3.495487689971924, |
|
"log_odds_ratio": -0.12796048820018768, |
|
"logps/chosen": -0.4196755290031433, |
|
"logps/rejected": -2.5507898330688477, |
|
"loss": 24.7788, |
|
"nll_loss": 0.6663211584091187, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.20983776450157166, |
|
"rewards/margins": 1.0655572414398193, |
|
"rewards/rejected": -1.2753949165344238, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.970940170940171, |
|
"step": 219, |
|
"total_flos": 0.0, |
|
"train_loss": 35.7124394107627, |
|
"train_runtime": 3905.4181, |
|
"train_samples_per_second": 3.594, |
|
"train_steps_per_second": 0.056 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 219, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|