|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4981684981684982, |
|
"eval_steps": 500, |
|
"global_step": 238, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0020931449502878076, |
|
"grad_norm": 26.601607092770625, |
|
"learning_rate": 2.083333333333333e-08, |
|
"logits/chosen": -2.9139022827148438, |
|
"logits/rejected": -2.8787596225738525, |
|
"logps/chosen": -325.8533020019531, |
|
"logps/rejected": -372.9187927246094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 16.144681025948177, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.730161428451538, |
|
"logits/rejected": -2.7064931392669678, |
|
"logps/chosen": -331.04364013671875, |
|
"logps/rejected": -309.8411865234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.001100256573408842, |
|
"rewards/margins": 0.0007432710262946784, |
|
"rewards/rejected": 0.0003569853724911809, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 14.575580508073863, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.798081874847412, |
|
"logits/rejected": -2.7473387718200684, |
|
"logps/chosen": -321.3909912109375, |
|
"logps/rejected": -299.4921569824219, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.006536015775054693, |
|
"rewards/margins": 0.0068586282432079315, |
|
"rewards/rejected": -0.0003226128756068647, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 8.070909879894359, |
|
"learning_rate": 4.99030821197584e-07, |
|
"logits/chosen": -2.7842161655426025, |
|
"logits/rejected": -2.728454113006592, |
|
"logps/chosen": -304.2439880371094, |
|
"logps/rejected": -275.6659851074219, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.04330765828490257, |
|
"rewards/margins": 0.05840452387928963, |
|
"rewards/rejected": -0.015096860937774181, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 8.530980057951403, |
|
"learning_rate": 4.931352528237397e-07, |
|
"logits/chosen": -2.7036166191101074, |
|
"logits/rejected": -2.671600103378296, |
|
"logps/chosen": -289.6343078613281, |
|
"logps/rejected": -277.79425048828125, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.02837621606886387, |
|
"rewards/margins": 0.037141989916563034, |
|
"rewards/rejected": -0.008765773847699165, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 11.721866187252868, |
|
"learning_rate": 4.820092227512735e-07, |
|
"logits/chosen": -2.740696430206299, |
|
"logits/rejected": -2.6848952770233154, |
|
"logps/chosen": -290.8323059082031, |
|
"logps/rejected": -310.22576904296875, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.014719474129378796, |
|
"rewards/margins": 0.10523072630167007, |
|
"rewards/rejected": -0.11995019763708115, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 10.915648900872057, |
|
"learning_rate": 4.658920803689553e-07, |
|
"logits/chosen": -2.785388469696045, |
|
"logits/rejected": -2.744814395904541, |
|
"logps/chosen": -268.2928466796875, |
|
"logps/rejected": -272.32122802734375, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.02511899545788765, |
|
"rewards/margins": 0.14482316374778748, |
|
"rewards/rejected": -0.16994217038154602, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 11.152537838170248, |
|
"learning_rate": 4.4513054666826144e-07, |
|
"logits/chosen": -2.745926856994629, |
|
"logits/rejected": -2.7201263904571533, |
|
"logps/chosen": -332.7641296386719, |
|
"logps/rejected": -326.02862548828125, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12678995728492737, |
|
"rewards/margins": 0.15600599348545074, |
|
"rewards/rejected": -0.2827959656715393, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 20.747423551686875, |
|
"learning_rate": 4.201712553872657e-07, |
|
"logits/chosen": -2.8221614360809326, |
|
"logits/rejected": -2.75636887550354, |
|
"logps/chosen": -364.9082336425781, |
|
"logps/rejected": -309.4717102050781, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.07667700946331024, |
|
"rewards/margins": 0.23987922072410583, |
|
"rewards/rejected": -0.3165562152862549, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 12.643373189764793, |
|
"learning_rate": 3.9155114477557926e-07, |
|
"logits/chosen": -2.7849178314208984, |
|
"logits/rejected": -2.748291492462158, |
|
"logps/chosen": -287.956787109375, |
|
"logps/rejected": -340.713134765625, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.24098113179206848, |
|
"rewards/margins": 0.3181685507297516, |
|
"rewards/rejected": -0.5591496825218201, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 13.623537820936637, |
|
"learning_rate": 3.598859066780754e-07, |
|
"logits/chosen": -2.76792311668396, |
|
"logits/rejected": -2.7459118366241455, |
|
"logps/chosen": -346.0640563964844, |
|
"logps/rejected": -348.5198059082031, |
|
"loss": 0.6098, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2652357220649719, |
|
"rewards/margins": 0.34126365184783936, |
|
"rewards/rejected": -0.6064993739128113, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 15.41884704737969, |
|
"learning_rate": 3.2585674142717477e-07, |
|
"logits/chosen": -2.7856485843658447, |
|
"logits/rejected": -2.7417104244232178, |
|
"logps/chosen": -335.01116943359375, |
|
"logps/rejected": -349.71392822265625, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.32472461462020874, |
|
"rewards/margins": 0.3165324330329895, |
|
"rewards/rejected": -0.6412570476531982, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 15.439444003212843, |
|
"learning_rate": 2.9019570347986706e-07, |
|
"logits/chosen": -2.710737705230713, |
|
"logits/rejected": -2.718116283416748, |
|
"logps/chosen": -330.0270690917969, |
|
"logps/rejected": -354.4695739746094, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3180462718009949, |
|
"rewards/margins": 0.3863913416862488, |
|
"rewards/rejected": -0.7044375538825989, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 42.02069053016682, |
|
"learning_rate": 2.536699530523291e-07, |
|
"logits/chosen": -2.7691874504089355, |
|
"logits/rejected": -2.752965211868286, |
|
"logps/chosen": -347.0602722167969, |
|
"logps/rejected": -357.85504150390625, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2763240337371826, |
|
"rewards/margins": 0.27957138419151306, |
|
"rewards/rejected": -0.5558954477310181, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 24.79876226607902, |
|
"learning_rate": 2.1706525253979534e-07, |
|
"logits/chosen": -2.76993989944458, |
|
"logits/rejected": -2.750406265258789, |
|
"logps/chosen": -360.31005859375, |
|
"logps/rejected": -362.851806640625, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.26679104566574097, |
|
"rewards/margins": 0.46773427724838257, |
|
"rewards/rejected": -0.7345253229141235, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 22.26447349378322, |
|
"learning_rate": 1.8116906275593507e-07, |
|
"logits/chosen": -2.7364563941955566, |
|
"logits/rejected": -2.7079169750213623, |
|
"logps/chosen": -351.19189453125, |
|
"logps/rejected": -349.318603515625, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.43332210183143616, |
|
"rewards/margins": 0.3800078332424164, |
|
"rewards/rejected": -0.8133300542831421, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 26.67231794642688, |
|
"learning_rate": 1.4675360263490295e-07, |
|
"logits/chosen": -2.730668544769287, |
|
"logits/rejected": -2.683061122894287, |
|
"logps/chosen": -329.1626281738281, |
|
"logps/rejected": -350.14288330078125, |
|
"loss": 0.5765, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4029548764228821, |
|
"rewards/margins": 0.5021006464958191, |
|
"rewards/rejected": -0.9050555229187012, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 20.223571694588983, |
|
"learning_rate": 1.1455923682523475e-07, |
|
"logits/chosen": -2.532707691192627, |
|
"logits/rejected": -2.512026309967041, |
|
"logps/chosen": -324.9989929199219, |
|
"logps/rejected": -334.66888427734375, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.36262771487236023, |
|
"rewards/margins": 0.39737457036972046, |
|
"rewards/rejected": -0.7600023150444031, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 18.84156651008044, |
|
"learning_rate": 8.527854855097224e-08, |
|
"logits/chosen": -2.5160155296325684, |
|
"logits/rejected": -2.4737634658813477, |
|
"logps/chosen": -328.61224365234375, |
|
"logps/rejected": -344.9612121582031, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.454254150390625, |
|
"rewards/margins": 0.40306931734085083, |
|
"rewards/rejected": -0.8573234677314758, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 32.06520543659375, |
|
"learning_rate": 5.9541440373546445e-08, |
|
"logits/chosen": -2.3760242462158203, |
|
"logits/rejected": -2.352184772491455, |
|
"logps/chosen": -323.0282897949219, |
|
"logps/rejected": -362.2243957519531, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5423271059989929, |
|
"rewards/margins": 0.3743038773536682, |
|
"rewards/rejected": -0.9166310429573059, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 23.15044859855663, |
|
"learning_rate": 3.790158337517127e-08, |
|
"logits/chosen": -2.311206817626953, |
|
"logits/rejected": -2.2949588298797607, |
|
"logps/chosen": -342.80731201171875, |
|
"logps/rejected": -394.5885314941406, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5416313409805298, |
|
"rewards/margins": 0.41420310735702515, |
|
"rewards/rejected": -0.9558345079421997, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 19.06533234677666, |
|
"learning_rate": 2.0824506276503894e-08, |
|
"logits/chosen": -2.3925139904022217, |
|
"logits/rejected": -2.268831491470337, |
|
"logps/chosen": -395.4692077636719, |
|
"logps/rejected": -360.0453186035156, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5327693223953247, |
|
"rewards/margins": 0.4689061641693115, |
|
"rewards/rejected": -1.0016754865646362, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 24.03298915942009, |
|
"learning_rate": 8.677580722139671e-09, |
|
"logits/chosen": -2.316939353942871, |
|
"logits/rejected": -2.2927820682525635, |
|
"logps/chosen": -342.931640625, |
|
"logps/rejected": -364.62982177734375, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.501736581325531, |
|
"rewards/margins": 0.49570217728614807, |
|
"rewards/rejected": -0.9974387884140015, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 19.987346780573713, |
|
"learning_rate": 1.722118176089915e-09, |
|
"logits/chosen": -2.4182724952697754, |
|
"logits/rejected": -2.337088108062744, |
|
"logps/chosen": -337.9970703125, |
|
"logps/rejected": -381.84320068359375, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.39490121603012085, |
|
"rewards/margins": 0.7370277643203735, |
|
"rewards/rejected": -1.1319290399551392, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4981684981684982, |
|
"eval_logits/chosen": -2.3480114936828613, |
|
"eval_logits/rejected": -2.3013908863067627, |
|
"eval_logps/chosen": -315.5382385253906, |
|
"eval_logps/rejected": -372.7966003417969, |
|
"eval_loss": 0.5668980479240417, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -0.4034212827682495, |
|
"eval_rewards/margins": 0.5594114065170288, |
|
"eval_rewards/rejected": -0.9628326892852783, |
|
"eval_runtime": 171.8973, |
|
"eval_samples_per_second": 11.635, |
|
"eval_steps_per_second": 0.186, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4981684981684982, |
|
"step": 238, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6061525304778284, |
|
"train_runtime": 7204.502, |
|
"train_samples_per_second": 4.228, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 238, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|