me5-instruct-afri-HQ-hn-m3-save-steps-500-high-threshold-prompt-qe-pos-only-6500/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.658586374075019,
  "eval_steps": 500,
  "global_step": 6500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025516713447307986,
      "grad_norm": 22.250633239746094,
      "learning_rate": 1.1862244897959185e-06,
      "loss": 2.3912,
      "step": 100
    },
    {
      "epoch": 0.05103342689461597,
      "grad_norm": 18.358823776245117,
      "learning_rate": 2.461734693877551e-06,
      "loss": 0.9986,
      "step": 200
    },
    {
      "epoch": 0.07655014034192396,
      "grad_norm": 72.52508544921875,
      "learning_rate": 3.737244897959184e-06,
      "loss": 0.8566,
      "step": 300
    },
    {
      "epoch": 0.10206685378923194,
      "grad_norm": 23.034740447998047,
      "learning_rate": 5.012755102040817e-06,
      "loss": 0.7413,
      "step": 400
    },
    {
      "epoch": 0.12758356723653994,
      "grad_norm": 45.18895721435547,
      "learning_rate": 6.288265306122449e-06,
      "loss": 0.7472,
      "step": 500
    },
    {
      "epoch": 0.15310028068384793,
      "grad_norm": 49.649410247802734,
      "learning_rate": 7.563775510204082e-06,
      "loss": 0.7018,
      "step": 600
    },
    {
      "epoch": 0.17861699413115592,
      "grad_norm": 37.48847198486328,
      "learning_rate": 8.839285714285714e-06,
      "loss": 0.6664,
      "step": 700
    },
    {
      "epoch": 0.20413370757846389,
      "grad_norm": 41.822566986083984,
      "learning_rate": 9.987241281542388e-06,
      "loss": 0.6929,
      "step": 800
    },
    {
      "epoch": 0.22965042102577188,
      "grad_norm": 31.396697998046875,
      "learning_rate": 9.84547774312447e-06,
      "loss": 0.6417,
      "step": 900
    },
    {
      "epoch": 0.25516713447307987,
      "grad_norm": 18.564287185668945,
      "learning_rate": 9.70371420470655e-06,
      "loss": 0.6491,
      "step": 1000
    },
    {
      "epoch": 0.28068384792038786,
      "grad_norm": 40.36125946044922,
      "learning_rate": 9.561950666288631e-06,
      "loss": 0.6364,
      "step": 1100
    },
    {
      "epoch": 0.30620056136769586,
      "grad_norm": 22.446331024169922,
      "learning_rate": 9.420187127870712e-06,
      "loss": 0.6668,
      "step": 1200
    },
    {
      "epoch": 0.33171727481500385,
      "grad_norm": 19.624980926513672,
      "learning_rate": 9.278423589452793e-06,
      "loss": 0.6713,
      "step": 1300
    },
    {
      "epoch": 0.35723398826231184,
      "grad_norm": 14.320337295532227,
      "learning_rate": 9.136660051034874e-06,
      "loss": 0.5721,
      "step": 1400
    },
    {
      "epoch": 0.3827507017096198,
      "grad_norm": 25.894556045532227,
      "learning_rate": 8.994896512616955e-06,
      "loss": 0.597,
      "step": 1500
    },
    {
      "epoch": 0.40826741515692777,
      "grad_norm": 40.72581100463867,
      "learning_rate": 8.853132974199036e-06,
      "loss": 0.594,
      "step": 1600
    },
    {
      "epoch": 0.43378412860423576,
      "grad_norm": 21.20204734802246,
      "learning_rate": 8.711369435781117e-06,
      "loss": 0.5882,
      "step": 1700
    },
    {
      "epoch": 0.45930084205154376,
      "grad_norm": 49.4229850769043,
      "learning_rate": 8.5696058973632e-06,
      "loss": 0.6018,
      "step": 1800
    },
    {
      "epoch": 0.48481755549885175,
      "grad_norm": 21.961654663085938,
      "learning_rate": 8.42784235894528e-06,
      "loss": 0.5872,
      "step": 1900
    },
    {
      "epoch": 0.5103342689461597,
      "grad_norm": 15.664958000183105,
      "learning_rate": 8.286078820527362e-06,
      "loss": 0.5802,
      "step": 2000
    },
    {
      "epoch": 0.5358509823934677,
      "grad_norm": 32.18100357055664,
      "learning_rate": 8.144315282109441e-06,
      "loss": 0.5614,
      "step": 2100
    },
    {
      "epoch": 0.5613676958407757,
      "grad_norm": 14.913948059082031,
      "learning_rate": 8.002551743691524e-06,
      "loss": 0.5166,
      "step": 2200
    },
    {
      "epoch": 0.5868844092880837,
      "grad_norm": 14.455422401428223,
      "learning_rate": 7.860788205273603e-06,
      "loss": 0.5722,
      "step": 2300
    },
    {
      "epoch": 0.6124011227353917,
      "grad_norm": 49.46743392944336,
      "learning_rate": 7.719024666855686e-06,
      "loss": 0.5289,
      "step": 2400
    },
    {
      "epoch": 0.6379178361826997,
      "grad_norm": 16.0176944732666,
      "learning_rate": 7.577261128437766e-06,
      "loss": 0.5803,
      "step": 2500
    },
    {
      "epoch": 0.6634345496300077,
      "grad_norm": 11.620992660522461,
      "learning_rate": 7.435497590019848e-06,
      "loss": 0.5355,
      "step": 2600
    },
    {
      "epoch": 0.6889512630773157,
      "grad_norm": 24.707399368286133,
      "learning_rate": 7.293734051601928e-06,
      "loss": 0.5694,
      "step": 2700
    },
    {
      "epoch": 0.7144679765246237,
      "grad_norm": 35.875511169433594,
      "learning_rate": 7.15197051318401e-06,
      "loss": 0.569,
      "step": 2800
    },
    {
      "epoch": 0.7399846899719316,
      "grad_norm": 32.23057556152344,
      "learning_rate": 7.010206974766091e-06,
      "loss": 0.5016,
      "step": 2900
    },
    {
      "epoch": 0.7655014034192396,
      "grad_norm": 24.113296508789062,
      "learning_rate": 6.868443436348172e-06,
      "loss": 0.5586,
      "step": 3000
    },
    {
      "epoch": 0.7910181168665475,
      "grad_norm": 25.87450408935547,
      "learning_rate": 6.726679897930253e-06,
      "loss": 0.5154,
      "step": 3100
    },
    {
      "epoch": 0.8165348303138555,
      "grad_norm": 32.350250244140625,
      "learning_rate": 6.584916359512335e-06,
      "loss": 0.5318,
      "step": 3200
    },
    {
      "epoch": 0.8420515437611635,
      "grad_norm": 22.004989624023438,
      "learning_rate": 6.443152821094415e-06,
      "loss": 0.5461,
      "step": 3300
    },
    {
      "epoch": 0.8675682572084715,
      "grad_norm": 39.10634994506836,
      "learning_rate": 6.301389282676497e-06,
      "loss": 0.4885,
      "step": 3400
    },
    {
      "epoch": 0.8930849706557795,
      "grad_norm": 24.000411987304688,
      "learning_rate": 6.159625744258577e-06,
      "loss": 0.573,
      "step": 3500
    },
    {
      "epoch": 0.9186016841030875,
      "grad_norm": 15.122103691101074,
      "learning_rate": 6.017862205840658e-06,
      "loss": 0.5145,
      "step": 3600
    },
    {
      "epoch": 0.9441183975503955,
      "grad_norm": 29.39118003845215,
      "learning_rate": 5.87609866742274e-06,
      "loss": 0.5214,
      "step": 3700
    },
    {
      "epoch": 0.9696351109977035,
      "grad_norm": 21.783594131469727,
      "learning_rate": 5.73433512900482e-06,
      "loss": 0.493,
      "step": 3800
    },
    {
      "epoch": 0.9951518244450115,
      "grad_norm": 6.728275299072266,
      "learning_rate": 5.592571590586902e-06,
      "loss": 0.5413,
      "step": 3900
    },
    {
      "epoch": 1.0206685378923195,
      "grad_norm": 23.733306884765625,
      "learning_rate": 5.450808052168982e-06,
      "loss": 0.442,
      "step": 4000
    },
    {
      "epoch": 1.0461852513396275,
      "grad_norm": 27.818904876708984,
      "learning_rate": 5.309044513751064e-06,
      "loss": 0.4068,
      "step": 4100
    },
    {
      "epoch": 1.0717019647869355,
      "grad_norm": 27.924034118652344,
      "learning_rate": 5.167280975333145e-06,
      "loss": 0.4142,
      "step": 4200
    },
    {
      "epoch": 1.0972186782342435,
      "grad_norm": 30.92214012145996,
      "learning_rate": 5.025517436915226e-06,
      "loss": 0.4112,
      "step": 4300
    },
    {
      "epoch": 1.1227353916815515,
      "grad_norm": 31.626100540161133,
      "learning_rate": 4.8837538984973074e-06,
      "loss": 0.4168,
      "step": 4400
    },
    {
      "epoch": 1.1482521051288594,
      "grad_norm": 37.9410514831543,
      "learning_rate": 4.7419903600793884e-06,
      "loss": 0.399,
      "step": 4500
    },
    {
      "epoch": 1.1737688185761674,
      "grad_norm": 23.123756408691406,
      "learning_rate": 4.6002268216614694e-06,
      "loss": 0.4104,
      "step": 4600
    },
    {
      "epoch": 1.1992855320234754,
      "grad_norm": 16.32610511779785,
      "learning_rate": 4.45846328324355e-06,
      "loss": 0.4082,
      "step": 4700
    },
    {
      "epoch": 1.2248022454707834,
      "grad_norm": 28.599788665771484,
      "learning_rate": 4.316699744825631e-06,
      "loss": 0.4197,
      "step": 4800
    },
    {
      "epoch": 1.2503189589180914,
      "grad_norm": 27.3553524017334,
      "learning_rate": 4.174936206407712e-06,
      "loss": 0.4162,
      "step": 4900
    },
    {
      "epoch": 1.2758356723653994,
      "grad_norm": 30.498682022094727,
      "learning_rate": 4.033172667989793e-06,
      "loss": 0.4178,
      "step": 5000
    },
    {
      "epoch": 1.3013523858127074,
      "grad_norm": 33.331581115722656,
      "learning_rate": 3.891409129571874e-06,
      "loss": 0.3608,
      "step": 5100
    },
    {
      "epoch": 1.3268690992600152,
      "grad_norm": 32.784950256347656,
      "learning_rate": 3.7496455911539554e-06,
      "loss": 0.4207,
      "step": 5200
    },
    {
      "epoch": 1.3523858127073232,
      "grad_norm": 31.74955177307129,
      "learning_rate": 3.6078820527360364e-06,
      "loss": 0.3906,
      "step": 5300
    },
    {
      "epoch": 1.3779025261546312,
      "grad_norm": 23.00249671936035,
      "learning_rate": 3.4661185143181174e-06,
      "loss": 0.3973,
      "step": 5400
    },
    {
      "epoch": 1.4034192396019392,
      "grad_norm": 31.479957580566406,
      "learning_rate": 3.324354975900199e-06,
      "loss": 0.4064,
      "step": 5500
    },
    {
      "epoch": 1.4289359530492471,
      "grad_norm": 8.337751388549805,
      "learning_rate": 3.18259143748228e-06,
      "loss": 0.3594,
      "step": 5600
    },
    {
      "epoch": 1.4544526664965551,
      "grad_norm": 27.91620445251465,
      "learning_rate": 3.040827899064361e-06,
      "loss": 0.4218,
      "step": 5700
    },
    {
      "epoch": 1.4799693799438631,
      "grad_norm": 26.840124130249023,
      "learning_rate": 2.899064360646442e-06,
      "loss": 0.368,
      "step": 5800
    },
    {
      "epoch": 1.5054860933911711,
      "grad_norm": 38.38251876831055,
      "learning_rate": 2.757300822228523e-06,
      "loss": 0.3994,
      "step": 5900
    },
    {
      "epoch": 1.5310028068384791,
      "grad_norm": 31.26285743713379,
      "learning_rate": 2.615537283810604e-06,
      "loss": 0.3785,
      "step": 6000
    },
    {
      "epoch": 1.556519520285787,
      "grad_norm": 17.311967849731445,
      "learning_rate": 2.473773745392685e-06,
      "loss": 0.3602,
      "step": 6100
    },
    {
      "epoch": 1.582036233733095,
      "grad_norm": 10.20802116394043,
      "learning_rate": 2.332010206974766e-06,
      "loss": 0.3891,
      "step": 6200
    },
    {
      "epoch": 1.607552947180403,
      "grad_norm": 15.230350494384766,
      "learning_rate": 2.1902466685568476e-06,
      "loss": 0.3718,
      "step": 6300
    },
    {
      "epoch": 1.633069660627711,
      "grad_norm": 27.391233444213867,
      "learning_rate": 2.0484831301389286e-06,
      "loss": 0.3699,
      "step": 6400
    },
    {
      "epoch": 1.658586374075019,
      "grad_norm": 22.2066707611084,
      "learning_rate": 1.9067195917210096e-06,
      "loss": 0.3702,
      "step": 6500
    }
  ],
  "logging_steps": 100,
  "max_steps": 7838,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
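
A minimal Python sketch for consuming a trainer_state.json like the one above, assuming it is saved locally under that name (the path is an assumption; adjust it to the checkpoint directory). It only relies on the keys visible in this file: "log_history" holds one record per logging event (every 100 steps here), each with "step", "loss", "grad_norm", and "learning_rate".

# Summarize the loss curve and flag gradient-norm spikes from a
# Hugging Face Trainer checkpoint's trainer_state.json.
import json

with open("trainer_state.json") as f:  # assumed local path to the checkpoint file
    state = json.load(f)

history = state["log_history"]  # one dict per logging event

first, last = history[0], history[-1]
print(f"steps {first['step']}-{last['step']}: "
      f"loss {first['loss']:.4f} -> {last['loss']:.4f}")

# The three largest gradient-norm entries, often worth inspecting
# when looking for training instability.
for entry in sorted(history, key=lambda e: e["grad_norm"], reverse=True)[:3]:
    print(f"step {entry['step']:>5}: grad_norm={entry['grad_norm']:.1f}, "
          f"lr={entry['learning_rate']:.2e}, loss={entry['loss']:.4f}")

On this file the summary would show the loss falling from 2.3912 at step 100 to 0.3702 at step 6500, with the largest grad_norm spike (72.5) at step 300.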