|
{ |
|
"best_metric": 0.2909594774246216, |
|
"best_model_checkpoint": "/data2/ckpts/GenRM/qwen-2.5-math-instruct/full/GenPRM-78k-train-5:5-decontamination/checkpoint-1200", |
|
"epoch": 0.9995883079456567, |
|
"eval_steps": 100, |
|
"global_step": 1214, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016467682173734045, |
|
"grad_norm": 2.2536449432373047, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.9706, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03293536434746809, |
|
"grad_norm": 0.7532587051391602, |
|
"learning_rate": 4.999919851200522e-06, |
|
"loss": 0.5433, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04940304652120214, |
|
"grad_norm": 0.6202924847602844, |
|
"learning_rate": 4.995290485881111e-06, |
|
"loss": 0.4319, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.5724588632583618, |
|
"learning_rate": 4.983551854245604e-06, |
|
"loss": 0.3985, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08233841086867023, |
|
"grad_norm": 0.6023426651954651, |
|
"learning_rate": 4.9647374005198125e-06, |
|
"loss": 0.3951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08233841086867023, |
|
"eval_loss": 0.37706664204597473, |
|
"eval_runtime": 38.4153, |
|
"eval_samples_per_second": 52.063, |
|
"eval_steps_per_second": 6.508, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09880609304240429, |
|
"grad_norm": 0.6422224640846252, |
|
"learning_rate": 4.938900728467664e-06, |
|
"loss": 0.381, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 0.5755812525749207, |
|
"learning_rate": 4.9061154486701204e-06, |
|
"loss": 0.3649, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 0.5953928232192993, |
|
"learning_rate": 4.866474968803222e-06, |
|
"loss": 0.3605, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14820913956360643, |
|
"grad_norm": 0.608504593372345, |
|
"learning_rate": 4.820092227512736e-06, |
|
"loss": 0.3514, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16467682173734047, |
|
"grad_norm": 0.6068744659423828, |
|
"learning_rate": 4.767099372643641e-06, |
|
"loss": 0.3471, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16467682173734047, |
|
"eval_loss": 0.3430534303188324, |
|
"eval_runtime": 38.4427, |
|
"eval_samples_per_second": 52.026, |
|
"eval_steps_per_second": 6.503, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1811445039110745, |
|
"grad_norm": 0.6543622612953186, |
|
"learning_rate": 4.707647384741187e-06, |
|
"loss": 0.3494, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.19761218608480857, |
|
"grad_norm": 0.6283196210861206, |
|
"learning_rate": 4.641905646896205e-06, |
|
"loss": 0.3382, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2140798682585426, |
|
"grad_norm": 0.6050875186920166, |
|
"learning_rate": 4.570061462160209e-06, |
|
"loss": 0.3382, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 0.6354455947875977, |
|
"learning_rate": 4.492319519905217e-06, |
|
"loss": 0.3319, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.24701523260601072, |
|
"grad_norm": 0.5852451920509338, |
|
"learning_rate": 4.408901312648652e-06, |
|
"loss": 0.3295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24701523260601072, |
|
"eval_loss": 0.3265990614891052, |
|
"eval_runtime": 38.4536, |
|
"eval_samples_per_second": 52.011, |
|
"eval_steps_per_second": 6.501, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2634829147797447, |
|
"grad_norm": 0.6466448307037354, |
|
"learning_rate": 4.3200445050048545e-06, |
|
"loss": 0.3322, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2799505969534788, |
|
"grad_norm": 0.6878635287284851, |
|
"learning_rate": 4.226002256561101e-06, |
|
"loss": 0.3336, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.29641827912721286, |
|
"grad_norm": 0.6180429458618164, |
|
"learning_rate": 4.127042500607298e-06, |
|
"loss": 0.3287, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.31288596130094687, |
|
"grad_norm": 0.6696975231170654, |
|
"learning_rate": 4.023447180774308e-06, |
|
"loss": 0.3208, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"grad_norm": 0.6044342517852783, |
|
"learning_rate": 3.915511447755793e-06, |
|
"loss": 0.3162, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"eval_loss": 0.3160727918148041, |
|
"eval_runtime": 38.4863, |
|
"eval_samples_per_second": 51.967, |
|
"eval_steps_per_second": 6.496, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 0.6135950088500977, |
|
"learning_rate": 3.803542818402154e-06, |
|
"loss": 0.3167, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.362289007822149, |
|
"grad_norm": 0.6029672026634216, |
|
"learning_rate": 3.687860299582381e-06, |
|
"loss": 0.3221, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3787566899958831, |
|
"grad_norm": 0.6770069003105164, |
|
"learning_rate": 3.568793479309998e-06, |
|
"loss": 0.319, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.39522437216961714, |
|
"grad_norm": 0.6067743897438049, |
|
"learning_rate": 3.4466815877225456e-06, |
|
"loss": 0.3122, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.41169205434335115, |
|
"grad_norm": 0.5844372510910034, |
|
"learning_rate": 3.3218725305899402e-06, |
|
"loss": 0.3143, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41169205434335115, |
|
"eval_loss": 0.3084495961666107, |
|
"eval_runtime": 38.5709, |
|
"eval_samples_per_second": 51.853, |
|
"eval_steps_per_second": 6.482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4281597365170852, |
|
"grad_norm": 0.6176360249519348, |
|
"learning_rate": 3.194721898105323e-06, |
|
"loss": 0.3128, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4446274186908193, |
|
"grad_norm": 0.6112409830093384, |
|
"learning_rate": 3.0655919517824167e-06, |
|
"loss": 0.315, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 0.5909674763679504, |
|
"learning_rate": 2.9348505923458008e-06, |
|
"loss": 0.3179, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.47756278303828736, |
|
"grad_norm": 0.5776228904724121, |
|
"learning_rate": 2.802870311554661e-06, |
|
"loss": 0.3109, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.49403046521202143, |
|
"grad_norm": 0.5729939937591553, |
|
"learning_rate": 2.6700271309463397e-06, |
|
"loss": 0.3054, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49403046521202143, |
|
"eval_loss": 0.3028527796268463, |
|
"eval_runtime": 38.5353, |
|
"eval_samples_per_second": 51.9, |
|
"eval_steps_per_second": 6.488, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5104981473857555, |
|
"grad_norm": 0.6428977847099304, |
|
"learning_rate": 2.536699530523292e-06, |
|
"loss": 0.3051, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5269658295594895, |
|
"grad_norm": 0.652348518371582, |
|
"learning_rate": 2.4032673704356912e-06, |
|
"loss": 0.3006, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5434335117332235, |
|
"grad_norm": 0.665675699710846, |
|
"learning_rate": 2.2701108087318935e-06, |
|
"loss": 0.3068, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5599011939069576, |
|
"grad_norm": 0.568688154220581, |
|
"learning_rate": 2.1376092182601694e-06, |
|
"loss": 0.3079, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 0.6417534351348877, |
|
"learning_rate": 2.006140105807523e-06, |
|
"loss": 0.3031, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"eval_loss": 0.29845306277275085, |
|
"eval_runtime": 38.6545, |
|
"eval_samples_per_second": 51.74, |
|
"eval_steps_per_second": 6.468, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5928365582544257, |
|
"grad_norm": 0.6650199294090271, |
|
"learning_rate": 1.8760780365550654e-06, |
|
"loss": 0.304, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6093042404281598, |
|
"grad_norm": 0.6189924478530884, |
|
"learning_rate": 1.7477935669142319e-06, |
|
"loss": 0.3034, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6257719226018937, |
|
"grad_norm": 0.64354008436203, |
|
"learning_rate": 1.6216521887842863e-06, |
|
"loss": 0.306, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6422396047756278, |
|
"grad_norm": 0.5726205110549927, |
|
"learning_rate": 1.4980132882389836e-06, |
|
"loss": 0.2985, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"grad_norm": 0.6365756988525391, |
|
"learning_rate": 1.3772291216091954e-06, |
|
"loss": 0.2988, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"eval_loss": 0.2953297197818756, |
|
"eval_runtime": 38.4039, |
|
"eval_samples_per_second": 52.078, |
|
"eval_steps_per_second": 6.51, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6751749691230959, |
|
"grad_norm": 0.6121465563774109, |
|
"learning_rate": 1.2596438118786732e-06, |
|
"loss": 0.3006, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 0.601615309715271, |
|
"learning_rate": 1.1455923682523476e-06, |
|
"loss": 0.298, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7081103334705641, |
|
"grad_norm": 0.5637004971504211, |
|
"learning_rate": 1.0353997316904204e-06, |
|
"loss": 0.3021, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.724578015644298, |
|
"grad_norm": 0.6115686893463135, |
|
"learning_rate": 9.293798491276612e-07, |
|
"loss": 0.3007, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7410456978180321, |
|
"grad_norm": 0.56075119972229, |
|
"learning_rate": 8.278347790154595e-07, |
|
"loss": 0.2965, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7410456978180321, |
|
"eval_loss": 0.2931554615497589, |
|
"eval_runtime": 38.421, |
|
"eval_samples_per_second": 52.055, |
|
"eval_steps_per_second": 6.507, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7575133799917662, |
|
"grad_norm": 0.5739229917526245, |
|
"learning_rate": 7.310538307350684e-07, |
|
"loss": 0.2949, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7739810621655002, |
|
"grad_norm": 0.6038621664047241, |
|
"learning_rate": 6.393127403338714e-07, |
|
"loss": 0.2999, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7904487443392343, |
|
"grad_norm": 0.6664382815361023, |
|
"learning_rate": 5.528728849330817e-07, |
|
"loss": 0.3014, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 0.5887951850891113, |
|
"learning_rate": 4.719805380450804e-07, |
|
"loss": 0.2979, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8233841086867023, |
|
"grad_norm": 0.584530234336853, |
|
"learning_rate": 3.9686616792204677e-07, |
|
"loss": 0.2935, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8233841086867023, |
|
"eval_loss": 0.29177162051200867, |
|
"eval_runtime": 38.3967, |
|
"eval_samples_per_second": 52.088, |
|
"eval_steps_per_second": 6.511, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8398517908604364, |
|
"grad_norm": 0.520577609539032, |
|
"learning_rate": 3.2774378093494067e-07, |
|
"loss": 0.2974, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8563194730341704, |
|
"grad_norm": 0.60713130235672, |
|
"learning_rate": 2.6481031185358995e-07, |
|
"loss": 0.3022, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8727871552079045, |
|
"grad_norm": 0.655536413192749, |
|
"learning_rate": 2.0824506276503898e-07, |
|
"loss": 0.2961, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8892548373816386, |
|
"grad_norm": 0.5735917091369629, |
|
"learning_rate": 1.5820919222869325e-07, |
|
"loss": 0.2983, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9057225195553725, |
|
"grad_norm": 0.5977383852005005, |
|
"learning_rate": 1.1484525612372372e-07, |
|
"loss": 0.2975, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9057225195553725, |
|
"eval_loss": 0.2911008894443512, |
|
"eval_runtime": 38.7911, |
|
"eval_samples_per_second": 51.558, |
|
"eval_steps_per_second": 6.445, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 0.602390468120575, |
|
"learning_rate": 7.827680149686879e-08, |
|
"loss": 0.299, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9386578839028407, |
|
"grad_norm": 0.5963403582572937, |
|
"learning_rate": 4.86080145678014e-08, |
|
"loss": 0.2954, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9551255660765747, |
|
"grad_norm": 0.5622375011444092, |
|
"learning_rate": 2.5923423894919365e-08, |
|
"loss": 0.2968, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9715932482503088, |
|
"grad_norm": 0.5845054984092712, |
|
"learning_rate": 1.02876595472573e-08, |
|
"loss": 0.292, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9880609304240429, |
|
"grad_norm": 0.6160269975662231, |
|
"learning_rate": 1.7452689686631164e-09, |
|
"loss": 0.304, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9880609304240429, |
|
"eval_loss": 0.2909594774246216, |
|
"eval_runtime": 38.9593, |
|
"eval_samples_per_second": 51.336, |
|
"eval_steps_per_second": 6.417, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9995883079456567, |
|
"step": 1214, |
|
"total_flos": 5.587165226987422e+18, |
|
"train_loss": 0.332974739875982, |
|
"train_runtime": 7444.239, |
|
"train_samples_per_second": 10.439, |
|
"train_steps_per_second": 0.163 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1214, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.587165226987422e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|