{ "best_metric": 0.47667405009269714, "best_model_checkpoint": "output_pipe/tf1/origin/checkpoint-400", "epoch": 4.0, "eval_steps": 200, "global_step": 1920, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20833333333333334, "grad_norm": 1.63341224193573, "learning_rate": 2.9197860962566846e-05, "loss": 0.5662, "step": 100 }, { "epoch": 0.4166666666666667, "grad_norm": 1.3856669664382935, "learning_rate": 2.7593582887700534e-05, "loss": 0.4946, "step": 200 }, { "epoch": 0.4166666666666667, "eval_accuracy": 0.762, "eval_f1": 0.7618132615970921, "eval_loss": 0.49597057700157166, "eval_matthews_correlation": 0.5242370258299112, "eval_precision": 0.7624125986272372, "eval_recall": 0.7618247567844342, "eval_runtime": 0.2589, "eval_samples_per_second": 3862.489, "eval_steps_per_second": 61.8, "step": 200 }, { "epoch": 0.625, "grad_norm": 1.5926254987716675, "learning_rate": 2.5989304812834225e-05, "loss": 0.4657, "step": 300 }, { "epoch": 0.8333333333333334, "grad_norm": 1.4355705976486206, "learning_rate": 2.4385026737967917e-05, "loss": 0.4635, "step": 400 }, { "epoch": 0.8333333333333334, "eval_accuracy": 0.763, "eval_f1": 0.762971319529663, "eval_loss": 0.47667405009269714, "eval_matthews_correlation": 0.5264938160370651, "eval_precision": 0.7633570213191873, "eval_recall": 0.7631368407578085, "eval_runtime": 0.2576, "eval_samples_per_second": 3881.803, "eval_steps_per_second": 62.109, "step": 400 }, { "epoch": 1.0416666666666667, "grad_norm": 2.168917179107666, "learning_rate": 2.2780748663101604e-05, "loss": 0.4364, "step": 500 }, { "epoch": 1.25, "grad_norm": 3.3078157901763916, "learning_rate": 2.1176470588235296e-05, "loss": 0.3744, "step": 600 }, { "epoch": 1.25, "eval_accuracy": 0.754, "eval_f1": 0.749902400936951, "eval_loss": 0.5560281276702881, "eval_matthews_correlation": 0.5223504030579328, "eval_precision": 0.7695887888707038, "eval_recall": 0.7530241935483871, "eval_runtime": 0.2573, "eval_samples_per_second": 3886.497, "eval_steps_per_second": 62.184, "step": 600 }, { "epoch": 1.4583333333333333, "grad_norm": 3.278846263885498, "learning_rate": 1.9572192513368987e-05, "loss": 0.3754, "step": 700 }, { "epoch": 1.6666666666666665, "grad_norm": 2.1275076866149902, "learning_rate": 1.796791443850267e-05, "loss": 0.3609, "step": 800 }, { "epoch": 1.6666666666666665, "eval_accuracy": 0.765, "eval_f1": 0.7644344070112339, "eval_loss": 0.5123264789581299, "eval_matthews_correlation": 0.5338727577972531, "eval_precision": 0.7684401940200087, "eval_recall": 0.7654409882232462, "eval_runtime": 0.2578, "eval_samples_per_second": 3879.096, "eval_steps_per_second": 62.066, "step": 800 }, { "epoch": 1.875, "grad_norm": 3.509340763092041, "learning_rate": 1.6363636363636363e-05, "loss": 0.3541, "step": 900 }, { "epoch": 2.0833333333333335, "grad_norm": 5.369084358215332, "learning_rate": 1.4759358288770054e-05, "loss": 0.28, "step": 1000 }, { "epoch": 2.0833333333333335, "eval_accuracy": 0.748, "eval_f1": 0.7478547643442623, "eval_loss": 0.6457599997520447, "eval_matthews_correlation": 0.4972441781985867, "eval_precision": 0.7490048793014894, "eval_recall": 0.7482398873527906, "eval_runtime": 0.2579, "eval_samples_per_second": 3878.146, "eval_steps_per_second": 62.05, "step": 1000 }, { "epoch": 2.2916666666666665, "grad_norm": 6.104151725769043, "learning_rate": 1.3155080213903743e-05, "loss": 0.1573, "step": 1100 }, { "epoch": 2.5, "grad_norm": 6.101887226104736, "learning_rate": 1.1550802139037433e-05, "loss": 0.1544, "step": 1200 }, { "epoch": 2.5, "eval_accuracy": 0.76, "eval_f1": 0.7582945261767029, "eval_loss": 0.6890341639518738, "eval_matthews_correlation": 0.5255066950532729, "eval_precision": 0.7661740558292283, "eval_recall": 0.7593766001024065, "eval_runtime": 0.258, "eval_samples_per_second": 3876.157, "eval_steps_per_second": 62.019, "step": 1200 }, { "epoch": 2.7083333333333335, "grad_norm": 4.268365383148193, "learning_rate": 9.946524064171122e-06, "loss": 0.1521, "step": 1300 }, { "epoch": 2.9166666666666665, "grad_norm": 12.284384727478027, "learning_rate": 8.342245989304813e-06, "loss": 0.1441, "step": 1400 }, { "epoch": 2.9166666666666665, "eval_accuracy": 0.76, "eval_f1": 0.7599039615846338, "eval_loss": 0.7288458347320557, "eval_matthews_correlation": 0.5200269546030198, "eval_precision": 0.7601383817028637, "eval_recall": 0.7598886328725039, "eval_runtime": 0.257, "eval_samples_per_second": 3891.459, "eval_steps_per_second": 62.263, "step": 1400 }, { "epoch": 3.125, "grad_norm": 3.6564340591430664, "learning_rate": 6.737967914438503e-06, "loss": 0.0714, "step": 1500 }, { "epoch": 3.3333333333333335, "grad_norm": 1.877123236656189, "learning_rate": 5.1336898395721925e-06, "loss": 0.0287, "step": 1600 }, { "epoch": 3.3333333333333335, "eval_accuracy": 0.753, "eval_f1": 0.7529938248456212, "eval_loss": 1.072191596031189, "eval_matthews_correlation": 0.5060012042686535, "eval_precision": 0.7529930119720478, "eval_recall": 0.7530081925243215, "eval_runtime": 0.2579, "eval_samples_per_second": 3877.949, "eval_steps_per_second": 62.047, "step": 1600 }, { "epoch": 3.5416666666666665, "grad_norm": 3.2252037525177, "learning_rate": 3.5454545454545454e-06, "loss": 0.0299, "step": 1700 }, { "epoch": 3.75, "grad_norm": 0.10440269112586975, "learning_rate": 1.9411764705882357e-06, "loss": 0.0262, "step": 1800 }, { "epoch": 3.75, "eval_accuracy": 0.754, "eval_f1": 0.7539842549923195, "eval_loss": 1.1535484790802002, "eval_matthews_correlation": 0.5083546901639514, "eval_precision": 0.754242443647541, "eval_recall": 0.7541122631848438, "eval_runtime": 0.257, "eval_samples_per_second": 3890.936, "eval_steps_per_second": 62.255, "step": 1800 }, { "epoch": 3.9583333333333335, "grad_norm": 0.09924239665269852, "learning_rate": 3.368983957219252e-07, "loss": 0.0214, "step": 1900 }, { "epoch": 4.0, "step": 1920, "total_flos": 8259439257809280.0, "train_loss": 0.2583618890028447, "train_runtime": 166.3128, "train_samples_per_second": 737.694, "train_steps_per_second": 11.545 } ], "logging_steps": 100, "max_steps": 1920, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8259439257809280.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }