{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03125, "grad_norm": 27.536455154418945, "learning_rate": 2e-05, "loss": 5.269, "mean_token_accuracy": 0.193113774061203, "num_tokens": 1320.0, "step": 1 }, { "epoch": 1.0, "grad_norm": 10.073838233947754, "learning_rate": 1.8062500000000002e-05, "loss": 2.9886, "mean_token_accuracy": 0.4425700907745669, "num_tokens": 49123.0, "step": 32 }, { "epoch": 2.0, "grad_norm": 72.79724884033203, "learning_rate": 1.60625e-05, "loss": 1.7728, "mean_token_accuracy": 0.6223073434084654, "num_tokens": 98246.0, "step": 64 }, { "epoch": 3.0, "grad_norm": 23.59392738342285, "learning_rate": 1.4062500000000001e-05, "loss": 1.0599, "mean_token_accuracy": 0.7586388783529401, "num_tokens": 147369.0, "step": 96 }, { "epoch": 4.0, "grad_norm": 33.94417190551758, "learning_rate": 1.2062500000000002e-05, "loss": 0.5248, "mean_token_accuracy": 0.8688167557120323, "num_tokens": 196492.0, "step": 128 }, { "epoch": 5.0, "grad_norm": 28.460622787475586, "learning_rate": 1.0062500000000002e-05, "loss": 0.2783, "mean_token_accuracy": 0.9295041039586067, "num_tokens": 245615.0, "step": 160 }, { "epoch": 6.0, "grad_norm": 13.504148483276367, "learning_rate": 8.062500000000001e-06, "loss": 0.1414, "mean_token_accuracy": 0.9666158780455589, "num_tokens": 294738.0, "step": 192 }, { "epoch": 7.0, "grad_norm": 1.4141818284988403, "learning_rate": 6.0625e-06, "loss": 0.061, "mean_token_accuracy": 0.9868465270847082, "num_tokens": 343861.0, "step": 224 }, { "epoch": 8.0, "grad_norm": 6.186761856079102, "learning_rate": 4.0625000000000005e-06, "loss": 0.0437, "mean_token_accuracy": 0.9876908175647259, "num_tokens": 392984.0, "step": 256 }, { "epoch": 9.0, "grad_norm": 0.0, "learning_rate": 2.0625e-06, "loss": 0.0267, "mean_token_accuracy": 0.9606593921780586, "num_tokens": 442107.0, "step": 288 }, { "epoch": 10.0, "grad_norm": 0.08612818270921707, "learning_rate": 6.250000000000001e-08, "loss": 0.017, "mean_token_accuracy": 0.993696304038167, "num_tokens": 491230.0, "step": 320 }, { "epoch": 10.0, "step": 320, "total_flos": 3034108976431104.0, "train_loss": 0.6985519178211689, "train_runtime": 385.0713, "train_samples_per_second": 6.466, "train_steps_per_second": 0.831 } ], "logging_steps": 10, "max_steps": 320, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3034108976431104.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }