{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0206685378923195,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025516713447307986,
      "grad_norm": 22.250633239746094,
      "learning_rate": 1.1862244897959185e-06,
      "loss": 2.3912,
      "step": 100
    },
    {
      "epoch": 0.05103342689461597,
      "grad_norm": 18.358823776245117,
      "learning_rate": 2.461734693877551e-06,
      "loss": 0.9986,
      "step": 200
    },
    {
      "epoch": 0.07655014034192396,
      "grad_norm": 72.52508544921875,
      "learning_rate": 3.737244897959184e-06,
      "loss": 0.8566,
      "step": 300
    },
    {
      "epoch": 0.10206685378923194,
      "grad_norm": 23.034740447998047,
      "learning_rate": 5.012755102040817e-06,
      "loss": 0.7413,
      "step": 400
    },
    {
      "epoch": 0.12758356723653994,
      "grad_norm": 45.18895721435547,
      "learning_rate": 6.288265306122449e-06,
      "loss": 0.7472,
      "step": 500
    },
    {
      "epoch": 0.15310028068384793,
      "grad_norm": 49.649410247802734,
      "learning_rate": 7.563775510204082e-06,
      "loss": 0.7018,
      "step": 600
    },
    {
      "epoch": 0.17861699413115592,
      "grad_norm": 37.48847198486328,
      "learning_rate": 8.839285714285714e-06,
      "loss": 0.6664,
      "step": 700
    },
    {
      "epoch": 0.20413370757846389,
      "grad_norm": 41.822566986083984,
      "learning_rate": 9.987241281542388e-06,
      "loss": 0.6929,
      "step": 800
    },
    {
      "epoch": 0.22965042102577188,
      "grad_norm": 31.396697998046875,
      "learning_rate": 9.84547774312447e-06,
      "loss": 0.6417,
      "step": 900
    },
    {
      "epoch": 0.25516713447307987,
      "grad_norm": 18.564287185668945,
      "learning_rate": 9.70371420470655e-06,
      "loss": 0.6491,
      "step": 1000
    },
    {
      "epoch": 0.28068384792038786,
      "grad_norm": 40.36125946044922,
      "learning_rate": 9.561950666288631e-06,
      "loss": 0.6364,
      "step": 1100
    },
    {
      "epoch": 0.30620056136769586,
      "grad_norm": 22.446331024169922,
      "learning_rate": 9.420187127870712e-06,
      "loss": 0.6668,
      "step": 1200
    },
    {
      "epoch": 0.33171727481500385,
      "grad_norm": 19.624980926513672,
      "learning_rate": 9.278423589452793e-06,
      "loss": 0.6713,
      "step": 1300
    },
    {
      "epoch": 0.35723398826231184,
      "grad_norm": 14.320337295532227,
      "learning_rate": 9.136660051034874e-06,
      "loss": 0.5721,
      "step": 1400
    },
    {
      "epoch": 0.3827507017096198,
      "grad_norm": 25.894556045532227,
      "learning_rate": 8.994896512616955e-06,
      "loss": 0.597,
      "step": 1500
    },
    {
      "epoch": 0.40826741515692777,
      "grad_norm": 40.72581100463867,
      "learning_rate": 8.853132974199036e-06,
      "loss": 0.594,
      "step": 1600
    },
    {
      "epoch": 0.43378412860423576,
      "grad_norm": 21.20204734802246,
      "learning_rate": 8.711369435781117e-06,
      "loss": 0.5882,
      "step": 1700
    },
    {
      "epoch": 0.45930084205154376,
      "grad_norm": 49.4229850769043,
      "learning_rate": 8.5696058973632e-06,
      "loss": 0.6018,
      "step": 1800
    },
    {
      "epoch": 0.48481755549885175,
      "grad_norm": 21.961654663085938,
      "learning_rate": 8.42784235894528e-06,
      "loss": 0.5872,
      "step": 1900
    },
    {
      "epoch": 0.5103342689461597,
      "grad_norm": 15.664958000183105,
      "learning_rate": 8.286078820527362e-06,
      "loss": 0.5802,
      "step": 2000
    },
    {
      "epoch": 0.5358509823934677,
      "grad_norm": 32.18100357055664,
      "learning_rate": 8.144315282109441e-06,
      "loss": 0.5614,
      "step": 2100
    },
    {
      "epoch": 0.5613676958407757,
      "grad_norm": 14.913948059082031,
      "learning_rate": 8.002551743691524e-06,
      "loss": 0.5166,
      "step": 2200
    },
    {
      "epoch": 0.5868844092880837,
      "grad_norm": 14.455422401428223,
      "learning_rate": 7.860788205273603e-06,
      "loss": 0.5722,
      "step": 2300
    },
    {
      "epoch": 0.6124011227353917,
      "grad_norm": 49.46743392944336,
      "learning_rate": 7.719024666855686e-06,
      "loss": 0.5289,
      "step": 2400
    },
    {
      "epoch": 0.6379178361826997,
      "grad_norm": 16.0176944732666,
      "learning_rate": 7.577261128437766e-06,
      "loss": 0.5803,
      "step": 2500
    },
    {
      "epoch": 0.6634345496300077,
      "grad_norm": 11.620992660522461,
      "learning_rate": 7.435497590019848e-06,
      "loss": 0.5355,
      "step": 2600
    },
    {
      "epoch": 0.6889512630773157,
      "grad_norm": 24.707399368286133,
      "learning_rate": 7.293734051601928e-06,
      "loss": 0.5694,
      "step": 2700
    },
    {
      "epoch": 0.7144679765246237,
      "grad_norm": 35.875511169433594,
      "learning_rate": 7.15197051318401e-06,
      "loss": 0.569,
      "step": 2800
    },
    {
      "epoch": 0.7399846899719316,
      "grad_norm": 32.23057556152344,
      "learning_rate": 7.010206974766091e-06,
      "loss": 0.5016,
      "step": 2900
    },
    {
      "epoch": 0.7655014034192396,
      "grad_norm": 24.113296508789062,
      "learning_rate": 6.868443436348172e-06,
      "loss": 0.5586,
      "step": 3000
    },
    {
      "epoch": 0.7910181168665475,
      "grad_norm": 25.87450408935547,
      "learning_rate": 6.726679897930253e-06,
      "loss": 0.5154,
      "step": 3100
    },
    {
      "epoch": 0.8165348303138555,
      "grad_norm": 32.350250244140625,
      "learning_rate": 6.584916359512335e-06,
      "loss": 0.5318,
      "step": 3200
    },
    {
      "epoch": 0.8420515437611635,
      "grad_norm": 22.004989624023438,
      "learning_rate": 6.443152821094415e-06,
      "loss": 0.5461,
      "step": 3300
    },
    {
      "epoch": 0.8675682572084715,
      "grad_norm": 39.10634994506836,
      "learning_rate": 6.301389282676497e-06,
      "loss": 0.4885,
      "step": 3400
    },
    {
      "epoch": 0.8930849706557795,
      "grad_norm": 24.000411987304688,
      "learning_rate": 6.159625744258577e-06,
      "loss": 0.573,
      "step": 3500
    },
    {
      "epoch": 0.9186016841030875,
      "grad_norm": 15.122103691101074,
      "learning_rate": 6.017862205840658e-06,
      "loss": 0.5145,
      "step": 3600
    },
    {
      "epoch": 0.9441183975503955,
      "grad_norm": 29.39118003845215,
      "learning_rate": 5.87609866742274e-06,
      "loss": 0.5214,
      "step": 3700
    },
    {
      "epoch": 0.9696351109977035,
      "grad_norm": 21.783594131469727,
      "learning_rate": 5.73433512900482e-06,
      "loss": 0.493,
      "step": 3800
    },
    {
      "epoch": 0.9951518244450115,
      "grad_norm": 6.728275299072266,
      "learning_rate": 5.592571590586902e-06,
      "loss": 0.5413,
      "step": 3900
    },
    {
      "epoch": 1.0206685378923195,
      "grad_norm": 23.733306884765625,
      "learning_rate": 5.450808052168982e-06,
      "loss": 0.442,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 7838,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}