{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9829619921363041,
  "eval_steps": 500,
  "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0163826998689384,
      "grad_norm": 10.452404975891113,
      "learning_rate": 0.00010434782608695653,
      "loss": 2.7741,
      "step": 25
    },
    {
      "epoch": 0.0327653997378768,
      "grad_norm": 11.673263549804688,
      "learning_rate": 0.0001999979723762913,
      "loss": 2.2936,
      "step": 50
    },
    {
      "epoch": 0.0491480996068152,
      "grad_norm": 0.0,
      "learning_rate": 0.00019982342283922738,
      "loss": 2.0868,
      "step": 75
    },
    {
      "epoch": 0.0655307994757536,
      "grad_norm": 12.91142463684082,
      "learning_rate": 0.000199367821181338,
      "loss": 1.9288,
      "step": 100
    },
    {
      "epoch": 0.08191349934469201,
      "grad_norm": 0.0,
      "learning_rate": 0.00019863245014577668,
      "loss": 10.3322,
      "step": 125
    },
    {
      "epoch": 0.0982961992136304,
      "grad_norm": 11.256654739379883,
      "learning_rate": 0.00019761938016434448,
      "loss": 2.2306,
      "step": 150
    },
    {
      "epoch": 0.11467889908256881,
      "grad_norm": 5.196111679077148,
      "learning_rate": 0.0001963314635282044,
      "loss": 7.8393,
      "step": 175
    },
    {
      "epoch": 0.1310615989515072,
      "grad_norm": 6.82452917098999,
      "learning_rate": 0.00019477232635727637,
      "loss": 3.5118,
      "step": 200
    },
    {
      "epoch": 0.1474442988204456,
      "grad_norm": 0.0,
      "learning_rate": 0.000192946358390923,
      "loss": 3.4974,
      "step": 225
    },
    {
      "epoch": 0.16382699868938402,
      "grad_norm": 0.0,
      "learning_rate": 0.0001908587006286703,
      "loss": 4.7892,
      "step": 250
    },
    {
      "epoch": 0.18020969855832242,
      "grad_norm": 7.4760284423828125,
      "learning_rate": 0.00018851523085576096,
      "loss": 1.7774,
      "step": 275
    },
    {
      "epoch": 0.1965923984272608,
      "grad_norm": 0.0,
      "learning_rate": 0.0001859225470942928,
      "loss": 2.1847,
      "step": 300
    },
    {
      "epoch": 0.2129750982961992,
      "grad_norm": 3.8474948406219482,
      "learning_rate": 0.00018308794902653533,
      "loss": 3.0351,
      "step": 325
    },
    {
      "epoch": 0.22935779816513763,
      "grad_norm": 69.14743041992188,
      "learning_rate": 0.00018001941744272767,
      "loss": 2.0028,
      "step": 350
    },
    {
      "epoch": 0.24574049803407602,
      "grad_norm": 10.878889083862305,
      "learning_rate": 0.00017672559177122165,
      "loss": 1.89,
      "step": 375
    },
    {
      "epoch": 0.2621231979030144,
      "grad_norm": 6.749469757080078,
      "learning_rate": 0.00017321574575423406,
      "loss": 2.203,
      "step": 400
    },
    {
      "epoch": 0.27850589777195284,
      "grad_norm": 6.272464752197266,
      "learning_rate": 0.0001694997613376928,
      "loss": 1.8728,
      "step": 425
    },
    {
      "epoch": 0.2948885976408912,
      "grad_norm": 65.30072784423828,
      "learning_rate": 0.0001655881008486903,
      "loss": 2.1526,
      "step": 450
    },
    {
      "epoch": 0.3112712975098296,
      "grad_norm": 0.0,
      "learning_rate": 0.00016149177753887746,
      "loss": 1.8009,
      "step": 475
    },
    {
      "epoch": 0.32765399737876805,
      "grad_norm": 14.52846622467041,
      "learning_rate": 0.0001572223245767338,
      "loss": 2.0906,
      "step": 500
    },
    {
      "epoch": 0.3440366972477064,
      "grad_norm": 10.511902809143066,
      "learning_rate": 0.00015279176257601557,
      "loss": 2.9393,
      "step": 525
    },
    {
      "epoch": 0.36041939711664484,
      "grad_norm": 8.591642379760742,
      "learning_rate": 0.00014821256575180507,
      "loss": 1.1961,
      "step": 550
    },
    {
      "epoch": 0.3768020969855832,
      "grad_norm": 9.35624885559082,
      "learning_rate": 0.00014349762679944896,
      "loss": 1.3758,
      "step": 575
    },
    {
      "epoch": 0.3931847968545216,
      "grad_norm": 51.37662887573242,
      "learning_rate": 0.0001386602205952681,
      "loss": 1.4211,
      "step": 600
    },
    {
      "epoch": 0.40956749672346004,
      "grad_norm": 9.251007080078125,
      "learning_rate": 0.00013371396682124005,
      "loss": 3.5546,
      "step": 625
    },
    {
      "epoch": 0.4259501965923984,
      "grad_norm": 0.0,
      "learning_rate": 0.0001286727916188834,
      "loss": 2.1943,
      "step": 650
    },
    {
      "epoch": 0.44233289646133683,
      "grad_norm": 8.098660469055176,
      "learning_rate": 0.00012355088838030776,
      "loss": 1.8534,
      "step": 675
    },
    {
      "epoch": 0.45871559633027525,
      "grad_norm": 13.290648460388184,
      "learning_rate": 0.00011836267778682133,
      "loss": 2.822,
      "step": 700
    },
    {
      "epoch": 0.4750982961992136,
      "grad_norm": 4.341573715209961,
      "learning_rate": 0.00011312276720760782,
      "loss": 4.252,
      "step": 725
    },
    {
      "epoch": 0.49148099606815204,
      "grad_norm": 49.69887924194336,
      "learning_rate": 0.0001078459095727845,
      "loss": 1.6486,
      "step": 750
    },
    {
      "epoch": 0.5078636959370905,
      "grad_norm": 12.931384086608887,
      "learning_rate": 0.00010254696183663511,
      "loss": 1.824,
      "step": 775
    },
    {
      "epoch": 0.5242463958060288,
      "grad_norm": 8.917986869812012,
      "learning_rate": 9.724084314796292e-05,
      "loss": 1.1574,
      "step": 800
    },
    {
      "epoch": 0.5406290956749672,
      "grad_norm": 11.101146697998047,
      "learning_rate": 9.194249284533576e-05,
      "loss": 3.1605,
      "step": 825
    },
    {
      "epoch": 0.5570117955439057,
      "grad_norm": 8.709878921508789,
      "learning_rate": 8.666682839548719e-05,
      "loss": 2.3045,
      "step": 850
    },
    {
      "epoch": 0.573394495412844,
      "grad_norm": 5.888107776641846,
      "learning_rate": 8.142870339329723e-05,
      "loss": 1.6091,
      "step": 875
    },
    {
      "epoch": 0.5897771952817824,
      "grad_norm": 0.0,
      "learning_rate": 7.624286574160409e-05,
      "loss": 1.6872,
      "step": 900
    },
    {
      "epoch": 0.6061598951507209,
      "grad_norm": 9.354924201965332,
      "learning_rate": 7.112391612859118e-05,
      "loss": 1.5703,
      "step": 925
    },
    {
      "epoch": 0.6225425950196593,
      "grad_norm": 8.378700256347656,
      "learning_rate": 6.608626691965541e-05,
      "loss": 3.3726,
      "step": 950
    },
    {
      "epoch": 0.6389252948885976,
      "grad_norm": 40.91008758544922,
      "learning_rate": 6.114410157949745e-05,
      "loss": 3.2629,
      "step": 975
    },
    {
      "epoch": 0.6553079947575361,
      "grad_norm": 14.765325546264648,
      "learning_rate": 5.631133473868018e-05,
      "loss": 1.8236,
      "step": 1000
    },
    {
      "epoch": 0.6716906946264745,
      "grad_norm": 7.710732936859131,
      "learning_rate": 5.160157301708732e-05,
      "loss": 1.7716,
      "step": 1025
    },
    {
      "epoch": 0.6880733944954128,
      "grad_norm": 21.088882446289062,
      "learning_rate": 4.70280767145842e-05,
      "loss": 1.5484,
      "step": 1050
    },
    {
      "epoch": 0.7044560943643512,
      "grad_norm": 6.960242748260498,
      "learning_rate": 4.260372247674004e-05,
      "loss": 1.1968,
      "step": 1075
    },
    {
      "epoch": 0.7208387942332897,
      "grad_norm": 0.0,
      "learning_rate": 3.8340967040725995e-05,
      "loss": 1.7298,
      "step": 1100
    },
    {
      "epoch": 0.737221494102228,
      "grad_norm": 0.0,
      "learning_rate": 3.425181216346213e-05,
      "loss": 21.8374,
      "step": 1125
    },
    {
      "epoch": 0.7536041939711664,
      "grad_norm": 14.913447380065918,
      "learning_rate": 3.0347770830758316e-05,
      "loss": 2.7944,
      "step": 1150
    },
    {
      "epoch": 0.7699868938401049,
      "grad_norm": 12.520014762878418,
      "learning_rate": 2.6639834842586365e-05,
      "loss": 2.1394,
      "step": 1175
    },
    {
      "epoch": 0.7863695937090432,
      "grad_norm": 8.859058380126953,
      "learning_rate": 2.3138443865747062e-05,
      "loss": 1.3066,
      "step": 1200
    },
    {
      "epoch": 0.8027522935779816,
      "grad_norm": 14.920854568481445,
      "learning_rate": 1.985345604106439e-05,
      "loss": 2.6596,
      "step": 1225
    },
    {
      "epoch": 0.8191349934469201,
      "grad_norm": 32.096649169921875,
      "learning_rate": 1.679412022786172e-05,
      "loss": 9.7768,
      "step": 1250
    },
    {
      "epoch": 0.8355176933158585,
      "grad_norm": 0.0,
      "learning_rate": 1.396904996386551e-05,
      "loss": 8.7551,
      "step": 1275
    },
    {
      "epoch": 0.8519003931847968,
      "grad_norm": 10.609190940856934,
      "learning_rate": 1.1386199213852755e-05,
      "loss": 1.4679,
      "step": 1300
    },
    {
      "epoch": 0.8682830930537353,
      "grad_norm": 4.4171624183654785,
      "learning_rate": 9.052839975320836e-06,
      "loss": 1.4764,
      "step": 1325
    },
    {
      "epoch": 0.8846657929226737,
      "grad_norm": 0.0,
      "learning_rate": 6.975541804231478e-06,
      "loss": 3.5415,
      "step": 1350
    },
    {
      "epoch": 0.901048492791612,
      "grad_norm": 0.0,
      "learning_rate": 5.160153318473815e-06,
      "loss": 2.2546,
      "step": 1375
    },
    {
      "epoch": 0.9174311926605505,
      "grad_norm": 5.587663173675537,
      "learning_rate": 3.611785731123274e-06,
      "loss": 2.6802,
      "step": 1400
    },
    {
      "epoch": 0.9338138925294889,
      "grad_norm": 9.083599090576172,
      "learning_rate": 2.3347984598581783e-06,
      "loss": 1.555,
      "step": 1425
    },
    {
      "epoch": 0.9501965923984272,
      "grad_norm": 4.485108852386475,
      "learning_rate": 1.3327868530511934e-06,
      "loss": 1.736,
      "step": 1450
    },
    {
      "epoch": 0.9665792922673656,
      "grad_norm": 9.273879051208496,
      "learning_rate": 6.08572067092017e-07,
      "loss": 1.7019,
      "step": 1475
    },
    {
      "epoch": 0.9829619921363041,
      "grad_norm": 8.116653442382812,
      "learning_rate": 1.6419312344211347e-07,
      "loss": 5.0805,
      "step": 1500
    }
  ],
  "logging_steps": 25,
  "max_steps": 1526,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6665443461341952.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}