{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.10020040080160321,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000501002004008016,
"grad_norm": 2.6050267219543457,
"learning_rate": 2.9999999999999997e-05,
"loss": 2.0966,
"step": 1
},
{
"epoch": 0.001002004008016032,
"grad_norm": 2.553297996520996,
"learning_rate": 5.9999999999999995e-05,
"loss": 2.5944,
"step": 2
},
{
"epoch": 0.001503006012024048,
"grad_norm": 2.3349509239196777,
"learning_rate": 8.999999999999999e-05,
"loss": 2.5769,
"step": 3
},
{
"epoch": 0.002004008016032064,
"grad_norm": 2.3492772579193115,
"learning_rate": 0.00011999999999999999,
"loss": 2.3153,
"step": 4
},
{
"epoch": 0.00250501002004008,
"grad_norm": 1.5965133905410767,
"learning_rate": 0.00015,
"loss": 2.3999,
"step": 5
},
{
"epoch": 0.003006012024048096,
"grad_norm": 1.4280346632003784,
"learning_rate": 0.00017999999999999998,
"loss": 2.0657,
"step": 6
},
{
"epoch": 0.0035070140280561123,
"grad_norm": 1.726511836051941,
"learning_rate": 0.00020999999999999998,
"loss": 2.2877,
"step": 7
},
{
"epoch": 0.004008016032064128,
"grad_norm": 1.741253137588501,
"learning_rate": 0.00023999999999999998,
"loss": 2.2613,
"step": 8
},
{
"epoch": 0.0045090180360721445,
"grad_norm": 1.8103491067886353,
"learning_rate": 0.00027,
"loss": 2.1171,
"step": 9
},
{
"epoch": 0.00501002004008016,
"grad_norm": 1.4325164556503296,
"learning_rate": 0.0003,
"loss": 2.0913,
"step": 10
},
{
"epoch": 0.005511022044088177,
"grad_norm": 1.3550007343292236,
"learning_rate": 0.0002999246609743847,
"loss": 2.1027,
"step": 11
},
{
"epoch": 0.006012024048096192,
"grad_norm": 1.288711428642273,
"learning_rate": 0.00029984932194876944,
"loss": 2.0211,
"step": 12
},
{
"epoch": 0.006513026052104208,
"grad_norm": 1.4124622344970703,
"learning_rate": 0.00029977398292315414,
"loss": 2.1794,
"step": 13
},
{
"epoch": 0.0070140280561122245,
"grad_norm": 1.2239776849746704,
"learning_rate": 0.0002996986438975389,
"loss": 1.8072,
"step": 14
},
{
"epoch": 0.00751503006012024,
"grad_norm": 1.2423063516616821,
"learning_rate": 0.00029962330487192366,
"loss": 2.1701,
"step": 15
},
{
"epoch": 0.008016032064128256,
"grad_norm": 1.249097228050232,
"learning_rate": 0.00029954796584630836,
"loss": 2.155,
"step": 16
},
{
"epoch": 0.008517034068136272,
"grad_norm": 1.2091352939605713,
"learning_rate": 0.00029947262682069307,
"loss": 2.1671,
"step": 17
},
{
"epoch": 0.009018036072144289,
"grad_norm": 1.168344497680664,
"learning_rate": 0.00029939728779507783,
"loss": 1.8644,
"step": 18
},
{
"epoch": 0.009519038076152305,
"grad_norm": 1.1328169107437134,
"learning_rate": 0.00029932194876946253,
"loss": 2.0983,
"step": 19
},
{
"epoch": 0.01002004008016032,
"grad_norm": 1.081700086593628,
"learning_rate": 0.0002992466097438473,
"loss": 1.9168,
"step": 20
},
{
"epoch": 0.010521042084168337,
"grad_norm": 1.0554951429367065,
"learning_rate": 0.000299171270718232,
"loss": 1.9249,
"step": 21
},
{
"epoch": 0.011022044088176353,
"grad_norm": 1.1648558378219604,
"learning_rate": 0.00029909593169261676,
"loss": 1.9194,
"step": 22
},
{
"epoch": 0.011523046092184368,
"grad_norm": 0.9951939582824707,
"learning_rate": 0.0002990205926670015,
"loss": 1.9181,
"step": 23
},
{
"epoch": 0.012024048096192385,
"grad_norm": 1.038643717765808,
"learning_rate": 0.0002989452536413862,
"loss": 2.0071,
"step": 24
},
{
"epoch": 0.012525050100200401,
"grad_norm": 0.9779028296470642,
"learning_rate": 0.0002988699146157709,
"loss": 2.1351,
"step": 25
},
{
"epoch": 0.013026052104208416,
"grad_norm": 0.9438173174858093,
"learning_rate": 0.0002987945755901557,
"loss": 2.0301,
"step": 26
},
{
"epoch": 0.013527054108216433,
"grad_norm": 0.9372079372406006,
"learning_rate": 0.0002987192365645404,
"loss": 1.8637,
"step": 27
},
{
"epoch": 0.014028056112224449,
"grad_norm": 0.9139612317085266,
"learning_rate": 0.00029864389753892515,
"loss": 1.9235,
"step": 28
},
{
"epoch": 0.014529058116232466,
"grad_norm": 0.9861688613891602,
"learning_rate": 0.00029856855851330985,
"loss": 1.8747,
"step": 29
},
{
"epoch": 0.01503006012024048,
"grad_norm": 0.9747604131698608,
"learning_rate": 0.0002984932194876946,
"loss": 1.9839,
"step": 30
},
{
"epoch": 0.015531062124248497,
"grad_norm": 0.9112619161605835,
"learning_rate": 0.0002984178804620793,
"loss": 2.1169,
"step": 31
},
{
"epoch": 0.01603206412825651,
"grad_norm": 0.8841367363929749,
"learning_rate": 0.0002983425414364641,
"loss": 1.8372,
"step": 32
},
{
"epoch": 0.016533066132264528,
"grad_norm": 0.8845950961112976,
"learning_rate": 0.0002982672024108488,
"loss": 1.7765,
"step": 33
},
{
"epoch": 0.017034068136272545,
"grad_norm": 0.8608232736587524,
"learning_rate": 0.00029819186338523354,
"loss": 1.7745,
"step": 34
},
{
"epoch": 0.01753507014028056,
"grad_norm": 0.8822921514511108,
"learning_rate": 0.00029811652435961824,
"loss": 1.9187,
"step": 35
},
{
"epoch": 0.018036072144288578,
"grad_norm": 0.8354641199111938,
"learning_rate": 0.00029804118533400295,
"loss": 1.9194,
"step": 36
},
{
"epoch": 0.018537074148296594,
"grad_norm": 0.845734179019928,
"learning_rate": 0.00029796584630838776,
"loss": 1.95,
"step": 37
},
{
"epoch": 0.01903807615230461,
"grad_norm": 0.7486892342567444,
"learning_rate": 0.00029789050728277247,
"loss": 1.6453,
"step": 38
},
{
"epoch": 0.019539078156312624,
"grad_norm": 0.8062511682510376,
"learning_rate": 0.00029781516825715717,
"loss": 1.8782,
"step": 39
},
{
"epoch": 0.02004008016032064,
"grad_norm": 0.7674809694290161,
"learning_rate": 0.00029773982923154193,
"loss": 1.8426,
"step": 40
},
{
"epoch": 0.020541082164328657,
"grad_norm": 0.7894824743270874,
"learning_rate": 0.00029766449020592663,
"loss": 1.8384,
"step": 41
},
{
"epoch": 0.021042084168336674,
"grad_norm": 0.7632150053977966,
"learning_rate": 0.0002975891511803114,
"loss": 1.7607,
"step": 42
},
{
"epoch": 0.02154308617234469,
"grad_norm": 0.8097577095031738,
"learning_rate": 0.0002975138121546961,
"loss": 1.9556,
"step": 43
},
{
"epoch": 0.022044088176352707,
"grad_norm": 0.6966553926467896,
"learning_rate": 0.0002974384731290808,
"loss": 1.7072,
"step": 44
},
{
"epoch": 0.022545090180360723,
"grad_norm": 0.886178731918335,
"learning_rate": 0.00029736313410346556,
"loss": 2.3305,
"step": 45
},
{
"epoch": 0.023046092184368736,
"grad_norm": 0.7746443748474121,
"learning_rate": 0.0002972877950778503,
"loss": 2.1129,
"step": 46
},
{
"epoch": 0.023547094188376753,
"grad_norm": 0.8091543912887573,
"learning_rate": 0.000297212456052235,
"loss": 2.0141,
"step": 47
},
{
"epoch": 0.02404809619238477,
"grad_norm": 0.7234994769096375,
"learning_rate": 0.0002971371170266198,
"loss": 1.9564,
"step": 48
},
{
"epoch": 0.024549098196392786,
"grad_norm": 0.7358165979385376,
"learning_rate": 0.0002970617780010045,
"loss": 1.9121,
"step": 49
},
{
"epoch": 0.025050100200400802,
"grad_norm": 0.7158864736557007,
"learning_rate": 0.0002969864389753892,
"loss": 1.7413,
"step": 50
},
{
"epoch": 0.02555110220440882,
"grad_norm": 0.7489972710609436,
"learning_rate": 0.00029691109994977395,
"loss": 1.9817,
"step": 51
},
{
"epoch": 0.026052104208416832,
"grad_norm": 0.7536230087280273,
"learning_rate": 0.00029683576092415866,
"loss": 1.9258,
"step": 52
},
{
"epoch": 0.02655310621242485,
"grad_norm": 0.8415588140487671,
"learning_rate": 0.0002967604218985434,
"loss": 1.9913,
"step": 53
},
{
"epoch": 0.027054108216432865,
"grad_norm": 0.768453061580658,
"learning_rate": 0.0002966850828729282,
"loss": 2.0848,
"step": 54
},
{
"epoch": 0.02755511022044088,
"grad_norm": 0.7687628269195557,
"learning_rate": 0.0002966097438473129,
"loss": 1.9663,
"step": 55
},
{
"epoch": 0.028056112224448898,
"grad_norm": 0.7271875739097595,
"learning_rate": 0.00029653440482169764,
"loss": 1.9076,
"step": 56
},
{
"epoch": 0.028557114228456915,
"grad_norm": 0.6827739477157593,
"learning_rate": 0.00029645906579608234,
"loss": 1.7408,
"step": 57
},
{
"epoch": 0.02905811623246493,
"grad_norm": 0.805442214012146,
"learning_rate": 0.00029638372677046705,
"loss": 1.9494,
"step": 58
},
{
"epoch": 0.029559118236472944,
"grad_norm": 0.7472760677337646,
"learning_rate": 0.0002963083877448518,
"loss": 1.9039,
"step": 59
},
{
"epoch": 0.03006012024048096,
"grad_norm": 0.737997829914093,
"learning_rate": 0.00029623304871923657,
"loss": 1.7575,
"step": 60
},
{
"epoch": 0.030561122244488977,
"grad_norm": 0.7741632461547852,
"learning_rate": 0.00029615770969362127,
"loss": 2.033,
"step": 61
},
{
"epoch": 0.031062124248496994,
"grad_norm": 0.7429640293121338,
"learning_rate": 0.00029608237066800603,
"loss": 1.899,
"step": 62
},
{
"epoch": 0.03156312625250501,
"grad_norm": 0.7532819509506226,
"learning_rate": 0.00029600703164239073,
"loss": 1.8829,
"step": 63
},
{
"epoch": 0.03206412825651302,
"grad_norm": 0.7010864615440369,
"learning_rate": 0.00029593169261677544,
"loss": 1.821,
"step": 64
},
{
"epoch": 0.03256513026052104,
"grad_norm": 0.737218976020813,
"learning_rate": 0.0002958563535911602,
"loss": 1.9476,
"step": 65
},
{
"epoch": 0.033066132264529056,
"grad_norm": 0.7213771939277649,
"learning_rate": 0.0002957810145655449,
"loss": 1.9,
"step": 66
},
{
"epoch": 0.03356713426853707,
"grad_norm": 0.7673630714416504,
"learning_rate": 0.00029570567553992966,
"loss": 2.0354,
"step": 67
},
{
"epoch": 0.03406813627254509,
"grad_norm": 0.721883237361908,
"learning_rate": 0.0002956303365143144,
"loss": 1.7045,
"step": 68
},
{
"epoch": 0.034569138276553106,
"grad_norm": 0.7481613159179688,
"learning_rate": 0.0002955549974886991,
"loss": 1.8593,
"step": 69
},
{
"epoch": 0.03507014028056112,
"grad_norm": 0.7304365038871765,
"learning_rate": 0.00029547965846308383,
"loss": 1.752,
"step": 70
},
{
"epoch": 0.03557114228456914,
"grad_norm": 0.7513949871063232,
"learning_rate": 0.0002954043194374686,
"loss": 1.9917,
"step": 71
},
{
"epoch": 0.036072144288577156,
"grad_norm": 0.7858319878578186,
"learning_rate": 0.0002953289804118533,
"loss": 2.087,
"step": 72
},
{
"epoch": 0.03657314629258517,
"grad_norm": 0.7061849236488342,
"learning_rate": 0.00029525364138623805,
"loss": 1.8584,
"step": 73
},
{
"epoch": 0.03707414829659319,
"grad_norm": 0.7773633599281311,
"learning_rate": 0.00029517830236062276,
"loss": 1.9295,
"step": 74
},
{
"epoch": 0.037575150300601205,
"grad_norm": 0.7323243618011475,
"learning_rate": 0.0002951029633350075,
"loss": 1.9308,
"step": 75
},
{
"epoch": 0.03807615230460922,
"grad_norm": 0.725524365901947,
"learning_rate": 0.0002950276243093923,
"loss": 1.9616,
"step": 76
},
{
"epoch": 0.03857715430861723,
"grad_norm": 0.7054190039634705,
"learning_rate": 0.000294952285283777,
"loss": 1.6339,
"step": 77
},
{
"epoch": 0.03907815631262525,
"grad_norm": 0.6920881271362305,
"learning_rate": 0.0002948769462581617,
"loss": 1.809,
"step": 78
},
{
"epoch": 0.039579158316633264,
"grad_norm": 0.6498042941093445,
"learning_rate": 0.00029480160723254644,
"loss": 1.6668,
"step": 79
},
{
"epoch": 0.04008016032064128,
"grad_norm": 0.6788399815559387,
"learning_rate": 0.00029472626820693115,
"loss": 1.713,
"step": 80
},
{
"epoch": 0.0405811623246493,
"grad_norm": 0.7612844705581665,
"learning_rate": 0.0002946509291813159,
"loss": 1.9079,
"step": 81
},
{
"epoch": 0.041082164328657314,
"grad_norm": 0.6935157775878906,
"learning_rate": 0.0002945755901557006,
"loss": 1.7071,
"step": 82
},
{
"epoch": 0.04158316633266533,
"grad_norm": 0.7637260556221008,
"learning_rate": 0.00029450025113008537,
"loss": 2.1187,
"step": 83
},
{
"epoch": 0.04208416833667335,
"grad_norm": 0.7054077982902527,
"learning_rate": 0.0002944249121044701,
"loss": 1.7924,
"step": 84
},
{
"epoch": 0.042585170340681364,
"grad_norm": 0.701574981212616,
"learning_rate": 0.00029434957307885483,
"loss": 1.8163,
"step": 85
},
{
"epoch": 0.04308617234468938,
"grad_norm": 0.7836251854896545,
"learning_rate": 0.00029427423405323954,
"loss": 1.6773,
"step": 86
},
{
"epoch": 0.0435871743486974,
"grad_norm": 0.7043123245239258,
"learning_rate": 0.0002941988950276243,
"loss": 1.8403,
"step": 87
},
{
"epoch": 0.04408817635270541,
"grad_norm": 0.7133166790008545,
"learning_rate": 0.000294123556002009,
"loss": 1.8445,
"step": 88
},
{
"epoch": 0.04458917835671343,
"grad_norm": 0.7085109949111938,
"learning_rate": 0.00029404821697639376,
"loss": 1.8926,
"step": 89
},
{
"epoch": 0.045090180360721446,
"grad_norm": 0.701048731803894,
"learning_rate": 0.00029397287795077847,
"loss": 1.9562,
"step": 90
},
{
"epoch": 0.045591182364729456,
"grad_norm": 0.6904398202896118,
"learning_rate": 0.0002938975389251632,
"loss": 1.8759,
"step": 91
},
{
"epoch": 0.04609218436873747,
"grad_norm": 0.7591177821159363,
"learning_rate": 0.00029382219989954793,
"loss": 1.9114,
"step": 92
},
{
"epoch": 0.04659318637274549,
"grad_norm": 0.7278134822845459,
"learning_rate": 0.0002937468608739327,
"loss": 1.9612,
"step": 93
},
{
"epoch": 0.047094188376753505,
"grad_norm": 0.7665961384773254,
"learning_rate": 0.0002936715218483174,
"loss": 2.0373,
"step": 94
},
{
"epoch": 0.04759519038076152,
"grad_norm": 0.7089666128158569,
"learning_rate": 0.00029359618282270215,
"loss": 2.062,
"step": 95
},
{
"epoch": 0.04809619238476954,
"grad_norm": 0.6922783851623535,
"learning_rate": 0.00029352084379708686,
"loss": 1.8768,
"step": 96
},
{
"epoch": 0.048597194388777555,
"grad_norm": 0.6904755234718323,
"learning_rate": 0.00029344550477147156,
"loss": 1.7936,
"step": 97
},
{
"epoch": 0.04909819639278557,
"grad_norm": 0.7081161141395569,
"learning_rate": 0.0002933701657458563,
"loss": 1.7919,
"step": 98
},
{
"epoch": 0.04959919839679359,
"grad_norm": 0.7969085574150085,
"learning_rate": 0.0002932948267202411,
"loss": 2.0337,
"step": 99
},
{
"epoch": 0.050100200400801605,
"grad_norm": 0.7019357085227966,
"learning_rate": 0.0002932194876946258,
"loss": 1.9433,
"step": 100
},
{
"epoch": 0.05060120240480962,
"grad_norm": 0.6784742474555969,
"learning_rate": 0.00029314414866901054,
"loss": 1.7878,
"step": 101
},
{
"epoch": 0.05110220440881764,
"grad_norm": 0.7405863404273987,
"learning_rate": 0.00029306880964339525,
"loss": 1.7159,
"step": 102
},
{
"epoch": 0.051603206412825654,
"grad_norm": 0.6663607358932495,
"learning_rate": 0.00029299347061777995,
"loss": 1.7941,
"step": 103
},
{
"epoch": 0.052104208416833664,
"grad_norm": 0.7274753451347351,
"learning_rate": 0.0002929181315921647,
"loss": 1.8692,
"step": 104
},
{
"epoch": 0.05260521042084168,
"grad_norm": 0.7117050886154175,
"learning_rate": 0.0002928427925665494,
"loss": 1.8508,
"step": 105
},
{
"epoch": 0.0531062124248497,
"grad_norm": 0.6324647068977356,
"learning_rate": 0.0002927674535409342,
"loss": 1.6226,
"step": 106
},
{
"epoch": 0.053607214428857713,
"grad_norm": 0.6551498770713806,
"learning_rate": 0.00029269211451531894,
"loss": 1.6189,
"step": 107
},
{
"epoch": 0.05410821643286573,
"grad_norm": 0.7170161008834839,
"learning_rate": 0.00029261677548970364,
"loss": 1.9402,
"step": 108
},
{
"epoch": 0.05460921843687375,
"grad_norm": 0.6295390129089355,
"learning_rate": 0.0002925414364640884,
"loss": 1.6223,
"step": 109
},
{
"epoch": 0.05511022044088176,
"grad_norm": 0.6735630631446838,
"learning_rate": 0.0002924660974384731,
"loss": 1.9057,
"step": 110
},
{
"epoch": 0.05561122244488978,
"grad_norm": 0.6805582046508789,
"learning_rate": 0.0002923907584128578,
"loss": 1.7012,
"step": 111
},
{
"epoch": 0.056112224448897796,
"grad_norm": 0.6882889866828918,
"learning_rate": 0.00029231541938724257,
"loss": 1.7673,
"step": 112
},
{
"epoch": 0.05661322645290581,
"grad_norm": 0.6445188522338867,
"learning_rate": 0.00029224008036162727,
"loss": 1.7567,
"step": 113
},
{
"epoch": 0.05711422845691383,
"grad_norm": 0.6788764595985413,
"learning_rate": 0.00029216474133601203,
"loss": 1.8939,
"step": 114
},
{
"epoch": 0.057615230460921846,
"grad_norm": 0.6786359548568726,
"learning_rate": 0.0002920894023103968,
"loss": 1.7107,
"step": 115
},
{
"epoch": 0.05811623246492986,
"grad_norm": 0.73366779088974,
"learning_rate": 0.0002920140632847815,
"loss": 1.9247,
"step": 116
},
{
"epoch": 0.05861723446893788,
"grad_norm": 0.6751691699028015,
"learning_rate": 0.0002919387242591662,
"loss": 1.8489,
"step": 117
},
{
"epoch": 0.05911823647294589,
"grad_norm": 0.6604770421981812,
"learning_rate": 0.00029186338523355096,
"loss": 1.9441,
"step": 118
},
{
"epoch": 0.059619238476953905,
"grad_norm": 0.6513468027114868,
"learning_rate": 0.00029178804620793566,
"loss": 1.9047,
"step": 119
},
{
"epoch": 0.06012024048096192,
"grad_norm": 0.7937461137771606,
"learning_rate": 0.0002917127071823204,
"loss": 1.9885,
"step": 120
},
{
"epoch": 0.06062124248496994,
"grad_norm": 0.6777886748313904,
"learning_rate": 0.0002916373681567051,
"loss": 2.0286,
"step": 121
},
{
"epoch": 0.061122244488977955,
"grad_norm": 0.6839383244514465,
"learning_rate": 0.0002915620291310899,
"loss": 1.8146,
"step": 122
},
{
"epoch": 0.06162324649298597,
"grad_norm": 0.6735116243362427,
"learning_rate": 0.00029148669010547464,
"loss": 1.7767,
"step": 123
},
{
"epoch": 0.06212424849699399,
"grad_norm": 0.7088850736618042,
"learning_rate": 0.00029141135107985935,
"loss": 1.9,
"step": 124
},
{
"epoch": 0.062625250501002,
"grad_norm": 0.6435878872871399,
"learning_rate": 0.00029133601205424405,
"loss": 1.7717,
"step": 125
},
{
"epoch": 0.06312625250501001,
"grad_norm": 0.6343419551849365,
"learning_rate": 0.0002912606730286288,
"loss": 1.9353,
"step": 126
},
{
"epoch": 0.06362725450901803,
"grad_norm": 0.6651162505149841,
"learning_rate": 0.0002911853340030135,
"loss": 1.8287,
"step": 127
},
{
"epoch": 0.06412825651302605,
"grad_norm": 0.6920936703681946,
"learning_rate": 0.0002911099949773983,
"loss": 1.9564,
"step": 128
},
{
"epoch": 0.06462925851703406,
"grad_norm": 0.6264630556106567,
"learning_rate": 0.00029103465595178304,
"loss": 1.7342,
"step": 129
},
{
"epoch": 0.06513026052104208,
"grad_norm": 0.6150006055831909,
"learning_rate": 0.00029095931692616774,
"loss": 1.7269,
"step": 130
},
{
"epoch": 0.0656312625250501,
"grad_norm": 0.6793592572212219,
"learning_rate": 0.00029088397790055245,
"loss": 1.9202,
"step": 131
},
{
"epoch": 0.06613226452905811,
"grad_norm": 0.7007801532745361,
"learning_rate": 0.0002908086388749372,
"loss": 1.9389,
"step": 132
},
{
"epoch": 0.06663326653306613,
"grad_norm": 0.5819805264472961,
"learning_rate": 0.0002907332998493219,
"loss": 1.4803,
"step": 133
},
{
"epoch": 0.06713426853707415,
"grad_norm": 0.6895102262496948,
"learning_rate": 0.00029065796082370667,
"loss": 1.8499,
"step": 134
},
{
"epoch": 0.06763527054108216,
"grad_norm": 0.6814382672309875,
"learning_rate": 0.00029058262179809137,
"loss": 1.9159,
"step": 135
},
{
"epoch": 0.06813627254509018,
"grad_norm": 0.6768064498901367,
"learning_rate": 0.0002905072827724761,
"loss": 1.9397,
"step": 136
},
{
"epoch": 0.0686372745490982,
"grad_norm": 0.6451818943023682,
"learning_rate": 0.00029043194374686084,
"loss": 1.7264,
"step": 137
},
{
"epoch": 0.06913827655310621,
"grad_norm": 0.6633362770080566,
"learning_rate": 0.0002903566047212456,
"loss": 1.7829,
"step": 138
},
{
"epoch": 0.06963927855711423,
"grad_norm": 0.659122109413147,
"learning_rate": 0.0002902812656956303,
"loss": 1.8371,
"step": 139
},
{
"epoch": 0.07014028056112225,
"grad_norm": 0.657511293888092,
"learning_rate": 0.00029020592667001506,
"loss": 1.8257,
"step": 140
},
{
"epoch": 0.07064128256513026,
"grad_norm": 0.6987048387527466,
"learning_rate": 0.00029013058764439976,
"loss": 1.9275,
"step": 141
},
{
"epoch": 0.07114228456913828,
"grad_norm": 0.6087613105773926,
"learning_rate": 0.0002900552486187845,
"loss": 1.7095,
"step": 142
},
{
"epoch": 0.0716432865731463,
"grad_norm": 0.6897568702697754,
"learning_rate": 0.00028997990959316923,
"loss": 1.8934,
"step": 143
},
{
"epoch": 0.07214428857715431,
"grad_norm": 0.7103919386863708,
"learning_rate": 0.00028990457056755393,
"loss": 1.8729,
"step": 144
},
{
"epoch": 0.07264529058116233,
"grad_norm": 0.5942186117172241,
"learning_rate": 0.0002898292315419387,
"loss": 1.6083,
"step": 145
},
{
"epoch": 0.07314629258517034,
"grad_norm": 0.6732192039489746,
"learning_rate": 0.00028975389251632345,
"loss": 1.9068,
"step": 146
},
{
"epoch": 0.07364729458917836,
"grad_norm": 0.6461887359619141,
"learning_rate": 0.00028967855349070815,
"loss": 1.6627,
"step": 147
},
{
"epoch": 0.07414829659318638,
"grad_norm": 0.6353530883789062,
"learning_rate": 0.0002896032144650929,
"loss": 1.8885,
"step": 148
},
{
"epoch": 0.0746492985971944,
"grad_norm": 0.5766530632972717,
"learning_rate": 0.0002895278754394776,
"loss": 1.4533,
"step": 149
},
{
"epoch": 0.07515030060120241,
"grad_norm": 0.6750043630599976,
"learning_rate": 0.0002894525364138623,
"loss": 2.0198,
"step": 150
},
{
"epoch": 0.07565130260521043,
"grad_norm": 0.589191734790802,
"learning_rate": 0.0002893771973882471,
"loss": 1.6217,
"step": 151
},
{
"epoch": 0.07615230460921844,
"grad_norm": 0.7778326272964478,
"learning_rate": 0.00028930185836263184,
"loss": 1.8912,
"step": 152
},
{
"epoch": 0.07665330661322645,
"grad_norm": 0.7026570439338684,
"learning_rate": 0.00028922651933701655,
"loss": 1.9116,
"step": 153
},
{
"epoch": 0.07715430861723446,
"grad_norm": 0.6697170734405518,
"learning_rate": 0.0002891511803114013,
"loss": 1.8512,
"step": 154
},
{
"epoch": 0.07765531062124248,
"grad_norm": 0.6900460124015808,
"learning_rate": 0.000289075841285786,
"loss": 1.9111,
"step": 155
},
{
"epoch": 0.0781563126252505,
"grad_norm": 0.6959229111671448,
"learning_rate": 0.0002890005022601707,
"loss": 1.9443,
"step": 156
},
{
"epoch": 0.07865731462925851,
"grad_norm": 0.676102340221405,
"learning_rate": 0.0002889251632345555,
"loss": 1.7728,
"step": 157
},
{
"epoch": 0.07915831663326653,
"grad_norm": 0.6908376216888428,
"learning_rate": 0.0002888498242089402,
"loss": 1.7032,
"step": 158
},
{
"epoch": 0.07965931863727455,
"grad_norm": 0.6415160298347473,
"learning_rate": 0.00028877448518332494,
"loss": 1.644,
"step": 159
},
{
"epoch": 0.08016032064128256,
"grad_norm": 0.7101219296455383,
"learning_rate": 0.0002886991461577097,
"loss": 1.9767,
"step": 160
},
{
"epoch": 0.08066132264529058,
"grad_norm": 0.6580759882926941,
"learning_rate": 0.0002886238071320944,
"loss": 1.7492,
"step": 161
},
{
"epoch": 0.0811623246492986,
"grad_norm": 0.6474287509918213,
"learning_rate": 0.00028854846810647916,
"loss": 1.8357,
"step": 162
},
{
"epoch": 0.08166332665330661,
"grad_norm": 0.622855007648468,
"learning_rate": 0.00028847312908086386,
"loss": 1.7535,
"step": 163
},
{
"epoch": 0.08216432865731463,
"grad_norm": 0.6816668510437012,
"learning_rate": 0.00028839779005524857,
"loss": 2.0255,
"step": 164
},
{
"epoch": 0.08266533066132264,
"grad_norm": 0.663633406162262,
"learning_rate": 0.00028832245102963333,
"loss": 1.7689,
"step": 165
},
{
"epoch": 0.08316633266533066,
"grad_norm": 0.6390612721443176,
"learning_rate": 0.00028824711200401803,
"loss": 1.735,
"step": 166
},
{
"epoch": 0.08366733466933868,
"grad_norm": 0.660830557346344,
"learning_rate": 0.0002881717729784028,
"loss": 1.7635,
"step": 167
},
{
"epoch": 0.0841683366733467,
"grad_norm": 0.6632187366485596,
"learning_rate": 0.00028809643395278755,
"loss": 1.983,
"step": 168
},
{
"epoch": 0.08466933867735471,
"grad_norm": 0.6418716311454773,
"learning_rate": 0.00028802109492717226,
"loss": 1.7174,
"step": 169
},
{
"epoch": 0.08517034068136273,
"grad_norm": 0.6375272274017334,
"learning_rate": 0.00028794575590155696,
"loss": 1.7768,
"step": 170
},
{
"epoch": 0.08567134268537074,
"grad_norm": 0.6906164884567261,
"learning_rate": 0.0002878704168759417,
"loss": 1.9821,
"step": 171
},
{
"epoch": 0.08617234468937876,
"grad_norm": 0.6385218501091003,
"learning_rate": 0.0002877950778503264,
"loss": 1.7104,
"step": 172
},
{
"epoch": 0.08667334669338678,
"grad_norm": 0.6564759016036987,
"learning_rate": 0.0002877197388247112,
"loss": 1.8649,
"step": 173
},
{
"epoch": 0.0871743486973948,
"grad_norm": 0.6422074437141418,
"learning_rate": 0.0002876443997990959,
"loss": 1.899,
"step": 174
},
{
"epoch": 0.08767535070140281,
"grad_norm": 0.6718552112579346,
"learning_rate": 0.00028756906077348065,
"loss": 2.0651,
"step": 175
},
{
"epoch": 0.08817635270541083,
"grad_norm": 0.6550820469856262,
"learning_rate": 0.0002874937217478654,
"loss": 1.9969,
"step": 176
},
{
"epoch": 0.08867735470941884,
"grad_norm": 0.6562731862068176,
"learning_rate": 0.0002874183827222501,
"loss": 1.8166,
"step": 177
},
{
"epoch": 0.08917835671342686,
"grad_norm": 0.6029812097549438,
"learning_rate": 0.0002873430436966348,
"loss": 1.7566,
"step": 178
},
{
"epoch": 0.08967935871743488,
"grad_norm": 0.6309391260147095,
"learning_rate": 0.0002872677046710196,
"loss": 1.8427,
"step": 179
},
{
"epoch": 0.09018036072144289,
"grad_norm": 0.598958432674408,
"learning_rate": 0.0002871923656454043,
"loss": 1.6701,
"step": 180
},
{
"epoch": 0.0906813627254509,
"grad_norm": 0.6371743679046631,
"learning_rate": 0.00028711702661978904,
"loss": 1.7658,
"step": 181
},
{
"epoch": 0.09118236472945891,
"grad_norm": 0.6294912099838257,
"learning_rate": 0.00028704168759417374,
"loss": 1.6954,
"step": 182
},
{
"epoch": 0.09168336673346693,
"grad_norm": 0.6587108373641968,
"learning_rate": 0.0002869663485685585,
"loss": 1.8646,
"step": 183
},
{
"epoch": 0.09218436873747494,
"grad_norm": 0.6413068771362305,
"learning_rate": 0.0002868910095429432,
"loss": 2.0061,
"step": 184
},
{
"epoch": 0.09268537074148296,
"grad_norm": 0.6469525694847107,
"learning_rate": 0.00028681567051732796,
"loss": 1.821,
"step": 185
},
{
"epoch": 0.09318637274549098,
"grad_norm": 0.6417365074157715,
"learning_rate": 0.00028674033149171267,
"loss": 1.8994,
"step": 186
},
{
"epoch": 0.093687374749499,
"grad_norm": 0.6416391134262085,
"learning_rate": 0.00028666499246609743,
"loss": 1.7628,
"step": 187
},
{
"epoch": 0.09418837675350701,
"grad_norm": 0.6492236852645874,
"learning_rate": 0.00028658965344048213,
"loss": 1.7946,
"step": 188
},
{
"epoch": 0.09468937875751503,
"grad_norm": 0.635639488697052,
"learning_rate": 0.00028651431441486684,
"loss": 1.7534,
"step": 189
},
{
"epoch": 0.09519038076152304,
"grad_norm": 0.6563454270362854,
"learning_rate": 0.0002864389753892516,
"loss": 1.7212,
"step": 190
},
{
"epoch": 0.09569138276553106,
"grad_norm": 0.660172700881958,
"learning_rate": 0.00028636363636363636,
"loss": 1.7969,
"step": 191
},
{
"epoch": 0.09619238476953908,
"grad_norm": 0.6818733215332031,
"learning_rate": 0.00028628829733802106,
"loss": 1.8091,
"step": 192
},
{
"epoch": 0.0966933867735471,
"grad_norm": 0.6508086323738098,
"learning_rate": 0.0002862129583124058,
"loss": 1.8333,
"step": 193
},
{
"epoch": 0.09719438877755511,
"grad_norm": 0.6614415049552917,
"learning_rate": 0.0002861376192867905,
"loss": 1.9563,
"step": 194
},
{
"epoch": 0.09769539078156313,
"grad_norm": 0.6511570811271667,
"learning_rate": 0.0002860622802611753,
"loss": 1.9191,
"step": 195
},
{
"epoch": 0.09819639278557114,
"grad_norm": 0.6392014026641846,
"learning_rate": 0.00028598694123556,
"loss": 1.8118,
"step": 196
},
{
"epoch": 0.09869739478957916,
"grad_norm": 0.6255333423614502,
"learning_rate": 0.0002859116022099447,
"loss": 1.9144,
"step": 197
},
{
"epoch": 0.09919839679358718,
"grad_norm": 0.6069381833076477,
"learning_rate": 0.00028583626318432945,
"loss": 1.8036,
"step": 198
},
{
"epoch": 0.09969939879759519,
"grad_norm": 0.6432672739028931,
"learning_rate": 0.0002857609241587142,
"loss": 1.8292,
"step": 199
},
{
"epoch": 0.10020040080160321,
"grad_norm": 0.6631090641021729,
"learning_rate": 0.0002856855851330989,
"loss": 1.8854,
"step": 200
}
],
"logging_steps": 1,
"max_steps": 3992,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.89216143179776e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}