{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6784781797836628, "eval_steps": 500, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003729951510630362, "grad_norm": 2.955669403076172, "learning_rate": 0.0, "loss": 0.2543, "step": 1 }, { "epoch": 0.0007459903021260724, "grad_norm": 2.713864803314209, "learning_rate": 3.010299956639811e-07, "loss": 0.2329, "step": 2 }, { "epoch": 0.0011189854531891085, "grad_norm": 2.927621364593506, "learning_rate": 4.771212547196623e-07, "loss": 0.2408, "step": 3 }, { "epoch": 0.0014919806042521448, "grad_norm": 2.876699447631836, "learning_rate": 6.020599913279622e-07, "loss": 0.2299, "step": 4 }, { "epoch": 0.001864975755315181, "grad_norm": 2.6549205780029297, "learning_rate": 6.989700043360186e-07, "loss": 0.2462, "step": 5 }, { "epoch": 0.002237970906378217, "grad_norm": 2.748915910720825, "learning_rate": 7.781512503836435e-07, "loss": 0.2396, "step": 6 }, { "epoch": 0.0026109660574412533, "grad_norm": 2.625251293182373, "learning_rate": 8.450980400142567e-07, "loss": 0.2451, "step": 7 }, { "epoch": 0.0029839612085042896, "grad_norm": 2.622999668121338, "learning_rate": 9.030899869919433e-07, "loss": 0.2554, "step": 8 }, { "epoch": 0.0033569563595673255, "grad_norm": 2.497288703918457, "learning_rate": 9.542425094393247e-07, "loss": 0.2358, "step": 9 }, { "epoch": 0.003729951510630362, "grad_norm": 2.275991201400757, "learning_rate": 9.999999999999997e-07, "loss": 0.243, "step": 10 }, { "epoch": 0.004102946661693398, "grad_norm": 1.8718013763427734, "learning_rate": 1.0413926851582248e-06, "loss": 0.2046, "step": 11 }, { "epoch": 0.004475941812756434, "grad_norm": 1.9501583576202393, "learning_rate": 1.0791812460476246e-06, "loss": 0.2302, "step": 12 }, { "epoch": 0.00484893696381947, "grad_norm": 2.136972665786743, "learning_rate": 1.1139433523068364e-06, "loss": 0.2303, "step": 13 }, { "epoch": 0.005221932114882507, "grad_norm": 1.9301296472549438, "learning_rate": 1.1461280356782378e-06, "loss": 0.2058, "step": 14 }, { "epoch": 0.005594927265945543, "grad_norm": 3.295438528060913, "learning_rate": 1.176091259055681e-06, "loss": 0.2011, "step": 15 }, { "epoch": 0.005967922417008579, "grad_norm": 2.8567750453948975, "learning_rate": 1.2041199826559244e-06, "loss": 0.2118, "step": 16 }, { "epoch": 0.006340917568071615, "grad_norm": 2.3600707054138184, "learning_rate": 1.230448921378274e-06, "loss": 0.2533, "step": 17 }, { "epoch": 0.006713912719134651, "grad_norm": 2.2572262287139893, "learning_rate": 1.2552725051033058e-06, "loss": 0.2429, "step": 18 }, { "epoch": 0.007086907870197687, "grad_norm": 2.9337754249572754, "learning_rate": 1.2787536009528286e-06, "loss": 0.2271, "step": 19 }, { "epoch": 0.007459903021260724, "grad_norm": 1.6263501644134521, "learning_rate": 1.301029995663981e-06, "loss": 0.2094, "step": 20 }, { "epoch": 0.007832898172323759, "grad_norm": 1.9904942512512207, "learning_rate": 1.322219294733919e-06, "loss": 0.2274, "step": 21 }, { "epoch": 0.008205893323386795, "grad_norm": 1.6295989751815796, "learning_rate": 1.3424226808222062e-06, "loss": 0.2102, "step": 22 }, { "epoch": 0.008578888474449832, "grad_norm": 1.9166569709777832, "learning_rate": 1.3617278360175927e-06, "loss": 0.2173, "step": 23 }, { "epoch": 0.008951883625512868, "grad_norm": 1.7434173822402954, "learning_rate": 1.3802112417116059e-06, "loss": 0.1959, "step": 24 }, { "epoch": 0.009324878776575904, "grad_norm": 1.0918080806732178, "learning_rate": 1.3979400086720373e-06, "loss": 0.2462, "step": 25 }, { "epoch": 0.00969787392763894, "grad_norm": 1.8288853168487549, "learning_rate": 1.4149733479708177e-06, "loss": 0.2012, "step": 26 }, { "epoch": 0.010070869078701977, "grad_norm": 1.131218433380127, "learning_rate": 1.431363764158987e-06, "loss": 0.1855, "step": 27 }, { "epoch": 0.010443864229765013, "grad_norm": 1.374854326248169, "learning_rate": 1.4471580313422189e-06, "loss": 0.2073, "step": 28 }, { "epoch": 0.01081685938082805, "grad_norm": 1.0204018354415894, "learning_rate": 1.4623979978989559e-06, "loss": 0.2089, "step": 29 }, { "epoch": 0.011189854531891086, "grad_norm": 1.2161109447479248, "learning_rate": 1.477121254719662e-06, "loss": 0.2116, "step": 30 }, { "epoch": 0.011562849682954122, "grad_norm": 1.7500050067901611, "learning_rate": 1.4913616938342723e-06, "loss": 0.1983, "step": 31 }, { "epoch": 0.011935844834017158, "grad_norm": 1.40103280544281, "learning_rate": 1.5051499783199057e-06, "loss": 0.2164, "step": 32 }, { "epoch": 0.012308839985080195, "grad_norm": 1.3091260194778442, "learning_rate": 1.5185139398778872e-06, "loss": 0.2198, "step": 33 }, { "epoch": 0.01268183513614323, "grad_norm": 1.0630847215652466, "learning_rate": 1.5314789170422548e-06, "loss": 0.1956, "step": 34 }, { "epoch": 0.013054830287206266, "grad_norm": 1.1215120553970337, "learning_rate": 1.544068044350275e-06, "loss": 0.1907, "step": 35 }, { "epoch": 0.013427825438269302, "grad_norm": 1.2612782716751099, "learning_rate": 1.556302500767287e-06, "loss": 0.2248, "step": 36 }, { "epoch": 0.013800820589332338, "grad_norm": 1.2203145027160645, "learning_rate": 1.5682017240669948e-06, "loss": 0.202, "step": 37 }, { "epoch": 0.014173815740395375, "grad_norm": 1.3385965824127197, "learning_rate": 1.57978359661681e-06, "loss": 0.2182, "step": 38 }, { "epoch": 0.01454681089145841, "grad_norm": 1.2350348234176636, "learning_rate": 1.5910646070264987e-06, "loss": 0.1981, "step": 39 }, { "epoch": 0.014919806042521447, "grad_norm": 1.058457851409912, "learning_rate": 1.602059991327962e-06, "loss": 0.2061, "step": 40 }, { "epoch": 0.015292801193584483, "grad_norm": 1.133886456489563, "learning_rate": 1.6127838567197353e-06, "loss": 0.1984, "step": 41 }, { "epoch": 0.015665796344647518, "grad_norm": 1.70027756690979, "learning_rate": 1.6232492903979003e-06, "loss": 0.2397, "step": 42 }, { "epoch": 0.016038791495710556, "grad_norm": 2.2900636196136475, "learning_rate": 1.633468455579586e-06, "loss": 0.1712, "step": 43 }, { "epoch": 0.01641178664677359, "grad_norm": 0.9927043318748474, "learning_rate": 1.643452676486187e-06, "loss": 0.2022, "step": 44 }, { "epoch": 0.01678478179783663, "grad_norm": 1.5136033296585083, "learning_rate": 1.6532125137753431e-06, "loss": 0.1932, "step": 45 }, { "epoch": 0.017157776948899663, "grad_norm": 1.2458966970443726, "learning_rate": 1.6627578316815738e-06, "loss": 0.2054, "step": 46 }, { "epoch": 0.0175307720999627, "grad_norm": 1.0522123575210571, "learning_rate": 1.672097857935717e-06, "loss": 0.1966, "step": 47 }, { "epoch": 0.017903767251025736, "grad_norm": 1.1892930269241333, "learning_rate": 1.6812412373755868e-06, "loss": 0.1917, "step": 48 }, { "epoch": 0.018276762402088774, "grad_norm": 1.0238450765609741, "learning_rate": 1.6901960800285134e-06, "loss": 0.2131, "step": 49 }, { "epoch": 0.01864975755315181, "grad_norm": 0.7709140777587891, "learning_rate": 1.6989700043360184e-06, "loss": 0.1808, "step": 50 }, { "epoch": 0.019022752704214847, "grad_norm": 1.056961178779602, "learning_rate": 1.707570176097936e-06, "loss": 0.1805, "step": 51 }, { "epoch": 0.01939574785527788, "grad_norm": 0.8817536234855652, "learning_rate": 1.716003343634799e-06, "loss": 0.2203, "step": 52 }, { "epoch": 0.01976874300634092, "grad_norm": 1.0316263437271118, "learning_rate": 1.7242758696007888e-06, "loss": 0.2179, "step": 53 }, { "epoch": 0.020141738157403954, "grad_norm": 1.4016993045806885, "learning_rate": 1.7323937598229684e-06, "loss": 0.1971, "step": 54 }, { "epoch": 0.02051473330846699, "grad_norm": 1.1570782661437988, "learning_rate": 1.7403626894942437e-06, "loss": 0.2006, "step": 55 }, { "epoch": 0.020887728459530026, "grad_norm": 1.2456053495407104, "learning_rate": 1.7481880270062002e-06, "loss": 0.171, "step": 56 }, { "epoch": 0.02126072361059306, "grad_norm": 0.9606248140335083, "learning_rate": 1.7558748556724912e-06, "loss": 0.1974, "step": 57 }, { "epoch": 0.0216337187616561, "grad_norm": 0.8683868050575256, "learning_rate": 1.7634279935629368e-06, "loss": 0.2095, "step": 58 }, { "epoch": 0.022006713912719134, "grad_norm": 0.9230842590332031, "learning_rate": 1.7708520116421439e-06, "loss": 0.1957, "step": 59 }, { "epoch": 0.02237970906378217, "grad_norm": 0.8752296566963196, "learning_rate": 1.7781512503836432e-06, "loss": 0.1932, "step": 60 }, { "epoch": 0.022752704214845206, "grad_norm": 1.2449864149093628, "learning_rate": 1.7853298350107666e-06, "loss": 0.1931, "step": 61 }, { "epoch": 0.023125699365908244, "grad_norm": 0.7964703440666199, "learning_rate": 1.7923916894982536e-06, "loss": 0.2117, "step": 62 }, { "epoch": 0.02349869451697128, "grad_norm": 1.113854169845581, "learning_rate": 1.7993405494535814e-06, "loss": 0.1987, "step": 63 }, { "epoch": 0.023871689668034317, "grad_norm": 0.742511510848999, "learning_rate": 1.8061799739838866e-06, "loss": 0.1958, "step": 64 }, { "epoch": 0.02424468481909735, "grad_norm": 0.9517892599105835, "learning_rate": 1.8129133566428552e-06, "loss": 0.1976, "step": 65 }, { "epoch": 0.02461767997016039, "grad_norm": 0.8582099080085754, "learning_rate": 1.8195439355418683e-06, "loss": 0.2092, "step": 66 }, { "epoch": 0.024990675121223424, "grad_norm": 1.150067687034607, "learning_rate": 1.826074802700826e-06, "loss": 0.1873, "step": 67 }, { "epoch": 0.02536367027228646, "grad_norm": 1.0755053758621216, "learning_rate": 1.8325089127062361e-06, "loss": 0.1788, "step": 68 }, { "epoch": 0.025736665423349497, "grad_norm": 0.9981770515441895, "learning_rate": 1.8388490907372552e-06, "loss": 0.2019, "step": 69 }, { "epoch": 0.02610966057441253, "grad_norm": 1.0963281393051147, "learning_rate": 1.8450980400142566e-06, "loss": 0.2137, "step": 70 }, { "epoch": 0.02648265572547557, "grad_norm": 1.247193694114685, "learning_rate": 1.851258348719075e-06, "loss": 0.1775, "step": 71 }, { "epoch": 0.026855650876538604, "grad_norm": 0.881563663482666, "learning_rate": 1.857332496431268e-06, "loss": 0.1994, "step": 72 }, { "epoch": 0.027228646027601642, "grad_norm": 0.8829440474510193, "learning_rate": 1.8633228601204554e-06, "loss": 0.1966, "step": 73 }, { "epoch": 0.027601641178664676, "grad_norm": 1.0856279134750366, "learning_rate": 1.8692317197309759e-06, "loss": 0.192, "step": 74 }, { "epoch": 0.027974636329727715, "grad_norm": 1.0018643140792847, "learning_rate": 1.8750612633916996e-06, "loss": 0.2011, "step": 75 }, { "epoch": 0.02834763148079075, "grad_norm": 1.0125221014022827, "learning_rate": 1.880813592280791e-06, "loss": 0.2066, "step": 76 }, { "epoch": 0.028720626631853787, "grad_norm": 0.7755595445632935, "learning_rate": 1.8864907251724815e-06, "loss": 0.205, "step": 77 }, { "epoch": 0.02909362178291682, "grad_norm": 1.0572590827941895, "learning_rate": 1.89209460269048e-06, "loss": 0.2002, "step": 78 }, { "epoch": 0.02946661693397986, "grad_norm": 0.9640480875968933, "learning_rate": 1.897627091290441e-06, "loss": 0.1896, "step": 79 }, { "epoch": 0.029839612085042894, "grad_norm": 1.0146069526672363, "learning_rate": 1.903089986991943e-06, "loss": 0.1888, "step": 80 }, { "epoch": 0.03021260723610593, "grad_norm": 0.9471408724784851, "learning_rate": 1.9084850188786494e-06, "loss": 0.1862, "step": 81 }, { "epoch": 0.030585602387168967, "grad_norm": 1.1787012815475464, "learning_rate": 1.9138138523837166e-06, "loss": 0.1614, "step": 82 }, { "epoch": 0.030958597538232, "grad_norm": 1.0137715339660645, "learning_rate": 1.919078092376074e-06, "loss": 0.1861, "step": 83 }, { "epoch": 0.031331592689295036, "grad_norm": 1.10236656665802, "learning_rate": 1.9242792860618812e-06, "loss": 0.194, "step": 84 }, { "epoch": 0.031704587840358074, "grad_norm": 0.9688572287559509, "learning_rate": 1.9294189257142923e-06, "loss": 0.2075, "step": 85 }, { "epoch": 0.03207758299142111, "grad_norm": 1.0640106201171875, "learning_rate": 1.934498451243567e-06, "loss": 0.1944, "step": 86 }, { "epoch": 0.03245057814248415, "grad_norm": 1.1639996767044067, "learning_rate": 1.939519252618618e-06, "loss": 0.1922, "step": 87 }, { "epoch": 0.03282357329354718, "grad_norm": 0.9969357252120972, "learning_rate": 1.9444826721501684e-06, "loss": 0.1824, "step": 88 }, { "epoch": 0.03319656844461022, "grad_norm": 0.9153484106063843, "learning_rate": 1.949390006644912e-06, "loss": 0.196, "step": 89 }, { "epoch": 0.03356956359567326, "grad_norm": 0.8214101195335388, "learning_rate": 1.9542425094393244e-06, "loss": 0.2051, "step": 90 }, { "epoch": 0.033942558746736295, "grad_norm": 1.301434874534607, "learning_rate": 1.9590413923210933e-06, "loss": 0.2061, "step": 91 }, { "epoch": 0.03431555389779933, "grad_norm": 1.0066206455230713, "learning_rate": 1.963787827345555e-06, "loss": 0.1786, "step": 92 }, { "epoch": 0.034688549048862365, "grad_norm": 0.8446815013885498, "learning_rate": 1.968482948553935e-06, "loss": 0.1915, "step": 93 }, { "epoch": 0.0350615441999254, "grad_norm": 1.7559953927993774, "learning_rate": 1.9731278535996984e-06, "loss": 0.2031, "step": 94 }, { "epoch": 0.035434539350988434, "grad_norm": 1.3251174688339233, "learning_rate": 1.9777236052888472e-06, "loss": 0.1757, "step": 95 }, { "epoch": 0.03580753450205147, "grad_norm": 1.0417511463165283, "learning_rate": 1.982271233039568e-06, "loss": 0.1803, "step": 96 }, { "epoch": 0.03618052965311451, "grad_norm": 0.8866796493530273, "learning_rate": 1.9867717342662444e-06, "loss": 0.2173, "step": 97 }, { "epoch": 0.03655352480417755, "grad_norm": 0.7538737654685974, "learning_rate": 1.9912260756924947e-06, "loss": 0.2242, "step": 98 }, { "epoch": 0.03692651995524058, "grad_norm": 1.2100152969360352, "learning_rate": 1.9956351945975495e-06, "loss": 0.204, "step": 99 }, { "epoch": 0.03729951510630362, "grad_norm": 0.9184908270835876, "learning_rate": 1.9999999999999995e-06, "loss": 0.2209, "step": 100 }, { "epoch": 0.037672510257366655, "grad_norm": 1.1192511320114136, "learning_rate": 2e-06, "loss": 0.1934, "step": 101 }, { "epoch": 0.03804550540842969, "grad_norm": 1.255412220954895, "learning_rate": 2e-06, "loss": 0.1951, "step": 102 }, { "epoch": 0.038418500559492724, "grad_norm": 1.2811542749404907, "learning_rate": 2e-06, "loss": 0.1593, "step": 103 }, { "epoch": 0.03879149571055576, "grad_norm": 0.8891837000846863, "learning_rate": 2e-06, "loss": 0.1923, "step": 104 }, { "epoch": 0.0391644908616188, "grad_norm": 0.9238028526306152, "learning_rate": 2e-06, "loss": 0.2087, "step": 105 }, { "epoch": 0.03953748601268184, "grad_norm": 1.4864000082015991, "learning_rate": 2e-06, "loss": 0.1639, "step": 106 }, { "epoch": 0.03991048116374487, "grad_norm": 1.5685158967971802, "learning_rate": 2e-06, "loss": 0.2061, "step": 107 }, { "epoch": 0.04028347631480791, "grad_norm": 1.1058951616287231, "learning_rate": 2e-06, "loss": 0.1954, "step": 108 }, { "epoch": 0.040656471465870946, "grad_norm": 0.897661566734314, "learning_rate": 2e-06, "loss": 0.1861, "step": 109 }, { "epoch": 0.04102946661693398, "grad_norm": 0.9436293244361877, "learning_rate": 2e-06, "loss": 0.186, "step": 110 }, { "epoch": 0.041402461767997015, "grad_norm": 0.8275522589683533, "learning_rate": 2e-06, "loss": 0.1921, "step": 111 }, { "epoch": 0.04177545691906005, "grad_norm": 0.9334837794303894, "learning_rate": 2e-06, "loss": 0.1993, "step": 112 }, { "epoch": 0.04214845207012309, "grad_norm": 1.0663117170333862, "learning_rate": 2e-06, "loss": 0.18, "step": 113 }, { "epoch": 0.04252144722118612, "grad_norm": 0.9661346077919006, "learning_rate": 2e-06, "loss": 0.1814, "step": 114 }, { "epoch": 0.04289444237224916, "grad_norm": 0.843009352684021, "learning_rate": 2e-06, "loss": 0.2044, "step": 115 }, { "epoch": 0.0432674375233122, "grad_norm": 0.7755104899406433, "learning_rate": 2e-06, "loss": 0.2029, "step": 116 }, { "epoch": 0.043640432674375236, "grad_norm": 0.9779033064842224, "learning_rate": 2e-06, "loss": 0.1855, "step": 117 }, { "epoch": 0.04401342782543827, "grad_norm": 1.4884803295135498, "learning_rate": 2e-06, "loss": 0.193, "step": 118 }, { "epoch": 0.044386422976501305, "grad_norm": 1.2431998252868652, "learning_rate": 2e-06, "loss": 0.1994, "step": 119 }, { "epoch": 0.04475941812756434, "grad_norm": 1.1431639194488525, "learning_rate": 2e-06, "loss": 0.1925, "step": 120 }, { "epoch": 0.04513241327862738, "grad_norm": 0.8979003429412842, "learning_rate": 2e-06, "loss": 0.2201, "step": 121 }, { "epoch": 0.04550540842969041, "grad_norm": 0.9939451813697815, "learning_rate": 2e-06, "loss": 0.1773, "step": 122 }, { "epoch": 0.04587840358075345, "grad_norm": 0.9666232466697693, "learning_rate": 2e-06, "loss": 0.2003, "step": 123 }, { "epoch": 0.04625139873181649, "grad_norm": 0.9700254797935486, "learning_rate": 2e-06, "loss": 0.1916, "step": 124 }, { "epoch": 0.04662439388287952, "grad_norm": 1.2820014953613281, "learning_rate": 2e-06, "loss": 0.2008, "step": 125 }, { "epoch": 0.04699738903394256, "grad_norm": 0.7606964707374573, "learning_rate": 2e-06, "loss": 0.2068, "step": 126 }, { "epoch": 0.047370384185005596, "grad_norm": 0.8402016758918762, "learning_rate": 2e-06, "loss": 0.1929, "step": 127 }, { "epoch": 0.047743379336068634, "grad_norm": 1.0176241397857666, "learning_rate": 2e-06, "loss": 0.2051, "step": 128 }, { "epoch": 0.048116374487131665, "grad_norm": 1.2330819368362427, "learning_rate": 2e-06, "loss": 0.1786, "step": 129 }, { "epoch": 0.0484893696381947, "grad_norm": 1.5852580070495605, "learning_rate": 2e-06, "loss": 0.1676, "step": 130 }, { "epoch": 0.04886236478925774, "grad_norm": 0.9063729643821716, "learning_rate": 2e-06, "loss": 0.2031, "step": 131 }, { "epoch": 0.04923535994032078, "grad_norm": 0.8103752136230469, "learning_rate": 2e-06, "loss": 0.1876, "step": 132 }, { "epoch": 0.04960835509138381, "grad_norm": 0.7853859663009644, "learning_rate": 2e-06, "loss": 0.211, "step": 133 }, { "epoch": 0.04998135024244685, "grad_norm": 0.7973482608795166, "learning_rate": 2e-06, "loss": 0.2155, "step": 134 }, { "epoch": 0.050354345393509886, "grad_norm": 0.9142418503761292, "learning_rate": 2e-06, "loss": 0.2043, "step": 135 }, { "epoch": 0.05072734054457292, "grad_norm": 1.3480682373046875, "learning_rate": 2e-06, "loss": 0.1788, "step": 136 }, { "epoch": 0.051100335695635955, "grad_norm": 0.9693803787231445, "learning_rate": 2e-06, "loss": 0.2146, "step": 137 }, { "epoch": 0.05147333084669899, "grad_norm": 1.0029312372207642, "learning_rate": 2e-06, "loss": 0.2168, "step": 138 }, { "epoch": 0.05184632599776203, "grad_norm": 0.991188108921051, "learning_rate": 2e-06, "loss": 0.2021, "step": 139 }, { "epoch": 0.05221932114882506, "grad_norm": 0.7589095830917358, "learning_rate": 2e-06, "loss": 0.2042, "step": 140 }, { "epoch": 0.0525923162998881, "grad_norm": 1.10875403881073, "learning_rate": 2e-06, "loss": 0.1882, "step": 141 }, { "epoch": 0.05296531145095114, "grad_norm": 0.9789022207260132, "learning_rate": 2e-06, "loss": 0.2072, "step": 142 }, { "epoch": 0.05333830660201418, "grad_norm": 0.9074448347091675, "learning_rate": 2e-06, "loss": 0.202, "step": 143 }, { "epoch": 0.05371130175307721, "grad_norm": 1.0179435014724731, "learning_rate": 2e-06, "loss": 0.1909, "step": 144 }, { "epoch": 0.054084296904140246, "grad_norm": 2.4293954372406006, "learning_rate": 2e-06, "loss": 0.1937, "step": 145 }, { "epoch": 0.054457292055203284, "grad_norm": 0.8433589935302734, "learning_rate": 2e-06, "loss": 0.2037, "step": 146 }, { "epoch": 0.05483028720626632, "grad_norm": 0.8750677704811096, "learning_rate": 2e-06, "loss": 0.1897, "step": 147 }, { "epoch": 0.05520328235732935, "grad_norm": 0.8713390231132507, "learning_rate": 2e-06, "loss": 0.2145, "step": 148 }, { "epoch": 0.05557627750839239, "grad_norm": 0.8548018932342529, "learning_rate": 2e-06, "loss": 0.1762, "step": 149 }, { "epoch": 0.05594927265945543, "grad_norm": 0.8632192015647888, "learning_rate": 2e-06, "loss": 0.2014, "step": 150 }, { "epoch": 0.05632226781051846, "grad_norm": 0.9655972123146057, "learning_rate": 2e-06, "loss": 0.2165, "step": 151 }, { "epoch": 0.0566952629615815, "grad_norm": 1.2930134534835815, "learning_rate": 2e-06, "loss": 0.2135, "step": 152 }, { "epoch": 0.057068258112644536, "grad_norm": 0.7961334586143494, "learning_rate": 2e-06, "loss": 0.2126, "step": 153 }, { "epoch": 0.057441253263707574, "grad_norm": 0.8275865316390991, "learning_rate": 2e-06, "loss": 0.1855, "step": 154 }, { "epoch": 0.057814248414770605, "grad_norm": 0.7583549618721008, "learning_rate": 2e-06, "loss": 0.1888, "step": 155 }, { "epoch": 0.05818724356583364, "grad_norm": 1.1340233087539673, "learning_rate": 2e-06, "loss": 0.1896, "step": 156 }, { "epoch": 0.05856023871689668, "grad_norm": 1.314001441001892, "learning_rate": 2e-06, "loss": 0.1818, "step": 157 }, { "epoch": 0.05893323386795972, "grad_norm": 1.2041810750961304, "learning_rate": 2e-06, "loss": 0.1748, "step": 158 }, { "epoch": 0.05930622901902275, "grad_norm": 0.9739815592765808, "learning_rate": 2e-06, "loss": 0.1779, "step": 159 }, { "epoch": 0.05967922417008579, "grad_norm": 0.939154326915741, "learning_rate": 2e-06, "loss": 0.2087, "step": 160 }, { "epoch": 0.06005221932114883, "grad_norm": 0.7634104490280151, "learning_rate": 2e-06, "loss": 0.1954, "step": 161 }, { "epoch": 0.06042521447221186, "grad_norm": 1.1457438468933105, "learning_rate": 2e-06, "loss": 0.1798, "step": 162 }, { "epoch": 0.060798209623274896, "grad_norm": 0.8657371401786804, "learning_rate": 2e-06, "loss": 0.1818, "step": 163 }, { "epoch": 0.061171204774337934, "grad_norm": 0.906326174736023, "learning_rate": 2e-06, "loss": 0.2085, "step": 164 }, { "epoch": 0.06154419992540097, "grad_norm": 0.9799226522445679, "learning_rate": 2e-06, "loss": 0.175, "step": 165 }, { "epoch": 0.061917195076464, "grad_norm": 1.2733439207077026, "learning_rate": 2e-06, "loss": 0.191, "step": 166 }, { "epoch": 0.06229019022752704, "grad_norm": 0.9803994297981262, "learning_rate": 2e-06, "loss": 0.2143, "step": 167 }, { "epoch": 0.06266318537859007, "grad_norm": 0.808297872543335, "learning_rate": 2e-06, "loss": 0.2333, "step": 168 }, { "epoch": 0.06303618052965311, "grad_norm": 1.0419412851333618, "learning_rate": 2e-06, "loss": 0.188, "step": 169 }, { "epoch": 0.06340917568071615, "grad_norm": 0.9039874076843262, "learning_rate": 2e-06, "loss": 0.2016, "step": 170 }, { "epoch": 0.06378217083177919, "grad_norm": 1.0869637727737427, "learning_rate": 2e-06, "loss": 0.1708, "step": 171 }, { "epoch": 0.06415516598284222, "grad_norm": 0.8984447121620178, "learning_rate": 2e-06, "loss": 0.2016, "step": 172 }, { "epoch": 0.06452816113390526, "grad_norm": 1.0992993116378784, "learning_rate": 2e-06, "loss": 0.1831, "step": 173 }, { "epoch": 0.0649011562849683, "grad_norm": 1.123399019241333, "learning_rate": 2e-06, "loss": 0.1771, "step": 174 }, { "epoch": 0.06527415143603134, "grad_norm": 1.1219019889831543, "learning_rate": 2e-06, "loss": 0.199, "step": 175 }, { "epoch": 0.06564714658709436, "grad_norm": 1.1419055461883545, "learning_rate": 2e-06, "loss": 0.1877, "step": 176 }, { "epoch": 0.0660201417381574, "grad_norm": 1.0656007528305054, "learning_rate": 2e-06, "loss": 0.1792, "step": 177 }, { "epoch": 0.06639313688922044, "grad_norm": 0.7174457907676697, "learning_rate": 2e-06, "loss": 0.2195, "step": 178 }, { "epoch": 0.06676613204028348, "grad_norm": 1.1026972532272339, "learning_rate": 2e-06, "loss": 0.1791, "step": 179 }, { "epoch": 0.06713912719134651, "grad_norm": 0.8938913345336914, "learning_rate": 2e-06, "loss": 0.1896, "step": 180 }, { "epoch": 0.06751212234240955, "grad_norm": 0.9341595768928528, "learning_rate": 2e-06, "loss": 0.1981, "step": 181 }, { "epoch": 0.06788511749347259, "grad_norm": 0.7811640501022339, "learning_rate": 2e-06, "loss": 0.2028, "step": 182 }, { "epoch": 0.06825811264453562, "grad_norm": 0.8894217610359192, "learning_rate": 2e-06, "loss": 0.2211, "step": 183 }, { "epoch": 0.06863110779559865, "grad_norm": 1.116247296333313, "learning_rate": 2e-06, "loss": 0.1927, "step": 184 }, { "epoch": 0.06900410294666169, "grad_norm": 0.786838173866272, "learning_rate": 2e-06, "loss": 0.1948, "step": 185 }, { "epoch": 0.06937709809772473, "grad_norm": 0.8856045007705688, "learning_rate": 2e-06, "loss": 0.2024, "step": 186 }, { "epoch": 0.06975009324878777, "grad_norm": 0.8843008875846863, "learning_rate": 2e-06, "loss": 0.1917, "step": 187 }, { "epoch": 0.0701230883998508, "grad_norm": 0.9210066199302673, "learning_rate": 2e-06, "loss": 0.1894, "step": 188 }, { "epoch": 0.07049608355091384, "grad_norm": 1.0365571975708008, "learning_rate": 2e-06, "loss": 0.2012, "step": 189 }, { "epoch": 0.07086907870197687, "grad_norm": 1.4541336297988892, "learning_rate": 2e-06, "loss": 0.1958, "step": 190 }, { "epoch": 0.0712420738530399, "grad_norm": 0.8088018298149109, "learning_rate": 2e-06, "loss": 0.2112, "step": 191 }, { "epoch": 0.07161506900410294, "grad_norm": 1.2797662019729614, "learning_rate": 2e-06, "loss": 0.209, "step": 192 }, { "epoch": 0.07198806415516598, "grad_norm": 0.8636647462844849, "learning_rate": 2e-06, "loss": 0.177, "step": 193 }, { "epoch": 0.07236105930622902, "grad_norm": 1.190615177154541, "learning_rate": 2e-06, "loss": 0.2059, "step": 194 }, { "epoch": 0.07273405445729206, "grad_norm": 1.0763156414031982, "learning_rate": 2e-06, "loss": 0.1879, "step": 195 }, { "epoch": 0.0731070496083551, "grad_norm": 0.8184335231781006, "learning_rate": 2e-06, "loss": 0.1746, "step": 196 }, { "epoch": 0.07348004475941813, "grad_norm": 1.031040906906128, "learning_rate": 2e-06, "loss": 0.2061, "step": 197 }, { "epoch": 0.07385303991048116, "grad_norm": 1.0925304889678955, "learning_rate": 2e-06, "loss": 0.2315, "step": 198 }, { "epoch": 0.0742260350615442, "grad_norm": 1.263104796409607, "learning_rate": 2e-06, "loss": 0.1874, "step": 199 }, { "epoch": 0.07459903021260723, "grad_norm": 1.2525988817214966, "learning_rate": 2e-06, "loss": 0.2001, "step": 200 }, { "epoch": 0.07497202536367027, "grad_norm": 1.1437304019927979, "learning_rate": 2e-06, "loss": 0.196, "step": 201 }, { "epoch": 0.07534502051473331, "grad_norm": 0.9249920845031738, "learning_rate": 2e-06, "loss": 0.2284, "step": 202 }, { "epoch": 0.07571801566579635, "grad_norm": 1.3175647258758545, "learning_rate": 2e-06, "loss": 0.1773, "step": 203 }, { "epoch": 0.07609101081685939, "grad_norm": 0.9062038064002991, "learning_rate": 2e-06, "loss": 0.1751, "step": 204 }, { "epoch": 0.07646400596792241, "grad_norm": 1.3245359659194946, "learning_rate": 2e-06, "loss": 0.1951, "step": 205 }, { "epoch": 0.07683700111898545, "grad_norm": 0.9871267676353455, "learning_rate": 2e-06, "loss": 0.1738, "step": 206 }, { "epoch": 0.07720999627004849, "grad_norm": 1.2542921304702759, "learning_rate": 2e-06, "loss": 0.1839, "step": 207 }, { "epoch": 0.07758299142111152, "grad_norm": 1.0713342428207397, "learning_rate": 2e-06, "loss": 0.1924, "step": 208 }, { "epoch": 0.07795598657217456, "grad_norm": 0.9084940552711487, "learning_rate": 2e-06, "loss": 0.2068, "step": 209 }, { "epoch": 0.0783289817232376, "grad_norm": 1.0184133052825928, "learning_rate": 2e-06, "loss": 0.1979, "step": 210 }, { "epoch": 0.07870197687430064, "grad_norm": 1.3750758171081543, "learning_rate": 2e-06, "loss": 0.2034, "step": 211 }, { "epoch": 0.07907497202536368, "grad_norm": 1.185585856437683, "learning_rate": 2e-06, "loss": 0.1855, "step": 212 }, { "epoch": 0.0794479671764267, "grad_norm": 0.7795955538749695, "learning_rate": 2e-06, "loss": 0.1736, "step": 213 }, { "epoch": 0.07982096232748974, "grad_norm": 0.92829430103302, "learning_rate": 2e-06, "loss": 0.1913, "step": 214 }, { "epoch": 0.08019395747855278, "grad_norm": 1.0296121835708618, "learning_rate": 2e-06, "loss": 0.2, "step": 215 }, { "epoch": 0.08056695262961582, "grad_norm": 0.9393232464790344, "learning_rate": 2e-06, "loss": 0.2108, "step": 216 }, { "epoch": 0.08093994778067885, "grad_norm": 1.2556380033493042, "learning_rate": 2e-06, "loss": 0.1663, "step": 217 }, { "epoch": 0.08131294293174189, "grad_norm": 0.9467172026634216, "learning_rate": 2e-06, "loss": 0.1894, "step": 218 }, { "epoch": 0.08168593808280493, "grad_norm": 0.9067661762237549, "learning_rate": 2e-06, "loss": 0.1879, "step": 219 }, { "epoch": 0.08205893323386795, "grad_norm": 1.1636991500854492, "learning_rate": 2e-06, "loss": 0.2085, "step": 220 }, { "epoch": 0.08243192838493099, "grad_norm": 1.255859375, "learning_rate": 2e-06, "loss": 0.1905, "step": 221 }, { "epoch": 0.08280492353599403, "grad_norm": 2.1955971717834473, "learning_rate": 2e-06, "loss": 0.2061, "step": 222 }, { "epoch": 0.08317791868705707, "grad_norm": 1.1468454599380493, "learning_rate": 2e-06, "loss": 0.2105, "step": 223 }, { "epoch": 0.0835509138381201, "grad_norm": 0.7652864456176758, "learning_rate": 2e-06, "loss": 0.2134, "step": 224 }, { "epoch": 0.08392390898918314, "grad_norm": 0.8539823293685913, "learning_rate": 2e-06, "loss": 0.2242, "step": 225 }, { "epoch": 0.08429690414024618, "grad_norm": 0.7995286583900452, "learning_rate": 2e-06, "loss": 0.2107, "step": 226 }, { "epoch": 0.08466989929130922, "grad_norm": 1.2320066690444946, "learning_rate": 2e-06, "loss": 0.1923, "step": 227 }, { "epoch": 0.08504289444237224, "grad_norm": 1.2519886493682861, "learning_rate": 2e-06, "loss": 0.2074, "step": 228 }, { "epoch": 0.08541588959343528, "grad_norm": 0.7685458660125732, "learning_rate": 2e-06, "loss": 0.2147, "step": 229 }, { "epoch": 0.08578888474449832, "grad_norm": 1.037226676940918, "learning_rate": 2e-06, "loss": 0.2003, "step": 230 }, { "epoch": 0.08616187989556136, "grad_norm": 0.7983611822128296, "learning_rate": 2e-06, "loss": 0.1942, "step": 231 }, { "epoch": 0.0865348750466244, "grad_norm": 1.1226369142532349, "learning_rate": 2e-06, "loss": 0.2006, "step": 232 }, { "epoch": 0.08690787019768743, "grad_norm": 1.4999130964279175, "learning_rate": 2e-06, "loss": 0.1807, "step": 233 }, { "epoch": 0.08728086534875047, "grad_norm": 1.0049039125442505, "learning_rate": 2e-06, "loss": 0.2106, "step": 234 }, { "epoch": 0.0876538604998135, "grad_norm": 1.027787208557129, "learning_rate": 2e-06, "loss": 0.2027, "step": 235 }, { "epoch": 0.08802685565087653, "grad_norm": 0.9472967386245728, "learning_rate": 2e-06, "loss": 0.1971, "step": 236 }, { "epoch": 0.08839985080193957, "grad_norm": 1.3772896528244019, "learning_rate": 2e-06, "loss": 0.2022, "step": 237 }, { "epoch": 0.08877284595300261, "grad_norm": 0.9266098141670227, "learning_rate": 2e-06, "loss": 0.2105, "step": 238 }, { "epoch": 0.08914584110406565, "grad_norm": 0.9963822960853577, "learning_rate": 2e-06, "loss": 0.1864, "step": 239 }, { "epoch": 0.08951883625512869, "grad_norm": 0.9289550185203552, "learning_rate": 2e-06, "loss": 0.1789, "step": 240 }, { "epoch": 0.08989183140619172, "grad_norm": 1.4354194402694702, "learning_rate": 2e-06, "loss": 0.1835, "step": 241 }, { "epoch": 0.09026482655725476, "grad_norm": 1.280236005783081, "learning_rate": 2e-06, "loss": 0.1897, "step": 242 }, { "epoch": 0.09063782170831779, "grad_norm": 0.9676737785339355, "learning_rate": 2e-06, "loss": 0.1828, "step": 243 }, { "epoch": 0.09101081685938082, "grad_norm": 0.7829356789588928, "learning_rate": 2e-06, "loss": 0.2072, "step": 244 }, { "epoch": 0.09138381201044386, "grad_norm": 1.206229567527771, "learning_rate": 2e-06, "loss": 0.2062, "step": 245 }, { "epoch": 0.0917568071615069, "grad_norm": 1.1395920515060425, "learning_rate": 2e-06, "loss": 0.191, "step": 246 }, { "epoch": 0.09212980231256994, "grad_norm": 0.850797176361084, "learning_rate": 2e-06, "loss": 0.2031, "step": 247 }, { "epoch": 0.09250279746363298, "grad_norm": 0.9985802173614502, "learning_rate": 2e-06, "loss": 0.2052, "step": 248 }, { "epoch": 0.09287579261469601, "grad_norm": 0.9354230165481567, "learning_rate": 2e-06, "loss": 0.2058, "step": 249 }, { "epoch": 0.09324878776575904, "grad_norm": 0.9014205932617188, "learning_rate": 2e-06, "loss": 0.2067, "step": 250 }, { "epoch": 0.09362178291682208, "grad_norm": 1.1266734600067139, "learning_rate": 2e-06, "loss": 0.2052, "step": 251 }, { "epoch": 0.09399477806788512, "grad_norm": 0.9357256889343262, "learning_rate": 2e-06, "loss": 0.1723, "step": 252 }, { "epoch": 0.09436777321894815, "grad_norm": 0.9371101260185242, "learning_rate": 2e-06, "loss": 0.1831, "step": 253 }, { "epoch": 0.09474076837001119, "grad_norm": 0.9625357985496521, "learning_rate": 2e-06, "loss": 0.1735, "step": 254 }, { "epoch": 0.09511376352107423, "grad_norm": 0.9862954020500183, "learning_rate": 2e-06, "loss": 0.1981, "step": 255 }, { "epoch": 0.09548675867213727, "grad_norm": 0.778066873550415, "learning_rate": 2e-06, "loss": 0.2011, "step": 256 }, { "epoch": 0.09585975382320029, "grad_norm": 1.0456721782684326, "learning_rate": 2e-06, "loss": 0.2029, "step": 257 }, { "epoch": 0.09623274897426333, "grad_norm": 1.0561538934707642, "learning_rate": 2e-06, "loss": 0.1896, "step": 258 }, { "epoch": 0.09660574412532637, "grad_norm": 0.9710410237312317, "learning_rate": 2e-06, "loss": 0.2272, "step": 259 }, { "epoch": 0.0969787392763894, "grad_norm": 1.1012240648269653, "learning_rate": 2e-06, "loss": 0.1758, "step": 260 }, { "epoch": 0.09735173442745244, "grad_norm": 0.96613609790802, "learning_rate": 2e-06, "loss": 0.1893, "step": 261 }, { "epoch": 0.09772472957851548, "grad_norm": 0.9015029668807983, "learning_rate": 2e-06, "loss": 0.208, "step": 262 }, { "epoch": 0.09809772472957852, "grad_norm": 0.9254395365715027, "learning_rate": 2e-06, "loss": 0.2085, "step": 263 }, { "epoch": 0.09847071988064156, "grad_norm": 1.2276625633239746, "learning_rate": 2e-06, "loss": 0.1866, "step": 264 }, { "epoch": 0.09884371503170458, "grad_norm": 0.9839171171188354, "learning_rate": 2e-06, "loss": 0.2015, "step": 265 }, { "epoch": 0.09921671018276762, "grad_norm": 1.039355993270874, "learning_rate": 2e-06, "loss": 0.2199, "step": 266 }, { "epoch": 0.09958970533383066, "grad_norm": 0.9424523115158081, "learning_rate": 2e-06, "loss": 0.2119, "step": 267 }, { "epoch": 0.0999627004848937, "grad_norm": 1.0655133724212646, "learning_rate": 2e-06, "loss": 0.1691, "step": 268 }, { "epoch": 0.10033569563595673, "grad_norm": 0.7542335391044617, "learning_rate": 2e-06, "loss": 0.2039, "step": 269 }, { "epoch": 0.10070869078701977, "grad_norm": 0.9677282571792603, "learning_rate": 2e-06, "loss": 0.1993, "step": 270 }, { "epoch": 0.10108168593808281, "grad_norm": 1.0334980487823486, "learning_rate": 2e-06, "loss": 0.1813, "step": 271 }, { "epoch": 0.10145468108914583, "grad_norm": 0.9737362265586853, "learning_rate": 2e-06, "loss": 0.2016, "step": 272 }, { "epoch": 0.10182767624020887, "grad_norm": 0.9492107629776001, "learning_rate": 2e-06, "loss": 0.1988, "step": 273 }, { "epoch": 0.10220067139127191, "grad_norm": 1.1284170150756836, "learning_rate": 2e-06, "loss": 0.1938, "step": 274 }, { "epoch": 0.10257366654233495, "grad_norm": 0.9622642397880554, "learning_rate": 2e-06, "loss": 0.2135, "step": 275 }, { "epoch": 0.10294666169339799, "grad_norm": 0.8183611035346985, "learning_rate": 2e-06, "loss": 0.1963, "step": 276 }, { "epoch": 0.10331965684446102, "grad_norm": 0.8391555547714233, "learning_rate": 2e-06, "loss": 0.214, "step": 277 }, { "epoch": 0.10369265199552406, "grad_norm": 0.8915409445762634, "learning_rate": 2e-06, "loss": 0.2282, "step": 278 }, { "epoch": 0.1040656471465871, "grad_norm": 1.1723452806472778, "learning_rate": 2e-06, "loss": 0.1909, "step": 279 }, { "epoch": 0.10443864229765012, "grad_norm": 1.0293711423873901, "learning_rate": 2e-06, "loss": 0.2017, "step": 280 }, { "epoch": 0.10481163744871316, "grad_norm": 0.9263574481010437, "learning_rate": 2e-06, "loss": 0.2024, "step": 281 }, { "epoch": 0.1051846325997762, "grad_norm": 1.0220046043395996, "learning_rate": 2e-06, "loss": 0.2088, "step": 282 }, { "epoch": 0.10555762775083924, "grad_norm": 0.8803762197494507, "learning_rate": 2e-06, "loss": 0.1902, "step": 283 }, { "epoch": 0.10593062290190228, "grad_norm": 0.8716146349906921, "learning_rate": 2e-06, "loss": 0.2226, "step": 284 }, { "epoch": 0.10630361805296532, "grad_norm": 0.8786336183547974, "learning_rate": 2e-06, "loss": 0.1901, "step": 285 }, { "epoch": 0.10667661320402835, "grad_norm": 0.9511817693710327, "learning_rate": 2e-06, "loss": 0.218, "step": 286 }, { "epoch": 0.10704960835509138, "grad_norm": 0.7654321193695068, "learning_rate": 2e-06, "loss": 0.2308, "step": 287 }, { "epoch": 0.10742260350615442, "grad_norm": 0.9913997650146484, "learning_rate": 2e-06, "loss": 0.2105, "step": 288 }, { "epoch": 0.10779559865721745, "grad_norm": 0.9783602356910706, "learning_rate": 2e-06, "loss": 0.1997, "step": 289 }, { "epoch": 0.10816859380828049, "grad_norm": 1.1759568452835083, "learning_rate": 2e-06, "loss": 0.1915, "step": 290 }, { "epoch": 0.10854158895934353, "grad_norm": 0.9460901021957397, "learning_rate": 2e-06, "loss": 0.2067, "step": 291 }, { "epoch": 0.10891458411040657, "grad_norm": 0.9386464357376099, "learning_rate": 2e-06, "loss": 0.2079, "step": 292 }, { "epoch": 0.1092875792614696, "grad_norm": 1.0207915306091309, "learning_rate": 2e-06, "loss": 0.1926, "step": 293 }, { "epoch": 0.10966057441253264, "grad_norm": 1.0852059125900269, "learning_rate": 2e-06, "loss": 0.1953, "step": 294 }, { "epoch": 0.11003356956359567, "grad_norm": 0.8511086702346802, "learning_rate": 2e-06, "loss": 0.2202, "step": 295 }, { "epoch": 0.1104065647146587, "grad_norm": 2.204251766204834, "learning_rate": 2e-06, "loss": 0.2201, "step": 296 }, { "epoch": 0.11077955986572174, "grad_norm": 1.0798625946044922, "learning_rate": 2e-06, "loss": 0.1694, "step": 297 }, { "epoch": 0.11115255501678478, "grad_norm": 0.8933678865432739, "learning_rate": 2e-06, "loss": 0.2085, "step": 298 }, { "epoch": 0.11152555016784782, "grad_norm": 0.9618139863014221, "learning_rate": 2e-06, "loss": 0.1929, "step": 299 }, { "epoch": 0.11189854531891086, "grad_norm": 1.0402642488479614, "learning_rate": 2e-06, "loss": 0.1827, "step": 300 }, { "epoch": 0.1122715404699739, "grad_norm": 0.897169291973114, "learning_rate": 2e-06, "loss": 0.2227, "step": 301 }, { "epoch": 0.11264453562103692, "grad_norm": 0.9654368758201599, "learning_rate": 2e-06, "loss": 0.2063, "step": 302 }, { "epoch": 0.11301753077209996, "grad_norm": 0.9414504766464233, "learning_rate": 2e-06, "loss": 0.1872, "step": 303 }, { "epoch": 0.113390525923163, "grad_norm": 1.0622587203979492, "learning_rate": 2e-06, "loss": 0.1894, "step": 304 }, { "epoch": 0.11376352107422603, "grad_norm": 0.8382514715194702, "learning_rate": 2e-06, "loss": 0.2264, "step": 305 }, { "epoch": 0.11413651622528907, "grad_norm": 1.0582919120788574, "learning_rate": 2e-06, "loss": 0.2104, "step": 306 }, { "epoch": 0.11450951137635211, "grad_norm": 0.8076647520065308, "learning_rate": 2e-06, "loss": 0.211, "step": 307 }, { "epoch": 0.11488250652741515, "grad_norm": 0.859614908695221, "learning_rate": 2e-06, "loss": 0.2071, "step": 308 }, { "epoch": 0.11525550167847819, "grad_norm": 0.7204486727714539, "learning_rate": 2e-06, "loss": 0.2154, "step": 309 }, { "epoch": 0.11562849682954121, "grad_norm": 0.852245569229126, "learning_rate": 2e-06, "loss": 0.209, "step": 310 }, { "epoch": 0.11600149198060425, "grad_norm": 0.9215561747550964, "learning_rate": 2e-06, "loss": 0.1882, "step": 311 }, { "epoch": 0.11637448713166729, "grad_norm": 0.8485605120658875, "learning_rate": 2e-06, "loss": 0.2153, "step": 312 }, { "epoch": 0.11674748228273032, "grad_norm": 1.071969747543335, "learning_rate": 2e-06, "loss": 0.2175, "step": 313 }, { "epoch": 0.11712047743379336, "grad_norm": 0.9881746768951416, "learning_rate": 2e-06, "loss": 0.1716, "step": 314 }, { "epoch": 0.1174934725848564, "grad_norm": 0.7736453413963318, "learning_rate": 2e-06, "loss": 0.2009, "step": 315 }, { "epoch": 0.11786646773591944, "grad_norm": 0.7560129165649414, "learning_rate": 2e-06, "loss": 0.2023, "step": 316 }, { "epoch": 0.11823946288698246, "grad_norm": 1.0830320119857788, "learning_rate": 2e-06, "loss": 0.1937, "step": 317 }, { "epoch": 0.1186124580380455, "grad_norm": 1.0198551416397095, "learning_rate": 2e-06, "loss": 0.203, "step": 318 }, { "epoch": 0.11898545318910854, "grad_norm": 1.1804211139678955, "learning_rate": 2e-06, "loss": 0.1981, "step": 319 }, { "epoch": 0.11935844834017158, "grad_norm": 1.375030755996704, "learning_rate": 2e-06, "loss": 0.1841, "step": 320 }, { "epoch": 0.11973144349123462, "grad_norm": 1.0197371244430542, "learning_rate": 2e-06, "loss": 0.2052, "step": 321 }, { "epoch": 0.12010443864229765, "grad_norm": 0.8116735816001892, "learning_rate": 2e-06, "loss": 0.2193, "step": 322 }, { "epoch": 0.12047743379336069, "grad_norm": 1.150598168373108, "learning_rate": 2e-06, "loss": 0.1963, "step": 323 }, { "epoch": 0.12085042894442372, "grad_norm": 0.8719567656517029, "learning_rate": 2e-06, "loss": 0.2124, "step": 324 }, { "epoch": 0.12122342409548675, "grad_norm": 0.8246392607688904, "learning_rate": 2e-06, "loss": 0.2162, "step": 325 }, { "epoch": 0.12159641924654979, "grad_norm": 1.0488113164901733, "learning_rate": 2e-06, "loss": 0.2073, "step": 326 }, { "epoch": 0.12196941439761283, "grad_norm": 0.9453825354576111, "learning_rate": 2e-06, "loss": 0.1987, "step": 327 }, { "epoch": 0.12234240954867587, "grad_norm": 1.2794287204742432, "learning_rate": 2e-06, "loss": 0.1988, "step": 328 }, { "epoch": 0.1227154046997389, "grad_norm": 0.8134766817092896, "learning_rate": 2e-06, "loss": 0.2005, "step": 329 }, { "epoch": 0.12308839985080194, "grad_norm": 0.8527361750602722, "learning_rate": 2e-06, "loss": 0.2139, "step": 330 }, { "epoch": 0.12346139500186498, "grad_norm": 1.079713225364685, "learning_rate": 2e-06, "loss": 0.2193, "step": 331 }, { "epoch": 0.123834390152928, "grad_norm": 0.8858556151390076, "learning_rate": 2e-06, "loss": 0.2015, "step": 332 }, { "epoch": 0.12420738530399104, "grad_norm": 1.1836223602294922, "learning_rate": 2e-06, "loss": 0.1744, "step": 333 }, { "epoch": 0.12458038045505408, "grad_norm": 0.994394838809967, "learning_rate": 2e-06, "loss": 0.1936, "step": 334 }, { "epoch": 0.12495337560611712, "grad_norm": 1.1432346105575562, "learning_rate": 2e-06, "loss": 0.2035, "step": 335 }, { "epoch": 0.12532637075718014, "grad_norm": 1.0197690725326538, "learning_rate": 2e-06, "loss": 0.1737, "step": 336 }, { "epoch": 0.12569936590824318, "grad_norm": 0.9594882726669312, "learning_rate": 2e-06, "loss": 0.1986, "step": 337 }, { "epoch": 0.12607236105930622, "grad_norm": 1.0097217559814453, "learning_rate": 2e-06, "loss": 0.2304, "step": 338 }, { "epoch": 0.12644535621036926, "grad_norm": 0.8829630017280579, "learning_rate": 2e-06, "loss": 0.1962, "step": 339 }, { "epoch": 0.1268183513614323, "grad_norm": 0.9026457667350769, "learning_rate": 2e-06, "loss": 0.2012, "step": 340 }, { "epoch": 0.12719134651249533, "grad_norm": 1.0839552879333496, "learning_rate": 2e-06, "loss": 0.2004, "step": 341 }, { "epoch": 0.12756434166355837, "grad_norm": 1.3908957242965698, "learning_rate": 2e-06, "loss": 0.1939, "step": 342 }, { "epoch": 0.1279373368146214, "grad_norm": 0.8478268384933472, "learning_rate": 2e-06, "loss": 0.1908, "step": 343 }, { "epoch": 0.12831033196568445, "grad_norm": 1.0070209503173828, "learning_rate": 2e-06, "loss": 0.1886, "step": 344 }, { "epoch": 0.1286833271167475, "grad_norm": 0.897091805934906, "learning_rate": 2e-06, "loss": 0.1948, "step": 345 }, { "epoch": 0.12905632226781052, "grad_norm": 0.8363308310508728, "learning_rate": 2e-06, "loss": 0.2089, "step": 346 }, { "epoch": 0.12942931741887356, "grad_norm": 0.9248823523521423, "learning_rate": 2e-06, "loss": 0.221, "step": 347 }, { "epoch": 0.1298023125699366, "grad_norm": 1.0500788688659668, "learning_rate": 2e-06, "loss": 0.2047, "step": 348 }, { "epoch": 0.13017530772099964, "grad_norm": 0.8218602538108826, "learning_rate": 2e-06, "loss": 0.2097, "step": 349 }, { "epoch": 0.13054830287206268, "grad_norm": 1.1356159448623657, "learning_rate": 2e-06, "loss": 0.2013, "step": 350 }, { "epoch": 0.1309212980231257, "grad_norm": 1.0954879522323608, "learning_rate": 2e-06, "loss": 0.2072, "step": 351 }, { "epoch": 0.13129429317418873, "grad_norm": 1.1806498765945435, "learning_rate": 2e-06, "loss": 0.2103, "step": 352 }, { "epoch": 0.13166728832525176, "grad_norm": 0.9195176959037781, "learning_rate": 2e-06, "loss": 0.2106, "step": 353 }, { "epoch": 0.1320402834763148, "grad_norm": 1.0016396045684814, "learning_rate": 2e-06, "loss": 0.2026, "step": 354 }, { "epoch": 0.13241327862737784, "grad_norm": 1.04340398311615, "learning_rate": 2e-06, "loss": 0.2061, "step": 355 }, { "epoch": 0.13278627377844088, "grad_norm": 1.042103886604309, "learning_rate": 2e-06, "loss": 0.2001, "step": 356 }, { "epoch": 0.13315926892950392, "grad_norm": 0.8204907774925232, "learning_rate": 2e-06, "loss": 0.1967, "step": 357 }, { "epoch": 0.13353226408056695, "grad_norm": 0.7597780823707581, "learning_rate": 2e-06, "loss": 0.2414, "step": 358 }, { "epoch": 0.13390525923163, "grad_norm": 1.1440365314483643, "learning_rate": 2e-06, "loss": 0.1901, "step": 359 }, { "epoch": 0.13427825438269303, "grad_norm": 0.9034528732299805, "learning_rate": 2e-06, "loss": 0.2092, "step": 360 }, { "epoch": 0.13465124953375607, "grad_norm": 1.3232297897338867, "learning_rate": 2e-06, "loss": 0.2044, "step": 361 }, { "epoch": 0.1350242446848191, "grad_norm": 1.0840073823928833, "learning_rate": 2e-06, "loss": 0.1765, "step": 362 }, { "epoch": 0.13539723983588214, "grad_norm": 1.3067322969436646, "learning_rate": 2e-06, "loss": 0.2018, "step": 363 }, { "epoch": 0.13577023498694518, "grad_norm": 1.0514347553253174, "learning_rate": 2e-06, "loss": 0.1728, "step": 364 }, { "epoch": 0.1361432301380082, "grad_norm": 0.7831644415855408, "learning_rate": 2e-06, "loss": 0.2217, "step": 365 }, { "epoch": 0.13651622528907123, "grad_norm": 1.0022858381271362, "learning_rate": 2e-06, "loss": 0.2005, "step": 366 }, { "epoch": 0.13688922044013427, "grad_norm": 0.8478943109512329, "learning_rate": 2e-06, "loss": 0.2115, "step": 367 }, { "epoch": 0.1372622155911973, "grad_norm": 0.8771319389343262, "learning_rate": 2e-06, "loss": 0.1929, "step": 368 }, { "epoch": 0.13763521074226034, "grad_norm": 1.2978500127792358, "learning_rate": 2e-06, "loss": 0.2155, "step": 369 }, { "epoch": 0.13800820589332338, "grad_norm": 0.9163507223129272, "learning_rate": 2e-06, "loss": 0.1927, "step": 370 }, { "epoch": 0.13838120104438642, "grad_norm": 1.3140736818313599, "learning_rate": 2e-06, "loss": 0.1925, "step": 371 }, { "epoch": 0.13875419619544946, "grad_norm": 1.2815812826156616, "learning_rate": 2e-06, "loss": 0.2039, "step": 372 }, { "epoch": 0.1391271913465125, "grad_norm": 0.9287624955177307, "learning_rate": 2e-06, "loss": 0.2206, "step": 373 }, { "epoch": 0.13950018649757553, "grad_norm": 0.8873145580291748, "learning_rate": 2e-06, "loss": 0.2316, "step": 374 }, { "epoch": 0.13987318164863857, "grad_norm": 1.0029889345169067, "learning_rate": 2e-06, "loss": 0.2197, "step": 375 }, { "epoch": 0.1402461767997016, "grad_norm": 1.055535912513733, "learning_rate": 2e-06, "loss": 0.1918, "step": 376 }, { "epoch": 0.14061917195076465, "grad_norm": 1.8212354183197021, "learning_rate": 2e-06, "loss": 0.186, "step": 377 }, { "epoch": 0.1409921671018277, "grad_norm": 1.0998896360397339, "learning_rate": 2e-06, "loss": 0.2044, "step": 378 }, { "epoch": 0.14136516225289072, "grad_norm": 0.9196210503578186, "learning_rate": 2e-06, "loss": 0.1923, "step": 379 }, { "epoch": 0.14173815740395374, "grad_norm": 1.0084151029586792, "learning_rate": 2e-06, "loss": 0.2118, "step": 380 }, { "epoch": 0.14211115255501677, "grad_norm": 0.8531040549278259, "learning_rate": 2e-06, "loss": 0.2153, "step": 381 }, { "epoch": 0.1424841477060798, "grad_norm": 0.8971665501594543, "learning_rate": 2e-06, "loss": 0.2061, "step": 382 }, { "epoch": 0.14285714285714285, "grad_norm": 1.0290454626083374, "learning_rate": 2e-06, "loss": 0.2133, "step": 383 }, { "epoch": 0.1432301380082059, "grad_norm": 1.0639463663101196, "learning_rate": 2e-06, "loss": 0.1874, "step": 384 }, { "epoch": 0.14360313315926893, "grad_norm": 1.0288538932800293, "learning_rate": 2e-06, "loss": 0.1966, "step": 385 }, { "epoch": 0.14397612831033196, "grad_norm": 1.0478414297103882, "learning_rate": 2e-06, "loss": 0.2022, "step": 386 }, { "epoch": 0.144349123461395, "grad_norm": 0.830055296421051, "learning_rate": 2e-06, "loss": 0.2169, "step": 387 }, { "epoch": 0.14472211861245804, "grad_norm": 0.7767391204833984, "learning_rate": 2e-06, "loss": 0.2101, "step": 388 }, { "epoch": 0.14509511376352108, "grad_norm": 0.809712827205658, "learning_rate": 2e-06, "loss": 0.19, "step": 389 }, { "epoch": 0.14546810891458412, "grad_norm": 1.040543556213379, "learning_rate": 2e-06, "loss": 0.2146, "step": 390 }, { "epoch": 0.14584110406564715, "grad_norm": 0.9452835321426392, "learning_rate": 2e-06, "loss": 0.2143, "step": 391 }, { "epoch": 0.1462140992167102, "grad_norm": 1.0385925769805908, "learning_rate": 2e-06, "loss": 0.2031, "step": 392 }, { "epoch": 0.14658709436777323, "grad_norm": 0.8741906881332397, "learning_rate": 2e-06, "loss": 0.1996, "step": 393 }, { "epoch": 0.14696008951883627, "grad_norm": 0.9067285656929016, "learning_rate": 2e-06, "loss": 0.2018, "step": 394 }, { "epoch": 0.14733308466989928, "grad_norm": 1.0989304780960083, "learning_rate": 2e-06, "loss": 0.1868, "step": 395 }, { "epoch": 0.14770607982096232, "grad_norm": 1.0696651935577393, "learning_rate": 2e-06, "loss": 0.2026, "step": 396 }, { "epoch": 0.14807907497202535, "grad_norm": 1.0123809576034546, "learning_rate": 2e-06, "loss": 0.2051, "step": 397 }, { "epoch": 0.1484520701230884, "grad_norm": 1.1634759902954102, "learning_rate": 2e-06, "loss": 0.2083, "step": 398 }, { "epoch": 0.14882506527415143, "grad_norm": 1.3225884437561035, "learning_rate": 2e-06, "loss": 0.2044, "step": 399 }, { "epoch": 0.14919806042521447, "grad_norm": 1.022335410118103, "learning_rate": 2e-06, "loss": 0.2083, "step": 400 }, { "epoch": 0.1495710555762775, "grad_norm": 1.3332576751708984, "learning_rate": 2e-06, "loss": 0.1965, "step": 401 }, { "epoch": 0.14994405072734054, "grad_norm": 5.057086944580078, "learning_rate": 2e-06, "loss": 0.1976, "step": 402 }, { "epoch": 0.15031704587840358, "grad_norm": 0.9213835000991821, "learning_rate": 2e-06, "loss": 0.2039, "step": 403 }, { "epoch": 0.15069004102946662, "grad_norm": 1.1638898849487305, "learning_rate": 2e-06, "loss": 0.1898, "step": 404 }, { "epoch": 0.15106303618052966, "grad_norm": 0.8567191362380981, "learning_rate": 2e-06, "loss": 0.2024, "step": 405 }, { "epoch": 0.1514360313315927, "grad_norm": 1.0603688955307007, "learning_rate": 2e-06, "loss": 0.1808, "step": 406 }, { "epoch": 0.15180902648265573, "grad_norm": 1.011190414428711, "learning_rate": 2e-06, "loss": 0.1766, "step": 407 }, { "epoch": 0.15218202163371877, "grad_norm": 0.9938746094703674, "learning_rate": 2e-06, "loss": 0.1971, "step": 408 }, { "epoch": 0.1525550167847818, "grad_norm": 1.1509929895401, "learning_rate": 2e-06, "loss": 0.1908, "step": 409 }, { "epoch": 0.15292801193584482, "grad_norm": 1.0045833587646484, "learning_rate": 2e-06, "loss": 0.2175, "step": 410 }, { "epoch": 0.15330100708690786, "grad_norm": 0.9367875456809998, "learning_rate": 2e-06, "loss": 0.2139, "step": 411 }, { "epoch": 0.1536740022379709, "grad_norm": 0.7698757648468018, "learning_rate": 2e-06, "loss": 0.2228, "step": 412 }, { "epoch": 0.15404699738903394, "grad_norm": 0.9634842872619629, "learning_rate": 2e-06, "loss": 0.2023, "step": 413 }, { "epoch": 0.15441999254009697, "grad_norm": 0.8943634033203125, "learning_rate": 2e-06, "loss": 0.1779, "step": 414 }, { "epoch": 0.15479298769116, "grad_norm": 0.8061760663986206, "learning_rate": 2e-06, "loss": 0.2006, "step": 415 }, { "epoch": 0.15516598284222305, "grad_norm": 0.7390882968902588, "learning_rate": 2e-06, "loss": 0.2019, "step": 416 }, { "epoch": 0.1555389779932861, "grad_norm": 0.9829319715499878, "learning_rate": 2e-06, "loss": 0.2005, "step": 417 }, { "epoch": 0.15591197314434913, "grad_norm": 0.8578018546104431, "learning_rate": 2e-06, "loss": 0.2029, "step": 418 }, { "epoch": 0.15628496829541216, "grad_norm": 0.7973668575286865, "learning_rate": 2e-06, "loss": 0.226, "step": 419 }, { "epoch": 0.1566579634464752, "grad_norm": 0.9900999665260315, "learning_rate": 2e-06, "loss": 0.2136, "step": 420 }, { "epoch": 0.15703095859753824, "grad_norm": 1.242411732673645, "learning_rate": 2e-06, "loss": 0.201, "step": 421 }, { "epoch": 0.15740395374860128, "grad_norm": 1.093449354171753, "learning_rate": 2e-06, "loss": 0.1798, "step": 422 }, { "epoch": 0.15777694889966432, "grad_norm": 1.217016577720642, "learning_rate": 2e-06, "loss": 0.1878, "step": 423 }, { "epoch": 0.15814994405072735, "grad_norm": 0.7123441100120544, "learning_rate": 2e-06, "loss": 0.218, "step": 424 }, { "epoch": 0.15852293920179036, "grad_norm": 1.2584582567214966, "learning_rate": 2e-06, "loss": 0.1744, "step": 425 }, { "epoch": 0.1588959343528534, "grad_norm": 1.7297847270965576, "learning_rate": 2e-06, "loss": 0.2087, "step": 426 }, { "epoch": 0.15926892950391644, "grad_norm": 0.8605469465255737, "learning_rate": 2e-06, "loss": 0.2032, "step": 427 }, { "epoch": 0.15964192465497948, "grad_norm": 1.129231333732605, "learning_rate": 2e-06, "loss": 0.1989, "step": 428 }, { "epoch": 0.16001491980604252, "grad_norm": 1.1401667594909668, "learning_rate": 2e-06, "loss": 0.202, "step": 429 }, { "epoch": 0.16038791495710555, "grad_norm": 1.1294167041778564, "learning_rate": 2e-06, "loss": 0.1792, "step": 430 }, { "epoch": 0.1607609101081686, "grad_norm": 0.9822379946708679, "learning_rate": 2e-06, "loss": 0.1984, "step": 431 }, { "epoch": 0.16113390525923163, "grad_norm": 1.0212706327438354, "learning_rate": 2e-06, "loss": 0.2066, "step": 432 }, { "epoch": 0.16150690041029467, "grad_norm": 1.2711808681488037, "learning_rate": 2e-06, "loss": 0.1956, "step": 433 }, { "epoch": 0.1618798955613577, "grad_norm": 0.8436346054077148, "learning_rate": 2e-06, "loss": 0.2074, "step": 434 }, { "epoch": 0.16225289071242074, "grad_norm": 1.0892198085784912, "learning_rate": 2e-06, "loss": 0.2054, "step": 435 }, { "epoch": 0.16262588586348378, "grad_norm": 0.8327460885047913, "learning_rate": 2e-06, "loss": 0.2131, "step": 436 }, { "epoch": 0.16299888101454682, "grad_norm": 0.9754176139831543, "learning_rate": 2e-06, "loss": 0.1807, "step": 437 }, { "epoch": 0.16337187616560986, "grad_norm": 1.2283945083618164, "learning_rate": 2e-06, "loss": 0.1829, "step": 438 }, { "epoch": 0.1637448713166729, "grad_norm": 1.3499023914337158, "learning_rate": 2e-06, "loss": 0.2151, "step": 439 }, { "epoch": 0.1641178664677359, "grad_norm": 1.355258822441101, "learning_rate": 2e-06, "loss": 0.1934, "step": 440 }, { "epoch": 0.16449086161879894, "grad_norm": 0.8649986982345581, "learning_rate": 2e-06, "loss": 0.2137, "step": 441 }, { "epoch": 0.16486385676986198, "grad_norm": 0.8423401117324829, "learning_rate": 2e-06, "loss": 0.1916, "step": 442 }, { "epoch": 0.16523685192092502, "grad_norm": 0.8926856517791748, "learning_rate": 2e-06, "loss": 0.194, "step": 443 }, { "epoch": 0.16560984707198806, "grad_norm": 1.064077377319336, "learning_rate": 2e-06, "loss": 0.1961, "step": 444 }, { "epoch": 0.1659828422230511, "grad_norm": 0.9314874410629272, "learning_rate": 2e-06, "loss": 0.1995, "step": 445 }, { "epoch": 0.16635583737411413, "grad_norm": 1.0773807764053345, "learning_rate": 2e-06, "loss": 0.223, "step": 446 }, { "epoch": 0.16672883252517717, "grad_norm": 1.1482757329940796, "learning_rate": 2e-06, "loss": 0.1895, "step": 447 }, { "epoch": 0.1671018276762402, "grad_norm": 0.8492816686630249, "learning_rate": 2e-06, "loss": 0.2021, "step": 448 }, { "epoch": 0.16747482282730325, "grad_norm": 1.0005009174346924, "learning_rate": 2e-06, "loss": 0.1957, "step": 449 }, { "epoch": 0.1678478179783663, "grad_norm": 1.3120653629302979, "learning_rate": 2e-06, "loss": 0.2192, "step": 450 }, { "epoch": 0.16822081312942933, "grad_norm": 1.2881848812103271, "learning_rate": 2e-06, "loss": 0.2185, "step": 451 }, { "epoch": 0.16859380828049236, "grad_norm": 0.8801202774047852, "learning_rate": 2e-06, "loss": 0.1944, "step": 452 }, { "epoch": 0.1689668034315554, "grad_norm": 1.226344108581543, "learning_rate": 2e-06, "loss": 0.1839, "step": 453 }, { "epoch": 0.16933979858261844, "grad_norm": 1.1045247316360474, "learning_rate": 2e-06, "loss": 0.1741, "step": 454 }, { "epoch": 0.16971279373368145, "grad_norm": 0.8277959227561951, "learning_rate": 2e-06, "loss": 0.2078, "step": 455 }, { "epoch": 0.1700857888847445, "grad_norm": 1.1679118871688843, "learning_rate": 2e-06, "loss": 0.1912, "step": 456 }, { "epoch": 0.17045878403580753, "grad_norm": 1.4183374643325806, "learning_rate": 2e-06, "loss": 0.1822, "step": 457 }, { "epoch": 0.17083177918687056, "grad_norm": 0.9129360914230347, "learning_rate": 2e-06, "loss": 0.1917, "step": 458 }, { "epoch": 0.1712047743379336, "grad_norm": 1.1617270708084106, "learning_rate": 2e-06, "loss": 0.1814, "step": 459 }, { "epoch": 0.17157776948899664, "grad_norm": 1.0721662044525146, "learning_rate": 2e-06, "loss": 0.2075, "step": 460 }, { "epoch": 0.17195076464005968, "grad_norm": 0.9300745129585266, "learning_rate": 2e-06, "loss": 0.1945, "step": 461 }, { "epoch": 0.17232375979112272, "grad_norm": 1.2440983057022095, "learning_rate": 2e-06, "loss": 0.2046, "step": 462 }, { "epoch": 0.17269675494218575, "grad_norm": 0.9573012590408325, "learning_rate": 2e-06, "loss": 0.1942, "step": 463 }, { "epoch": 0.1730697500932488, "grad_norm": 1.0386253595352173, "learning_rate": 2e-06, "loss": 0.2176, "step": 464 }, { "epoch": 0.17344274524431183, "grad_norm": 1.0358246564865112, "learning_rate": 2e-06, "loss": 0.1857, "step": 465 }, { "epoch": 0.17381574039537487, "grad_norm": 1.2177265882492065, "learning_rate": 2e-06, "loss": 0.2108, "step": 466 }, { "epoch": 0.1741887355464379, "grad_norm": 0.9553227424621582, "learning_rate": 2e-06, "loss": 0.1907, "step": 467 }, { "epoch": 0.17456173069750094, "grad_norm": 1.0858536958694458, "learning_rate": 2e-06, "loss": 0.1915, "step": 468 }, { "epoch": 0.17493472584856398, "grad_norm": 1.239364743232727, "learning_rate": 2e-06, "loss": 0.1784, "step": 469 }, { "epoch": 0.175307720999627, "grad_norm": 1.6808676719665527, "learning_rate": 2e-06, "loss": 0.1938, "step": 470 }, { "epoch": 0.17568071615069003, "grad_norm": 0.7831138372421265, "learning_rate": 2e-06, "loss": 0.2147, "step": 471 }, { "epoch": 0.17605371130175307, "grad_norm": 0.8647436499595642, "learning_rate": 2e-06, "loss": 0.1916, "step": 472 }, { "epoch": 0.1764267064528161, "grad_norm": 1.3220148086547852, "learning_rate": 2e-06, "loss": 0.1805, "step": 473 }, { "epoch": 0.17679970160387914, "grad_norm": 0.8763908743858337, "learning_rate": 2e-06, "loss": 0.21, "step": 474 }, { "epoch": 0.17717269675494218, "grad_norm": 1.006888747215271, "learning_rate": 2e-06, "loss": 0.189, "step": 475 }, { "epoch": 0.17754569190600522, "grad_norm": 1.0888190269470215, "learning_rate": 2e-06, "loss": 0.1836, "step": 476 }, { "epoch": 0.17791868705706826, "grad_norm": 0.8511141538619995, "learning_rate": 2e-06, "loss": 0.2168, "step": 477 }, { "epoch": 0.1782916822081313, "grad_norm": 0.9532693028450012, "learning_rate": 2e-06, "loss": 0.2182, "step": 478 }, { "epoch": 0.17866467735919433, "grad_norm": 0.8835806846618652, "learning_rate": 2e-06, "loss": 0.2271, "step": 479 }, { "epoch": 0.17903767251025737, "grad_norm": 0.7130380272865295, "learning_rate": 2e-06, "loss": 0.1989, "step": 480 }, { "epoch": 0.1794106676613204, "grad_norm": 0.9246224761009216, "learning_rate": 2e-06, "loss": 0.1935, "step": 481 }, { "epoch": 0.17978366281238345, "grad_norm": 1.0340907573699951, "learning_rate": 2e-06, "loss": 0.1784, "step": 482 }, { "epoch": 0.1801566579634465, "grad_norm": 0.8822704553604126, "learning_rate": 2e-06, "loss": 0.2151, "step": 483 }, { "epoch": 0.18052965311450953, "grad_norm": 1.3958379030227661, "learning_rate": 2e-06, "loss": 0.1972, "step": 484 }, { "epoch": 0.18090264826557254, "grad_norm": 1.0920135974884033, "learning_rate": 2e-06, "loss": 0.1961, "step": 485 }, { "epoch": 0.18127564341663557, "grad_norm": 1.0873379707336426, "learning_rate": 2e-06, "loss": 0.2151, "step": 486 }, { "epoch": 0.1816486385676986, "grad_norm": 0.95895916223526, "learning_rate": 2e-06, "loss": 0.2081, "step": 487 }, { "epoch": 0.18202163371876165, "grad_norm": 0.8082234859466553, "learning_rate": 2e-06, "loss": 0.2218, "step": 488 }, { "epoch": 0.1823946288698247, "grad_norm": 0.9949958920478821, "learning_rate": 2e-06, "loss": 0.2116, "step": 489 }, { "epoch": 0.18276762402088773, "grad_norm": 1.1207650899887085, "learning_rate": 2e-06, "loss": 0.2132, "step": 490 }, { "epoch": 0.18314061917195076, "grad_norm": 1.158165454864502, "learning_rate": 2e-06, "loss": 0.1809, "step": 491 }, { "epoch": 0.1835136143230138, "grad_norm": 0.7989445328712463, "learning_rate": 2e-06, "loss": 0.229, "step": 492 }, { "epoch": 0.18388660947407684, "grad_norm": 0.8534945249557495, "learning_rate": 2e-06, "loss": 0.1842, "step": 493 }, { "epoch": 0.18425960462513988, "grad_norm": 0.9253622889518738, "learning_rate": 2e-06, "loss": 0.2285, "step": 494 }, { "epoch": 0.18463259977620292, "grad_norm": 0.942074179649353, "learning_rate": 2e-06, "loss": 0.216, "step": 495 }, { "epoch": 0.18500559492726595, "grad_norm": 0.8206772208213806, "learning_rate": 2e-06, "loss": 0.2222, "step": 496 }, { "epoch": 0.185378590078329, "grad_norm": 0.8780680894851685, "learning_rate": 2e-06, "loss": 0.2227, "step": 497 }, { "epoch": 0.18575158522939203, "grad_norm": 0.7887238264083862, "learning_rate": 2e-06, "loss": 0.2002, "step": 498 }, { "epoch": 0.18612458038045504, "grad_norm": 0.93376624584198, "learning_rate": 2e-06, "loss": 0.1902, "step": 499 }, { "epoch": 0.18649757553151808, "grad_norm": 1.2919833660125732, "learning_rate": 2e-06, "loss": 0.2185, "step": 500 }, { "epoch": 0.18687057068258112, "grad_norm": 0.8931835293769836, "learning_rate": 2e-06, "loss": 0.2183, "step": 501 }, { "epoch": 0.18724356583364415, "grad_norm": 0.9536945223808289, "learning_rate": 2e-06, "loss": 0.2095, "step": 502 }, { "epoch": 0.1876165609847072, "grad_norm": 1.1308852434158325, "learning_rate": 2e-06, "loss": 0.189, "step": 503 }, { "epoch": 0.18798955613577023, "grad_norm": 1.1296603679656982, "learning_rate": 2e-06, "loss": 0.1827, "step": 504 }, { "epoch": 0.18836255128683327, "grad_norm": 0.7937658429145813, "learning_rate": 2e-06, "loss": 0.2248, "step": 505 }, { "epoch": 0.1887355464378963, "grad_norm": 1.3925045728683472, "learning_rate": 2e-06, "loss": 0.1879, "step": 506 }, { "epoch": 0.18910854158895934, "grad_norm": 1.1175990104675293, "learning_rate": 2e-06, "loss": 0.2025, "step": 507 }, { "epoch": 0.18948153674002238, "grad_norm": 1.5032963752746582, "learning_rate": 2e-06, "loss": 0.1968, "step": 508 }, { "epoch": 0.18985453189108542, "grad_norm": 0.9408325552940369, "learning_rate": 2e-06, "loss": 0.2051, "step": 509 }, { "epoch": 0.19022752704214846, "grad_norm": 1.0905200242996216, "learning_rate": 2e-06, "loss": 0.2018, "step": 510 }, { "epoch": 0.1906005221932115, "grad_norm": 1.2797642946243286, "learning_rate": 2e-06, "loss": 0.2028, "step": 511 }, { "epoch": 0.19097351734427453, "grad_norm": 0.9551194906234741, "learning_rate": 2e-06, "loss": 0.2275, "step": 512 }, { "epoch": 0.19134651249533757, "grad_norm": 0.9359089136123657, "learning_rate": 2e-06, "loss": 0.1914, "step": 513 }, { "epoch": 0.19171950764640058, "grad_norm": 0.730552613735199, "learning_rate": 2e-06, "loss": 0.2097, "step": 514 }, { "epoch": 0.19209250279746362, "grad_norm": 1.0773857831954956, "learning_rate": 2e-06, "loss": 0.2035, "step": 515 }, { "epoch": 0.19246549794852666, "grad_norm": 0.9647989273071289, "learning_rate": 2e-06, "loss": 0.1872, "step": 516 }, { "epoch": 0.1928384930995897, "grad_norm": 0.9264434576034546, "learning_rate": 2e-06, "loss": 0.2042, "step": 517 }, { "epoch": 0.19321148825065274, "grad_norm": 0.8714203238487244, "learning_rate": 2e-06, "loss": 0.2041, "step": 518 }, { "epoch": 0.19358448340171577, "grad_norm": 1.430816888809204, "learning_rate": 2e-06, "loss": 0.2172, "step": 519 }, { "epoch": 0.1939574785527788, "grad_norm": 1.0130763053894043, "learning_rate": 2e-06, "loss": 0.2006, "step": 520 }, { "epoch": 0.19433047370384185, "grad_norm": 0.8605667948722839, "learning_rate": 2e-06, "loss": 0.1878, "step": 521 }, { "epoch": 0.1947034688549049, "grad_norm": 1.424079418182373, "learning_rate": 2e-06, "loss": 0.1816, "step": 522 }, { "epoch": 0.19507646400596793, "grad_norm": 0.9603455662727356, "learning_rate": 2e-06, "loss": 0.2111, "step": 523 }, { "epoch": 0.19544945915703096, "grad_norm": 1.2314612865447998, "learning_rate": 2e-06, "loss": 0.1949, "step": 524 }, { "epoch": 0.195822454308094, "grad_norm": 0.8565496802330017, "learning_rate": 2e-06, "loss": 0.2142, "step": 525 }, { "epoch": 0.19619544945915704, "grad_norm": 0.9405530095100403, "learning_rate": 2e-06, "loss": 0.1706, "step": 526 }, { "epoch": 0.19656844461022008, "grad_norm": 0.7931005954742432, "learning_rate": 2e-06, "loss": 0.1805, "step": 527 }, { "epoch": 0.19694143976128312, "grad_norm": 1.0556527376174927, "learning_rate": 2e-06, "loss": 0.1975, "step": 528 }, { "epoch": 0.19731443491234613, "grad_norm": 0.9323137998580933, "learning_rate": 2e-06, "loss": 0.2107, "step": 529 }, { "epoch": 0.19768743006340916, "grad_norm": 0.8173279762268066, "learning_rate": 2e-06, "loss": 0.2104, "step": 530 }, { "epoch": 0.1980604252144722, "grad_norm": 0.9454634189605713, "learning_rate": 2e-06, "loss": 0.2072, "step": 531 }, { "epoch": 0.19843342036553524, "grad_norm": 0.8415260910987854, "learning_rate": 2e-06, "loss": 0.2258, "step": 532 }, { "epoch": 0.19880641551659828, "grad_norm": 0.8049208521842957, "learning_rate": 2e-06, "loss": 0.2402, "step": 533 }, { "epoch": 0.19917941066766132, "grad_norm": 0.7870807647705078, "learning_rate": 2e-06, "loss": 0.218, "step": 534 }, { "epoch": 0.19955240581872435, "grad_norm": 1.2361797094345093, "learning_rate": 2e-06, "loss": 0.1964, "step": 535 }, { "epoch": 0.1999254009697874, "grad_norm": 0.7926539182662964, "learning_rate": 2e-06, "loss": 0.2118, "step": 536 }, { "epoch": 0.20029839612085043, "grad_norm": 1.1509639024734497, "learning_rate": 2e-06, "loss": 0.2165, "step": 537 }, { "epoch": 0.20067139127191347, "grad_norm": 0.8690283298492432, "learning_rate": 2e-06, "loss": 0.2257, "step": 538 }, { "epoch": 0.2010443864229765, "grad_norm": 1.1114712953567505, "learning_rate": 2e-06, "loss": 0.1888, "step": 539 }, { "epoch": 0.20141738157403954, "grad_norm": 1.102615475654602, "learning_rate": 2e-06, "loss": 0.1955, "step": 540 }, { "epoch": 0.20179037672510258, "grad_norm": 0.9116284251213074, "learning_rate": 2e-06, "loss": 0.1964, "step": 541 }, { "epoch": 0.20216337187616562, "grad_norm": 0.9335741996765137, "learning_rate": 2e-06, "loss": 0.1881, "step": 542 }, { "epoch": 0.20253636702722866, "grad_norm": 0.8879089951515198, "learning_rate": 2e-06, "loss": 0.2305, "step": 543 }, { "epoch": 0.20290936217829167, "grad_norm": 0.9800735712051392, "learning_rate": 2e-06, "loss": 0.1809, "step": 544 }, { "epoch": 0.2032823573293547, "grad_norm": 0.7688462138175964, "learning_rate": 2e-06, "loss": 0.186, "step": 545 }, { "epoch": 0.20365535248041775, "grad_norm": 1.2997785806655884, "learning_rate": 2e-06, "loss": 0.2027, "step": 546 }, { "epoch": 0.20402834763148078, "grad_norm": 0.8497205376625061, "learning_rate": 2e-06, "loss": 0.2152, "step": 547 }, { "epoch": 0.20440134278254382, "grad_norm": 1.2752513885498047, "learning_rate": 2e-06, "loss": 0.169, "step": 548 }, { "epoch": 0.20477433793360686, "grad_norm": 0.9009093642234802, "learning_rate": 2e-06, "loss": 0.2079, "step": 549 }, { "epoch": 0.2051473330846699, "grad_norm": 1.0766544342041016, "learning_rate": 2e-06, "loss": 0.1869, "step": 550 }, { "epoch": 0.20552032823573294, "grad_norm": 1.1396617889404297, "learning_rate": 2e-06, "loss": 0.1827, "step": 551 }, { "epoch": 0.20589332338679597, "grad_norm": 1.1221847534179688, "learning_rate": 2e-06, "loss": 0.2049, "step": 552 }, { "epoch": 0.206266318537859, "grad_norm": 1.0023646354675293, "learning_rate": 2e-06, "loss": 0.2064, "step": 553 }, { "epoch": 0.20663931368892205, "grad_norm": 0.8073069453239441, "learning_rate": 2e-06, "loss": 0.2006, "step": 554 }, { "epoch": 0.2070123088399851, "grad_norm": 1.0152326822280884, "learning_rate": 2e-06, "loss": 0.2393, "step": 555 }, { "epoch": 0.20738530399104813, "grad_norm": 1.2739393711090088, "learning_rate": 2e-06, "loss": 0.2048, "step": 556 }, { "epoch": 0.20775829914211116, "grad_norm": 0.860404372215271, "learning_rate": 2e-06, "loss": 0.2123, "step": 557 }, { "epoch": 0.2081312942931742, "grad_norm": 1.1002343893051147, "learning_rate": 2e-06, "loss": 0.1857, "step": 558 }, { "epoch": 0.2085042894442372, "grad_norm": 0.8319405913352966, "learning_rate": 2e-06, "loss": 0.205, "step": 559 }, { "epoch": 0.20887728459530025, "grad_norm": 0.8396271467208862, "learning_rate": 2e-06, "loss": 0.2098, "step": 560 }, { "epoch": 0.2092502797463633, "grad_norm": 0.8502417802810669, "learning_rate": 2e-06, "loss": 0.2033, "step": 561 }, { "epoch": 0.20962327489742633, "grad_norm": 0.9166382551193237, "learning_rate": 2e-06, "loss": 0.2129, "step": 562 }, { "epoch": 0.20999627004848936, "grad_norm": 0.941150963306427, "learning_rate": 2e-06, "loss": 0.2272, "step": 563 }, { "epoch": 0.2103692651995524, "grad_norm": 0.8455848693847656, "learning_rate": 2e-06, "loss": 0.2394, "step": 564 }, { "epoch": 0.21074226035061544, "grad_norm": 1.1626328229904175, "learning_rate": 2e-06, "loss": 0.2246, "step": 565 }, { "epoch": 0.21111525550167848, "grad_norm": 1.186963677406311, "learning_rate": 2e-06, "loss": 0.1958, "step": 566 }, { "epoch": 0.21148825065274152, "grad_norm": 0.9363473057746887, "learning_rate": 2e-06, "loss": 0.2158, "step": 567 }, { "epoch": 0.21186124580380455, "grad_norm": 0.790198564529419, "learning_rate": 2e-06, "loss": 0.2023, "step": 568 }, { "epoch": 0.2122342409548676, "grad_norm": 1.0179481506347656, "learning_rate": 2e-06, "loss": 0.1964, "step": 569 }, { "epoch": 0.21260723610593063, "grad_norm": 0.8688399791717529, "learning_rate": 2e-06, "loss": 0.2062, "step": 570 }, { "epoch": 0.21298023125699367, "grad_norm": 0.8520952463150024, "learning_rate": 2e-06, "loss": 0.1839, "step": 571 }, { "epoch": 0.2133532264080567, "grad_norm": 1.2470297813415527, "learning_rate": 2e-06, "loss": 0.184, "step": 572 }, { "epoch": 0.21372622155911974, "grad_norm": 1.097622275352478, "learning_rate": 2e-06, "loss": 0.2103, "step": 573 }, { "epoch": 0.21409921671018275, "grad_norm": 0.7465157508850098, "learning_rate": 2e-06, "loss": 0.2198, "step": 574 }, { "epoch": 0.2144722118612458, "grad_norm": 0.761018693447113, "learning_rate": 2e-06, "loss": 0.1891, "step": 575 }, { "epoch": 0.21484520701230883, "grad_norm": 0.9405248761177063, "learning_rate": 2e-06, "loss": 0.181, "step": 576 }, { "epoch": 0.21521820216337187, "grad_norm": 0.8489209413528442, "learning_rate": 2e-06, "loss": 0.2058, "step": 577 }, { "epoch": 0.2155911973144349, "grad_norm": 0.9635956287384033, "learning_rate": 2e-06, "loss": 0.1777, "step": 578 }, { "epoch": 0.21596419246549795, "grad_norm": 1.130800724029541, "learning_rate": 2e-06, "loss": 0.1923, "step": 579 }, { "epoch": 0.21633718761656098, "grad_norm": 0.9505035281181335, "learning_rate": 2e-06, "loss": 0.1846, "step": 580 }, { "epoch": 0.21671018276762402, "grad_norm": 1.020970344543457, "learning_rate": 2e-06, "loss": 0.1848, "step": 581 }, { "epoch": 0.21708317791868706, "grad_norm": 1.8622032403945923, "learning_rate": 2e-06, "loss": 0.2164, "step": 582 }, { "epoch": 0.2174561730697501, "grad_norm": 1.2388616800308228, "learning_rate": 2e-06, "loss": 0.196, "step": 583 }, { "epoch": 0.21782916822081314, "grad_norm": 0.7878226041793823, "learning_rate": 2e-06, "loss": 0.2213, "step": 584 }, { "epoch": 0.21820216337187617, "grad_norm": 1.0986346006393433, "learning_rate": 2e-06, "loss": 0.2082, "step": 585 }, { "epoch": 0.2185751585229392, "grad_norm": 0.8043391704559326, "learning_rate": 2e-06, "loss": 0.1983, "step": 586 }, { "epoch": 0.21894815367400225, "grad_norm": 1.4239261150360107, "learning_rate": 2e-06, "loss": 0.2042, "step": 587 }, { "epoch": 0.2193211488250653, "grad_norm": 0.9492800235748291, "learning_rate": 2e-06, "loss": 0.2118, "step": 588 }, { "epoch": 0.2196941439761283, "grad_norm": 0.8755353689193726, "learning_rate": 2e-06, "loss": 0.2066, "step": 589 }, { "epoch": 0.22006713912719134, "grad_norm": 0.8456981182098389, "learning_rate": 2e-06, "loss": 0.2153, "step": 590 }, { "epoch": 0.22044013427825437, "grad_norm": 1.4854273796081543, "learning_rate": 2e-06, "loss": 0.2187, "step": 591 }, { "epoch": 0.2208131294293174, "grad_norm": 1.0030053853988647, "learning_rate": 2e-06, "loss": 0.1993, "step": 592 }, { "epoch": 0.22118612458038045, "grad_norm": 1.040297031402588, "learning_rate": 2e-06, "loss": 0.2053, "step": 593 }, { "epoch": 0.2215591197314435, "grad_norm": 1.661920189857483, "learning_rate": 2e-06, "loss": 0.1701, "step": 594 }, { "epoch": 0.22193211488250653, "grad_norm": 1.1722215414047241, "learning_rate": 2e-06, "loss": 0.1822, "step": 595 }, { "epoch": 0.22230511003356956, "grad_norm": 0.9399772882461548, "learning_rate": 2e-06, "loss": 0.2087, "step": 596 }, { "epoch": 0.2226781051846326, "grad_norm": 1.38283109664917, "learning_rate": 2e-06, "loss": 0.1963, "step": 597 }, { "epoch": 0.22305110033569564, "grad_norm": 0.8309964537620544, "learning_rate": 2e-06, "loss": 0.2207, "step": 598 }, { "epoch": 0.22342409548675868, "grad_norm": 1.099584698677063, "learning_rate": 2e-06, "loss": 0.2181, "step": 599 }, { "epoch": 0.22379709063782172, "grad_norm": 1.0178828239440918, "learning_rate": 2e-06, "loss": 0.2011, "step": 600 }, { "epoch": 0.22417008578888475, "grad_norm": 1.2686821222305298, "learning_rate": 2e-06, "loss": 0.1921, "step": 601 }, { "epoch": 0.2245430809399478, "grad_norm": 0.9977150559425354, "learning_rate": 2e-06, "loss": 0.2102, "step": 602 }, { "epoch": 0.22491607609101083, "grad_norm": 1.1901379823684692, "learning_rate": 2e-06, "loss": 0.2088, "step": 603 }, { "epoch": 0.22528907124207384, "grad_norm": 0.932580828666687, "learning_rate": 2e-06, "loss": 0.1869, "step": 604 }, { "epoch": 0.22566206639313688, "grad_norm": 0.959183931350708, "learning_rate": 2e-06, "loss": 0.1593, "step": 605 }, { "epoch": 0.22603506154419992, "grad_norm": 1.0313653945922852, "learning_rate": 2e-06, "loss": 0.1984, "step": 606 }, { "epoch": 0.22640805669526295, "grad_norm": 1.4330129623413086, "learning_rate": 2e-06, "loss": 0.1829, "step": 607 }, { "epoch": 0.226781051846326, "grad_norm": 0.8622726202011108, "learning_rate": 2e-06, "loss": 0.2286, "step": 608 }, { "epoch": 0.22715404699738903, "grad_norm": 1.2492940425872803, "learning_rate": 2e-06, "loss": 0.197, "step": 609 }, { "epoch": 0.22752704214845207, "grad_norm": 1.4998513460159302, "learning_rate": 2e-06, "loss": 0.2009, "step": 610 }, { "epoch": 0.2279000372995151, "grad_norm": 0.6814820170402527, "learning_rate": 2e-06, "loss": 0.2035, "step": 611 }, { "epoch": 0.22827303245057814, "grad_norm": 1.134840726852417, "learning_rate": 2e-06, "loss": 0.1967, "step": 612 }, { "epoch": 0.22864602760164118, "grad_norm": 1.2586655616760254, "learning_rate": 2e-06, "loss": 0.1993, "step": 613 }, { "epoch": 0.22901902275270422, "grad_norm": 0.9301605224609375, "learning_rate": 2e-06, "loss": 0.2099, "step": 614 }, { "epoch": 0.22939201790376726, "grad_norm": 0.8654438257217407, "learning_rate": 2e-06, "loss": 0.1794, "step": 615 }, { "epoch": 0.2297650130548303, "grad_norm": 1.4498627185821533, "learning_rate": 2e-06, "loss": 0.2015, "step": 616 }, { "epoch": 0.23013800820589334, "grad_norm": 1.1193093061447144, "learning_rate": 2e-06, "loss": 0.2039, "step": 617 }, { "epoch": 0.23051100335695637, "grad_norm": 1.0976874828338623, "learning_rate": 2e-06, "loss": 0.184, "step": 618 }, { "epoch": 0.23088399850801938, "grad_norm": 0.928781270980835, "learning_rate": 2e-06, "loss": 0.2194, "step": 619 }, { "epoch": 0.23125699365908242, "grad_norm": 0.7948481440544128, "learning_rate": 2e-06, "loss": 0.2333, "step": 620 }, { "epoch": 0.23162998881014546, "grad_norm": 0.946725070476532, "learning_rate": 2e-06, "loss": 0.2014, "step": 621 }, { "epoch": 0.2320029839612085, "grad_norm": 1.0384751558303833, "learning_rate": 2e-06, "loss": 0.215, "step": 622 }, { "epoch": 0.23237597911227154, "grad_norm": 0.9687400460243225, "learning_rate": 2e-06, "loss": 0.2005, "step": 623 }, { "epoch": 0.23274897426333457, "grad_norm": 1.2368228435516357, "learning_rate": 2e-06, "loss": 0.1903, "step": 624 }, { "epoch": 0.2331219694143976, "grad_norm": 1.2837542295455933, "learning_rate": 2e-06, "loss": 0.2189, "step": 625 }, { "epoch": 0.23349496456546065, "grad_norm": 1.0219788551330566, "learning_rate": 2e-06, "loss": 0.2008, "step": 626 }, { "epoch": 0.2338679597165237, "grad_norm": 1.0392464399337769, "learning_rate": 2e-06, "loss": 0.1927, "step": 627 }, { "epoch": 0.23424095486758673, "grad_norm": 0.7776193618774414, "learning_rate": 2e-06, "loss": 0.186, "step": 628 }, { "epoch": 0.23461395001864976, "grad_norm": 0.8217248916625977, "learning_rate": 2e-06, "loss": 0.216, "step": 629 }, { "epoch": 0.2349869451697128, "grad_norm": 0.7631544470787048, "learning_rate": 2e-06, "loss": 0.2086, "step": 630 }, { "epoch": 0.23535994032077584, "grad_norm": 0.7680940628051758, "learning_rate": 2e-06, "loss": 0.1781, "step": 631 }, { "epoch": 0.23573293547183888, "grad_norm": 0.9548686742782593, "learning_rate": 2e-06, "loss": 0.1819, "step": 632 }, { "epoch": 0.23610593062290192, "grad_norm": 0.8924967050552368, "learning_rate": 2e-06, "loss": 0.1917, "step": 633 }, { "epoch": 0.23647892577396493, "grad_norm": 0.9398453235626221, "learning_rate": 2e-06, "loss": 0.2068, "step": 634 }, { "epoch": 0.23685192092502796, "grad_norm": 1.2863820791244507, "learning_rate": 2e-06, "loss": 0.1893, "step": 635 }, { "epoch": 0.237224916076091, "grad_norm": 1.330156922340393, "learning_rate": 2e-06, "loss": 0.1667, "step": 636 }, { "epoch": 0.23759791122715404, "grad_norm": 0.9352864027023315, "learning_rate": 2e-06, "loss": 0.2056, "step": 637 }, { "epoch": 0.23797090637821708, "grad_norm": 0.8858610987663269, "learning_rate": 2e-06, "loss": 0.2041, "step": 638 }, { "epoch": 0.23834390152928012, "grad_norm": 1.010868787765503, "learning_rate": 2e-06, "loss": 0.2038, "step": 639 }, { "epoch": 0.23871689668034315, "grad_norm": 1.1588542461395264, "learning_rate": 2e-06, "loss": 0.18, "step": 640 }, { "epoch": 0.2390898918314062, "grad_norm": 0.9710838198661804, "learning_rate": 2e-06, "loss": 0.189, "step": 641 }, { "epoch": 0.23946288698246923, "grad_norm": 1.1014474630355835, "learning_rate": 2e-06, "loss": 0.2026, "step": 642 }, { "epoch": 0.23983588213353227, "grad_norm": 0.9208314418792725, "learning_rate": 2e-06, "loss": 0.2119, "step": 643 }, { "epoch": 0.2402088772845953, "grad_norm": 1.0527114868164062, "learning_rate": 2e-06, "loss": 0.1888, "step": 644 }, { "epoch": 0.24058187243565834, "grad_norm": 1.2152858972549438, "learning_rate": 2e-06, "loss": 0.2074, "step": 645 }, { "epoch": 0.24095486758672138, "grad_norm": 1.4974337816238403, "learning_rate": 2e-06, "loss": 0.1924, "step": 646 }, { "epoch": 0.24132786273778442, "grad_norm": 1.1860182285308838, "learning_rate": 2e-06, "loss": 0.1733, "step": 647 }, { "epoch": 0.24170085788884743, "grad_norm": 0.9891045689582825, "learning_rate": 2e-06, "loss": 0.1822, "step": 648 }, { "epoch": 0.24207385303991047, "grad_norm": 1.285539984703064, "learning_rate": 2e-06, "loss": 0.1835, "step": 649 }, { "epoch": 0.2424468481909735, "grad_norm": 0.8355045318603516, "learning_rate": 2e-06, "loss": 0.2008, "step": 650 }, { "epoch": 0.24281984334203655, "grad_norm": 1.0089906454086304, "learning_rate": 2e-06, "loss": 0.1868, "step": 651 }, { "epoch": 0.24319283849309958, "grad_norm": 1.01986563205719, "learning_rate": 2e-06, "loss": 0.1848, "step": 652 }, { "epoch": 0.24356583364416262, "grad_norm": 1.5180487632751465, "learning_rate": 2e-06, "loss": 0.2057, "step": 653 }, { "epoch": 0.24393882879522566, "grad_norm": 1.1425895690917969, "learning_rate": 2e-06, "loss": 0.2232, "step": 654 }, { "epoch": 0.2443118239462887, "grad_norm": 0.9315125942230225, "learning_rate": 2e-06, "loss": 0.1924, "step": 655 }, { "epoch": 0.24468481909735174, "grad_norm": 0.8382992148399353, "learning_rate": 2e-06, "loss": 0.2056, "step": 656 }, { "epoch": 0.24505781424841477, "grad_norm": 1.0494027137756348, "learning_rate": 2e-06, "loss": 0.2019, "step": 657 }, { "epoch": 0.2454308093994778, "grad_norm": 1.0728511810302734, "learning_rate": 2e-06, "loss": 0.1873, "step": 658 }, { "epoch": 0.24580380455054085, "grad_norm": 0.8401355147361755, "learning_rate": 2e-06, "loss": 0.212, "step": 659 }, { "epoch": 0.2461767997016039, "grad_norm": 1.0953620672225952, "learning_rate": 2e-06, "loss": 0.1831, "step": 660 }, { "epoch": 0.24654979485266693, "grad_norm": 1.2292126417160034, "learning_rate": 2e-06, "loss": 0.1978, "step": 661 }, { "epoch": 0.24692279000372996, "grad_norm": 1.627443790435791, "learning_rate": 2e-06, "loss": 0.1901, "step": 662 }, { "epoch": 0.24729578515479297, "grad_norm": 1.1579126119613647, "learning_rate": 2e-06, "loss": 0.1804, "step": 663 }, { "epoch": 0.247668780305856, "grad_norm": 1.0369088649749756, "learning_rate": 2e-06, "loss": 0.1766, "step": 664 }, { "epoch": 0.24804177545691905, "grad_norm": 0.8188715577125549, "learning_rate": 2e-06, "loss": 0.2078, "step": 665 }, { "epoch": 0.2484147706079821, "grad_norm": 0.8875457644462585, "learning_rate": 2e-06, "loss": 0.2114, "step": 666 }, { "epoch": 0.24878776575904513, "grad_norm": 1.0028759241104126, "learning_rate": 2e-06, "loss": 0.201, "step": 667 }, { "epoch": 0.24916076091010816, "grad_norm": 1.040744423866272, "learning_rate": 2e-06, "loss": 0.1813, "step": 668 }, { "epoch": 0.2495337560611712, "grad_norm": 0.9344378709793091, "learning_rate": 2e-06, "loss": 0.2128, "step": 669 }, { "epoch": 0.24990675121223424, "grad_norm": 0.818523108959198, "learning_rate": 2e-06, "loss": 0.2099, "step": 670 }, { "epoch": 0.25027974636329725, "grad_norm": 1.0410813093185425, "learning_rate": 2e-06, "loss": 0.2042, "step": 671 }, { "epoch": 0.2506527415143603, "grad_norm": 1.2319177389144897, "learning_rate": 2e-06, "loss": 0.1899, "step": 672 }, { "epoch": 0.2510257366654233, "grad_norm": 0.9345608949661255, "learning_rate": 2e-06, "loss": 0.2076, "step": 673 }, { "epoch": 0.25139873181648636, "grad_norm": 0.8540711402893066, "learning_rate": 2e-06, "loss": 0.1815, "step": 674 }, { "epoch": 0.2517717269675494, "grad_norm": 0.8851346373558044, "learning_rate": 2e-06, "loss": 0.1957, "step": 675 }, { "epoch": 0.25214472211861244, "grad_norm": 1.0565543174743652, "learning_rate": 2e-06, "loss": 0.2018, "step": 676 }, { "epoch": 0.2525177172696755, "grad_norm": 1.0138041973114014, "learning_rate": 2e-06, "loss": 0.192, "step": 677 }, { "epoch": 0.2528907124207385, "grad_norm": 1.1815528869628906, "learning_rate": 2e-06, "loss": 0.2073, "step": 678 }, { "epoch": 0.25326370757180156, "grad_norm": 1.0222357511520386, "learning_rate": 2e-06, "loss": 0.2139, "step": 679 }, { "epoch": 0.2536367027228646, "grad_norm": 1.1879674196243286, "learning_rate": 2e-06, "loss": 0.1797, "step": 680 }, { "epoch": 0.25400969787392763, "grad_norm": 1.617465615272522, "learning_rate": 2e-06, "loss": 0.2226, "step": 681 }, { "epoch": 0.25438269302499067, "grad_norm": 1.104323148727417, "learning_rate": 2e-06, "loss": 0.2151, "step": 682 }, { "epoch": 0.2547556881760537, "grad_norm": 1.416809320449829, "learning_rate": 2e-06, "loss": 0.1991, "step": 683 }, { "epoch": 0.25512868332711675, "grad_norm": 0.8006582260131836, "learning_rate": 2e-06, "loss": 0.1711, "step": 684 }, { "epoch": 0.2555016784781798, "grad_norm": 0.7324338555335999, "learning_rate": 2e-06, "loss": 0.2035, "step": 685 }, { "epoch": 0.2558746736292428, "grad_norm": 1.0860177278518677, "learning_rate": 2e-06, "loss": 0.2117, "step": 686 }, { "epoch": 0.25624766878030586, "grad_norm": 1.0112907886505127, "learning_rate": 2e-06, "loss": 0.188, "step": 687 }, { "epoch": 0.2566206639313689, "grad_norm": 0.872611403465271, "learning_rate": 2e-06, "loss": 0.2155, "step": 688 }, { "epoch": 0.25699365908243194, "grad_norm": 0.878930926322937, "learning_rate": 2e-06, "loss": 0.2058, "step": 689 }, { "epoch": 0.257366654233495, "grad_norm": 0.955852746963501, "learning_rate": 2e-06, "loss": 0.1968, "step": 690 }, { "epoch": 0.257739649384558, "grad_norm": 1.1580475568771362, "learning_rate": 2e-06, "loss": 0.2181, "step": 691 }, { "epoch": 0.25811264453562105, "grad_norm": 0.8880849480628967, "learning_rate": 2e-06, "loss": 0.2122, "step": 692 }, { "epoch": 0.2584856396866841, "grad_norm": 0.9151087403297424, "learning_rate": 2e-06, "loss": 0.1975, "step": 693 }, { "epoch": 0.2588586348377471, "grad_norm": 0.9916893243789673, "learning_rate": 2e-06, "loss": 0.2117, "step": 694 }, { "epoch": 0.25923162998881016, "grad_norm": 1.016208529472351, "learning_rate": 2e-06, "loss": 0.2181, "step": 695 }, { "epoch": 0.2596046251398732, "grad_norm": 1.2771873474121094, "learning_rate": 2e-06, "loss": 0.2069, "step": 696 }, { "epoch": 0.25997762029093624, "grad_norm": 0.9333749413490295, "learning_rate": 2e-06, "loss": 0.2036, "step": 697 }, { "epoch": 0.2603506154419993, "grad_norm": 0.800326943397522, "learning_rate": 2e-06, "loss": 0.1917, "step": 698 }, { "epoch": 0.2607236105930623, "grad_norm": 0.7154765129089355, "learning_rate": 2e-06, "loss": 0.2298, "step": 699 }, { "epoch": 0.26109660574412535, "grad_norm": 1.0297412872314453, "learning_rate": 2e-06, "loss": 0.1677, "step": 700 }, { "epoch": 0.26146960089518834, "grad_norm": 0.9019288420677185, "learning_rate": 2e-06, "loss": 0.2072, "step": 701 }, { "epoch": 0.2618425960462514, "grad_norm": 0.8919926881790161, "learning_rate": 2e-06, "loss": 0.1985, "step": 702 }, { "epoch": 0.2622155911973144, "grad_norm": 0.7822431325912476, "learning_rate": 2e-06, "loss": 0.2104, "step": 703 }, { "epoch": 0.26258858634837745, "grad_norm": 1.0176637172698975, "learning_rate": 2e-06, "loss": 0.2094, "step": 704 }, { "epoch": 0.2629615814994405, "grad_norm": 0.765091598033905, "learning_rate": 2e-06, "loss": 0.182, "step": 705 }, { "epoch": 0.2633345766505035, "grad_norm": 0.9157160520553589, "learning_rate": 2e-06, "loss": 0.197, "step": 706 }, { "epoch": 0.26370757180156656, "grad_norm": 1.0495715141296387, "learning_rate": 2e-06, "loss": 0.2037, "step": 707 }, { "epoch": 0.2640805669526296, "grad_norm": 0.8719690442085266, "learning_rate": 2e-06, "loss": 0.1914, "step": 708 }, { "epoch": 0.26445356210369264, "grad_norm": 0.9908958077430725, "learning_rate": 2e-06, "loss": 0.2222, "step": 709 }, { "epoch": 0.2648265572547557, "grad_norm": 0.9206390976905823, "learning_rate": 2e-06, "loss": 0.1722, "step": 710 }, { "epoch": 0.2651995524058187, "grad_norm": 0.8433656096458435, "learning_rate": 2e-06, "loss": 0.2222, "step": 711 }, { "epoch": 0.26557254755688176, "grad_norm": 1.3706755638122559, "learning_rate": 2e-06, "loss": 0.1707, "step": 712 }, { "epoch": 0.2659455427079448, "grad_norm": 1.174750804901123, "learning_rate": 2e-06, "loss": 0.2279, "step": 713 }, { "epoch": 0.26631853785900783, "grad_norm": 1.2879513502120972, "learning_rate": 2e-06, "loss": 0.21, "step": 714 }, { "epoch": 0.26669153301007087, "grad_norm": 1.1148231029510498, "learning_rate": 2e-06, "loss": 0.1892, "step": 715 }, { "epoch": 0.2670645281611339, "grad_norm": 0.8283557295799255, "learning_rate": 2e-06, "loss": 0.188, "step": 716 }, { "epoch": 0.26743752331219695, "grad_norm": 1.0812667608261108, "learning_rate": 2e-06, "loss": 0.2074, "step": 717 }, { "epoch": 0.26781051846326, "grad_norm": 1.1144688129425049, "learning_rate": 2e-06, "loss": 0.2251, "step": 718 }, { "epoch": 0.268183513614323, "grad_norm": 0.9569750428199768, "learning_rate": 2e-06, "loss": 0.1868, "step": 719 }, { "epoch": 0.26855650876538606, "grad_norm": 1.151784896850586, "learning_rate": 2e-06, "loss": 0.1802, "step": 720 }, { "epoch": 0.2689295039164491, "grad_norm": 1.0361768007278442, "learning_rate": 2e-06, "loss": 0.2043, "step": 721 }, { "epoch": 0.26930249906751214, "grad_norm": 1.0342217683792114, "learning_rate": 2e-06, "loss": 0.1829, "step": 722 }, { "epoch": 0.2696754942185752, "grad_norm": 1.0148510932922363, "learning_rate": 2e-06, "loss": 0.2186, "step": 723 }, { "epoch": 0.2700484893696382, "grad_norm": 1.0618101358413696, "learning_rate": 2e-06, "loss": 0.186, "step": 724 }, { "epoch": 0.27042148452070125, "grad_norm": 1.1286981105804443, "learning_rate": 2e-06, "loss": 0.182, "step": 725 }, { "epoch": 0.2707944796717643, "grad_norm": 0.8407672047615051, "learning_rate": 2e-06, "loss": 0.2175, "step": 726 }, { "epoch": 0.2711674748228273, "grad_norm": 0.9918192625045776, "learning_rate": 2e-06, "loss": 0.2294, "step": 727 }, { "epoch": 0.27154046997389036, "grad_norm": 1.0016976594924927, "learning_rate": 2e-06, "loss": 0.199, "step": 728 }, { "epoch": 0.2719134651249534, "grad_norm": 0.8489307761192322, "learning_rate": 2e-06, "loss": 0.2171, "step": 729 }, { "epoch": 0.2722864602760164, "grad_norm": 1.0137982368469238, "learning_rate": 2e-06, "loss": 0.2313, "step": 730 }, { "epoch": 0.2726594554270794, "grad_norm": 1.0565139055252075, "learning_rate": 2e-06, "loss": 0.2029, "step": 731 }, { "epoch": 0.27303245057814246, "grad_norm": 0.8135107159614563, "learning_rate": 2e-06, "loss": 0.176, "step": 732 }, { "epoch": 0.2734054457292055, "grad_norm": 0.9492537975311279, "learning_rate": 2e-06, "loss": 0.2072, "step": 733 }, { "epoch": 0.27377844088026854, "grad_norm": 1.0412604808807373, "learning_rate": 2e-06, "loss": 0.1857, "step": 734 }, { "epoch": 0.2741514360313316, "grad_norm": 0.9329516291618347, "learning_rate": 2e-06, "loss": 0.1998, "step": 735 }, { "epoch": 0.2745244311823946, "grad_norm": 1.8007322549819946, "learning_rate": 2e-06, "loss": 0.1642, "step": 736 }, { "epoch": 0.27489742633345765, "grad_norm": 0.908190906047821, "learning_rate": 2e-06, "loss": 0.1976, "step": 737 }, { "epoch": 0.2752704214845207, "grad_norm": 0.8497306108474731, "learning_rate": 2e-06, "loss": 0.2063, "step": 738 }, { "epoch": 0.2756434166355837, "grad_norm": 1.0472643375396729, "learning_rate": 2e-06, "loss": 0.1918, "step": 739 }, { "epoch": 0.27601641178664676, "grad_norm": 0.9079187512397766, "learning_rate": 2e-06, "loss": 0.2077, "step": 740 }, { "epoch": 0.2763894069377098, "grad_norm": 1.2124857902526855, "learning_rate": 2e-06, "loss": 0.21, "step": 741 }, { "epoch": 0.27676240208877284, "grad_norm": 1.1388746500015259, "learning_rate": 2e-06, "loss": 0.1814, "step": 742 }, { "epoch": 0.2771353972398359, "grad_norm": 0.7970696091651917, "learning_rate": 2e-06, "loss": 0.2032, "step": 743 }, { "epoch": 0.2775083923908989, "grad_norm": 0.7907265424728394, "learning_rate": 2e-06, "loss": 0.2106, "step": 744 }, { "epoch": 0.27788138754196196, "grad_norm": 0.8309521079063416, "learning_rate": 2e-06, "loss": 0.2059, "step": 745 }, { "epoch": 0.278254382693025, "grad_norm": 0.8650986552238464, "learning_rate": 2e-06, "loss": 0.2078, "step": 746 }, { "epoch": 0.27862737784408803, "grad_norm": 1.0123873949050903, "learning_rate": 2e-06, "loss": 0.1789, "step": 747 }, { "epoch": 0.27900037299515107, "grad_norm": 1.151126503944397, "learning_rate": 2e-06, "loss": 0.2079, "step": 748 }, { "epoch": 0.2793733681462141, "grad_norm": 0.8777632117271423, "learning_rate": 2e-06, "loss": 0.1902, "step": 749 }, { "epoch": 0.27974636329727715, "grad_norm": 0.9957173466682434, "learning_rate": 2e-06, "loss": 0.2126, "step": 750 }, { "epoch": 0.2801193584483402, "grad_norm": 0.8149513006210327, "learning_rate": 2e-06, "loss": 0.2015, "step": 751 }, { "epoch": 0.2804923535994032, "grad_norm": 1.203360915184021, "learning_rate": 2e-06, "loss": 0.216, "step": 752 }, { "epoch": 0.28086534875046626, "grad_norm": 0.7734687328338623, "learning_rate": 2e-06, "loss": 0.2178, "step": 753 }, { "epoch": 0.2812383439015293, "grad_norm": 0.9968791604042053, "learning_rate": 2e-06, "loss": 0.186, "step": 754 }, { "epoch": 0.28161133905259234, "grad_norm": 0.8255706429481506, "learning_rate": 2e-06, "loss": 0.2025, "step": 755 }, { "epoch": 0.2819843342036554, "grad_norm": 0.828402042388916, "learning_rate": 2e-06, "loss": 0.2065, "step": 756 }, { "epoch": 0.2823573293547184, "grad_norm": 1.2056639194488525, "learning_rate": 2e-06, "loss": 0.1782, "step": 757 }, { "epoch": 0.28273032450578145, "grad_norm": 1.1836950778961182, "learning_rate": 2e-06, "loss": 0.198, "step": 758 }, { "epoch": 0.2831033196568445, "grad_norm": 1.1901183128356934, "learning_rate": 2e-06, "loss": 0.2025, "step": 759 }, { "epoch": 0.28347631480790747, "grad_norm": 0.9625614881515503, "learning_rate": 2e-06, "loss": 0.196, "step": 760 }, { "epoch": 0.2838493099589705, "grad_norm": 0.9038240313529968, "learning_rate": 2e-06, "loss": 0.2037, "step": 761 }, { "epoch": 0.28422230511003355, "grad_norm": 0.8800246119499207, "learning_rate": 2e-06, "loss": 0.2254, "step": 762 }, { "epoch": 0.2845953002610966, "grad_norm": 0.7588366866111755, "learning_rate": 2e-06, "loss": 0.1676, "step": 763 }, { "epoch": 0.2849682954121596, "grad_norm": 0.82753986120224, "learning_rate": 2e-06, "loss": 0.2295, "step": 764 }, { "epoch": 0.28534129056322266, "grad_norm": 1.0983814001083374, "learning_rate": 2e-06, "loss": 0.2049, "step": 765 }, { "epoch": 0.2857142857142857, "grad_norm": 0.8202376365661621, "learning_rate": 2e-06, "loss": 0.1942, "step": 766 }, { "epoch": 0.28608728086534874, "grad_norm": 1.0410337448120117, "learning_rate": 2e-06, "loss": 0.1964, "step": 767 }, { "epoch": 0.2864602760164118, "grad_norm": 0.8169183731079102, "learning_rate": 2e-06, "loss": 0.2148, "step": 768 }, { "epoch": 0.2868332711674748, "grad_norm": 1.0874624252319336, "learning_rate": 2e-06, "loss": 0.2369, "step": 769 }, { "epoch": 0.28720626631853785, "grad_norm": 1.221967339515686, "learning_rate": 2e-06, "loss": 0.1911, "step": 770 }, { "epoch": 0.2875792614696009, "grad_norm": 1.057813286781311, "learning_rate": 2e-06, "loss": 0.1924, "step": 771 }, { "epoch": 0.2879522566206639, "grad_norm": 0.7396080493927002, "learning_rate": 2e-06, "loss": 0.2235, "step": 772 }, { "epoch": 0.28832525177172696, "grad_norm": 0.9804563522338867, "learning_rate": 2e-06, "loss": 0.2214, "step": 773 }, { "epoch": 0.28869824692279, "grad_norm": 0.9885976314544678, "learning_rate": 2e-06, "loss": 0.1982, "step": 774 }, { "epoch": 0.28907124207385304, "grad_norm": 0.7508994936943054, "learning_rate": 2e-06, "loss": 0.2111, "step": 775 }, { "epoch": 0.2894442372249161, "grad_norm": 0.844713032245636, "learning_rate": 2e-06, "loss": 0.2021, "step": 776 }, { "epoch": 0.2898172323759791, "grad_norm": 1.716420292854309, "learning_rate": 2e-06, "loss": 0.2106, "step": 777 }, { "epoch": 0.29019022752704215, "grad_norm": 0.9460886716842651, "learning_rate": 2e-06, "loss": 0.1878, "step": 778 }, { "epoch": 0.2905632226781052, "grad_norm": 0.7122468948364258, "learning_rate": 2e-06, "loss": 0.2135, "step": 779 }, { "epoch": 0.29093621782916823, "grad_norm": 1.309718370437622, "learning_rate": 2e-06, "loss": 0.1876, "step": 780 }, { "epoch": 0.29130921298023127, "grad_norm": 1.1822468042373657, "learning_rate": 2e-06, "loss": 0.1834, "step": 781 }, { "epoch": 0.2916822081312943, "grad_norm": 0.7834658026695251, "learning_rate": 2e-06, "loss": 0.1976, "step": 782 }, { "epoch": 0.29205520328235735, "grad_norm": 0.8861050009727478, "learning_rate": 2e-06, "loss": 0.2096, "step": 783 }, { "epoch": 0.2924281984334204, "grad_norm": 0.9005755186080933, "learning_rate": 2e-06, "loss": 0.1947, "step": 784 }, { "epoch": 0.2928011935844834, "grad_norm": 0.952597975730896, "learning_rate": 2e-06, "loss": 0.2054, "step": 785 }, { "epoch": 0.29317418873554646, "grad_norm": 0.915331244468689, "learning_rate": 2e-06, "loss": 0.1942, "step": 786 }, { "epoch": 0.2935471838866095, "grad_norm": 1.2303520441055298, "learning_rate": 2e-06, "loss": 0.195, "step": 787 }, { "epoch": 0.29392017903767254, "grad_norm": 0.98580002784729, "learning_rate": 2e-06, "loss": 0.1954, "step": 788 }, { "epoch": 0.2942931741887356, "grad_norm": 1.2160906791687012, "learning_rate": 2e-06, "loss": 0.2059, "step": 789 }, { "epoch": 0.29466616933979856, "grad_norm": 0.761691689491272, "learning_rate": 2e-06, "loss": 0.2283, "step": 790 }, { "epoch": 0.2950391644908616, "grad_norm": 0.9655224680900574, "learning_rate": 2e-06, "loss": 0.204, "step": 791 }, { "epoch": 0.29541215964192463, "grad_norm": 0.7672178745269775, "learning_rate": 2e-06, "loss": 0.1948, "step": 792 }, { "epoch": 0.29578515479298767, "grad_norm": 0.8200654983520508, "learning_rate": 2e-06, "loss": 0.2185, "step": 793 }, { "epoch": 0.2961581499440507, "grad_norm": 0.8756325840950012, "learning_rate": 2e-06, "loss": 0.214, "step": 794 }, { "epoch": 0.29653114509511375, "grad_norm": 1.0252673625946045, "learning_rate": 2e-06, "loss": 0.2293, "step": 795 }, { "epoch": 0.2969041402461768, "grad_norm": 1.0527899265289307, "learning_rate": 2e-06, "loss": 0.2178, "step": 796 }, { "epoch": 0.2972771353972398, "grad_norm": 1.0746062994003296, "learning_rate": 2e-06, "loss": 0.1998, "step": 797 }, { "epoch": 0.29765013054830286, "grad_norm": 0.8087089657783508, "learning_rate": 2e-06, "loss": 0.2049, "step": 798 }, { "epoch": 0.2980231256993659, "grad_norm": 1.3048698902130127, "learning_rate": 2e-06, "loss": 0.2074, "step": 799 }, { "epoch": 0.29839612085042894, "grad_norm": 0.9792512655258179, "learning_rate": 2e-06, "loss": 0.1963, "step": 800 }, { "epoch": 0.298769116001492, "grad_norm": 1.3280284404754639, "learning_rate": 2e-06, "loss": 0.2161, "step": 801 }, { "epoch": 0.299142111152555, "grad_norm": 1.024731159210205, "learning_rate": 2e-06, "loss": 0.1933, "step": 802 }, { "epoch": 0.29951510630361805, "grad_norm": 1.2583715915679932, "learning_rate": 2e-06, "loss": 0.2048, "step": 803 }, { "epoch": 0.2998881014546811, "grad_norm": 1.118729829788208, "learning_rate": 2e-06, "loss": 0.2048, "step": 804 }, { "epoch": 0.3002610966057441, "grad_norm": 0.7477700114250183, "learning_rate": 2e-06, "loss": 0.1949, "step": 805 }, { "epoch": 0.30063409175680716, "grad_norm": 1.3293098211288452, "learning_rate": 2e-06, "loss": 0.208, "step": 806 }, { "epoch": 0.3010070869078702, "grad_norm": 0.7457369565963745, "learning_rate": 2e-06, "loss": 0.2212, "step": 807 }, { "epoch": 0.30138008205893324, "grad_norm": 0.9776383638381958, "learning_rate": 2e-06, "loss": 0.196, "step": 808 }, { "epoch": 0.3017530772099963, "grad_norm": 0.6984178423881531, "learning_rate": 2e-06, "loss": 0.21, "step": 809 }, { "epoch": 0.3021260723610593, "grad_norm": 1.2122461795806885, "learning_rate": 2e-06, "loss": 0.1837, "step": 810 }, { "epoch": 0.30249906751212235, "grad_norm": 1.0389920473098755, "learning_rate": 2e-06, "loss": 0.1897, "step": 811 }, { "epoch": 0.3028720626631854, "grad_norm": 1.3622450828552246, "learning_rate": 2e-06, "loss": 0.1973, "step": 812 }, { "epoch": 0.30324505781424843, "grad_norm": 1.192179560661316, "learning_rate": 2e-06, "loss": 0.1894, "step": 813 }, { "epoch": 0.30361805296531147, "grad_norm": 0.7386181354522705, "learning_rate": 2e-06, "loss": 0.2306, "step": 814 }, { "epoch": 0.3039910481163745, "grad_norm": 1.0825145244598389, "learning_rate": 2e-06, "loss": 0.2216, "step": 815 }, { "epoch": 0.30436404326743755, "grad_norm": 0.8014611601829529, "learning_rate": 2e-06, "loss": 0.1936, "step": 816 }, { "epoch": 0.3047370384185006, "grad_norm": 0.8980475068092346, "learning_rate": 2e-06, "loss": 0.2057, "step": 817 }, { "epoch": 0.3051100335695636, "grad_norm": 1.198147177696228, "learning_rate": 2e-06, "loss": 0.2014, "step": 818 }, { "epoch": 0.30548302872062666, "grad_norm": 0.9037997722625732, "learning_rate": 2e-06, "loss": 0.1893, "step": 819 }, { "epoch": 0.30585602387168964, "grad_norm": 0.8807291388511658, "learning_rate": 2e-06, "loss": 0.2095, "step": 820 }, { "epoch": 0.3062290190227527, "grad_norm": 0.9128229022026062, "learning_rate": 2e-06, "loss": 0.1885, "step": 821 }, { "epoch": 0.3066020141738157, "grad_norm": 0.8169448971748352, "learning_rate": 2e-06, "loss": 0.2183, "step": 822 }, { "epoch": 0.30697500932487876, "grad_norm": 1.0538982152938843, "learning_rate": 2e-06, "loss": 0.1904, "step": 823 }, { "epoch": 0.3073480044759418, "grad_norm": 1.0489468574523926, "learning_rate": 2e-06, "loss": 0.2187, "step": 824 }, { "epoch": 0.30772099962700483, "grad_norm": 0.9746491312980652, "learning_rate": 2e-06, "loss": 0.2095, "step": 825 }, { "epoch": 0.30809399477806787, "grad_norm": 1.0140058994293213, "learning_rate": 2e-06, "loss": 0.194, "step": 826 }, { "epoch": 0.3084669899291309, "grad_norm": 0.9371570348739624, "learning_rate": 2e-06, "loss": 0.1751, "step": 827 }, { "epoch": 0.30883998508019395, "grad_norm": 0.931766927242279, "learning_rate": 2e-06, "loss": 0.1918, "step": 828 }, { "epoch": 0.309212980231257, "grad_norm": 0.9022484421730042, "learning_rate": 2e-06, "loss": 0.1929, "step": 829 }, { "epoch": 0.30958597538232, "grad_norm": 0.8784067630767822, "learning_rate": 2e-06, "loss": 0.1949, "step": 830 }, { "epoch": 0.30995897053338306, "grad_norm": 1.099555492401123, "learning_rate": 2e-06, "loss": 0.197, "step": 831 }, { "epoch": 0.3103319656844461, "grad_norm": 1.004995346069336, "learning_rate": 2e-06, "loss": 0.1813, "step": 832 }, { "epoch": 0.31070496083550914, "grad_norm": 0.9738664627075195, "learning_rate": 2e-06, "loss": 0.198, "step": 833 }, { "epoch": 0.3110779559865722, "grad_norm": 0.7574090957641602, "learning_rate": 2e-06, "loss": 0.2205, "step": 834 }, { "epoch": 0.3114509511376352, "grad_norm": 0.8266856670379639, "learning_rate": 2e-06, "loss": 0.1685, "step": 835 }, { "epoch": 0.31182394628869825, "grad_norm": 1.0651094913482666, "learning_rate": 2e-06, "loss": 0.1921, "step": 836 }, { "epoch": 0.3121969414397613, "grad_norm": 1.1830395460128784, "learning_rate": 2e-06, "loss": 0.2013, "step": 837 }, { "epoch": 0.3125699365908243, "grad_norm": 0.8132326602935791, "learning_rate": 2e-06, "loss": 0.1917, "step": 838 }, { "epoch": 0.31294293174188736, "grad_norm": 1.0352681875228882, "learning_rate": 2e-06, "loss": 0.2171, "step": 839 }, { "epoch": 0.3133159268929504, "grad_norm": 1.2947672605514526, "learning_rate": 2e-06, "loss": 0.1812, "step": 840 }, { "epoch": 0.31368892204401344, "grad_norm": 0.9956995248794556, "learning_rate": 2e-06, "loss": 0.1964, "step": 841 }, { "epoch": 0.3140619171950765, "grad_norm": 1.045432448387146, "learning_rate": 2e-06, "loss": 0.1998, "step": 842 }, { "epoch": 0.3144349123461395, "grad_norm": 0.8036715388298035, "learning_rate": 2e-06, "loss": 0.2174, "step": 843 }, { "epoch": 0.31480790749720255, "grad_norm": 0.9450079202651978, "learning_rate": 2e-06, "loss": 0.2057, "step": 844 }, { "epoch": 0.3151809026482656, "grad_norm": 0.9494861960411072, "learning_rate": 2e-06, "loss": 0.1985, "step": 845 }, { "epoch": 0.31555389779932863, "grad_norm": 0.9947577714920044, "learning_rate": 2e-06, "loss": 0.2012, "step": 846 }, { "epoch": 0.31592689295039167, "grad_norm": 1.3932459354400635, "learning_rate": 2e-06, "loss": 0.1839, "step": 847 }, { "epoch": 0.3162998881014547, "grad_norm": 0.7863510847091675, "learning_rate": 2e-06, "loss": 0.2337, "step": 848 }, { "epoch": 0.3166728832525177, "grad_norm": 0.8848775029182434, "learning_rate": 2e-06, "loss": 0.2041, "step": 849 }, { "epoch": 0.3170458784035807, "grad_norm": 0.911489725112915, "learning_rate": 2e-06, "loss": 0.197, "step": 850 }, { "epoch": 0.31741887355464377, "grad_norm": 0.8114460110664368, "learning_rate": 2e-06, "loss": 0.1906, "step": 851 }, { "epoch": 0.3177918687057068, "grad_norm": 0.8422631025314331, "learning_rate": 2e-06, "loss": 0.2094, "step": 852 }, { "epoch": 0.31816486385676984, "grad_norm": 0.9901478886604309, "learning_rate": 2e-06, "loss": 0.21, "step": 853 }, { "epoch": 0.3185378590078329, "grad_norm": 1.0816091299057007, "learning_rate": 2e-06, "loss": 0.1952, "step": 854 }, { "epoch": 0.3189108541588959, "grad_norm": 0.7913552522659302, "learning_rate": 2e-06, "loss": 0.2186, "step": 855 }, { "epoch": 0.31928384930995896, "grad_norm": 1.096597671508789, "learning_rate": 2e-06, "loss": 0.1986, "step": 856 }, { "epoch": 0.319656844461022, "grad_norm": 0.9933605194091797, "learning_rate": 2e-06, "loss": 0.2138, "step": 857 }, { "epoch": 0.32002983961208503, "grad_norm": 0.8845122456550598, "learning_rate": 2e-06, "loss": 0.2016, "step": 858 }, { "epoch": 0.32040283476314807, "grad_norm": 0.9170219302177429, "learning_rate": 2e-06, "loss": 0.2248, "step": 859 }, { "epoch": 0.3207758299142111, "grad_norm": 0.8853694796562195, "learning_rate": 2e-06, "loss": 0.2072, "step": 860 }, { "epoch": 0.32114882506527415, "grad_norm": 0.8211340308189392, "learning_rate": 2e-06, "loss": 0.1929, "step": 861 }, { "epoch": 0.3215218202163372, "grad_norm": 1.4581407308578491, "learning_rate": 2e-06, "loss": 0.1874, "step": 862 }, { "epoch": 0.3218948153674002, "grad_norm": 0.9136397242546082, "learning_rate": 2e-06, "loss": 0.2178, "step": 863 }, { "epoch": 0.32226781051846326, "grad_norm": 1.1327235698699951, "learning_rate": 2e-06, "loss": 0.1908, "step": 864 }, { "epoch": 0.3226408056695263, "grad_norm": 0.9776673913002014, "learning_rate": 2e-06, "loss": 0.1947, "step": 865 }, { "epoch": 0.32301380082058934, "grad_norm": 1.2059829235076904, "learning_rate": 2e-06, "loss": 0.2187, "step": 866 }, { "epoch": 0.3233867959716524, "grad_norm": 0.8930542469024658, "learning_rate": 2e-06, "loss": 0.1888, "step": 867 }, { "epoch": 0.3237597911227154, "grad_norm": 0.9218270182609558, "learning_rate": 2e-06, "loss": 0.2105, "step": 868 }, { "epoch": 0.32413278627377845, "grad_norm": 1.1489524841308594, "learning_rate": 2e-06, "loss": 0.1865, "step": 869 }, { "epoch": 0.3245057814248415, "grad_norm": 0.8859754800796509, "learning_rate": 2e-06, "loss": 0.2225, "step": 870 }, { "epoch": 0.3248787765759045, "grad_norm": 0.7416685223579407, "learning_rate": 2e-06, "loss": 0.2004, "step": 871 }, { "epoch": 0.32525177172696756, "grad_norm": 0.8773690462112427, "learning_rate": 2e-06, "loss": 0.1932, "step": 872 }, { "epoch": 0.3256247668780306, "grad_norm": 0.8621373772621155, "learning_rate": 2e-06, "loss": 0.1921, "step": 873 }, { "epoch": 0.32599776202909364, "grad_norm": 1.0499120950698853, "learning_rate": 2e-06, "loss": 0.218, "step": 874 }, { "epoch": 0.3263707571801567, "grad_norm": 1.0611863136291504, "learning_rate": 2e-06, "loss": 0.1987, "step": 875 }, { "epoch": 0.3267437523312197, "grad_norm": 1.1960920095443726, "learning_rate": 2e-06, "loss": 0.2181, "step": 876 }, { "epoch": 0.32711674748228275, "grad_norm": 1.0010141134262085, "learning_rate": 2e-06, "loss": 0.1645, "step": 877 }, { "epoch": 0.3274897426333458, "grad_norm": 1.0149712562561035, "learning_rate": 2e-06, "loss": 0.1899, "step": 878 }, { "epoch": 0.3278627377844088, "grad_norm": 1.1715494394302368, "learning_rate": 2e-06, "loss": 0.2206, "step": 879 }, { "epoch": 0.3282357329354718, "grad_norm": 0.965601921081543, "learning_rate": 2e-06, "loss": 0.2112, "step": 880 }, { "epoch": 0.32860872808653485, "grad_norm": 0.944214403629303, "learning_rate": 2e-06, "loss": 0.2039, "step": 881 }, { "epoch": 0.3289817232375979, "grad_norm": 1.0225995779037476, "learning_rate": 2e-06, "loss": 0.1996, "step": 882 }, { "epoch": 0.3293547183886609, "grad_norm": 0.8162322044372559, "learning_rate": 2e-06, "loss": 0.2076, "step": 883 }, { "epoch": 0.32972771353972397, "grad_norm": 0.937503457069397, "learning_rate": 2e-06, "loss": 0.212, "step": 884 }, { "epoch": 0.330100708690787, "grad_norm": 0.9410938620567322, "learning_rate": 2e-06, "loss": 0.1799, "step": 885 }, { "epoch": 0.33047370384185004, "grad_norm": 0.8899851441383362, "learning_rate": 2e-06, "loss": 0.2075, "step": 886 }, { "epoch": 0.3308466989929131, "grad_norm": 1.711491346359253, "learning_rate": 2e-06, "loss": 0.2028, "step": 887 }, { "epoch": 0.3312196941439761, "grad_norm": 1.0528260469436646, "learning_rate": 2e-06, "loss": 0.1942, "step": 888 }, { "epoch": 0.33159268929503916, "grad_norm": 1.2651773691177368, "learning_rate": 2e-06, "loss": 0.2255, "step": 889 }, { "epoch": 0.3319656844461022, "grad_norm": 0.7411620616912842, "learning_rate": 2e-06, "loss": 0.1953, "step": 890 }, { "epoch": 0.33233867959716523, "grad_norm": 0.8716508746147156, "learning_rate": 2e-06, "loss": 0.2145, "step": 891 }, { "epoch": 0.33271167474822827, "grad_norm": 1.0596343278884888, "learning_rate": 2e-06, "loss": 0.2247, "step": 892 }, { "epoch": 0.3330846698992913, "grad_norm": 0.9563511610031128, "learning_rate": 2e-06, "loss": 0.1996, "step": 893 }, { "epoch": 0.33345766505035435, "grad_norm": 1.1970733404159546, "learning_rate": 2e-06, "loss": 0.1833, "step": 894 }, { "epoch": 0.3338306602014174, "grad_norm": 0.8358293175697327, "learning_rate": 2e-06, "loss": 0.2203, "step": 895 }, { "epoch": 0.3342036553524804, "grad_norm": 0.9696957468986511, "learning_rate": 2e-06, "loss": 0.2067, "step": 896 }, { "epoch": 0.33457665050354346, "grad_norm": 1.0301525592803955, "learning_rate": 2e-06, "loss": 0.2064, "step": 897 }, { "epoch": 0.3349496456546065, "grad_norm": 1.0481252670288086, "learning_rate": 2e-06, "loss": 0.2181, "step": 898 }, { "epoch": 0.33532264080566954, "grad_norm": 1.0534732341766357, "learning_rate": 2e-06, "loss": 0.2159, "step": 899 }, { "epoch": 0.3356956359567326, "grad_norm": 1.1701549291610718, "learning_rate": 2e-06, "loss": 0.1812, "step": 900 }, { "epoch": 0.3360686311077956, "grad_norm": 1.090673804283142, "learning_rate": 2e-06, "loss": 0.1921, "step": 901 }, { "epoch": 0.33644162625885865, "grad_norm": 0.7534927725791931, "learning_rate": 2e-06, "loss": 0.1987, "step": 902 }, { "epoch": 0.3368146214099217, "grad_norm": 0.978081226348877, "learning_rate": 2e-06, "loss": 0.2184, "step": 903 }, { "epoch": 0.3371876165609847, "grad_norm": 1.1794285774230957, "learning_rate": 2e-06, "loss": 0.2009, "step": 904 }, { "epoch": 0.33756061171204776, "grad_norm": 1.3504539728164673, "learning_rate": 2e-06, "loss": 0.2122, "step": 905 }, { "epoch": 0.3379336068631108, "grad_norm": 0.8831222057342529, "learning_rate": 2e-06, "loss": 0.1901, "step": 906 }, { "epoch": 0.33830660201417384, "grad_norm": 1.359678030014038, "learning_rate": 2e-06, "loss": 0.2176, "step": 907 }, { "epoch": 0.3386795971652369, "grad_norm": 0.8956407904624939, "learning_rate": 2e-06, "loss": 0.1954, "step": 908 }, { "epoch": 0.33905259231629986, "grad_norm": 0.8307924866676331, "learning_rate": 2e-06, "loss": 0.2016, "step": 909 }, { "epoch": 0.3394255874673629, "grad_norm": 0.9338180422782898, "learning_rate": 2e-06, "loss": 0.2094, "step": 910 }, { "epoch": 0.33979858261842594, "grad_norm": 0.8787013292312622, "learning_rate": 2e-06, "loss": 0.2183, "step": 911 }, { "epoch": 0.340171577769489, "grad_norm": 0.8847888112068176, "learning_rate": 2e-06, "loss": 0.2106, "step": 912 }, { "epoch": 0.340544572920552, "grad_norm": 0.9362011551856995, "learning_rate": 2e-06, "loss": 0.2181, "step": 913 }, { "epoch": 0.34091756807161505, "grad_norm": 0.9681510329246521, "learning_rate": 2e-06, "loss": 0.2128, "step": 914 }, { "epoch": 0.3412905632226781, "grad_norm": 0.8502315878868103, "learning_rate": 2e-06, "loss": 0.2301, "step": 915 }, { "epoch": 0.3416635583737411, "grad_norm": 1.0404282808303833, "learning_rate": 2e-06, "loss": 0.2065, "step": 916 }, { "epoch": 0.34203655352480417, "grad_norm": 1.0191593170166016, "learning_rate": 2e-06, "loss": 0.2262, "step": 917 }, { "epoch": 0.3424095486758672, "grad_norm": 1.191853404045105, "learning_rate": 2e-06, "loss": 0.1987, "step": 918 }, { "epoch": 0.34278254382693024, "grad_norm": 1.1574351787567139, "learning_rate": 2e-06, "loss": 0.1911, "step": 919 }, { "epoch": 0.3431555389779933, "grad_norm": 0.8975393772125244, "learning_rate": 2e-06, "loss": 0.2081, "step": 920 }, { "epoch": 0.3435285341290563, "grad_norm": 0.9629279971122742, "learning_rate": 2e-06, "loss": 0.2089, "step": 921 }, { "epoch": 0.34390152928011936, "grad_norm": 1.2684855461120605, "learning_rate": 2e-06, "loss": 0.1905, "step": 922 }, { "epoch": 0.3442745244311824, "grad_norm": 0.8186299204826355, "learning_rate": 2e-06, "loss": 0.2161, "step": 923 }, { "epoch": 0.34464751958224543, "grad_norm": 1.002027153968811, "learning_rate": 2e-06, "loss": 0.2168, "step": 924 }, { "epoch": 0.34502051473330847, "grad_norm": 1.2483253479003906, "learning_rate": 2e-06, "loss": 0.2271, "step": 925 }, { "epoch": 0.3453935098843715, "grad_norm": 0.9245807528495789, "learning_rate": 2e-06, "loss": 0.2002, "step": 926 }, { "epoch": 0.34576650503543455, "grad_norm": 1.2918051481246948, "learning_rate": 2e-06, "loss": 0.2103, "step": 927 }, { "epoch": 0.3461395001864976, "grad_norm": 1.0108749866485596, "learning_rate": 2e-06, "loss": 0.1907, "step": 928 }, { "epoch": 0.3465124953375606, "grad_norm": 1.1306078433990479, "learning_rate": 2e-06, "loss": 0.1871, "step": 929 }, { "epoch": 0.34688549048862366, "grad_norm": 1.1442478895187378, "learning_rate": 2e-06, "loss": 0.216, "step": 930 }, { "epoch": 0.3472584856396867, "grad_norm": 1.06812584400177, "learning_rate": 2e-06, "loss": 0.1842, "step": 931 }, { "epoch": 0.34763148079074974, "grad_norm": 0.8505949974060059, "learning_rate": 2e-06, "loss": 0.2128, "step": 932 }, { "epoch": 0.3480044759418128, "grad_norm": 0.9828668832778931, "learning_rate": 2e-06, "loss": 0.1863, "step": 933 }, { "epoch": 0.3483774710928758, "grad_norm": 0.8433536887168884, "learning_rate": 2e-06, "loss": 0.198, "step": 934 }, { "epoch": 0.34875046624393885, "grad_norm": 1.0484402179718018, "learning_rate": 2e-06, "loss": 0.1966, "step": 935 }, { "epoch": 0.3491234613950019, "grad_norm": 0.9749783277511597, "learning_rate": 2e-06, "loss": 0.2099, "step": 936 }, { "epoch": 0.3494964565460649, "grad_norm": 1.0232090950012207, "learning_rate": 2e-06, "loss": 0.2112, "step": 937 }, { "epoch": 0.34986945169712796, "grad_norm": 0.8946332335472107, "learning_rate": 2e-06, "loss": 0.1837, "step": 938 }, { "epoch": 0.35024244684819095, "grad_norm": 0.8554012179374695, "learning_rate": 2e-06, "loss": 0.2266, "step": 939 }, { "epoch": 0.350615441999254, "grad_norm": 0.9120677709579468, "learning_rate": 2e-06, "loss": 0.1854, "step": 940 }, { "epoch": 0.350988437150317, "grad_norm": 1.2240043878555298, "learning_rate": 2e-06, "loss": 0.2195, "step": 941 }, { "epoch": 0.35136143230138006, "grad_norm": 1.1926672458648682, "learning_rate": 2e-06, "loss": 0.1832, "step": 942 }, { "epoch": 0.3517344274524431, "grad_norm": 1.169490098953247, "learning_rate": 2e-06, "loss": 0.2203, "step": 943 }, { "epoch": 0.35210742260350614, "grad_norm": 0.6632615923881531, "learning_rate": 2e-06, "loss": 0.2044, "step": 944 }, { "epoch": 0.3524804177545692, "grad_norm": 1.394538164138794, "learning_rate": 2e-06, "loss": 0.1867, "step": 945 }, { "epoch": 0.3528534129056322, "grad_norm": 0.9661036133766174, "learning_rate": 2e-06, "loss": 0.1966, "step": 946 }, { "epoch": 0.35322640805669525, "grad_norm": 0.8952990770339966, "learning_rate": 2e-06, "loss": 0.2145, "step": 947 }, { "epoch": 0.3535994032077583, "grad_norm": 0.9991438388824463, "learning_rate": 2e-06, "loss": 0.181, "step": 948 }, { "epoch": 0.3539723983588213, "grad_norm": 1.1404422521591187, "learning_rate": 2e-06, "loss": 0.202, "step": 949 }, { "epoch": 0.35434539350988437, "grad_norm": 0.9403278827667236, "learning_rate": 2e-06, "loss": 0.2214, "step": 950 }, { "epoch": 0.3547183886609474, "grad_norm": 0.6493822336196899, "learning_rate": 2e-06, "loss": 0.2193, "step": 951 }, { "epoch": 0.35509138381201044, "grad_norm": 0.766869843006134, "learning_rate": 2e-06, "loss": 0.2055, "step": 952 }, { "epoch": 0.3554643789630735, "grad_norm": 0.8727012872695923, "learning_rate": 2e-06, "loss": 0.2085, "step": 953 }, { "epoch": 0.3558373741141365, "grad_norm": 1.1994620561599731, "learning_rate": 2e-06, "loss": 0.187, "step": 954 }, { "epoch": 0.35621036926519956, "grad_norm": 0.810297429561615, "learning_rate": 2e-06, "loss": 0.2117, "step": 955 }, { "epoch": 0.3565833644162626, "grad_norm": 0.9238312244415283, "learning_rate": 2e-06, "loss": 0.2053, "step": 956 }, { "epoch": 0.35695635956732563, "grad_norm": 0.920512855052948, "learning_rate": 2e-06, "loss": 0.2061, "step": 957 }, { "epoch": 0.35732935471838867, "grad_norm": 1.180568814277649, "learning_rate": 2e-06, "loss": 0.2082, "step": 958 }, { "epoch": 0.3577023498694517, "grad_norm": 1.0023415088653564, "learning_rate": 2e-06, "loss": 0.2334, "step": 959 }, { "epoch": 0.35807534502051475, "grad_norm": 0.9183721542358398, "learning_rate": 2e-06, "loss": 0.2045, "step": 960 }, { "epoch": 0.3584483401715778, "grad_norm": 0.9470187425613403, "learning_rate": 2e-06, "loss": 0.2135, "step": 961 }, { "epoch": 0.3588213353226408, "grad_norm": 1.357176661491394, "learning_rate": 2e-06, "loss": 0.1882, "step": 962 }, { "epoch": 0.35919433047370386, "grad_norm": 0.7957634925842285, "learning_rate": 2e-06, "loss": 0.2113, "step": 963 }, { "epoch": 0.3595673256247669, "grad_norm": 1.1549471616744995, "learning_rate": 2e-06, "loss": 0.2246, "step": 964 }, { "epoch": 0.35994032077582994, "grad_norm": 0.8826845288276672, "learning_rate": 2e-06, "loss": 0.2142, "step": 965 }, { "epoch": 0.360313315926893, "grad_norm": 1.143000841140747, "learning_rate": 2e-06, "loss": 0.1964, "step": 966 }, { "epoch": 0.360686311077956, "grad_norm": 0.7854561805725098, "learning_rate": 2e-06, "loss": 0.219, "step": 967 }, { "epoch": 0.36105930622901905, "grad_norm": 0.8030688166618347, "learning_rate": 2e-06, "loss": 0.2166, "step": 968 }, { "epoch": 0.36143230138008203, "grad_norm": 1.1307846307754517, "learning_rate": 2e-06, "loss": 0.1867, "step": 969 }, { "epoch": 0.36180529653114507, "grad_norm": 1.1654753684997559, "learning_rate": 2e-06, "loss": 0.2142, "step": 970 }, { "epoch": 0.3621782916822081, "grad_norm": 0.9277890920639038, "learning_rate": 2e-06, "loss": 0.2263, "step": 971 }, { "epoch": 0.36255128683327115, "grad_norm": 1.3197232484817505, "learning_rate": 2e-06, "loss": 0.201, "step": 972 }, { "epoch": 0.3629242819843342, "grad_norm": 0.9182858467102051, "learning_rate": 2e-06, "loss": 0.2072, "step": 973 }, { "epoch": 0.3632972771353972, "grad_norm": 1.1942015886306763, "learning_rate": 2e-06, "loss": 0.1907, "step": 974 }, { "epoch": 0.36367027228646026, "grad_norm": 1.0474880933761597, "learning_rate": 2e-06, "loss": 0.2105, "step": 975 }, { "epoch": 0.3640432674375233, "grad_norm": 0.9015294313430786, "learning_rate": 2e-06, "loss": 0.235, "step": 976 }, { "epoch": 0.36441626258858634, "grad_norm": 1.1072009801864624, "learning_rate": 2e-06, "loss": 0.2011, "step": 977 }, { "epoch": 0.3647892577396494, "grad_norm": 0.8931601643562317, "learning_rate": 2e-06, "loss": 0.199, "step": 978 }, { "epoch": 0.3651622528907124, "grad_norm": 1.0246127843856812, "learning_rate": 2e-06, "loss": 0.1924, "step": 979 }, { "epoch": 0.36553524804177545, "grad_norm": 0.8978980779647827, "learning_rate": 2e-06, "loss": 0.2153, "step": 980 }, { "epoch": 0.3659082431928385, "grad_norm": 0.8116791248321533, "learning_rate": 2e-06, "loss": 0.2267, "step": 981 }, { "epoch": 0.3662812383439015, "grad_norm": 0.958911120891571, "learning_rate": 2e-06, "loss": 0.2033, "step": 982 }, { "epoch": 0.36665423349496457, "grad_norm": 0.9785597920417786, "learning_rate": 2e-06, "loss": 0.2118, "step": 983 }, { "epoch": 0.3670272286460276, "grad_norm": 0.7932273745536804, "learning_rate": 2e-06, "loss": 0.204, "step": 984 }, { "epoch": 0.36740022379709064, "grad_norm": 0.9915621876716614, "learning_rate": 2e-06, "loss": 0.2153, "step": 985 }, { "epoch": 0.3677732189481537, "grad_norm": 0.8830494284629822, "learning_rate": 2e-06, "loss": 0.2028, "step": 986 }, { "epoch": 0.3681462140992167, "grad_norm": 1.1246381998062134, "learning_rate": 2e-06, "loss": 0.2164, "step": 987 }, { "epoch": 0.36851920925027976, "grad_norm": 1.0047249794006348, "learning_rate": 2e-06, "loss": 0.2132, "step": 988 }, { "epoch": 0.3688922044013428, "grad_norm": 1.0233324766159058, "learning_rate": 2e-06, "loss": 0.2226, "step": 989 }, { "epoch": 0.36926519955240583, "grad_norm": 1.4614324569702148, "learning_rate": 2e-06, "loss": 0.1997, "step": 990 }, { "epoch": 0.36963819470346887, "grad_norm": 1.2231661081314087, "learning_rate": 2e-06, "loss": 0.199, "step": 991 }, { "epoch": 0.3700111898545319, "grad_norm": 0.7069471478462219, "learning_rate": 2e-06, "loss": 0.2194, "step": 992 }, { "epoch": 0.37038418500559495, "grad_norm": 1.2101720571517944, "learning_rate": 2e-06, "loss": 0.2207, "step": 993 }, { "epoch": 0.370757180156658, "grad_norm": 0.8413933515548706, "learning_rate": 2e-06, "loss": 0.2137, "step": 994 }, { "epoch": 0.371130175307721, "grad_norm": 2.1912453174591064, "learning_rate": 2e-06, "loss": 0.1993, "step": 995 }, { "epoch": 0.37150317045878406, "grad_norm": 1.2123141288757324, "learning_rate": 2e-06, "loss": 0.2128, "step": 996 }, { "epoch": 0.3718761656098471, "grad_norm": 1.0624631643295288, "learning_rate": 2e-06, "loss": 0.1879, "step": 997 }, { "epoch": 0.3722491607609101, "grad_norm": 1.0499316453933716, "learning_rate": 2e-06, "loss": 0.2126, "step": 998 }, { "epoch": 0.3726221559119731, "grad_norm": 0.8166530132293701, "learning_rate": 2e-06, "loss": 0.2151, "step": 999 }, { "epoch": 0.37299515106303616, "grad_norm": 1.2379356622695923, "learning_rate": 2e-06, "loss": 0.2391, "step": 1000 }, { "epoch": 0.3733681462140992, "grad_norm": 1.1387557983398438, "learning_rate": 2e-06, "loss": 0.2115, "step": 1001 }, { "epoch": 0.37374114136516223, "grad_norm": 1.013697862625122, "learning_rate": 2e-06, "loss": 0.1953, "step": 1002 }, { "epoch": 0.37411413651622527, "grad_norm": 0.9432240128517151, "learning_rate": 2e-06, "loss": 0.1893, "step": 1003 }, { "epoch": 0.3744871316672883, "grad_norm": 1.2834141254425049, "learning_rate": 2e-06, "loss": 0.1991, "step": 1004 }, { "epoch": 0.37486012681835135, "grad_norm": 1.1053087711334229, "learning_rate": 2e-06, "loss": 0.1966, "step": 1005 }, { "epoch": 0.3752331219694144, "grad_norm": 0.9161782264709473, "learning_rate": 2e-06, "loss": 0.194, "step": 1006 }, { "epoch": 0.3756061171204774, "grad_norm": 0.7631995677947998, "learning_rate": 2e-06, "loss": 0.2047, "step": 1007 }, { "epoch": 0.37597911227154046, "grad_norm": 0.9678554534912109, "learning_rate": 2e-06, "loss": 0.1859, "step": 1008 }, { "epoch": 0.3763521074226035, "grad_norm": 1.4488648176193237, "learning_rate": 2e-06, "loss": 0.1913, "step": 1009 }, { "epoch": 0.37672510257366654, "grad_norm": 1.2140763998031616, "learning_rate": 2e-06, "loss": 0.2006, "step": 1010 }, { "epoch": 0.3770980977247296, "grad_norm": 1.072853922843933, "learning_rate": 2e-06, "loss": 0.2041, "step": 1011 }, { "epoch": 0.3774710928757926, "grad_norm": 0.9722952246665955, "learning_rate": 2e-06, "loss": 0.1848, "step": 1012 }, { "epoch": 0.37784408802685565, "grad_norm": 0.8668981194496155, "learning_rate": 2e-06, "loss": 0.2064, "step": 1013 }, { "epoch": 0.3782170831779187, "grad_norm": 1.0233021974563599, "learning_rate": 2e-06, "loss": 0.182, "step": 1014 }, { "epoch": 0.3785900783289817, "grad_norm": 0.7861064672470093, "learning_rate": 2e-06, "loss": 0.2276, "step": 1015 }, { "epoch": 0.37896307348004477, "grad_norm": 0.8017202615737915, "learning_rate": 2e-06, "loss": 0.203, "step": 1016 }, { "epoch": 0.3793360686311078, "grad_norm": 1.0342336893081665, "learning_rate": 2e-06, "loss": 0.2264, "step": 1017 }, { "epoch": 0.37970906378217084, "grad_norm": 1.045116662979126, "learning_rate": 2e-06, "loss": 0.2037, "step": 1018 }, { "epoch": 0.3800820589332339, "grad_norm": 0.7543050050735474, "learning_rate": 2e-06, "loss": 0.19, "step": 1019 }, { "epoch": 0.3804550540842969, "grad_norm": 0.8177738785743713, "learning_rate": 2e-06, "loss": 0.1985, "step": 1020 }, { "epoch": 0.38082804923535996, "grad_norm": 1.0118495225906372, "learning_rate": 2e-06, "loss": 0.1997, "step": 1021 }, { "epoch": 0.381201044386423, "grad_norm": 0.8239173293113708, "learning_rate": 2e-06, "loss": 0.2013, "step": 1022 }, { "epoch": 0.38157403953748603, "grad_norm": 0.8710222840309143, "learning_rate": 2e-06, "loss": 0.1884, "step": 1023 }, { "epoch": 0.38194703468854907, "grad_norm": 0.7206466197967529, "learning_rate": 2e-06, "loss": 0.1943, "step": 1024 }, { "epoch": 0.3823200298396121, "grad_norm": 1.2451677322387695, "learning_rate": 2e-06, "loss": 0.2036, "step": 1025 }, { "epoch": 0.38269302499067515, "grad_norm": 1.1192114353179932, "learning_rate": 2e-06, "loss": 0.1983, "step": 1026 }, { "epoch": 0.3830660201417382, "grad_norm": 0.9100322127342224, "learning_rate": 2e-06, "loss": 0.2065, "step": 1027 }, { "epoch": 0.38343901529280117, "grad_norm": 0.9208992719650269, "learning_rate": 2e-06, "loss": 0.2213, "step": 1028 }, { "epoch": 0.3838120104438642, "grad_norm": 1.1659595966339111, "learning_rate": 2e-06, "loss": 0.2073, "step": 1029 }, { "epoch": 0.38418500559492724, "grad_norm": 1.1880601644515991, "learning_rate": 2e-06, "loss": 0.2019, "step": 1030 }, { "epoch": 0.3845580007459903, "grad_norm": 0.9839760065078735, "learning_rate": 2e-06, "loss": 0.2046, "step": 1031 }, { "epoch": 0.3849309958970533, "grad_norm": 0.9602760672569275, "learning_rate": 2e-06, "loss": 0.1973, "step": 1032 }, { "epoch": 0.38530399104811636, "grad_norm": 1.1350632905960083, "learning_rate": 2e-06, "loss": 0.216, "step": 1033 }, { "epoch": 0.3856769861991794, "grad_norm": 1.0314005613327026, "learning_rate": 2e-06, "loss": 0.1775, "step": 1034 }, { "epoch": 0.38604998135024243, "grad_norm": 1.2765389680862427, "learning_rate": 2e-06, "loss": 0.2044, "step": 1035 }, { "epoch": 0.38642297650130547, "grad_norm": 1.0244998931884766, "learning_rate": 2e-06, "loss": 0.1776, "step": 1036 }, { "epoch": 0.3867959716523685, "grad_norm": 0.8764463663101196, "learning_rate": 2e-06, "loss": 0.2179, "step": 1037 }, { "epoch": 0.38716896680343155, "grad_norm": 0.8377415537834167, "learning_rate": 2e-06, "loss": 0.1991, "step": 1038 }, { "epoch": 0.3875419619544946, "grad_norm": 0.8251004815101624, "learning_rate": 2e-06, "loss": 0.1992, "step": 1039 }, { "epoch": 0.3879149571055576, "grad_norm": 0.9268501400947571, "learning_rate": 2e-06, "loss": 0.1921, "step": 1040 }, { "epoch": 0.38828795225662066, "grad_norm": 0.8755983114242554, "learning_rate": 2e-06, "loss": 0.2054, "step": 1041 }, { "epoch": 0.3886609474076837, "grad_norm": 0.8545960783958435, "learning_rate": 2e-06, "loss": 0.1953, "step": 1042 }, { "epoch": 0.38903394255874674, "grad_norm": 0.9620019793510437, "learning_rate": 2e-06, "loss": 0.2161, "step": 1043 }, { "epoch": 0.3894069377098098, "grad_norm": 0.9597080945968628, "learning_rate": 2e-06, "loss": 0.1811, "step": 1044 }, { "epoch": 0.3897799328608728, "grad_norm": 0.9461061358451843, "learning_rate": 2e-06, "loss": 0.1831, "step": 1045 }, { "epoch": 0.39015292801193585, "grad_norm": 0.8956647515296936, "learning_rate": 2e-06, "loss": 0.2233, "step": 1046 }, { "epoch": 0.3905259231629989, "grad_norm": 0.9699547290802002, "learning_rate": 2e-06, "loss": 0.1889, "step": 1047 }, { "epoch": 0.3908989183140619, "grad_norm": 0.8877326250076294, "learning_rate": 2e-06, "loss": 0.224, "step": 1048 }, { "epoch": 0.39127191346512497, "grad_norm": 0.9201275110244751, "learning_rate": 2e-06, "loss": 0.2102, "step": 1049 }, { "epoch": 0.391644908616188, "grad_norm": 1.0693944692611694, "learning_rate": 2e-06, "loss": 0.2011, "step": 1050 }, { "epoch": 0.39201790376725104, "grad_norm": 1.1724352836608887, "learning_rate": 2e-06, "loss": 0.192, "step": 1051 }, { "epoch": 0.3923908989183141, "grad_norm": 0.9381037950515747, "learning_rate": 2e-06, "loss": 0.207, "step": 1052 }, { "epoch": 0.3927638940693771, "grad_norm": 1.0867573022842407, "learning_rate": 2e-06, "loss": 0.1999, "step": 1053 }, { "epoch": 0.39313688922044016, "grad_norm": 0.942021906375885, "learning_rate": 2e-06, "loss": 0.2042, "step": 1054 }, { "epoch": 0.3935098843715032, "grad_norm": 1.2214759588241577, "learning_rate": 2e-06, "loss": 0.1852, "step": 1055 }, { "epoch": 0.39388287952256623, "grad_norm": 0.7655478715896606, "learning_rate": 2e-06, "loss": 0.2191, "step": 1056 }, { "epoch": 0.39425587467362927, "grad_norm": 0.7545295357704163, "learning_rate": 2e-06, "loss": 0.1925, "step": 1057 }, { "epoch": 0.39462886982469225, "grad_norm": 1.0130598545074463, "learning_rate": 2e-06, "loss": 0.2139, "step": 1058 }, { "epoch": 0.3950018649757553, "grad_norm": 0.812484860420227, "learning_rate": 2e-06, "loss": 0.2171, "step": 1059 }, { "epoch": 0.39537486012681833, "grad_norm": 1.1503957509994507, "learning_rate": 2e-06, "loss": 0.1945, "step": 1060 }, { "epoch": 0.39574785527788137, "grad_norm": 0.9575464129447937, "learning_rate": 2e-06, "loss": 0.2049, "step": 1061 }, { "epoch": 0.3961208504289444, "grad_norm": 0.9507084488868713, "learning_rate": 2e-06, "loss": 0.2229, "step": 1062 }, { "epoch": 0.39649384558000744, "grad_norm": 1.0455634593963623, "learning_rate": 2e-06, "loss": 0.2116, "step": 1063 }, { "epoch": 0.3968668407310705, "grad_norm": 0.8971614837646484, "learning_rate": 2e-06, "loss": 0.1952, "step": 1064 }, { "epoch": 0.3972398358821335, "grad_norm": 0.9910494089126587, "learning_rate": 2e-06, "loss": 0.1889, "step": 1065 }, { "epoch": 0.39761283103319656, "grad_norm": 0.8379992246627808, "learning_rate": 2e-06, "loss": 0.189, "step": 1066 }, { "epoch": 0.3979858261842596, "grad_norm": 1.0211981534957886, "learning_rate": 2e-06, "loss": 0.2176, "step": 1067 }, { "epoch": 0.39835882133532263, "grad_norm": 1.0475550889968872, "learning_rate": 2e-06, "loss": 0.1913, "step": 1068 }, { "epoch": 0.39873181648638567, "grad_norm": 0.8243854641914368, "learning_rate": 2e-06, "loss": 0.192, "step": 1069 }, { "epoch": 0.3991048116374487, "grad_norm": 0.971701979637146, "learning_rate": 2e-06, "loss": 0.2115, "step": 1070 }, { "epoch": 0.39947780678851175, "grad_norm": 0.7622646689414978, "learning_rate": 2e-06, "loss": 0.2166, "step": 1071 }, { "epoch": 0.3998508019395748, "grad_norm": 1.1252025365829468, "learning_rate": 2e-06, "loss": 0.2065, "step": 1072 }, { "epoch": 0.4002237970906378, "grad_norm": 0.9411959052085876, "learning_rate": 2e-06, "loss": 0.2322, "step": 1073 }, { "epoch": 0.40059679224170086, "grad_norm": 0.776604175567627, "learning_rate": 2e-06, "loss": 0.1866, "step": 1074 }, { "epoch": 0.4009697873927639, "grad_norm": 1.5085855722427368, "learning_rate": 2e-06, "loss": 0.1687, "step": 1075 }, { "epoch": 0.40134278254382694, "grad_norm": 1.1930238008499146, "learning_rate": 2e-06, "loss": 0.2409, "step": 1076 }, { "epoch": 0.40171577769489, "grad_norm": 0.9021043181419373, "learning_rate": 2e-06, "loss": 0.2133, "step": 1077 }, { "epoch": 0.402088772845953, "grad_norm": 1.103205680847168, "learning_rate": 2e-06, "loss": 0.2015, "step": 1078 }, { "epoch": 0.40246176799701605, "grad_norm": 1.0273627042770386, "learning_rate": 2e-06, "loss": 0.1979, "step": 1079 }, { "epoch": 0.4028347631480791, "grad_norm": 0.7992609739303589, "learning_rate": 2e-06, "loss": 0.224, "step": 1080 }, { "epoch": 0.4032077582991421, "grad_norm": 0.8837320804595947, "learning_rate": 2e-06, "loss": 0.22, "step": 1081 }, { "epoch": 0.40358075345020517, "grad_norm": 0.7483212947845459, "learning_rate": 2e-06, "loss": 0.2171, "step": 1082 }, { "epoch": 0.4039537486012682, "grad_norm": 1.1376221179962158, "learning_rate": 2e-06, "loss": 0.2225, "step": 1083 }, { "epoch": 0.40432674375233124, "grad_norm": 0.9611302614212036, "learning_rate": 2e-06, "loss": 0.1778, "step": 1084 }, { "epoch": 0.4046997389033943, "grad_norm": 0.8824406266212463, "learning_rate": 2e-06, "loss": 0.2053, "step": 1085 }, { "epoch": 0.4050727340544573, "grad_norm": 1.2256181240081787, "learning_rate": 2e-06, "loss": 0.2167, "step": 1086 }, { "epoch": 0.40544572920552036, "grad_norm": 1.028106927871704, "learning_rate": 2e-06, "loss": 0.1906, "step": 1087 }, { "epoch": 0.40581872435658334, "grad_norm": 0.8158109188079834, "learning_rate": 2e-06, "loss": 0.208, "step": 1088 }, { "epoch": 0.4061917195076464, "grad_norm": 0.8966194987297058, "learning_rate": 2e-06, "loss": 0.2142, "step": 1089 }, { "epoch": 0.4065647146587094, "grad_norm": 0.9066256880760193, "learning_rate": 2e-06, "loss": 0.2005, "step": 1090 }, { "epoch": 0.40693770980977245, "grad_norm": 1.0050798654556274, "learning_rate": 2e-06, "loss": 0.1951, "step": 1091 }, { "epoch": 0.4073107049608355, "grad_norm": 1.070601463317871, "learning_rate": 2e-06, "loss": 0.2134, "step": 1092 }, { "epoch": 0.40768370011189853, "grad_norm": 0.8856233358383179, "learning_rate": 2e-06, "loss": 0.2101, "step": 1093 }, { "epoch": 0.40805669526296157, "grad_norm": 1.0327688455581665, "learning_rate": 2e-06, "loss": 0.2406, "step": 1094 }, { "epoch": 0.4084296904140246, "grad_norm": 1.12840735912323, "learning_rate": 2e-06, "loss": 0.1721, "step": 1095 }, { "epoch": 0.40880268556508764, "grad_norm": 0.8932473063468933, "learning_rate": 2e-06, "loss": 0.2191, "step": 1096 }, { "epoch": 0.4091756807161507, "grad_norm": 1.1275701522827148, "learning_rate": 2e-06, "loss": 0.2014, "step": 1097 }, { "epoch": 0.4095486758672137, "grad_norm": 0.9176378846168518, "learning_rate": 2e-06, "loss": 0.2139, "step": 1098 }, { "epoch": 0.40992167101827676, "grad_norm": 0.9862965941429138, "learning_rate": 2e-06, "loss": 0.192, "step": 1099 }, { "epoch": 0.4102946661693398, "grad_norm": 1.0645536184310913, "learning_rate": 2e-06, "loss": 0.2263, "step": 1100 }, { "epoch": 0.41066766132040283, "grad_norm": 1.0197139978408813, "learning_rate": 2e-06, "loss": 0.1987, "step": 1101 }, { "epoch": 0.41104065647146587, "grad_norm": 0.9115609526634216, "learning_rate": 2e-06, "loss": 0.1947, "step": 1102 }, { "epoch": 0.4114136516225289, "grad_norm": 0.8956945538520813, "learning_rate": 2e-06, "loss": 0.2114, "step": 1103 }, { "epoch": 0.41178664677359195, "grad_norm": 0.8169106841087341, "learning_rate": 2e-06, "loss": 0.2256, "step": 1104 }, { "epoch": 0.412159641924655, "grad_norm": 0.997282087802887, "learning_rate": 2e-06, "loss": 0.2062, "step": 1105 }, { "epoch": 0.412532637075718, "grad_norm": 0.8512946367263794, "learning_rate": 2e-06, "loss": 0.2173, "step": 1106 }, { "epoch": 0.41290563222678106, "grad_norm": 0.9116448163986206, "learning_rate": 2e-06, "loss": 0.19, "step": 1107 }, { "epoch": 0.4132786273778441, "grad_norm": 0.9331592917442322, "learning_rate": 2e-06, "loss": 0.2084, "step": 1108 }, { "epoch": 0.41365162252890714, "grad_norm": 0.9012691974639893, "learning_rate": 2e-06, "loss": 0.1535, "step": 1109 }, { "epoch": 0.4140246176799702, "grad_norm": 0.9810705780982971, "learning_rate": 2e-06, "loss": 0.1601, "step": 1110 }, { "epoch": 0.4143976128310332, "grad_norm": 1.111215591430664, "learning_rate": 2e-06, "loss": 0.2013, "step": 1111 }, { "epoch": 0.41477060798209625, "grad_norm": 0.9458308219909668, "learning_rate": 2e-06, "loss": 0.2103, "step": 1112 }, { "epoch": 0.4151436031331593, "grad_norm": 0.749416172504425, "learning_rate": 2e-06, "loss": 0.2016, "step": 1113 }, { "epoch": 0.4155165982842223, "grad_norm": 0.9493358135223389, "learning_rate": 2e-06, "loss": 0.1983, "step": 1114 }, { "epoch": 0.41588959343528537, "grad_norm": 1.231197714805603, "learning_rate": 2e-06, "loss": 0.2203, "step": 1115 }, { "epoch": 0.4162625885863484, "grad_norm": 1.8180030584335327, "learning_rate": 2e-06, "loss": 0.2056, "step": 1116 }, { "epoch": 0.41663558373741144, "grad_norm": 1.1614389419555664, "learning_rate": 2e-06, "loss": 0.193, "step": 1117 }, { "epoch": 0.4170085788884744, "grad_norm": 1.1172637939453125, "learning_rate": 2e-06, "loss": 0.2096, "step": 1118 }, { "epoch": 0.41738157403953746, "grad_norm": 0.9282588362693787, "learning_rate": 2e-06, "loss": 0.2003, "step": 1119 }, { "epoch": 0.4177545691906005, "grad_norm": 1.3172205686569214, "learning_rate": 2e-06, "loss": 0.197, "step": 1120 }, { "epoch": 0.41812756434166354, "grad_norm": 0.8852254152297974, "learning_rate": 2e-06, "loss": 0.2308, "step": 1121 }, { "epoch": 0.4185005594927266, "grad_norm": 0.8737136125564575, "learning_rate": 2e-06, "loss": 0.2233, "step": 1122 }, { "epoch": 0.4188735546437896, "grad_norm": 1.29403817653656, "learning_rate": 2e-06, "loss": 0.1984, "step": 1123 }, { "epoch": 0.41924654979485265, "grad_norm": 1.2233723402023315, "learning_rate": 2e-06, "loss": 0.1912, "step": 1124 }, { "epoch": 0.4196195449459157, "grad_norm": 0.8161196708679199, "learning_rate": 2e-06, "loss": 0.2003, "step": 1125 }, { "epoch": 0.41999254009697873, "grad_norm": 0.8447216153144836, "learning_rate": 2e-06, "loss": 0.2075, "step": 1126 }, { "epoch": 0.42036553524804177, "grad_norm": 0.7853298187255859, "learning_rate": 2e-06, "loss": 0.2243, "step": 1127 }, { "epoch": 0.4207385303991048, "grad_norm": 0.8618863821029663, "learning_rate": 2e-06, "loss": 0.2035, "step": 1128 }, { "epoch": 0.42111152555016784, "grad_norm": 0.9011510610580444, "learning_rate": 2e-06, "loss": 0.1932, "step": 1129 }, { "epoch": 0.4214845207012309, "grad_norm": 0.9198399782180786, "learning_rate": 2e-06, "loss": 0.2326, "step": 1130 }, { "epoch": 0.4218575158522939, "grad_norm": 1.2043936252593994, "learning_rate": 2e-06, "loss": 0.2127, "step": 1131 }, { "epoch": 0.42223051100335696, "grad_norm": 1.2111618518829346, "learning_rate": 2e-06, "loss": 0.2084, "step": 1132 }, { "epoch": 0.42260350615442, "grad_norm": 0.8306072950363159, "learning_rate": 2e-06, "loss": 0.2006, "step": 1133 }, { "epoch": 0.42297650130548303, "grad_norm": 0.8545385599136353, "learning_rate": 2e-06, "loss": 0.2039, "step": 1134 }, { "epoch": 0.42334949645654607, "grad_norm": 0.7228959798812866, "learning_rate": 2e-06, "loss": 0.2188, "step": 1135 }, { "epoch": 0.4237224916076091, "grad_norm": 0.8552800416946411, "learning_rate": 2e-06, "loss": 0.225, "step": 1136 }, { "epoch": 0.42409548675867215, "grad_norm": 1.1903761625289917, "learning_rate": 2e-06, "loss": 0.1935, "step": 1137 }, { "epoch": 0.4244684819097352, "grad_norm": 1.2101848125457764, "learning_rate": 2e-06, "loss": 0.1987, "step": 1138 }, { "epoch": 0.4248414770607982, "grad_norm": 0.9158127903938293, "learning_rate": 2e-06, "loss": 0.1935, "step": 1139 }, { "epoch": 0.42521447221186126, "grad_norm": 1.1767957210540771, "learning_rate": 2e-06, "loss": 0.2156, "step": 1140 }, { "epoch": 0.4255874673629243, "grad_norm": 0.9325686693191528, "learning_rate": 2e-06, "loss": 0.1999, "step": 1141 }, { "epoch": 0.42596046251398734, "grad_norm": 1.08431077003479, "learning_rate": 2e-06, "loss": 0.1894, "step": 1142 }, { "epoch": 0.4263334576650504, "grad_norm": 0.9355331063270569, "learning_rate": 2e-06, "loss": 0.2069, "step": 1143 }, { "epoch": 0.4267064528161134, "grad_norm": 1.023769497871399, "learning_rate": 2e-06, "loss": 0.1966, "step": 1144 }, { "epoch": 0.42707944796717645, "grad_norm": 0.9405871629714966, "learning_rate": 2e-06, "loss": 0.2115, "step": 1145 }, { "epoch": 0.4274524431182395, "grad_norm": 0.7977302670478821, "learning_rate": 2e-06, "loss": 0.1886, "step": 1146 }, { "epoch": 0.42782543826930247, "grad_norm": 1.351109504699707, "learning_rate": 2e-06, "loss": 0.2229, "step": 1147 }, { "epoch": 0.4281984334203655, "grad_norm": 0.9311838150024414, "learning_rate": 2e-06, "loss": 0.2176, "step": 1148 }, { "epoch": 0.42857142857142855, "grad_norm": 1.2950161695480347, "learning_rate": 2e-06, "loss": 0.2022, "step": 1149 }, { "epoch": 0.4289444237224916, "grad_norm": 1.0183019638061523, "learning_rate": 2e-06, "loss": 0.2233, "step": 1150 }, { "epoch": 0.4293174188735546, "grad_norm": 1.1832247972488403, "learning_rate": 2e-06, "loss": 0.1879, "step": 1151 }, { "epoch": 0.42969041402461766, "grad_norm": 0.9124091863632202, "learning_rate": 2e-06, "loss": 0.2115, "step": 1152 }, { "epoch": 0.4300634091756807, "grad_norm": 0.883520245552063, "learning_rate": 2e-06, "loss": 0.1919, "step": 1153 }, { "epoch": 0.43043640432674374, "grad_norm": 0.856471598148346, "learning_rate": 2e-06, "loss": 0.1975, "step": 1154 }, { "epoch": 0.4308093994778068, "grad_norm": 1.0867767333984375, "learning_rate": 2e-06, "loss": 0.1887, "step": 1155 }, { "epoch": 0.4311823946288698, "grad_norm": 1.0188170671463013, "learning_rate": 2e-06, "loss": 0.1986, "step": 1156 }, { "epoch": 0.43155538977993285, "grad_norm": 0.8533804416656494, "learning_rate": 2e-06, "loss": 0.1959, "step": 1157 }, { "epoch": 0.4319283849309959, "grad_norm": 0.8642366528511047, "learning_rate": 2e-06, "loss": 0.1854, "step": 1158 }, { "epoch": 0.43230138008205893, "grad_norm": 1.1846740245819092, "learning_rate": 2e-06, "loss": 0.192, "step": 1159 }, { "epoch": 0.43267437523312197, "grad_norm": 0.729647696018219, "learning_rate": 2e-06, "loss": 0.196, "step": 1160 }, { "epoch": 0.433047370384185, "grad_norm": 0.8862849473953247, "learning_rate": 2e-06, "loss": 0.1962, "step": 1161 }, { "epoch": 0.43342036553524804, "grad_norm": 1.0529558658599854, "learning_rate": 2e-06, "loss": 0.1938, "step": 1162 }, { "epoch": 0.4337933606863111, "grad_norm": 0.951438844203949, "learning_rate": 2e-06, "loss": 0.2155, "step": 1163 }, { "epoch": 0.4341663558373741, "grad_norm": 0.9991082549095154, "learning_rate": 2e-06, "loss": 0.1921, "step": 1164 }, { "epoch": 0.43453935098843716, "grad_norm": 0.8230155110359192, "learning_rate": 2e-06, "loss": 0.2098, "step": 1165 }, { "epoch": 0.4349123461395002, "grad_norm": 1.0034247636795044, "learning_rate": 2e-06, "loss": 0.1992, "step": 1166 }, { "epoch": 0.43528534129056323, "grad_norm": 0.9694140553474426, "learning_rate": 2e-06, "loss": 0.1923, "step": 1167 }, { "epoch": 0.43565833644162627, "grad_norm": 0.7878644466400146, "learning_rate": 2e-06, "loss": 0.1975, "step": 1168 }, { "epoch": 0.4360313315926893, "grad_norm": 1.1365151405334473, "learning_rate": 2e-06, "loss": 0.2055, "step": 1169 }, { "epoch": 0.43640432674375235, "grad_norm": 0.8278276324272156, "learning_rate": 2e-06, "loss": 0.201, "step": 1170 }, { "epoch": 0.4367773218948154, "grad_norm": 0.8620278239250183, "learning_rate": 2e-06, "loss": 0.2072, "step": 1171 }, { "epoch": 0.4371503170458784, "grad_norm": 0.8129813075065613, "learning_rate": 2e-06, "loss": 0.1783, "step": 1172 }, { "epoch": 0.43752331219694146, "grad_norm": 0.8732791543006897, "learning_rate": 2e-06, "loss": 0.2007, "step": 1173 }, { "epoch": 0.4378963073480045, "grad_norm": 1.2515945434570312, "learning_rate": 2e-06, "loss": 0.1833, "step": 1174 }, { "epoch": 0.43826930249906754, "grad_norm": 1.0286751985549927, "learning_rate": 2e-06, "loss": 0.2026, "step": 1175 }, { "epoch": 0.4386422976501306, "grad_norm": 0.8698471784591675, "learning_rate": 2e-06, "loss": 0.2193, "step": 1176 }, { "epoch": 0.43901529280119356, "grad_norm": 0.9615607857704163, "learning_rate": 2e-06, "loss": 0.1957, "step": 1177 }, { "epoch": 0.4393882879522566, "grad_norm": 1.0892114639282227, "learning_rate": 2e-06, "loss": 0.2058, "step": 1178 }, { "epoch": 0.43976128310331963, "grad_norm": 1.5775411128997803, "learning_rate": 2e-06, "loss": 0.2161, "step": 1179 }, { "epoch": 0.44013427825438267, "grad_norm": 1.502206563949585, "learning_rate": 2e-06, "loss": 0.2043, "step": 1180 }, { "epoch": 0.4405072734054457, "grad_norm": 0.8389080762863159, "learning_rate": 2e-06, "loss": 0.2002, "step": 1181 }, { "epoch": 0.44088026855650875, "grad_norm": 0.7543381452560425, "learning_rate": 2e-06, "loss": 0.2241, "step": 1182 }, { "epoch": 0.4412532637075718, "grad_norm": 0.9248595833778381, "learning_rate": 2e-06, "loss": 0.185, "step": 1183 }, { "epoch": 0.4416262588586348, "grad_norm": 1.2829439640045166, "learning_rate": 2e-06, "loss": 0.2267, "step": 1184 }, { "epoch": 0.44199925400969786, "grad_norm": 0.8245465159416199, "learning_rate": 2e-06, "loss": 0.2437, "step": 1185 }, { "epoch": 0.4423722491607609, "grad_norm": 1.3498553037643433, "learning_rate": 2e-06, "loss": 0.1784, "step": 1186 }, { "epoch": 0.44274524431182394, "grad_norm": 0.9507748484611511, "learning_rate": 2e-06, "loss": 0.2128, "step": 1187 }, { "epoch": 0.443118239462887, "grad_norm": 1.0368828773498535, "learning_rate": 2e-06, "loss": 0.1876, "step": 1188 }, { "epoch": 0.44349123461395, "grad_norm": 1.1140867471694946, "learning_rate": 2e-06, "loss": 0.2427, "step": 1189 }, { "epoch": 0.44386422976501305, "grad_norm": 0.9097875356674194, "learning_rate": 2e-06, "loss": 0.2174, "step": 1190 }, { "epoch": 0.4442372249160761, "grad_norm": 0.8458914756774902, "learning_rate": 2e-06, "loss": 0.1945, "step": 1191 }, { "epoch": 0.44461022006713913, "grad_norm": 1.018532395362854, "learning_rate": 2e-06, "loss": 0.1932, "step": 1192 }, { "epoch": 0.44498321521820217, "grad_norm": 0.8694757223129272, "learning_rate": 2e-06, "loss": 0.2132, "step": 1193 }, { "epoch": 0.4453562103692652, "grad_norm": 0.8838082551956177, "learning_rate": 2e-06, "loss": 0.2116, "step": 1194 }, { "epoch": 0.44572920552032824, "grad_norm": 1.5297846794128418, "learning_rate": 2e-06, "loss": 0.2152, "step": 1195 }, { "epoch": 0.4461022006713913, "grad_norm": 0.9124077558517456, "learning_rate": 2e-06, "loss": 0.2085, "step": 1196 }, { "epoch": 0.4464751958224543, "grad_norm": 1.0279031991958618, "learning_rate": 2e-06, "loss": 0.1874, "step": 1197 }, { "epoch": 0.44684819097351736, "grad_norm": 0.9571466445922852, "learning_rate": 2e-06, "loss": 0.1896, "step": 1198 }, { "epoch": 0.4472211861245804, "grad_norm": 1.042912244796753, "learning_rate": 2e-06, "loss": 0.2028, "step": 1199 }, { "epoch": 0.44759418127564343, "grad_norm": 0.8258451819419861, "learning_rate": 2e-06, "loss": 0.1913, "step": 1200 }, { "epoch": 0.44796717642670647, "grad_norm": 0.8307275772094727, "learning_rate": 2e-06, "loss": 0.2241, "step": 1201 }, { "epoch": 0.4483401715777695, "grad_norm": 0.9376955628395081, "learning_rate": 2e-06, "loss": 0.2095, "step": 1202 }, { "epoch": 0.44871316672883255, "grad_norm": 1.3658615350723267, "learning_rate": 2e-06, "loss": 0.1647, "step": 1203 }, { "epoch": 0.4490861618798956, "grad_norm": 0.9630830883979797, "learning_rate": 2e-06, "loss": 0.1837, "step": 1204 }, { "epoch": 0.4494591570309586, "grad_norm": 0.7662046551704407, "learning_rate": 2e-06, "loss": 0.1762, "step": 1205 }, { "epoch": 0.44983215218202166, "grad_norm": 0.8443440198898315, "learning_rate": 2e-06, "loss": 0.2211, "step": 1206 }, { "epoch": 0.45020514733308464, "grad_norm": 0.8689948916435242, "learning_rate": 2e-06, "loss": 0.2082, "step": 1207 }, { "epoch": 0.4505781424841477, "grad_norm": 0.9000419974327087, "learning_rate": 2e-06, "loss": 0.196, "step": 1208 }, { "epoch": 0.4509511376352107, "grad_norm": 0.9030123949050903, "learning_rate": 2e-06, "loss": 0.2199, "step": 1209 }, { "epoch": 0.45132413278627376, "grad_norm": 0.9772928357124329, "learning_rate": 2e-06, "loss": 0.2128, "step": 1210 }, { "epoch": 0.4516971279373368, "grad_norm": 0.8974233269691467, "learning_rate": 2e-06, "loss": 0.2072, "step": 1211 }, { "epoch": 0.45207012308839983, "grad_norm": 0.9305419921875, "learning_rate": 2e-06, "loss": 0.1875, "step": 1212 }, { "epoch": 0.45244311823946287, "grad_norm": 1.0223610401153564, "learning_rate": 2e-06, "loss": 0.2106, "step": 1213 }, { "epoch": 0.4528161133905259, "grad_norm": 1.1119695901870728, "learning_rate": 2e-06, "loss": 0.2258, "step": 1214 }, { "epoch": 0.45318910854158895, "grad_norm": 0.8943946957588196, "learning_rate": 2e-06, "loss": 0.1989, "step": 1215 }, { "epoch": 0.453562103692652, "grad_norm": 0.9191533327102661, "learning_rate": 2e-06, "loss": 0.2013, "step": 1216 }, { "epoch": 0.453935098843715, "grad_norm": 0.7936472296714783, "learning_rate": 2e-06, "loss": 0.2131, "step": 1217 }, { "epoch": 0.45430809399477806, "grad_norm": 0.8428422212600708, "learning_rate": 2e-06, "loss": 0.2402, "step": 1218 }, { "epoch": 0.4546810891458411, "grad_norm": 0.8305052518844604, "learning_rate": 2e-06, "loss": 0.2223, "step": 1219 }, { "epoch": 0.45505408429690414, "grad_norm": 0.9498112201690674, "learning_rate": 2e-06, "loss": 0.184, "step": 1220 }, { "epoch": 0.4554270794479672, "grad_norm": 1.0248947143554688, "learning_rate": 2e-06, "loss": 0.1727, "step": 1221 }, { "epoch": 0.4558000745990302, "grad_norm": 0.8293464779853821, "learning_rate": 2e-06, "loss": 0.1737, "step": 1222 }, { "epoch": 0.45617306975009325, "grad_norm": 0.8387755155563354, "learning_rate": 2e-06, "loss": 0.2043, "step": 1223 }, { "epoch": 0.4565460649011563, "grad_norm": 0.7865810990333557, "learning_rate": 2e-06, "loss": 0.2141, "step": 1224 }, { "epoch": 0.45691906005221933, "grad_norm": 1.2172054052352905, "learning_rate": 2e-06, "loss": 0.1963, "step": 1225 }, { "epoch": 0.45729205520328237, "grad_norm": 0.7549672722816467, "learning_rate": 2e-06, "loss": 0.1984, "step": 1226 }, { "epoch": 0.4576650503543454, "grad_norm": 0.986049234867096, "learning_rate": 2e-06, "loss": 0.1993, "step": 1227 }, { "epoch": 0.45803804550540844, "grad_norm": 0.9823955297470093, "learning_rate": 2e-06, "loss": 0.2436, "step": 1228 }, { "epoch": 0.4584110406564715, "grad_norm": 0.7612717747688293, "learning_rate": 2e-06, "loss": 0.2042, "step": 1229 }, { "epoch": 0.4587840358075345, "grad_norm": 1.4066591262817383, "learning_rate": 2e-06, "loss": 0.1957, "step": 1230 }, { "epoch": 0.45915703095859756, "grad_norm": 0.8428547382354736, "learning_rate": 2e-06, "loss": 0.193, "step": 1231 }, { "epoch": 0.4595300261096606, "grad_norm": 0.766061007976532, "learning_rate": 2e-06, "loss": 0.1949, "step": 1232 }, { "epoch": 0.45990302126072363, "grad_norm": 0.8001962304115295, "learning_rate": 2e-06, "loss": 0.2314, "step": 1233 }, { "epoch": 0.46027601641178667, "grad_norm": 0.9091906547546387, "learning_rate": 2e-06, "loss": 0.1879, "step": 1234 }, { "epoch": 0.4606490115628497, "grad_norm": 0.9744974970817566, "learning_rate": 2e-06, "loss": 0.2034, "step": 1235 }, { "epoch": 0.46102200671391275, "grad_norm": 0.9809953570365906, "learning_rate": 2e-06, "loss": 0.2041, "step": 1236 }, { "epoch": 0.46139500186497573, "grad_norm": 1.0632573366165161, "learning_rate": 2e-06, "loss": 0.1986, "step": 1237 }, { "epoch": 0.46176799701603877, "grad_norm": 1.088507890701294, "learning_rate": 2e-06, "loss": 0.1925, "step": 1238 }, { "epoch": 0.4621409921671018, "grad_norm": 0.9407961964607239, "learning_rate": 2e-06, "loss": 0.194, "step": 1239 }, { "epoch": 0.46251398731816484, "grad_norm": 1.0338878631591797, "learning_rate": 2e-06, "loss": 0.2008, "step": 1240 }, { "epoch": 0.4628869824692279, "grad_norm": 0.9037163257598877, "learning_rate": 2e-06, "loss": 0.1884, "step": 1241 }, { "epoch": 0.4632599776202909, "grad_norm": 0.848834216594696, "learning_rate": 2e-06, "loss": 0.2015, "step": 1242 }, { "epoch": 0.46363297277135396, "grad_norm": 1.0716123580932617, "learning_rate": 2e-06, "loss": 0.2067, "step": 1243 }, { "epoch": 0.464005967922417, "grad_norm": 0.9680582880973816, "learning_rate": 2e-06, "loss": 0.192, "step": 1244 }, { "epoch": 0.46437896307348003, "grad_norm": 1.0419069528579712, "learning_rate": 2e-06, "loss": 0.1958, "step": 1245 }, { "epoch": 0.46475195822454307, "grad_norm": 0.9313234090805054, "learning_rate": 2e-06, "loss": 0.2079, "step": 1246 }, { "epoch": 0.4651249533756061, "grad_norm": 0.885547935962677, "learning_rate": 2e-06, "loss": 0.2082, "step": 1247 }, { "epoch": 0.46549794852666915, "grad_norm": 1.0875593423843384, "learning_rate": 2e-06, "loss": 0.1873, "step": 1248 }, { "epoch": 0.4658709436777322, "grad_norm": 0.7914965152740479, "learning_rate": 2e-06, "loss": 0.2115, "step": 1249 }, { "epoch": 0.4662439388287952, "grad_norm": 1.111791968345642, "learning_rate": 2e-06, "loss": 0.2175, "step": 1250 }, { "epoch": 0.46661693397985826, "grad_norm": 0.9594305157661438, "learning_rate": 2e-06, "loss": 0.1865, "step": 1251 }, { "epoch": 0.4669899291309213, "grad_norm": 0.8451018333435059, "learning_rate": 2e-06, "loss": 0.1972, "step": 1252 }, { "epoch": 0.46736292428198434, "grad_norm": 0.9783252477645874, "learning_rate": 2e-06, "loss": 0.2076, "step": 1253 }, { "epoch": 0.4677359194330474, "grad_norm": 0.8618592023849487, "learning_rate": 2e-06, "loss": 0.1875, "step": 1254 }, { "epoch": 0.4681089145841104, "grad_norm": 0.9775280952453613, "learning_rate": 2e-06, "loss": 0.2031, "step": 1255 }, { "epoch": 0.46848190973517345, "grad_norm": 1.072272777557373, "learning_rate": 2e-06, "loss": 0.2103, "step": 1256 }, { "epoch": 0.4688549048862365, "grad_norm": 1.264736294746399, "learning_rate": 2e-06, "loss": 0.181, "step": 1257 }, { "epoch": 0.46922790003729953, "grad_norm": 1.0123893022537231, "learning_rate": 2e-06, "loss": 0.1884, "step": 1258 }, { "epoch": 0.46960089518836257, "grad_norm": 1.0246769189834595, "learning_rate": 2e-06, "loss": 0.1975, "step": 1259 }, { "epoch": 0.4699738903394256, "grad_norm": 1.1821331977844238, "learning_rate": 2e-06, "loss": 0.1907, "step": 1260 }, { "epoch": 0.47034688549048864, "grad_norm": 0.8959387540817261, "learning_rate": 2e-06, "loss": 0.2003, "step": 1261 }, { "epoch": 0.4707198806415517, "grad_norm": 1.0930583477020264, "learning_rate": 2e-06, "loss": 0.2075, "step": 1262 }, { "epoch": 0.4710928757926147, "grad_norm": 1.0616998672485352, "learning_rate": 2e-06, "loss": 0.1958, "step": 1263 }, { "epoch": 0.47146587094367776, "grad_norm": 0.9349974989891052, "learning_rate": 2e-06, "loss": 0.222, "step": 1264 }, { "epoch": 0.4718388660947408, "grad_norm": 1.2246962785720825, "learning_rate": 2e-06, "loss": 0.1954, "step": 1265 }, { "epoch": 0.47221186124580383, "grad_norm": 0.9776229858398438, "learning_rate": 2e-06, "loss": 0.212, "step": 1266 }, { "epoch": 0.4725848563968668, "grad_norm": 0.812899649143219, "learning_rate": 2e-06, "loss": 0.191, "step": 1267 }, { "epoch": 0.47295785154792985, "grad_norm": 0.7886766195297241, "learning_rate": 2e-06, "loss": 0.225, "step": 1268 }, { "epoch": 0.4733308466989929, "grad_norm": 0.8274191617965698, "learning_rate": 2e-06, "loss": 0.2051, "step": 1269 }, { "epoch": 0.47370384185005593, "grad_norm": 0.8297175765037537, "learning_rate": 2e-06, "loss": 0.1899, "step": 1270 }, { "epoch": 0.47407683700111897, "grad_norm": 0.7538293600082397, "learning_rate": 2e-06, "loss": 0.214, "step": 1271 }, { "epoch": 0.474449832152182, "grad_norm": 0.9299455285072327, "learning_rate": 2e-06, "loss": 0.2035, "step": 1272 }, { "epoch": 0.47482282730324504, "grad_norm": 0.8499518632888794, "learning_rate": 2e-06, "loss": 0.206, "step": 1273 }, { "epoch": 0.4751958224543081, "grad_norm": 0.7615072727203369, "learning_rate": 2e-06, "loss": 0.2144, "step": 1274 }, { "epoch": 0.4755688176053711, "grad_norm": 0.9047805070877075, "learning_rate": 2e-06, "loss": 0.2465, "step": 1275 }, { "epoch": 0.47594181275643416, "grad_norm": 1.0026063919067383, "learning_rate": 2e-06, "loss": 0.224, "step": 1276 }, { "epoch": 0.4763148079074972, "grad_norm": 0.8054906129837036, "learning_rate": 2e-06, "loss": 0.2145, "step": 1277 }, { "epoch": 0.47668780305856023, "grad_norm": 0.893140435218811, "learning_rate": 2e-06, "loss": 0.2048, "step": 1278 }, { "epoch": 0.47706079820962327, "grad_norm": 0.9236406087875366, "learning_rate": 2e-06, "loss": 0.2114, "step": 1279 }, { "epoch": 0.4774337933606863, "grad_norm": 1.2507268190383911, "learning_rate": 2e-06, "loss": 0.2274, "step": 1280 }, { "epoch": 0.47780678851174935, "grad_norm": 0.7895500659942627, "learning_rate": 2e-06, "loss": 0.2379, "step": 1281 }, { "epoch": 0.4781797836628124, "grad_norm": 1.0215257406234741, "learning_rate": 2e-06, "loss": 0.2139, "step": 1282 }, { "epoch": 0.4785527788138754, "grad_norm": 1.137772560119629, "learning_rate": 2e-06, "loss": 0.2062, "step": 1283 }, { "epoch": 0.47892577396493846, "grad_norm": 1.0097604990005493, "learning_rate": 2e-06, "loss": 0.2135, "step": 1284 }, { "epoch": 0.4792987691160015, "grad_norm": 0.8951389193534851, "learning_rate": 2e-06, "loss": 0.2002, "step": 1285 }, { "epoch": 0.47967176426706454, "grad_norm": 1.0746266841888428, "learning_rate": 2e-06, "loss": 0.2109, "step": 1286 }, { "epoch": 0.4800447594181276, "grad_norm": 1.0292800664901733, "learning_rate": 2e-06, "loss": 0.1747, "step": 1287 }, { "epoch": 0.4804177545691906, "grad_norm": 0.7579898238182068, "learning_rate": 2e-06, "loss": 0.1999, "step": 1288 }, { "epoch": 0.48079074972025365, "grad_norm": 1.0053479671478271, "learning_rate": 2e-06, "loss": 0.2021, "step": 1289 }, { "epoch": 0.4811637448713167, "grad_norm": 0.8155003786087036, "learning_rate": 2e-06, "loss": 0.2109, "step": 1290 }, { "epoch": 0.48153674002237973, "grad_norm": 1.1021757125854492, "learning_rate": 2e-06, "loss": 0.2122, "step": 1291 }, { "epoch": 0.48190973517344277, "grad_norm": 0.7149756550788879, "learning_rate": 2e-06, "loss": 0.2058, "step": 1292 }, { "epoch": 0.4822827303245058, "grad_norm": 0.9569563269615173, "learning_rate": 2e-06, "loss": 0.2021, "step": 1293 }, { "epoch": 0.48265572547556884, "grad_norm": 0.9164791703224182, "learning_rate": 2e-06, "loss": 0.1864, "step": 1294 }, { "epoch": 0.4830287206266319, "grad_norm": 0.8762383460998535, "learning_rate": 2e-06, "loss": 0.2017, "step": 1295 }, { "epoch": 0.48340171577769486, "grad_norm": 0.8610120415687561, "learning_rate": 2e-06, "loss": 0.205, "step": 1296 }, { "epoch": 0.4837747109287579, "grad_norm": 1.1923651695251465, "learning_rate": 2e-06, "loss": 0.1999, "step": 1297 }, { "epoch": 0.48414770607982094, "grad_norm": 0.8275599479675293, "learning_rate": 2e-06, "loss": 0.1966, "step": 1298 }, { "epoch": 0.484520701230884, "grad_norm": 1.1154911518096924, "learning_rate": 2e-06, "loss": 0.1797, "step": 1299 }, { "epoch": 0.484893696381947, "grad_norm": 1.038810133934021, "learning_rate": 2e-06, "loss": 0.196, "step": 1300 }, { "epoch": 0.48526669153301005, "grad_norm": 0.8732941150665283, "learning_rate": 2e-06, "loss": 0.1972, "step": 1301 }, { "epoch": 0.4856396866840731, "grad_norm": 0.8140985369682312, "learning_rate": 2e-06, "loss": 0.2053, "step": 1302 }, { "epoch": 0.48601268183513613, "grad_norm": 0.8084601163864136, "learning_rate": 2e-06, "loss": 0.2156, "step": 1303 }, { "epoch": 0.48638567698619917, "grad_norm": 0.8968515992164612, "learning_rate": 2e-06, "loss": 0.2044, "step": 1304 }, { "epoch": 0.4867586721372622, "grad_norm": 0.9631931185722351, "learning_rate": 2e-06, "loss": 0.2207, "step": 1305 }, { "epoch": 0.48713166728832524, "grad_norm": 0.8738954067230225, "learning_rate": 2e-06, "loss": 0.1843, "step": 1306 }, { "epoch": 0.4875046624393883, "grad_norm": 0.7705062031745911, "learning_rate": 2e-06, "loss": 0.1907, "step": 1307 }, { "epoch": 0.4878776575904513, "grad_norm": 1.049697995185852, "learning_rate": 2e-06, "loss": 0.1957, "step": 1308 }, { "epoch": 0.48825065274151436, "grad_norm": 0.9304150938987732, "learning_rate": 2e-06, "loss": 0.1993, "step": 1309 }, { "epoch": 0.4886236478925774, "grad_norm": 1.2061336040496826, "learning_rate": 2e-06, "loss": 0.1935, "step": 1310 }, { "epoch": 0.48899664304364043, "grad_norm": 0.8848938345909119, "learning_rate": 2e-06, "loss": 0.2151, "step": 1311 }, { "epoch": 0.48936963819470347, "grad_norm": 1.084688425064087, "learning_rate": 2e-06, "loss": 0.2117, "step": 1312 }, { "epoch": 0.4897426333457665, "grad_norm": 0.9425563812255859, "learning_rate": 2e-06, "loss": 0.1882, "step": 1313 }, { "epoch": 0.49011562849682955, "grad_norm": 0.8952395915985107, "learning_rate": 2e-06, "loss": 0.1972, "step": 1314 }, { "epoch": 0.4904886236478926, "grad_norm": 1.1104282140731812, "learning_rate": 2e-06, "loss": 0.209, "step": 1315 }, { "epoch": 0.4908616187989556, "grad_norm": 0.9784849286079407, "learning_rate": 2e-06, "loss": 0.2083, "step": 1316 }, { "epoch": 0.49123461395001866, "grad_norm": 0.8387400507926941, "learning_rate": 2e-06, "loss": 0.1917, "step": 1317 }, { "epoch": 0.4916076091010817, "grad_norm": 1.4602051973342896, "learning_rate": 2e-06, "loss": 0.1942, "step": 1318 }, { "epoch": 0.49198060425214474, "grad_norm": 0.9706045985221863, "learning_rate": 2e-06, "loss": 0.2028, "step": 1319 }, { "epoch": 0.4923535994032078, "grad_norm": 1.1926292181015015, "learning_rate": 2e-06, "loss": 0.1947, "step": 1320 }, { "epoch": 0.4927265945542708, "grad_norm": 0.9311724305152893, "learning_rate": 2e-06, "loss": 0.2084, "step": 1321 }, { "epoch": 0.49309958970533385, "grad_norm": 1.0273479223251343, "learning_rate": 2e-06, "loss": 0.2161, "step": 1322 }, { "epoch": 0.4934725848563969, "grad_norm": 0.9733841419219971, "learning_rate": 2e-06, "loss": 0.182, "step": 1323 }, { "epoch": 0.49384558000745993, "grad_norm": 0.9543289542198181, "learning_rate": 2e-06, "loss": 0.219, "step": 1324 }, { "epoch": 0.49421857515852297, "grad_norm": 0.7729955315589905, "learning_rate": 2e-06, "loss": 0.1754, "step": 1325 }, { "epoch": 0.49459157030958595, "grad_norm": 1.0371787548065186, "learning_rate": 2e-06, "loss": 0.1989, "step": 1326 }, { "epoch": 0.494964565460649, "grad_norm": 0.7862864136695862, "learning_rate": 2e-06, "loss": 0.195, "step": 1327 }, { "epoch": 0.495337560611712, "grad_norm": 0.8408017158508301, "learning_rate": 2e-06, "loss": 0.2073, "step": 1328 }, { "epoch": 0.49571055576277506, "grad_norm": 1.0264546871185303, "learning_rate": 2e-06, "loss": 0.1968, "step": 1329 }, { "epoch": 0.4960835509138381, "grad_norm": 0.8310911059379578, "learning_rate": 2e-06, "loss": 0.2164, "step": 1330 }, { "epoch": 0.49645654606490114, "grad_norm": 1.0217857360839844, "learning_rate": 2e-06, "loss": 0.1784, "step": 1331 }, { "epoch": 0.4968295412159642, "grad_norm": 0.9495546221733093, "learning_rate": 2e-06, "loss": 0.2021, "step": 1332 }, { "epoch": 0.4972025363670272, "grad_norm": 0.9638365507125854, "learning_rate": 2e-06, "loss": 0.1793, "step": 1333 }, { "epoch": 0.49757553151809025, "grad_norm": 0.8478617072105408, "learning_rate": 2e-06, "loss": 0.1855, "step": 1334 }, { "epoch": 0.4979485266691533, "grad_norm": 0.8910413980484009, "learning_rate": 2e-06, "loss": 0.2026, "step": 1335 }, { "epoch": 0.49832152182021633, "grad_norm": 0.8154772520065308, "learning_rate": 2e-06, "loss": 0.2006, "step": 1336 }, { "epoch": 0.49869451697127937, "grad_norm": 0.8819144368171692, "learning_rate": 2e-06, "loss": 0.2199, "step": 1337 }, { "epoch": 0.4990675121223424, "grad_norm": 1.3117270469665527, "learning_rate": 2e-06, "loss": 0.185, "step": 1338 }, { "epoch": 0.49944050727340544, "grad_norm": 0.7083219289779663, "learning_rate": 2e-06, "loss": 0.2004, "step": 1339 }, { "epoch": 0.4998135024244685, "grad_norm": 1.295807957649231, "learning_rate": 2e-06, "loss": 0.1742, "step": 1340 }, { "epoch": 0.5001864975755315, "grad_norm": 1.0432149171829224, "learning_rate": 2e-06, "loss": 0.1968, "step": 1341 }, { "epoch": 0.5005594927265945, "grad_norm": 1.4518747329711914, "learning_rate": 2e-06, "loss": 0.2149, "step": 1342 }, { "epoch": 0.5009324878776575, "grad_norm": 0.8558361530303955, "learning_rate": 2e-06, "loss": 0.2101, "step": 1343 }, { "epoch": 0.5013054830287206, "grad_norm": 0.8370813131332397, "learning_rate": 2e-06, "loss": 0.1973, "step": 1344 }, { "epoch": 0.5016784781797836, "grad_norm": 0.9925529956817627, "learning_rate": 2e-06, "loss": 0.1976, "step": 1345 }, { "epoch": 0.5020514733308467, "grad_norm": 0.7963873744010925, "learning_rate": 2e-06, "loss": 0.2112, "step": 1346 }, { "epoch": 0.5024244684819097, "grad_norm": 0.9464275240898132, "learning_rate": 2e-06, "loss": 0.1886, "step": 1347 }, { "epoch": 0.5027974636329727, "grad_norm": 0.9581516981124878, "learning_rate": 2e-06, "loss": 0.2253, "step": 1348 }, { "epoch": 0.5031704587840358, "grad_norm": 0.8511423468589783, "learning_rate": 2e-06, "loss": 0.2014, "step": 1349 }, { "epoch": 0.5035434539350988, "grad_norm": 1.02018141746521, "learning_rate": 2e-06, "loss": 0.1835, "step": 1350 }, { "epoch": 0.5039164490861618, "grad_norm": 0.8902892470359802, "learning_rate": 2e-06, "loss": 0.217, "step": 1351 }, { "epoch": 0.5042894442372249, "grad_norm": 0.8972639441490173, "learning_rate": 2e-06, "loss": 0.2023, "step": 1352 }, { "epoch": 0.5046624393882879, "grad_norm": 1.2115341424942017, "learning_rate": 2e-06, "loss": 0.2163, "step": 1353 }, { "epoch": 0.505035434539351, "grad_norm": 0.9035894870758057, "learning_rate": 2e-06, "loss": 0.2127, "step": 1354 }, { "epoch": 0.505408429690414, "grad_norm": 0.7845447659492493, "learning_rate": 2e-06, "loss": 0.1899, "step": 1355 }, { "epoch": 0.505781424841477, "grad_norm": 0.989069938659668, "learning_rate": 2e-06, "loss": 0.1993, "step": 1356 }, { "epoch": 0.5061544199925401, "grad_norm": 1.1060456037521362, "learning_rate": 2e-06, "loss": 0.2209, "step": 1357 }, { "epoch": 0.5065274151436031, "grad_norm": 1.377783179283142, "learning_rate": 2e-06, "loss": 0.2366, "step": 1358 }, { "epoch": 0.5069004102946661, "grad_norm": 1.0014876127243042, "learning_rate": 2e-06, "loss": 0.1972, "step": 1359 }, { "epoch": 0.5072734054457292, "grad_norm": 0.9451000690460205, "learning_rate": 2e-06, "loss": 0.2021, "step": 1360 }, { "epoch": 0.5076464005967922, "grad_norm": 1.1157171726226807, "learning_rate": 2e-06, "loss": 0.1839, "step": 1361 }, { "epoch": 0.5080193957478553, "grad_norm": 0.8862402439117432, "learning_rate": 2e-06, "loss": 0.208, "step": 1362 }, { "epoch": 0.5083923908989183, "grad_norm": 0.9960072040557861, "learning_rate": 2e-06, "loss": 0.1977, "step": 1363 }, { "epoch": 0.5087653860499813, "grad_norm": 0.8603051900863647, "learning_rate": 2e-06, "loss": 0.1995, "step": 1364 }, { "epoch": 0.5091383812010444, "grad_norm": 1.3238534927368164, "learning_rate": 2e-06, "loss": 0.203, "step": 1365 }, { "epoch": 0.5095113763521074, "grad_norm": 0.975862443447113, "learning_rate": 2e-06, "loss": 0.2118, "step": 1366 }, { "epoch": 0.5098843715031705, "grad_norm": 1.0212750434875488, "learning_rate": 2e-06, "loss": 0.2033, "step": 1367 }, { "epoch": 0.5102573666542335, "grad_norm": 0.843689501285553, "learning_rate": 2e-06, "loss": 0.1901, "step": 1368 }, { "epoch": 0.5106303618052965, "grad_norm": 1.2444080114364624, "learning_rate": 2e-06, "loss": 0.1882, "step": 1369 }, { "epoch": 0.5110033569563596, "grad_norm": 0.9108520150184631, "learning_rate": 2e-06, "loss": 0.199, "step": 1370 }, { "epoch": 0.5113763521074226, "grad_norm": 0.8922763466835022, "learning_rate": 2e-06, "loss": 0.2047, "step": 1371 }, { "epoch": 0.5117493472584856, "grad_norm": 0.7751742601394653, "learning_rate": 2e-06, "loss": 0.2157, "step": 1372 }, { "epoch": 0.5121223424095487, "grad_norm": 1.131173849105835, "learning_rate": 2e-06, "loss": 0.1958, "step": 1373 }, { "epoch": 0.5124953375606117, "grad_norm": 0.840311586856842, "learning_rate": 2e-06, "loss": 0.1906, "step": 1374 }, { "epoch": 0.5128683327116748, "grad_norm": 0.9350585341453552, "learning_rate": 2e-06, "loss": 0.2169, "step": 1375 }, { "epoch": 0.5132413278627378, "grad_norm": 0.8612457513809204, "learning_rate": 2e-06, "loss": 0.2227, "step": 1376 }, { "epoch": 0.5136143230138008, "grad_norm": 1.0599944591522217, "learning_rate": 2e-06, "loss": 0.2068, "step": 1377 }, { "epoch": 0.5139873181648639, "grad_norm": 0.8462560176849365, "learning_rate": 2e-06, "loss": 0.2161, "step": 1378 }, { "epoch": 0.5143603133159269, "grad_norm": 1.02583646774292, "learning_rate": 2e-06, "loss": 0.2058, "step": 1379 }, { "epoch": 0.51473330846699, "grad_norm": 0.8467254638671875, "learning_rate": 2e-06, "loss": 0.2281, "step": 1380 }, { "epoch": 0.515106303618053, "grad_norm": 1.009759545326233, "learning_rate": 2e-06, "loss": 0.1916, "step": 1381 }, { "epoch": 0.515479298769116, "grad_norm": 0.8832783699035645, "learning_rate": 2e-06, "loss": 0.2257, "step": 1382 }, { "epoch": 0.5158522939201791, "grad_norm": 1.0258092880249023, "learning_rate": 2e-06, "loss": 0.2172, "step": 1383 }, { "epoch": 0.5162252890712421, "grad_norm": 0.8545379638671875, "learning_rate": 2e-06, "loss": 0.1889, "step": 1384 }, { "epoch": 0.5165982842223051, "grad_norm": 1.1977559328079224, "learning_rate": 2e-06, "loss": 0.2016, "step": 1385 }, { "epoch": 0.5169712793733682, "grad_norm": 0.9025326371192932, "learning_rate": 2e-06, "loss": 0.1928, "step": 1386 }, { "epoch": 0.5173442745244312, "grad_norm": 0.7250174880027771, "learning_rate": 2e-06, "loss": 0.2298, "step": 1387 }, { "epoch": 0.5177172696754943, "grad_norm": 0.9258498549461365, "learning_rate": 2e-06, "loss": 0.1993, "step": 1388 }, { "epoch": 0.5180902648265573, "grad_norm": 1.0027661323547363, "learning_rate": 2e-06, "loss": 0.201, "step": 1389 }, { "epoch": 0.5184632599776203, "grad_norm": 0.8430682420730591, "learning_rate": 2e-06, "loss": 0.2118, "step": 1390 }, { "epoch": 0.5188362551286834, "grad_norm": 0.9343864917755127, "learning_rate": 2e-06, "loss": 0.2457, "step": 1391 }, { "epoch": 0.5192092502797464, "grad_norm": 0.9292888045310974, "learning_rate": 2e-06, "loss": 0.2157, "step": 1392 }, { "epoch": 0.5195822454308094, "grad_norm": 1.0311505794525146, "learning_rate": 2e-06, "loss": 0.2082, "step": 1393 }, { "epoch": 0.5199552405818725, "grad_norm": 0.8285952806472778, "learning_rate": 2e-06, "loss": 0.2016, "step": 1394 }, { "epoch": 0.5203282357329355, "grad_norm": 0.9031305313110352, "learning_rate": 2e-06, "loss": 0.2068, "step": 1395 }, { "epoch": 0.5207012308839986, "grad_norm": 0.964750349521637, "learning_rate": 2e-06, "loss": 0.2017, "step": 1396 }, { "epoch": 0.5210742260350616, "grad_norm": 0.8781641721725464, "learning_rate": 2e-06, "loss": 0.1917, "step": 1397 }, { "epoch": 0.5214472211861246, "grad_norm": 1.0136576890945435, "learning_rate": 2e-06, "loss": 0.1944, "step": 1398 }, { "epoch": 0.5218202163371877, "grad_norm": 1.3042027950286865, "learning_rate": 2e-06, "loss": 0.1973, "step": 1399 }, { "epoch": 0.5221932114882507, "grad_norm": 0.8119072914123535, "learning_rate": 2e-06, "loss": 0.1913, "step": 1400 }, { "epoch": 0.5225662066393136, "grad_norm": 1.0908031463623047, "learning_rate": 2e-06, "loss": 0.189, "step": 1401 }, { "epoch": 0.5229392017903767, "grad_norm": 0.9326440691947937, "learning_rate": 2e-06, "loss": 0.2197, "step": 1402 }, { "epoch": 0.5233121969414397, "grad_norm": 0.8088356256484985, "learning_rate": 2e-06, "loss": 0.2176, "step": 1403 }, { "epoch": 0.5236851920925027, "grad_norm": 0.8277832269668579, "learning_rate": 2e-06, "loss": 0.212, "step": 1404 }, { "epoch": 0.5240581872435658, "grad_norm": 0.9052010178565979, "learning_rate": 2e-06, "loss": 0.2119, "step": 1405 }, { "epoch": 0.5244311823946288, "grad_norm": 0.8422574400901794, "learning_rate": 2e-06, "loss": 0.2014, "step": 1406 }, { "epoch": 0.5248041775456919, "grad_norm": 1.008782148361206, "learning_rate": 2e-06, "loss": 0.177, "step": 1407 }, { "epoch": 0.5251771726967549, "grad_norm": 1.0813308954238892, "learning_rate": 2e-06, "loss": 0.1772, "step": 1408 }, { "epoch": 0.5255501678478179, "grad_norm": 0.7307412624359131, "learning_rate": 2e-06, "loss": 0.2168, "step": 1409 }, { "epoch": 0.525923162998881, "grad_norm": 0.8838328719139099, "learning_rate": 2e-06, "loss": 0.2225, "step": 1410 }, { "epoch": 0.526296158149944, "grad_norm": 1.0744060277938843, "learning_rate": 2e-06, "loss": 0.1954, "step": 1411 }, { "epoch": 0.526669153301007, "grad_norm": 0.9473239779472351, "learning_rate": 2e-06, "loss": 0.2015, "step": 1412 }, { "epoch": 0.5270421484520701, "grad_norm": 0.9878484606742859, "learning_rate": 2e-06, "loss": 0.2065, "step": 1413 }, { "epoch": 0.5274151436031331, "grad_norm": 1.064440131187439, "learning_rate": 2e-06, "loss": 0.2057, "step": 1414 }, { "epoch": 0.5277881387541962, "grad_norm": 0.803508460521698, "learning_rate": 2e-06, "loss": 0.2032, "step": 1415 }, { "epoch": 0.5281611339052592, "grad_norm": 1.0688599348068237, "learning_rate": 2e-06, "loss": 0.1985, "step": 1416 }, { "epoch": 0.5285341290563222, "grad_norm": 0.8427935838699341, "learning_rate": 2e-06, "loss": 0.2221, "step": 1417 }, { "epoch": 0.5289071242073853, "grad_norm": 0.8512614965438843, "learning_rate": 2e-06, "loss": 0.2042, "step": 1418 }, { "epoch": 0.5292801193584483, "grad_norm": 1.170677661895752, "learning_rate": 2e-06, "loss": 0.2024, "step": 1419 }, { "epoch": 0.5296531145095114, "grad_norm": 1.0383622646331787, "learning_rate": 2e-06, "loss": 0.2154, "step": 1420 }, { "epoch": 0.5300261096605744, "grad_norm": 0.8108653426170349, "learning_rate": 2e-06, "loss": 0.1886, "step": 1421 }, { "epoch": 0.5303991048116374, "grad_norm": 0.9708895087242126, "learning_rate": 2e-06, "loss": 0.2129, "step": 1422 }, { "epoch": 0.5307720999627005, "grad_norm": 0.8620759844779968, "learning_rate": 2e-06, "loss": 0.201, "step": 1423 }, { "epoch": 0.5311450951137635, "grad_norm": 1.1189860105514526, "learning_rate": 2e-06, "loss": 0.1957, "step": 1424 }, { "epoch": 0.5315180902648265, "grad_norm": 1.1406034231185913, "learning_rate": 2e-06, "loss": 0.1932, "step": 1425 }, { "epoch": 0.5318910854158896, "grad_norm": 1.243134617805481, "learning_rate": 2e-06, "loss": 0.1788, "step": 1426 }, { "epoch": 0.5322640805669526, "grad_norm": 1.0278013944625854, "learning_rate": 2e-06, "loss": 0.2195, "step": 1427 }, { "epoch": 0.5326370757180157, "grad_norm": 0.8058028817176819, "learning_rate": 2e-06, "loss": 0.2046, "step": 1428 }, { "epoch": 0.5330100708690787, "grad_norm": 0.8705823421478271, "learning_rate": 2e-06, "loss": 0.217, "step": 1429 }, { "epoch": 0.5333830660201417, "grad_norm": 1.0765502452850342, "learning_rate": 2e-06, "loss": 0.1925, "step": 1430 }, { "epoch": 0.5337560611712048, "grad_norm": 0.9265007972717285, "learning_rate": 2e-06, "loss": 0.2147, "step": 1431 }, { "epoch": 0.5341290563222678, "grad_norm": 0.8785068988800049, "learning_rate": 2e-06, "loss": 0.2107, "step": 1432 }, { "epoch": 0.5345020514733309, "grad_norm": 0.7577351331710815, "learning_rate": 2e-06, "loss": 0.2142, "step": 1433 }, { "epoch": 0.5348750466243939, "grad_norm": 0.8868758678436279, "learning_rate": 2e-06, "loss": 0.1975, "step": 1434 }, { "epoch": 0.5352480417754569, "grad_norm": 1.135514259338379, "learning_rate": 2e-06, "loss": 0.2073, "step": 1435 }, { "epoch": 0.53562103692652, "grad_norm": 1.1699068546295166, "learning_rate": 2e-06, "loss": 0.2193, "step": 1436 }, { "epoch": 0.535994032077583, "grad_norm": 0.8925819396972656, "learning_rate": 2e-06, "loss": 0.2174, "step": 1437 }, { "epoch": 0.536367027228646, "grad_norm": 0.9224807024002075, "learning_rate": 2e-06, "loss": 0.2237, "step": 1438 }, { "epoch": 0.5367400223797091, "grad_norm": 1.0362088680267334, "learning_rate": 2e-06, "loss": 0.1932, "step": 1439 }, { "epoch": 0.5371130175307721, "grad_norm": 0.8815548419952393, "learning_rate": 2e-06, "loss": 0.1933, "step": 1440 }, { "epoch": 0.5374860126818352, "grad_norm": 0.9525446891784668, "learning_rate": 2e-06, "loss": 0.2162, "step": 1441 }, { "epoch": 0.5378590078328982, "grad_norm": 0.7262503504753113, "learning_rate": 2e-06, "loss": 0.2159, "step": 1442 }, { "epoch": 0.5382320029839612, "grad_norm": 0.9196373224258423, "learning_rate": 2e-06, "loss": 0.2076, "step": 1443 }, { "epoch": 0.5386049981350243, "grad_norm": 0.8361424803733826, "learning_rate": 2e-06, "loss": 0.1835, "step": 1444 }, { "epoch": 0.5389779932860873, "grad_norm": 0.9190415143966675, "learning_rate": 2e-06, "loss": 0.2, "step": 1445 }, { "epoch": 0.5393509884371503, "grad_norm": 1.0513532161712646, "learning_rate": 2e-06, "loss": 0.2072, "step": 1446 }, { "epoch": 0.5397239835882134, "grad_norm": 1.1233733892440796, "learning_rate": 2e-06, "loss": 0.2021, "step": 1447 }, { "epoch": 0.5400969787392764, "grad_norm": 1.1297720670700073, "learning_rate": 2e-06, "loss": 0.2215, "step": 1448 }, { "epoch": 0.5404699738903395, "grad_norm": 0.9336369633674622, "learning_rate": 2e-06, "loss": 0.2036, "step": 1449 }, { "epoch": 0.5408429690414025, "grad_norm": 1.0106576681137085, "learning_rate": 2e-06, "loss": 0.1804, "step": 1450 }, { "epoch": 0.5412159641924655, "grad_norm": 0.988398551940918, "learning_rate": 2e-06, "loss": 0.208, "step": 1451 }, { "epoch": 0.5415889593435286, "grad_norm": 0.9007694125175476, "learning_rate": 2e-06, "loss": 0.2178, "step": 1452 }, { "epoch": 0.5419619544945916, "grad_norm": 0.8123948574066162, "learning_rate": 2e-06, "loss": 0.2241, "step": 1453 }, { "epoch": 0.5423349496456547, "grad_norm": 0.9283350706100464, "learning_rate": 2e-06, "loss": 0.2232, "step": 1454 }, { "epoch": 0.5427079447967177, "grad_norm": 0.8744990825653076, "learning_rate": 2e-06, "loss": 0.2225, "step": 1455 }, { "epoch": 0.5430809399477807, "grad_norm": 1.135576844215393, "learning_rate": 2e-06, "loss": 0.2264, "step": 1456 }, { "epoch": 0.5434539350988438, "grad_norm": 0.77315753698349, "learning_rate": 2e-06, "loss": 0.2481, "step": 1457 }, { "epoch": 0.5438269302499068, "grad_norm": 1.1217293739318848, "learning_rate": 2e-06, "loss": 0.2078, "step": 1458 }, { "epoch": 0.5441999254009698, "grad_norm": 0.8613735437393188, "learning_rate": 2e-06, "loss": 0.1914, "step": 1459 }, { "epoch": 0.5445729205520328, "grad_norm": 0.9372427463531494, "learning_rate": 2e-06, "loss": 0.1962, "step": 1460 }, { "epoch": 0.5449459157030958, "grad_norm": 0.811021089553833, "learning_rate": 2e-06, "loss": 0.2016, "step": 1461 }, { "epoch": 0.5453189108541588, "grad_norm": 1.0369412899017334, "learning_rate": 2e-06, "loss": 0.2046, "step": 1462 }, { "epoch": 0.5456919060052219, "grad_norm": 1.1720285415649414, "learning_rate": 2e-06, "loss": 0.1938, "step": 1463 }, { "epoch": 0.5460649011562849, "grad_norm": 0.9193541407585144, "learning_rate": 2e-06, "loss": 0.2193, "step": 1464 }, { "epoch": 0.546437896307348, "grad_norm": 0.8558489084243774, "learning_rate": 2e-06, "loss": 0.2054, "step": 1465 }, { "epoch": 0.546810891458411, "grad_norm": 0.7677939534187317, "learning_rate": 2e-06, "loss": 0.2135, "step": 1466 }, { "epoch": 0.547183886609474, "grad_norm": 0.9511263370513916, "learning_rate": 2e-06, "loss": 0.2165, "step": 1467 }, { "epoch": 0.5475568817605371, "grad_norm": 1.0290344953536987, "learning_rate": 2e-06, "loss": 0.1844, "step": 1468 }, { "epoch": 0.5479298769116001, "grad_norm": 0.855604887008667, "learning_rate": 2e-06, "loss": 0.2181, "step": 1469 }, { "epoch": 0.5483028720626631, "grad_norm": 0.7282152771949768, "learning_rate": 2e-06, "loss": 0.1996, "step": 1470 }, { "epoch": 0.5486758672137262, "grad_norm": 1.1422181129455566, "learning_rate": 2e-06, "loss": 0.2187, "step": 1471 }, { "epoch": 0.5490488623647892, "grad_norm": 0.9203907251358032, "learning_rate": 2e-06, "loss": 0.2188, "step": 1472 }, { "epoch": 0.5494218575158523, "grad_norm": 0.8138203024864197, "learning_rate": 2e-06, "loss": 0.1956, "step": 1473 }, { "epoch": 0.5497948526669153, "grad_norm": 0.9618978500366211, "learning_rate": 2e-06, "loss": 0.1949, "step": 1474 }, { "epoch": 0.5501678478179783, "grad_norm": 1.0320115089416504, "learning_rate": 2e-06, "loss": 0.2055, "step": 1475 }, { "epoch": 0.5505408429690414, "grad_norm": 1.1048243045806885, "learning_rate": 2e-06, "loss": 0.2154, "step": 1476 }, { "epoch": 0.5509138381201044, "grad_norm": 1.187124490737915, "learning_rate": 2e-06, "loss": 0.1802, "step": 1477 }, { "epoch": 0.5512868332711675, "grad_norm": 0.8547005653381348, "learning_rate": 2e-06, "loss": 0.2123, "step": 1478 }, { "epoch": 0.5516598284222305, "grad_norm": 1.0152088403701782, "learning_rate": 2e-06, "loss": 0.2086, "step": 1479 }, { "epoch": 0.5520328235732935, "grad_norm": 1.2394713163375854, "learning_rate": 2e-06, "loss": 0.1936, "step": 1480 }, { "epoch": 0.5524058187243566, "grad_norm": 1.009573221206665, "learning_rate": 2e-06, "loss": 0.2027, "step": 1481 }, { "epoch": 0.5527788138754196, "grad_norm": 0.8763130307197571, "learning_rate": 2e-06, "loss": 0.1997, "step": 1482 }, { "epoch": 0.5531518090264826, "grad_norm": 0.8127403855323792, "learning_rate": 2e-06, "loss": 0.2298, "step": 1483 }, { "epoch": 0.5535248041775457, "grad_norm": 0.9252679944038391, "learning_rate": 2e-06, "loss": 0.209, "step": 1484 }, { "epoch": 0.5538977993286087, "grad_norm": 0.9289845824241638, "learning_rate": 2e-06, "loss": 0.1811, "step": 1485 }, { "epoch": 0.5542707944796718, "grad_norm": 0.6655697822570801, "learning_rate": 2e-06, "loss": 0.2098, "step": 1486 }, { "epoch": 0.5546437896307348, "grad_norm": 0.7421613335609436, "learning_rate": 2e-06, "loss": 0.2266, "step": 1487 }, { "epoch": 0.5550167847817978, "grad_norm": 0.8588148951530457, "learning_rate": 2e-06, "loss": 0.1941, "step": 1488 }, { "epoch": 0.5553897799328609, "grad_norm": 0.7405229210853577, "learning_rate": 2e-06, "loss": 0.2302, "step": 1489 }, { "epoch": 0.5557627750839239, "grad_norm": 0.831580638885498, "learning_rate": 2e-06, "loss": 0.1787, "step": 1490 }, { "epoch": 0.556135770234987, "grad_norm": 0.9835621118545532, "learning_rate": 2e-06, "loss": 0.2075, "step": 1491 }, { "epoch": 0.55650876538605, "grad_norm": 0.7864119410514832, "learning_rate": 2e-06, "loss": 0.2116, "step": 1492 }, { "epoch": 0.556881760537113, "grad_norm": 0.943805992603302, "learning_rate": 2e-06, "loss": 0.1945, "step": 1493 }, { "epoch": 0.5572547556881761, "grad_norm": 0.8403879404067993, "learning_rate": 2e-06, "loss": 0.2066, "step": 1494 }, { "epoch": 0.5576277508392391, "grad_norm": 0.790149986743927, "learning_rate": 2e-06, "loss": 0.2073, "step": 1495 }, { "epoch": 0.5580007459903021, "grad_norm": 0.8448852300643921, "learning_rate": 2e-06, "loss": 0.1953, "step": 1496 }, { "epoch": 0.5583737411413652, "grad_norm": 0.7651852965354919, "learning_rate": 2e-06, "loss": 0.2097, "step": 1497 }, { "epoch": 0.5587467362924282, "grad_norm": 1.1243904829025269, "learning_rate": 2e-06, "loss": 0.1838, "step": 1498 }, { "epoch": 0.5591197314434913, "grad_norm": 0.8842614889144897, "learning_rate": 2e-06, "loss": 0.1956, "step": 1499 }, { "epoch": 0.5594927265945543, "grad_norm": 1.2193019390106201, "learning_rate": 2e-06, "loss": 0.2074, "step": 1500 }, { "epoch": 0.5598657217456173, "grad_norm": 0.8264303803443909, "learning_rate": 2e-06, "loss": 0.1973, "step": 1501 }, { "epoch": 0.5602387168966804, "grad_norm": 1.4049650430679321, "learning_rate": 2e-06, "loss": 0.2163, "step": 1502 }, { "epoch": 0.5606117120477434, "grad_norm": 0.8161189556121826, "learning_rate": 2e-06, "loss": 0.1763, "step": 1503 }, { "epoch": 0.5609847071988064, "grad_norm": 1.0930392742156982, "learning_rate": 2e-06, "loss": 0.1855, "step": 1504 }, { "epoch": 0.5613577023498695, "grad_norm": 1.032840371131897, "learning_rate": 2e-06, "loss": 0.1998, "step": 1505 }, { "epoch": 0.5617306975009325, "grad_norm": 1.077797770500183, "learning_rate": 2e-06, "loss": 0.2087, "step": 1506 }, { "epoch": 0.5621036926519956, "grad_norm": 0.8913561701774597, "learning_rate": 2e-06, "loss": 0.1936, "step": 1507 }, { "epoch": 0.5624766878030586, "grad_norm": 0.9478410482406616, "learning_rate": 2e-06, "loss": 0.212, "step": 1508 }, { "epoch": 0.5628496829541216, "grad_norm": 0.8514869809150696, "learning_rate": 2e-06, "loss": 0.188, "step": 1509 }, { "epoch": 0.5632226781051847, "grad_norm": 0.8994277715682983, "learning_rate": 2e-06, "loss": 0.1988, "step": 1510 }, { "epoch": 0.5635956732562477, "grad_norm": 0.7155340909957886, "learning_rate": 2e-06, "loss": 0.2217, "step": 1511 }, { "epoch": 0.5639686684073107, "grad_norm": 0.8085910081863403, "learning_rate": 2e-06, "loss": 0.1898, "step": 1512 }, { "epoch": 0.5643416635583738, "grad_norm": 1.1427513360977173, "learning_rate": 2e-06, "loss": 0.2036, "step": 1513 }, { "epoch": 0.5647146587094368, "grad_norm": 0.8563709259033203, "learning_rate": 2e-06, "loss": 0.2026, "step": 1514 }, { "epoch": 0.5650876538604999, "grad_norm": 0.9192009568214417, "learning_rate": 2e-06, "loss": 0.2054, "step": 1515 }, { "epoch": 0.5654606490115629, "grad_norm": 1.0076457262039185, "learning_rate": 2e-06, "loss": 0.2084, "step": 1516 }, { "epoch": 0.5658336441626259, "grad_norm": 0.7837719321250916, "learning_rate": 2e-06, "loss": 0.2174, "step": 1517 }, { "epoch": 0.566206639313689, "grad_norm": 0.8359636068344116, "learning_rate": 2e-06, "loss": 0.2191, "step": 1518 }, { "epoch": 0.566579634464752, "grad_norm": 1.0927757024765015, "learning_rate": 2e-06, "loss": 0.197, "step": 1519 }, { "epoch": 0.5669526296158149, "grad_norm": 0.7511622905731201, "learning_rate": 2e-06, "loss": 0.2045, "step": 1520 }, { "epoch": 0.567325624766878, "grad_norm": 1.0128374099731445, "learning_rate": 2e-06, "loss": 0.183, "step": 1521 }, { "epoch": 0.567698619917941, "grad_norm": 1.03459632396698, "learning_rate": 2e-06, "loss": 0.1884, "step": 1522 }, { "epoch": 0.568071615069004, "grad_norm": 1.1607614755630493, "learning_rate": 2e-06, "loss": 0.1852, "step": 1523 }, { "epoch": 0.5684446102200671, "grad_norm": 0.9604873657226562, "learning_rate": 2e-06, "loss": 0.1904, "step": 1524 }, { "epoch": 0.5688176053711301, "grad_norm": 1.2132493257522583, "learning_rate": 2e-06, "loss": 0.1954, "step": 1525 }, { "epoch": 0.5691906005221932, "grad_norm": 1.0300458669662476, "learning_rate": 2e-06, "loss": 0.2158, "step": 1526 }, { "epoch": 0.5695635956732562, "grad_norm": 0.7362550497055054, "learning_rate": 2e-06, "loss": 0.169, "step": 1527 }, { "epoch": 0.5699365908243192, "grad_norm": 0.9546226263046265, "learning_rate": 2e-06, "loss": 0.2124, "step": 1528 }, { "epoch": 0.5703095859753823, "grad_norm": 0.888779878616333, "learning_rate": 2e-06, "loss": 0.1925, "step": 1529 }, { "epoch": 0.5706825811264453, "grad_norm": 0.9254459142684937, "learning_rate": 2e-06, "loss": 0.1995, "step": 1530 }, { "epoch": 0.5710555762775084, "grad_norm": 1.079147219657898, "learning_rate": 2e-06, "loss": 0.2193, "step": 1531 }, { "epoch": 0.5714285714285714, "grad_norm": 0.8361727595329285, "learning_rate": 2e-06, "loss": 0.2102, "step": 1532 }, { "epoch": 0.5718015665796344, "grad_norm": 0.888152003288269, "learning_rate": 2e-06, "loss": 0.202, "step": 1533 }, { "epoch": 0.5721745617306975, "grad_norm": 1.1898001432418823, "learning_rate": 2e-06, "loss": 0.2055, "step": 1534 }, { "epoch": 0.5725475568817605, "grad_norm": 1.0168462991714478, "learning_rate": 2e-06, "loss": 0.1962, "step": 1535 }, { "epoch": 0.5729205520328235, "grad_norm": 0.959384024143219, "learning_rate": 2e-06, "loss": 0.2057, "step": 1536 }, { "epoch": 0.5732935471838866, "grad_norm": 0.7897847890853882, "learning_rate": 2e-06, "loss": 0.1982, "step": 1537 }, { "epoch": 0.5736665423349496, "grad_norm": 1.098384141921997, "learning_rate": 2e-06, "loss": 0.182, "step": 1538 }, { "epoch": 0.5740395374860127, "grad_norm": 0.9576812982559204, "learning_rate": 2e-06, "loss": 0.2256, "step": 1539 }, { "epoch": 0.5744125326370757, "grad_norm": 0.9628286957740784, "learning_rate": 2e-06, "loss": 0.196, "step": 1540 }, { "epoch": 0.5747855277881387, "grad_norm": 0.9752312302589417, "learning_rate": 2e-06, "loss": 0.1973, "step": 1541 }, { "epoch": 0.5751585229392018, "grad_norm": 0.8177341222763062, "learning_rate": 2e-06, "loss": 0.1926, "step": 1542 }, { "epoch": 0.5755315180902648, "grad_norm": 0.7304345965385437, "learning_rate": 2e-06, "loss": 0.2062, "step": 1543 }, { "epoch": 0.5759045132413279, "grad_norm": 0.7894207239151001, "learning_rate": 2e-06, "loss": 0.1782, "step": 1544 }, { "epoch": 0.5762775083923909, "grad_norm": 0.6984177827835083, "learning_rate": 2e-06, "loss": 0.2099, "step": 1545 }, { "epoch": 0.5766505035434539, "grad_norm": 1.1561590433120728, "learning_rate": 2e-06, "loss": 0.2009, "step": 1546 }, { "epoch": 0.577023498694517, "grad_norm": 1.0591732263565063, "learning_rate": 2e-06, "loss": 0.2266, "step": 1547 }, { "epoch": 0.57739649384558, "grad_norm": 0.9659167528152466, "learning_rate": 2e-06, "loss": 0.2121, "step": 1548 }, { "epoch": 0.577769488996643, "grad_norm": 0.8190930485725403, "learning_rate": 2e-06, "loss": 0.2067, "step": 1549 }, { "epoch": 0.5781424841477061, "grad_norm": 0.7936007380485535, "learning_rate": 2e-06, "loss": 0.2195, "step": 1550 }, { "epoch": 0.5785154792987691, "grad_norm": 0.9459429383277893, "learning_rate": 2e-06, "loss": 0.218, "step": 1551 }, { "epoch": 0.5788884744498322, "grad_norm": 1.3359479904174805, "learning_rate": 2e-06, "loss": 0.2262, "step": 1552 }, { "epoch": 0.5792614696008952, "grad_norm": 0.7703587412834167, "learning_rate": 2e-06, "loss": 0.2349, "step": 1553 }, { "epoch": 0.5796344647519582, "grad_norm": 1.049426555633545, "learning_rate": 2e-06, "loss": 0.2037, "step": 1554 }, { "epoch": 0.5800074599030213, "grad_norm": 1.2910161018371582, "learning_rate": 2e-06, "loss": 0.1835, "step": 1555 }, { "epoch": 0.5803804550540843, "grad_norm": 0.8643163442611694, "learning_rate": 2e-06, "loss": 0.2076, "step": 1556 }, { "epoch": 0.5807534502051473, "grad_norm": 1.1509735584259033, "learning_rate": 2e-06, "loss": 0.1924, "step": 1557 }, { "epoch": 0.5811264453562104, "grad_norm": 0.7478874325752258, "learning_rate": 2e-06, "loss": 0.2049, "step": 1558 }, { "epoch": 0.5814994405072734, "grad_norm": 0.7487229108810425, "learning_rate": 2e-06, "loss": 0.1938, "step": 1559 }, { "epoch": 0.5818724356583365, "grad_norm": 0.7567011713981628, "learning_rate": 2e-06, "loss": 0.2215, "step": 1560 }, { "epoch": 0.5822454308093995, "grad_norm": 0.8239989280700684, "learning_rate": 2e-06, "loss": 0.2155, "step": 1561 }, { "epoch": 0.5826184259604625, "grad_norm": 0.9054506421089172, "learning_rate": 2e-06, "loss": 0.1994, "step": 1562 }, { "epoch": 0.5829914211115256, "grad_norm": 0.9732832312583923, "learning_rate": 2e-06, "loss": 0.2013, "step": 1563 }, { "epoch": 0.5833644162625886, "grad_norm": 0.8227533102035522, "learning_rate": 2e-06, "loss": 0.2037, "step": 1564 }, { "epoch": 0.5837374114136517, "grad_norm": 0.9743925929069519, "learning_rate": 2e-06, "loss": 0.2033, "step": 1565 }, { "epoch": 0.5841104065647147, "grad_norm": 0.9750741124153137, "learning_rate": 2e-06, "loss": 0.2169, "step": 1566 }, { "epoch": 0.5844834017157777, "grad_norm": 0.9188608527183533, "learning_rate": 2e-06, "loss": 0.2264, "step": 1567 }, { "epoch": 0.5848563968668408, "grad_norm": 0.7664763331413269, "learning_rate": 2e-06, "loss": 0.1959, "step": 1568 }, { "epoch": 0.5852293920179038, "grad_norm": 0.9523501396179199, "learning_rate": 2e-06, "loss": 0.186, "step": 1569 }, { "epoch": 0.5856023871689668, "grad_norm": 0.8691665530204773, "learning_rate": 2e-06, "loss": 0.2196, "step": 1570 }, { "epoch": 0.5859753823200299, "grad_norm": 1.0217360258102417, "learning_rate": 2e-06, "loss": 0.202, "step": 1571 }, { "epoch": 0.5863483774710929, "grad_norm": 1.3630115985870361, "learning_rate": 2e-06, "loss": 0.2132, "step": 1572 }, { "epoch": 0.586721372622156, "grad_norm": 0.9779193997383118, "learning_rate": 2e-06, "loss": 0.2091, "step": 1573 }, { "epoch": 0.587094367773219, "grad_norm": 0.9888725280761719, "learning_rate": 2e-06, "loss": 0.1835, "step": 1574 }, { "epoch": 0.587467362924282, "grad_norm": 0.9736975431442261, "learning_rate": 2e-06, "loss": 0.209, "step": 1575 }, { "epoch": 0.5878403580753451, "grad_norm": 0.7859480381011963, "learning_rate": 2e-06, "loss": 0.2033, "step": 1576 }, { "epoch": 0.5882133532264081, "grad_norm": 0.9102156758308411, "learning_rate": 2e-06, "loss": 0.2365, "step": 1577 }, { "epoch": 0.5885863483774711, "grad_norm": 0.9550003409385681, "learning_rate": 2e-06, "loss": 0.1785, "step": 1578 }, { "epoch": 0.5889593435285341, "grad_norm": 0.8985123634338379, "learning_rate": 2e-06, "loss": 0.2159, "step": 1579 }, { "epoch": 0.5893323386795971, "grad_norm": 0.9728977680206299, "learning_rate": 2e-06, "loss": 0.214, "step": 1580 }, { "epoch": 0.5897053338306601, "grad_norm": 0.9027715921401978, "learning_rate": 2e-06, "loss": 0.2032, "step": 1581 }, { "epoch": 0.5900783289817232, "grad_norm": 0.7975237369537354, "learning_rate": 2e-06, "loss": 0.1867, "step": 1582 }, { "epoch": 0.5904513241327862, "grad_norm": 0.839210569858551, "learning_rate": 2e-06, "loss": 0.2107, "step": 1583 }, { "epoch": 0.5908243192838493, "grad_norm": 0.8356136679649353, "learning_rate": 2e-06, "loss": 0.1942, "step": 1584 }, { "epoch": 0.5911973144349123, "grad_norm": 0.8310926556587219, "learning_rate": 2e-06, "loss": 0.1986, "step": 1585 }, { "epoch": 0.5915703095859753, "grad_norm": 0.9600639939308167, "learning_rate": 2e-06, "loss": 0.1936, "step": 1586 }, { "epoch": 0.5919433047370384, "grad_norm": 0.8111011385917664, "learning_rate": 2e-06, "loss": 0.2137, "step": 1587 }, { "epoch": 0.5923162998881014, "grad_norm": 1.1480382680892944, "learning_rate": 2e-06, "loss": 0.2142, "step": 1588 }, { "epoch": 0.5926892950391645, "grad_norm": 1.1302260160446167, "learning_rate": 2e-06, "loss": 0.1984, "step": 1589 }, { "epoch": 0.5930622901902275, "grad_norm": 0.8314052820205688, "learning_rate": 2e-06, "loss": 0.2187, "step": 1590 }, { "epoch": 0.5934352853412905, "grad_norm": 1.1475340127944946, "learning_rate": 2e-06, "loss": 0.2003, "step": 1591 }, { "epoch": 0.5938082804923536, "grad_norm": 1.1859869956970215, "learning_rate": 2e-06, "loss": 0.2093, "step": 1592 }, { "epoch": 0.5941812756434166, "grad_norm": 0.7394687533378601, "learning_rate": 2e-06, "loss": 0.2062, "step": 1593 }, { "epoch": 0.5945542707944796, "grad_norm": 0.6633815169334412, "learning_rate": 2e-06, "loss": 0.2043, "step": 1594 }, { "epoch": 0.5949272659455427, "grad_norm": 1.0799716711044312, "learning_rate": 2e-06, "loss": 0.2143, "step": 1595 }, { "epoch": 0.5953002610966057, "grad_norm": 0.9771822094917297, "learning_rate": 2e-06, "loss": 0.1976, "step": 1596 }, { "epoch": 0.5956732562476688, "grad_norm": 0.8734481334686279, "learning_rate": 2e-06, "loss": 0.2128, "step": 1597 }, { "epoch": 0.5960462513987318, "grad_norm": 0.9360693693161011, "learning_rate": 2e-06, "loss": 0.1833, "step": 1598 }, { "epoch": 0.5964192465497948, "grad_norm": 0.8876034021377563, "learning_rate": 2e-06, "loss": 0.2164, "step": 1599 }, { "epoch": 0.5967922417008579, "grad_norm": 0.9625729918479919, "learning_rate": 2e-06, "loss": 0.2029, "step": 1600 }, { "epoch": 0.5971652368519209, "grad_norm": 1.0239979028701782, "learning_rate": 2e-06, "loss": 0.1932, "step": 1601 }, { "epoch": 0.597538232002984, "grad_norm": 1.0448815822601318, "learning_rate": 2e-06, "loss": 0.2022, "step": 1602 }, { "epoch": 0.597911227154047, "grad_norm": 0.9427640438079834, "learning_rate": 2e-06, "loss": 0.1963, "step": 1603 }, { "epoch": 0.59828422230511, "grad_norm": 0.865763783454895, "learning_rate": 2e-06, "loss": 0.2144, "step": 1604 }, { "epoch": 0.5986572174561731, "grad_norm": 0.8063538670539856, "learning_rate": 2e-06, "loss": 0.2182, "step": 1605 }, { "epoch": 0.5990302126072361, "grad_norm": 1.0209932327270508, "learning_rate": 2e-06, "loss": 0.1868, "step": 1606 }, { "epoch": 0.5994032077582991, "grad_norm": 1.056165337562561, "learning_rate": 2e-06, "loss": 0.1935, "step": 1607 }, { "epoch": 0.5997762029093622, "grad_norm": 2.9496257305145264, "learning_rate": 2e-06, "loss": 0.2149, "step": 1608 }, { "epoch": 0.6001491980604252, "grad_norm": 0.8303114771842957, "learning_rate": 2e-06, "loss": 0.1905, "step": 1609 }, { "epoch": 0.6005221932114883, "grad_norm": 1.0668656826019287, "learning_rate": 2e-06, "loss": 0.2092, "step": 1610 }, { "epoch": 0.6008951883625513, "grad_norm": 1.066655158996582, "learning_rate": 2e-06, "loss": 0.1814, "step": 1611 }, { "epoch": 0.6012681835136143, "grad_norm": 0.8277404308319092, "learning_rate": 2e-06, "loss": 0.2188, "step": 1612 }, { "epoch": 0.6016411786646774, "grad_norm": 0.775254487991333, "learning_rate": 2e-06, "loss": 0.206, "step": 1613 }, { "epoch": 0.6020141738157404, "grad_norm": 1.2658766508102417, "learning_rate": 2e-06, "loss": 0.1998, "step": 1614 }, { "epoch": 0.6023871689668034, "grad_norm": 0.7783849239349365, "learning_rate": 2e-06, "loss": 0.2089, "step": 1615 }, { "epoch": 0.6027601641178665, "grad_norm": 1.006895661354065, "learning_rate": 2e-06, "loss": 0.1968, "step": 1616 }, { "epoch": 0.6031331592689295, "grad_norm": 0.7341719269752502, "learning_rate": 2e-06, "loss": 0.1839, "step": 1617 }, { "epoch": 0.6035061544199926, "grad_norm": 0.9929084181785583, "learning_rate": 2e-06, "loss": 0.221, "step": 1618 }, { "epoch": 0.6038791495710556, "grad_norm": 0.5986042618751526, "learning_rate": 2e-06, "loss": 0.1965, "step": 1619 }, { "epoch": 0.6042521447221186, "grad_norm": 0.8366122245788574, "learning_rate": 2e-06, "loss": 0.2113, "step": 1620 }, { "epoch": 0.6046251398731817, "grad_norm": 1.0069868564605713, "learning_rate": 2e-06, "loss": 0.1952, "step": 1621 }, { "epoch": 0.6049981350242447, "grad_norm": 0.9151413440704346, "learning_rate": 2e-06, "loss": 0.2158, "step": 1622 }, { "epoch": 0.6053711301753077, "grad_norm": 0.9213225841522217, "learning_rate": 2e-06, "loss": 0.1923, "step": 1623 }, { "epoch": 0.6057441253263708, "grad_norm": 1.0142539739608765, "learning_rate": 2e-06, "loss": 0.1909, "step": 1624 }, { "epoch": 0.6061171204774338, "grad_norm": 0.8016992807388306, "learning_rate": 2e-06, "loss": 0.2196, "step": 1625 }, { "epoch": 0.6064901156284969, "grad_norm": 0.9168184995651245, "learning_rate": 2e-06, "loss": 0.2099, "step": 1626 }, { "epoch": 0.6068631107795599, "grad_norm": 0.7589272856712341, "learning_rate": 2e-06, "loss": 0.2035, "step": 1627 }, { "epoch": 0.6072361059306229, "grad_norm": 1.1566041707992554, "learning_rate": 2e-06, "loss": 0.1962, "step": 1628 }, { "epoch": 0.607609101081686, "grad_norm": 0.9600558876991272, "learning_rate": 2e-06, "loss": 0.1914, "step": 1629 }, { "epoch": 0.607982096232749, "grad_norm": 1.1662269830703735, "learning_rate": 2e-06, "loss": 0.1821, "step": 1630 }, { "epoch": 0.608355091383812, "grad_norm": 0.9543625116348267, "learning_rate": 2e-06, "loss": 0.2115, "step": 1631 }, { "epoch": 0.6087280865348751, "grad_norm": 1.0721625089645386, "learning_rate": 2e-06, "loss": 0.2086, "step": 1632 }, { "epoch": 0.6091010816859381, "grad_norm": 1.0890843868255615, "learning_rate": 2e-06, "loss": 0.1931, "step": 1633 }, { "epoch": 0.6094740768370012, "grad_norm": 1.0563162565231323, "learning_rate": 2e-06, "loss": 0.1903, "step": 1634 }, { "epoch": 0.6098470719880642, "grad_norm": 0.8523598313331604, "learning_rate": 2e-06, "loss": 0.2141, "step": 1635 }, { "epoch": 0.6102200671391272, "grad_norm": 0.6872006058692932, "learning_rate": 2e-06, "loss": 0.1843, "step": 1636 }, { "epoch": 0.6105930622901903, "grad_norm": 1.0608246326446533, "learning_rate": 2e-06, "loss": 0.1904, "step": 1637 }, { "epoch": 0.6109660574412533, "grad_norm": 1.0197244882583618, "learning_rate": 2e-06, "loss": 0.2075, "step": 1638 }, { "epoch": 0.6113390525923162, "grad_norm": 0.786795437335968, "learning_rate": 2e-06, "loss": 0.2053, "step": 1639 }, { "epoch": 0.6117120477433793, "grad_norm": 1.0248416662216187, "learning_rate": 2e-06, "loss": 0.2194, "step": 1640 }, { "epoch": 0.6120850428944423, "grad_norm": 0.7955386638641357, "learning_rate": 2e-06, "loss": 0.2152, "step": 1641 }, { "epoch": 0.6124580380455054, "grad_norm": 0.959690272808075, "learning_rate": 2e-06, "loss": 0.1952, "step": 1642 }, { "epoch": 0.6128310331965684, "grad_norm": 0.8183354735374451, "learning_rate": 2e-06, "loss": 0.2052, "step": 1643 }, { "epoch": 0.6132040283476314, "grad_norm": 1.1140373945236206, "learning_rate": 2e-06, "loss": 0.2158, "step": 1644 }, { "epoch": 0.6135770234986945, "grad_norm": 1.1291979551315308, "learning_rate": 2e-06, "loss": 0.1827, "step": 1645 }, { "epoch": 0.6139500186497575, "grad_norm": 0.8924630284309387, "learning_rate": 2e-06, "loss": 0.2347, "step": 1646 }, { "epoch": 0.6143230138008205, "grad_norm": 0.9780297875404358, "learning_rate": 2e-06, "loss": 0.1918, "step": 1647 }, { "epoch": 0.6146960089518836, "grad_norm": 0.9473655223846436, "learning_rate": 2e-06, "loss": 0.2057, "step": 1648 }, { "epoch": 0.6150690041029466, "grad_norm": 0.9750227332115173, "learning_rate": 2e-06, "loss": 0.1957, "step": 1649 }, { "epoch": 0.6154419992540097, "grad_norm": 0.9443356990814209, "learning_rate": 2e-06, "loss": 0.2059, "step": 1650 }, { "epoch": 0.6158149944050727, "grad_norm": 0.7919995784759521, "learning_rate": 2e-06, "loss": 0.2032, "step": 1651 }, { "epoch": 0.6161879895561357, "grad_norm": 1.0571527481079102, "learning_rate": 2e-06, "loss": 0.2127, "step": 1652 }, { "epoch": 0.6165609847071988, "grad_norm": 0.7958033680915833, "learning_rate": 2e-06, "loss": 0.2217, "step": 1653 }, { "epoch": 0.6169339798582618, "grad_norm": 0.9226006865501404, "learning_rate": 2e-06, "loss": 0.1912, "step": 1654 }, { "epoch": 0.6173069750093249, "grad_norm": 0.8423042893409729, "learning_rate": 2e-06, "loss": 0.1877, "step": 1655 }, { "epoch": 0.6176799701603879, "grad_norm": 1.0896841287612915, "learning_rate": 2e-06, "loss": 0.1649, "step": 1656 }, { "epoch": 0.6180529653114509, "grad_norm": 1.145064353942871, "learning_rate": 2e-06, "loss": 0.1878, "step": 1657 }, { "epoch": 0.618425960462514, "grad_norm": 0.9663406014442444, "learning_rate": 2e-06, "loss": 0.2002, "step": 1658 }, { "epoch": 0.618798955613577, "grad_norm": 1.1999212503433228, "learning_rate": 2e-06, "loss": 0.1877, "step": 1659 }, { "epoch": 0.61917195076464, "grad_norm": 1.0400522947311401, "learning_rate": 2e-06, "loss": 0.1905, "step": 1660 }, { "epoch": 0.6195449459157031, "grad_norm": 0.8474313616752625, "learning_rate": 2e-06, "loss": 0.2478, "step": 1661 }, { "epoch": 0.6199179410667661, "grad_norm": 0.8460491895675659, "learning_rate": 2e-06, "loss": 0.1794, "step": 1662 }, { "epoch": 0.6202909362178292, "grad_norm": 0.797245442867279, "learning_rate": 2e-06, "loss": 0.2236, "step": 1663 }, { "epoch": 0.6206639313688922, "grad_norm": 0.9216701984405518, "learning_rate": 2e-06, "loss": 0.1858, "step": 1664 }, { "epoch": 0.6210369265199552, "grad_norm": 0.774472177028656, "learning_rate": 2e-06, "loss": 0.209, "step": 1665 }, { "epoch": 0.6214099216710183, "grad_norm": 0.9496878981590271, "learning_rate": 2e-06, "loss": 0.2232, "step": 1666 }, { "epoch": 0.6217829168220813, "grad_norm": 0.8415672779083252, "learning_rate": 2e-06, "loss": 0.2343, "step": 1667 }, { "epoch": 0.6221559119731443, "grad_norm": 0.9622007608413696, "learning_rate": 2e-06, "loss": 0.1928, "step": 1668 }, { "epoch": 0.6225289071242074, "grad_norm": 0.9321330189704895, "learning_rate": 2e-06, "loss": 0.1889, "step": 1669 }, { "epoch": 0.6229019022752704, "grad_norm": 1.2715901136398315, "learning_rate": 2e-06, "loss": 0.2127, "step": 1670 }, { "epoch": 0.6232748974263335, "grad_norm": 0.9194140434265137, "learning_rate": 2e-06, "loss": 0.199, "step": 1671 }, { "epoch": 0.6236478925773965, "grad_norm": 1.1328184604644775, "learning_rate": 2e-06, "loss": 0.1953, "step": 1672 }, { "epoch": 0.6240208877284595, "grad_norm": 0.9996297955513, "learning_rate": 2e-06, "loss": 0.1905, "step": 1673 }, { "epoch": 0.6243938828795226, "grad_norm": 1.1814196109771729, "learning_rate": 2e-06, "loss": 0.1949, "step": 1674 }, { "epoch": 0.6247668780305856, "grad_norm": 0.8320417404174805, "learning_rate": 2e-06, "loss": 0.215, "step": 1675 }, { "epoch": 0.6251398731816487, "grad_norm": 0.8582906723022461, "learning_rate": 2e-06, "loss": 0.2056, "step": 1676 }, { "epoch": 0.6255128683327117, "grad_norm": 0.9747276306152344, "learning_rate": 2e-06, "loss": 0.2235, "step": 1677 }, { "epoch": 0.6258858634837747, "grad_norm": 1.1094889640808105, "learning_rate": 2e-06, "loss": 0.1899, "step": 1678 }, { "epoch": 0.6262588586348378, "grad_norm": 1.0499438047409058, "learning_rate": 2e-06, "loss": 0.2208, "step": 1679 }, { "epoch": 0.6266318537859008, "grad_norm": 1.2735142707824707, "learning_rate": 2e-06, "loss": 0.2024, "step": 1680 }, { "epoch": 0.6270048489369638, "grad_norm": 0.7293907999992371, "learning_rate": 2e-06, "loss": 0.2357, "step": 1681 }, { "epoch": 0.6273778440880269, "grad_norm": 1.2194629907608032, "learning_rate": 2e-06, "loss": 0.2004, "step": 1682 }, { "epoch": 0.6277508392390899, "grad_norm": 1.0836209058761597, "learning_rate": 2e-06, "loss": 0.1943, "step": 1683 }, { "epoch": 0.628123834390153, "grad_norm": 0.8070651888847351, "learning_rate": 2e-06, "loss": 0.2473, "step": 1684 }, { "epoch": 0.628496829541216, "grad_norm": 0.8089210987091064, "learning_rate": 2e-06, "loss": 0.1816, "step": 1685 }, { "epoch": 0.628869824692279, "grad_norm": 0.9801793098449707, "learning_rate": 2e-06, "loss": 0.1881, "step": 1686 }, { "epoch": 0.6292428198433421, "grad_norm": 0.7923787832260132, "learning_rate": 2e-06, "loss": 0.1943, "step": 1687 }, { "epoch": 0.6296158149944051, "grad_norm": 0.960749626159668, "learning_rate": 2e-06, "loss": 0.1927, "step": 1688 }, { "epoch": 0.6299888101454681, "grad_norm": 1.046474575996399, "learning_rate": 2e-06, "loss": 0.2294, "step": 1689 }, { "epoch": 0.6303618052965312, "grad_norm": 1.0034699440002441, "learning_rate": 2e-06, "loss": 0.1969, "step": 1690 }, { "epoch": 0.6307348004475942, "grad_norm": 0.9509958624839783, "learning_rate": 2e-06, "loss": 0.1898, "step": 1691 }, { "epoch": 0.6311077955986573, "grad_norm": 0.8019482493400574, "learning_rate": 2e-06, "loss": 0.2023, "step": 1692 }, { "epoch": 0.6314807907497203, "grad_norm": 0.786465048789978, "learning_rate": 2e-06, "loss": 0.2099, "step": 1693 }, { "epoch": 0.6318537859007833, "grad_norm": 0.8259560465812683, "learning_rate": 2e-06, "loss": 0.178, "step": 1694 }, { "epoch": 0.6322267810518464, "grad_norm": 0.7983558177947998, "learning_rate": 2e-06, "loss": 0.2174, "step": 1695 }, { "epoch": 0.6325997762029094, "grad_norm": 1.203829288482666, "learning_rate": 2e-06, "loss": 0.1804, "step": 1696 }, { "epoch": 0.6329727713539725, "grad_norm": 0.9253819584846497, "learning_rate": 2e-06, "loss": 0.2066, "step": 1697 }, { "epoch": 0.6333457665050354, "grad_norm": 1.0895178318023682, "learning_rate": 2e-06, "loss": 0.2096, "step": 1698 }, { "epoch": 0.6337187616560984, "grad_norm": 0.8520975112915039, "learning_rate": 2e-06, "loss": 0.2162, "step": 1699 }, { "epoch": 0.6340917568071615, "grad_norm": 0.7477272152900696, "learning_rate": 2e-06, "loss": 0.2233, "step": 1700 }, { "epoch": 0.6344647519582245, "grad_norm": 1.0048943758010864, "learning_rate": 2e-06, "loss": 0.1914, "step": 1701 }, { "epoch": 0.6348377471092875, "grad_norm": 0.9552952647209167, "learning_rate": 2e-06, "loss": 0.2159, "step": 1702 }, { "epoch": 0.6352107422603506, "grad_norm": 0.75681072473526, "learning_rate": 2e-06, "loss": 0.2058, "step": 1703 }, { "epoch": 0.6355837374114136, "grad_norm": 0.8196337223052979, "learning_rate": 2e-06, "loss": 0.2206, "step": 1704 }, { "epoch": 0.6359567325624766, "grad_norm": 0.9447998404502869, "learning_rate": 2e-06, "loss": 0.1912, "step": 1705 }, { "epoch": 0.6363297277135397, "grad_norm": 0.8967987895011902, "learning_rate": 2e-06, "loss": 0.1966, "step": 1706 }, { "epoch": 0.6367027228646027, "grad_norm": 0.7784256339073181, "learning_rate": 2e-06, "loss": 0.2153, "step": 1707 }, { "epoch": 0.6370757180156658, "grad_norm": 0.8703823685646057, "learning_rate": 2e-06, "loss": 0.2071, "step": 1708 }, { "epoch": 0.6374487131667288, "grad_norm": 0.990860104560852, "learning_rate": 2e-06, "loss": 0.1951, "step": 1709 }, { "epoch": 0.6378217083177918, "grad_norm": 0.8397653698921204, "learning_rate": 2e-06, "loss": 0.2123, "step": 1710 }, { "epoch": 0.6381947034688549, "grad_norm": 1.1628122329711914, "learning_rate": 2e-06, "loss": 0.1892, "step": 1711 }, { "epoch": 0.6385676986199179, "grad_norm": 0.9792414307594299, "learning_rate": 2e-06, "loss": 0.2108, "step": 1712 }, { "epoch": 0.638940693770981, "grad_norm": 0.9970802664756775, "learning_rate": 2e-06, "loss": 0.2052, "step": 1713 }, { "epoch": 0.639313688922044, "grad_norm": 1.131909728050232, "learning_rate": 2e-06, "loss": 0.2169, "step": 1714 }, { "epoch": 0.639686684073107, "grad_norm": 0.7568588256835938, "learning_rate": 2e-06, "loss": 0.2047, "step": 1715 }, { "epoch": 0.6400596792241701, "grad_norm": 0.830500066280365, "learning_rate": 2e-06, "loss": 0.2109, "step": 1716 }, { "epoch": 0.6404326743752331, "grad_norm": 0.8149598240852356, "learning_rate": 2e-06, "loss": 0.2307, "step": 1717 }, { "epoch": 0.6408056695262961, "grad_norm": 0.8593707084655762, "learning_rate": 2e-06, "loss": 0.2062, "step": 1718 }, { "epoch": 0.6411786646773592, "grad_norm": 0.8812693953514099, "learning_rate": 2e-06, "loss": 0.1963, "step": 1719 }, { "epoch": 0.6415516598284222, "grad_norm": 0.8509761691093445, "learning_rate": 2e-06, "loss": 0.2018, "step": 1720 }, { "epoch": 0.6419246549794853, "grad_norm": 0.6583268642425537, "learning_rate": 2e-06, "loss": 0.2143, "step": 1721 }, { "epoch": 0.6422976501305483, "grad_norm": 0.9287341833114624, "learning_rate": 2e-06, "loss": 0.189, "step": 1722 }, { "epoch": 0.6426706452816113, "grad_norm": 0.7311787009239197, "learning_rate": 2e-06, "loss": 0.221, "step": 1723 }, { "epoch": 0.6430436404326744, "grad_norm": 0.7565869688987732, "learning_rate": 2e-06, "loss": 0.2022, "step": 1724 }, { "epoch": 0.6434166355837374, "grad_norm": 0.8718202114105225, "learning_rate": 2e-06, "loss": 0.1808, "step": 1725 }, { "epoch": 0.6437896307348004, "grad_norm": 0.934622049331665, "learning_rate": 2e-06, "loss": 0.1832, "step": 1726 }, { "epoch": 0.6441626258858635, "grad_norm": 0.8025328516960144, "learning_rate": 2e-06, "loss": 0.2225, "step": 1727 }, { "epoch": 0.6445356210369265, "grad_norm": 0.9896131753921509, "learning_rate": 2e-06, "loss": 0.209, "step": 1728 }, { "epoch": 0.6449086161879896, "grad_norm": 1.1427011489868164, "learning_rate": 2e-06, "loss": 0.2154, "step": 1729 }, { "epoch": 0.6452816113390526, "grad_norm": 0.8452101349830627, "learning_rate": 2e-06, "loss": 0.1835, "step": 1730 }, { "epoch": 0.6456546064901156, "grad_norm": 0.8689064383506775, "learning_rate": 2e-06, "loss": 0.2172, "step": 1731 }, { "epoch": 0.6460276016411787, "grad_norm": 0.8214976191520691, "learning_rate": 2e-06, "loss": 0.1986, "step": 1732 }, { "epoch": 0.6464005967922417, "grad_norm": 0.8646319508552551, "learning_rate": 2e-06, "loss": 0.2206, "step": 1733 }, { "epoch": 0.6467735919433047, "grad_norm": 1.065658688545227, "learning_rate": 2e-06, "loss": 0.2069, "step": 1734 }, { "epoch": 0.6471465870943678, "grad_norm": 1.0043648481369019, "learning_rate": 2e-06, "loss": 0.2032, "step": 1735 }, { "epoch": 0.6475195822454308, "grad_norm": 0.9269819855690002, "learning_rate": 2e-06, "loss": 0.1936, "step": 1736 }, { "epoch": 0.6478925773964939, "grad_norm": 1.0294123888015747, "learning_rate": 2e-06, "loss": 0.1996, "step": 1737 }, { "epoch": 0.6482655725475569, "grad_norm": 0.8904076814651489, "learning_rate": 2e-06, "loss": 0.1922, "step": 1738 }, { "epoch": 0.6486385676986199, "grad_norm": 0.9491968750953674, "learning_rate": 2e-06, "loss": 0.2153, "step": 1739 }, { "epoch": 0.649011562849683, "grad_norm": 0.9319261312484741, "learning_rate": 2e-06, "loss": 0.2268, "step": 1740 }, { "epoch": 0.649384558000746, "grad_norm": 0.9071356654167175, "learning_rate": 2e-06, "loss": 0.2113, "step": 1741 }, { "epoch": 0.649757553151809, "grad_norm": 0.8506804704666138, "learning_rate": 2e-06, "loss": 0.2078, "step": 1742 }, { "epoch": 0.6501305483028721, "grad_norm": 0.7697766423225403, "learning_rate": 2e-06, "loss": 0.2403, "step": 1743 }, { "epoch": 0.6505035434539351, "grad_norm": 1.0240317583084106, "learning_rate": 2e-06, "loss": 0.1935, "step": 1744 }, { "epoch": 0.6508765386049982, "grad_norm": 1.0145916938781738, "learning_rate": 2e-06, "loss": 0.2016, "step": 1745 }, { "epoch": 0.6512495337560612, "grad_norm": 0.795730710029602, "learning_rate": 2e-06, "loss": 0.2074, "step": 1746 }, { "epoch": 0.6516225289071242, "grad_norm": 0.9751318097114563, "learning_rate": 2e-06, "loss": 0.2331, "step": 1747 }, { "epoch": 0.6519955240581873, "grad_norm": 0.9543254375457764, "learning_rate": 2e-06, "loss": 0.2181, "step": 1748 }, { "epoch": 0.6523685192092503, "grad_norm": 1.0666232109069824, "learning_rate": 2e-06, "loss": 0.2022, "step": 1749 }, { "epoch": 0.6527415143603134, "grad_norm": 0.9751657247543335, "learning_rate": 2e-06, "loss": 0.2145, "step": 1750 }, { "epoch": 0.6531145095113764, "grad_norm": 1.0859922170639038, "learning_rate": 2e-06, "loss": 0.1968, "step": 1751 }, { "epoch": 0.6534875046624394, "grad_norm": 0.9498375058174133, "learning_rate": 2e-06, "loss": 0.199, "step": 1752 }, { "epoch": 0.6538604998135025, "grad_norm": 0.9398866891860962, "learning_rate": 2e-06, "loss": 0.1826, "step": 1753 }, { "epoch": 0.6542334949645655, "grad_norm": 0.9192987680435181, "learning_rate": 2e-06, "loss": 0.217, "step": 1754 }, { "epoch": 0.6546064901156285, "grad_norm": 0.8296903371810913, "learning_rate": 2e-06, "loss": 0.2134, "step": 1755 }, { "epoch": 0.6549794852666916, "grad_norm": 1.1745383739471436, "learning_rate": 2e-06, "loss": 0.216, "step": 1756 }, { "epoch": 0.6553524804177546, "grad_norm": 0.9704560041427612, "learning_rate": 2e-06, "loss": 0.2141, "step": 1757 }, { "epoch": 0.6557254755688176, "grad_norm": 0.7887241840362549, "learning_rate": 2e-06, "loss": 0.2405, "step": 1758 }, { "epoch": 0.6560984707198806, "grad_norm": 0.8076916337013245, "learning_rate": 2e-06, "loss": 0.203, "step": 1759 }, { "epoch": 0.6564714658709436, "grad_norm": 0.8219658732414246, "learning_rate": 2e-06, "loss": 0.1882, "step": 1760 }, { "epoch": 0.6568444610220067, "grad_norm": 0.8406803011894226, "learning_rate": 2e-06, "loss": 0.1963, "step": 1761 }, { "epoch": 0.6572174561730697, "grad_norm": 0.8490579724311829, "learning_rate": 2e-06, "loss": 0.1897, "step": 1762 }, { "epoch": 0.6575904513241327, "grad_norm": 1.1283594369888306, "learning_rate": 2e-06, "loss": 0.1977, "step": 1763 }, { "epoch": 0.6579634464751958, "grad_norm": 0.8579171299934387, "learning_rate": 2e-06, "loss": 0.1942, "step": 1764 }, { "epoch": 0.6583364416262588, "grad_norm": 0.9736613035202026, "learning_rate": 2e-06, "loss": 0.2114, "step": 1765 }, { "epoch": 0.6587094367773219, "grad_norm": 0.8695799708366394, "learning_rate": 2e-06, "loss": 0.196, "step": 1766 }, { "epoch": 0.6590824319283849, "grad_norm": 0.7891690135002136, "learning_rate": 2e-06, "loss": 0.2244, "step": 1767 }, { "epoch": 0.6594554270794479, "grad_norm": 1.2322629690170288, "learning_rate": 2e-06, "loss": 0.1899, "step": 1768 }, { "epoch": 0.659828422230511, "grad_norm": 0.6937333941459656, "learning_rate": 2e-06, "loss": 0.2065, "step": 1769 }, { "epoch": 0.660201417381574, "grad_norm": 0.8966611623764038, "learning_rate": 2e-06, "loss": 0.1858, "step": 1770 }, { "epoch": 0.660574412532637, "grad_norm": 0.8305867910385132, "learning_rate": 2e-06, "loss": 0.208, "step": 1771 }, { "epoch": 0.6609474076837001, "grad_norm": 0.9457594156265259, "learning_rate": 2e-06, "loss": 0.2145, "step": 1772 }, { "epoch": 0.6613204028347631, "grad_norm": 1.0411853790283203, "learning_rate": 2e-06, "loss": 0.1919, "step": 1773 }, { "epoch": 0.6616933979858262, "grad_norm": 1.0725951194763184, "learning_rate": 2e-06, "loss": 0.1975, "step": 1774 }, { "epoch": 0.6620663931368892, "grad_norm": 0.9747177362442017, "learning_rate": 2e-06, "loss": 0.2057, "step": 1775 }, { "epoch": 0.6624393882879522, "grad_norm": 1.1264305114746094, "learning_rate": 2e-06, "loss": 0.1992, "step": 1776 }, { "epoch": 0.6628123834390153, "grad_norm": 0.8622643947601318, "learning_rate": 2e-06, "loss": 0.1764, "step": 1777 }, { "epoch": 0.6631853785900783, "grad_norm": 0.9533774256706238, "learning_rate": 2e-06, "loss": 0.1991, "step": 1778 }, { "epoch": 0.6635583737411413, "grad_norm": 1.313708782196045, "learning_rate": 2e-06, "loss": 0.1969, "step": 1779 }, { "epoch": 0.6639313688922044, "grad_norm": 1.0776163339614868, "learning_rate": 2e-06, "loss": 0.2113, "step": 1780 }, { "epoch": 0.6643043640432674, "grad_norm": 1.0502008199691772, "learning_rate": 2e-06, "loss": 0.1879, "step": 1781 }, { "epoch": 0.6646773591943305, "grad_norm": 0.9002297520637512, "learning_rate": 2e-06, "loss": 0.2216, "step": 1782 }, { "epoch": 0.6650503543453935, "grad_norm": 0.8969772458076477, "learning_rate": 2e-06, "loss": 0.1976, "step": 1783 }, { "epoch": 0.6654233494964565, "grad_norm": 1.235211968421936, "learning_rate": 2e-06, "loss": 0.1856, "step": 1784 }, { "epoch": 0.6657963446475196, "grad_norm": 1.0691871643066406, "learning_rate": 2e-06, "loss": 0.1936, "step": 1785 }, { "epoch": 0.6661693397985826, "grad_norm": 1.2185349464416504, "learning_rate": 2e-06, "loss": 0.2063, "step": 1786 }, { "epoch": 0.6665423349496457, "grad_norm": 1.0423874855041504, "learning_rate": 2e-06, "loss": 0.1845, "step": 1787 }, { "epoch": 0.6669153301007087, "grad_norm": 0.8261691927909851, "learning_rate": 2e-06, "loss": 0.2122, "step": 1788 }, { "epoch": 0.6672883252517717, "grad_norm": 1.1221448183059692, "learning_rate": 2e-06, "loss": 0.199, "step": 1789 }, { "epoch": 0.6676613204028348, "grad_norm": 0.7531624436378479, "learning_rate": 2e-06, "loss": 0.2322, "step": 1790 }, { "epoch": 0.6680343155538978, "grad_norm": 0.9606071710586548, "learning_rate": 2e-06, "loss": 0.2302, "step": 1791 }, { "epoch": 0.6684073107049608, "grad_norm": 0.9980217218399048, "learning_rate": 2e-06, "loss": 0.2125, "step": 1792 }, { "epoch": 0.6687803058560239, "grad_norm": 1.2167829275131226, "learning_rate": 2e-06, "loss": 0.2125, "step": 1793 }, { "epoch": 0.6691533010070869, "grad_norm": 0.8332329988479614, "learning_rate": 2e-06, "loss": 0.2194, "step": 1794 }, { "epoch": 0.66952629615815, "grad_norm": 0.8558403253555298, "learning_rate": 2e-06, "loss": 0.1975, "step": 1795 }, { "epoch": 0.669899291309213, "grad_norm": 0.9729085564613342, "learning_rate": 2e-06, "loss": 0.219, "step": 1796 }, { "epoch": 0.670272286460276, "grad_norm": 0.7875694036483765, "learning_rate": 2e-06, "loss": 0.2166, "step": 1797 }, { "epoch": 0.6706452816113391, "grad_norm": 1.0174416303634644, "learning_rate": 2e-06, "loss": 0.2095, "step": 1798 }, { "epoch": 0.6710182767624021, "grad_norm": 1.0792499780654907, "learning_rate": 2e-06, "loss": 0.1737, "step": 1799 }, { "epoch": 0.6713912719134651, "grad_norm": 1.0636968612670898, "learning_rate": 2e-06, "loss": 0.1821, "step": 1800 }, { "epoch": 0.6717642670645282, "grad_norm": 0.8770163059234619, "learning_rate": 2e-06, "loss": 0.2038, "step": 1801 }, { "epoch": 0.6721372622155912, "grad_norm": 0.8711845278739929, "learning_rate": 2e-06, "loss": 0.2083, "step": 1802 }, { "epoch": 0.6725102573666543, "grad_norm": 0.8189000487327576, "learning_rate": 2e-06, "loss": 0.2168, "step": 1803 }, { "epoch": 0.6728832525177173, "grad_norm": 0.7656701803207397, "learning_rate": 2e-06, "loss": 0.1945, "step": 1804 }, { "epoch": 0.6732562476687803, "grad_norm": 1.0872633457183838, "learning_rate": 2e-06, "loss": 0.189, "step": 1805 }, { "epoch": 0.6736292428198434, "grad_norm": 1.0174263715744019, "learning_rate": 2e-06, "loss": 0.1951, "step": 1806 }, { "epoch": 0.6740022379709064, "grad_norm": 0.985224187374115, "learning_rate": 2e-06, "loss": 0.1865, "step": 1807 }, { "epoch": 0.6743752331219695, "grad_norm": 1.600449800491333, "learning_rate": 2e-06, "loss": 0.2003, "step": 1808 }, { "epoch": 0.6747482282730325, "grad_norm": 0.8234907984733582, "learning_rate": 2e-06, "loss": 0.2288, "step": 1809 }, { "epoch": 0.6751212234240955, "grad_norm": 1.160229206085205, "learning_rate": 2e-06, "loss": 0.214, "step": 1810 }, { "epoch": 0.6754942185751586, "grad_norm": 0.8127809762954712, "learning_rate": 2e-06, "loss": 0.2225, "step": 1811 }, { "epoch": 0.6758672137262216, "grad_norm": 0.9934259653091431, "learning_rate": 2e-06, "loss": 0.2127, "step": 1812 }, { "epoch": 0.6762402088772846, "grad_norm": 0.7721952795982361, "learning_rate": 2e-06, "loss": 0.2123, "step": 1813 }, { "epoch": 0.6766132040283477, "grad_norm": 0.759935200214386, "learning_rate": 2e-06, "loss": 0.2351, "step": 1814 }, { "epoch": 0.6769861991794107, "grad_norm": 0.8567644953727722, "learning_rate": 2e-06, "loss": 0.2028, "step": 1815 }, { "epoch": 0.6773591943304738, "grad_norm": 1.299582839012146, "learning_rate": 2e-06, "loss": 0.2061, "step": 1816 }, { "epoch": 0.6777321894815368, "grad_norm": 0.7223970293998718, "learning_rate": 2e-06, "loss": 0.2214, "step": 1817 }, { "epoch": 0.6781051846325997, "grad_norm": 0.9386372566223145, "learning_rate": 2e-06, "loss": 0.2034, "step": 1818 }, { "epoch": 0.6784781797836628, "grad_norm": 0.8271152973175049, "learning_rate": 2e-06, "loss": 0.2307, "step": 1819 }, { "epoch": 0.6788511749347258, "grad_norm": 0.782386839389801, "learning_rate": 2e-06, "loss": 0.2106, "step": 1820 }, { "epoch": 0.6792241700857888, "grad_norm": 0.9473801255226135, "learning_rate": 2e-06, "loss": 0.1921, "step": 1821 }, { "epoch": 0.6795971652368519, "grad_norm": 0.9778918623924255, "learning_rate": 2e-06, "loss": 0.1848, "step": 1822 }, { "epoch": 0.6799701603879149, "grad_norm": 0.8836502432823181, "learning_rate": 2e-06, "loss": 0.2097, "step": 1823 }, { "epoch": 0.680343155538978, "grad_norm": 0.9873999953269958, "learning_rate": 2e-06, "loss": 0.2016, "step": 1824 }, { "epoch": 0.680716150690041, "grad_norm": 0.956471860408783, "learning_rate": 2e-06, "loss": 0.2217, "step": 1825 }, { "epoch": 0.681089145841104, "grad_norm": 0.8802580833435059, "learning_rate": 2e-06, "loss": 0.2149, "step": 1826 }, { "epoch": 0.6814621409921671, "grad_norm": 1.0153300762176514, "learning_rate": 2e-06, "loss": 0.1957, "step": 1827 }, { "epoch": 0.6818351361432301, "grad_norm": 0.9347707629203796, "learning_rate": 2e-06, "loss": 0.2098, "step": 1828 }, { "epoch": 0.6822081312942931, "grad_norm": 0.7177311778068542, "learning_rate": 2e-06, "loss": 0.2075, "step": 1829 }, { "epoch": 0.6825811264453562, "grad_norm": 0.7667344808578491, "learning_rate": 2e-06, "loss": 0.2025, "step": 1830 }, { "epoch": 0.6829541215964192, "grad_norm": 0.9389854669570923, "learning_rate": 2e-06, "loss": 0.1948, "step": 1831 }, { "epoch": 0.6833271167474823, "grad_norm": 0.9871043562889099, "learning_rate": 2e-06, "loss": 0.1882, "step": 1832 }, { "epoch": 0.6837001118985453, "grad_norm": 0.765370786190033, "learning_rate": 2e-06, "loss": 0.2196, "step": 1833 }, { "epoch": 0.6840731070496083, "grad_norm": 0.7440075278282166, "learning_rate": 2e-06, "loss": 0.2058, "step": 1834 }, { "epoch": 0.6844461022006714, "grad_norm": 0.8874169588088989, "learning_rate": 2e-06, "loss": 0.2171, "step": 1835 }, { "epoch": 0.6848190973517344, "grad_norm": 1.0255513191223145, "learning_rate": 2e-06, "loss": 0.2016, "step": 1836 }, { "epoch": 0.6851920925027974, "grad_norm": 0.8972360491752625, "learning_rate": 2e-06, "loss": 0.2284, "step": 1837 }, { "epoch": 0.6855650876538605, "grad_norm": 0.9080281853675842, "learning_rate": 2e-06, "loss": 0.2142, "step": 1838 }, { "epoch": 0.6859380828049235, "grad_norm": 0.8789563775062561, "learning_rate": 2e-06, "loss": 0.2083, "step": 1839 }, { "epoch": 0.6863110779559866, "grad_norm": 1.0147823095321655, "learning_rate": 2e-06, "loss": 0.1981, "step": 1840 }, { "epoch": 0.6866840731070496, "grad_norm": 1.016061544418335, "learning_rate": 2e-06, "loss": 0.2076, "step": 1841 }, { "epoch": 0.6870570682581126, "grad_norm": 0.8814531564712524, "learning_rate": 2e-06, "loss": 0.1902, "step": 1842 }, { "epoch": 0.6874300634091757, "grad_norm": 0.798760175704956, "learning_rate": 2e-06, "loss": 0.1931, "step": 1843 }, { "epoch": 0.6878030585602387, "grad_norm": 0.9233955144882202, "learning_rate": 2e-06, "loss": 0.2277, "step": 1844 }, { "epoch": 0.6881760537113017, "grad_norm": 0.8491867184638977, "learning_rate": 2e-06, "loss": 0.2078, "step": 1845 }, { "epoch": 0.6885490488623648, "grad_norm": 1.0325279235839844, "learning_rate": 2e-06, "loss": 0.1824, "step": 1846 }, { "epoch": 0.6889220440134278, "grad_norm": 0.8555834293365479, "learning_rate": 2e-06, "loss": 0.2012, "step": 1847 }, { "epoch": 0.6892950391644909, "grad_norm": 0.7743140459060669, "learning_rate": 2e-06, "loss": 0.1885, "step": 1848 }, { "epoch": 0.6896680343155539, "grad_norm": 0.9899098873138428, "learning_rate": 2e-06, "loss": 0.1861, "step": 1849 }, { "epoch": 0.6900410294666169, "grad_norm": 1.0367153882980347, "learning_rate": 2e-06, "loss": 0.196, "step": 1850 }, { "epoch": 0.69041402461768, "grad_norm": 0.780989408493042, "learning_rate": 2e-06, "loss": 0.2065, "step": 1851 }, { "epoch": 0.690787019768743, "grad_norm": 1.0127615928649902, "learning_rate": 2e-06, "loss": 0.1987, "step": 1852 }, { "epoch": 0.691160014919806, "grad_norm": 1.1428464651107788, "learning_rate": 2e-06, "loss": 0.2108, "step": 1853 }, { "epoch": 0.6915330100708691, "grad_norm": 0.7984495759010315, "learning_rate": 2e-06, "loss": 0.2208, "step": 1854 }, { "epoch": 0.6919060052219321, "grad_norm": 1.0880039930343628, "learning_rate": 2e-06, "loss": 0.1908, "step": 1855 }, { "epoch": 0.6922790003729952, "grad_norm": 0.9282162189483643, "learning_rate": 2e-06, "loss": 0.2339, "step": 1856 }, { "epoch": 0.6926519955240582, "grad_norm": 0.8766353130340576, "learning_rate": 2e-06, "loss": 0.1856, "step": 1857 }, { "epoch": 0.6930249906751212, "grad_norm": 0.8652535676956177, "learning_rate": 2e-06, "loss": 0.2421, "step": 1858 }, { "epoch": 0.6933979858261843, "grad_norm": 0.7287376523017883, "learning_rate": 2e-06, "loss": 0.2219, "step": 1859 }, { "epoch": 0.6937709809772473, "grad_norm": 1.0175480842590332, "learning_rate": 2e-06, "loss": 0.194, "step": 1860 }, { "epoch": 0.6941439761283104, "grad_norm": 0.9386530518531799, "learning_rate": 2e-06, "loss": 0.2003, "step": 1861 }, { "epoch": 0.6945169712793734, "grad_norm": 0.9723847508430481, "learning_rate": 2e-06, "loss": 0.2025, "step": 1862 }, { "epoch": 0.6948899664304364, "grad_norm": 0.8910704851150513, "learning_rate": 2e-06, "loss": 0.1783, "step": 1863 }, { "epoch": 0.6952629615814995, "grad_norm": 0.9360957741737366, "learning_rate": 2e-06, "loss": 0.1731, "step": 1864 }, { "epoch": 0.6956359567325625, "grad_norm": 1.2292749881744385, "learning_rate": 2e-06, "loss": 0.2296, "step": 1865 }, { "epoch": 0.6960089518836255, "grad_norm": 1.1944184303283691, "learning_rate": 2e-06, "loss": 0.2294, "step": 1866 }, { "epoch": 0.6963819470346886, "grad_norm": 0.8043590784072876, "learning_rate": 2e-06, "loss": 0.219, "step": 1867 }, { "epoch": 0.6967549421857516, "grad_norm": 0.8860931992530823, "learning_rate": 2e-06, "loss": 0.2188, "step": 1868 }, { "epoch": 0.6971279373368147, "grad_norm": 0.9879488945007324, "learning_rate": 2e-06, "loss": 0.1939, "step": 1869 }, { "epoch": 0.6975009324878777, "grad_norm": 1.0794733762741089, "learning_rate": 2e-06, "loss": 0.2149, "step": 1870 }, { "epoch": 0.6978739276389407, "grad_norm": 1.0121008157730103, "learning_rate": 2e-06, "loss": 0.1941, "step": 1871 }, { "epoch": 0.6982469227900038, "grad_norm": 1.0184800624847412, "learning_rate": 2e-06, "loss": 0.1997, "step": 1872 }, { "epoch": 0.6986199179410668, "grad_norm": 0.9295178055763245, "learning_rate": 2e-06, "loss": 0.2099, "step": 1873 }, { "epoch": 0.6989929130921299, "grad_norm": 0.8631184101104736, "learning_rate": 2e-06, "loss": 0.2271, "step": 1874 }, { "epoch": 0.6993659082431929, "grad_norm": 0.9377182722091675, "learning_rate": 2e-06, "loss": 0.2023, "step": 1875 }, { "epoch": 0.6997389033942559, "grad_norm": 0.8404821157455444, "learning_rate": 2e-06, "loss": 0.2397, "step": 1876 }, { "epoch": 0.7001118985453189, "grad_norm": 0.9690471887588501, "learning_rate": 2e-06, "loss": 0.1974, "step": 1877 }, { "epoch": 0.7004848936963819, "grad_norm": 0.7613245248794556, "learning_rate": 2e-06, "loss": 0.213, "step": 1878 }, { "epoch": 0.7008578888474449, "grad_norm": 0.9159087538719177, "learning_rate": 2e-06, "loss": 0.2166, "step": 1879 }, { "epoch": 0.701230883998508, "grad_norm": 0.9254552125930786, "learning_rate": 2e-06, "loss": 0.2152, "step": 1880 }, { "epoch": 0.701603879149571, "grad_norm": 0.7897544503211975, "learning_rate": 2e-06, "loss": 0.1992, "step": 1881 }, { "epoch": 0.701976874300634, "grad_norm": 0.8474463224411011, "learning_rate": 2e-06, "loss": 0.2032, "step": 1882 }, { "epoch": 0.7023498694516971, "grad_norm": 1.1076806783676147, "learning_rate": 2e-06, "loss": 0.2039, "step": 1883 }, { "epoch": 0.7027228646027601, "grad_norm": 0.8353797793388367, "learning_rate": 2e-06, "loss": 0.1967, "step": 1884 }, { "epoch": 0.7030958597538232, "grad_norm": 0.9024094343185425, "learning_rate": 2e-06, "loss": 0.2197, "step": 1885 }, { "epoch": 0.7034688549048862, "grad_norm": 0.9957160949707031, "learning_rate": 2e-06, "loss": 0.1932, "step": 1886 }, { "epoch": 0.7038418500559492, "grad_norm": 0.862204372882843, "learning_rate": 2e-06, "loss": 0.1721, "step": 1887 }, { "epoch": 0.7042148452070123, "grad_norm": 0.8480924963951111, "learning_rate": 2e-06, "loss": 0.2036, "step": 1888 }, { "epoch": 0.7045878403580753, "grad_norm": 1.1751099824905396, "learning_rate": 2e-06, "loss": 0.2005, "step": 1889 }, { "epoch": 0.7049608355091384, "grad_norm": 0.9520360827445984, "learning_rate": 2e-06, "loss": 0.2119, "step": 1890 }, { "epoch": 0.7053338306602014, "grad_norm": 0.8688490986824036, "learning_rate": 2e-06, "loss": 0.1974, "step": 1891 }, { "epoch": 0.7057068258112644, "grad_norm": 0.8690934181213379, "learning_rate": 2e-06, "loss": 0.2159, "step": 1892 }, { "epoch": 0.7060798209623275, "grad_norm": 0.7759982943534851, "learning_rate": 2e-06, "loss": 0.228, "step": 1893 }, { "epoch": 0.7064528161133905, "grad_norm": 0.8807022571563721, "learning_rate": 2e-06, "loss": 0.2074, "step": 1894 }, { "epoch": 0.7068258112644535, "grad_norm": 0.9922583699226379, "learning_rate": 2e-06, "loss": 0.1923, "step": 1895 }, { "epoch": 0.7071988064155166, "grad_norm": 0.9635892510414124, "learning_rate": 2e-06, "loss": 0.205, "step": 1896 }, { "epoch": 0.7075718015665796, "grad_norm": 0.8116633892059326, "learning_rate": 2e-06, "loss": 0.2037, "step": 1897 }, { "epoch": 0.7079447967176427, "grad_norm": 0.8385411500930786, "learning_rate": 2e-06, "loss": 0.2011, "step": 1898 }, { "epoch": 0.7083177918687057, "grad_norm": 1.219459891319275, "learning_rate": 2e-06, "loss": 0.1984, "step": 1899 }, { "epoch": 0.7086907870197687, "grad_norm": 0.7826680541038513, "learning_rate": 2e-06, "loss": 0.212, "step": 1900 }, { "epoch": 0.7090637821708318, "grad_norm": 1.1658294200897217, "learning_rate": 2e-06, "loss": 0.1646, "step": 1901 }, { "epoch": 0.7094367773218948, "grad_norm": 1.0682201385498047, "learning_rate": 2e-06, "loss": 0.2202, "step": 1902 }, { "epoch": 0.7098097724729578, "grad_norm": 0.7221997976303101, "learning_rate": 2e-06, "loss": 0.1859, "step": 1903 }, { "epoch": 0.7101827676240209, "grad_norm": 1.1048306226730347, "learning_rate": 2e-06, "loss": 0.2009, "step": 1904 }, { "epoch": 0.7105557627750839, "grad_norm": 0.9591043591499329, "learning_rate": 2e-06, "loss": 0.2232, "step": 1905 }, { "epoch": 0.710928757926147, "grad_norm": 0.9131747484207153, "learning_rate": 2e-06, "loss": 0.2217, "step": 1906 }, { "epoch": 0.71130175307721, "grad_norm": 0.9719731211662292, "learning_rate": 2e-06, "loss": 0.2119, "step": 1907 }, { "epoch": 0.711674748228273, "grad_norm": 0.8204597234725952, "learning_rate": 2e-06, "loss": 0.1951, "step": 1908 }, { "epoch": 0.7120477433793361, "grad_norm": 0.9899845123291016, "learning_rate": 2e-06, "loss": 0.2111, "step": 1909 }, { "epoch": 0.7124207385303991, "grad_norm": 1.1292401552200317, "learning_rate": 2e-06, "loss": 0.1846, "step": 1910 }, { "epoch": 0.7127937336814621, "grad_norm": 1.0082097053527832, "learning_rate": 2e-06, "loss": 0.1945, "step": 1911 }, { "epoch": 0.7131667288325252, "grad_norm": 1.1485270261764526, "learning_rate": 2e-06, "loss": 0.1878, "step": 1912 }, { "epoch": 0.7135397239835882, "grad_norm": 1.0757004022598267, "learning_rate": 2e-06, "loss": 0.1927, "step": 1913 }, { "epoch": 0.7139127191346513, "grad_norm": 0.751781702041626, "learning_rate": 2e-06, "loss": 0.2214, "step": 1914 }, { "epoch": 0.7142857142857143, "grad_norm": 0.7339298725128174, "learning_rate": 2e-06, "loss": 0.2091, "step": 1915 }, { "epoch": 0.7146587094367773, "grad_norm": 0.9108266234397888, "learning_rate": 2e-06, "loss": 0.1778, "step": 1916 }, { "epoch": 0.7150317045878404, "grad_norm": 0.7854178547859192, "learning_rate": 2e-06, "loss": 0.1945, "step": 1917 }, { "epoch": 0.7154046997389034, "grad_norm": 0.8387170433998108, "learning_rate": 2e-06, "loss": 0.2185, "step": 1918 }, { "epoch": 0.7157776948899665, "grad_norm": 0.7763946056365967, "learning_rate": 2e-06, "loss": 0.2256, "step": 1919 }, { "epoch": 0.7161506900410295, "grad_norm": 0.8907976746559143, "learning_rate": 2e-06, "loss": 0.2169, "step": 1920 }, { "epoch": 0.7165236851920925, "grad_norm": 0.9769330024719238, "learning_rate": 2e-06, "loss": 0.2012, "step": 1921 }, { "epoch": 0.7168966803431556, "grad_norm": 0.7695717811584473, "learning_rate": 2e-06, "loss": 0.2023, "step": 1922 }, { "epoch": 0.7172696754942186, "grad_norm": 0.7578920722007751, "learning_rate": 2e-06, "loss": 0.1971, "step": 1923 }, { "epoch": 0.7176426706452816, "grad_norm": 1.0582191944122314, "learning_rate": 2e-06, "loss": 0.1882, "step": 1924 }, { "epoch": 0.7180156657963447, "grad_norm": 0.6958286762237549, "learning_rate": 2e-06, "loss": 0.2082, "step": 1925 }, { "epoch": 0.7183886609474077, "grad_norm": 0.9272208213806152, "learning_rate": 2e-06, "loss": 0.1964, "step": 1926 }, { "epoch": 0.7187616560984708, "grad_norm": 1.01496160030365, "learning_rate": 2e-06, "loss": 0.1838, "step": 1927 }, { "epoch": 0.7191346512495338, "grad_norm": 0.9239795207977295, "learning_rate": 2e-06, "loss": 0.2071, "step": 1928 }, { "epoch": 0.7195076464005968, "grad_norm": 0.9062318205833435, "learning_rate": 2e-06, "loss": 0.2243, "step": 1929 }, { "epoch": 0.7198806415516599, "grad_norm": 0.8528268337249756, "learning_rate": 2e-06, "loss": 0.2108, "step": 1930 }, { "epoch": 0.7202536367027229, "grad_norm": 0.8037415146827698, "learning_rate": 2e-06, "loss": 0.202, "step": 1931 }, { "epoch": 0.720626631853786, "grad_norm": 0.7332692742347717, "learning_rate": 2e-06, "loss": 0.1836, "step": 1932 }, { "epoch": 0.720999627004849, "grad_norm": 1.1220067739486694, "learning_rate": 2e-06, "loss": 0.2057, "step": 1933 }, { "epoch": 0.721372622155912, "grad_norm": 0.9211771488189697, "learning_rate": 2e-06, "loss": 0.2007, "step": 1934 }, { "epoch": 0.7217456173069751, "grad_norm": 0.7977085709571838, "learning_rate": 2e-06, "loss": 0.1913, "step": 1935 }, { "epoch": 0.7221186124580381, "grad_norm": 0.942264974117279, "learning_rate": 2e-06, "loss": 0.2369, "step": 1936 }, { "epoch": 0.722491607609101, "grad_norm": 0.9515184760093689, "learning_rate": 2e-06, "loss": 0.1994, "step": 1937 }, { "epoch": 0.7228646027601641, "grad_norm": 0.833041787147522, "learning_rate": 2e-06, "loss": 0.2101, "step": 1938 }, { "epoch": 0.7232375979112271, "grad_norm": 0.9375584125518799, "learning_rate": 2e-06, "loss": 0.2181, "step": 1939 }, { "epoch": 0.7236105930622901, "grad_norm": 0.9885155558586121, "learning_rate": 2e-06, "loss": 0.2011, "step": 1940 }, { "epoch": 0.7239835882133532, "grad_norm": 0.803938090801239, "learning_rate": 2e-06, "loss": 0.2023, "step": 1941 }, { "epoch": 0.7243565833644162, "grad_norm": 0.7585703134536743, "learning_rate": 2e-06, "loss": 0.2015, "step": 1942 }, { "epoch": 0.7247295785154793, "grad_norm": 0.8579400777816772, "learning_rate": 2e-06, "loss": 0.2062, "step": 1943 }, { "epoch": 0.7251025736665423, "grad_norm": 1.037219762802124, "learning_rate": 2e-06, "loss": 0.1889, "step": 1944 }, { "epoch": 0.7254755688176053, "grad_norm": 0.9795365333557129, "learning_rate": 2e-06, "loss": 0.187, "step": 1945 }, { "epoch": 0.7258485639686684, "grad_norm": 0.8377014398574829, "learning_rate": 2e-06, "loss": 0.206, "step": 1946 }, { "epoch": 0.7262215591197314, "grad_norm": 0.841389536857605, "learning_rate": 2e-06, "loss": 0.2099, "step": 1947 }, { "epoch": 0.7265945542707944, "grad_norm": 0.8638618588447571, "learning_rate": 2e-06, "loss": 0.2005, "step": 1948 }, { "epoch": 0.7269675494218575, "grad_norm": 0.6890285015106201, "learning_rate": 2e-06, "loss": 0.1937, "step": 1949 }, { "epoch": 0.7273405445729205, "grad_norm": 0.7577584385871887, "learning_rate": 2e-06, "loss": 0.1915, "step": 1950 }, { "epoch": 0.7277135397239836, "grad_norm": 1.1333080530166626, "learning_rate": 2e-06, "loss": 0.2221, "step": 1951 }, { "epoch": 0.7280865348750466, "grad_norm": 0.949486494064331, "learning_rate": 2e-06, "loss": 0.2119, "step": 1952 }, { "epoch": 0.7284595300261096, "grad_norm": 1.0919969081878662, "learning_rate": 2e-06, "loss": 0.2215, "step": 1953 }, { "epoch": 0.7288325251771727, "grad_norm": 0.7585273385047913, "learning_rate": 2e-06, "loss": 0.2131, "step": 1954 }, { "epoch": 0.7292055203282357, "grad_norm": 0.8571286201477051, "learning_rate": 2e-06, "loss": 0.2292, "step": 1955 }, { "epoch": 0.7295785154792988, "grad_norm": 0.8554438352584839, "learning_rate": 2e-06, "loss": 0.2173, "step": 1956 }, { "epoch": 0.7299515106303618, "grad_norm": 1.1886935234069824, "learning_rate": 2e-06, "loss": 0.2173, "step": 1957 }, { "epoch": 0.7303245057814248, "grad_norm": 1.095496416091919, "learning_rate": 2e-06, "loss": 0.1863, "step": 1958 }, { "epoch": 0.7306975009324879, "grad_norm": 0.7964881062507629, "learning_rate": 2e-06, "loss": 0.2202, "step": 1959 }, { "epoch": 0.7310704960835509, "grad_norm": 0.8633349537849426, "learning_rate": 2e-06, "loss": 0.2012, "step": 1960 }, { "epoch": 0.7314434912346139, "grad_norm": 0.8816505074501038, "learning_rate": 2e-06, "loss": 0.1965, "step": 1961 }, { "epoch": 0.731816486385677, "grad_norm": 0.9222331047058105, "learning_rate": 2e-06, "loss": 0.2124, "step": 1962 }, { "epoch": 0.73218948153674, "grad_norm": 0.8169852495193481, "learning_rate": 2e-06, "loss": 0.2153, "step": 1963 }, { "epoch": 0.732562476687803, "grad_norm": 0.9603633880615234, "learning_rate": 2e-06, "loss": 0.1845, "step": 1964 }, { "epoch": 0.7329354718388661, "grad_norm": 1.0770964622497559, "learning_rate": 2e-06, "loss": 0.1713, "step": 1965 }, { "epoch": 0.7333084669899291, "grad_norm": 0.9560950398445129, "learning_rate": 2e-06, "loss": 0.1937, "step": 1966 }, { "epoch": 0.7336814621409922, "grad_norm": 0.9708755016326904, "learning_rate": 2e-06, "loss": 0.1911, "step": 1967 }, { "epoch": 0.7340544572920552, "grad_norm": 0.8545412421226501, "learning_rate": 2e-06, "loss": 0.203, "step": 1968 }, { "epoch": 0.7344274524431182, "grad_norm": 0.7743270993232727, "learning_rate": 2e-06, "loss": 0.2086, "step": 1969 }, { "epoch": 0.7348004475941813, "grad_norm": 0.8020002841949463, "learning_rate": 2e-06, "loss": 0.2202, "step": 1970 }, { "epoch": 0.7351734427452443, "grad_norm": 0.7130528688430786, "learning_rate": 2e-06, "loss": 0.2041, "step": 1971 }, { "epoch": 0.7355464378963074, "grad_norm": 1.010872483253479, "learning_rate": 2e-06, "loss": 0.1967, "step": 1972 }, { "epoch": 0.7359194330473704, "grad_norm": 0.795883297920227, "learning_rate": 2e-06, "loss": 0.219, "step": 1973 }, { "epoch": 0.7362924281984334, "grad_norm": 0.8687382340431213, "learning_rate": 2e-06, "loss": 0.1892, "step": 1974 }, { "epoch": 0.7366654233494965, "grad_norm": 0.8555102348327637, "learning_rate": 2e-06, "loss": 0.1931, "step": 1975 }, { "epoch": 0.7370384185005595, "grad_norm": 0.8014510273933411, "learning_rate": 2e-06, "loss": 0.2199, "step": 1976 }, { "epoch": 0.7374114136516225, "grad_norm": 0.7611752152442932, "learning_rate": 2e-06, "loss": 0.1987, "step": 1977 }, { "epoch": 0.7377844088026856, "grad_norm": 1.0370134115219116, "learning_rate": 2e-06, "loss": 0.189, "step": 1978 }, { "epoch": 0.7381574039537486, "grad_norm": 1.1499807834625244, "learning_rate": 2e-06, "loss": 0.2027, "step": 1979 }, { "epoch": 0.7385303991048117, "grad_norm": 0.8957857489585876, "learning_rate": 2e-06, "loss": 0.2247, "step": 1980 }, { "epoch": 0.7389033942558747, "grad_norm": 0.8941522836685181, "learning_rate": 2e-06, "loss": 0.1959, "step": 1981 }, { "epoch": 0.7392763894069377, "grad_norm": 0.9261765480041504, "learning_rate": 2e-06, "loss": 0.2188, "step": 1982 }, { "epoch": 0.7396493845580008, "grad_norm": 0.9979825019836426, "learning_rate": 2e-06, "loss": 0.2186, "step": 1983 }, { "epoch": 0.7400223797090638, "grad_norm": 0.8954075574874878, "learning_rate": 2e-06, "loss": 0.2181, "step": 1984 }, { "epoch": 0.7403953748601269, "grad_norm": 0.7195862531661987, "learning_rate": 2e-06, "loss": 0.2008, "step": 1985 }, { "epoch": 0.7407683700111899, "grad_norm": 0.8640710115432739, "learning_rate": 2e-06, "loss": 0.2212, "step": 1986 }, { "epoch": 0.7411413651622529, "grad_norm": 0.9441689848899841, "learning_rate": 2e-06, "loss": 0.2152, "step": 1987 }, { "epoch": 0.741514360313316, "grad_norm": 1.2657701969146729, "learning_rate": 2e-06, "loss": 0.1783, "step": 1988 }, { "epoch": 0.741887355464379, "grad_norm": 0.8713098168373108, "learning_rate": 2e-06, "loss": 0.2045, "step": 1989 }, { "epoch": 0.742260350615442, "grad_norm": 1.0794754028320312, "learning_rate": 2e-06, "loss": 0.1964, "step": 1990 }, { "epoch": 0.7426333457665051, "grad_norm": 0.742228627204895, "learning_rate": 2e-06, "loss": 0.2118, "step": 1991 }, { "epoch": 0.7430063409175681, "grad_norm": 0.8171719312667847, "learning_rate": 2e-06, "loss": 0.2201, "step": 1992 }, { "epoch": 0.7433793360686312, "grad_norm": 0.868884801864624, "learning_rate": 2e-06, "loss": 0.1915, "step": 1993 }, { "epoch": 0.7437523312196942, "grad_norm": 0.9475461840629578, "learning_rate": 2e-06, "loss": 0.2063, "step": 1994 }, { "epoch": 0.7441253263707572, "grad_norm": 0.6850764751434326, "learning_rate": 2e-06, "loss": 0.2177, "step": 1995 }, { "epoch": 0.7444983215218202, "grad_norm": 0.755739688873291, "learning_rate": 2e-06, "loss": 0.2101, "step": 1996 }, { "epoch": 0.7448713166728832, "grad_norm": 0.7355456352233887, "learning_rate": 2e-06, "loss": 0.2146, "step": 1997 }, { "epoch": 0.7452443118239462, "grad_norm": 0.9550268054008484, "learning_rate": 2e-06, "loss": 0.2099, "step": 1998 }, { "epoch": 0.7456173069750093, "grad_norm": 0.9204156994819641, "learning_rate": 2e-06, "loss": 0.1995, "step": 1999 }, { "epoch": 0.7459903021260723, "grad_norm": 0.8637933135032654, "learning_rate": 2e-06, "loss": 0.2189, "step": 2000 }, { "epoch": 0.7463632972771354, "grad_norm": 0.8277137875556946, "learning_rate": 2e-06, "loss": 0.2211, "step": 2001 }, { "epoch": 0.7467362924281984, "grad_norm": 0.9404897689819336, "learning_rate": 2e-06, "loss": 0.2263, "step": 2002 }, { "epoch": 0.7471092875792614, "grad_norm": 0.7396906018257141, "learning_rate": 2e-06, "loss": 0.1982, "step": 2003 }, { "epoch": 0.7474822827303245, "grad_norm": 0.999075710773468, "learning_rate": 2e-06, "loss": 0.2148, "step": 2004 }, { "epoch": 0.7478552778813875, "grad_norm": 1.0756038427352905, "learning_rate": 2e-06, "loss": 0.2021, "step": 2005 }, { "epoch": 0.7482282730324505, "grad_norm": 0.7598267197608948, "learning_rate": 2e-06, "loss": 0.2121, "step": 2006 }, { "epoch": 0.7486012681835136, "grad_norm": 0.8578387498855591, "learning_rate": 2e-06, "loss": 0.2203, "step": 2007 }, { "epoch": 0.7489742633345766, "grad_norm": 1.0526646375656128, "learning_rate": 2e-06, "loss": 0.2061, "step": 2008 }, { "epoch": 0.7493472584856397, "grad_norm": 0.8890109658241272, "learning_rate": 2e-06, "loss": 0.2121, "step": 2009 }, { "epoch": 0.7497202536367027, "grad_norm": 0.8522915244102478, "learning_rate": 2e-06, "loss": 0.2125, "step": 2010 }, { "epoch": 0.7500932487877657, "grad_norm": 0.8477670550346375, "learning_rate": 2e-06, "loss": 0.1825, "step": 2011 }, { "epoch": 0.7504662439388288, "grad_norm": 0.7013988494873047, "learning_rate": 2e-06, "loss": 0.1855, "step": 2012 }, { "epoch": 0.7508392390898918, "grad_norm": 0.8021622896194458, "learning_rate": 2e-06, "loss": 0.2021, "step": 2013 }, { "epoch": 0.7512122342409548, "grad_norm": 0.8575087785720825, "learning_rate": 2e-06, "loss": 0.2072, "step": 2014 }, { "epoch": 0.7515852293920179, "grad_norm": 0.8452922701835632, "learning_rate": 2e-06, "loss": 0.1743, "step": 2015 }, { "epoch": 0.7519582245430809, "grad_norm": 0.6487075090408325, "learning_rate": 2e-06, "loss": 0.2267, "step": 2016 }, { "epoch": 0.752331219694144, "grad_norm": 0.886183500289917, "learning_rate": 2e-06, "loss": 0.2259, "step": 2017 }, { "epoch": 0.752704214845207, "grad_norm": 0.9033263325691223, "learning_rate": 2e-06, "loss": 0.2192, "step": 2018 }, { "epoch": 0.75307720999627, "grad_norm": 1.0035030841827393, "learning_rate": 2e-06, "loss": 0.1984, "step": 2019 }, { "epoch": 0.7534502051473331, "grad_norm": 1.031430959701538, "learning_rate": 2e-06, "loss": 0.2106, "step": 2020 }, { "epoch": 0.7538232002983961, "grad_norm": 0.9003169536590576, "learning_rate": 2e-06, "loss": 0.2172, "step": 2021 }, { "epoch": 0.7541961954494592, "grad_norm": 0.6994163393974304, "learning_rate": 2e-06, "loss": 0.2399, "step": 2022 }, { "epoch": 0.7545691906005222, "grad_norm": 0.8205198645591736, "learning_rate": 2e-06, "loss": 0.1982, "step": 2023 }, { "epoch": 0.7549421857515852, "grad_norm": 0.8193161487579346, "learning_rate": 2e-06, "loss": 0.2018, "step": 2024 }, { "epoch": 0.7553151809026483, "grad_norm": 1.0097953081130981, "learning_rate": 2e-06, "loss": 0.2206, "step": 2025 }, { "epoch": 0.7556881760537113, "grad_norm": 0.9363391399383545, "learning_rate": 2e-06, "loss": 0.1972, "step": 2026 }, { "epoch": 0.7560611712047743, "grad_norm": 0.7034000158309937, "learning_rate": 2e-06, "loss": 0.1963, "step": 2027 }, { "epoch": 0.7564341663558374, "grad_norm": 0.8629530668258667, "learning_rate": 2e-06, "loss": 0.2057, "step": 2028 }, { "epoch": 0.7568071615069004, "grad_norm": 1.3882451057434082, "learning_rate": 2e-06, "loss": 0.2045, "step": 2029 }, { "epoch": 0.7571801566579635, "grad_norm": 0.7744529843330383, "learning_rate": 2e-06, "loss": 0.2329, "step": 2030 }, { "epoch": 0.7575531518090265, "grad_norm": 0.7922937870025635, "learning_rate": 2e-06, "loss": 0.2117, "step": 2031 }, { "epoch": 0.7579261469600895, "grad_norm": 0.8217656016349792, "learning_rate": 2e-06, "loss": 0.2343, "step": 2032 }, { "epoch": 0.7582991421111526, "grad_norm": 1.0157663822174072, "learning_rate": 2e-06, "loss": 0.2257, "step": 2033 }, { "epoch": 0.7586721372622156, "grad_norm": 0.83697110414505, "learning_rate": 2e-06, "loss": 0.2179, "step": 2034 }, { "epoch": 0.7590451324132786, "grad_norm": 1.054932713508606, "learning_rate": 2e-06, "loss": 0.1935, "step": 2035 }, { "epoch": 0.7594181275643417, "grad_norm": 0.7514228224754333, "learning_rate": 2e-06, "loss": 0.2148, "step": 2036 }, { "epoch": 0.7597911227154047, "grad_norm": 0.8150668740272522, "learning_rate": 2e-06, "loss": 0.2041, "step": 2037 }, { "epoch": 0.7601641178664678, "grad_norm": 0.9326938390731812, "learning_rate": 2e-06, "loss": 0.2044, "step": 2038 }, { "epoch": 0.7605371130175308, "grad_norm": 0.939437747001648, "learning_rate": 2e-06, "loss": 0.1973, "step": 2039 }, { "epoch": 0.7609101081685938, "grad_norm": 0.7615601420402527, "learning_rate": 2e-06, "loss": 0.1964, "step": 2040 }, { "epoch": 0.7612831033196569, "grad_norm": 0.7470563650131226, "learning_rate": 2e-06, "loss": 0.2106, "step": 2041 }, { "epoch": 0.7616560984707199, "grad_norm": 0.8100437521934509, "learning_rate": 2e-06, "loss": 0.2307, "step": 2042 }, { "epoch": 0.762029093621783, "grad_norm": 0.777355432510376, "learning_rate": 2e-06, "loss": 0.2343, "step": 2043 }, { "epoch": 0.762402088772846, "grad_norm": 1.0295674800872803, "learning_rate": 2e-06, "loss": 0.2011, "step": 2044 }, { "epoch": 0.762775083923909, "grad_norm": 0.8090959787368774, "learning_rate": 2e-06, "loss": 0.211, "step": 2045 }, { "epoch": 0.7631480790749721, "grad_norm": 0.8336998820304871, "learning_rate": 2e-06, "loss": 0.1717, "step": 2046 }, { "epoch": 0.7635210742260351, "grad_norm": 0.9440287947654724, "learning_rate": 2e-06, "loss": 0.2177, "step": 2047 }, { "epoch": 0.7638940693770981, "grad_norm": 0.8812392950057983, "learning_rate": 2e-06, "loss": 0.2177, "step": 2048 }, { "epoch": 0.7642670645281612, "grad_norm": 0.8010275959968567, "learning_rate": 2e-06, "loss": 0.2099, "step": 2049 }, { "epoch": 0.7646400596792242, "grad_norm": 0.6277552247047424, "learning_rate": 2e-06, "loss": 0.2079, "step": 2050 }, { "epoch": 0.7650130548302873, "grad_norm": 0.6822249293327332, "learning_rate": 2e-06, "loss": 0.1793, "step": 2051 }, { "epoch": 0.7653860499813503, "grad_norm": 0.8985717296600342, "learning_rate": 2e-06, "loss": 0.2039, "step": 2052 }, { "epoch": 0.7657590451324133, "grad_norm": 0.7571220397949219, "learning_rate": 2e-06, "loss": 0.2086, "step": 2053 }, { "epoch": 0.7661320402834764, "grad_norm": 0.8418977856636047, "learning_rate": 2e-06, "loss": 0.2103, "step": 2054 }, { "epoch": 0.7665050354345394, "grad_norm": 0.8672335147857666, "learning_rate": 2e-06, "loss": 0.2309, "step": 2055 }, { "epoch": 0.7668780305856023, "grad_norm": 0.9560227394104004, "learning_rate": 2e-06, "loss": 0.2082, "step": 2056 }, { "epoch": 0.7672510257366654, "grad_norm": 0.93088299036026, "learning_rate": 2e-06, "loss": 0.2019, "step": 2057 }, { "epoch": 0.7676240208877284, "grad_norm": 0.835898220539093, "learning_rate": 2e-06, "loss": 0.2129, "step": 2058 }, { "epoch": 0.7679970160387914, "grad_norm": 0.8077571988105774, "learning_rate": 2e-06, "loss": 0.2145, "step": 2059 }, { "epoch": 0.7683700111898545, "grad_norm": 1.0327978134155273, "learning_rate": 2e-06, "loss": 0.2035, "step": 2060 }, { "epoch": 0.7687430063409175, "grad_norm": 0.7985562086105347, "learning_rate": 2e-06, "loss": 0.221, "step": 2061 }, { "epoch": 0.7691160014919806, "grad_norm": 1.142077922821045, "learning_rate": 2e-06, "loss": 0.1961, "step": 2062 }, { "epoch": 0.7694889966430436, "grad_norm": 0.857454240322113, "learning_rate": 2e-06, "loss": 0.1837, "step": 2063 }, { "epoch": 0.7698619917941066, "grad_norm": 0.9535216093063354, "learning_rate": 2e-06, "loss": 0.2193, "step": 2064 }, { "epoch": 0.7702349869451697, "grad_norm": 1.098910927772522, "learning_rate": 2e-06, "loss": 0.2055, "step": 2065 }, { "epoch": 0.7706079820962327, "grad_norm": 0.9089769721031189, "learning_rate": 2e-06, "loss": 0.2092, "step": 2066 }, { "epoch": 0.7709809772472958, "grad_norm": 1.012262225151062, "learning_rate": 2e-06, "loss": 0.2188, "step": 2067 }, { "epoch": 0.7713539723983588, "grad_norm": 0.7337329983711243, "learning_rate": 2e-06, "loss": 0.1966, "step": 2068 }, { "epoch": 0.7717269675494218, "grad_norm": 1.055047631263733, "learning_rate": 2e-06, "loss": 0.2086, "step": 2069 }, { "epoch": 0.7720999627004849, "grad_norm": 0.7814789414405823, "learning_rate": 2e-06, "loss": 0.2329, "step": 2070 }, { "epoch": 0.7724729578515479, "grad_norm": 0.9319881200790405, "learning_rate": 2e-06, "loss": 0.2098, "step": 2071 }, { "epoch": 0.7728459530026109, "grad_norm": 0.9415208101272583, "learning_rate": 2e-06, "loss": 0.1985, "step": 2072 }, { "epoch": 0.773218948153674, "grad_norm": 0.7804548740386963, "learning_rate": 2e-06, "loss": 0.2076, "step": 2073 }, { "epoch": 0.773591943304737, "grad_norm": 0.85384202003479, "learning_rate": 2e-06, "loss": 0.2175, "step": 2074 }, { "epoch": 0.7739649384558001, "grad_norm": 0.8579732179641724, "learning_rate": 2e-06, "loss": 0.2213, "step": 2075 }, { "epoch": 0.7743379336068631, "grad_norm": 1.0825189352035522, "learning_rate": 2e-06, "loss": 0.1957, "step": 2076 }, { "epoch": 0.7747109287579261, "grad_norm": 1.516652226448059, "learning_rate": 2e-06, "loss": 0.1937, "step": 2077 }, { "epoch": 0.7750839239089892, "grad_norm": 0.9855908751487732, "learning_rate": 2e-06, "loss": 0.2284, "step": 2078 }, { "epoch": 0.7754569190600522, "grad_norm": 0.8604542016983032, "learning_rate": 2e-06, "loss": 0.209, "step": 2079 }, { "epoch": 0.7758299142111152, "grad_norm": 0.6935897469520569, "learning_rate": 2e-06, "loss": 0.2163, "step": 2080 }, { "epoch": 0.7762029093621783, "grad_norm": 0.8170755505561829, "learning_rate": 2e-06, "loss": 0.2107, "step": 2081 }, { "epoch": 0.7765759045132413, "grad_norm": 1.052754521369934, "learning_rate": 2e-06, "loss": 0.1792, "step": 2082 }, { "epoch": 0.7769488996643044, "grad_norm": 0.9640365839004517, "learning_rate": 2e-06, "loss": 0.1981, "step": 2083 }, { "epoch": 0.7773218948153674, "grad_norm": 0.7818226218223572, "learning_rate": 2e-06, "loss": 0.2246, "step": 2084 }, { "epoch": 0.7776948899664304, "grad_norm": 0.968097984790802, "learning_rate": 2e-06, "loss": 0.2165, "step": 2085 }, { "epoch": 0.7780678851174935, "grad_norm": 0.8936861753463745, "learning_rate": 2e-06, "loss": 0.2222, "step": 2086 }, { "epoch": 0.7784408802685565, "grad_norm": 0.9251615405082703, "learning_rate": 2e-06, "loss": 0.201, "step": 2087 }, { "epoch": 0.7788138754196196, "grad_norm": 0.7978858351707458, "learning_rate": 2e-06, "loss": 0.2197, "step": 2088 }, { "epoch": 0.7791868705706826, "grad_norm": 1.0219053030014038, "learning_rate": 2e-06, "loss": 0.186, "step": 2089 }, { "epoch": 0.7795598657217456, "grad_norm": 1.2609341144561768, "learning_rate": 2e-06, "loss": 0.202, "step": 2090 }, { "epoch": 0.7799328608728087, "grad_norm": 0.8034345507621765, "learning_rate": 2e-06, "loss": 0.2061, "step": 2091 }, { "epoch": 0.7803058560238717, "grad_norm": 0.9567638635635376, "learning_rate": 2e-06, "loss": 0.2201, "step": 2092 }, { "epoch": 0.7806788511749347, "grad_norm": 1.0282928943634033, "learning_rate": 2e-06, "loss": 0.2149, "step": 2093 }, { "epoch": 0.7810518463259978, "grad_norm": 0.8969002962112427, "learning_rate": 2e-06, "loss": 0.2059, "step": 2094 }, { "epoch": 0.7814248414770608, "grad_norm": 0.8826708197593689, "learning_rate": 2e-06, "loss": 0.214, "step": 2095 }, { "epoch": 0.7817978366281239, "grad_norm": 0.7789673209190369, "learning_rate": 2e-06, "loss": 0.2015, "step": 2096 }, { "epoch": 0.7821708317791869, "grad_norm": 1.006363034248352, "learning_rate": 2e-06, "loss": 0.197, "step": 2097 }, { "epoch": 0.7825438269302499, "grad_norm": 1.0947983264923096, "learning_rate": 2e-06, "loss": 0.2007, "step": 2098 }, { "epoch": 0.782916822081313, "grad_norm": 1.044594168663025, "learning_rate": 2e-06, "loss": 0.1841, "step": 2099 }, { "epoch": 0.783289817232376, "grad_norm": 1.0360909700393677, "learning_rate": 2e-06, "loss": 0.2169, "step": 2100 }, { "epoch": 0.783662812383439, "grad_norm": 1.0792407989501953, "learning_rate": 2e-06, "loss": 0.2045, "step": 2101 }, { "epoch": 0.7840358075345021, "grad_norm": 1.0510461330413818, "learning_rate": 2e-06, "loss": 0.2029, "step": 2102 }, { "epoch": 0.7844088026855651, "grad_norm": 0.9930972456932068, "learning_rate": 2e-06, "loss": 0.1871, "step": 2103 }, { "epoch": 0.7847817978366282, "grad_norm": 0.7638452649116516, "learning_rate": 2e-06, "loss": 0.213, "step": 2104 }, { "epoch": 0.7851547929876912, "grad_norm": 0.9772177934646606, "learning_rate": 2e-06, "loss": 0.2028, "step": 2105 }, { "epoch": 0.7855277881387542, "grad_norm": 0.852179765701294, "learning_rate": 2e-06, "loss": 0.2158, "step": 2106 }, { "epoch": 0.7859007832898173, "grad_norm": 0.8613146543502808, "learning_rate": 2e-06, "loss": 0.2112, "step": 2107 }, { "epoch": 0.7862737784408803, "grad_norm": 1.1197576522827148, "learning_rate": 2e-06, "loss": 0.196, "step": 2108 }, { "epoch": 0.7866467735919433, "grad_norm": 1.2573108673095703, "learning_rate": 2e-06, "loss": 0.1864, "step": 2109 }, { "epoch": 0.7870197687430064, "grad_norm": 1.1320912837982178, "learning_rate": 2e-06, "loss": 0.1928, "step": 2110 }, { "epoch": 0.7873927638940694, "grad_norm": 0.8806666135787964, "learning_rate": 2e-06, "loss": 0.2149, "step": 2111 }, { "epoch": 0.7877657590451325, "grad_norm": 0.8493534922599792, "learning_rate": 2e-06, "loss": 0.2147, "step": 2112 }, { "epoch": 0.7881387541961955, "grad_norm": 0.8707627654075623, "learning_rate": 2e-06, "loss": 0.2252, "step": 2113 }, { "epoch": 0.7885117493472585, "grad_norm": 1.008444905281067, "learning_rate": 2e-06, "loss": 0.2071, "step": 2114 }, { "epoch": 0.7888847444983216, "grad_norm": 0.8814007639884949, "learning_rate": 2e-06, "loss": 0.1987, "step": 2115 }, { "epoch": 0.7892577396493845, "grad_norm": 0.9039292931556702, "learning_rate": 2e-06, "loss": 0.2029, "step": 2116 }, { "epoch": 0.7896307348004475, "grad_norm": 1.1139143705368042, "learning_rate": 2e-06, "loss": 0.1902, "step": 2117 }, { "epoch": 0.7900037299515106, "grad_norm": 0.9806655645370483, "learning_rate": 2e-06, "loss": 0.202, "step": 2118 }, { "epoch": 0.7903767251025736, "grad_norm": 0.9790909290313721, "learning_rate": 2e-06, "loss": 0.2163, "step": 2119 }, { "epoch": 0.7907497202536367, "grad_norm": 1.0338118076324463, "learning_rate": 2e-06, "loss": 0.2137, "step": 2120 }, { "epoch": 0.7911227154046997, "grad_norm": 1.0623197555541992, "learning_rate": 2e-06, "loss": 0.2096, "step": 2121 }, { "epoch": 0.7914957105557627, "grad_norm": 0.9294946193695068, "learning_rate": 2e-06, "loss": 0.1988, "step": 2122 }, { "epoch": 0.7918687057068258, "grad_norm": 0.9647356271743774, "learning_rate": 2e-06, "loss": 0.2, "step": 2123 }, { "epoch": 0.7922417008578888, "grad_norm": 0.8801307082176208, "learning_rate": 2e-06, "loss": 0.2106, "step": 2124 }, { "epoch": 0.7926146960089518, "grad_norm": 0.8929944038391113, "learning_rate": 2e-06, "loss": 0.2244, "step": 2125 }, { "epoch": 0.7929876911600149, "grad_norm": 0.9660553336143494, "learning_rate": 2e-06, "loss": 0.1919, "step": 2126 }, { "epoch": 0.7933606863110779, "grad_norm": 0.9022288918495178, "learning_rate": 2e-06, "loss": 0.2042, "step": 2127 }, { "epoch": 0.793733681462141, "grad_norm": 0.8259539604187012, "learning_rate": 2e-06, "loss": 0.2141, "step": 2128 }, { "epoch": 0.794106676613204, "grad_norm": 0.7443177700042725, "learning_rate": 2e-06, "loss": 0.2098, "step": 2129 }, { "epoch": 0.794479671764267, "grad_norm": 0.9221109747886658, "learning_rate": 2e-06, "loss": 0.2032, "step": 2130 }, { "epoch": 0.7948526669153301, "grad_norm": 0.8877180218696594, "learning_rate": 2e-06, "loss": 0.1801, "step": 2131 }, { "epoch": 0.7952256620663931, "grad_norm": 1.0397790670394897, "learning_rate": 2e-06, "loss": 0.1945, "step": 2132 }, { "epoch": 0.7955986572174562, "grad_norm": 0.8289620876312256, "learning_rate": 2e-06, "loss": 0.221, "step": 2133 }, { "epoch": 0.7959716523685192, "grad_norm": 0.8644962310791016, "learning_rate": 2e-06, "loss": 0.1974, "step": 2134 }, { "epoch": 0.7963446475195822, "grad_norm": 1.0320852994918823, "learning_rate": 2e-06, "loss": 0.2022, "step": 2135 }, { "epoch": 0.7967176426706453, "grad_norm": 0.9414823651313782, "learning_rate": 2e-06, "loss": 0.2284, "step": 2136 }, { "epoch": 0.7970906378217083, "grad_norm": 1.1233230829238892, "learning_rate": 2e-06, "loss": 0.2048, "step": 2137 }, { "epoch": 0.7974636329727713, "grad_norm": 1.0390242338180542, "learning_rate": 2e-06, "loss": 0.189, "step": 2138 }, { "epoch": 0.7978366281238344, "grad_norm": 0.6905797123908997, "learning_rate": 2e-06, "loss": 0.1744, "step": 2139 }, { "epoch": 0.7982096232748974, "grad_norm": 1.0349066257476807, "learning_rate": 2e-06, "loss": 0.2383, "step": 2140 }, { "epoch": 0.7985826184259605, "grad_norm": 1.076573371887207, "learning_rate": 2e-06, "loss": 0.2066, "step": 2141 }, { "epoch": 0.7989556135770235, "grad_norm": 1.3008960485458374, "learning_rate": 2e-06, "loss": 0.2027, "step": 2142 }, { "epoch": 0.7993286087280865, "grad_norm": 0.9854296445846558, "learning_rate": 2e-06, "loss": 0.2122, "step": 2143 }, { "epoch": 0.7997016038791496, "grad_norm": 0.845083475112915, "learning_rate": 2e-06, "loss": 0.24, "step": 2144 }, { "epoch": 0.8000745990302126, "grad_norm": 0.8874463438987732, "learning_rate": 2e-06, "loss": 0.1954, "step": 2145 }, { "epoch": 0.8004475941812756, "grad_norm": 0.9687674641609192, "learning_rate": 2e-06, "loss": 0.2045, "step": 2146 }, { "epoch": 0.8008205893323387, "grad_norm": 0.8313170671463013, "learning_rate": 2e-06, "loss": 0.2243, "step": 2147 }, { "epoch": 0.8011935844834017, "grad_norm": 0.8815109729766846, "learning_rate": 2e-06, "loss": 0.2149, "step": 2148 }, { "epoch": 0.8015665796344648, "grad_norm": 0.7660952210426331, "learning_rate": 2e-06, "loss": 0.2064, "step": 2149 }, { "epoch": 0.8019395747855278, "grad_norm": 0.7890306711196899, "learning_rate": 2e-06, "loss": 0.2111, "step": 2150 }, { "epoch": 0.8023125699365908, "grad_norm": 0.9392753839492798, "learning_rate": 2e-06, "loss": 0.2088, "step": 2151 }, { "epoch": 0.8026855650876539, "grad_norm": 0.7566826939582825, "learning_rate": 2e-06, "loss": 0.2031, "step": 2152 }, { "epoch": 0.8030585602387169, "grad_norm": 0.8500667810440063, "learning_rate": 2e-06, "loss": 0.2224, "step": 2153 }, { "epoch": 0.80343155538978, "grad_norm": 0.8020805716514587, "learning_rate": 2e-06, "loss": 0.2189, "step": 2154 }, { "epoch": 0.803804550540843, "grad_norm": 0.9415767788887024, "learning_rate": 2e-06, "loss": 0.2072, "step": 2155 }, { "epoch": 0.804177545691906, "grad_norm": 0.9060941934585571, "learning_rate": 2e-06, "loss": 0.2222, "step": 2156 }, { "epoch": 0.8045505408429691, "grad_norm": 0.8865082263946533, "learning_rate": 2e-06, "loss": 0.2001, "step": 2157 }, { "epoch": 0.8049235359940321, "grad_norm": 0.8906190395355225, "learning_rate": 2e-06, "loss": 0.1858, "step": 2158 }, { "epoch": 0.8052965311450951, "grad_norm": 0.9747007489204407, "learning_rate": 2e-06, "loss": 0.1742, "step": 2159 }, { "epoch": 0.8056695262961582, "grad_norm": 0.8534250855445862, "learning_rate": 2e-06, "loss": 0.2206, "step": 2160 }, { "epoch": 0.8060425214472212, "grad_norm": 0.9292688965797424, "learning_rate": 2e-06, "loss": 0.2232, "step": 2161 }, { "epoch": 0.8064155165982843, "grad_norm": 0.8793826103210449, "learning_rate": 2e-06, "loss": 0.1952, "step": 2162 }, { "epoch": 0.8067885117493473, "grad_norm": 0.9708840847015381, "learning_rate": 2e-06, "loss": 0.1944, "step": 2163 }, { "epoch": 0.8071615069004103, "grad_norm": 1.0369855165481567, "learning_rate": 2e-06, "loss": 0.2075, "step": 2164 }, { "epoch": 0.8075345020514734, "grad_norm": 0.7950000166893005, "learning_rate": 2e-06, "loss": 0.2149, "step": 2165 }, { "epoch": 0.8079074972025364, "grad_norm": 0.7654295563697815, "learning_rate": 2e-06, "loss": 0.2207, "step": 2166 }, { "epoch": 0.8082804923535994, "grad_norm": 0.8213416337966919, "learning_rate": 2e-06, "loss": 0.2139, "step": 2167 }, { "epoch": 0.8086534875046625, "grad_norm": 0.6942474842071533, "learning_rate": 2e-06, "loss": 0.2235, "step": 2168 }, { "epoch": 0.8090264826557255, "grad_norm": 1.0514196157455444, "learning_rate": 2e-06, "loss": 0.216, "step": 2169 }, { "epoch": 0.8093994778067886, "grad_norm": 0.8436524271965027, "learning_rate": 2e-06, "loss": 0.1999, "step": 2170 }, { "epoch": 0.8097724729578516, "grad_norm": 0.9785959124565125, "learning_rate": 2e-06, "loss": 0.206, "step": 2171 }, { "epoch": 0.8101454681089146, "grad_norm": 0.9375293850898743, "learning_rate": 2e-06, "loss": 0.2171, "step": 2172 }, { "epoch": 0.8105184632599777, "grad_norm": 0.9578524231910706, "learning_rate": 2e-06, "loss": 0.2065, "step": 2173 }, { "epoch": 0.8108914584110407, "grad_norm": 0.7647719979286194, "learning_rate": 2e-06, "loss": 0.1848, "step": 2174 }, { "epoch": 0.8112644535621036, "grad_norm": 0.8400478959083557, "learning_rate": 2e-06, "loss": 0.2099, "step": 2175 }, { "epoch": 0.8116374487131667, "grad_norm": 0.9914858937263489, "learning_rate": 2e-06, "loss": 0.2044, "step": 2176 }, { "epoch": 0.8120104438642297, "grad_norm": 0.9741401076316833, "learning_rate": 2e-06, "loss": 0.1909, "step": 2177 }, { "epoch": 0.8123834390152928, "grad_norm": 0.925728976726532, "learning_rate": 2e-06, "loss": 0.1997, "step": 2178 }, { "epoch": 0.8127564341663558, "grad_norm": 0.7836978435516357, "learning_rate": 2e-06, "loss": 0.2066, "step": 2179 }, { "epoch": 0.8131294293174188, "grad_norm": 0.909141480922699, "learning_rate": 2e-06, "loss": 0.185, "step": 2180 }, { "epoch": 0.8135024244684819, "grad_norm": 1.0574278831481934, "learning_rate": 2e-06, "loss": 0.2311, "step": 2181 }, { "epoch": 0.8138754196195449, "grad_norm": 1.4279241561889648, "learning_rate": 2e-06, "loss": 0.1937, "step": 2182 }, { "epoch": 0.8142484147706079, "grad_norm": 0.9415436387062073, "learning_rate": 2e-06, "loss": 0.2006, "step": 2183 }, { "epoch": 0.814621409921671, "grad_norm": 0.7973086833953857, "learning_rate": 2e-06, "loss": 0.2156, "step": 2184 }, { "epoch": 0.814994405072734, "grad_norm": 0.9254840612411499, "learning_rate": 2e-06, "loss": 0.2144, "step": 2185 }, { "epoch": 0.8153674002237971, "grad_norm": 0.7984524965286255, "learning_rate": 2e-06, "loss": 0.1956, "step": 2186 }, { "epoch": 0.8157403953748601, "grad_norm": 0.8919991850852966, "learning_rate": 2e-06, "loss": 0.2089, "step": 2187 }, { "epoch": 0.8161133905259231, "grad_norm": 0.8138364553451538, "learning_rate": 2e-06, "loss": 0.2021, "step": 2188 }, { "epoch": 0.8164863856769862, "grad_norm": 0.8017678260803223, "learning_rate": 2e-06, "loss": 0.1907, "step": 2189 }, { "epoch": 0.8168593808280492, "grad_norm": 0.931653618812561, "learning_rate": 2e-06, "loss": 0.2006, "step": 2190 }, { "epoch": 0.8172323759791122, "grad_norm": 0.9271136522293091, "learning_rate": 2e-06, "loss": 0.1961, "step": 2191 }, { "epoch": 0.8176053711301753, "grad_norm": 0.8329090476036072, "learning_rate": 2e-06, "loss": 0.2091, "step": 2192 }, { "epoch": 0.8179783662812383, "grad_norm": 0.8206929564476013, "learning_rate": 2e-06, "loss": 0.2057, "step": 2193 }, { "epoch": 0.8183513614323014, "grad_norm": 0.7524323463439941, "learning_rate": 2e-06, "loss": 0.2114, "step": 2194 }, { "epoch": 0.8187243565833644, "grad_norm": 1.0256776809692383, "learning_rate": 2e-06, "loss": 0.1831, "step": 2195 }, { "epoch": 0.8190973517344274, "grad_norm": 0.9393559694290161, "learning_rate": 2e-06, "loss": 0.2157, "step": 2196 }, { "epoch": 0.8194703468854905, "grad_norm": 0.9088670015335083, "learning_rate": 2e-06, "loss": 0.2248, "step": 2197 }, { "epoch": 0.8198433420365535, "grad_norm": 0.6865124106407166, "learning_rate": 2e-06, "loss": 0.1875, "step": 2198 }, { "epoch": 0.8202163371876166, "grad_norm": 0.8958433866500854, "learning_rate": 2e-06, "loss": 0.217, "step": 2199 }, { "epoch": 0.8205893323386796, "grad_norm": 1.1017295122146606, "learning_rate": 2e-06, "loss": 0.2134, "step": 2200 }, { "epoch": 0.8209623274897426, "grad_norm": 0.7093621492385864, "learning_rate": 2e-06, "loss": 0.2114, "step": 2201 }, { "epoch": 0.8213353226408057, "grad_norm": 0.8973530530929565, "learning_rate": 2e-06, "loss": 0.1969, "step": 2202 }, { "epoch": 0.8217083177918687, "grad_norm": 0.7356114387512207, "learning_rate": 2e-06, "loss": 0.1973, "step": 2203 }, { "epoch": 0.8220813129429317, "grad_norm": 0.8283209800720215, "learning_rate": 2e-06, "loss": 0.2242, "step": 2204 }, { "epoch": 0.8224543080939948, "grad_norm": 0.7633107900619507, "learning_rate": 2e-06, "loss": 0.1826, "step": 2205 }, { "epoch": 0.8228273032450578, "grad_norm": 0.7988774180412292, "learning_rate": 2e-06, "loss": 0.2061, "step": 2206 }, { "epoch": 0.8232002983961209, "grad_norm": 1.0189142227172852, "learning_rate": 2e-06, "loss": 0.2144, "step": 2207 }, { "epoch": 0.8235732935471839, "grad_norm": 1.0101637840270996, "learning_rate": 2e-06, "loss": 0.2158, "step": 2208 }, { "epoch": 0.8239462886982469, "grad_norm": 1.0981757640838623, "learning_rate": 2e-06, "loss": 0.1974, "step": 2209 }, { "epoch": 0.82431928384931, "grad_norm": 0.8457747101783752, "learning_rate": 2e-06, "loss": 0.2377, "step": 2210 }, { "epoch": 0.824692279000373, "grad_norm": 1.1890300512313843, "learning_rate": 2e-06, "loss": 0.1934, "step": 2211 }, { "epoch": 0.825065274151436, "grad_norm": 0.9111526012420654, "learning_rate": 2e-06, "loss": 0.2062, "step": 2212 }, { "epoch": 0.8254382693024991, "grad_norm": 1.1368087530136108, "learning_rate": 2e-06, "loss": 0.1946, "step": 2213 }, { "epoch": 0.8258112644535621, "grad_norm": 0.8384529948234558, "learning_rate": 2e-06, "loss": 0.2114, "step": 2214 }, { "epoch": 0.8261842596046252, "grad_norm": 0.8073965907096863, "learning_rate": 2e-06, "loss": 0.2109, "step": 2215 }, { "epoch": 0.8265572547556882, "grad_norm": 0.7483471035957336, "learning_rate": 2e-06, "loss": 0.2263, "step": 2216 }, { "epoch": 0.8269302499067512, "grad_norm": 0.8656496405601501, "learning_rate": 2e-06, "loss": 0.2344, "step": 2217 }, { "epoch": 0.8273032450578143, "grad_norm": 0.9432700872421265, "learning_rate": 2e-06, "loss": 0.2229, "step": 2218 }, { "epoch": 0.8276762402088773, "grad_norm": 0.8450913429260254, "learning_rate": 2e-06, "loss": 0.216, "step": 2219 }, { "epoch": 0.8280492353599403, "grad_norm": 0.9111404418945312, "learning_rate": 2e-06, "loss": 0.1937, "step": 2220 }, { "epoch": 0.8284222305110034, "grad_norm": 1.1142668724060059, "learning_rate": 2e-06, "loss": 0.2359, "step": 2221 }, { "epoch": 0.8287952256620664, "grad_norm": 0.9068588614463806, "learning_rate": 2e-06, "loss": 0.2001, "step": 2222 }, { "epoch": 0.8291682208131295, "grad_norm": 0.9338834881782532, "learning_rate": 2e-06, "loss": 0.1758, "step": 2223 }, { "epoch": 0.8295412159641925, "grad_norm": 0.7009779810905457, "learning_rate": 2e-06, "loss": 0.2135, "step": 2224 }, { "epoch": 0.8299142111152555, "grad_norm": 0.8854984641075134, "learning_rate": 2e-06, "loss": 0.2234, "step": 2225 }, { "epoch": 0.8302872062663186, "grad_norm": 1.138958215713501, "learning_rate": 2e-06, "loss": 0.2214, "step": 2226 }, { "epoch": 0.8306602014173816, "grad_norm": 0.8468315601348877, "learning_rate": 2e-06, "loss": 0.2013, "step": 2227 }, { "epoch": 0.8310331965684447, "grad_norm": 0.980452299118042, "learning_rate": 2e-06, "loss": 0.2183, "step": 2228 }, { "epoch": 0.8314061917195077, "grad_norm": 0.9963353872299194, "learning_rate": 2e-06, "loss": 0.1911, "step": 2229 }, { "epoch": 0.8317791868705707, "grad_norm": 1.0621715784072876, "learning_rate": 2e-06, "loss": 0.1958, "step": 2230 }, { "epoch": 0.8321521820216338, "grad_norm": 0.7828145027160645, "learning_rate": 2e-06, "loss": 0.1995, "step": 2231 }, { "epoch": 0.8325251771726968, "grad_norm": 1.0862290859222412, "learning_rate": 2e-06, "loss": 0.2105, "step": 2232 }, { "epoch": 0.8328981723237598, "grad_norm": 0.8620064854621887, "learning_rate": 2e-06, "loss": 0.191, "step": 2233 }, { "epoch": 0.8332711674748229, "grad_norm": 0.7799484729766846, "learning_rate": 2e-06, "loss": 0.209, "step": 2234 }, { "epoch": 0.8336441626258858, "grad_norm": 0.8902480006217957, "learning_rate": 2e-06, "loss": 0.2047, "step": 2235 }, { "epoch": 0.8340171577769488, "grad_norm": 0.7876725196838379, "learning_rate": 2e-06, "loss": 0.1929, "step": 2236 }, { "epoch": 0.8343901529280119, "grad_norm": 0.7685226202011108, "learning_rate": 2e-06, "loss": 0.2224, "step": 2237 }, { "epoch": 0.8347631480790749, "grad_norm": 0.7760909795761108, "learning_rate": 2e-06, "loss": 0.2112, "step": 2238 }, { "epoch": 0.835136143230138, "grad_norm": 0.7991086840629578, "learning_rate": 2e-06, "loss": 0.2424, "step": 2239 }, { "epoch": 0.835509138381201, "grad_norm": 0.7730279564857483, "learning_rate": 2e-06, "loss": 0.2129, "step": 2240 }, { "epoch": 0.835882133532264, "grad_norm": 0.8325549364089966, "learning_rate": 2e-06, "loss": 0.1999, "step": 2241 }, { "epoch": 0.8362551286833271, "grad_norm": 1.0043010711669922, "learning_rate": 2e-06, "loss": 0.2008, "step": 2242 }, { "epoch": 0.8366281238343901, "grad_norm": 0.8331825733184814, "learning_rate": 2e-06, "loss": 0.222, "step": 2243 }, { "epoch": 0.8370011189854532, "grad_norm": 0.9031542539596558, "learning_rate": 2e-06, "loss": 0.2164, "step": 2244 }, { "epoch": 0.8373741141365162, "grad_norm": 1.0797556638717651, "learning_rate": 2e-06, "loss": 0.1861, "step": 2245 }, { "epoch": 0.8377471092875792, "grad_norm": 0.8844748139381409, "learning_rate": 2e-06, "loss": 0.1998, "step": 2246 }, { "epoch": 0.8381201044386423, "grad_norm": 0.6486740112304688, "learning_rate": 2e-06, "loss": 0.2184, "step": 2247 }, { "epoch": 0.8384930995897053, "grad_norm": 0.8790176510810852, "learning_rate": 2e-06, "loss": 0.1822, "step": 2248 }, { "epoch": 0.8388660947407683, "grad_norm": 0.8308786153793335, "learning_rate": 2e-06, "loss": 0.213, "step": 2249 }, { "epoch": 0.8392390898918314, "grad_norm": 0.9362897276878357, "learning_rate": 2e-06, "loss": 0.2239, "step": 2250 }, { "epoch": 0.8396120850428944, "grad_norm": 0.832772970199585, "learning_rate": 2e-06, "loss": 0.1826, "step": 2251 }, { "epoch": 0.8399850801939575, "grad_norm": 0.7811563611030579, "learning_rate": 2e-06, "loss": 0.2066, "step": 2252 }, { "epoch": 0.8403580753450205, "grad_norm": 1.206157922744751, "learning_rate": 2e-06, "loss": 0.2018, "step": 2253 }, { "epoch": 0.8407310704960835, "grad_norm": 1.008945107460022, "learning_rate": 2e-06, "loss": 0.2095, "step": 2254 }, { "epoch": 0.8411040656471466, "grad_norm": 0.8218916654586792, "learning_rate": 2e-06, "loss": 0.199, "step": 2255 }, { "epoch": 0.8414770607982096, "grad_norm": 0.8961811065673828, "learning_rate": 2e-06, "loss": 0.204, "step": 2256 }, { "epoch": 0.8418500559492726, "grad_norm": 1.2538803815841675, "learning_rate": 2e-06, "loss": 0.1804, "step": 2257 }, { "epoch": 0.8422230511003357, "grad_norm": 0.8062776327133179, "learning_rate": 2e-06, "loss": 0.2363, "step": 2258 }, { "epoch": 0.8425960462513987, "grad_norm": 0.9983118772506714, "learning_rate": 2e-06, "loss": 0.2108, "step": 2259 }, { "epoch": 0.8429690414024618, "grad_norm": 0.9363144040107727, "learning_rate": 2e-06, "loss": 0.2028, "step": 2260 }, { "epoch": 0.8433420365535248, "grad_norm": 0.8546808958053589, "learning_rate": 2e-06, "loss": 0.2216, "step": 2261 }, { "epoch": 0.8437150317045878, "grad_norm": 0.9066362977027893, "learning_rate": 2e-06, "loss": 0.1884, "step": 2262 }, { "epoch": 0.8440880268556509, "grad_norm": 0.863492488861084, "learning_rate": 2e-06, "loss": 0.2072, "step": 2263 }, { "epoch": 0.8444610220067139, "grad_norm": 1.0831570625305176, "learning_rate": 2e-06, "loss": 0.2196, "step": 2264 }, { "epoch": 0.844834017157777, "grad_norm": 0.8572536110877991, "learning_rate": 2e-06, "loss": 0.2184, "step": 2265 }, { "epoch": 0.84520701230884, "grad_norm": 1.1555269956588745, "learning_rate": 2e-06, "loss": 0.1933, "step": 2266 }, { "epoch": 0.845580007459903, "grad_norm": 1.0708087682724, "learning_rate": 2e-06, "loss": 0.1968, "step": 2267 }, { "epoch": 0.8459530026109661, "grad_norm": 0.8447484970092773, "learning_rate": 2e-06, "loss": 0.2113, "step": 2268 }, { "epoch": 0.8463259977620291, "grad_norm": 1.1539299488067627, "learning_rate": 2e-06, "loss": 0.1857, "step": 2269 }, { "epoch": 0.8466989929130921, "grad_norm": 0.9158959984779358, "learning_rate": 2e-06, "loss": 0.196, "step": 2270 }, { "epoch": 0.8470719880641552, "grad_norm": 0.6954349279403687, "learning_rate": 2e-06, "loss": 0.1854, "step": 2271 }, { "epoch": 0.8474449832152182, "grad_norm": 0.8567847013473511, "learning_rate": 2e-06, "loss": 0.2102, "step": 2272 }, { "epoch": 0.8478179783662813, "grad_norm": 0.8163101673126221, "learning_rate": 2e-06, "loss": 0.1977, "step": 2273 }, { "epoch": 0.8481909735173443, "grad_norm": 0.9235292077064514, "learning_rate": 2e-06, "loss": 0.225, "step": 2274 }, { "epoch": 0.8485639686684073, "grad_norm": 0.965640664100647, "learning_rate": 2e-06, "loss": 0.1844, "step": 2275 }, { "epoch": 0.8489369638194704, "grad_norm": 1.1087433099746704, "learning_rate": 2e-06, "loss": 0.1916, "step": 2276 }, { "epoch": 0.8493099589705334, "grad_norm": 0.9514452219009399, "learning_rate": 2e-06, "loss": 0.1915, "step": 2277 }, { "epoch": 0.8496829541215964, "grad_norm": 0.8949972987174988, "learning_rate": 2e-06, "loss": 0.2273, "step": 2278 }, { "epoch": 0.8500559492726595, "grad_norm": 0.8323771357536316, "learning_rate": 2e-06, "loss": 0.2302, "step": 2279 }, { "epoch": 0.8504289444237225, "grad_norm": 0.8809195756912231, "learning_rate": 2e-06, "loss": 0.1967, "step": 2280 }, { "epoch": 0.8508019395747856, "grad_norm": 0.9737092852592468, "learning_rate": 2e-06, "loss": 0.2064, "step": 2281 }, { "epoch": 0.8511749347258486, "grad_norm": 1.0676629543304443, "learning_rate": 2e-06, "loss": 0.1942, "step": 2282 }, { "epoch": 0.8515479298769116, "grad_norm": 0.8314113020896912, "learning_rate": 2e-06, "loss": 0.2297, "step": 2283 }, { "epoch": 0.8519209250279747, "grad_norm": 0.895431637763977, "learning_rate": 2e-06, "loss": 0.222, "step": 2284 }, { "epoch": 0.8522939201790377, "grad_norm": 1.0911614894866943, "learning_rate": 2e-06, "loss": 0.1917, "step": 2285 }, { "epoch": 0.8526669153301007, "grad_norm": 0.83078533411026, "learning_rate": 2e-06, "loss": 0.1805, "step": 2286 }, { "epoch": 0.8530399104811638, "grad_norm": 1.2444372177124023, "learning_rate": 2e-06, "loss": 0.2244, "step": 2287 }, { "epoch": 0.8534129056322268, "grad_norm": 0.7520518898963928, "learning_rate": 2e-06, "loss": 0.199, "step": 2288 }, { "epoch": 0.8537859007832899, "grad_norm": 0.8529826998710632, "learning_rate": 2e-06, "loss": 0.2318, "step": 2289 }, { "epoch": 0.8541588959343529, "grad_norm": 0.9428061842918396, "learning_rate": 2e-06, "loss": 0.2046, "step": 2290 }, { "epoch": 0.8545318910854159, "grad_norm": 0.9194631576538086, "learning_rate": 2e-06, "loss": 0.1882, "step": 2291 }, { "epoch": 0.854904886236479, "grad_norm": 0.8858545422554016, "learning_rate": 2e-06, "loss": 0.1981, "step": 2292 }, { "epoch": 0.855277881387542, "grad_norm": 0.8526286482810974, "learning_rate": 2e-06, "loss": 0.2012, "step": 2293 }, { "epoch": 0.8556508765386049, "grad_norm": 1.1862539052963257, "learning_rate": 2e-06, "loss": 0.1988, "step": 2294 }, { "epoch": 0.856023871689668, "grad_norm": 0.9317255616188049, "learning_rate": 2e-06, "loss": 0.1897, "step": 2295 }, { "epoch": 0.856396866840731, "grad_norm": 0.9041059613227844, "learning_rate": 2e-06, "loss": 0.2029, "step": 2296 }, { "epoch": 0.8567698619917941, "grad_norm": 0.906578779220581, "learning_rate": 2e-06, "loss": 0.1849, "step": 2297 }, { "epoch": 0.8571428571428571, "grad_norm": 0.8508633375167847, "learning_rate": 2e-06, "loss": 0.1837, "step": 2298 }, { "epoch": 0.8575158522939201, "grad_norm": 1.080852746963501, "learning_rate": 2e-06, "loss": 0.2069, "step": 2299 }, { "epoch": 0.8578888474449832, "grad_norm": 0.7686804533004761, "learning_rate": 2e-06, "loss": 0.1976, "step": 2300 }, { "epoch": 0.8582618425960462, "grad_norm": 1.1949530839920044, "learning_rate": 2e-06, "loss": 0.2108, "step": 2301 }, { "epoch": 0.8586348377471092, "grad_norm": 0.774158775806427, "learning_rate": 2e-06, "loss": 0.2246, "step": 2302 }, { "epoch": 0.8590078328981723, "grad_norm": 0.854658305644989, "learning_rate": 2e-06, "loss": 0.2009, "step": 2303 }, { "epoch": 0.8593808280492353, "grad_norm": 0.9895430207252502, "learning_rate": 2e-06, "loss": 0.2026, "step": 2304 }, { "epoch": 0.8597538232002984, "grad_norm": 0.828836977481842, "learning_rate": 2e-06, "loss": 0.2066, "step": 2305 }, { "epoch": 0.8601268183513614, "grad_norm": 0.95902019739151, "learning_rate": 2e-06, "loss": 0.1777, "step": 2306 }, { "epoch": 0.8604998135024244, "grad_norm": 0.7958859801292419, "learning_rate": 2e-06, "loss": 0.2069, "step": 2307 }, { "epoch": 0.8608728086534875, "grad_norm": 1.0066289901733398, "learning_rate": 2e-06, "loss": 0.2133, "step": 2308 }, { "epoch": 0.8612458038045505, "grad_norm": 0.8576673865318298, "learning_rate": 2e-06, "loss": 0.1982, "step": 2309 }, { "epoch": 0.8616187989556136, "grad_norm": 0.9775345921516418, "learning_rate": 2e-06, "loss": 0.198, "step": 2310 }, { "epoch": 0.8619917941066766, "grad_norm": 1.0948039293289185, "learning_rate": 2e-06, "loss": 0.2033, "step": 2311 }, { "epoch": 0.8623647892577396, "grad_norm": 0.7817434668540955, "learning_rate": 2e-06, "loss": 0.1912, "step": 2312 }, { "epoch": 0.8627377844088027, "grad_norm": 1.026911973953247, "learning_rate": 2e-06, "loss": 0.1901, "step": 2313 }, { "epoch": 0.8631107795598657, "grad_norm": 0.7605583071708679, "learning_rate": 2e-06, "loss": 0.2162, "step": 2314 }, { "epoch": 0.8634837747109287, "grad_norm": 0.8692958354949951, "learning_rate": 2e-06, "loss": 0.2164, "step": 2315 }, { "epoch": 0.8638567698619918, "grad_norm": 0.8536618947982788, "learning_rate": 2e-06, "loss": 0.2379, "step": 2316 }, { "epoch": 0.8642297650130548, "grad_norm": 0.8726062774658203, "learning_rate": 2e-06, "loss": 0.2012, "step": 2317 }, { "epoch": 0.8646027601641179, "grad_norm": 0.9020558595657349, "learning_rate": 2e-06, "loss": 0.1945, "step": 2318 }, { "epoch": 0.8649757553151809, "grad_norm": 1.0120903253555298, "learning_rate": 2e-06, "loss": 0.2066, "step": 2319 }, { "epoch": 0.8653487504662439, "grad_norm": 0.9046547412872314, "learning_rate": 2e-06, "loss": 0.197, "step": 2320 }, { "epoch": 0.865721745617307, "grad_norm": 1.1883221864700317, "learning_rate": 2e-06, "loss": 0.2027, "step": 2321 }, { "epoch": 0.86609474076837, "grad_norm": 0.8835269808769226, "learning_rate": 2e-06, "loss": 0.2165, "step": 2322 }, { "epoch": 0.866467735919433, "grad_norm": 0.9105651378631592, "learning_rate": 2e-06, "loss": 0.2298, "step": 2323 }, { "epoch": 0.8668407310704961, "grad_norm": 0.8807889223098755, "learning_rate": 2e-06, "loss": 0.1793, "step": 2324 }, { "epoch": 0.8672137262215591, "grad_norm": 0.8799693584442139, "learning_rate": 2e-06, "loss": 0.2159, "step": 2325 }, { "epoch": 0.8675867213726222, "grad_norm": 1.0581670999526978, "learning_rate": 2e-06, "loss": 0.216, "step": 2326 }, { "epoch": 0.8679597165236852, "grad_norm": 0.9471609592437744, "learning_rate": 2e-06, "loss": 0.1866, "step": 2327 }, { "epoch": 0.8683327116747482, "grad_norm": 0.9784427881240845, "learning_rate": 2e-06, "loss": 0.2255, "step": 2328 }, { "epoch": 0.8687057068258113, "grad_norm": 0.9148431420326233, "learning_rate": 2e-06, "loss": 0.2087, "step": 2329 }, { "epoch": 0.8690787019768743, "grad_norm": 1.001655101776123, "learning_rate": 2e-06, "loss": 0.1849, "step": 2330 }, { "epoch": 0.8694516971279374, "grad_norm": 1.071579098701477, "learning_rate": 2e-06, "loss": 0.1839, "step": 2331 }, { "epoch": 0.8698246922790004, "grad_norm": 0.8449417948722839, "learning_rate": 2e-06, "loss": 0.2368, "step": 2332 }, { "epoch": 0.8701976874300634, "grad_norm": 1.0276548862457275, "learning_rate": 2e-06, "loss": 0.1978, "step": 2333 }, { "epoch": 0.8705706825811265, "grad_norm": 0.898478627204895, "learning_rate": 2e-06, "loss": 0.2193, "step": 2334 }, { "epoch": 0.8709436777321895, "grad_norm": 1.129884123802185, "learning_rate": 2e-06, "loss": 0.2062, "step": 2335 }, { "epoch": 0.8713166728832525, "grad_norm": 1.132789134979248, "learning_rate": 2e-06, "loss": 0.1995, "step": 2336 }, { "epoch": 0.8716896680343156, "grad_norm": 0.7551388144493103, "learning_rate": 2e-06, "loss": 0.1994, "step": 2337 }, { "epoch": 0.8720626631853786, "grad_norm": 0.8078520894050598, "learning_rate": 2e-06, "loss": 0.2009, "step": 2338 }, { "epoch": 0.8724356583364417, "grad_norm": 0.7732698917388916, "learning_rate": 2e-06, "loss": 0.2201, "step": 2339 }, { "epoch": 0.8728086534875047, "grad_norm": 1.0446650981903076, "learning_rate": 2e-06, "loss": 0.1943, "step": 2340 }, { "epoch": 0.8731816486385677, "grad_norm": 0.7483903169631958, "learning_rate": 2e-06, "loss": 0.2158, "step": 2341 }, { "epoch": 0.8735546437896308, "grad_norm": 0.8886820673942566, "learning_rate": 2e-06, "loss": 0.1947, "step": 2342 }, { "epoch": 0.8739276389406938, "grad_norm": 0.7333488464355469, "learning_rate": 2e-06, "loss": 0.2073, "step": 2343 }, { "epoch": 0.8743006340917568, "grad_norm": 0.7972422242164612, "learning_rate": 2e-06, "loss": 0.1921, "step": 2344 }, { "epoch": 0.8746736292428199, "grad_norm": 0.9004570245742798, "learning_rate": 2e-06, "loss": 0.2097, "step": 2345 }, { "epoch": 0.8750466243938829, "grad_norm": 1.104238748550415, "learning_rate": 2e-06, "loss": 0.2124, "step": 2346 }, { "epoch": 0.875419619544946, "grad_norm": 0.9378988742828369, "learning_rate": 2e-06, "loss": 0.227, "step": 2347 }, { "epoch": 0.875792614696009, "grad_norm": 0.9966977834701538, "learning_rate": 2e-06, "loss": 0.2157, "step": 2348 }, { "epoch": 0.876165609847072, "grad_norm": 0.8823171854019165, "learning_rate": 2e-06, "loss": 0.2046, "step": 2349 }, { "epoch": 0.8765386049981351, "grad_norm": 0.7601616382598877, "learning_rate": 2e-06, "loss": 0.2007, "step": 2350 }, { "epoch": 0.8769116001491981, "grad_norm": 1.0657789707183838, "learning_rate": 2e-06, "loss": 0.1967, "step": 2351 }, { "epoch": 0.8772845953002611, "grad_norm": 1.0337498188018799, "learning_rate": 2e-06, "loss": 0.2328, "step": 2352 }, { "epoch": 0.8776575904513242, "grad_norm": 1.255030870437622, "learning_rate": 2e-06, "loss": 0.1859, "step": 2353 }, { "epoch": 0.8780305856023871, "grad_norm": 0.8008043169975281, "learning_rate": 2e-06, "loss": 0.2187, "step": 2354 }, { "epoch": 0.8784035807534502, "grad_norm": 0.9973489046096802, "learning_rate": 2e-06, "loss": 0.1949, "step": 2355 }, { "epoch": 0.8787765759045132, "grad_norm": 0.8946110606193542, "learning_rate": 2e-06, "loss": 0.2176, "step": 2356 }, { "epoch": 0.8791495710555762, "grad_norm": 1.0572824478149414, "learning_rate": 2e-06, "loss": 0.1893, "step": 2357 }, { "epoch": 0.8795225662066393, "grad_norm": 0.8679421544075012, "learning_rate": 2e-06, "loss": 0.2016, "step": 2358 }, { "epoch": 0.8798955613577023, "grad_norm": 0.7772968411445618, "learning_rate": 2e-06, "loss": 0.1933, "step": 2359 }, { "epoch": 0.8802685565087653, "grad_norm": 0.920763373374939, "learning_rate": 2e-06, "loss": 0.2301, "step": 2360 }, { "epoch": 0.8806415516598284, "grad_norm": 0.9864059090614319, "learning_rate": 2e-06, "loss": 0.205, "step": 2361 }, { "epoch": 0.8810145468108914, "grad_norm": 0.9743165969848633, "learning_rate": 2e-06, "loss": 0.2086, "step": 2362 }, { "epoch": 0.8813875419619545, "grad_norm": 2.6666128635406494, "learning_rate": 2e-06, "loss": 0.2023, "step": 2363 }, { "epoch": 0.8817605371130175, "grad_norm": 1.1307668685913086, "learning_rate": 2e-06, "loss": 0.2131, "step": 2364 }, { "epoch": 0.8821335322640805, "grad_norm": 0.9062139987945557, "learning_rate": 2e-06, "loss": 0.1886, "step": 2365 }, { "epoch": 0.8825065274151436, "grad_norm": 0.8367603421211243, "learning_rate": 2e-06, "loss": 0.2, "step": 2366 }, { "epoch": 0.8828795225662066, "grad_norm": 0.8452056646347046, "learning_rate": 2e-06, "loss": 0.1777, "step": 2367 }, { "epoch": 0.8832525177172696, "grad_norm": 0.9433693289756775, "learning_rate": 2e-06, "loss": 0.2016, "step": 2368 }, { "epoch": 0.8836255128683327, "grad_norm": 0.8911710977554321, "learning_rate": 2e-06, "loss": 0.2049, "step": 2369 }, { "epoch": 0.8839985080193957, "grad_norm": 0.8847655653953552, "learning_rate": 2e-06, "loss": 0.1817, "step": 2370 }, { "epoch": 0.8843715031704588, "grad_norm": 0.7180054783821106, "learning_rate": 2e-06, "loss": 0.2313, "step": 2371 }, { "epoch": 0.8847444983215218, "grad_norm": 0.924666166305542, "learning_rate": 2e-06, "loss": 0.231, "step": 2372 }, { "epoch": 0.8851174934725848, "grad_norm": 0.7942616939544678, "learning_rate": 2e-06, "loss": 0.1929, "step": 2373 }, { "epoch": 0.8854904886236479, "grad_norm": 0.7566806674003601, "learning_rate": 2e-06, "loss": 0.2103, "step": 2374 }, { "epoch": 0.8858634837747109, "grad_norm": 0.8275310397148132, "learning_rate": 2e-06, "loss": 0.211, "step": 2375 }, { "epoch": 0.886236478925774, "grad_norm": 0.82159423828125, "learning_rate": 2e-06, "loss": 0.22, "step": 2376 }, { "epoch": 0.886609474076837, "grad_norm": 0.8875710368156433, "learning_rate": 2e-06, "loss": 0.2039, "step": 2377 }, { "epoch": 0.8869824692279, "grad_norm": 0.8086434602737427, "learning_rate": 2e-06, "loss": 0.2234, "step": 2378 }, { "epoch": 0.8873554643789631, "grad_norm": 0.8110647201538086, "learning_rate": 2e-06, "loss": 0.2025, "step": 2379 }, { "epoch": 0.8877284595300261, "grad_norm": 0.8597199320793152, "learning_rate": 2e-06, "loss": 0.2309, "step": 2380 }, { "epoch": 0.8881014546810891, "grad_norm": 0.7757603526115417, "learning_rate": 2e-06, "loss": 0.1952, "step": 2381 }, { "epoch": 0.8884744498321522, "grad_norm": 0.7990423440933228, "learning_rate": 2e-06, "loss": 0.2245, "step": 2382 }, { "epoch": 0.8888474449832152, "grad_norm": 0.8957937359809875, "learning_rate": 2e-06, "loss": 0.2194, "step": 2383 }, { "epoch": 0.8892204401342783, "grad_norm": 0.8694407939910889, "learning_rate": 2e-06, "loss": 0.2017, "step": 2384 }, { "epoch": 0.8895934352853413, "grad_norm": 0.8614179491996765, "learning_rate": 2e-06, "loss": 0.1927, "step": 2385 }, { "epoch": 0.8899664304364043, "grad_norm": 0.9796443581581116, "learning_rate": 2e-06, "loss": 0.1818, "step": 2386 }, { "epoch": 0.8903394255874674, "grad_norm": 0.7090235352516174, "learning_rate": 2e-06, "loss": 0.2413, "step": 2387 }, { "epoch": 0.8907124207385304, "grad_norm": 1.1362873315811157, "learning_rate": 2e-06, "loss": 0.1851, "step": 2388 }, { "epoch": 0.8910854158895934, "grad_norm": 0.7856814861297607, "learning_rate": 2e-06, "loss": 0.2063, "step": 2389 }, { "epoch": 0.8914584110406565, "grad_norm": 1.0836511850357056, "learning_rate": 2e-06, "loss": 0.2025, "step": 2390 }, { "epoch": 0.8918314061917195, "grad_norm": 0.9416884183883667, "learning_rate": 2e-06, "loss": 0.2073, "step": 2391 }, { "epoch": 0.8922044013427826, "grad_norm": 0.9157678484916687, "learning_rate": 2e-06, "loss": 0.2211, "step": 2392 }, { "epoch": 0.8925773964938456, "grad_norm": 0.8283977508544922, "learning_rate": 2e-06, "loss": 0.1944, "step": 2393 }, { "epoch": 0.8929503916449086, "grad_norm": 0.9373624920845032, "learning_rate": 2e-06, "loss": 0.19, "step": 2394 }, { "epoch": 0.8933233867959717, "grad_norm": 0.8762903809547424, "learning_rate": 2e-06, "loss": 0.2156, "step": 2395 }, { "epoch": 0.8936963819470347, "grad_norm": 1.0603152513504028, "learning_rate": 2e-06, "loss": 0.1977, "step": 2396 }, { "epoch": 0.8940693770980978, "grad_norm": 0.8700597882270813, "learning_rate": 2e-06, "loss": 0.2319, "step": 2397 }, { "epoch": 0.8944423722491608, "grad_norm": 0.8134177327156067, "learning_rate": 2e-06, "loss": 0.1936, "step": 2398 }, { "epoch": 0.8948153674002238, "grad_norm": 0.7757183313369751, "learning_rate": 2e-06, "loss": 0.1768, "step": 2399 }, { "epoch": 0.8951883625512869, "grad_norm": 0.9331920146942139, "learning_rate": 2e-06, "loss": 0.1864, "step": 2400 }, { "epoch": 0.8955613577023499, "grad_norm": 0.908774197101593, "learning_rate": 2e-06, "loss": 0.2208, "step": 2401 }, { "epoch": 0.8959343528534129, "grad_norm": 0.8003936409950256, "learning_rate": 2e-06, "loss": 0.2252, "step": 2402 }, { "epoch": 0.896307348004476, "grad_norm": 0.9895719885826111, "learning_rate": 2e-06, "loss": 0.2042, "step": 2403 }, { "epoch": 0.896680343155539, "grad_norm": 0.7413432598114014, "learning_rate": 2e-06, "loss": 0.2214, "step": 2404 }, { "epoch": 0.897053338306602, "grad_norm": 0.6962586045265198, "learning_rate": 2e-06, "loss": 0.2226, "step": 2405 }, { "epoch": 0.8974263334576651, "grad_norm": 0.8389998078346252, "learning_rate": 2e-06, "loss": 0.1992, "step": 2406 }, { "epoch": 0.8977993286087281, "grad_norm": 0.9401604533195496, "learning_rate": 2e-06, "loss": 0.194, "step": 2407 }, { "epoch": 0.8981723237597912, "grad_norm": 0.8290508389472961, "learning_rate": 2e-06, "loss": 0.2094, "step": 2408 }, { "epoch": 0.8985453189108542, "grad_norm": 1.0688302516937256, "learning_rate": 2e-06, "loss": 0.205, "step": 2409 }, { "epoch": 0.8989183140619172, "grad_norm": 0.9043827652931213, "learning_rate": 2e-06, "loss": 0.2035, "step": 2410 }, { "epoch": 0.8992913092129803, "grad_norm": 0.8657143115997314, "learning_rate": 2e-06, "loss": 0.2125, "step": 2411 }, { "epoch": 0.8996643043640433, "grad_norm": 0.8001573085784912, "learning_rate": 2e-06, "loss": 0.1849, "step": 2412 }, { "epoch": 0.9000372995151062, "grad_norm": 0.9570964574813843, "learning_rate": 2e-06, "loss": 0.2092, "step": 2413 }, { "epoch": 0.9004102946661693, "grad_norm": 0.9955922365188599, "learning_rate": 2e-06, "loss": 0.1906, "step": 2414 }, { "epoch": 0.9007832898172323, "grad_norm": 1.2115730047225952, "learning_rate": 2e-06, "loss": 0.2185, "step": 2415 }, { "epoch": 0.9011562849682954, "grad_norm": 1.0245060920715332, "learning_rate": 2e-06, "loss": 0.2152, "step": 2416 }, { "epoch": 0.9015292801193584, "grad_norm": 0.9311987161636353, "learning_rate": 2e-06, "loss": 0.2067, "step": 2417 }, { "epoch": 0.9019022752704214, "grad_norm": 0.6848462224006653, "learning_rate": 2e-06, "loss": 0.2236, "step": 2418 }, { "epoch": 0.9022752704214845, "grad_norm": 0.8465884923934937, "learning_rate": 2e-06, "loss": 0.2222, "step": 2419 }, { "epoch": 0.9026482655725475, "grad_norm": 0.7294789552688599, "learning_rate": 2e-06, "loss": 0.2175, "step": 2420 }, { "epoch": 0.9030212607236106, "grad_norm": 1.0976815223693848, "learning_rate": 2e-06, "loss": 0.1799, "step": 2421 }, { "epoch": 0.9033942558746736, "grad_norm": 0.9759774208068848, "learning_rate": 2e-06, "loss": 0.2186, "step": 2422 }, { "epoch": 0.9037672510257366, "grad_norm": 0.8858247399330139, "learning_rate": 2e-06, "loss": 0.2137, "step": 2423 }, { "epoch": 0.9041402461767997, "grad_norm": 1.1787666082382202, "learning_rate": 2e-06, "loss": 0.2116, "step": 2424 }, { "epoch": 0.9045132413278627, "grad_norm": 0.8084837794303894, "learning_rate": 2e-06, "loss": 0.1965, "step": 2425 }, { "epoch": 0.9048862364789257, "grad_norm": 1.040175199508667, "learning_rate": 2e-06, "loss": 0.1898, "step": 2426 }, { "epoch": 0.9052592316299888, "grad_norm": 0.7725767493247986, "learning_rate": 2e-06, "loss": 0.207, "step": 2427 }, { "epoch": 0.9056322267810518, "grad_norm": 0.8293384313583374, "learning_rate": 2e-06, "loss": 0.2132, "step": 2428 }, { "epoch": 0.9060052219321149, "grad_norm": 1.0746567249298096, "learning_rate": 2e-06, "loss": 0.2084, "step": 2429 }, { "epoch": 0.9063782170831779, "grad_norm": 0.9493822455406189, "learning_rate": 2e-06, "loss": 0.1912, "step": 2430 }, { "epoch": 0.9067512122342409, "grad_norm": 0.868105411529541, "learning_rate": 2e-06, "loss": 0.2083, "step": 2431 }, { "epoch": 0.907124207385304, "grad_norm": 1.0786259174346924, "learning_rate": 2e-06, "loss": 0.218, "step": 2432 }, { "epoch": 0.907497202536367, "grad_norm": 0.9895271062850952, "learning_rate": 2e-06, "loss": 0.2063, "step": 2433 }, { "epoch": 0.90787019768743, "grad_norm": 0.8024197816848755, "learning_rate": 2e-06, "loss": 0.2055, "step": 2434 }, { "epoch": 0.9082431928384931, "grad_norm": 0.961408257484436, "learning_rate": 2e-06, "loss": 0.2341, "step": 2435 }, { "epoch": 0.9086161879895561, "grad_norm": 0.8633098602294922, "learning_rate": 2e-06, "loss": 0.2032, "step": 2436 }, { "epoch": 0.9089891831406192, "grad_norm": 0.8822959065437317, "learning_rate": 2e-06, "loss": 0.2056, "step": 2437 }, { "epoch": 0.9093621782916822, "grad_norm": 1.1385302543640137, "learning_rate": 2e-06, "loss": 0.2101, "step": 2438 }, { "epoch": 0.9097351734427452, "grad_norm": 1.1181535720825195, "learning_rate": 2e-06, "loss": 0.2262, "step": 2439 }, { "epoch": 0.9101081685938083, "grad_norm": 0.7781365513801575, "learning_rate": 2e-06, "loss": 0.2064, "step": 2440 }, { "epoch": 0.9104811637448713, "grad_norm": 0.9644418954849243, "learning_rate": 2e-06, "loss": 0.2056, "step": 2441 }, { "epoch": 0.9108541588959344, "grad_norm": 0.9658606052398682, "learning_rate": 2e-06, "loss": 0.2115, "step": 2442 }, { "epoch": 0.9112271540469974, "grad_norm": 1.1145058870315552, "learning_rate": 2e-06, "loss": 0.2137, "step": 2443 }, { "epoch": 0.9116001491980604, "grad_norm": 0.8913991451263428, "learning_rate": 2e-06, "loss": 0.2334, "step": 2444 }, { "epoch": 0.9119731443491235, "grad_norm": 0.7906640768051147, "learning_rate": 2e-06, "loss": 0.2257, "step": 2445 }, { "epoch": 0.9123461395001865, "grad_norm": 0.8874973654747009, "learning_rate": 2e-06, "loss": 0.2348, "step": 2446 }, { "epoch": 0.9127191346512495, "grad_norm": 0.6862461566925049, "learning_rate": 2e-06, "loss": 0.205, "step": 2447 }, { "epoch": 0.9130921298023126, "grad_norm": 1.005303978919983, "learning_rate": 2e-06, "loss": 0.2096, "step": 2448 }, { "epoch": 0.9134651249533756, "grad_norm": 0.9334594011306763, "learning_rate": 2e-06, "loss": 0.2088, "step": 2449 }, { "epoch": 0.9138381201044387, "grad_norm": 0.8214448094367981, "learning_rate": 2e-06, "loss": 0.2177, "step": 2450 }, { "epoch": 0.9142111152555017, "grad_norm": 0.8827018141746521, "learning_rate": 2e-06, "loss": 0.2176, "step": 2451 }, { "epoch": 0.9145841104065647, "grad_norm": 0.8432923555374146, "learning_rate": 2e-06, "loss": 0.197, "step": 2452 }, { "epoch": 0.9149571055576278, "grad_norm": 0.7640820741653442, "learning_rate": 2e-06, "loss": 0.2068, "step": 2453 }, { "epoch": 0.9153301007086908, "grad_norm": 0.7094398736953735, "learning_rate": 2e-06, "loss": 0.19, "step": 2454 }, { "epoch": 0.9157030958597538, "grad_norm": 0.8536086678504944, "learning_rate": 2e-06, "loss": 0.1952, "step": 2455 }, { "epoch": 0.9160760910108169, "grad_norm": 0.9735382795333862, "learning_rate": 2e-06, "loss": 0.1911, "step": 2456 }, { "epoch": 0.9164490861618799, "grad_norm": 0.8809613585472107, "learning_rate": 2e-06, "loss": 0.2117, "step": 2457 }, { "epoch": 0.916822081312943, "grad_norm": 1.0430599451065063, "learning_rate": 2e-06, "loss": 0.2222, "step": 2458 }, { "epoch": 0.917195076464006, "grad_norm": 0.9742457270622253, "learning_rate": 2e-06, "loss": 0.2087, "step": 2459 }, { "epoch": 0.917568071615069, "grad_norm": 0.9385451674461365, "learning_rate": 2e-06, "loss": 0.2111, "step": 2460 }, { "epoch": 0.9179410667661321, "grad_norm": 0.9141610860824585, "learning_rate": 2e-06, "loss": 0.206, "step": 2461 }, { "epoch": 0.9183140619171951, "grad_norm": 0.9264875650405884, "learning_rate": 2e-06, "loss": 0.1966, "step": 2462 }, { "epoch": 0.9186870570682582, "grad_norm": 0.7595977783203125, "learning_rate": 2e-06, "loss": 0.1953, "step": 2463 }, { "epoch": 0.9190600522193212, "grad_norm": 1.324661135673523, "learning_rate": 2e-06, "loss": 0.2098, "step": 2464 }, { "epoch": 0.9194330473703842, "grad_norm": 0.8098351955413818, "learning_rate": 2e-06, "loss": 0.2197, "step": 2465 }, { "epoch": 0.9198060425214473, "grad_norm": 0.9097784161567688, "learning_rate": 2e-06, "loss": 0.1937, "step": 2466 }, { "epoch": 0.9201790376725103, "grad_norm": 0.7859197854995728, "learning_rate": 2e-06, "loss": 0.2245, "step": 2467 }, { "epoch": 0.9205520328235733, "grad_norm": 1.0298209190368652, "learning_rate": 2e-06, "loss": 0.2171, "step": 2468 }, { "epoch": 0.9209250279746364, "grad_norm": 0.7887113094329834, "learning_rate": 2e-06, "loss": 0.2175, "step": 2469 }, { "epoch": 0.9212980231256994, "grad_norm": 1.1837142705917358, "learning_rate": 2e-06, "loss": 0.2014, "step": 2470 }, { "epoch": 0.9216710182767625, "grad_norm": 0.8797836303710938, "learning_rate": 2e-06, "loss": 0.2316, "step": 2471 }, { "epoch": 0.9220440134278255, "grad_norm": 1.1338684558868408, "learning_rate": 2e-06, "loss": 0.1878, "step": 2472 }, { "epoch": 0.9224170085788884, "grad_norm": 1.03104829788208, "learning_rate": 2e-06, "loss": 0.2093, "step": 2473 }, { "epoch": 0.9227900037299515, "grad_norm": 0.994834840297699, "learning_rate": 2e-06, "loss": 0.218, "step": 2474 }, { "epoch": 0.9231629988810145, "grad_norm": 1.1050065755844116, "learning_rate": 2e-06, "loss": 0.1774, "step": 2475 }, { "epoch": 0.9235359940320775, "grad_norm": 0.8913307785987854, "learning_rate": 2e-06, "loss": 0.1921, "step": 2476 }, { "epoch": 0.9239089891831406, "grad_norm": 0.8286313414573669, "learning_rate": 2e-06, "loss": 0.1993, "step": 2477 }, { "epoch": 0.9242819843342036, "grad_norm": 0.9877755045890808, "learning_rate": 2e-06, "loss": 0.1952, "step": 2478 }, { "epoch": 0.9246549794852666, "grad_norm": 0.9960859417915344, "learning_rate": 2e-06, "loss": 0.171, "step": 2479 }, { "epoch": 0.9250279746363297, "grad_norm": 0.7744290232658386, "learning_rate": 2e-06, "loss": 0.2138, "step": 2480 }, { "epoch": 0.9254009697873927, "grad_norm": 1.119287371635437, "learning_rate": 2e-06, "loss": 0.1863, "step": 2481 }, { "epoch": 0.9257739649384558, "grad_norm": 1.1510258913040161, "learning_rate": 2e-06, "loss": 0.2047, "step": 2482 }, { "epoch": 0.9261469600895188, "grad_norm": 0.7482684254646301, "learning_rate": 2e-06, "loss": 0.2114, "step": 2483 }, { "epoch": 0.9265199552405818, "grad_norm": 0.7635931968688965, "learning_rate": 2e-06, "loss": 0.1845, "step": 2484 }, { "epoch": 0.9268929503916449, "grad_norm": 0.8982135653495789, "learning_rate": 2e-06, "loss": 0.2072, "step": 2485 }, { "epoch": 0.9272659455427079, "grad_norm": 0.9554506540298462, "learning_rate": 2e-06, "loss": 0.2182, "step": 2486 }, { "epoch": 0.927638940693771, "grad_norm": 1.0811885595321655, "learning_rate": 2e-06, "loss": 0.2034, "step": 2487 }, { "epoch": 0.928011935844834, "grad_norm": 0.9642618894577026, "learning_rate": 2e-06, "loss": 0.2045, "step": 2488 }, { "epoch": 0.928384930995897, "grad_norm": 0.7878226637840271, "learning_rate": 2e-06, "loss": 0.1965, "step": 2489 }, { "epoch": 0.9287579261469601, "grad_norm": 1.276680827140808, "learning_rate": 2e-06, "loss": 0.1983, "step": 2490 }, { "epoch": 0.9291309212980231, "grad_norm": 0.7282111644744873, "learning_rate": 2e-06, "loss": 0.2125, "step": 2491 }, { "epoch": 0.9295039164490861, "grad_norm": 0.7462581992149353, "learning_rate": 2e-06, "loss": 0.221, "step": 2492 }, { "epoch": 0.9298769116001492, "grad_norm": 0.8370550870895386, "learning_rate": 2e-06, "loss": 0.213, "step": 2493 }, { "epoch": 0.9302499067512122, "grad_norm": 1.1402047872543335, "learning_rate": 2e-06, "loss": 0.2032, "step": 2494 }, { "epoch": 0.9306229019022753, "grad_norm": 1.1040935516357422, "learning_rate": 2e-06, "loss": 0.2006, "step": 2495 }, { "epoch": 0.9309958970533383, "grad_norm": 0.8479167819023132, "learning_rate": 2e-06, "loss": 0.1944, "step": 2496 }, { "epoch": 0.9313688922044013, "grad_norm": 1.0603399276733398, "learning_rate": 2e-06, "loss": 0.2092, "step": 2497 }, { "epoch": 0.9317418873554644, "grad_norm": 1.014512538909912, "learning_rate": 2e-06, "loss": 0.1888, "step": 2498 }, { "epoch": 0.9321148825065274, "grad_norm": 0.8555555939674377, "learning_rate": 2e-06, "loss": 0.2089, "step": 2499 }, { "epoch": 0.9324878776575904, "grad_norm": 0.6958943009376526, "learning_rate": 2e-06, "loss": 0.1998, "step": 2500 }, { "epoch": 0.9328608728086535, "grad_norm": 0.900701642036438, "learning_rate": 2e-06, "loss": 0.1755, "step": 2501 }, { "epoch": 0.9332338679597165, "grad_norm": 0.8184986710548401, "learning_rate": 2e-06, "loss": 0.1966, "step": 2502 }, { "epoch": 0.9336068631107796, "grad_norm": 0.9587969779968262, "learning_rate": 2e-06, "loss": 0.2033, "step": 2503 }, { "epoch": 0.9339798582618426, "grad_norm": 0.9269519448280334, "learning_rate": 2e-06, "loss": 0.2046, "step": 2504 }, { "epoch": 0.9343528534129056, "grad_norm": 0.9511339664459229, "learning_rate": 2e-06, "loss": 0.1893, "step": 2505 }, { "epoch": 0.9347258485639687, "grad_norm": 1.7285170555114746, "learning_rate": 2e-06, "loss": 0.208, "step": 2506 }, { "epoch": 0.9350988437150317, "grad_norm": 0.7651569843292236, "learning_rate": 2e-06, "loss": 0.1957, "step": 2507 }, { "epoch": 0.9354718388660948, "grad_norm": 0.9690420627593994, "learning_rate": 2e-06, "loss": 0.2268, "step": 2508 }, { "epoch": 0.9358448340171578, "grad_norm": 1.0609732866287231, "learning_rate": 2e-06, "loss": 0.1783, "step": 2509 }, { "epoch": 0.9362178291682208, "grad_norm": 0.8795483708381653, "learning_rate": 2e-06, "loss": 0.2065, "step": 2510 }, { "epoch": 0.9365908243192839, "grad_norm": 0.7738765478134155, "learning_rate": 2e-06, "loss": 0.2043, "step": 2511 }, { "epoch": 0.9369638194703469, "grad_norm": 0.9840923547744751, "learning_rate": 2e-06, "loss": 0.203, "step": 2512 }, { "epoch": 0.9373368146214099, "grad_norm": 0.9401374459266663, "learning_rate": 2e-06, "loss": 0.2152, "step": 2513 }, { "epoch": 0.937709809772473, "grad_norm": 1.1053568124771118, "learning_rate": 2e-06, "loss": 0.1914, "step": 2514 }, { "epoch": 0.938082804923536, "grad_norm": 0.931867778301239, "learning_rate": 2e-06, "loss": 0.2196, "step": 2515 }, { "epoch": 0.9384558000745991, "grad_norm": 0.9997725486755371, "learning_rate": 2e-06, "loss": 0.1941, "step": 2516 }, { "epoch": 0.9388287952256621, "grad_norm": 0.944036066532135, "learning_rate": 2e-06, "loss": 0.2133, "step": 2517 }, { "epoch": 0.9392017903767251, "grad_norm": 0.9152997732162476, "learning_rate": 2e-06, "loss": 0.1932, "step": 2518 }, { "epoch": 0.9395747855277882, "grad_norm": 0.8166235089302063, "learning_rate": 2e-06, "loss": 0.2267, "step": 2519 }, { "epoch": 0.9399477806788512, "grad_norm": 0.7684289216995239, "learning_rate": 2e-06, "loss": 0.2008, "step": 2520 }, { "epoch": 0.9403207758299142, "grad_norm": 0.840573251247406, "learning_rate": 2e-06, "loss": 0.2354, "step": 2521 }, { "epoch": 0.9406937709809773, "grad_norm": 1.028581142425537, "learning_rate": 2e-06, "loss": 0.1888, "step": 2522 }, { "epoch": 0.9410667661320403, "grad_norm": 0.9855469465255737, "learning_rate": 2e-06, "loss": 0.1907, "step": 2523 }, { "epoch": 0.9414397612831034, "grad_norm": 0.933066725730896, "learning_rate": 2e-06, "loss": 0.2064, "step": 2524 }, { "epoch": 0.9418127564341664, "grad_norm": 0.8298156261444092, "learning_rate": 2e-06, "loss": 0.2102, "step": 2525 }, { "epoch": 0.9421857515852294, "grad_norm": 0.940024197101593, "learning_rate": 2e-06, "loss": 0.2019, "step": 2526 }, { "epoch": 0.9425587467362925, "grad_norm": 0.8374473452568054, "learning_rate": 2e-06, "loss": 0.1906, "step": 2527 }, { "epoch": 0.9429317418873555, "grad_norm": 1.095763921737671, "learning_rate": 2e-06, "loss": 0.2048, "step": 2528 }, { "epoch": 0.9433047370384186, "grad_norm": 1.017600655555725, "learning_rate": 2e-06, "loss": 0.1877, "step": 2529 }, { "epoch": 0.9436777321894816, "grad_norm": 0.8778122663497925, "learning_rate": 2e-06, "loss": 0.1903, "step": 2530 }, { "epoch": 0.9440507273405446, "grad_norm": 0.848817765712738, "learning_rate": 2e-06, "loss": 0.1918, "step": 2531 }, { "epoch": 0.9444237224916077, "grad_norm": 0.9397016167640686, "learning_rate": 2e-06, "loss": 0.2079, "step": 2532 }, { "epoch": 0.9447967176426706, "grad_norm": 0.9716043472290039, "learning_rate": 2e-06, "loss": 0.2126, "step": 2533 }, { "epoch": 0.9451697127937336, "grad_norm": 0.8045206069946289, "learning_rate": 2e-06, "loss": 0.1991, "step": 2534 }, { "epoch": 0.9455427079447967, "grad_norm": 0.9357680678367615, "learning_rate": 2e-06, "loss": 0.1994, "step": 2535 }, { "epoch": 0.9459157030958597, "grad_norm": 0.8321680426597595, "learning_rate": 2e-06, "loss": 0.2084, "step": 2536 }, { "epoch": 0.9462886982469227, "grad_norm": 0.8264437913894653, "learning_rate": 2e-06, "loss": 0.1986, "step": 2537 }, { "epoch": 0.9466616933979858, "grad_norm": 1.0754871368408203, "learning_rate": 2e-06, "loss": 0.1959, "step": 2538 }, { "epoch": 0.9470346885490488, "grad_norm": 0.7127580642700195, "learning_rate": 2e-06, "loss": 0.2216, "step": 2539 }, { "epoch": 0.9474076837001119, "grad_norm": 0.8122989535331726, "learning_rate": 2e-06, "loss": 0.2149, "step": 2540 }, { "epoch": 0.9477806788511749, "grad_norm": 1.0701189041137695, "learning_rate": 2e-06, "loss": 0.1928, "step": 2541 }, { "epoch": 0.9481536740022379, "grad_norm": 0.7834951281547546, "learning_rate": 2e-06, "loss": 0.2008, "step": 2542 }, { "epoch": 0.948526669153301, "grad_norm": 2.7310423851013184, "learning_rate": 2e-06, "loss": 0.1905, "step": 2543 }, { "epoch": 0.948899664304364, "grad_norm": 1.0492775440216064, "learning_rate": 2e-06, "loss": 0.2064, "step": 2544 }, { "epoch": 0.949272659455427, "grad_norm": 0.9218771457672119, "learning_rate": 2e-06, "loss": 0.2129, "step": 2545 }, { "epoch": 0.9496456546064901, "grad_norm": 0.7271510362625122, "learning_rate": 2e-06, "loss": 0.241, "step": 2546 }, { "epoch": 0.9500186497575531, "grad_norm": 0.892724871635437, "learning_rate": 2e-06, "loss": 0.1952, "step": 2547 }, { "epoch": 0.9503916449086162, "grad_norm": 0.8873052597045898, "learning_rate": 2e-06, "loss": 0.2499, "step": 2548 }, { "epoch": 0.9507646400596792, "grad_norm": 0.7649499177932739, "learning_rate": 2e-06, "loss": 0.2061, "step": 2549 }, { "epoch": 0.9511376352107422, "grad_norm": 1.1099367141723633, "learning_rate": 2e-06, "loss": 0.1781, "step": 2550 }, { "epoch": 0.9515106303618053, "grad_norm": 1.0976638793945312, "learning_rate": 2e-06, "loss": 0.2073, "step": 2551 }, { "epoch": 0.9518836255128683, "grad_norm": 0.9059686064720154, "learning_rate": 2e-06, "loss": 0.2035, "step": 2552 }, { "epoch": 0.9522566206639314, "grad_norm": 0.9987105131149292, "learning_rate": 2e-06, "loss": 0.1979, "step": 2553 }, { "epoch": 0.9526296158149944, "grad_norm": 1.3251837491989136, "learning_rate": 2e-06, "loss": 0.1988, "step": 2554 }, { "epoch": 0.9530026109660574, "grad_norm": 0.8682688474655151, "learning_rate": 2e-06, "loss": 0.1993, "step": 2555 }, { "epoch": 0.9533756061171205, "grad_norm": 1.0010793209075928, "learning_rate": 2e-06, "loss": 0.1968, "step": 2556 }, { "epoch": 0.9537486012681835, "grad_norm": 0.748514711856842, "learning_rate": 2e-06, "loss": 0.1892, "step": 2557 }, { "epoch": 0.9541215964192465, "grad_norm": 0.9412303566932678, "learning_rate": 2e-06, "loss": 0.2108, "step": 2558 }, { "epoch": 0.9544945915703096, "grad_norm": 0.8908416628837585, "learning_rate": 2e-06, "loss": 0.217, "step": 2559 }, { "epoch": 0.9548675867213726, "grad_norm": 1.0861479043960571, "learning_rate": 2e-06, "loss": 0.1954, "step": 2560 }, { "epoch": 0.9552405818724357, "grad_norm": 0.9390236735343933, "learning_rate": 2e-06, "loss": 0.2313, "step": 2561 }, { "epoch": 0.9556135770234987, "grad_norm": 0.8108839392662048, "learning_rate": 2e-06, "loss": 0.1809, "step": 2562 }, { "epoch": 0.9559865721745617, "grad_norm": 0.7045066356658936, "learning_rate": 2e-06, "loss": 0.203, "step": 2563 }, { "epoch": 0.9563595673256248, "grad_norm": 0.9694694876670837, "learning_rate": 2e-06, "loss": 0.1945, "step": 2564 }, { "epoch": 0.9567325624766878, "grad_norm": 1.040213704109192, "learning_rate": 2e-06, "loss": 0.2118, "step": 2565 }, { "epoch": 0.9571055576277508, "grad_norm": 0.8879432082176208, "learning_rate": 2e-06, "loss": 0.2047, "step": 2566 }, { "epoch": 0.9574785527788139, "grad_norm": 0.8729621171951294, "learning_rate": 2e-06, "loss": 0.2157, "step": 2567 }, { "epoch": 0.9578515479298769, "grad_norm": 0.8125555515289307, "learning_rate": 2e-06, "loss": 0.2068, "step": 2568 }, { "epoch": 0.95822454308094, "grad_norm": 0.8519281148910522, "learning_rate": 2e-06, "loss": 0.2074, "step": 2569 }, { "epoch": 0.958597538232003, "grad_norm": 1.1402510404586792, "learning_rate": 2e-06, "loss": 0.2115, "step": 2570 }, { "epoch": 0.958970533383066, "grad_norm": 0.9472774863243103, "learning_rate": 2e-06, "loss": 0.1865, "step": 2571 }, { "epoch": 0.9593435285341291, "grad_norm": 0.9298191070556641, "learning_rate": 2e-06, "loss": 0.201, "step": 2572 }, { "epoch": 0.9597165236851921, "grad_norm": 0.8449552655220032, "learning_rate": 2e-06, "loss": 0.2148, "step": 2573 }, { "epoch": 0.9600895188362552, "grad_norm": 0.9147621989250183, "learning_rate": 2e-06, "loss": 0.1976, "step": 2574 }, { "epoch": 0.9604625139873182, "grad_norm": 0.999949038028717, "learning_rate": 2e-06, "loss": 0.2037, "step": 2575 }, { "epoch": 0.9608355091383812, "grad_norm": 0.8418442606925964, "learning_rate": 2e-06, "loss": 0.1764, "step": 2576 }, { "epoch": 0.9612085042894443, "grad_norm": 0.8222423195838928, "learning_rate": 2e-06, "loss": 0.2237, "step": 2577 }, { "epoch": 0.9615814994405073, "grad_norm": 0.8998282551765442, "learning_rate": 2e-06, "loss": 0.1761, "step": 2578 }, { "epoch": 0.9619544945915703, "grad_norm": 0.9930028915405273, "learning_rate": 2e-06, "loss": 0.2039, "step": 2579 }, { "epoch": 0.9623274897426334, "grad_norm": 0.842960774898529, "learning_rate": 2e-06, "loss": 0.2074, "step": 2580 }, { "epoch": 0.9627004848936964, "grad_norm": 0.8579899072647095, "learning_rate": 2e-06, "loss": 0.2183, "step": 2581 }, { "epoch": 0.9630734800447595, "grad_norm": 0.9018408060073853, "learning_rate": 2e-06, "loss": 0.215, "step": 2582 }, { "epoch": 0.9634464751958225, "grad_norm": 1.1001180410385132, "learning_rate": 2e-06, "loss": 0.2032, "step": 2583 }, { "epoch": 0.9638194703468855, "grad_norm": 1.1638039350509644, "learning_rate": 2e-06, "loss": 0.2116, "step": 2584 }, { "epoch": 0.9641924654979486, "grad_norm": 0.9204376339912415, "learning_rate": 2e-06, "loss": 0.2525, "step": 2585 }, { "epoch": 0.9645654606490116, "grad_norm": 1.085433840751648, "learning_rate": 2e-06, "loss": 0.1911, "step": 2586 }, { "epoch": 0.9649384558000746, "grad_norm": 1.0581157207489014, "learning_rate": 2e-06, "loss": 0.1937, "step": 2587 }, { "epoch": 0.9653114509511377, "grad_norm": 0.8178553581237793, "learning_rate": 2e-06, "loss": 0.2174, "step": 2588 }, { "epoch": 0.9656844461022007, "grad_norm": 0.9523549675941467, "learning_rate": 2e-06, "loss": 0.1907, "step": 2589 }, { "epoch": 0.9660574412532638, "grad_norm": 1.1341607570648193, "learning_rate": 2e-06, "loss": 0.1935, "step": 2590 }, { "epoch": 0.9664304364043268, "grad_norm": 0.8773930668830872, "learning_rate": 2e-06, "loss": 0.2237, "step": 2591 }, { "epoch": 0.9668034315553897, "grad_norm": 0.8558007478713989, "learning_rate": 2e-06, "loss": 0.2001, "step": 2592 }, { "epoch": 0.9671764267064528, "grad_norm": 0.8496037721633911, "learning_rate": 2e-06, "loss": 0.2136, "step": 2593 }, { "epoch": 0.9675494218575158, "grad_norm": 0.788047730922699, "learning_rate": 2e-06, "loss": 0.2219, "step": 2594 }, { "epoch": 0.9679224170085788, "grad_norm": 0.8697710633277893, "learning_rate": 2e-06, "loss": 0.2037, "step": 2595 }, { "epoch": 0.9682954121596419, "grad_norm": 0.908140242099762, "learning_rate": 2e-06, "loss": 0.2277, "step": 2596 }, { "epoch": 0.9686684073107049, "grad_norm": 0.7189774513244629, "learning_rate": 2e-06, "loss": 0.2036, "step": 2597 }, { "epoch": 0.969041402461768, "grad_norm": 0.8601841926574707, "learning_rate": 2e-06, "loss": 0.209, "step": 2598 }, { "epoch": 0.969414397612831, "grad_norm": 1.0295802354812622, "learning_rate": 2e-06, "loss": 0.2071, "step": 2599 }, { "epoch": 0.969787392763894, "grad_norm": 0.8820616602897644, "learning_rate": 2e-06, "loss": 0.2021, "step": 2600 }, { "epoch": 0.9701603879149571, "grad_norm": 0.7189311981201172, "learning_rate": 2e-06, "loss": 0.2152, "step": 2601 }, { "epoch": 0.9705333830660201, "grad_norm": 0.684007465839386, "learning_rate": 2e-06, "loss": 0.2156, "step": 2602 }, { "epoch": 0.9709063782170831, "grad_norm": 0.7610335350036621, "learning_rate": 2e-06, "loss": 0.213, "step": 2603 }, { "epoch": 0.9712793733681462, "grad_norm": 0.9443777799606323, "learning_rate": 2e-06, "loss": 0.1961, "step": 2604 }, { "epoch": 0.9716523685192092, "grad_norm": 0.7116599678993225, "learning_rate": 2e-06, "loss": 0.2142, "step": 2605 }, { "epoch": 0.9720253636702723, "grad_norm": 1.0785940885543823, "learning_rate": 2e-06, "loss": 0.1982, "step": 2606 }, { "epoch": 0.9723983588213353, "grad_norm": 1.0069056749343872, "learning_rate": 2e-06, "loss": 0.2064, "step": 2607 }, { "epoch": 0.9727713539723983, "grad_norm": 1.093491554260254, "learning_rate": 2e-06, "loss": 0.1971, "step": 2608 }, { "epoch": 0.9731443491234614, "grad_norm": 0.9136694669723511, "learning_rate": 2e-06, "loss": 0.1942, "step": 2609 }, { "epoch": 0.9735173442745244, "grad_norm": 0.967004120349884, "learning_rate": 2e-06, "loss": 0.2013, "step": 2610 }, { "epoch": 0.9738903394255874, "grad_norm": 0.8172029852867126, "learning_rate": 2e-06, "loss": 0.2004, "step": 2611 }, { "epoch": 0.9742633345766505, "grad_norm": 0.7451677918434143, "learning_rate": 2e-06, "loss": 0.2023, "step": 2612 }, { "epoch": 0.9746363297277135, "grad_norm": 0.8521310091018677, "learning_rate": 2e-06, "loss": 0.1886, "step": 2613 }, { "epoch": 0.9750093248787766, "grad_norm": 0.8110417127609253, "learning_rate": 2e-06, "loss": 0.2031, "step": 2614 }, { "epoch": 0.9753823200298396, "grad_norm": 1.0696580410003662, "learning_rate": 2e-06, "loss": 0.2, "step": 2615 }, { "epoch": 0.9757553151809026, "grad_norm": 0.8851494193077087, "learning_rate": 2e-06, "loss": 0.2111, "step": 2616 }, { "epoch": 0.9761283103319657, "grad_norm": 1.0403269529342651, "learning_rate": 2e-06, "loss": 0.2164, "step": 2617 }, { "epoch": 0.9765013054830287, "grad_norm": 1.3327375650405884, "learning_rate": 2e-06, "loss": 0.1963, "step": 2618 }, { "epoch": 0.9768743006340918, "grad_norm": 0.9137250781059265, "learning_rate": 2e-06, "loss": 0.2088, "step": 2619 }, { "epoch": 0.9772472957851548, "grad_norm": 1.0072728395462036, "learning_rate": 2e-06, "loss": 0.2103, "step": 2620 }, { "epoch": 0.9776202909362178, "grad_norm": 0.7895276546478271, "learning_rate": 2e-06, "loss": 0.2336, "step": 2621 }, { "epoch": 0.9779932860872809, "grad_norm": 1.1946121454238892, "learning_rate": 2e-06, "loss": 0.2199, "step": 2622 }, { "epoch": 0.9783662812383439, "grad_norm": 0.9407545924186707, "learning_rate": 2e-06, "loss": 0.1948, "step": 2623 }, { "epoch": 0.9787392763894069, "grad_norm": 1.0381468534469604, "learning_rate": 2e-06, "loss": 0.2002, "step": 2624 }, { "epoch": 0.97911227154047, "grad_norm": 0.8310437202453613, "learning_rate": 2e-06, "loss": 0.2026, "step": 2625 }, { "epoch": 0.979485266691533, "grad_norm": 0.7296404838562012, "learning_rate": 2e-06, "loss": 0.2292, "step": 2626 }, { "epoch": 0.9798582618425961, "grad_norm": 0.9984168410301208, "learning_rate": 2e-06, "loss": 0.2038, "step": 2627 }, { "epoch": 0.9802312569936591, "grad_norm": 0.893227219581604, "learning_rate": 2e-06, "loss": 0.2076, "step": 2628 }, { "epoch": 0.9806042521447221, "grad_norm": 0.8872542381286621, "learning_rate": 2e-06, "loss": 0.1855, "step": 2629 }, { "epoch": 0.9809772472957852, "grad_norm": 0.95980304479599, "learning_rate": 2e-06, "loss": 0.2099, "step": 2630 }, { "epoch": 0.9813502424468482, "grad_norm": 0.7821521759033203, "learning_rate": 2e-06, "loss": 0.2234, "step": 2631 }, { "epoch": 0.9817232375979112, "grad_norm": 0.8861757516860962, "learning_rate": 2e-06, "loss": 0.2035, "step": 2632 }, { "epoch": 0.9820962327489743, "grad_norm": 0.8625972270965576, "learning_rate": 2e-06, "loss": 0.2239, "step": 2633 }, { "epoch": 0.9824692279000373, "grad_norm": 0.9387033581733704, "learning_rate": 2e-06, "loss": 0.2084, "step": 2634 }, { "epoch": 0.9828422230511004, "grad_norm": 0.8813108801841736, "learning_rate": 2e-06, "loss": 0.2115, "step": 2635 }, { "epoch": 0.9832152182021634, "grad_norm": 0.9181045889854431, "learning_rate": 2e-06, "loss": 0.196, "step": 2636 }, { "epoch": 0.9835882133532264, "grad_norm": 0.8052351474761963, "learning_rate": 2e-06, "loss": 0.2179, "step": 2637 }, { "epoch": 0.9839612085042895, "grad_norm": 0.6665188670158386, "learning_rate": 2e-06, "loss": 0.2272, "step": 2638 }, { "epoch": 0.9843342036553525, "grad_norm": 0.9237984418869019, "learning_rate": 2e-06, "loss": 0.2077, "step": 2639 }, { "epoch": 0.9847071988064156, "grad_norm": 0.9443796277046204, "learning_rate": 2e-06, "loss": 0.1942, "step": 2640 }, { "epoch": 0.9850801939574786, "grad_norm": 0.8169564008712769, "learning_rate": 2e-06, "loss": 0.2059, "step": 2641 }, { "epoch": 0.9854531891085416, "grad_norm": 0.9299099445343018, "learning_rate": 2e-06, "loss": 0.2111, "step": 2642 }, { "epoch": 0.9858261842596047, "grad_norm": 0.9020723700523376, "learning_rate": 2e-06, "loss": 0.2023, "step": 2643 }, { "epoch": 0.9861991794106677, "grad_norm": 1.0199021100997925, "learning_rate": 2e-06, "loss": 0.2183, "step": 2644 }, { "epoch": 0.9865721745617307, "grad_norm": 1.052562952041626, "learning_rate": 2e-06, "loss": 0.2062, "step": 2645 }, { "epoch": 0.9869451697127938, "grad_norm": 1.334853172302246, "learning_rate": 2e-06, "loss": 0.1993, "step": 2646 }, { "epoch": 0.9873181648638568, "grad_norm": 0.8189576864242554, "learning_rate": 2e-06, "loss": 0.1983, "step": 2647 }, { "epoch": 0.9876911600149199, "grad_norm": 0.7850491404533386, "learning_rate": 2e-06, "loss": 0.1968, "step": 2648 }, { "epoch": 0.9880641551659829, "grad_norm": 0.7639428377151489, "learning_rate": 2e-06, "loss": 0.1934, "step": 2649 }, { "epoch": 0.9884371503170459, "grad_norm": 0.8061408996582031, "learning_rate": 2e-06, "loss": 0.2152, "step": 2650 }, { "epoch": 0.988810145468109, "grad_norm": 0.9092050194740295, "learning_rate": 2e-06, "loss": 0.2185, "step": 2651 }, { "epoch": 0.9891831406191719, "grad_norm": 1.1217612028121948, "learning_rate": 2e-06, "loss": 0.2049, "step": 2652 }, { "epoch": 0.9895561357702349, "grad_norm": 0.8594268560409546, "learning_rate": 2e-06, "loss": 0.2286, "step": 2653 }, { "epoch": 0.989929130921298, "grad_norm": 0.8636600971221924, "learning_rate": 2e-06, "loss": 0.1986, "step": 2654 }, { "epoch": 0.990302126072361, "grad_norm": 0.9485329985618591, "learning_rate": 2e-06, "loss": 0.1905, "step": 2655 }, { "epoch": 0.990675121223424, "grad_norm": 0.9460011720657349, "learning_rate": 2e-06, "loss": 0.2146, "step": 2656 }, { "epoch": 0.9910481163744871, "grad_norm": 0.8610755205154419, "learning_rate": 2e-06, "loss": 0.217, "step": 2657 }, { "epoch": 0.9914211115255501, "grad_norm": 0.9639337658882141, "learning_rate": 2e-06, "loss": 0.2102, "step": 2658 }, { "epoch": 0.9917941066766132, "grad_norm": 0.9938673973083496, "learning_rate": 2e-06, "loss": 0.1737, "step": 2659 }, { "epoch": 0.9921671018276762, "grad_norm": 0.779983401298523, "learning_rate": 2e-06, "loss": 0.2187, "step": 2660 }, { "epoch": 0.9925400969787392, "grad_norm": 0.912548303604126, "learning_rate": 2e-06, "loss": 0.2018, "step": 2661 }, { "epoch": 0.9929130921298023, "grad_norm": 0.8887343406677246, "learning_rate": 2e-06, "loss": 0.224, "step": 2662 }, { "epoch": 0.9932860872808653, "grad_norm": 0.9327540993690491, "learning_rate": 2e-06, "loss": 0.2094, "step": 2663 }, { "epoch": 0.9936590824319284, "grad_norm": 0.8546448349952698, "learning_rate": 2e-06, "loss": 0.2178, "step": 2664 }, { "epoch": 0.9940320775829914, "grad_norm": 0.9061537981033325, "learning_rate": 2e-06, "loss": 0.2056, "step": 2665 }, { "epoch": 0.9944050727340544, "grad_norm": 1.126995325088501, "learning_rate": 2e-06, "loss": 0.2079, "step": 2666 }, { "epoch": 0.9947780678851175, "grad_norm": 0.9529813528060913, "learning_rate": 2e-06, "loss": 0.2238, "step": 2667 }, { "epoch": 0.9951510630361805, "grad_norm": 0.9333920478820801, "learning_rate": 2e-06, "loss": 0.2076, "step": 2668 }, { "epoch": 0.9955240581872435, "grad_norm": 0.7806336879730225, "learning_rate": 2e-06, "loss": 0.2134, "step": 2669 }, { "epoch": 0.9958970533383066, "grad_norm": 0.7717742919921875, "learning_rate": 2e-06, "loss": 0.213, "step": 2670 }, { "epoch": 0.9962700484893696, "grad_norm": 0.8048689961433411, "learning_rate": 2e-06, "loss": 0.2235, "step": 2671 }, { "epoch": 0.9966430436404327, "grad_norm": 1.0661793947219849, "learning_rate": 2e-06, "loss": 0.2024, "step": 2672 }, { "epoch": 0.9970160387914957, "grad_norm": 0.7358060479164124, "learning_rate": 2e-06, "loss": 0.2123, "step": 2673 }, { "epoch": 0.9973890339425587, "grad_norm": 0.9741930365562439, "learning_rate": 2e-06, "loss": 0.2054, "step": 2674 }, { "epoch": 0.9977620290936218, "grad_norm": 0.9048694968223572, "learning_rate": 2e-06, "loss": 0.2112, "step": 2675 }, { "epoch": 0.9981350242446848, "grad_norm": 0.8385659456253052, "learning_rate": 2e-06, "loss": 0.2218, "step": 2676 }, { "epoch": 0.9985080193957478, "grad_norm": 0.7623404264450073, "learning_rate": 2e-06, "loss": 0.2211, "step": 2677 }, { "epoch": 0.9988810145468109, "grad_norm": 0.8900711536407471, "learning_rate": 2e-06, "loss": 0.2175, "step": 2678 }, { "epoch": 0.9992540096978739, "grad_norm": 0.8542293310165405, "learning_rate": 2e-06, "loss": 0.2146, "step": 2679 }, { "epoch": 0.999627004848937, "grad_norm": 1.0713595151901245, "learning_rate": 2e-06, "loss": 0.2132, "step": 2680 }, { "epoch": 1.0, "grad_norm": 1.1600266695022583, "learning_rate": 2e-06, "loss": 0.1978, "step": 2681 }, { "epoch": 1.000372995151063, "grad_norm": 1.0447443723678589, "learning_rate": 2e-06, "loss": 0.1815, "step": 2682 }, { "epoch": 1.000745990302126, "grad_norm": 1.097954273223877, "learning_rate": 2e-06, "loss": 0.1683, "step": 2683 }, { "epoch": 1.001118985453189, "grad_norm": 0.9442607760429382, "learning_rate": 2e-06, "loss": 0.1729, "step": 2684 }, { "epoch": 1.0014919806042522, "grad_norm": 0.7438101768493652, "learning_rate": 2e-06, "loss": 0.1724, "step": 2685 }, { "epoch": 1.001864975755315, "grad_norm": 1.1796773672103882, "learning_rate": 2e-06, "loss": 0.1997, "step": 2686 }, { "epoch": 1.0022379709063782, "grad_norm": 1.198225498199463, "learning_rate": 2e-06, "loss": 0.1906, "step": 2687 }, { "epoch": 1.0026109660574412, "grad_norm": 1.1352828741073608, "learning_rate": 2e-06, "loss": 0.1984, "step": 2688 }, { "epoch": 1.0029839612085043, "grad_norm": 0.967483401298523, "learning_rate": 2e-06, "loss": 0.2043, "step": 2689 }, { "epoch": 1.0033569563595672, "grad_norm": 0.6942856907844543, "learning_rate": 2e-06, "loss": 0.1865, "step": 2690 }, { "epoch": 1.0037299515106304, "grad_norm": 1.2396790981292725, "learning_rate": 2e-06, "loss": 0.1546, "step": 2691 }, { "epoch": 1.0041029466616933, "grad_norm": 1.025323510169983, "learning_rate": 2e-06, "loss": 0.1593, "step": 2692 }, { "epoch": 1.0044759418127565, "grad_norm": 1.27430260181427, "learning_rate": 2e-06, "loss": 0.1563, "step": 2693 }, { "epoch": 1.0048489369638194, "grad_norm": 0.9603986144065857, "learning_rate": 2e-06, "loss": 0.1703, "step": 2694 }, { "epoch": 1.0052219321148825, "grad_norm": 0.7308183312416077, "learning_rate": 2e-06, "loss": 0.18, "step": 2695 }, { "epoch": 1.0055949272659455, "grad_norm": 0.8312178254127502, "learning_rate": 2e-06, "loss": 0.1717, "step": 2696 }, { "epoch": 1.0059679224170086, "grad_norm": 0.9816367030143738, "learning_rate": 2e-06, "loss": 0.1744, "step": 2697 }, { "epoch": 1.0063409175680715, "grad_norm": 0.9169459342956543, "learning_rate": 2e-06, "loss": 0.2004, "step": 2698 }, { "epoch": 1.0067139127191347, "grad_norm": 0.931804895401001, "learning_rate": 2e-06, "loss": 0.198, "step": 2699 }, { "epoch": 1.0070869078701976, "grad_norm": 2.388424873352051, "learning_rate": 2e-06, "loss": 0.1945, "step": 2700 }, { "epoch": 1.0074599030212608, "grad_norm": 0.856780469417572, "learning_rate": 2e-06, "loss": 0.1853, "step": 2701 }, { "epoch": 1.0078328981723237, "grad_norm": 0.8598095178604126, "learning_rate": 2e-06, "loss": 0.1566, "step": 2702 }, { "epoch": 1.0082058933233868, "grad_norm": 0.9442662596702576, "learning_rate": 2e-06, "loss": 0.1674, "step": 2703 }, { "epoch": 1.0085788884744498, "grad_norm": 0.7884081602096558, "learning_rate": 2e-06, "loss": 0.1612, "step": 2704 }, { "epoch": 1.008951883625513, "grad_norm": 0.8441736698150635, "learning_rate": 2e-06, "loss": 0.1724, "step": 2705 }, { "epoch": 1.0093248787765758, "grad_norm": 0.7050710320472717, "learning_rate": 2e-06, "loss": 0.2081, "step": 2706 }, { "epoch": 1.009697873927639, "grad_norm": 0.9839222431182861, "learning_rate": 2e-06, "loss": 0.1941, "step": 2707 }, { "epoch": 1.010070869078702, "grad_norm": 0.8034447431564331, "learning_rate": 2e-06, "loss": 0.179, "step": 2708 }, { "epoch": 1.010443864229765, "grad_norm": 0.8290744423866272, "learning_rate": 2e-06, "loss": 0.1818, "step": 2709 }, { "epoch": 1.010816859380828, "grad_norm": 0.7609930634498596, "learning_rate": 2e-06, "loss": 0.173, "step": 2710 }, { "epoch": 1.0111898545318911, "grad_norm": 0.8000243902206421, "learning_rate": 2e-06, "loss": 0.1733, "step": 2711 }, { "epoch": 1.011562849682954, "grad_norm": 0.8761548399925232, "learning_rate": 2e-06, "loss": 0.1767, "step": 2712 }, { "epoch": 1.0119358448340172, "grad_norm": 0.9136971831321716, "learning_rate": 2e-06, "loss": 0.214, "step": 2713 }, { "epoch": 1.0123088399850801, "grad_norm": 1.1395328044891357, "learning_rate": 2e-06, "loss": 0.1711, "step": 2714 }, { "epoch": 1.0126818351361433, "grad_norm": 1.085076093673706, "learning_rate": 2e-06, "loss": 0.1851, "step": 2715 }, { "epoch": 1.0130548302872062, "grad_norm": 0.7084498405456543, "learning_rate": 2e-06, "loss": 0.1864, "step": 2716 }, { "epoch": 1.0134278254382694, "grad_norm": 0.9580311179161072, "learning_rate": 2e-06, "loss": 0.165, "step": 2717 }, { "epoch": 1.0138008205893323, "grad_norm": 1.0756787061691284, "learning_rate": 2e-06, "loss": 0.1947, "step": 2718 }, { "epoch": 1.0141738157403954, "grad_norm": 0.8770915865898132, "learning_rate": 2e-06, "loss": 0.1717, "step": 2719 }, { "epoch": 1.0145468108914584, "grad_norm": 0.8339223861694336, "learning_rate": 2e-06, "loss": 0.185, "step": 2720 }, { "epoch": 1.0149198060425215, "grad_norm": 1.1434849500656128, "learning_rate": 2e-06, "loss": 0.1463, "step": 2721 }, { "epoch": 1.0152928011935844, "grad_norm": 0.925717830657959, "learning_rate": 2e-06, "loss": 0.1806, "step": 2722 }, { "epoch": 1.0156657963446476, "grad_norm": 0.873471200466156, "learning_rate": 2e-06, "loss": 0.1882, "step": 2723 }, { "epoch": 1.0160387914957105, "grad_norm": 0.74427330493927, "learning_rate": 2e-06, "loss": 0.1646, "step": 2724 }, { "epoch": 1.0164117866467737, "grad_norm": 0.9794034361839294, "learning_rate": 2e-06, "loss": 0.1846, "step": 2725 }, { "epoch": 1.0167847817978366, "grad_norm": 0.6818392276763916, "learning_rate": 2e-06, "loss": 0.1793, "step": 2726 }, { "epoch": 1.0171577769488998, "grad_norm": 0.9442707300186157, "learning_rate": 2e-06, "loss": 0.1719, "step": 2727 }, { "epoch": 1.0175307720999627, "grad_norm": 1.2070300579071045, "learning_rate": 2e-06, "loss": 0.1853, "step": 2728 }, { "epoch": 1.0179037672510258, "grad_norm": 0.8791511654853821, "learning_rate": 2e-06, "loss": 0.1651, "step": 2729 }, { "epoch": 1.0182767624020888, "grad_norm": 0.6559994220733643, "learning_rate": 2e-06, "loss": 0.2139, "step": 2730 }, { "epoch": 1.018649757553152, "grad_norm": 0.9310471415519714, "learning_rate": 2e-06, "loss": 0.1754, "step": 2731 }, { "epoch": 1.0190227527042148, "grad_norm": 0.7153603434562683, "learning_rate": 2e-06, "loss": 0.1544, "step": 2732 }, { "epoch": 1.019395747855278, "grad_norm": 1.0573346614837646, "learning_rate": 2e-06, "loss": 0.2107, "step": 2733 }, { "epoch": 1.019768743006341, "grad_norm": 0.9007905125617981, "learning_rate": 2e-06, "loss": 0.1801, "step": 2734 }, { "epoch": 1.020141738157404, "grad_norm": 0.7480244636535645, "learning_rate": 2e-06, "loss": 0.1817, "step": 2735 }, { "epoch": 1.020514733308467, "grad_norm": 0.9727272987365723, "learning_rate": 2e-06, "loss": 0.1814, "step": 2736 }, { "epoch": 1.0208877284595301, "grad_norm": 1.053868293762207, "learning_rate": 2e-06, "loss": 0.1529, "step": 2737 }, { "epoch": 1.021260723610593, "grad_norm": 0.9163157939910889, "learning_rate": 2e-06, "loss": 0.1969, "step": 2738 }, { "epoch": 1.0216337187616562, "grad_norm": 0.8282973170280457, "learning_rate": 2e-06, "loss": 0.1617, "step": 2739 }, { "epoch": 1.0220067139127191, "grad_norm": 0.7834814190864563, "learning_rate": 2e-06, "loss": 0.1561, "step": 2740 }, { "epoch": 1.0223797090637823, "grad_norm": 0.6656597256660461, "learning_rate": 2e-06, "loss": 0.2023, "step": 2741 }, { "epoch": 1.0227527042148452, "grad_norm": 0.7474796772003174, "learning_rate": 2e-06, "loss": 0.1629, "step": 2742 }, { "epoch": 1.0231256993659081, "grad_norm": 0.7348130941390991, "learning_rate": 2e-06, "loss": 0.1786, "step": 2743 }, { "epoch": 1.0234986945169713, "grad_norm": 0.7941016554832458, "learning_rate": 2e-06, "loss": 0.1692, "step": 2744 }, { "epoch": 1.0238716896680342, "grad_norm": 1.0550957918167114, "learning_rate": 2e-06, "loss": 0.1758, "step": 2745 }, { "epoch": 1.0242446848190974, "grad_norm": 0.7649113535881042, "learning_rate": 2e-06, "loss": 0.1744, "step": 2746 }, { "epoch": 1.0246176799701603, "grad_norm": 1.1162025928497314, "learning_rate": 2e-06, "loss": 0.1745, "step": 2747 }, { "epoch": 1.0249906751212234, "grad_norm": 0.810795783996582, "learning_rate": 2e-06, "loss": 0.1734, "step": 2748 }, { "epoch": 1.0253636702722864, "grad_norm": 1.1281132698059082, "learning_rate": 2e-06, "loss": 0.1684, "step": 2749 }, { "epoch": 1.0257366654233495, "grad_norm": 0.8706809878349304, "learning_rate": 2e-06, "loss": 0.1688, "step": 2750 }, { "epoch": 1.0261096605744124, "grad_norm": 0.7799490094184875, "learning_rate": 2e-06, "loss": 0.1789, "step": 2751 }, { "epoch": 1.0264826557254756, "grad_norm": 0.835249662399292, "learning_rate": 2e-06, "loss": 0.1705, "step": 2752 }, { "epoch": 1.0268556508765385, "grad_norm": 0.8919861316680908, "learning_rate": 2e-06, "loss": 0.1846, "step": 2753 }, { "epoch": 1.0272286460276017, "grad_norm": 0.8469378352165222, "learning_rate": 2e-06, "loss": 0.1833, "step": 2754 }, { "epoch": 1.0276016411786646, "grad_norm": 0.8274410367012024, "learning_rate": 2e-06, "loss": 0.1628, "step": 2755 }, { "epoch": 1.0279746363297277, "grad_norm": 1.0846909284591675, "learning_rate": 2e-06, "loss": 0.1833, "step": 2756 }, { "epoch": 1.0283476314807907, "grad_norm": 1.0431996583938599, "learning_rate": 2e-06, "loss": 0.1878, "step": 2757 }, { "epoch": 1.0287206266318538, "grad_norm": 0.8321583271026611, "learning_rate": 2e-06, "loss": 0.1844, "step": 2758 }, { "epoch": 1.0290936217829167, "grad_norm": 0.7139904499053955, "learning_rate": 2e-06, "loss": 0.199, "step": 2759 }, { "epoch": 1.02946661693398, "grad_norm": 0.7880525588989258, "learning_rate": 2e-06, "loss": 0.1762, "step": 2760 }, { "epoch": 1.0298396120850428, "grad_norm": 0.8176659345626831, "learning_rate": 2e-06, "loss": 0.1518, "step": 2761 }, { "epoch": 1.030212607236106, "grad_norm": 0.7474918365478516, "learning_rate": 2e-06, "loss": 0.1884, "step": 2762 }, { "epoch": 1.030585602387169, "grad_norm": 0.8250299692153931, "learning_rate": 2e-06, "loss": 0.1598, "step": 2763 }, { "epoch": 1.030958597538232, "grad_norm": 0.7233309745788574, "learning_rate": 2e-06, "loss": 0.1644, "step": 2764 }, { "epoch": 1.031331592689295, "grad_norm": 0.809908926486969, "learning_rate": 2e-06, "loss": 0.1797, "step": 2765 }, { "epoch": 1.0317045878403581, "grad_norm": 1.2023403644561768, "learning_rate": 2e-06, "loss": 0.1963, "step": 2766 }, { "epoch": 1.032077582991421, "grad_norm": 0.9969161152839661, "learning_rate": 2e-06, "loss": 0.1709, "step": 2767 }, { "epoch": 1.0324505781424842, "grad_norm": 1.0414427518844604, "learning_rate": 2e-06, "loss": 0.1622, "step": 2768 }, { "epoch": 1.0328235732935471, "grad_norm": 0.8285816311836243, "learning_rate": 2e-06, "loss": 0.1774, "step": 2769 }, { "epoch": 1.0331965684446103, "grad_norm": 0.7561730742454529, "learning_rate": 2e-06, "loss": 0.1752, "step": 2770 }, { "epoch": 1.0335695635956732, "grad_norm": 0.7270819544792175, "learning_rate": 2e-06, "loss": 0.1739, "step": 2771 }, { "epoch": 1.0339425587467364, "grad_norm": 0.8863081336021423, "learning_rate": 2e-06, "loss": 0.1999, "step": 2772 }, { "epoch": 1.0343155538977993, "grad_norm": 1.2154971361160278, "learning_rate": 2e-06, "loss": 0.1828, "step": 2773 }, { "epoch": 1.0346885490488624, "grad_norm": 0.7708361744880676, "learning_rate": 2e-06, "loss": 0.2048, "step": 2774 }, { "epoch": 1.0350615441999254, "grad_norm": 0.7586668133735657, "learning_rate": 2e-06, "loss": 0.1942, "step": 2775 }, { "epoch": 1.0354345393509885, "grad_norm": 0.9584177732467651, "learning_rate": 2e-06, "loss": 0.1887, "step": 2776 }, { "epoch": 1.0358075345020514, "grad_norm": 1.0107324123382568, "learning_rate": 2e-06, "loss": 0.1656, "step": 2777 }, { "epoch": 1.0361805296531146, "grad_norm": 0.683355987071991, "learning_rate": 2e-06, "loss": 0.1714, "step": 2778 }, { "epoch": 1.0365535248041775, "grad_norm": 0.8478438258171082, "learning_rate": 2e-06, "loss": 0.1979, "step": 2779 }, { "epoch": 1.0369265199552407, "grad_norm": 0.8257111310958862, "learning_rate": 2e-06, "loss": 0.19, "step": 2780 }, { "epoch": 1.0372995151063036, "grad_norm": 0.8592674732208252, "learning_rate": 2e-06, "loss": 0.1588, "step": 2781 }, { "epoch": 1.0376725102573667, "grad_norm": 0.8576211333274841, "learning_rate": 2e-06, "loss": 0.1807, "step": 2782 }, { "epoch": 1.0380455054084297, "grad_norm": 1.0251588821411133, "learning_rate": 2e-06, "loss": 0.1749, "step": 2783 }, { "epoch": 1.0384185005594928, "grad_norm": 0.8940705060958862, "learning_rate": 2e-06, "loss": 0.1715, "step": 2784 }, { "epoch": 1.0387914957105557, "grad_norm": 0.8707239031791687, "learning_rate": 2e-06, "loss": 0.1891, "step": 2785 }, { "epoch": 1.0391644908616189, "grad_norm": 0.9826474189758301, "learning_rate": 2e-06, "loss": 0.2028, "step": 2786 }, { "epoch": 1.0395374860126818, "grad_norm": 0.8269293904304504, "learning_rate": 2e-06, "loss": 0.1758, "step": 2787 }, { "epoch": 1.039910481163745, "grad_norm": 0.7608094811439514, "learning_rate": 2e-06, "loss": 0.1362, "step": 2788 }, { "epoch": 1.0402834763148079, "grad_norm": 0.8680043816566467, "learning_rate": 2e-06, "loss": 0.1715, "step": 2789 }, { "epoch": 1.040656471465871, "grad_norm": 1.1029447317123413, "learning_rate": 2e-06, "loss": 0.1771, "step": 2790 }, { "epoch": 1.041029466616934, "grad_norm": 0.8991516828536987, "learning_rate": 2e-06, "loss": 0.1528, "step": 2791 }, { "epoch": 1.0414024617679971, "grad_norm": 1.3283387422561646, "learning_rate": 2e-06, "loss": 0.192, "step": 2792 }, { "epoch": 1.04177545691906, "grad_norm": 1.133072853088379, "learning_rate": 2e-06, "loss": 0.182, "step": 2793 }, { "epoch": 1.0421484520701232, "grad_norm": 0.9449373483657837, "learning_rate": 2e-06, "loss": 0.1728, "step": 2794 }, { "epoch": 1.0425214472211861, "grad_norm": 1.0297455787658691, "learning_rate": 2e-06, "loss": 0.1609, "step": 2795 }, { "epoch": 1.0428944423722493, "grad_norm": 0.7776621580123901, "learning_rate": 2e-06, "loss": 0.1935, "step": 2796 }, { "epoch": 1.0432674375233122, "grad_norm": 0.8139644265174866, "learning_rate": 2e-06, "loss": 0.1818, "step": 2797 }, { "epoch": 1.0436404326743753, "grad_norm": 0.7957473993301392, "learning_rate": 2e-06, "loss": 0.2014, "step": 2798 }, { "epoch": 1.0440134278254383, "grad_norm": 0.8170952796936035, "learning_rate": 2e-06, "loss": 0.1896, "step": 2799 }, { "epoch": 1.0443864229765012, "grad_norm": 0.9969373941421509, "learning_rate": 2e-06, "loss": 0.1587, "step": 2800 }, { "epoch": 1.0447594181275643, "grad_norm": 0.906288743019104, "learning_rate": 2e-06, "loss": 0.195, "step": 2801 }, { "epoch": 1.0451324132786275, "grad_norm": 0.7835710644721985, "learning_rate": 2e-06, "loss": 0.163, "step": 2802 }, { "epoch": 1.0455054084296904, "grad_norm": 0.8455098271369934, "learning_rate": 2e-06, "loss": 0.1914, "step": 2803 }, { "epoch": 1.0458784035807533, "grad_norm": 0.8388649821281433, "learning_rate": 2e-06, "loss": 0.1858, "step": 2804 }, { "epoch": 1.0462513987318165, "grad_norm": 0.7097242474555969, "learning_rate": 2e-06, "loss": 0.1679, "step": 2805 }, { "epoch": 1.0466243938828794, "grad_norm": 0.847710132598877, "learning_rate": 2e-06, "loss": 0.1547, "step": 2806 }, { "epoch": 1.0469973890339426, "grad_norm": 0.7290799617767334, "learning_rate": 2e-06, "loss": 0.1789, "step": 2807 }, { "epoch": 1.0473703841850055, "grad_norm": 0.8581903576850891, "learning_rate": 2e-06, "loss": 0.1685, "step": 2808 }, { "epoch": 1.0477433793360686, "grad_norm": 0.8401177525520325, "learning_rate": 2e-06, "loss": 0.1564, "step": 2809 }, { "epoch": 1.0481163744871316, "grad_norm": 0.9025365710258484, "learning_rate": 2e-06, "loss": 0.1623, "step": 2810 }, { "epoch": 1.0484893696381947, "grad_norm": 1.023821473121643, "learning_rate": 2e-06, "loss": 0.177, "step": 2811 }, { "epoch": 1.0488623647892577, "grad_norm": 0.7299588918685913, "learning_rate": 2e-06, "loss": 0.1912, "step": 2812 }, { "epoch": 1.0492353599403208, "grad_norm": 0.9720585346221924, "learning_rate": 2e-06, "loss": 0.1898, "step": 2813 }, { "epoch": 1.0496083550913837, "grad_norm": 0.7704899907112122, "learning_rate": 2e-06, "loss": 0.1798, "step": 2814 }, { "epoch": 1.0499813502424469, "grad_norm": 0.7225067615509033, "learning_rate": 2e-06, "loss": 0.1725, "step": 2815 }, { "epoch": 1.0503543453935098, "grad_norm": 0.8607799410820007, "learning_rate": 2e-06, "loss": 0.1675, "step": 2816 }, { "epoch": 1.050727340544573, "grad_norm": 1.3161778450012207, "learning_rate": 2e-06, "loss": 0.1823, "step": 2817 }, { "epoch": 1.0511003356956359, "grad_norm": 0.9690086245536804, "learning_rate": 2e-06, "loss": 0.1806, "step": 2818 }, { "epoch": 1.051473330846699, "grad_norm": 0.840670645236969, "learning_rate": 2e-06, "loss": 0.184, "step": 2819 }, { "epoch": 1.051846325997762, "grad_norm": 0.8303834795951843, "learning_rate": 2e-06, "loss": 0.1681, "step": 2820 }, { "epoch": 1.052219321148825, "grad_norm": 0.957219660282135, "learning_rate": 2e-06, "loss": 0.1749, "step": 2821 }, { "epoch": 1.052592316299888, "grad_norm": 0.7577560544013977, "learning_rate": 2e-06, "loss": 0.1775, "step": 2822 }, { "epoch": 1.0529653114509512, "grad_norm": 0.7460768818855286, "learning_rate": 2e-06, "loss": 0.1866, "step": 2823 }, { "epoch": 1.053338306602014, "grad_norm": 1.096859097480774, "learning_rate": 2e-06, "loss": 0.1875, "step": 2824 }, { "epoch": 1.0537113017530773, "grad_norm": 0.997310996055603, "learning_rate": 2e-06, "loss": 0.1936, "step": 2825 }, { "epoch": 1.0540842969041402, "grad_norm": 1.0109567642211914, "learning_rate": 2e-06, "loss": 0.1698, "step": 2826 }, { "epoch": 1.0544572920552033, "grad_norm": 0.8871678709983826, "learning_rate": 2e-06, "loss": 0.1903, "step": 2827 }, { "epoch": 1.0548302872062663, "grad_norm": 0.7609587907791138, "learning_rate": 2e-06, "loss": 0.1991, "step": 2828 }, { "epoch": 1.0552032823573294, "grad_norm": 0.8347757458686829, "learning_rate": 2e-06, "loss": 0.1803, "step": 2829 }, { "epoch": 1.0555762775083923, "grad_norm": 0.8690023422241211, "learning_rate": 2e-06, "loss": 0.1686, "step": 2830 }, { "epoch": 1.0559492726594555, "grad_norm": 0.8629107475280762, "learning_rate": 2e-06, "loss": 0.2154, "step": 2831 }, { "epoch": 1.0563222678105184, "grad_norm": 0.9058822393417358, "learning_rate": 2e-06, "loss": 0.1817, "step": 2832 }, { "epoch": 1.0566952629615816, "grad_norm": 0.7954281568527222, "learning_rate": 2e-06, "loss": 0.1917, "step": 2833 }, { "epoch": 1.0570682581126445, "grad_norm": 0.9204204082489014, "learning_rate": 2e-06, "loss": 0.1856, "step": 2834 }, { "epoch": 1.0574412532637076, "grad_norm": 0.983889102935791, "learning_rate": 2e-06, "loss": 0.1931, "step": 2835 }, { "epoch": 1.0578142484147706, "grad_norm": 0.9389910697937012, "learning_rate": 2e-06, "loss": 0.1805, "step": 2836 }, { "epoch": 1.0581872435658337, "grad_norm": 1.012071967124939, "learning_rate": 2e-06, "loss": 0.1857, "step": 2837 }, { "epoch": 1.0585602387168966, "grad_norm": 0.7518911361694336, "learning_rate": 2e-06, "loss": 0.1758, "step": 2838 }, { "epoch": 1.0589332338679598, "grad_norm": 1.064497947692871, "learning_rate": 2e-06, "loss": 0.1924, "step": 2839 }, { "epoch": 1.0593062290190227, "grad_norm": 0.708740770816803, "learning_rate": 2e-06, "loss": 0.1907, "step": 2840 }, { "epoch": 1.0596792241700859, "grad_norm": 1.0248111486434937, "learning_rate": 2e-06, "loss": 0.1612, "step": 2841 }, { "epoch": 1.0600522193211488, "grad_norm": 0.824824333190918, "learning_rate": 2e-06, "loss": 0.1985, "step": 2842 }, { "epoch": 1.060425214472212, "grad_norm": 0.8297292590141296, "learning_rate": 2e-06, "loss": 0.1702, "step": 2843 }, { "epoch": 1.0607982096232749, "grad_norm": 0.838735044002533, "learning_rate": 2e-06, "loss": 0.1891, "step": 2844 }, { "epoch": 1.061171204774338, "grad_norm": 0.8866360783576965, "learning_rate": 2e-06, "loss": 0.1836, "step": 2845 }, { "epoch": 1.061544199925401, "grad_norm": 0.8161541223526001, "learning_rate": 2e-06, "loss": 0.187, "step": 2846 }, { "epoch": 1.061917195076464, "grad_norm": 0.8858712315559387, "learning_rate": 2e-06, "loss": 0.174, "step": 2847 }, { "epoch": 1.062290190227527, "grad_norm": 1.765224814414978, "learning_rate": 2e-06, "loss": 0.1921, "step": 2848 }, { "epoch": 1.0626631853785902, "grad_norm": 0.7505460977554321, "learning_rate": 2e-06, "loss": 0.1714, "step": 2849 }, { "epoch": 1.063036180529653, "grad_norm": 0.921471118927002, "learning_rate": 2e-06, "loss": 0.1787, "step": 2850 }, { "epoch": 1.0634091756807162, "grad_norm": 0.8976722359657288, "learning_rate": 2e-06, "loss": 0.1744, "step": 2851 }, { "epoch": 1.0637821708317792, "grad_norm": 0.6925411224365234, "learning_rate": 2e-06, "loss": 0.1704, "step": 2852 }, { "epoch": 1.0641551659828423, "grad_norm": 0.9396781325340271, "learning_rate": 2e-06, "loss": 0.1753, "step": 2853 }, { "epoch": 1.0645281611339052, "grad_norm": 0.7130155563354492, "learning_rate": 2e-06, "loss": 0.189, "step": 2854 }, { "epoch": 1.0649011562849684, "grad_norm": 0.9529948234558105, "learning_rate": 2e-06, "loss": 0.1843, "step": 2855 }, { "epoch": 1.0652741514360313, "grad_norm": 0.8656014800071716, "learning_rate": 2e-06, "loss": 0.1545, "step": 2856 }, { "epoch": 1.0656471465870943, "grad_norm": 0.9576983451843262, "learning_rate": 2e-06, "loss": 0.1847, "step": 2857 }, { "epoch": 1.0660201417381574, "grad_norm": 0.8504987955093384, "learning_rate": 2e-06, "loss": 0.1717, "step": 2858 }, { "epoch": 1.0663931368892205, "grad_norm": 0.8877257704734802, "learning_rate": 2e-06, "loss": 0.1713, "step": 2859 }, { "epoch": 1.0667661320402835, "grad_norm": 0.8906508088111877, "learning_rate": 2e-06, "loss": 0.1885, "step": 2860 }, { "epoch": 1.0671391271913464, "grad_norm": 0.8110640645027161, "learning_rate": 2e-06, "loss": 0.194, "step": 2861 }, { "epoch": 1.0675121223424096, "grad_norm": 0.790050745010376, "learning_rate": 2e-06, "loss": 0.1811, "step": 2862 }, { "epoch": 1.0678851174934727, "grad_norm": 1.0702248811721802, "learning_rate": 2e-06, "loss": 0.1654, "step": 2863 }, { "epoch": 1.0682581126445356, "grad_norm": 0.9709312915802002, "learning_rate": 2e-06, "loss": 0.1593, "step": 2864 }, { "epoch": 1.0686311077955986, "grad_norm": 0.8817199468612671, "learning_rate": 2e-06, "loss": 0.1573, "step": 2865 }, { "epoch": 1.0690041029466617, "grad_norm": 0.8344977498054504, "learning_rate": 2e-06, "loss": 0.1786, "step": 2866 }, { "epoch": 1.0693770980977246, "grad_norm": 0.7899600863456726, "learning_rate": 2e-06, "loss": 0.179, "step": 2867 }, { "epoch": 1.0697500932487878, "grad_norm": 0.8667052388191223, "learning_rate": 2e-06, "loss": 0.1845, "step": 2868 }, { "epoch": 1.0701230883998507, "grad_norm": 0.7714499831199646, "learning_rate": 2e-06, "loss": 0.1551, "step": 2869 }, { "epoch": 1.0704960835509139, "grad_norm": 0.7883120775222778, "learning_rate": 2e-06, "loss": 0.1674, "step": 2870 }, { "epoch": 1.0708690787019768, "grad_norm": 1.0375494956970215, "learning_rate": 2e-06, "loss": 0.1885, "step": 2871 }, { "epoch": 1.07124207385304, "grad_norm": 0.8083270788192749, "learning_rate": 2e-06, "loss": 0.1808, "step": 2872 }, { "epoch": 1.0716150690041029, "grad_norm": 0.806652307510376, "learning_rate": 2e-06, "loss": 0.1815, "step": 2873 }, { "epoch": 1.071988064155166, "grad_norm": 0.9385865926742554, "learning_rate": 2e-06, "loss": 0.1951, "step": 2874 }, { "epoch": 1.072361059306229, "grad_norm": 0.7775506973266602, "learning_rate": 2e-06, "loss": 0.1428, "step": 2875 }, { "epoch": 1.072734054457292, "grad_norm": 0.7739818096160889, "learning_rate": 2e-06, "loss": 0.1947, "step": 2876 }, { "epoch": 1.073107049608355, "grad_norm": 1.051674246788025, "learning_rate": 2e-06, "loss": 0.1671, "step": 2877 }, { "epoch": 1.0734800447594182, "grad_norm": 0.7527362108230591, "learning_rate": 2e-06, "loss": 0.1681, "step": 2878 }, { "epoch": 1.073853039910481, "grad_norm": 0.9900997877120972, "learning_rate": 2e-06, "loss": 0.1529, "step": 2879 }, { "epoch": 1.0742260350615442, "grad_norm": 0.9112406969070435, "learning_rate": 2e-06, "loss": 0.1613, "step": 2880 }, { "epoch": 1.0745990302126072, "grad_norm": 1.1524943113327026, "learning_rate": 2e-06, "loss": 0.1831, "step": 2881 }, { "epoch": 1.0749720253636703, "grad_norm": 6.8672261238098145, "learning_rate": 2e-06, "loss": 0.2024, "step": 2882 }, { "epoch": 1.0753450205147332, "grad_norm": 0.999470591545105, "learning_rate": 2e-06, "loss": 0.173, "step": 2883 }, { "epoch": 1.0757180156657964, "grad_norm": 0.8699063062667847, "learning_rate": 2e-06, "loss": 0.1607, "step": 2884 }, { "epoch": 1.0760910108168593, "grad_norm": 0.7059068083763123, "learning_rate": 2e-06, "loss": 0.1925, "step": 2885 }, { "epoch": 1.0764640059679225, "grad_norm": 0.8551477193832397, "learning_rate": 2e-06, "loss": 0.1768, "step": 2886 }, { "epoch": 1.0768370011189854, "grad_norm": 0.8209019303321838, "learning_rate": 2e-06, "loss": 0.1957, "step": 2887 }, { "epoch": 1.0772099962700485, "grad_norm": 0.9296057820320129, "learning_rate": 2e-06, "loss": 0.1721, "step": 2888 }, { "epoch": 1.0775829914211115, "grad_norm": 0.8678487539291382, "learning_rate": 2e-06, "loss": 0.1706, "step": 2889 }, { "epoch": 1.0779559865721746, "grad_norm": 0.8345114588737488, "learning_rate": 2e-06, "loss": 0.2143, "step": 2890 }, { "epoch": 1.0783289817232375, "grad_norm": 0.6958304047584534, "learning_rate": 2e-06, "loss": 0.1827, "step": 2891 }, { "epoch": 1.0787019768743007, "grad_norm": 0.7273594737052917, "learning_rate": 2e-06, "loss": 0.1772, "step": 2892 }, { "epoch": 1.0790749720253636, "grad_norm": 0.8090475797653198, "learning_rate": 2e-06, "loss": 0.1818, "step": 2893 }, { "epoch": 1.0794479671764268, "grad_norm": 1.0084630250930786, "learning_rate": 2e-06, "loss": 0.1732, "step": 2894 }, { "epoch": 1.0798209623274897, "grad_norm": 0.7793363928794861, "learning_rate": 2e-06, "loss": 0.1983, "step": 2895 }, { "epoch": 1.0801939574785528, "grad_norm": 0.8659668564796448, "learning_rate": 2e-06, "loss": 0.1805, "step": 2896 }, { "epoch": 1.0805669526296158, "grad_norm": 0.8860588669776917, "learning_rate": 2e-06, "loss": 0.1699, "step": 2897 }, { "epoch": 1.080939947780679, "grad_norm": 0.8294172883033752, "learning_rate": 2e-06, "loss": 0.1714, "step": 2898 }, { "epoch": 1.0813129429317418, "grad_norm": 0.9888381958007812, "learning_rate": 2e-06, "loss": 0.1656, "step": 2899 }, { "epoch": 1.081685938082805, "grad_norm": 0.9827030897140503, "learning_rate": 2e-06, "loss": 0.2198, "step": 2900 }, { "epoch": 1.082058933233868, "grad_norm": 0.7244043350219727, "learning_rate": 2e-06, "loss": 0.1517, "step": 2901 }, { "epoch": 1.082431928384931, "grad_norm": 0.8366286158561707, "learning_rate": 2e-06, "loss": 0.1576, "step": 2902 }, { "epoch": 1.082804923535994, "grad_norm": 0.7045345902442932, "learning_rate": 2e-06, "loss": 0.1889, "step": 2903 }, { "epoch": 1.0831779186870572, "grad_norm": 0.9545950293540955, "learning_rate": 2e-06, "loss": 0.1813, "step": 2904 }, { "epoch": 1.08355091383812, "grad_norm": 0.8982693552970886, "learning_rate": 2e-06, "loss": 0.1834, "step": 2905 }, { "epoch": 1.0839239089891832, "grad_norm": 0.9285375475883484, "learning_rate": 2e-06, "loss": 0.1701, "step": 2906 }, { "epoch": 1.0842969041402462, "grad_norm": 0.7864367961883545, "learning_rate": 2e-06, "loss": 0.1906, "step": 2907 }, { "epoch": 1.0846698992913093, "grad_norm": 0.6869208812713623, "learning_rate": 2e-06, "loss": 0.1843, "step": 2908 }, { "epoch": 1.0850428944423722, "grad_norm": 0.915077805519104, "learning_rate": 2e-06, "loss": 0.1597, "step": 2909 }, { "epoch": 1.0854158895934354, "grad_norm": 0.6596343517303467, "learning_rate": 2e-06, "loss": 0.1755, "step": 2910 }, { "epoch": 1.0857888847444983, "grad_norm": 0.940787136554718, "learning_rate": 2e-06, "loss": 0.1681, "step": 2911 }, { "epoch": 1.0861618798955615, "grad_norm": 1.0210473537445068, "learning_rate": 2e-06, "loss": 0.1772, "step": 2912 }, { "epoch": 1.0865348750466244, "grad_norm": 0.7446099519729614, "learning_rate": 2e-06, "loss": 0.1634, "step": 2913 }, { "epoch": 1.0869078701976875, "grad_norm": 0.9767152667045593, "learning_rate": 2e-06, "loss": 0.1787, "step": 2914 }, { "epoch": 1.0872808653487505, "grad_norm": 0.6213828325271606, "learning_rate": 2e-06, "loss": 0.1612, "step": 2915 }, { "epoch": 1.0876538604998136, "grad_norm": 0.9520675539970398, "learning_rate": 2e-06, "loss": 0.1895, "step": 2916 }, { "epoch": 1.0880268556508765, "grad_norm": 0.8916996121406555, "learning_rate": 2e-06, "loss": 0.1713, "step": 2917 }, { "epoch": 1.0883998508019395, "grad_norm": 0.914394199848175, "learning_rate": 2e-06, "loss": 0.1869, "step": 2918 }, { "epoch": 1.0887728459530026, "grad_norm": 0.6412103772163391, "learning_rate": 2e-06, "loss": 0.184, "step": 2919 }, { "epoch": 1.0891458411040658, "grad_norm": 0.9672562479972839, "learning_rate": 2e-06, "loss": 0.1868, "step": 2920 }, { "epoch": 1.0895188362551287, "grad_norm": 0.8313114047050476, "learning_rate": 2e-06, "loss": 0.1667, "step": 2921 }, { "epoch": 1.0898918314061916, "grad_norm": 0.9475269317626953, "learning_rate": 2e-06, "loss": 0.161, "step": 2922 }, { "epoch": 1.0902648265572548, "grad_norm": 0.8919236660003662, "learning_rate": 2e-06, "loss": 0.1859, "step": 2923 }, { "epoch": 1.0906378217083177, "grad_norm": 1.018736481666565, "learning_rate": 2e-06, "loss": 0.1886, "step": 2924 }, { "epoch": 1.0910108168593808, "grad_norm": 1.1543399095535278, "learning_rate": 2e-06, "loss": 0.1547, "step": 2925 }, { "epoch": 1.0913838120104438, "grad_norm": 1.2423560619354248, "learning_rate": 2e-06, "loss": 0.1403, "step": 2926 }, { "epoch": 1.091756807161507, "grad_norm": 0.8271656036376953, "learning_rate": 2e-06, "loss": 0.1875, "step": 2927 }, { "epoch": 1.0921298023125698, "grad_norm": 0.8620582222938538, "learning_rate": 2e-06, "loss": 0.1749, "step": 2928 }, { "epoch": 1.092502797463633, "grad_norm": 1.0529457330703735, "learning_rate": 2e-06, "loss": 0.1861, "step": 2929 }, { "epoch": 1.092875792614696, "grad_norm": 0.8754382729530334, "learning_rate": 2e-06, "loss": 0.1811, "step": 2930 }, { "epoch": 1.093248787765759, "grad_norm": 0.911868691444397, "learning_rate": 2e-06, "loss": 0.1662, "step": 2931 }, { "epoch": 1.093621782916822, "grad_norm": 0.8108081221580505, "learning_rate": 2e-06, "loss": 0.162, "step": 2932 }, { "epoch": 1.0939947780678851, "grad_norm": 0.8147796988487244, "learning_rate": 2e-06, "loss": 0.1774, "step": 2933 }, { "epoch": 1.094367773218948, "grad_norm": 0.9889827966690063, "learning_rate": 2e-06, "loss": 0.1806, "step": 2934 }, { "epoch": 1.0947407683700112, "grad_norm": 0.7028340697288513, "learning_rate": 2e-06, "loss": 0.197, "step": 2935 }, { "epoch": 1.0951137635210741, "grad_norm": 1.0248242616653442, "learning_rate": 2e-06, "loss": 0.1739, "step": 2936 }, { "epoch": 1.0954867586721373, "grad_norm": 0.758582353591919, "learning_rate": 2e-06, "loss": 0.1863, "step": 2937 }, { "epoch": 1.0958597538232002, "grad_norm": 0.8506664037704468, "learning_rate": 2e-06, "loss": 0.1713, "step": 2938 }, { "epoch": 1.0962327489742634, "grad_norm": 0.8815340399742126, "learning_rate": 2e-06, "loss": 0.1923, "step": 2939 }, { "epoch": 1.0966057441253263, "grad_norm": 0.742565393447876, "learning_rate": 2e-06, "loss": 0.1792, "step": 2940 }, { "epoch": 1.0969787392763894, "grad_norm": 1.0528573989868164, "learning_rate": 2e-06, "loss": 0.2032, "step": 2941 }, { "epoch": 1.0973517344274524, "grad_norm": 0.9578142762184143, "learning_rate": 2e-06, "loss": 0.184, "step": 2942 }, { "epoch": 1.0977247295785155, "grad_norm": 1.0098962783813477, "learning_rate": 2e-06, "loss": 0.2049, "step": 2943 }, { "epoch": 1.0980977247295785, "grad_norm": 0.7507339119911194, "learning_rate": 2e-06, "loss": 0.1844, "step": 2944 }, { "epoch": 1.0984707198806416, "grad_norm": 1.0438917875289917, "learning_rate": 2e-06, "loss": 0.1625, "step": 2945 }, { "epoch": 1.0988437150317045, "grad_norm": 1.102524995803833, "learning_rate": 2e-06, "loss": 0.184, "step": 2946 }, { "epoch": 1.0992167101827677, "grad_norm": 0.8745558857917786, "learning_rate": 2e-06, "loss": 0.1583, "step": 2947 }, { "epoch": 1.0995897053338306, "grad_norm": 1.0811736583709717, "learning_rate": 2e-06, "loss": 0.2005, "step": 2948 }, { "epoch": 1.0999627004848938, "grad_norm": 1.0287493467330933, "learning_rate": 2e-06, "loss": 0.1656, "step": 2949 }, { "epoch": 1.1003356956359567, "grad_norm": 1.7233316898345947, "learning_rate": 2e-06, "loss": 0.1551, "step": 2950 }, { "epoch": 1.1007086907870198, "grad_norm": 1.0097558498382568, "learning_rate": 2e-06, "loss": 0.1862, "step": 2951 }, { "epoch": 1.1010816859380828, "grad_norm": 1.232151746749878, "learning_rate": 2e-06, "loss": 0.1612, "step": 2952 }, { "epoch": 1.101454681089146, "grad_norm": 0.8488647937774658, "learning_rate": 2e-06, "loss": 0.172, "step": 2953 }, { "epoch": 1.1018276762402088, "grad_norm": 0.830190896987915, "learning_rate": 2e-06, "loss": 0.1613, "step": 2954 }, { "epoch": 1.102200671391272, "grad_norm": 0.8213499188423157, "learning_rate": 2e-06, "loss": 0.1612, "step": 2955 }, { "epoch": 1.102573666542335, "grad_norm": 0.9945517182350159, "learning_rate": 2e-06, "loss": 0.1745, "step": 2956 }, { "epoch": 1.102946661693398, "grad_norm": 0.6726274490356445, "learning_rate": 2e-06, "loss": 0.1447, "step": 2957 }, { "epoch": 1.103319656844461, "grad_norm": 0.9559417366981506, "learning_rate": 2e-06, "loss": 0.185, "step": 2958 }, { "epoch": 1.1036926519955241, "grad_norm": 0.772922694683075, "learning_rate": 2e-06, "loss": 0.1866, "step": 2959 }, { "epoch": 1.104065647146587, "grad_norm": 0.8583593368530273, "learning_rate": 2e-06, "loss": 0.201, "step": 2960 }, { "epoch": 1.1044386422976502, "grad_norm": 0.8175254464149475, "learning_rate": 2e-06, "loss": 0.1743, "step": 2961 }, { "epoch": 1.1048116374487131, "grad_norm": 0.9466366171836853, "learning_rate": 2e-06, "loss": 0.1634, "step": 2962 }, { "epoch": 1.1051846325997763, "grad_norm": 0.8782375454902649, "learning_rate": 2e-06, "loss": 0.198, "step": 2963 }, { "epoch": 1.1055576277508392, "grad_norm": 0.8751148581504822, "learning_rate": 2e-06, "loss": 0.161, "step": 2964 }, { "epoch": 1.1059306229019024, "grad_norm": 1.39255690574646, "learning_rate": 2e-06, "loss": 0.1602, "step": 2965 }, { "epoch": 1.1063036180529653, "grad_norm": 0.7585110068321228, "learning_rate": 2e-06, "loss": 0.1653, "step": 2966 }, { "epoch": 1.1066766132040284, "grad_norm": 1.0947906970977783, "learning_rate": 2e-06, "loss": 0.1933, "step": 2967 }, { "epoch": 1.1070496083550914, "grad_norm": 1.0268210172653198, "learning_rate": 2e-06, "loss": 0.1886, "step": 2968 }, { "epoch": 1.1074226035061545, "grad_norm": 0.7950237393379211, "learning_rate": 2e-06, "loss": 0.196, "step": 2969 }, { "epoch": 1.1077955986572174, "grad_norm": 0.7909383773803711, "learning_rate": 2e-06, "loss": 0.1748, "step": 2970 }, { "epoch": 1.1081685938082806, "grad_norm": 0.6887432336807251, "learning_rate": 2e-06, "loss": 0.1842, "step": 2971 }, { "epoch": 1.1085415889593435, "grad_norm": 0.7123523950576782, "learning_rate": 2e-06, "loss": 0.2062, "step": 2972 }, { "epoch": 1.1089145841104067, "grad_norm": 0.9726337790489197, "learning_rate": 2e-06, "loss": 0.1922, "step": 2973 }, { "epoch": 1.1092875792614696, "grad_norm": 0.9752889275550842, "learning_rate": 2e-06, "loss": 0.179, "step": 2974 }, { "epoch": 1.1096605744125327, "grad_norm": 0.9044820070266724, "learning_rate": 2e-06, "loss": 0.1913, "step": 2975 }, { "epoch": 1.1100335695635957, "grad_norm": 0.8654804825782776, "learning_rate": 2e-06, "loss": 0.1863, "step": 2976 }, { "epoch": 1.1104065647146588, "grad_norm": 0.8212589025497437, "learning_rate": 2e-06, "loss": 0.1941, "step": 2977 }, { "epoch": 1.1107795598657217, "grad_norm": 0.7234451174736023, "learning_rate": 2e-06, "loss": 0.1656, "step": 2978 }, { "epoch": 1.1111525550167847, "grad_norm": 0.9626159071922302, "learning_rate": 2e-06, "loss": 0.1947, "step": 2979 }, { "epoch": 1.1115255501678478, "grad_norm": 0.8479810357093811, "learning_rate": 2e-06, "loss": 0.2142, "step": 2980 }, { "epoch": 1.111898545318911, "grad_norm": 0.7669976353645325, "learning_rate": 2e-06, "loss": 0.1624, "step": 2981 }, { "epoch": 1.112271540469974, "grad_norm": 0.8073399662971497, "learning_rate": 2e-06, "loss": 0.178, "step": 2982 }, { "epoch": 1.1126445356210368, "grad_norm": 0.6607053279876709, "learning_rate": 2e-06, "loss": 0.1796, "step": 2983 }, { "epoch": 1.1130175307721, "grad_norm": 0.9592468738555908, "learning_rate": 2e-06, "loss": 0.191, "step": 2984 }, { "epoch": 1.113390525923163, "grad_norm": 0.8434776663780212, "learning_rate": 2e-06, "loss": 0.1922, "step": 2985 }, { "epoch": 1.113763521074226, "grad_norm": 0.8213046789169312, "learning_rate": 2e-06, "loss": 0.1555, "step": 2986 }, { "epoch": 1.114136516225289, "grad_norm": 0.6968788504600525, "learning_rate": 2e-06, "loss": 0.1913, "step": 2987 }, { "epoch": 1.1145095113763521, "grad_norm": 0.8527931571006775, "learning_rate": 2e-06, "loss": 0.1677, "step": 2988 }, { "epoch": 1.114882506527415, "grad_norm": 0.7234674692153931, "learning_rate": 2e-06, "loss": 0.1734, "step": 2989 }, { "epoch": 1.1152555016784782, "grad_norm": 0.8428654670715332, "learning_rate": 2e-06, "loss": 0.1887, "step": 2990 }, { "epoch": 1.1156284968295411, "grad_norm": 0.8374068140983582, "learning_rate": 2e-06, "loss": 0.1585, "step": 2991 }, { "epoch": 1.1160014919806043, "grad_norm": 0.7898584008216858, "learning_rate": 2e-06, "loss": 0.1895, "step": 2992 }, { "epoch": 1.1163744871316672, "grad_norm": 0.8857154846191406, "learning_rate": 2e-06, "loss": 0.1626, "step": 2993 }, { "epoch": 1.1167474822827304, "grad_norm": 0.8572154641151428, "learning_rate": 2e-06, "loss": 0.1779, "step": 2994 }, { "epoch": 1.1171204774337933, "grad_norm": 0.7891286611557007, "learning_rate": 2e-06, "loss": 0.185, "step": 2995 }, { "epoch": 1.1174934725848564, "grad_norm": 0.8262563347816467, "learning_rate": 2e-06, "loss": 0.1609, "step": 2996 }, { "epoch": 1.1178664677359194, "grad_norm": 0.8122099041938782, "learning_rate": 2e-06, "loss": 0.1753, "step": 2997 }, { "epoch": 1.1182394628869825, "grad_norm": 0.7761348485946655, "learning_rate": 2e-06, "loss": 0.1952, "step": 2998 }, { "epoch": 1.1186124580380454, "grad_norm": 0.7541782259941101, "learning_rate": 2e-06, "loss": 0.1888, "step": 2999 }, { "epoch": 1.1189854531891086, "grad_norm": 1.023330807685852, "learning_rate": 2e-06, "loss": 0.169, "step": 3000 }, { "epoch": 1.1193584483401715, "grad_norm": 0.9556139707565308, "learning_rate": 2e-06, "loss": 0.1587, "step": 3001 }, { "epoch": 1.1197314434912347, "grad_norm": 0.9055883884429932, "learning_rate": 2e-06, "loss": 0.178, "step": 3002 }, { "epoch": 1.1201044386422976, "grad_norm": 0.968643069267273, "learning_rate": 2e-06, "loss": 0.2118, "step": 3003 }, { "epoch": 1.1204774337933607, "grad_norm": 0.9255464673042297, "learning_rate": 2e-06, "loss": 0.1624, "step": 3004 }, { "epoch": 1.1208504289444237, "grad_norm": 0.9465776681900024, "learning_rate": 2e-06, "loss": 0.1901, "step": 3005 }, { "epoch": 1.1212234240954868, "grad_norm": 0.9010048508644104, "learning_rate": 2e-06, "loss": 0.1827, "step": 3006 }, { "epoch": 1.1215964192465497, "grad_norm": 0.7733981013298035, "learning_rate": 2e-06, "loss": 0.1916, "step": 3007 }, { "epoch": 1.1219694143976129, "grad_norm": 0.9129499793052673, "learning_rate": 2e-06, "loss": 0.175, "step": 3008 }, { "epoch": 1.1223424095486758, "grad_norm": 0.9282241463661194, "learning_rate": 2e-06, "loss": 0.201, "step": 3009 }, { "epoch": 1.122715404699739, "grad_norm": 0.7231472730636597, "learning_rate": 2e-06, "loss": 0.1819, "step": 3010 }, { "epoch": 1.1230883998508019, "grad_norm": 0.9672186374664307, "learning_rate": 2e-06, "loss": 0.1516, "step": 3011 }, { "epoch": 1.123461395001865, "grad_norm": 0.6962900757789612, "learning_rate": 2e-06, "loss": 0.1933, "step": 3012 }, { "epoch": 1.123834390152928, "grad_norm": 0.8213397264480591, "learning_rate": 2e-06, "loss": 0.1816, "step": 3013 }, { "epoch": 1.1242073853039911, "grad_norm": 0.9203565120697021, "learning_rate": 2e-06, "loss": 0.1653, "step": 3014 }, { "epoch": 1.124580380455054, "grad_norm": 0.8568668365478516, "learning_rate": 2e-06, "loss": 0.1835, "step": 3015 }, { "epoch": 1.1249533756061172, "grad_norm": 0.8275501728057861, "learning_rate": 2e-06, "loss": 0.1993, "step": 3016 }, { "epoch": 1.1253263707571801, "grad_norm": 0.8197013139724731, "learning_rate": 2e-06, "loss": 0.1805, "step": 3017 }, { "epoch": 1.1256993659082433, "grad_norm": 0.8844603896141052, "learning_rate": 2e-06, "loss": 0.1582, "step": 3018 }, { "epoch": 1.1260723610593062, "grad_norm": 0.8695966005325317, "learning_rate": 2e-06, "loss": 0.1892, "step": 3019 }, { "epoch": 1.1264453562103693, "grad_norm": 0.7089771628379822, "learning_rate": 2e-06, "loss": 0.1578, "step": 3020 }, { "epoch": 1.1268183513614323, "grad_norm": 0.7932804226875305, "learning_rate": 2e-06, "loss": 0.1915, "step": 3021 }, { "epoch": 1.1271913465124954, "grad_norm": 0.961810827255249, "learning_rate": 2e-06, "loss": 0.1472, "step": 3022 }, { "epoch": 1.1275643416635583, "grad_norm": 0.8838828206062317, "learning_rate": 2e-06, "loss": 0.158, "step": 3023 }, { "epoch": 1.1279373368146215, "grad_norm": 0.8694051504135132, "learning_rate": 2e-06, "loss": 0.1531, "step": 3024 }, { "epoch": 1.1283103319656844, "grad_norm": 0.7956530451774597, "learning_rate": 2e-06, "loss": 0.1411, "step": 3025 }, { "epoch": 1.1286833271167476, "grad_norm": 0.7906654477119446, "learning_rate": 2e-06, "loss": 0.2002, "step": 3026 }, { "epoch": 1.1290563222678105, "grad_norm": 0.9235507845878601, "learning_rate": 2e-06, "loss": 0.1883, "step": 3027 }, { "epoch": 1.1294293174188736, "grad_norm": 0.8902104496955872, "learning_rate": 2e-06, "loss": 0.1781, "step": 3028 }, { "epoch": 1.1298023125699366, "grad_norm": 0.8512759208679199, "learning_rate": 2e-06, "loss": 0.1824, "step": 3029 }, { "epoch": 1.1301753077209997, "grad_norm": 0.8360360860824585, "learning_rate": 2e-06, "loss": 0.1736, "step": 3030 }, { "epoch": 1.1305483028720626, "grad_norm": 0.9134873747825623, "learning_rate": 2e-06, "loss": 0.167, "step": 3031 }, { "epoch": 1.1309212980231256, "grad_norm": 0.797511637210846, "learning_rate": 2e-06, "loss": 0.202, "step": 3032 }, { "epoch": 1.1312942931741887, "grad_norm": 0.8937825560569763, "learning_rate": 2e-06, "loss": 0.1842, "step": 3033 }, { "epoch": 1.1316672883252519, "grad_norm": 0.7420101761817932, "learning_rate": 2e-06, "loss": 0.1803, "step": 3034 }, { "epoch": 1.1320402834763148, "grad_norm": 1.1556342840194702, "learning_rate": 2e-06, "loss": 0.183, "step": 3035 }, { "epoch": 1.1324132786273777, "grad_norm": 1.0903174877166748, "learning_rate": 2e-06, "loss": 0.1812, "step": 3036 }, { "epoch": 1.1327862737784409, "grad_norm": 0.7459410429000854, "learning_rate": 2e-06, "loss": 0.1836, "step": 3037 }, { "epoch": 1.133159268929504, "grad_norm": 0.7113562822341919, "learning_rate": 2e-06, "loss": 0.1907, "step": 3038 }, { "epoch": 1.133532264080567, "grad_norm": 0.8279144763946533, "learning_rate": 2e-06, "loss": 0.1829, "step": 3039 }, { "epoch": 1.1339052592316299, "grad_norm": 0.8620124459266663, "learning_rate": 2e-06, "loss": 0.1822, "step": 3040 }, { "epoch": 1.134278254382693, "grad_norm": 0.7931593060493469, "learning_rate": 2e-06, "loss": 0.1775, "step": 3041 }, { "epoch": 1.1346512495337562, "grad_norm": 0.732243537902832, "learning_rate": 2e-06, "loss": 0.1602, "step": 3042 }, { "epoch": 1.135024244684819, "grad_norm": 0.8690540194511414, "learning_rate": 2e-06, "loss": 0.1895, "step": 3043 }, { "epoch": 1.135397239835882, "grad_norm": 0.9621692895889282, "learning_rate": 2e-06, "loss": 0.1716, "step": 3044 }, { "epoch": 1.1357702349869452, "grad_norm": 0.70601487159729, "learning_rate": 2e-06, "loss": 0.1716, "step": 3045 }, { "epoch": 1.136143230138008, "grad_norm": 1.2023966312408447, "learning_rate": 2e-06, "loss": 0.1851, "step": 3046 }, { "epoch": 1.1365162252890713, "grad_norm": 0.7760782241821289, "learning_rate": 2e-06, "loss": 0.1345, "step": 3047 }, { "epoch": 1.1368892204401342, "grad_norm": 0.7182890772819519, "learning_rate": 2e-06, "loss": 0.1746, "step": 3048 }, { "epoch": 1.1372622155911973, "grad_norm": 0.6259891986846924, "learning_rate": 2e-06, "loss": 0.207, "step": 3049 }, { "epoch": 1.1376352107422603, "grad_norm": 0.9367859363555908, "learning_rate": 2e-06, "loss": 0.1732, "step": 3050 }, { "epoch": 1.1380082058933234, "grad_norm": 0.8220085501670837, "learning_rate": 2e-06, "loss": 0.1706, "step": 3051 }, { "epoch": 1.1383812010443863, "grad_norm": 0.792517900466919, "learning_rate": 2e-06, "loss": 0.1993, "step": 3052 }, { "epoch": 1.1387541961954495, "grad_norm": 0.8190054297447205, "learning_rate": 2e-06, "loss": 0.1755, "step": 3053 }, { "epoch": 1.1391271913465124, "grad_norm": 0.7554114460945129, "learning_rate": 2e-06, "loss": 0.1965, "step": 3054 }, { "epoch": 1.1395001864975756, "grad_norm": 1.2817059755325317, "learning_rate": 2e-06, "loss": 0.167, "step": 3055 }, { "epoch": 1.1398731816486385, "grad_norm": 1.1344681978225708, "learning_rate": 2e-06, "loss": 0.1669, "step": 3056 }, { "epoch": 1.1402461767997016, "grad_norm": 0.8400190472602844, "learning_rate": 2e-06, "loss": 0.1936, "step": 3057 }, { "epoch": 1.1406191719507646, "grad_norm": 0.8073164820671082, "learning_rate": 2e-06, "loss": 0.1855, "step": 3058 }, { "epoch": 1.1409921671018277, "grad_norm": 1.0009198188781738, "learning_rate": 2e-06, "loss": 0.1834, "step": 3059 }, { "epoch": 1.1413651622528906, "grad_norm": 0.8746243119239807, "learning_rate": 2e-06, "loss": 0.1654, "step": 3060 }, { "epoch": 1.1417381574039538, "grad_norm": 0.7642545104026794, "learning_rate": 2e-06, "loss": 0.2006, "step": 3061 }, { "epoch": 1.1421111525550167, "grad_norm": 0.9087741374969482, "learning_rate": 2e-06, "loss": 0.1597, "step": 3062 }, { "epoch": 1.1424841477060799, "grad_norm": 0.9032549262046814, "learning_rate": 2e-06, "loss": 0.1744, "step": 3063 }, { "epoch": 1.1428571428571428, "grad_norm": 0.8823798894882202, "learning_rate": 2e-06, "loss": 0.1638, "step": 3064 }, { "epoch": 1.143230138008206, "grad_norm": 1.127102017402649, "learning_rate": 2e-06, "loss": 0.1551, "step": 3065 }, { "epoch": 1.1436031331592689, "grad_norm": 0.8375194072723389, "learning_rate": 2e-06, "loss": 0.1919, "step": 3066 }, { "epoch": 1.143976128310332, "grad_norm": 0.7580286264419556, "learning_rate": 2e-06, "loss": 0.193, "step": 3067 }, { "epoch": 1.144349123461395, "grad_norm": 0.6859383583068848, "learning_rate": 2e-06, "loss": 0.1482, "step": 3068 }, { "epoch": 1.144722118612458, "grad_norm": 0.7508188486099243, "learning_rate": 2e-06, "loss": 0.2053, "step": 3069 }, { "epoch": 1.145095113763521, "grad_norm": 0.6727738380432129, "learning_rate": 2e-06, "loss": 0.1683, "step": 3070 }, { "epoch": 1.1454681089145842, "grad_norm": 1.0289968252182007, "learning_rate": 2e-06, "loss": 0.1902, "step": 3071 }, { "epoch": 1.145841104065647, "grad_norm": 0.8551223874092102, "learning_rate": 2e-06, "loss": 0.1765, "step": 3072 }, { "epoch": 1.1462140992167102, "grad_norm": 0.9598458409309387, "learning_rate": 2e-06, "loss": 0.1716, "step": 3073 }, { "epoch": 1.1465870943677732, "grad_norm": 0.9725390672683716, "learning_rate": 2e-06, "loss": 0.1656, "step": 3074 }, { "epoch": 1.1469600895188363, "grad_norm": 0.9021058678627014, "learning_rate": 2e-06, "loss": 0.1647, "step": 3075 }, { "epoch": 1.1473330846698993, "grad_norm": 1.083536148071289, "learning_rate": 2e-06, "loss": 0.1618, "step": 3076 }, { "epoch": 1.1477060798209624, "grad_norm": 0.712942898273468, "learning_rate": 2e-06, "loss": 0.1818, "step": 3077 }, { "epoch": 1.1480790749720253, "grad_norm": 0.8583605289459229, "learning_rate": 2e-06, "loss": 0.185, "step": 3078 }, { "epoch": 1.1484520701230885, "grad_norm": 0.6993825435638428, "learning_rate": 2e-06, "loss": 0.1701, "step": 3079 }, { "epoch": 1.1488250652741514, "grad_norm": 0.7844125628471375, "learning_rate": 2e-06, "loss": 0.1857, "step": 3080 }, { "epoch": 1.1491980604252146, "grad_norm": 0.916871190071106, "learning_rate": 2e-06, "loss": 0.1798, "step": 3081 }, { "epoch": 1.1495710555762775, "grad_norm": 0.8795260190963745, "learning_rate": 2e-06, "loss": 0.1552, "step": 3082 }, { "epoch": 1.1499440507273406, "grad_norm": 0.892754852771759, "learning_rate": 2e-06, "loss": 0.1516, "step": 3083 }, { "epoch": 1.1503170458784036, "grad_norm": 0.6868711113929749, "learning_rate": 2e-06, "loss": 0.1534, "step": 3084 }, { "epoch": 1.1506900410294667, "grad_norm": 0.7697017788887024, "learning_rate": 2e-06, "loss": 0.1879, "step": 3085 }, { "epoch": 1.1510630361805296, "grad_norm": 0.8271315097808838, "learning_rate": 2e-06, "loss": 0.1544, "step": 3086 }, { "epoch": 1.1514360313315928, "grad_norm": 0.9591032862663269, "learning_rate": 2e-06, "loss": 0.1666, "step": 3087 }, { "epoch": 1.1518090264826557, "grad_norm": 0.7375714778900146, "learning_rate": 2e-06, "loss": 0.1684, "step": 3088 }, { "epoch": 1.1521820216337189, "grad_norm": 1.091103434562683, "learning_rate": 2e-06, "loss": 0.1849, "step": 3089 }, { "epoch": 1.1525550167847818, "grad_norm": 0.9722118377685547, "learning_rate": 2e-06, "loss": 0.1461, "step": 3090 }, { "epoch": 1.152928011935845, "grad_norm": 0.7421842217445374, "learning_rate": 2e-06, "loss": 0.1974, "step": 3091 }, { "epoch": 1.1533010070869079, "grad_norm": 0.9781521558761597, "learning_rate": 2e-06, "loss": 0.1791, "step": 3092 }, { "epoch": 1.1536740022379708, "grad_norm": 0.9056086540222168, "learning_rate": 2e-06, "loss": 0.1969, "step": 3093 }, { "epoch": 1.154046997389034, "grad_norm": 0.8922998905181885, "learning_rate": 2e-06, "loss": 0.1759, "step": 3094 }, { "epoch": 1.154419992540097, "grad_norm": 0.7272158861160278, "learning_rate": 2e-06, "loss": 0.1707, "step": 3095 }, { "epoch": 1.15479298769116, "grad_norm": 0.7914904356002808, "learning_rate": 2e-06, "loss": 0.1589, "step": 3096 }, { "epoch": 1.155165982842223, "grad_norm": 0.82799232006073, "learning_rate": 2e-06, "loss": 0.1831, "step": 3097 }, { "epoch": 1.155538977993286, "grad_norm": 0.7118151187896729, "learning_rate": 2e-06, "loss": 0.174, "step": 3098 }, { "epoch": 1.1559119731443492, "grad_norm": 1.0244306325912476, "learning_rate": 2e-06, "loss": 0.1761, "step": 3099 }, { "epoch": 1.1562849682954122, "grad_norm": 0.9815647602081299, "learning_rate": 2e-06, "loss": 0.2021, "step": 3100 }, { "epoch": 1.156657963446475, "grad_norm": 0.895966649055481, "learning_rate": 2e-06, "loss": 0.1983, "step": 3101 }, { "epoch": 1.1570309585975382, "grad_norm": 0.7623142004013062, "learning_rate": 2e-06, "loss": 0.1699, "step": 3102 }, { "epoch": 1.1574039537486014, "grad_norm": 0.989051342010498, "learning_rate": 2e-06, "loss": 0.1759, "step": 3103 }, { "epoch": 1.1577769488996643, "grad_norm": 0.9648937582969666, "learning_rate": 2e-06, "loss": 0.1427, "step": 3104 }, { "epoch": 1.1581499440507272, "grad_norm": 0.733771026134491, "learning_rate": 2e-06, "loss": 0.1692, "step": 3105 }, { "epoch": 1.1585229392017904, "grad_norm": 0.9389103651046753, "learning_rate": 2e-06, "loss": 0.1784, "step": 3106 }, { "epoch": 1.1588959343528533, "grad_norm": 0.8549570441246033, "learning_rate": 2e-06, "loss": 0.2029, "step": 3107 }, { "epoch": 1.1592689295039165, "grad_norm": 0.9905832409858704, "learning_rate": 2e-06, "loss": 0.1871, "step": 3108 }, { "epoch": 1.1596419246549794, "grad_norm": 0.8362147212028503, "learning_rate": 2e-06, "loss": 0.1836, "step": 3109 }, { "epoch": 1.1600149198060425, "grad_norm": 0.9306763410568237, "learning_rate": 2e-06, "loss": 0.1604, "step": 3110 }, { "epoch": 1.1603879149571055, "grad_norm": 0.8124809861183167, "learning_rate": 2e-06, "loss": 0.1742, "step": 3111 }, { "epoch": 1.1607609101081686, "grad_norm": 0.8747238516807556, "learning_rate": 2e-06, "loss": 0.2044, "step": 3112 }, { "epoch": 1.1611339052592315, "grad_norm": 0.7869054675102234, "learning_rate": 2e-06, "loss": 0.1986, "step": 3113 }, { "epoch": 1.1615069004102947, "grad_norm": 0.9867730736732483, "learning_rate": 2e-06, "loss": 0.166, "step": 3114 }, { "epoch": 1.1618798955613576, "grad_norm": 0.8363789319992065, "learning_rate": 2e-06, "loss": 0.164, "step": 3115 }, { "epoch": 1.1622528907124208, "grad_norm": 0.8576522469520569, "learning_rate": 2e-06, "loss": 0.1736, "step": 3116 }, { "epoch": 1.1626258858634837, "grad_norm": 0.9439851641654968, "learning_rate": 2e-06, "loss": 0.2004, "step": 3117 }, { "epoch": 1.1629988810145468, "grad_norm": 1.0021262168884277, "learning_rate": 2e-06, "loss": 0.1973, "step": 3118 }, { "epoch": 1.1633718761656098, "grad_norm": 0.7740960717201233, "learning_rate": 2e-06, "loss": 0.1789, "step": 3119 }, { "epoch": 1.163744871316673, "grad_norm": 0.8685408234596252, "learning_rate": 2e-06, "loss": 0.1748, "step": 3120 }, { "epoch": 1.1641178664677359, "grad_norm": 0.8246059417724609, "learning_rate": 2e-06, "loss": 0.1707, "step": 3121 }, { "epoch": 1.164490861618799, "grad_norm": 0.8680852055549622, "learning_rate": 2e-06, "loss": 0.1883, "step": 3122 }, { "epoch": 1.164863856769862, "grad_norm": 0.7794876098632812, "learning_rate": 2e-06, "loss": 0.1657, "step": 3123 }, { "epoch": 1.165236851920925, "grad_norm": 0.6811487078666687, "learning_rate": 2e-06, "loss": 0.1975, "step": 3124 }, { "epoch": 1.165609847071988, "grad_norm": 0.9651445150375366, "learning_rate": 2e-06, "loss": 0.1727, "step": 3125 }, { "epoch": 1.1659828422230512, "grad_norm": 0.8190502524375916, "learning_rate": 2e-06, "loss": 0.1758, "step": 3126 }, { "epoch": 1.166355837374114, "grad_norm": 1.0575768947601318, "learning_rate": 2e-06, "loss": 0.1681, "step": 3127 }, { "epoch": 1.1667288325251772, "grad_norm": 1.0416932106018066, "learning_rate": 2e-06, "loss": 0.1642, "step": 3128 }, { "epoch": 1.1671018276762402, "grad_norm": 0.9978415369987488, "learning_rate": 2e-06, "loss": 0.1876, "step": 3129 }, { "epoch": 1.1674748228273033, "grad_norm": 0.8107171058654785, "learning_rate": 2e-06, "loss": 0.2012, "step": 3130 }, { "epoch": 1.1678478179783662, "grad_norm": 0.8562737703323364, "learning_rate": 2e-06, "loss": 0.1697, "step": 3131 }, { "epoch": 1.1682208131294294, "grad_norm": 0.8479187488555908, "learning_rate": 2e-06, "loss": 0.1723, "step": 3132 }, { "epoch": 1.1685938082804923, "grad_norm": 0.8576910495758057, "learning_rate": 2e-06, "loss": 0.2024, "step": 3133 }, { "epoch": 1.1689668034315555, "grad_norm": 0.8109558820724487, "learning_rate": 2e-06, "loss": 0.1727, "step": 3134 }, { "epoch": 1.1693397985826184, "grad_norm": 0.7045148611068726, "learning_rate": 2e-06, "loss": 0.2018, "step": 3135 }, { "epoch": 1.1697127937336815, "grad_norm": 0.7715195417404175, "learning_rate": 2e-06, "loss": 0.1905, "step": 3136 }, { "epoch": 1.1700857888847445, "grad_norm": 0.7072034478187561, "learning_rate": 2e-06, "loss": 0.1855, "step": 3137 }, { "epoch": 1.1704587840358076, "grad_norm": 0.8690103888511658, "learning_rate": 2e-06, "loss": 0.1796, "step": 3138 }, { "epoch": 1.1708317791868705, "grad_norm": 0.7917973399162292, "learning_rate": 2e-06, "loss": 0.2047, "step": 3139 }, { "epoch": 1.1712047743379337, "grad_norm": 0.7659318447113037, "learning_rate": 2e-06, "loss": 0.2023, "step": 3140 }, { "epoch": 1.1715777694889966, "grad_norm": 0.8396939635276794, "learning_rate": 2e-06, "loss": 0.1627, "step": 3141 }, { "epoch": 1.1719507646400598, "grad_norm": 0.8795982003211975, "learning_rate": 2e-06, "loss": 0.1724, "step": 3142 }, { "epoch": 1.1723237597911227, "grad_norm": 1.076664924621582, "learning_rate": 2e-06, "loss": 0.1728, "step": 3143 }, { "epoch": 1.1726967549421858, "grad_norm": 0.7514013648033142, "learning_rate": 2e-06, "loss": 0.1467, "step": 3144 }, { "epoch": 1.1730697500932488, "grad_norm": 1.0156468152999878, "learning_rate": 2e-06, "loss": 0.1708, "step": 3145 }, { "epoch": 1.173442745244312, "grad_norm": 0.7821884751319885, "learning_rate": 2e-06, "loss": 0.1938, "step": 3146 }, { "epoch": 1.1738157403953748, "grad_norm": 0.9291337728500366, "learning_rate": 2e-06, "loss": 0.1706, "step": 3147 }, { "epoch": 1.174188735546438, "grad_norm": 0.737267017364502, "learning_rate": 2e-06, "loss": 0.1695, "step": 3148 }, { "epoch": 1.174561730697501, "grad_norm": 1.0853137969970703, "learning_rate": 2e-06, "loss": 0.1866, "step": 3149 }, { "epoch": 1.174934725848564, "grad_norm": 0.9123322367668152, "learning_rate": 2e-06, "loss": 0.1661, "step": 3150 }, { "epoch": 1.175307720999627, "grad_norm": 1.1403766870498657, "learning_rate": 2e-06, "loss": 0.1748, "step": 3151 }, { "epoch": 1.1756807161506901, "grad_norm": 0.8820918798446655, "learning_rate": 2e-06, "loss": 0.2034, "step": 3152 }, { "epoch": 1.176053711301753, "grad_norm": 0.8070523738861084, "learning_rate": 2e-06, "loss": 0.1484, "step": 3153 }, { "epoch": 1.176426706452816, "grad_norm": 0.9884210824966431, "learning_rate": 2e-06, "loss": 0.1906, "step": 3154 }, { "epoch": 1.1767997016038791, "grad_norm": 0.7778683304786682, "learning_rate": 2e-06, "loss": 0.1677, "step": 3155 }, { "epoch": 1.1771726967549423, "grad_norm": 0.707036018371582, "learning_rate": 2e-06, "loss": 0.1906, "step": 3156 }, { "epoch": 1.1775456919060052, "grad_norm": 0.879346489906311, "learning_rate": 2e-06, "loss": 0.185, "step": 3157 }, { "epoch": 1.1779186870570681, "grad_norm": 0.6589690446853638, "learning_rate": 2e-06, "loss": 0.1929, "step": 3158 }, { "epoch": 1.1782916822081313, "grad_norm": 0.8836626410484314, "learning_rate": 2e-06, "loss": 0.1685, "step": 3159 }, { "epoch": 1.1786646773591944, "grad_norm": 0.9177768230438232, "learning_rate": 2e-06, "loss": 0.1834, "step": 3160 }, { "epoch": 1.1790376725102574, "grad_norm": 0.7908859848976135, "learning_rate": 2e-06, "loss": 0.1783, "step": 3161 }, { "epoch": 1.1794106676613203, "grad_norm": 0.7977958917617798, "learning_rate": 2e-06, "loss": 0.1681, "step": 3162 }, { "epoch": 1.1797836628123834, "grad_norm": 0.8390917181968689, "learning_rate": 2e-06, "loss": 0.1944, "step": 3163 }, { "epoch": 1.1801566579634466, "grad_norm": 1.0304301977157593, "learning_rate": 2e-06, "loss": 0.1623, "step": 3164 }, { "epoch": 1.1805296531145095, "grad_norm": 0.7790015339851379, "learning_rate": 2e-06, "loss": 0.1643, "step": 3165 }, { "epoch": 1.1809026482655725, "grad_norm": 0.8778613805770874, "learning_rate": 2e-06, "loss": 0.1979, "step": 3166 }, { "epoch": 1.1812756434166356, "grad_norm": 1.1807633638381958, "learning_rate": 2e-06, "loss": 0.1861, "step": 3167 }, { "epoch": 1.1816486385676985, "grad_norm": 0.8023701310157776, "learning_rate": 2e-06, "loss": 0.1847, "step": 3168 }, { "epoch": 1.1820216337187617, "grad_norm": 0.7317067980766296, "learning_rate": 2e-06, "loss": 0.2002, "step": 3169 }, { "epoch": 1.1823946288698246, "grad_norm": 0.9843307733535767, "learning_rate": 2e-06, "loss": 0.1742, "step": 3170 }, { "epoch": 1.1827676240208878, "grad_norm": 0.9840295910835266, "learning_rate": 2e-06, "loss": 0.1958, "step": 3171 }, { "epoch": 1.1831406191719507, "grad_norm": 0.760865330696106, "learning_rate": 2e-06, "loss": 0.1821, "step": 3172 }, { "epoch": 1.1835136143230138, "grad_norm": 0.8871913552284241, "learning_rate": 2e-06, "loss": 0.1864, "step": 3173 }, { "epoch": 1.1838866094740768, "grad_norm": 0.9085450768470764, "learning_rate": 2e-06, "loss": 0.1687, "step": 3174 }, { "epoch": 1.18425960462514, "grad_norm": 0.853379487991333, "learning_rate": 2e-06, "loss": 0.1738, "step": 3175 }, { "epoch": 1.1846325997762028, "grad_norm": 0.7774769067764282, "learning_rate": 2e-06, "loss": 0.2101, "step": 3176 }, { "epoch": 1.185005594927266, "grad_norm": 0.9661226272583008, "learning_rate": 2e-06, "loss": 0.1587, "step": 3177 }, { "epoch": 1.185378590078329, "grad_norm": 0.9396353363990784, "learning_rate": 2e-06, "loss": 0.1812, "step": 3178 }, { "epoch": 1.185751585229392, "grad_norm": 0.9347936511039734, "learning_rate": 2e-06, "loss": 0.2023, "step": 3179 }, { "epoch": 1.186124580380455, "grad_norm": 0.7192155122756958, "learning_rate": 2e-06, "loss": 0.1632, "step": 3180 }, { "epoch": 1.1864975755315181, "grad_norm": 0.7554670572280884, "learning_rate": 2e-06, "loss": 0.1711, "step": 3181 }, { "epoch": 1.186870570682581, "grad_norm": 0.7150728702545166, "learning_rate": 2e-06, "loss": 0.1746, "step": 3182 }, { "epoch": 1.1872435658336442, "grad_norm": 0.6790586113929749, "learning_rate": 2e-06, "loss": 0.1834, "step": 3183 }, { "epoch": 1.1876165609847071, "grad_norm": 1.0438916683197021, "learning_rate": 2e-06, "loss": 0.1848, "step": 3184 }, { "epoch": 1.1879895561357703, "grad_norm": 1.0375503301620483, "learning_rate": 2e-06, "loss": 0.1983, "step": 3185 }, { "epoch": 1.1883625512868332, "grad_norm": 0.7773823142051697, "learning_rate": 2e-06, "loss": 0.1641, "step": 3186 }, { "epoch": 1.1887355464378964, "grad_norm": 0.8495008945465088, "learning_rate": 2e-06, "loss": 0.1754, "step": 3187 }, { "epoch": 1.1891085415889593, "grad_norm": 0.928395688533783, "learning_rate": 2e-06, "loss": 0.177, "step": 3188 }, { "epoch": 1.1894815367400224, "grad_norm": 0.8586599826812744, "learning_rate": 2e-06, "loss": 0.1869, "step": 3189 }, { "epoch": 1.1898545318910854, "grad_norm": 0.9390157461166382, "learning_rate": 2e-06, "loss": 0.1719, "step": 3190 }, { "epoch": 1.1902275270421485, "grad_norm": 0.6344807147979736, "learning_rate": 2e-06, "loss": 0.199, "step": 3191 }, { "epoch": 1.1906005221932114, "grad_norm": 0.8370267152786255, "learning_rate": 2e-06, "loss": 0.1976, "step": 3192 }, { "epoch": 1.1909735173442746, "grad_norm": 1.072619915008545, "learning_rate": 2e-06, "loss": 0.1486, "step": 3193 }, { "epoch": 1.1913465124953375, "grad_norm": 0.8324980735778809, "learning_rate": 2e-06, "loss": 0.1416, "step": 3194 }, { "epoch": 1.1917195076464007, "grad_norm": 0.7687094807624817, "learning_rate": 2e-06, "loss": 0.1712, "step": 3195 }, { "epoch": 1.1920925027974636, "grad_norm": 1.1814143657684326, "learning_rate": 2e-06, "loss": 0.1477, "step": 3196 }, { "epoch": 1.1924654979485267, "grad_norm": 0.8518527746200562, "learning_rate": 2e-06, "loss": 0.1624, "step": 3197 }, { "epoch": 1.1928384930995897, "grad_norm": 0.7813330292701721, "learning_rate": 2e-06, "loss": 0.1842, "step": 3198 }, { "epoch": 1.1932114882506528, "grad_norm": 0.7543914914131165, "learning_rate": 2e-06, "loss": 0.1815, "step": 3199 }, { "epoch": 1.1935844834017157, "grad_norm": 0.82588791847229, "learning_rate": 2e-06, "loss": 0.1554, "step": 3200 }, { "epoch": 1.193957478552779, "grad_norm": 0.9049121141433716, "learning_rate": 2e-06, "loss": 0.1793, "step": 3201 }, { "epoch": 1.1943304737038418, "grad_norm": 0.7791276574134827, "learning_rate": 2e-06, "loss": 0.1948, "step": 3202 }, { "epoch": 1.194703468854905, "grad_norm": 0.8451886177062988, "learning_rate": 2e-06, "loss": 0.1817, "step": 3203 }, { "epoch": 1.195076464005968, "grad_norm": 1.0839483737945557, "learning_rate": 2e-06, "loss": 0.1663, "step": 3204 }, { "epoch": 1.195449459157031, "grad_norm": 1.022441029548645, "learning_rate": 2e-06, "loss": 0.1962, "step": 3205 }, { "epoch": 1.195822454308094, "grad_norm": 0.7772676944732666, "learning_rate": 2e-06, "loss": 0.1805, "step": 3206 }, { "epoch": 1.1961954494591571, "grad_norm": 0.8146712779998779, "learning_rate": 2e-06, "loss": 0.1884, "step": 3207 }, { "epoch": 1.19656844461022, "grad_norm": 0.8844830989837646, "learning_rate": 2e-06, "loss": 0.1661, "step": 3208 }, { "epoch": 1.1969414397612832, "grad_norm": 0.731651782989502, "learning_rate": 2e-06, "loss": 0.1628, "step": 3209 }, { "epoch": 1.1973144349123461, "grad_norm": 0.9754188656806946, "learning_rate": 2e-06, "loss": 0.1999, "step": 3210 }, { "epoch": 1.197687430063409, "grad_norm": 0.9516862630844116, "learning_rate": 2e-06, "loss": 0.1694, "step": 3211 }, { "epoch": 1.1980604252144722, "grad_norm": 0.9047666192054749, "learning_rate": 2e-06, "loss": 0.1808, "step": 3212 }, { "epoch": 1.1984334203655354, "grad_norm": 0.7278349995613098, "learning_rate": 2e-06, "loss": 0.1777, "step": 3213 }, { "epoch": 1.1988064155165983, "grad_norm": 0.8950292468070984, "learning_rate": 2e-06, "loss": 0.1786, "step": 3214 }, { "epoch": 1.1991794106676612, "grad_norm": 0.9229537844657898, "learning_rate": 2e-06, "loss": 0.1562, "step": 3215 }, { "epoch": 1.1995524058187244, "grad_norm": 0.8889737725257874, "learning_rate": 2e-06, "loss": 0.2094, "step": 3216 }, { "epoch": 1.1999254009697875, "grad_norm": 0.8189123868942261, "learning_rate": 2e-06, "loss": 0.1768, "step": 3217 }, { "epoch": 1.2002983961208504, "grad_norm": 0.7604515552520752, "learning_rate": 2e-06, "loss": 0.1904, "step": 3218 }, { "epoch": 1.2006713912719134, "grad_norm": 0.9923982620239258, "learning_rate": 2e-06, "loss": 0.1793, "step": 3219 }, { "epoch": 1.2010443864229765, "grad_norm": 0.9033641219139099, "learning_rate": 2e-06, "loss": 0.1887, "step": 3220 }, { "epoch": 1.2014173815740397, "grad_norm": 1.0953088998794556, "learning_rate": 2e-06, "loss": 0.1849, "step": 3221 }, { "epoch": 1.2017903767251026, "grad_norm": 0.8663217425346375, "learning_rate": 2e-06, "loss": 0.1658, "step": 3222 }, { "epoch": 1.2021633718761655, "grad_norm": 0.9251155853271484, "learning_rate": 2e-06, "loss": 0.1855, "step": 3223 }, { "epoch": 1.2025363670272287, "grad_norm": 0.8335624933242798, "learning_rate": 2e-06, "loss": 0.1837, "step": 3224 }, { "epoch": 1.2029093621782916, "grad_norm": 0.9986758232116699, "learning_rate": 2e-06, "loss": 0.1692, "step": 3225 }, { "epoch": 1.2032823573293547, "grad_norm": 0.94474858045578, "learning_rate": 2e-06, "loss": 0.1817, "step": 3226 }, { "epoch": 1.2036553524804177, "grad_norm": 0.8060982823371887, "learning_rate": 2e-06, "loss": 0.2019, "step": 3227 }, { "epoch": 1.2040283476314808, "grad_norm": 0.9454132914543152, "learning_rate": 2e-06, "loss": 0.1803, "step": 3228 }, { "epoch": 1.2044013427825437, "grad_norm": 0.8073694109916687, "learning_rate": 2e-06, "loss": 0.1822, "step": 3229 }, { "epoch": 1.2047743379336069, "grad_norm": 0.871017336845398, "learning_rate": 2e-06, "loss": 0.1613, "step": 3230 }, { "epoch": 1.2051473330846698, "grad_norm": 0.862648606300354, "learning_rate": 2e-06, "loss": 0.1752, "step": 3231 }, { "epoch": 1.205520328235733, "grad_norm": 0.8989576697349548, "learning_rate": 2e-06, "loss": 0.1476, "step": 3232 }, { "epoch": 1.205893323386796, "grad_norm": 1.0184097290039062, "learning_rate": 2e-06, "loss": 0.189, "step": 3233 }, { "epoch": 1.206266318537859, "grad_norm": 0.9016634821891785, "learning_rate": 2e-06, "loss": 0.154, "step": 3234 }, { "epoch": 1.206639313688922, "grad_norm": 0.8381644487380981, "learning_rate": 2e-06, "loss": 0.1593, "step": 3235 }, { "epoch": 1.2070123088399851, "grad_norm": 0.7903696894645691, "learning_rate": 2e-06, "loss": 0.1765, "step": 3236 }, { "epoch": 1.207385303991048, "grad_norm": 0.8675532341003418, "learning_rate": 2e-06, "loss": 0.1636, "step": 3237 }, { "epoch": 1.2077582991421112, "grad_norm": 0.8489370346069336, "learning_rate": 2e-06, "loss": 0.1657, "step": 3238 }, { "epoch": 1.2081312942931741, "grad_norm": 0.786296546459198, "learning_rate": 2e-06, "loss": 0.1814, "step": 3239 }, { "epoch": 1.2085042894442373, "grad_norm": 0.8526907563209534, "learning_rate": 2e-06, "loss": 0.1786, "step": 3240 }, { "epoch": 1.2088772845953002, "grad_norm": 0.768966794013977, "learning_rate": 2e-06, "loss": 0.1854, "step": 3241 }, { "epoch": 1.2092502797463633, "grad_norm": 0.900233805179596, "learning_rate": 2e-06, "loss": 0.159, "step": 3242 }, { "epoch": 1.2096232748974263, "grad_norm": 0.8253253698348999, "learning_rate": 2e-06, "loss": 0.1725, "step": 3243 }, { "epoch": 1.2099962700484894, "grad_norm": 0.9610042572021484, "learning_rate": 2e-06, "loss": 0.1906, "step": 3244 }, { "epoch": 1.2103692651995523, "grad_norm": 0.9348904490470886, "learning_rate": 2e-06, "loss": 0.1658, "step": 3245 }, { "epoch": 1.2107422603506155, "grad_norm": 0.861602783203125, "learning_rate": 2e-06, "loss": 0.1574, "step": 3246 }, { "epoch": 1.2111152555016784, "grad_norm": 0.955656886100769, "learning_rate": 2e-06, "loss": 0.1951, "step": 3247 }, { "epoch": 1.2114882506527416, "grad_norm": 0.7461603879928589, "learning_rate": 2e-06, "loss": 0.1748, "step": 3248 }, { "epoch": 1.2118612458038045, "grad_norm": 0.7716725468635559, "learning_rate": 2e-06, "loss": 0.195, "step": 3249 }, { "epoch": 1.2122342409548676, "grad_norm": 1.4598941802978516, "learning_rate": 2e-06, "loss": 0.2123, "step": 3250 }, { "epoch": 1.2126072361059306, "grad_norm": 0.7669709920883179, "learning_rate": 2e-06, "loss": 0.1876, "step": 3251 }, { "epoch": 1.2129802312569937, "grad_norm": 0.8234936594963074, "learning_rate": 2e-06, "loss": 0.1798, "step": 3252 }, { "epoch": 1.2133532264080567, "grad_norm": 0.810755729675293, "learning_rate": 2e-06, "loss": 0.1872, "step": 3253 }, { "epoch": 1.2137262215591198, "grad_norm": 0.8722345232963562, "learning_rate": 2e-06, "loss": 0.2074, "step": 3254 }, { "epoch": 1.2140992167101827, "grad_norm": 0.9641183614730835, "learning_rate": 2e-06, "loss": 0.1829, "step": 3255 }, { "epoch": 1.2144722118612459, "grad_norm": 0.9822907447814941, "learning_rate": 2e-06, "loss": 0.1663, "step": 3256 }, { "epoch": 1.2148452070123088, "grad_norm": 0.8878806233406067, "learning_rate": 2e-06, "loss": 0.1752, "step": 3257 }, { "epoch": 1.215218202163372, "grad_norm": 0.9884559512138367, "learning_rate": 2e-06, "loss": 0.1399, "step": 3258 }, { "epoch": 1.2155911973144349, "grad_norm": 0.7708275318145752, "learning_rate": 2e-06, "loss": 0.1548, "step": 3259 }, { "epoch": 1.215964192465498, "grad_norm": 0.7927259802818298, "learning_rate": 2e-06, "loss": 0.1605, "step": 3260 }, { "epoch": 1.216337187616561, "grad_norm": 0.9770100116729736, "learning_rate": 2e-06, "loss": 0.1813, "step": 3261 }, { "epoch": 1.216710182767624, "grad_norm": 0.7358347177505493, "learning_rate": 2e-06, "loss": 0.1875, "step": 3262 }, { "epoch": 1.217083177918687, "grad_norm": 0.9071336388587952, "learning_rate": 2e-06, "loss": 0.1658, "step": 3263 }, { "epoch": 1.2174561730697502, "grad_norm": 0.8504250049591064, "learning_rate": 2e-06, "loss": 0.1769, "step": 3264 }, { "epoch": 1.217829168220813, "grad_norm": 0.9126635789871216, "learning_rate": 2e-06, "loss": 0.1549, "step": 3265 }, { "epoch": 1.2182021633718763, "grad_norm": 0.7635887265205383, "learning_rate": 2e-06, "loss": 0.1745, "step": 3266 }, { "epoch": 1.2185751585229392, "grad_norm": 0.8920212388038635, "learning_rate": 2e-06, "loss": 0.1775, "step": 3267 }, { "epoch": 1.2189481536740023, "grad_norm": 0.8246463537216187, "learning_rate": 2e-06, "loss": 0.1629, "step": 3268 }, { "epoch": 1.2193211488250653, "grad_norm": 0.6937832832336426, "learning_rate": 2e-06, "loss": 0.2003, "step": 3269 }, { "epoch": 1.2196941439761284, "grad_norm": 0.8229489922523499, "learning_rate": 2e-06, "loss": 0.1688, "step": 3270 }, { "epoch": 1.2200671391271913, "grad_norm": 0.9750005006790161, "learning_rate": 2e-06, "loss": 0.1855, "step": 3271 }, { "epoch": 1.2204401342782543, "grad_norm": 0.8259793519973755, "learning_rate": 2e-06, "loss": 0.1708, "step": 3272 }, { "epoch": 1.2208131294293174, "grad_norm": 0.991344690322876, "learning_rate": 2e-06, "loss": 0.1595, "step": 3273 }, { "epoch": 1.2211861245803806, "grad_norm": 0.9491727948188782, "learning_rate": 2e-06, "loss": 0.1836, "step": 3274 }, { "epoch": 1.2215591197314435, "grad_norm": 0.7574078440666199, "learning_rate": 2e-06, "loss": 0.1649, "step": 3275 }, { "epoch": 1.2219321148825064, "grad_norm": 0.978081226348877, "learning_rate": 2e-06, "loss": 0.1668, "step": 3276 }, { "epoch": 1.2223051100335696, "grad_norm": 0.8880773186683655, "learning_rate": 2e-06, "loss": 0.1677, "step": 3277 }, { "epoch": 1.2226781051846327, "grad_norm": 0.7735600471496582, "learning_rate": 2e-06, "loss": 0.1763, "step": 3278 }, { "epoch": 1.2230511003356956, "grad_norm": 0.8324731588363647, "learning_rate": 2e-06, "loss": 0.1953, "step": 3279 }, { "epoch": 1.2234240954867586, "grad_norm": 1.084121823310852, "learning_rate": 2e-06, "loss": 0.1781, "step": 3280 }, { "epoch": 1.2237970906378217, "grad_norm": 0.9488623738288879, "learning_rate": 2e-06, "loss": 0.1643, "step": 3281 }, { "epoch": 1.2241700857888849, "grad_norm": 0.8826572895050049, "learning_rate": 2e-06, "loss": 0.1694, "step": 3282 }, { "epoch": 1.2245430809399478, "grad_norm": 0.9055793881416321, "learning_rate": 2e-06, "loss": 0.1737, "step": 3283 }, { "epoch": 1.2249160760910107, "grad_norm": 0.7469251751899719, "learning_rate": 2e-06, "loss": 0.2005, "step": 3284 }, { "epoch": 1.2252890712420739, "grad_norm": 0.7145209908485413, "learning_rate": 2e-06, "loss": 0.1669, "step": 3285 }, { "epoch": 1.2256620663931368, "grad_norm": 0.8582295775413513, "learning_rate": 2e-06, "loss": 0.1701, "step": 3286 }, { "epoch": 1.2260350615442, "grad_norm": 1.0873770713806152, "learning_rate": 2e-06, "loss": 0.1674, "step": 3287 }, { "epoch": 1.2264080566952629, "grad_norm": 0.8442969918251038, "learning_rate": 2e-06, "loss": 0.1845, "step": 3288 }, { "epoch": 1.226781051846326, "grad_norm": 0.7890739440917969, "learning_rate": 2e-06, "loss": 0.1942, "step": 3289 }, { "epoch": 1.227154046997389, "grad_norm": 0.8074654340744019, "learning_rate": 2e-06, "loss": 0.1661, "step": 3290 }, { "epoch": 1.227527042148452, "grad_norm": 0.9691796898841858, "learning_rate": 2e-06, "loss": 0.158, "step": 3291 }, { "epoch": 1.227900037299515, "grad_norm": 0.7009101510047913, "learning_rate": 2e-06, "loss": 0.193, "step": 3292 }, { "epoch": 1.2282730324505782, "grad_norm": 0.8654706478118896, "learning_rate": 2e-06, "loss": 0.1888, "step": 3293 }, { "epoch": 1.228646027601641, "grad_norm": 0.8423889875411987, "learning_rate": 2e-06, "loss": 0.1553, "step": 3294 }, { "epoch": 1.2290190227527042, "grad_norm": 0.9004198312759399, "learning_rate": 2e-06, "loss": 0.1926, "step": 3295 }, { "epoch": 1.2293920179037672, "grad_norm": 1.007470726966858, "learning_rate": 2e-06, "loss": 0.1724, "step": 3296 }, { "epoch": 1.2297650130548303, "grad_norm": 0.7950885891914368, "learning_rate": 2e-06, "loss": 0.1916, "step": 3297 }, { "epoch": 1.2301380082058933, "grad_norm": 0.8011130094528198, "learning_rate": 2e-06, "loss": 0.1762, "step": 3298 }, { "epoch": 1.2305110033569564, "grad_norm": 0.8759319186210632, "learning_rate": 2e-06, "loss": 0.1738, "step": 3299 }, { "epoch": 1.2308839985080193, "grad_norm": 1.011559247970581, "learning_rate": 2e-06, "loss": 0.1797, "step": 3300 }, { "epoch": 1.2312569936590825, "grad_norm": 0.9534258842468262, "learning_rate": 2e-06, "loss": 0.1845, "step": 3301 }, { "epoch": 1.2316299888101454, "grad_norm": 0.7696974873542786, "learning_rate": 2e-06, "loss": 0.1902, "step": 3302 }, { "epoch": 1.2320029839612086, "grad_norm": 0.7385734915733337, "learning_rate": 2e-06, "loss": 0.1545, "step": 3303 }, { "epoch": 1.2323759791122715, "grad_norm": 0.9486271739006042, "learning_rate": 2e-06, "loss": 0.1905, "step": 3304 }, { "epoch": 1.2327489742633346, "grad_norm": 0.8371375799179077, "learning_rate": 2e-06, "loss": 0.1673, "step": 3305 }, { "epoch": 1.2331219694143976, "grad_norm": 0.8582019209861755, "learning_rate": 2e-06, "loss": 0.1901, "step": 3306 }, { "epoch": 1.2334949645654607, "grad_norm": 0.7915928363800049, "learning_rate": 2e-06, "loss": 0.1693, "step": 3307 }, { "epoch": 1.2338679597165236, "grad_norm": 0.6742517948150635, "learning_rate": 2e-06, "loss": 0.1668, "step": 3308 }, { "epoch": 1.2342409548675868, "grad_norm": 0.9014734029769897, "learning_rate": 2e-06, "loss": 0.1666, "step": 3309 }, { "epoch": 1.2346139500186497, "grad_norm": 1.0617607831954956, "learning_rate": 2e-06, "loss": 0.1765, "step": 3310 }, { "epoch": 1.2349869451697129, "grad_norm": 0.8812627196311951, "learning_rate": 2e-06, "loss": 0.1742, "step": 3311 }, { "epoch": 1.2353599403207758, "grad_norm": 0.8380026817321777, "learning_rate": 2e-06, "loss": 0.1682, "step": 3312 }, { "epoch": 1.235732935471839, "grad_norm": 1.0298974514007568, "learning_rate": 2e-06, "loss": 0.1655, "step": 3313 }, { "epoch": 1.2361059306229019, "grad_norm": 0.8663948774337769, "learning_rate": 2e-06, "loss": 0.1849, "step": 3314 }, { "epoch": 1.236478925773965, "grad_norm": 0.9392432570457458, "learning_rate": 2e-06, "loss": 0.1802, "step": 3315 }, { "epoch": 1.236851920925028, "grad_norm": 0.978553831577301, "learning_rate": 2e-06, "loss": 0.1694, "step": 3316 }, { "epoch": 1.237224916076091, "grad_norm": 0.8932417631149292, "learning_rate": 2e-06, "loss": 0.1838, "step": 3317 }, { "epoch": 1.237597911227154, "grad_norm": 0.8616489768028259, "learning_rate": 2e-06, "loss": 0.1894, "step": 3318 }, { "epoch": 1.2379709063782172, "grad_norm": 0.6649342179298401, "learning_rate": 2e-06, "loss": 0.1958, "step": 3319 }, { "epoch": 1.23834390152928, "grad_norm": 0.9157842993736267, "learning_rate": 2e-06, "loss": 0.1579, "step": 3320 }, { "epoch": 1.2387168966803432, "grad_norm": 0.8716059923171997, "learning_rate": 2e-06, "loss": 0.1548, "step": 3321 }, { "epoch": 1.2390898918314062, "grad_norm": 0.8244392275810242, "learning_rate": 2e-06, "loss": 0.1757, "step": 3322 }, { "epoch": 1.2394628869824693, "grad_norm": 0.7916471362113953, "learning_rate": 2e-06, "loss": 0.175, "step": 3323 }, { "epoch": 1.2398358821335322, "grad_norm": 0.770331621170044, "learning_rate": 2e-06, "loss": 0.1822, "step": 3324 }, { "epoch": 1.2402088772845954, "grad_norm": 0.9696360230445862, "learning_rate": 2e-06, "loss": 0.1575, "step": 3325 }, { "epoch": 1.2405818724356583, "grad_norm": 0.7174018025398254, "learning_rate": 2e-06, "loss": 0.188, "step": 3326 }, { "epoch": 1.2409548675867215, "grad_norm": 0.7654297947883606, "learning_rate": 2e-06, "loss": 0.1897, "step": 3327 }, { "epoch": 1.2413278627377844, "grad_norm": 0.8742178678512573, "learning_rate": 2e-06, "loss": 0.1955, "step": 3328 }, { "epoch": 1.2417008578888473, "grad_norm": 0.8055425882339478, "learning_rate": 2e-06, "loss": 0.155, "step": 3329 }, { "epoch": 1.2420738530399105, "grad_norm": 1.020470380783081, "learning_rate": 2e-06, "loss": 0.1668, "step": 3330 }, { "epoch": 1.2424468481909736, "grad_norm": 0.8503913879394531, "learning_rate": 2e-06, "loss": 0.2041, "step": 3331 }, { "epoch": 1.2428198433420365, "grad_norm": 0.8493950963020325, "learning_rate": 2e-06, "loss": 0.1806, "step": 3332 }, { "epoch": 1.2431928384930995, "grad_norm": 0.7783392667770386, "learning_rate": 2e-06, "loss": 0.1847, "step": 3333 }, { "epoch": 1.2435658336441626, "grad_norm": 0.8209758400917053, "learning_rate": 2e-06, "loss": 0.1686, "step": 3334 }, { "epoch": 1.2439388287952258, "grad_norm": 0.8989965319633484, "learning_rate": 2e-06, "loss": 0.1694, "step": 3335 }, { "epoch": 1.2443118239462887, "grad_norm": 0.9490621089935303, "learning_rate": 2e-06, "loss": 0.2182, "step": 3336 }, { "epoch": 1.2446848190973516, "grad_norm": 0.8969272971153259, "learning_rate": 2e-06, "loss": 0.1864, "step": 3337 }, { "epoch": 1.2450578142484148, "grad_norm": 0.9026789665222168, "learning_rate": 2e-06, "loss": 0.1746, "step": 3338 }, { "epoch": 1.245430809399478, "grad_norm": 0.9275833964347839, "learning_rate": 2e-06, "loss": 0.1927, "step": 3339 }, { "epoch": 1.2458038045505408, "grad_norm": 0.767615795135498, "learning_rate": 2e-06, "loss": 0.1586, "step": 3340 }, { "epoch": 1.2461767997016038, "grad_norm": 0.8231456875801086, "learning_rate": 2e-06, "loss": 0.1685, "step": 3341 }, { "epoch": 1.246549794852667, "grad_norm": 1.0836302042007446, "learning_rate": 2e-06, "loss": 0.177, "step": 3342 }, { "epoch": 1.24692279000373, "grad_norm": 0.7376538515090942, "learning_rate": 2e-06, "loss": 0.1663, "step": 3343 }, { "epoch": 1.247295785154793, "grad_norm": 1.021653175354004, "learning_rate": 2e-06, "loss": 0.1866, "step": 3344 }, { "epoch": 1.247668780305856, "grad_norm": 0.9321963787078857, "learning_rate": 2e-06, "loss": 0.1669, "step": 3345 }, { "epoch": 1.248041775456919, "grad_norm": 0.9565950632095337, "learning_rate": 2e-06, "loss": 0.2036, "step": 3346 }, { "epoch": 1.248414770607982, "grad_norm": 0.8496840000152588, "learning_rate": 2e-06, "loss": 0.1688, "step": 3347 }, { "epoch": 1.2487877657590452, "grad_norm": 0.7822213172912598, "learning_rate": 2e-06, "loss": 0.2007, "step": 3348 }, { "epoch": 1.249160760910108, "grad_norm": 0.7038050889968872, "learning_rate": 2e-06, "loss": 0.1665, "step": 3349 }, { "epoch": 1.2495337560611712, "grad_norm": 0.9303511381149292, "learning_rate": 2e-06, "loss": 0.1746, "step": 3350 }, { "epoch": 1.2499067512122342, "grad_norm": 0.8511055707931519, "learning_rate": 2e-06, "loss": 0.1808, "step": 3351 }, { "epoch": 1.2502797463632973, "grad_norm": 0.775958240032196, "learning_rate": 2e-06, "loss": 0.1747, "step": 3352 }, { "epoch": 1.2506527415143602, "grad_norm": 0.8463741540908813, "learning_rate": 2e-06, "loss": 0.1975, "step": 3353 }, { "epoch": 1.2510257366654234, "grad_norm": 0.835712730884552, "learning_rate": 2e-06, "loss": 0.1616, "step": 3354 }, { "epoch": 1.2513987318164863, "grad_norm": 0.8376092910766602, "learning_rate": 2e-06, "loss": 0.1733, "step": 3355 }, { "epoch": 1.2517717269675495, "grad_norm": 0.9930102825164795, "learning_rate": 2e-06, "loss": 0.1589, "step": 3356 }, { "epoch": 1.2521447221186124, "grad_norm": 1.1112449169158936, "learning_rate": 2e-06, "loss": 0.1613, "step": 3357 }, { "epoch": 1.2525177172696755, "grad_norm": 0.742405116558075, "learning_rate": 2e-06, "loss": 0.1661, "step": 3358 }, { "epoch": 1.2528907124207385, "grad_norm": 0.8182538747787476, "learning_rate": 2e-06, "loss": 0.1612, "step": 3359 }, { "epoch": 1.2532637075718016, "grad_norm": 0.9043644070625305, "learning_rate": 2e-06, "loss": 0.1838, "step": 3360 }, { "epoch": 1.2536367027228645, "grad_norm": 1.141879677772522, "learning_rate": 2e-06, "loss": 0.1462, "step": 3361 }, { "epoch": 1.2540096978739277, "grad_norm": 1.0150796175003052, "learning_rate": 2e-06, "loss": 0.1886, "step": 3362 }, { "epoch": 1.2543826930249906, "grad_norm": 0.8159539103507996, "learning_rate": 2e-06, "loss": 0.1863, "step": 3363 }, { "epoch": 1.2547556881760538, "grad_norm": 0.6446197628974915, "learning_rate": 2e-06, "loss": 0.1834, "step": 3364 }, { "epoch": 1.2551286833271167, "grad_norm": 0.8473682999610901, "learning_rate": 2e-06, "loss": 0.1912, "step": 3365 }, { "epoch": 1.2555016784781798, "grad_norm": 0.7837570309638977, "learning_rate": 2e-06, "loss": 0.199, "step": 3366 }, { "epoch": 1.2558746736292428, "grad_norm": 0.860008955001831, "learning_rate": 2e-06, "loss": 0.1754, "step": 3367 }, { "epoch": 1.256247668780306, "grad_norm": 0.8537551164627075, "learning_rate": 2e-06, "loss": 0.1942, "step": 3368 }, { "epoch": 1.2566206639313688, "grad_norm": 0.8908722996711731, "learning_rate": 2e-06, "loss": 0.1822, "step": 3369 }, { "epoch": 1.256993659082432, "grad_norm": 0.7877961993217468, "learning_rate": 2e-06, "loss": 0.1989, "step": 3370 }, { "epoch": 1.257366654233495, "grad_norm": 0.8129847645759583, "learning_rate": 2e-06, "loss": 0.1891, "step": 3371 }, { "epoch": 1.257739649384558, "grad_norm": 0.7783628106117249, "learning_rate": 2e-06, "loss": 0.1816, "step": 3372 }, { "epoch": 1.258112644535621, "grad_norm": 0.8858956098556519, "learning_rate": 2e-06, "loss": 0.1844, "step": 3373 }, { "epoch": 1.2584856396866841, "grad_norm": 1.051157832145691, "learning_rate": 2e-06, "loss": 0.1723, "step": 3374 }, { "epoch": 1.258858634837747, "grad_norm": 0.7804939150810242, "learning_rate": 2e-06, "loss": 0.1879, "step": 3375 }, { "epoch": 1.2592316299888102, "grad_norm": 0.7052149772644043, "learning_rate": 2e-06, "loss": 0.1583, "step": 3376 }, { "epoch": 1.2596046251398731, "grad_norm": 0.7698361873626709, "learning_rate": 2e-06, "loss": 0.1615, "step": 3377 }, { "epoch": 1.2599776202909363, "grad_norm": 0.856197714805603, "learning_rate": 2e-06, "loss": 0.1962, "step": 3378 }, { "epoch": 1.2603506154419992, "grad_norm": 0.8717204928398132, "learning_rate": 2e-06, "loss": 0.1832, "step": 3379 }, { "epoch": 1.2607236105930624, "grad_norm": 0.9286772012710571, "learning_rate": 2e-06, "loss": 0.1889, "step": 3380 }, { "epoch": 1.2610966057441253, "grad_norm": 0.8275402188301086, "learning_rate": 2e-06, "loss": 0.2055, "step": 3381 }, { "epoch": 1.2614696008951882, "grad_norm": 0.9069740772247314, "learning_rate": 2e-06, "loss": 0.1666, "step": 3382 }, { "epoch": 1.2618425960462514, "grad_norm": 0.7774726152420044, "learning_rate": 2e-06, "loss": 0.1809, "step": 3383 }, { "epoch": 1.2622155911973145, "grad_norm": 0.8130296468734741, "learning_rate": 2e-06, "loss": 0.1811, "step": 3384 }, { "epoch": 1.2625885863483775, "grad_norm": 0.7389621734619141, "learning_rate": 2e-06, "loss": 0.2018, "step": 3385 }, { "epoch": 1.2629615814994404, "grad_norm": 0.9418674111366272, "learning_rate": 2e-06, "loss": 0.1803, "step": 3386 }, { "epoch": 1.2633345766505035, "grad_norm": 0.7591339945793152, "learning_rate": 2e-06, "loss": 0.1909, "step": 3387 }, { "epoch": 1.2637075718015667, "grad_norm": 0.8138677477836609, "learning_rate": 2e-06, "loss": 0.1797, "step": 3388 }, { "epoch": 1.2640805669526296, "grad_norm": 0.8125805258750916, "learning_rate": 2e-06, "loss": 0.1665, "step": 3389 }, { "epoch": 1.2644535621036925, "grad_norm": 0.7709164023399353, "learning_rate": 2e-06, "loss": 0.1671, "step": 3390 }, { "epoch": 1.2648265572547557, "grad_norm": 0.8257821798324585, "learning_rate": 2e-06, "loss": 0.1937, "step": 3391 }, { "epoch": 1.2651995524058188, "grad_norm": 0.8422971963882446, "learning_rate": 2e-06, "loss": 0.1989, "step": 3392 }, { "epoch": 1.2655725475568818, "grad_norm": 0.6928606033325195, "learning_rate": 2e-06, "loss": 0.1746, "step": 3393 }, { "epoch": 1.2659455427079447, "grad_norm": 0.9882359504699707, "learning_rate": 2e-06, "loss": 0.1862, "step": 3394 }, { "epoch": 1.2663185378590078, "grad_norm": 0.8621399998664856, "learning_rate": 2e-06, "loss": 0.1743, "step": 3395 }, { "epoch": 1.266691533010071, "grad_norm": 1.0959256887435913, "learning_rate": 2e-06, "loss": 0.1849, "step": 3396 }, { "epoch": 1.267064528161134, "grad_norm": 0.7865292429924011, "learning_rate": 2e-06, "loss": 0.1799, "step": 3397 }, { "epoch": 1.2674375233121968, "grad_norm": 0.8051555752754211, "learning_rate": 2e-06, "loss": 0.1864, "step": 3398 }, { "epoch": 1.26781051846326, "grad_norm": 0.9391381144523621, "learning_rate": 2e-06, "loss": 0.1665, "step": 3399 }, { "epoch": 1.2681835136143231, "grad_norm": 0.8075238466262817, "learning_rate": 2e-06, "loss": 0.1772, "step": 3400 }, { "epoch": 1.268556508765386, "grad_norm": 0.7545331716537476, "learning_rate": 2e-06, "loss": 0.201, "step": 3401 }, { "epoch": 1.268929503916449, "grad_norm": 1.0169298648834229, "learning_rate": 2e-06, "loss": 0.1961, "step": 3402 }, { "epoch": 1.2693024990675121, "grad_norm": 0.8848214149475098, "learning_rate": 2e-06, "loss": 0.1644, "step": 3403 }, { "epoch": 1.2696754942185753, "grad_norm": 0.8766170144081116, "learning_rate": 2e-06, "loss": 0.1618, "step": 3404 }, { "epoch": 1.2700484893696382, "grad_norm": 0.7571418881416321, "learning_rate": 2e-06, "loss": 0.2112, "step": 3405 }, { "epoch": 1.2704214845207011, "grad_norm": 0.7592628002166748, "learning_rate": 2e-06, "loss": 0.1948, "step": 3406 }, { "epoch": 1.2707944796717643, "grad_norm": 0.8979808688163757, "learning_rate": 2e-06, "loss": 0.1786, "step": 3407 }, { "epoch": 1.2711674748228274, "grad_norm": 0.6716671586036682, "learning_rate": 2e-06, "loss": 0.17, "step": 3408 }, { "epoch": 1.2715404699738904, "grad_norm": 0.8468878865242004, "learning_rate": 2e-06, "loss": 0.203, "step": 3409 }, { "epoch": 1.2719134651249533, "grad_norm": 0.7712104320526123, "learning_rate": 2e-06, "loss": 0.1905, "step": 3410 }, { "epoch": 1.2722864602760164, "grad_norm": 0.9257428050041199, "learning_rate": 2e-06, "loss": 0.1844, "step": 3411 }, { "epoch": 1.2726594554270794, "grad_norm": 0.6785974502563477, "learning_rate": 2e-06, "loss": 0.1914, "step": 3412 }, { "epoch": 1.2730324505781425, "grad_norm": 1.3537355661392212, "learning_rate": 2e-06, "loss": 0.1815, "step": 3413 }, { "epoch": 1.2734054457292054, "grad_norm": 0.8483142256736755, "learning_rate": 2e-06, "loss": 0.1768, "step": 3414 }, { "epoch": 1.2737784408802686, "grad_norm": 1.1422353982925415, "learning_rate": 2e-06, "loss": 0.1546, "step": 3415 }, { "epoch": 1.2741514360313315, "grad_norm": 0.9172865152359009, "learning_rate": 2e-06, "loss": 0.1744, "step": 3416 }, { "epoch": 1.2745244311823947, "grad_norm": 0.7021533250808716, "learning_rate": 2e-06, "loss": 0.1613, "step": 3417 }, { "epoch": 1.2748974263334576, "grad_norm": 0.8259556293487549, "learning_rate": 2e-06, "loss": 0.1717, "step": 3418 }, { "epoch": 1.2752704214845207, "grad_norm": 0.8163936138153076, "learning_rate": 2e-06, "loss": 0.1762, "step": 3419 }, { "epoch": 1.2756434166355837, "grad_norm": 0.9422831535339355, "learning_rate": 2e-06, "loss": 0.173, "step": 3420 }, { "epoch": 1.2760164117866468, "grad_norm": 0.9047982692718506, "learning_rate": 2e-06, "loss": 0.1398, "step": 3421 }, { "epoch": 1.2763894069377097, "grad_norm": 0.8642675280570984, "learning_rate": 2e-06, "loss": 0.1919, "step": 3422 }, { "epoch": 1.276762402088773, "grad_norm": 0.9346157908439636, "learning_rate": 2e-06, "loss": 0.1839, "step": 3423 }, { "epoch": 1.2771353972398358, "grad_norm": 1.0517826080322266, "learning_rate": 2e-06, "loss": 0.1504, "step": 3424 }, { "epoch": 1.277508392390899, "grad_norm": 0.8306368589401245, "learning_rate": 2e-06, "loss": 0.1839, "step": 3425 }, { "epoch": 1.277881387541962, "grad_norm": 0.7983290553092957, "learning_rate": 2e-06, "loss": 0.1762, "step": 3426 }, { "epoch": 1.278254382693025, "grad_norm": 0.9148134589195251, "learning_rate": 2e-06, "loss": 0.1868, "step": 3427 }, { "epoch": 1.278627377844088, "grad_norm": 0.8771006464958191, "learning_rate": 2e-06, "loss": 0.183, "step": 3428 }, { "epoch": 1.2790003729951511, "grad_norm": 0.8368268609046936, "learning_rate": 2e-06, "loss": 0.1773, "step": 3429 }, { "epoch": 1.279373368146214, "grad_norm": 0.739120602607727, "learning_rate": 2e-06, "loss": 0.1769, "step": 3430 }, { "epoch": 1.2797463632972772, "grad_norm": 0.7802889943122864, "learning_rate": 2e-06, "loss": 0.1903, "step": 3431 }, { "epoch": 1.2801193584483401, "grad_norm": 0.7751269340515137, "learning_rate": 2e-06, "loss": 0.1626, "step": 3432 }, { "epoch": 1.2804923535994033, "grad_norm": 0.8479204773902893, "learning_rate": 2e-06, "loss": 0.1707, "step": 3433 }, { "epoch": 1.2808653487504662, "grad_norm": 0.7793521285057068, "learning_rate": 2e-06, "loss": 0.1784, "step": 3434 }, { "epoch": 1.2812383439015294, "grad_norm": 0.9618846774101257, "learning_rate": 2e-06, "loss": 0.1799, "step": 3435 }, { "epoch": 1.2816113390525923, "grad_norm": 0.851000964641571, "learning_rate": 2e-06, "loss": 0.1816, "step": 3436 }, { "epoch": 1.2819843342036554, "grad_norm": 0.8636474609375, "learning_rate": 2e-06, "loss": 0.1705, "step": 3437 }, { "epoch": 1.2823573293547184, "grad_norm": 0.8866831660270691, "learning_rate": 2e-06, "loss": 0.1706, "step": 3438 }, { "epoch": 1.2827303245057815, "grad_norm": 0.7788911461830139, "learning_rate": 2e-06, "loss": 0.1625, "step": 3439 }, { "epoch": 1.2831033196568444, "grad_norm": 0.8367660641670227, "learning_rate": 2e-06, "loss": 0.1798, "step": 3440 }, { "epoch": 1.2834763148079076, "grad_norm": 0.8873689770698547, "learning_rate": 2e-06, "loss": 0.156, "step": 3441 }, { "epoch": 1.2838493099589705, "grad_norm": 0.8883787989616394, "learning_rate": 2e-06, "loss": 0.1868, "step": 3442 }, { "epoch": 1.2842223051100334, "grad_norm": 0.9331953525543213, "learning_rate": 2e-06, "loss": 0.156, "step": 3443 }, { "epoch": 1.2845953002610966, "grad_norm": 0.8173520565032959, "learning_rate": 2e-06, "loss": 0.1537, "step": 3444 }, { "epoch": 1.2849682954121597, "grad_norm": 0.9498346447944641, "learning_rate": 2e-06, "loss": 0.1806, "step": 3445 }, { "epoch": 1.2853412905632227, "grad_norm": 0.895688533782959, "learning_rate": 2e-06, "loss": 0.1623, "step": 3446 }, { "epoch": 1.2857142857142856, "grad_norm": 0.8428869843482971, "learning_rate": 2e-06, "loss": 0.1719, "step": 3447 }, { "epoch": 1.2860872808653487, "grad_norm": 0.7438954710960388, "learning_rate": 2e-06, "loss": 0.1907, "step": 3448 }, { "epoch": 1.2864602760164119, "grad_norm": 0.7870915532112122, "learning_rate": 2e-06, "loss": 0.178, "step": 3449 }, { "epoch": 1.2868332711674748, "grad_norm": 0.8029397130012512, "learning_rate": 2e-06, "loss": 0.1936, "step": 3450 }, { "epoch": 1.2872062663185377, "grad_norm": 0.7314515709877014, "learning_rate": 2e-06, "loss": 0.1884, "step": 3451 }, { "epoch": 1.2875792614696009, "grad_norm": 0.8736901879310608, "learning_rate": 2e-06, "loss": 0.1544, "step": 3452 }, { "epoch": 1.287952256620664, "grad_norm": 0.7890917658805847, "learning_rate": 2e-06, "loss": 0.2086, "step": 3453 }, { "epoch": 1.288325251771727, "grad_norm": 0.7943508625030518, "learning_rate": 2e-06, "loss": 0.2101, "step": 3454 }, { "epoch": 1.28869824692279, "grad_norm": 0.8769956827163696, "learning_rate": 2e-06, "loss": 0.1624, "step": 3455 }, { "epoch": 1.289071242073853, "grad_norm": 0.7464186549186707, "learning_rate": 2e-06, "loss": 0.1895, "step": 3456 }, { "epoch": 1.2894442372249162, "grad_norm": 0.9821365475654602, "learning_rate": 2e-06, "loss": 0.1554, "step": 3457 }, { "epoch": 1.2898172323759791, "grad_norm": 0.8359647989273071, "learning_rate": 2e-06, "loss": 0.1744, "step": 3458 }, { "epoch": 1.290190227527042, "grad_norm": 0.9072916507720947, "learning_rate": 2e-06, "loss": 0.141, "step": 3459 }, { "epoch": 1.2905632226781052, "grad_norm": 0.8552671670913696, "learning_rate": 2e-06, "loss": 0.1806, "step": 3460 }, { "epoch": 1.2909362178291683, "grad_norm": 0.712040364742279, "learning_rate": 2e-06, "loss": 0.1927, "step": 3461 }, { "epoch": 1.2913092129802313, "grad_norm": 0.8424603939056396, "learning_rate": 2e-06, "loss": 0.1906, "step": 3462 }, { "epoch": 1.2916822081312942, "grad_norm": 0.8206614255905151, "learning_rate": 2e-06, "loss": 0.1851, "step": 3463 }, { "epoch": 1.2920552032823573, "grad_norm": 0.8678895235061646, "learning_rate": 2e-06, "loss": 0.1733, "step": 3464 }, { "epoch": 1.2924281984334205, "grad_norm": 1.121751070022583, "learning_rate": 2e-06, "loss": 0.1707, "step": 3465 }, { "epoch": 1.2928011935844834, "grad_norm": 0.761107325553894, "learning_rate": 2e-06, "loss": 0.2014, "step": 3466 }, { "epoch": 1.2931741887355463, "grad_norm": 0.7471256256103516, "learning_rate": 2e-06, "loss": 0.1714, "step": 3467 }, { "epoch": 1.2935471838866095, "grad_norm": 0.7299144864082336, "learning_rate": 2e-06, "loss": 0.1989, "step": 3468 }, { "epoch": 1.2939201790376726, "grad_norm": 0.7292885184288025, "learning_rate": 2e-06, "loss": 0.1725, "step": 3469 }, { "epoch": 1.2942931741887356, "grad_norm": 0.865257203578949, "learning_rate": 2e-06, "loss": 0.1694, "step": 3470 }, { "epoch": 1.2946661693397985, "grad_norm": 0.8350855112075806, "learning_rate": 2e-06, "loss": 0.2165, "step": 3471 }, { "epoch": 1.2950391644908616, "grad_norm": 0.9833308458328247, "learning_rate": 2e-06, "loss": 0.191, "step": 3472 }, { "epoch": 1.2954121596419246, "grad_norm": 0.7581614255905151, "learning_rate": 2e-06, "loss": 0.1889, "step": 3473 }, { "epoch": 1.2957851547929877, "grad_norm": 0.7036195993423462, "learning_rate": 2e-06, "loss": 0.17, "step": 3474 }, { "epoch": 1.2961581499440507, "grad_norm": 0.9631215333938599, "learning_rate": 2e-06, "loss": 0.1921, "step": 3475 }, { "epoch": 1.2965311450951138, "grad_norm": 0.7833698391914368, "learning_rate": 2e-06, "loss": 0.1653, "step": 3476 }, { "epoch": 1.2969041402461767, "grad_norm": 0.9034780859947205, "learning_rate": 2e-06, "loss": 0.1727, "step": 3477 }, { "epoch": 1.2972771353972399, "grad_norm": 0.8868849873542786, "learning_rate": 2e-06, "loss": 0.1877, "step": 3478 }, { "epoch": 1.2976501305483028, "grad_norm": 0.9908497333526611, "learning_rate": 2e-06, "loss": 0.1727, "step": 3479 }, { "epoch": 1.298023125699366, "grad_norm": 1.0780117511749268, "learning_rate": 2e-06, "loss": 0.1824, "step": 3480 }, { "epoch": 1.2983961208504289, "grad_norm": 0.7969884872436523, "learning_rate": 2e-06, "loss": 0.1961, "step": 3481 }, { "epoch": 1.298769116001492, "grad_norm": 0.7080938220024109, "learning_rate": 2e-06, "loss": 0.1844, "step": 3482 }, { "epoch": 1.299142111152555, "grad_norm": 0.74416583776474, "learning_rate": 2e-06, "loss": 0.1956, "step": 3483 }, { "epoch": 1.299515106303618, "grad_norm": 0.7142170071601868, "learning_rate": 2e-06, "loss": 0.1575, "step": 3484 }, { "epoch": 1.299888101454681, "grad_norm": 0.8319236636161804, "learning_rate": 2e-06, "loss": 0.1815, "step": 3485 }, { "epoch": 1.3002610966057442, "grad_norm": 0.7043223977088928, "learning_rate": 2e-06, "loss": 0.1902, "step": 3486 }, { "epoch": 1.300634091756807, "grad_norm": 0.9965785145759583, "learning_rate": 2e-06, "loss": 0.182, "step": 3487 }, { "epoch": 1.3010070869078703, "grad_norm": 0.9984187483787537, "learning_rate": 2e-06, "loss": 0.177, "step": 3488 }, { "epoch": 1.3013800820589332, "grad_norm": 0.8230312466621399, "learning_rate": 2e-06, "loss": 0.1851, "step": 3489 }, { "epoch": 1.3017530772099963, "grad_norm": 0.9267519116401672, "learning_rate": 2e-06, "loss": 0.1511, "step": 3490 }, { "epoch": 1.3021260723610593, "grad_norm": 1.0271918773651123, "learning_rate": 2e-06, "loss": 0.1864, "step": 3491 }, { "epoch": 1.3024990675121224, "grad_norm": 0.7048060894012451, "learning_rate": 2e-06, "loss": 0.1758, "step": 3492 }, { "epoch": 1.3028720626631853, "grad_norm": 0.8692246675491333, "learning_rate": 2e-06, "loss": 0.1936, "step": 3493 }, { "epoch": 1.3032450578142485, "grad_norm": 0.9325323104858398, "learning_rate": 2e-06, "loss": 0.1974, "step": 3494 }, { "epoch": 1.3036180529653114, "grad_norm": 0.8448071479797363, "learning_rate": 2e-06, "loss": 0.1977, "step": 3495 }, { "epoch": 1.3039910481163746, "grad_norm": 0.9959546327590942, "learning_rate": 2e-06, "loss": 0.1649, "step": 3496 }, { "epoch": 1.3043640432674375, "grad_norm": 0.9607266187667847, "learning_rate": 2e-06, "loss": 0.1682, "step": 3497 }, { "epoch": 1.3047370384185006, "grad_norm": 0.8607829809188843, "learning_rate": 2e-06, "loss": 0.1858, "step": 3498 }, { "epoch": 1.3051100335695636, "grad_norm": 0.745607852935791, "learning_rate": 2e-06, "loss": 0.1922, "step": 3499 }, { "epoch": 1.3054830287206267, "grad_norm": 0.9684203863143921, "learning_rate": 2e-06, "loss": 0.1847, "step": 3500 }, { "epoch": 1.3058560238716896, "grad_norm": 0.8838106393814087, "learning_rate": 2e-06, "loss": 0.1705, "step": 3501 }, { "epoch": 1.3062290190227528, "grad_norm": 0.6893669366836548, "learning_rate": 2e-06, "loss": 0.1854, "step": 3502 }, { "epoch": 1.3066020141738157, "grad_norm": 1.0511152744293213, "learning_rate": 2e-06, "loss": 0.1377, "step": 3503 }, { "epoch": 1.3069750093248786, "grad_norm": 0.9512085318565369, "learning_rate": 2e-06, "loss": 0.1607, "step": 3504 }, { "epoch": 1.3073480044759418, "grad_norm": 0.7935556769371033, "learning_rate": 2e-06, "loss": 0.1804, "step": 3505 }, { "epoch": 1.307720999627005, "grad_norm": 0.8509654402732849, "learning_rate": 2e-06, "loss": 0.1935, "step": 3506 }, { "epoch": 1.3080939947780679, "grad_norm": 0.7852665185928345, "learning_rate": 2e-06, "loss": 0.1956, "step": 3507 }, { "epoch": 1.3084669899291308, "grad_norm": 0.9303327798843384, "learning_rate": 2e-06, "loss": 0.1933, "step": 3508 }, { "epoch": 1.308839985080194, "grad_norm": 0.826799213886261, "learning_rate": 2e-06, "loss": 0.1857, "step": 3509 }, { "epoch": 1.309212980231257, "grad_norm": 0.9075019955635071, "learning_rate": 2e-06, "loss": 0.1621, "step": 3510 }, { "epoch": 1.30958597538232, "grad_norm": 0.799278736114502, "learning_rate": 2e-06, "loss": 0.1763, "step": 3511 }, { "epoch": 1.309958970533383, "grad_norm": 0.8598150610923767, "learning_rate": 2e-06, "loss": 0.1953, "step": 3512 }, { "epoch": 1.310331965684446, "grad_norm": 0.8468312621116638, "learning_rate": 2e-06, "loss": 0.1801, "step": 3513 }, { "epoch": 1.3107049608355092, "grad_norm": 0.6998907923698425, "learning_rate": 2e-06, "loss": 0.204, "step": 3514 }, { "epoch": 1.3110779559865722, "grad_norm": 0.8227024078369141, "learning_rate": 2e-06, "loss": 0.1816, "step": 3515 }, { "epoch": 1.311450951137635, "grad_norm": 0.9913482666015625, "learning_rate": 2e-06, "loss": 0.1653, "step": 3516 }, { "epoch": 1.3118239462886983, "grad_norm": 0.943264901638031, "learning_rate": 2e-06, "loss": 0.1543, "step": 3517 }, { "epoch": 1.3121969414397614, "grad_norm": 0.7550935745239258, "learning_rate": 2e-06, "loss": 0.1902, "step": 3518 }, { "epoch": 1.3125699365908243, "grad_norm": 0.8443924784660339, "learning_rate": 2e-06, "loss": 0.2, "step": 3519 }, { "epoch": 1.3129429317418873, "grad_norm": 0.9058012962341309, "learning_rate": 2e-06, "loss": 0.1988, "step": 3520 }, { "epoch": 1.3133159268929504, "grad_norm": 0.7314608097076416, "learning_rate": 2e-06, "loss": 0.1784, "step": 3521 }, { "epoch": 1.3136889220440136, "grad_norm": 0.7242250442504883, "learning_rate": 2e-06, "loss": 0.2013, "step": 3522 }, { "epoch": 1.3140619171950765, "grad_norm": 0.8896464109420776, "learning_rate": 2e-06, "loss": 0.1736, "step": 3523 }, { "epoch": 1.3144349123461394, "grad_norm": 0.8654773831367493, "learning_rate": 2e-06, "loss": 0.1667, "step": 3524 }, { "epoch": 1.3148079074972026, "grad_norm": 0.7222042083740234, "learning_rate": 2e-06, "loss": 0.1894, "step": 3525 }, { "epoch": 1.3151809026482657, "grad_norm": 0.8195365071296692, "learning_rate": 2e-06, "loss": 0.1891, "step": 3526 }, { "epoch": 1.3155538977993286, "grad_norm": 0.8395869135856628, "learning_rate": 2e-06, "loss": 0.1459, "step": 3527 }, { "epoch": 1.3159268929503916, "grad_norm": 0.9338215589523315, "learning_rate": 2e-06, "loss": 0.167, "step": 3528 }, { "epoch": 1.3162998881014547, "grad_norm": 0.7787503600120544, "learning_rate": 2e-06, "loss": 0.1633, "step": 3529 }, { "epoch": 1.3166728832525176, "grad_norm": 0.8153784871101379, "learning_rate": 2e-06, "loss": 0.187, "step": 3530 }, { "epoch": 1.3170458784035808, "grad_norm": 0.8160140514373779, "learning_rate": 2e-06, "loss": 0.1581, "step": 3531 }, { "epoch": 1.3174188735546437, "grad_norm": 0.8012742400169373, "learning_rate": 2e-06, "loss": 0.1751, "step": 3532 }, { "epoch": 1.3177918687057069, "grad_norm": 0.8687873482704163, "learning_rate": 2e-06, "loss": 0.167, "step": 3533 }, { "epoch": 1.3181648638567698, "grad_norm": 0.7522039413452148, "learning_rate": 2e-06, "loss": 0.1851, "step": 3534 }, { "epoch": 1.318537859007833, "grad_norm": 0.7726649641990662, "learning_rate": 2e-06, "loss": 0.1729, "step": 3535 }, { "epoch": 1.3189108541588959, "grad_norm": 1.0126386880874634, "learning_rate": 2e-06, "loss": 0.1626, "step": 3536 }, { "epoch": 1.319283849309959, "grad_norm": 0.7855116128921509, "learning_rate": 2e-06, "loss": 0.1712, "step": 3537 }, { "epoch": 1.319656844461022, "grad_norm": 1.0064393281936646, "learning_rate": 2e-06, "loss": 0.202, "step": 3538 }, { "epoch": 1.320029839612085, "grad_norm": 0.7562961578369141, "learning_rate": 2e-06, "loss": 0.16, "step": 3539 }, { "epoch": 1.320402834763148, "grad_norm": 1.415057897567749, "learning_rate": 2e-06, "loss": 0.2099, "step": 3540 }, { "epoch": 1.3207758299142112, "grad_norm": 0.8418998122215271, "learning_rate": 2e-06, "loss": 0.1682, "step": 3541 }, { "epoch": 1.321148825065274, "grad_norm": 0.8778200149536133, "learning_rate": 2e-06, "loss": 0.1845, "step": 3542 }, { "epoch": 1.3215218202163372, "grad_norm": 0.9023861885070801, "learning_rate": 2e-06, "loss": 0.1657, "step": 3543 }, { "epoch": 1.3218948153674002, "grad_norm": 0.8259907960891724, "learning_rate": 2e-06, "loss": 0.1752, "step": 3544 }, { "epoch": 1.3222678105184633, "grad_norm": 0.8332602381706238, "learning_rate": 2e-06, "loss": 0.1871, "step": 3545 }, { "epoch": 1.3226408056695262, "grad_norm": 0.867527186870575, "learning_rate": 2e-06, "loss": 0.1719, "step": 3546 }, { "epoch": 1.3230138008205894, "grad_norm": 0.8810713291168213, "learning_rate": 2e-06, "loss": 0.1792, "step": 3547 }, { "epoch": 1.3233867959716523, "grad_norm": 0.8919878005981445, "learning_rate": 2e-06, "loss": 0.1628, "step": 3548 }, { "epoch": 1.3237597911227155, "grad_norm": 0.7543186545372009, "learning_rate": 2e-06, "loss": 0.1772, "step": 3549 }, { "epoch": 1.3241327862737784, "grad_norm": 0.9065512418746948, "learning_rate": 2e-06, "loss": 0.1626, "step": 3550 }, { "epoch": 1.3245057814248415, "grad_norm": 0.7425785064697266, "learning_rate": 2e-06, "loss": 0.1752, "step": 3551 }, { "epoch": 1.3248787765759045, "grad_norm": 0.8500096797943115, "learning_rate": 2e-06, "loss": 0.1716, "step": 3552 }, { "epoch": 1.3252517717269676, "grad_norm": 0.8537177443504333, "learning_rate": 2e-06, "loss": 0.2047, "step": 3553 }, { "epoch": 1.3256247668780305, "grad_norm": 0.8357781171798706, "learning_rate": 2e-06, "loss": 0.1539, "step": 3554 }, { "epoch": 1.3259977620290937, "grad_norm": 0.6988581418991089, "learning_rate": 2e-06, "loss": 0.2113, "step": 3555 }, { "epoch": 1.3263707571801566, "grad_norm": 0.8920429348945618, "learning_rate": 2e-06, "loss": 0.2028, "step": 3556 }, { "epoch": 1.3267437523312198, "grad_norm": 0.7355238795280457, "learning_rate": 2e-06, "loss": 0.1684, "step": 3557 }, { "epoch": 1.3271167474822827, "grad_norm": 0.8334603309631348, "learning_rate": 2e-06, "loss": 0.1895, "step": 3558 }, { "epoch": 1.3274897426333458, "grad_norm": 0.934355616569519, "learning_rate": 2e-06, "loss": 0.1987, "step": 3559 }, { "epoch": 1.3278627377844088, "grad_norm": 0.8834189176559448, "learning_rate": 2e-06, "loss": 0.1969, "step": 3560 }, { "epoch": 1.3282357329354717, "grad_norm": 0.8306138515472412, "learning_rate": 2e-06, "loss": 0.1676, "step": 3561 }, { "epoch": 1.3286087280865349, "grad_norm": 0.7662971615791321, "learning_rate": 2e-06, "loss": 0.1859, "step": 3562 }, { "epoch": 1.328981723237598, "grad_norm": 0.7877843976020813, "learning_rate": 2e-06, "loss": 0.1863, "step": 3563 }, { "epoch": 1.329354718388661, "grad_norm": 0.7044568657875061, "learning_rate": 2e-06, "loss": 0.1853, "step": 3564 }, { "epoch": 1.3297277135397239, "grad_norm": 0.9124874472618103, "learning_rate": 2e-06, "loss": 0.1692, "step": 3565 }, { "epoch": 1.330100708690787, "grad_norm": 0.8338447213172913, "learning_rate": 2e-06, "loss": 0.1856, "step": 3566 }, { "epoch": 1.3304737038418502, "grad_norm": 0.8886219263076782, "learning_rate": 2e-06, "loss": 0.1897, "step": 3567 }, { "epoch": 1.330846698992913, "grad_norm": 0.8743810057640076, "learning_rate": 2e-06, "loss": 0.1707, "step": 3568 }, { "epoch": 1.331219694143976, "grad_norm": 0.9743001461029053, "learning_rate": 2e-06, "loss": 0.1792, "step": 3569 }, { "epoch": 1.3315926892950392, "grad_norm": 0.7047078609466553, "learning_rate": 2e-06, "loss": 0.1485, "step": 3570 }, { "epoch": 1.3319656844461023, "grad_norm": 0.8155364990234375, "learning_rate": 2e-06, "loss": 0.1868, "step": 3571 }, { "epoch": 1.3323386795971652, "grad_norm": 0.8437638282775879, "learning_rate": 2e-06, "loss": 0.1938, "step": 3572 }, { "epoch": 1.3327116747482282, "grad_norm": 0.8208735585212708, "learning_rate": 2e-06, "loss": 0.1758, "step": 3573 }, { "epoch": 1.3330846698992913, "grad_norm": 0.8507697582244873, "learning_rate": 2e-06, "loss": 0.1872, "step": 3574 }, { "epoch": 1.3334576650503545, "grad_norm": 1.0221673250198364, "learning_rate": 2e-06, "loss": 0.1811, "step": 3575 }, { "epoch": 1.3338306602014174, "grad_norm": 1.0259143114089966, "learning_rate": 2e-06, "loss": 0.1552, "step": 3576 }, { "epoch": 1.3342036553524803, "grad_norm": 0.84647136926651, "learning_rate": 2e-06, "loss": 0.1657, "step": 3577 }, { "epoch": 1.3345766505035435, "grad_norm": 0.7799012064933777, "learning_rate": 2e-06, "loss": 0.1667, "step": 3578 }, { "epoch": 1.3349496456546066, "grad_norm": 0.783950924873352, "learning_rate": 2e-06, "loss": 0.179, "step": 3579 }, { "epoch": 1.3353226408056695, "grad_norm": 0.8423864245414734, "learning_rate": 2e-06, "loss": 0.1635, "step": 3580 }, { "epoch": 1.3356956359567325, "grad_norm": 0.9163655638694763, "learning_rate": 2e-06, "loss": 0.1675, "step": 3581 }, { "epoch": 1.3360686311077956, "grad_norm": 0.9183643460273743, "learning_rate": 2e-06, "loss": 0.1816, "step": 3582 }, { "epoch": 1.3364416262588588, "grad_norm": 0.7788419723510742, "learning_rate": 2e-06, "loss": 0.1685, "step": 3583 }, { "epoch": 1.3368146214099217, "grad_norm": 0.766430675983429, "learning_rate": 2e-06, "loss": 0.1901, "step": 3584 }, { "epoch": 1.3371876165609846, "grad_norm": 0.8182829022407532, "learning_rate": 2e-06, "loss": 0.1761, "step": 3585 }, { "epoch": 1.3375606117120478, "grad_norm": 1.0208874940872192, "learning_rate": 2e-06, "loss": 0.1947, "step": 3586 }, { "epoch": 1.337933606863111, "grad_norm": 0.6351725459098816, "learning_rate": 2e-06, "loss": 0.1826, "step": 3587 }, { "epoch": 1.3383066020141738, "grad_norm": 0.8203393220901489, "learning_rate": 2e-06, "loss": 0.1675, "step": 3588 }, { "epoch": 1.3386795971652368, "grad_norm": 0.9045948386192322, "learning_rate": 2e-06, "loss": 0.146, "step": 3589 }, { "epoch": 1.3390525923163, "grad_norm": 0.9547637701034546, "learning_rate": 2e-06, "loss": 0.1901, "step": 3590 }, { "epoch": 1.3394255874673628, "grad_norm": 0.7877739071846008, "learning_rate": 2e-06, "loss": 0.1727, "step": 3591 }, { "epoch": 1.339798582618426, "grad_norm": 0.7385690212249756, "learning_rate": 2e-06, "loss": 0.181, "step": 3592 }, { "epoch": 1.340171577769489, "grad_norm": 0.9216082692146301, "learning_rate": 2e-06, "loss": 0.1833, "step": 3593 }, { "epoch": 1.340544572920552, "grad_norm": 0.8181357979774475, "learning_rate": 2e-06, "loss": 0.1669, "step": 3594 }, { "epoch": 1.340917568071615, "grad_norm": 0.8494221568107605, "learning_rate": 2e-06, "loss": 0.1739, "step": 3595 }, { "epoch": 1.3412905632226781, "grad_norm": 0.7377349734306335, "learning_rate": 2e-06, "loss": 0.1831, "step": 3596 }, { "epoch": 1.341663558373741, "grad_norm": 0.8541255593299866, "learning_rate": 2e-06, "loss": 0.1939, "step": 3597 }, { "epoch": 1.3420365535248042, "grad_norm": 0.9828417301177979, "learning_rate": 2e-06, "loss": 0.1809, "step": 3598 }, { "epoch": 1.3424095486758671, "grad_norm": 0.927667498588562, "learning_rate": 2e-06, "loss": 0.1747, "step": 3599 }, { "epoch": 1.3427825438269303, "grad_norm": 0.7784784436225891, "learning_rate": 2e-06, "loss": 0.2118, "step": 3600 }, { "epoch": 1.3431555389779932, "grad_norm": 0.8601499199867249, "learning_rate": 2e-06, "loss": 0.1803, "step": 3601 }, { "epoch": 1.3435285341290564, "grad_norm": 0.8997236490249634, "learning_rate": 2e-06, "loss": 0.1517, "step": 3602 }, { "epoch": 1.3439015292801193, "grad_norm": 0.8633137345314026, "learning_rate": 2e-06, "loss": 0.1862, "step": 3603 }, { "epoch": 1.3442745244311824, "grad_norm": 0.7448244094848633, "learning_rate": 2e-06, "loss": 0.1734, "step": 3604 }, { "epoch": 1.3446475195822454, "grad_norm": 0.9602162837982178, "learning_rate": 2e-06, "loss": 0.1777, "step": 3605 }, { "epoch": 1.3450205147333085, "grad_norm": 0.8684641718864441, "learning_rate": 2e-06, "loss": 0.1792, "step": 3606 }, { "epoch": 1.3453935098843715, "grad_norm": 0.8100408911705017, "learning_rate": 2e-06, "loss": 0.1687, "step": 3607 }, { "epoch": 1.3457665050354346, "grad_norm": 1.104630947113037, "learning_rate": 2e-06, "loss": 0.1531, "step": 3608 }, { "epoch": 1.3461395001864975, "grad_norm": 0.8846316933631897, "learning_rate": 2e-06, "loss": 0.1543, "step": 3609 }, { "epoch": 1.3465124953375607, "grad_norm": 0.7638189196586609, "learning_rate": 2e-06, "loss": 0.1796, "step": 3610 }, { "epoch": 1.3468854904886236, "grad_norm": 0.7917189598083496, "learning_rate": 2e-06, "loss": 0.1809, "step": 3611 }, { "epoch": 1.3472584856396868, "grad_norm": 0.8159979581832886, "learning_rate": 2e-06, "loss": 0.1549, "step": 3612 }, { "epoch": 1.3476314807907497, "grad_norm": 0.9030905365943909, "learning_rate": 2e-06, "loss": 0.1727, "step": 3613 }, { "epoch": 1.3480044759418128, "grad_norm": 1.038041353225708, "learning_rate": 2e-06, "loss": 0.1844, "step": 3614 }, { "epoch": 1.3483774710928758, "grad_norm": 1.303641676902771, "learning_rate": 2e-06, "loss": 0.1732, "step": 3615 }, { "epoch": 1.348750466243939, "grad_norm": 0.7515785098075867, "learning_rate": 2e-06, "loss": 0.19, "step": 3616 }, { "epoch": 1.3491234613950018, "grad_norm": 0.9691701531410217, "learning_rate": 2e-06, "loss": 0.1956, "step": 3617 }, { "epoch": 1.349496456546065, "grad_norm": 0.9112219214439392, "learning_rate": 2e-06, "loss": 0.2046, "step": 3618 }, { "epoch": 1.349869451697128, "grad_norm": 0.884212851524353, "learning_rate": 2e-06, "loss": 0.1818, "step": 3619 }, { "epoch": 1.350242446848191, "grad_norm": 0.9163956046104431, "learning_rate": 2e-06, "loss": 0.1742, "step": 3620 }, { "epoch": 1.350615441999254, "grad_norm": 0.8489210605621338, "learning_rate": 2e-06, "loss": 0.1669, "step": 3621 }, { "epoch": 1.350988437150317, "grad_norm": 0.8145354986190796, "learning_rate": 2e-06, "loss": 0.161, "step": 3622 }, { "epoch": 1.35136143230138, "grad_norm": 0.8844558596611023, "learning_rate": 2e-06, "loss": 0.1696, "step": 3623 }, { "epoch": 1.3517344274524432, "grad_norm": 0.7853000164031982, "learning_rate": 2e-06, "loss": 0.2181, "step": 3624 }, { "epoch": 1.3521074226035061, "grad_norm": 0.9659788608551025, "learning_rate": 2e-06, "loss": 0.1348, "step": 3625 }, { "epoch": 1.352480417754569, "grad_norm": 0.9042392373085022, "learning_rate": 2e-06, "loss": 0.1674, "step": 3626 }, { "epoch": 1.3528534129056322, "grad_norm": 0.8739671111106873, "learning_rate": 2e-06, "loss": 0.1797, "step": 3627 }, { "epoch": 1.3532264080566954, "grad_norm": 0.7573285698890686, "learning_rate": 2e-06, "loss": 0.1971, "step": 3628 }, { "epoch": 1.3535994032077583, "grad_norm": 1.0241622924804688, "learning_rate": 2e-06, "loss": 0.1851, "step": 3629 }, { "epoch": 1.3539723983588212, "grad_norm": 0.993787944316864, "learning_rate": 2e-06, "loss": 0.1831, "step": 3630 }, { "epoch": 1.3543453935098844, "grad_norm": 0.8161888718605042, "learning_rate": 2e-06, "loss": 0.2129, "step": 3631 }, { "epoch": 1.3547183886609475, "grad_norm": 0.6883386969566345, "learning_rate": 2e-06, "loss": 0.1978, "step": 3632 }, { "epoch": 1.3550913838120104, "grad_norm": 1.069414496421814, "learning_rate": 2e-06, "loss": 0.168, "step": 3633 }, { "epoch": 1.3554643789630734, "grad_norm": 1.0125114917755127, "learning_rate": 2e-06, "loss": 0.147, "step": 3634 }, { "epoch": 1.3558373741141365, "grad_norm": 0.9145538806915283, "learning_rate": 2e-06, "loss": 0.1778, "step": 3635 }, { "epoch": 1.3562103692651997, "grad_norm": 0.7186193466186523, "learning_rate": 2e-06, "loss": 0.1714, "step": 3636 }, { "epoch": 1.3565833644162626, "grad_norm": 0.865889310836792, "learning_rate": 2e-06, "loss": 0.1729, "step": 3637 }, { "epoch": 1.3569563595673255, "grad_norm": 0.8581032156944275, "learning_rate": 2e-06, "loss": 0.1735, "step": 3638 }, { "epoch": 1.3573293547183887, "grad_norm": 0.8382797241210938, "learning_rate": 2e-06, "loss": 0.2026, "step": 3639 }, { "epoch": 1.3577023498694518, "grad_norm": 0.7909567356109619, "learning_rate": 2e-06, "loss": 0.1734, "step": 3640 }, { "epoch": 1.3580753450205147, "grad_norm": 0.7466737627983093, "learning_rate": 2e-06, "loss": 0.2037, "step": 3641 }, { "epoch": 1.3584483401715777, "grad_norm": 0.8211172819137573, "learning_rate": 2e-06, "loss": 0.1697, "step": 3642 }, { "epoch": 1.3588213353226408, "grad_norm": 0.8499763607978821, "learning_rate": 2e-06, "loss": 0.2066, "step": 3643 }, { "epoch": 1.359194330473704, "grad_norm": 0.9824777841567993, "learning_rate": 2e-06, "loss": 0.1867, "step": 3644 }, { "epoch": 1.359567325624767, "grad_norm": 0.861191987991333, "learning_rate": 2e-06, "loss": 0.1761, "step": 3645 }, { "epoch": 1.3599403207758298, "grad_norm": 0.7015323042869568, "learning_rate": 2e-06, "loss": 0.1711, "step": 3646 }, { "epoch": 1.360313315926893, "grad_norm": 0.820705235004425, "learning_rate": 2e-06, "loss": 0.1902, "step": 3647 }, { "epoch": 1.3606863110779561, "grad_norm": 0.7300550937652588, "learning_rate": 2e-06, "loss": 0.1773, "step": 3648 }, { "epoch": 1.361059306229019, "grad_norm": 0.8403828144073486, "learning_rate": 2e-06, "loss": 0.1958, "step": 3649 }, { "epoch": 1.361432301380082, "grad_norm": 0.8235494494438171, "learning_rate": 2e-06, "loss": 0.1658, "step": 3650 }, { "epoch": 1.3618052965311451, "grad_norm": 0.745837390422821, "learning_rate": 2e-06, "loss": 0.1695, "step": 3651 }, { "epoch": 1.362178291682208, "grad_norm": 0.8523180484771729, "learning_rate": 2e-06, "loss": 0.1738, "step": 3652 }, { "epoch": 1.3625512868332712, "grad_norm": 0.6496727466583252, "learning_rate": 2e-06, "loss": 0.1982, "step": 3653 }, { "epoch": 1.3629242819843341, "grad_norm": 0.865291953086853, "learning_rate": 2e-06, "loss": 0.1754, "step": 3654 }, { "epoch": 1.3632972771353973, "grad_norm": 0.7384949326515198, "learning_rate": 2e-06, "loss": 0.2045, "step": 3655 }, { "epoch": 1.3636702722864602, "grad_norm": 0.7269544005393982, "learning_rate": 2e-06, "loss": 0.1894, "step": 3656 }, { "epoch": 1.3640432674375234, "grad_norm": 0.9611575603485107, "learning_rate": 2e-06, "loss": 0.1603, "step": 3657 }, { "epoch": 1.3644162625885863, "grad_norm": 0.7415220737457275, "learning_rate": 2e-06, "loss": 0.1735, "step": 3658 }, { "epoch": 1.3647892577396494, "grad_norm": 0.7607932686805725, "learning_rate": 2e-06, "loss": 0.199, "step": 3659 }, { "epoch": 1.3651622528907124, "grad_norm": 0.878135621547699, "learning_rate": 2e-06, "loss": 0.1979, "step": 3660 }, { "epoch": 1.3655352480417755, "grad_norm": 1.0799223184585571, "learning_rate": 2e-06, "loss": 0.1668, "step": 3661 }, { "epoch": 1.3659082431928384, "grad_norm": 0.7996242046356201, "learning_rate": 2e-06, "loss": 0.1848, "step": 3662 }, { "epoch": 1.3662812383439016, "grad_norm": 0.8289181590080261, "learning_rate": 2e-06, "loss": 0.1653, "step": 3663 }, { "epoch": 1.3666542334949645, "grad_norm": 0.7862618565559387, "learning_rate": 2e-06, "loss": 0.2119, "step": 3664 }, { "epoch": 1.3670272286460277, "grad_norm": 1.074906587600708, "learning_rate": 2e-06, "loss": 0.1571, "step": 3665 }, { "epoch": 1.3674002237970906, "grad_norm": 0.8772356510162354, "learning_rate": 2e-06, "loss": 0.2046, "step": 3666 }, { "epoch": 1.3677732189481537, "grad_norm": 0.8373510241508484, "learning_rate": 2e-06, "loss": 0.17, "step": 3667 }, { "epoch": 1.3681462140992167, "grad_norm": 0.7692822217941284, "learning_rate": 2e-06, "loss": 0.18, "step": 3668 }, { "epoch": 1.3685192092502798, "grad_norm": 0.8668972849845886, "learning_rate": 2e-06, "loss": 0.1655, "step": 3669 }, { "epoch": 1.3688922044013427, "grad_norm": 0.8166212439537048, "learning_rate": 2e-06, "loss": 0.1998, "step": 3670 }, { "epoch": 1.3692651995524059, "grad_norm": 0.7678532004356384, "learning_rate": 2e-06, "loss": 0.1891, "step": 3671 }, { "epoch": 1.3696381947034688, "grad_norm": 0.8810176849365234, "learning_rate": 2e-06, "loss": 0.1883, "step": 3672 }, { "epoch": 1.370011189854532, "grad_norm": 0.8852528929710388, "learning_rate": 2e-06, "loss": 0.1698, "step": 3673 }, { "epoch": 1.370384185005595, "grad_norm": 0.9653005599975586, "learning_rate": 2e-06, "loss": 0.1608, "step": 3674 }, { "epoch": 1.370757180156658, "grad_norm": 1.2038050889968872, "learning_rate": 2e-06, "loss": 0.1618, "step": 3675 }, { "epoch": 1.371130175307721, "grad_norm": 0.9375373721122742, "learning_rate": 2e-06, "loss": 0.1842, "step": 3676 }, { "epoch": 1.3715031704587841, "grad_norm": 0.8090606331825256, "learning_rate": 2e-06, "loss": 0.1793, "step": 3677 }, { "epoch": 1.371876165609847, "grad_norm": 0.880561113357544, "learning_rate": 2e-06, "loss": 0.1718, "step": 3678 }, { "epoch": 1.37224916076091, "grad_norm": 0.7231031656265259, "learning_rate": 2e-06, "loss": 0.1865, "step": 3679 }, { "epoch": 1.3726221559119731, "grad_norm": 0.9513649344444275, "learning_rate": 2e-06, "loss": 0.1615, "step": 3680 }, { "epoch": 1.3729951510630363, "grad_norm": 1.0555766820907593, "learning_rate": 2e-06, "loss": 0.1579, "step": 3681 }, { "epoch": 1.3733681462140992, "grad_norm": 0.6832377910614014, "learning_rate": 2e-06, "loss": 0.1828, "step": 3682 }, { "epoch": 1.3737411413651621, "grad_norm": 1.1560895442962646, "learning_rate": 2e-06, "loss": 0.1624, "step": 3683 }, { "epoch": 1.3741141365162253, "grad_norm": 0.7961918115615845, "learning_rate": 2e-06, "loss": 0.1879, "step": 3684 }, { "epoch": 1.3744871316672884, "grad_norm": 0.8071333169937134, "learning_rate": 2e-06, "loss": 0.1719, "step": 3685 }, { "epoch": 1.3748601268183513, "grad_norm": 0.7563462257385254, "learning_rate": 2e-06, "loss": 0.1969, "step": 3686 }, { "epoch": 1.3752331219694143, "grad_norm": 0.9253326654434204, "learning_rate": 2e-06, "loss": 0.1698, "step": 3687 }, { "epoch": 1.3756061171204774, "grad_norm": 1.0324550867080688, "learning_rate": 2e-06, "loss": 0.1842, "step": 3688 }, { "epoch": 1.3759791122715406, "grad_norm": 0.8233676552772522, "learning_rate": 2e-06, "loss": 0.1824, "step": 3689 }, { "epoch": 1.3763521074226035, "grad_norm": 0.7004298567771912, "learning_rate": 2e-06, "loss": 0.1937, "step": 3690 }, { "epoch": 1.3767251025736664, "grad_norm": 0.7760279178619385, "learning_rate": 2e-06, "loss": 0.1673, "step": 3691 }, { "epoch": 1.3770980977247296, "grad_norm": 0.9002978801727295, "learning_rate": 2e-06, "loss": 0.1868, "step": 3692 }, { "epoch": 1.3774710928757927, "grad_norm": 0.8597623705863953, "learning_rate": 2e-06, "loss": 0.1674, "step": 3693 }, { "epoch": 1.3778440880268557, "grad_norm": 0.8717635869979858, "learning_rate": 2e-06, "loss": 0.172, "step": 3694 }, { "epoch": 1.3782170831779186, "grad_norm": 1.0002869367599487, "learning_rate": 2e-06, "loss": 0.1684, "step": 3695 }, { "epoch": 1.3785900783289817, "grad_norm": 0.7346344590187073, "learning_rate": 2e-06, "loss": 0.1675, "step": 3696 }, { "epoch": 1.3789630734800449, "grad_norm": 0.9241524934768677, "learning_rate": 2e-06, "loss": 0.1434, "step": 3697 }, { "epoch": 1.3793360686311078, "grad_norm": 0.8445773720741272, "learning_rate": 2e-06, "loss": 0.1638, "step": 3698 }, { "epoch": 1.3797090637821707, "grad_norm": 0.753909170627594, "learning_rate": 2e-06, "loss": 0.1807, "step": 3699 }, { "epoch": 1.3800820589332339, "grad_norm": 0.7493666410446167, "learning_rate": 2e-06, "loss": 0.2013, "step": 3700 }, { "epoch": 1.380455054084297, "grad_norm": 0.9014124274253845, "learning_rate": 2e-06, "loss": 0.1592, "step": 3701 }, { "epoch": 1.38082804923536, "grad_norm": 1.009493350982666, "learning_rate": 2e-06, "loss": 0.1974, "step": 3702 }, { "epoch": 1.3812010443864229, "grad_norm": 0.7277168035507202, "learning_rate": 2e-06, "loss": 0.1861, "step": 3703 }, { "epoch": 1.381574039537486, "grad_norm": 0.8998321890830994, "learning_rate": 2e-06, "loss": 0.1837, "step": 3704 }, { "epoch": 1.3819470346885492, "grad_norm": 0.7127776741981506, "learning_rate": 2e-06, "loss": 0.169, "step": 3705 }, { "epoch": 1.382320029839612, "grad_norm": 0.6813253164291382, "learning_rate": 2e-06, "loss": 0.176, "step": 3706 }, { "epoch": 1.382693024990675, "grad_norm": 0.9557228088378906, "learning_rate": 2e-06, "loss": 0.1727, "step": 3707 }, { "epoch": 1.3830660201417382, "grad_norm": 0.8107898831367493, "learning_rate": 2e-06, "loss": 0.1628, "step": 3708 }, { "epoch": 1.383439015292801, "grad_norm": 0.6788238883018494, "learning_rate": 2e-06, "loss": 0.1778, "step": 3709 }, { "epoch": 1.3838120104438643, "grad_norm": 1.0632338523864746, "learning_rate": 2e-06, "loss": 0.1865, "step": 3710 }, { "epoch": 1.3841850055949272, "grad_norm": 0.8087816834449768, "learning_rate": 2e-06, "loss": 0.1927, "step": 3711 }, { "epoch": 1.3845580007459903, "grad_norm": 0.908806562423706, "learning_rate": 2e-06, "loss": 0.1695, "step": 3712 }, { "epoch": 1.3849309958970533, "grad_norm": 0.7336817979812622, "learning_rate": 2e-06, "loss": 0.2078, "step": 3713 }, { "epoch": 1.3853039910481164, "grad_norm": 0.7544472813606262, "learning_rate": 2e-06, "loss": 0.1808, "step": 3714 }, { "epoch": 1.3856769861991793, "grad_norm": 1.0805692672729492, "learning_rate": 2e-06, "loss": 0.1606, "step": 3715 }, { "epoch": 1.3860499813502425, "grad_norm": 0.8819043040275574, "learning_rate": 2e-06, "loss": 0.1758, "step": 3716 }, { "epoch": 1.3864229765013054, "grad_norm": 0.8924500942230225, "learning_rate": 2e-06, "loss": 0.1905, "step": 3717 }, { "epoch": 1.3867959716523686, "grad_norm": 0.9361057877540588, "learning_rate": 2e-06, "loss": 0.1708, "step": 3718 }, { "epoch": 1.3871689668034315, "grad_norm": 0.7389687299728394, "learning_rate": 2e-06, "loss": 0.1592, "step": 3719 }, { "epoch": 1.3875419619544946, "grad_norm": 0.9742639064788818, "learning_rate": 2e-06, "loss": 0.1623, "step": 3720 }, { "epoch": 1.3879149571055576, "grad_norm": 0.8276338577270508, "learning_rate": 2e-06, "loss": 0.1836, "step": 3721 }, { "epoch": 1.3882879522566207, "grad_norm": 0.6912877559661865, "learning_rate": 2e-06, "loss": 0.2043, "step": 3722 }, { "epoch": 1.3886609474076836, "grad_norm": 0.800980269908905, "learning_rate": 2e-06, "loss": 0.1578, "step": 3723 }, { "epoch": 1.3890339425587468, "grad_norm": 0.9739058017730713, "learning_rate": 2e-06, "loss": 0.1518, "step": 3724 }, { "epoch": 1.3894069377098097, "grad_norm": 0.7171704769134521, "learning_rate": 2e-06, "loss": 0.1873, "step": 3725 }, { "epoch": 1.3897799328608729, "grad_norm": 0.8861685395240784, "learning_rate": 2e-06, "loss": 0.1712, "step": 3726 }, { "epoch": 1.3901529280119358, "grad_norm": 0.9731402397155762, "learning_rate": 2e-06, "loss": 0.1414, "step": 3727 }, { "epoch": 1.390525923162999, "grad_norm": 0.8577961325645447, "learning_rate": 2e-06, "loss": 0.1581, "step": 3728 }, { "epoch": 1.3908989183140619, "grad_norm": 0.9071046710014343, "learning_rate": 2e-06, "loss": 0.2025, "step": 3729 }, { "epoch": 1.391271913465125, "grad_norm": 0.819230318069458, "learning_rate": 2e-06, "loss": 0.1892, "step": 3730 }, { "epoch": 1.391644908616188, "grad_norm": 0.7738454937934875, "learning_rate": 2e-06, "loss": 0.208, "step": 3731 }, { "epoch": 1.392017903767251, "grad_norm": 1.0370961427688599, "learning_rate": 2e-06, "loss": 0.2243, "step": 3732 }, { "epoch": 1.392390898918314, "grad_norm": 0.9288608431816101, "learning_rate": 2e-06, "loss": 0.1804, "step": 3733 }, { "epoch": 1.3927638940693772, "grad_norm": 0.8036839365959167, "learning_rate": 2e-06, "loss": 0.1708, "step": 3734 }, { "epoch": 1.39313688922044, "grad_norm": 0.9567200541496277, "learning_rate": 2e-06, "loss": 0.1709, "step": 3735 }, { "epoch": 1.3935098843715032, "grad_norm": 0.7619872093200684, "learning_rate": 2e-06, "loss": 0.1868, "step": 3736 }, { "epoch": 1.3938828795225662, "grad_norm": 0.8426878452301025, "learning_rate": 2e-06, "loss": 0.1786, "step": 3737 }, { "epoch": 1.3942558746736293, "grad_norm": 0.8029159903526306, "learning_rate": 2e-06, "loss": 0.1689, "step": 3738 }, { "epoch": 1.3946288698246923, "grad_norm": 0.6768768429756165, "learning_rate": 2e-06, "loss": 0.203, "step": 3739 }, { "epoch": 1.3950018649757552, "grad_norm": 0.7981117367744446, "learning_rate": 2e-06, "loss": 0.1763, "step": 3740 }, { "epoch": 1.3953748601268183, "grad_norm": 0.7936888933181763, "learning_rate": 2e-06, "loss": 0.2003, "step": 3741 }, { "epoch": 1.3957478552778815, "grad_norm": 0.7479488253593445, "learning_rate": 2e-06, "loss": 0.1828, "step": 3742 }, { "epoch": 1.3961208504289444, "grad_norm": 0.6944711208343506, "learning_rate": 2e-06, "loss": 0.1847, "step": 3743 }, { "epoch": 1.3964938455800073, "grad_norm": 0.7770182490348816, "learning_rate": 2e-06, "loss": 0.195, "step": 3744 }, { "epoch": 1.3968668407310705, "grad_norm": 0.8097798824310303, "learning_rate": 2e-06, "loss": 0.1955, "step": 3745 }, { "epoch": 1.3972398358821336, "grad_norm": 0.9461820721626282, "learning_rate": 2e-06, "loss": 0.1881, "step": 3746 }, { "epoch": 1.3976128310331966, "grad_norm": 0.8137440085411072, "learning_rate": 2e-06, "loss": 0.2015, "step": 3747 }, { "epoch": 1.3979858261842595, "grad_norm": 0.7917315363883972, "learning_rate": 2e-06, "loss": 0.1834, "step": 3748 }, { "epoch": 1.3983588213353226, "grad_norm": 0.8651593327522278, "learning_rate": 2e-06, "loss": 0.1756, "step": 3749 }, { "epoch": 1.3987318164863858, "grad_norm": 0.970923125743866, "learning_rate": 2e-06, "loss": 0.1797, "step": 3750 }, { "epoch": 1.3991048116374487, "grad_norm": 0.8567001819610596, "learning_rate": 2e-06, "loss": 0.1655, "step": 3751 }, { "epoch": 1.3994778067885116, "grad_norm": 0.8745978474617004, "learning_rate": 2e-06, "loss": 0.2025, "step": 3752 }, { "epoch": 1.3998508019395748, "grad_norm": 0.7860786318778992, "learning_rate": 2e-06, "loss": 0.1896, "step": 3753 }, { "epoch": 1.400223797090638, "grad_norm": 0.7396131753921509, "learning_rate": 2e-06, "loss": 0.1841, "step": 3754 }, { "epoch": 1.4005967922417009, "grad_norm": 0.8019807934761047, "learning_rate": 2e-06, "loss": 0.197, "step": 3755 }, { "epoch": 1.4009697873927638, "grad_norm": 0.7490811944007874, "learning_rate": 2e-06, "loss": 0.144, "step": 3756 }, { "epoch": 1.401342782543827, "grad_norm": 0.755095362663269, "learning_rate": 2e-06, "loss": 0.1764, "step": 3757 }, { "epoch": 1.40171577769489, "grad_norm": 0.7235062122344971, "learning_rate": 2e-06, "loss": 0.1791, "step": 3758 }, { "epoch": 1.402088772845953, "grad_norm": 1.0550817251205444, "learning_rate": 2e-06, "loss": 0.2079, "step": 3759 }, { "epoch": 1.402461767997016, "grad_norm": 0.8237030506134033, "learning_rate": 2e-06, "loss": 0.2092, "step": 3760 }, { "epoch": 1.402834763148079, "grad_norm": 1.004006028175354, "learning_rate": 2e-06, "loss": 0.1603, "step": 3761 }, { "epoch": 1.4032077582991422, "grad_norm": 1.0104196071624756, "learning_rate": 2e-06, "loss": 0.1783, "step": 3762 }, { "epoch": 1.4035807534502052, "grad_norm": 0.9355546236038208, "learning_rate": 2e-06, "loss": 0.1886, "step": 3763 }, { "epoch": 1.403953748601268, "grad_norm": 0.8509006500244141, "learning_rate": 2e-06, "loss": 0.1904, "step": 3764 }, { "epoch": 1.4043267437523312, "grad_norm": 0.8506998419761658, "learning_rate": 2e-06, "loss": 0.1715, "step": 3765 }, { "epoch": 1.4046997389033944, "grad_norm": 0.855426549911499, "learning_rate": 2e-06, "loss": 0.1803, "step": 3766 }, { "epoch": 1.4050727340544573, "grad_norm": 0.8464958071708679, "learning_rate": 2e-06, "loss": 0.1771, "step": 3767 }, { "epoch": 1.4054457292055202, "grad_norm": 0.8016109466552734, "learning_rate": 2e-06, "loss": 0.164, "step": 3768 }, { "epoch": 1.4058187243565834, "grad_norm": 1.1009093523025513, "learning_rate": 2e-06, "loss": 0.1687, "step": 3769 }, { "epoch": 1.4061917195076463, "grad_norm": 0.8189394474029541, "learning_rate": 2e-06, "loss": 0.184, "step": 3770 }, { "epoch": 1.4065647146587095, "grad_norm": 0.8824810981750488, "learning_rate": 2e-06, "loss": 0.1771, "step": 3771 }, { "epoch": 1.4069377098097724, "grad_norm": 0.8448038101196289, "learning_rate": 2e-06, "loss": 0.1723, "step": 3772 }, { "epoch": 1.4073107049608355, "grad_norm": 0.8465287685394287, "learning_rate": 2e-06, "loss": 0.1558, "step": 3773 }, { "epoch": 1.4076837001118985, "grad_norm": 0.8679412603378296, "learning_rate": 2e-06, "loss": 0.1821, "step": 3774 }, { "epoch": 1.4080566952629616, "grad_norm": 0.8062983751296997, "learning_rate": 2e-06, "loss": 0.1732, "step": 3775 }, { "epoch": 1.4084296904140245, "grad_norm": 0.8888713121414185, "learning_rate": 2e-06, "loss": 0.1777, "step": 3776 }, { "epoch": 1.4088026855650877, "grad_norm": 0.7728244662284851, "learning_rate": 2e-06, "loss": 0.1767, "step": 3777 }, { "epoch": 1.4091756807161506, "grad_norm": 0.8725257515907288, "learning_rate": 2e-06, "loss": 0.164, "step": 3778 }, { "epoch": 1.4095486758672138, "grad_norm": 0.7870349884033203, "learning_rate": 2e-06, "loss": 0.168, "step": 3779 }, { "epoch": 1.4099216710182767, "grad_norm": 0.9979579448699951, "learning_rate": 2e-06, "loss": 0.1932, "step": 3780 }, { "epoch": 1.4102946661693399, "grad_norm": 1.3414082527160645, "learning_rate": 2e-06, "loss": 0.2001, "step": 3781 }, { "epoch": 1.4106676613204028, "grad_norm": 0.8538371920585632, "learning_rate": 2e-06, "loss": 0.1726, "step": 3782 }, { "epoch": 1.411040656471466, "grad_norm": 0.7830291390419006, "learning_rate": 2e-06, "loss": 0.2083, "step": 3783 }, { "epoch": 1.4114136516225289, "grad_norm": 0.7482149004936218, "learning_rate": 2e-06, "loss": 0.1637, "step": 3784 }, { "epoch": 1.411786646773592, "grad_norm": 0.8938924670219421, "learning_rate": 2e-06, "loss": 0.1709, "step": 3785 }, { "epoch": 1.412159641924655, "grad_norm": 0.7335385084152222, "learning_rate": 2e-06, "loss": 0.1615, "step": 3786 }, { "epoch": 1.412532637075718, "grad_norm": 0.7507263422012329, "learning_rate": 2e-06, "loss": 0.2044, "step": 3787 }, { "epoch": 1.412905632226781, "grad_norm": 0.7612239718437195, "learning_rate": 2e-06, "loss": 0.1733, "step": 3788 }, { "epoch": 1.4132786273778442, "grad_norm": 0.8683601021766663, "learning_rate": 2e-06, "loss": 0.1655, "step": 3789 }, { "epoch": 1.413651622528907, "grad_norm": 0.806210994720459, "learning_rate": 2e-06, "loss": 0.1644, "step": 3790 }, { "epoch": 1.4140246176799702, "grad_norm": 0.9020691514015198, "learning_rate": 2e-06, "loss": 0.2074, "step": 3791 }, { "epoch": 1.4143976128310332, "grad_norm": 0.9259414076805115, "learning_rate": 2e-06, "loss": 0.1577, "step": 3792 }, { "epoch": 1.4147706079820963, "grad_norm": 1.0188504457473755, "learning_rate": 2e-06, "loss": 0.1513, "step": 3793 }, { "epoch": 1.4151436031331592, "grad_norm": 0.7866448760032654, "learning_rate": 2e-06, "loss": 0.1984, "step": 3794 }, { "epoch": 1.4155165982842224, "grad_norm": 0.8792333602905273, "learning_rate": 2e-06, "loss": 0.1583, "step": 3795 }, { "epoch": 1.4158895934352853, "grad_norm": 1.0018999576568604, "learning_rate": 2e-06, "loss": 0.2088, "step": 3796 }, { "epoch": 1.4162625885863485, "grad_norm": 1.0706758499145508, "learning_rate": 2e-06, "loss": 0.1916, "step": 3797 }, { "epoch": 1.4166355837374114, "grad_norm": 0.787064790725708, "learning_rate": 2e-06, "loss": 0.1692, "step": 3798 }, { "epoch": 1.4170085788884745, "grad_norm": 0.8370335102081299, "learning_rate": 2e-06, "loss": 0.1753, "step": 3799 }, { "epoch": 1.4173815740395375, "grad_norm": 0.8079814314842224, "learning_rate": 2e-06, "loss": 0.1764, "step": 3800 }, { "epoch": 1.4177545691906004, "grad_norm": 0.8071802258491516, "learning_rate": 2e-06, "loss": 0.1521, "step": 3801 }, { "epoch": 1.4181275643416635, "grad_norm": 0.8619352579116821, "learning_rate": 2e-06, "loss": 0.1879, "step": 3802 }, { "epoch": 1.4185005594927267, "grad_norm": 0.914818286895752, "learning_rate": 2e-06, "loss": 0.17, "step": 3803 }, { "epoch": 1.4188735546437896, "grad_norm": 0.9357638955116272, "learning_rate": 2e-06, "loss": 0.1837, "step": 3804 }, { "epoch": 1.4192465497948525, "grad_norm": 0.9956328868865967, "learning_rate": 2e-06, "loss": 0.173, "step": 3805 }, { "epoch": 1.4196195449459157, "grad_norm": 0.8878787755966187, "learning_rate": 2e-06, "loss": 0.1972, "step": 3806 }, { "epoch": 1.4199925400969788, "grad_norm": 0.7047377228736877, "learning_rate": 2e-06, "loss": 0.1982, "step": 3807 }, { "epoch": 1.4203655352480418, "grad_norm": 0.9539629817008972, "learning_rate": 2e-06, "loss": 0.1682, "step": 3808 }, { "epoch": 1.4207385303991047, "grad_norm": 0.7749426960945129, "learning_rate": 2e-06, "loss": 0.1972, "step": 3809 }, { "epoch": 1.4211115255501678, "grad_norm": 0.7996075749397278, "learning_rate": 2e-06, "loss": 0.1893, "step": 3810 }, { "epoch": 1.421484520701231, "grad_norm": 0.7046074867248535, "learning_rate": 2e-06, "loss": 0.1914, "step": 3811 }, { "epoch": 1.421857515852294, "grad_norm": 0.9405723214149475, "learning_rate": 2e-06, "loss": 0.1837, "step": 3812 }, { "epoch": 1.4222305110033568, "grad_norm": 0.8071953058242798, "learning_rate": 2e-06, "loss": 0.1744, "step": 3813 }, { "epoch": 1.42260350615442, "grad_norm": 0.8008927702903748, "learning_rate": 2e-06, "loss": 0.1626, "step": 3814 }, { "epoch": 1.4229765013054831, "grad_norm": 1.2046319246292114, "learning_rate": 2e-06, "loss": 0.1667, "step": 3815 }, { "epoch": 1.423349496456546, "grad_norm": 1.1965245008468628, "learning_rate": 2e-06, "loss": 0.1756, "step": 3816 }, { "epoch": 1.423722491607609, "grad_norm": 0.9221085906028748, "learning_rate": 2e-06, "loss": 0.1851, "step": 3817 }, { "epoch": 1.4240954867586721, "grad_norm": 0.9758844375610352, "learning_rate": 2e-06, "loss": 0.1669, "step": 3818 }, { "epoch": 1.4244684819097353, "grad_norm": 0.820090115070343, "learning_rate": 2e-06, "loss": 0.1832, "step": 3819 }, { "epoch": 1.4248414770607982, "grad_norm": 0.7352648973464966, "learning_rate": 2e-06, "loss": 0.1886, "step": 3820 }, { "epoch": 1.4252144722118611, "grad_norm": 0.9645358324050903, "learning_rate": 2e-06, "loss": 0.1748, "step": 3821 }, { "epoch": 1.4255874673629243, "grad_norm": 1.0299363136291504, "learning_rate": 2e-06, "loss": 0.1727, "step": 3822 }, { "epoch": 1.4259604625139874, "grad_norm": 0.7815077900886536, "learning_rate": 2e-06, "loss": 0.187, "step": 3823 }, { "epoch": 1.4263334576650504, "grad_norm": 0.7824479341506958, "learning_rate": 2e-06, "loss": 0.1822, "step": 3824 }, { "epoch": 1.4267064528161133, "grad_norm": 0.7147390842437744, "learning_rate": 2e-06, "loss": 0.1588, "step": 3825 }, { "epoch": 1.4270794479671765, "grad_norm": 0.8747034072875977, "learning_rate": 2e-06, "loss": 0.1823, "step": 3826 }, { "epoch": 1.4274524431182396, "grad_norm": 0.8173452019691467, "learning_rate": 2e-06, "loss": 0.2201, "step": 3827 }, { "epoch": 1.4278254382693025, "grad_norm": 0.7967537641525269, "learning_rate": 2e-06, "loss": 0.1666, "step": 3828 }, { "epoch": 1.4281984334203655, "grad_norm": 0.8563060164451599, "learning_rate": 2e-06, "loss": 0.1881, "step": 3829 }, { "epoch": 1.4285714285714286, "grad_norm": 0.7474492192268372, "learning_rate": 2e-06, "loss": 0.2, "step": 3830 }, { "epoch": 1.4289444237224915, "grad_norm": 1.0357029438018799, "learning_rate": 2e-06, "loss": 0.1785, "step": 3831 }, { "epoch": 1.4293174188735547, "grad_norm": 0.8304354548454285, "learning_rate": 2e-06, "loss": 0.1917, "step": 3832 }, { "epoch": 1.4296904140246176, "grad_norm": 0.8925066590309143, "learning_rate": 2e-06, "loss": 0.1681, "step": 3833 }, { "epoch": 1.4300634091756808, "grad_norm": 0.8898158073425293, "learning_rate": 2e-06, "loss": 0.1897, "step": 3834 }, { "epoch": 1.4304364043267437, "grad_norm": 1.004048466682434, "learning_rate": 2e-06, "loss": 0.1916, "step": 3835 }, { "epoch": 1.4308093994778068, "grad_norm": 0.8916377425193787, "learning_rate": 2e-06, "loss": 0.1868, "step": 3836 }, { "epoch": 1.4311823946288698, "grad_norm": 0.7774838805198669, "learning_rate": 2e-06, "loss": 0.1896, "step": 3837 }, { "epoch": 1.431555389779933, "grad_norm": 0.8402687311172485, "learning_rate": 2e-06, "loss": 0.1906, "step": 3838 }, { "epoch": 1.4319283849309958, "grad_norm": 1.2297877073287964, "learning_rate": 2e-06, "loss": 0.1783, "step": 3839 }, { "epoch": 1.432301380082059, "grad_norm": 0.8302341103553772, "learning_rate": 2e-06, "loss": 0.1818, "step": 3840 }, { "epoch": 1.432674375233122, "grad_norm": 0.7464562058448792, "learning_rate": 2e-06, "loss": 0.1776, "step": 3841 }, { "epoch": 1.433047370384185, "grad_norm": 0.9157408475875854, "learning_rate": 2e-06, "loss": 0.1912, "step": 3842 }, { "epoch": 1.433420365535248, "grad_norm": 0.7400463819503784, "learning_rate": 2e-06, "loss": 0.1877, "step": 3843 }, { "epoch": 1.4337933606863111, "grad_norm": 0.8068944215774536, "learning_rate": 2e-06, "loss": 0.1778, "step": 3844 }, { "epoch": 1.434166355837374, "grad_norm": 0.8187158703804016, "learning_rate": 2e-06, "loss": 0.1716, "step": 3845 }, { "epoch": 1.4345393509884372, "grad_norm": 0.7823240756988525, "learning_rate": 2e-06, "loss": 0.1923, "step": 3846 }, { "epoch": 1.4349123461395001, "grad_norm": 0.8213873505592346, "learning_rate": 2e-06, "loss": 0.19, "step": 3847 }, { "epoch": 1.4352853412905633, "grad_norm": 0.8238137364387512, "learning_rate": 2e-06, "loss": 0.1682, "step": 3848 }, { "epoch": 1.4356583364416262, "grad_norm": 0.6565106511116028, "learning_rate": 2e-06, "loss": 0.1847, "step": 3849 }, { "epoch": 1.4360313315926894, "grad_norm": 0.8693360686302185, "learning_rate": 2e-06, "loss": 0.1636, "step": 3850 }, { "epoch": 1.4364043267437523, "grad_norm": 0.8093113303184509, "learning_rate": 2e-06, "loss": 0.1934, "step": 3851 }, { "epoch": 1.4367773218948154, "grad_norm": 0.7505038976669312, "learning_rate": 2e-06, "loss": 0.1926, "step": 3852 }, { "epoch": 1.4371503170458784, "grad_norm": 0.9500243067741394, "learning_rate": 2e-06, "loss": 0.1835, "step": 3853 }, { "epoch": 1.4375233121969415, "grad_norm": 0.9216253161430359, "learning_rate": 2e-06, "loss": 0.1929, "step": 3854 }, { "epoch": 1.4378963073480044, "grad_norm": 0.9808184504508972, "learning_rate": 2e-06, "loss": 0.2006, "step": 3855 }, { "epoch": 1.4382693024990676, "grad_norm": 0.9967320561408997, "learning_rate": 2e-06, "loss": 0.1508, "step": 3856 }, { "epoch": 1.4386422976501305, "grad_norm": 0.7585851550102234, "learning_rate": 2e-06, "loss": 0.1655, "step": 3857 }, { "epoch": 1.4390152928011934, "grad_norm": 0.7009420990943909, "learning_rate": 2e-06, "loss": 0.187, "step": 3858 }, { "epoch": 1.4393882879522566, "grad_norm": 0.9594032764434814, "learning_rate": 2e-06, "loss": 0.1817, "step": 3859 }, { "epoch": 1.4397612831033197, "grad_norm": 0.7856749892234802, "learning_rate": 2e-06, "loss": 0.1786, "step": 3860 }, { "epoch": 1.4401342782543827, "grad_norm": 0.8269216418266296, "learning_rate": 2e-06, "loss": 0.1913, "step": 3861 }, { "epoch": 1.4405072734054456, "grad_norm": 0.9013754725456238, "learning_rate": 2e-06, "loss": 0.183, "step": 3862 }, { "epoch": 1.4408802685565087, "grad_norm": 0.7746537923812866, "learning_rate": 2e-06, "loss": 0.1788, "step": 3863 }, { "epoch": 1.441253263707572, "grad_norm": 0.9738933444023132, "learning_rate": 2e-06, "loss": 0.1698, "step": 3864 }, { "epoch": 1.4416262588586348, "grad_norm": 0.8799697160720825, "learning_rate": 2e-06, "loss": 0.1864, "step": 3865 }, { "epoch": 1.4419992540096978, "grad_norm": 0.8527650833129883, "learning_rate": 2e-06, "loss": 0.1633, "step": 3866 }, { "epoch": 1.442372249160761, "grad_norm": 0.9145278334617615, "learning_rate": 2e-06, "loss": 0.1834, "step": 3867 }, { "epoch": 1.442745244311824, "grad_norm": 0.6877943873405457, "learning_rate": 2e-06, "loss": 0.1591, "step": 3868 }, { "epoch": 1.443118239462887, "grad_norm": 0.820213794708252, "learning_rate": 2e-06, "loss": 0.2053, "step": 3869 }, { "epoch": 1.44349123461395, "grad_norm": 0.8032492399215698, "learning_rate": 2e-06, "loss": 0.1771, "step": 3870 }, { "epoch": 1.443864229765013, "grad_norm": 0.7176924347877502, "learning_rate": 2e-06, "loss": 0.2065, "step": 3871 }, { "epoch": 1.4442372249160762, "grad_norm": 0.8132625222206116, "learning_rate": 2e-06, "loss": 0.2003, "step": 3872 }, { "epoch": 1.4446102200671391, "grad_norm": 0.8047323226928711, "learning_rate": 2e-06, "loss": 0.1885, "step": 3873 }, { "epoch": 1.444983215218202, "grad_norm": 0.7533175945281982, "learning_rate": 2e-06, "loss": 0.1802, "step": 3874 }, { "epoch": 1.4453562103692652, "grad_norm": 0.9265661835670471, "learning_rate": 2e-06, "loss": 0.1837, "step": 3875 }, { "epoch": 1.4457292055203284, "grad_norm": 0.7849175930023193, "learning_rate": 2e-06, "loss": 0.1971, "step": 3876 }, { "epoch": 1.4461022006713913, "grad_norm": 0.8622214794158936, "learning_rate": 2e-06, "loss": 0.1839, "step": 3877 }, { "epoch": 1.4464751958224542, "grad_norm": 1.00078547000885, "learning_rate": 2e-06, "loss": 0.176, "step": 3878 }, { "epoch": 1.4468481909735174, "grad_norm": 1.0023603439331055, "learning_rate": 2e-06, "loss": 0.1968, "step": 3879 }, { "epoch": 1.4472211861245805, "grad_norm": 0.8537371158599854, "learning_rate": 2e-06, "loss": 0.1548, "step": 3880 }, { "epoch": 1.4475941812756434, "grad_norm": 0.8781086802482605, "learning_rate": 2e-06, "loss": 0.1991, "step": 3881 }, { "epoch": 1.4479671764267064, "grad_norm": 0.9379209280014038, "learning_rate": 2e-06, "loss": 0.1876, "step": 3882 }, { "epoch": 1.4483401715777695, "grad_norm": 0.9999616742134094, "learning_rate": 2e-06, "loss": 0.181, "step": 3883 }, { "epoch": 1.4487131667288327, "grad_norm": 1.162238359451294, "learning_rate": 2e-06, "loss": 0.1722, "step": 3884 }, { "epoch": 1.4490861618798956, "grad_norm": 0.7548871040344238, "learning_rate": 2e-06, "loss": 0.2032, "step": 3885 }, { "epoch": 1.4494591570309585, "grad_norm": 0.8394784331321716, "learning_rate": 2e-06, "loss": 0.1565, "step": 3886 }, { "epoch": 1.4498321521820217, "grad_norm": 0.7330541014671326, "learning_rate": 2e-06, "loss": 0.2066, "step": 3887 }, { "epoch": 1.4502051473330846, "grad_norm": 0.9312681555747986, "learning_rate": 2e-06, "loss": 0.1699, "step": 3888 }, { "epoch": 1.4505781424841477, "grad_norm": 0.7832631468772888, "learning_rate": 2e-06, "loss": 0.2036, "step": 3889 }, { "epoch": 1.4509511376352107, "grad_norm": 0.8856799602508545, "learning_rate": 2e-06, "loss": 0.1733, "step": 3890 }, { "epoch": 1.4513241327862738, "grad_norm": 0.9878309965133667, "learning_rate": 2e-06, "loss": 0.1834, "step": 3891 }, { "epoch": 1.4516971279373367, "grad_norm": 1.0780059099197388, "learning_rate": 2e-06, "loss": 0.1655, "step": 3892 }, { "epoch": 1.4520701230883999, "grad_norm": 0.8261033892631531, "learning_rate": 2e-06, "loss": 0.1776, "step": 3893 }, { "epoch": 1.4524431182394628, "grad_norm": 0.9813472628593445, "learning_rate": 2e-06, "loss": 0.177, "step": 3894 }, { "epoch": 1.452816113390526, "grad_norm": 0.8057411313056946, "learning_rate": 2e-06, "loss": 0.1927, "step": 3895 }, { "epoch": 1.453189108541589, "grad_norm": 0.7946409583091736, "learning_rate": 2e-06, "loss": 0.1709, "step": 3896 }, { "epoch": 1.453562103692652, "grad_norm": 0.8440740704536438, "learning_rate": 2e-06, "loss": 0.1771, "step": 3897 }, { "epoch": 1.453935098843715, "grad_norm": 0.7834180593490601, "learning_rate": 2e-06, "loss": 0.1817, "step": 3898 }, { "epoch": 1.4543080939947781, "grad_norm": 0.8219806551933289, "learning_rate": 2e-06, "loss": 0.2098, "step": 3899 }, { "epoch": 1.454681089145841, "grad_norm": 0.9730623364448547, "learning_rate": 2e-06, "loss": 0.1735, "step": 3900 }, { "epoch": 1.4550540842969042, "grad_norm": 0.7731031775474548, "learning_rate": 2e-06, "loss": 0.1612, "step": 3901 }, { "epoch": 1.4554270794479671, "grad_norm": 0.8181484937667847, "learning_rate": 2e-06, "loss": 0.1859, "step": 3902 }, { "epoch": 1.4558000745990303, "grad_norm": 0.774776816368103, "learning_rate": 2e-06, "loss": 0.2003, "step": 3903 }, { "epoch": 1.4561730697500932, "grad_norm": 0.6859404444694519, "learning_rate": 2e-06, "loss": 0.1781, "step": 3904 }, { "epoch": 1.4565460649011563, "grad_norm": 0.9552941918373108, "learning_rate": 2e-06, "loss": 0.1802, "step": 3905 }, { "epoch": 1.4569190600522193, "grad_norm": 0.7942746877670288, "learning_rate": 2e-06, "loss": 0.2026, "step": 3906 }, { "epoch": 1.4572920552032824, "grad_norm": 0.9871114492416382, "learning_rate": 2e-06, "loss": 0.1812, "step": 3907 }, { "epoch": 1.4576650503543453, "grad_norm": 0.7825681567192078, "learning_rate": 2e-06, "loss": 0.1909, "step": 3908 }, { "epoch": 1.4580380455054085, "grad_norm": 0.8895496129989624, "learning_rate": 2e-06, "loss": 0.1875, "step": 3909 }, { "epoch": 1.4584110406564714, "grad_norm": 0.7843710780143738, "learning_rate": 2e-06, "loss": 0.1665, "step": 3910 }, { "epoch": 1.4587840358075346, "grad_norm": 0.9631704092025757, "learning_rate": 2e-06, "loss": 0.2054, "step": 3911 }, { "epoch": 1.4591570309585975, "grad_norm": 0.8838503956794739, "learning_rate": 2e-06, "loss": 0.1692, "step": 3912 }, { "epoch": 1.4595300261096606, "grad_norm": 0.9273983240127563, "learning_rate": 2e-06, "loss": 0.1719, "step": 3913 }, { "epoch": 1.4599030212607236, "grad_norm": 0.9254978895187378, "learning_rate": 2e-06, "loss": 0.1497, "step": 3914 }, { "epoch": 1.4602760164117867, "grad_norm": 0.97032630443573, "learning_rate": 2e-06, "loss": 0.1986, "step": 3915 }, { "epoch": 1.4606490115628497, "grad_norm": 1.044014573097229, "learning_rate": 2e-06, "loss": 0.1874, "step": 3916 }, { "epoch": 1.4610220067139128, "grad_norm": 0.9408986568450928, "learning_rate": 2e-06, "loss": 0.1929, "step": 3917 }, { "epoch": 1.4613950018649757, "grad_norm": 0.721001923084259, "learning_rate": 2e-06, "loss": 0.1845, "step": 3918 }, { "epoch": 1.4617679970160387, "grad_norm": 0.7584692239761353, "learning_rate": 2e-06, "loss": 0.193, "step": 3919 }, { "epoch": 1.4621409921671018, "grad_norm": 0.749744713306427, "learning_rate": 2e-06, "loss": 0.176, "step": 3920 }, { "epoch": 1.462513987318165, "grad_norm": 0.7661462426185608, "learning_rate": 2e-06, "loss": 0.1757, "step": 3921 }, { "epoch": 1.4628869824692279, "grad_norm": 0.7059104442596436, "learning_rate": 2e-06, "loss": 0.1989, "step": 3922 }, { "epoch": 1.4632599776202908, "grad_norm": 0.7255252003669739, "learning_rate": 2e-06, "loss": 0.1691, "step": 3923 }, { "epoch": 1.463632972771354, "grad_norm": 0.881889283657074, "learning_rate": 2e-06, "loss": 0.1698, "step": 3924 }, { "epoch": 1.464005967922417, "grad_norm": 0.9358631372451782, "learning_rate": 2e-06, "loss": 0.1574, "step": 3925 }, { "epoch": 1.46437896307348, "grad_norm": 0.7845851182937622, "learning_rate": 2e-06, "loss": 0.1754, "step": 3926 }, { "epoch": 1.464751958224543, "grad_norm": 0.7952301502227783, "learning_rate": 2e-06, "loss": 0.1655, "step": 3927 }, { "epoch": 1.465124953375606, "grad_norm": 0.7269449234008789, "learning_rate": 2e-06, "loss": 0.1844, "step": 3928 }, { "epoch": 1.4654979485266693, "grad_norm": 0.8285092115402222, "learning_rate": 2e-06, "loss": 0.1762, "step": 3929 }, { "epoch": 1.4658709436777322, "grad_norm": 0.8926289677619934, "learning_rate": 2e-06, "loss": 0.1479, "step": 3930 }, { "epoch": 1.4662439388287951, "grad_norm": 0.8856480121612549, "learning_rate": 2e-06, "loss": 0.1862, "step": 3931 }, { "epoch": 1.4666169339798583, "grad_norm": 0.8252087235450745, "learning_rate": 2e-06, "loss": 0.2119, "step": 3932 }, { "epoch": 1.4669899291309214, "grad_norm": 0.754766047000885, "learning_rate": 2e-06, "loss": 0.1737, "step": 3933 }, { "epoch": 1.4673629242819843, "grad_norm": 0.8606979250907898, "learning_rate": 2e-06, "loss": 0.1686, "step": 3934 }, { "epoch": 1.4677359194330473, "grad_norm": 0.7399778962135315, "learning_rate": 2e-06, "loss": 0.158, "step": 3935 }, { "epoch": 1.4681089145841104, "grad_norm": 0.7295520901679993, "learning_rate": 2e-06, "loss": 0.1698, "step": 3936 }, { "epoch": 1.4684819097351736, "grad_norm": 1.1000847816467285, "learning_rate": 2e-06, "loss": 0.1802, "step": 3937 }, { "epoch": 1.4688549048862365, "grad_norm": 0.8650122284889221, "learning_rate": 2e-06, "loss": 0.1866, "step": 3938 }, { "epoch": 1.4692279000372994, "grad_norm": 0.992974042892456, "learning_rate": 2e-06, "loss": 0.1827, "step": 3939 }, { "epoch": 1.4696008951883626, "grad_norm": 0.9215326905250549, "learning_rate": 2e-06, "loss": 0.1935, "step": 3940 }, { "epoch": 1.4699738903394257, "grad_norm": 0.8453366160392761, "learning_rate": 2e-06, "loss": 0.2006, "step": 3941 }, { "epoch": 1.4703468854904886, "grad_norm": 1.0125929117202759, "learning_rate": 2e-06, "loss": 0.1727, "step": 3942 }, { "epoch": 1.4707198806415516, "grad_norm": 1.2706208229064941, "learning_rate": 2e-06, "loss": 0.1468, "step": 3943 }, { "epoch": 1.4710928757926147, "grad_norm": 1.1614748239517212, "learning_rate": 2e-06, "loss": 0.1787, "step": 3944 }, { "epoch": 1.4714658709436779, "grad_norm": 0.7730237245559692, "learning_rate": 2e-06, "loss": 0.1857, "step": 3945 }, { "epoch": 1.4718388660947408, "grad_norm": 0.9535537958145142, "learning_rate": 2e-06, "loss": 0.1723, "step": 3946 }, { "epoch": 1.4722118612458037, "grad_norm": 0.7656475305557251, "learning_rate": 2e-06, "loss": 0.1805, "step": 3947 }, { "epoch": 1.4725848563968669, "grad_norm": 0.8452351689338684, "learning_rate": 2e-06, "loss": 0.1912, "step": 3948 }, { "epoch": 1.4729578515479298, "grad_norm": 0.899433970451355, "learning_rate": 2e-06, "loss": 0.1775, "step": 3949 }, { "epoch": 1.473330846698993, "grad_norm": 0.7769073247909546, "learning_rate": 2e-06, "loss": 0.1943, "step": 3950 }, { "epoch": 1.4737038418500559, "grad_norm": 0.7230724692344666, "learning_rate": 2e-06, "loss": 0.1949, "step": 3951 }, { "epoch": 1.474076837001119, "grad_norm": 0.7711920738220215, "learning_rate": 2e-06, "loss": 0.178, "step": 3952 }, { "epoch": 1.474449832152182, "grad_norm": 0.9296865463256836, "learning_rate": 2e-06, "loss": 0.1843, "step": 3953 }, { "epoch": 1.474822827303245, "grad_norm": 0.8454717993736267, "learning_rate": 2e-06, "loss": 0.1585, "step": 3954 }, { "epoch": 1.475195822454308, "grad_norm": 0.7405737638473511, "learning_rate": 2e-06, "loss": 0.1695, "step": 3955 }, { "epoch": 1.4755688176053712, "grad_norm": 1.0568963289260864, "learning_rate": 2e-06, "loss": 0.1627, "step": 3956 }, { "epoch": 1.475941812756434, "grad_norm": 0.9673308730125427, "learning_rate": 2e-06, "loss": 0.1623, "step": 3957 }, { "epoch": 1.4763148079074973, "grad_norm": 0.8402366638183594, "learning_rate": 2e-06, "loss": 0.1767, "step": 3958 }, { "epoch": 1.4766878030585602, "grad_norm": 0.7732430696487427, "learning_rate": 2e-06, "loss": 0.1911, "step": 3959 }, { "epoch": 1.4770607982096233, "grad_norm": 1.0010384321212769, "learning_rate": 2e-06, "loss": 0.1772, "step": 3960 }, { "epoch": 1.4774337933606863, "grad_norm": 0.7583075165748596, "learning_rate": 2e-06, "loss": 0.1685, "step": 3961 }, { "epoch": 1.4778067885117494, "grad_norm": 0.8778873085975647, "learning_rate": 2e-06, "loss": 0.1728, "step": 3962 }, { "epoch": 1.4781797836628123, "grad_norm": 3.590789794921875, "learning_rate": 2e-06, "loss": 0.2015, "step": 3963 }, { "epoch": 1.4785527788138755, "grad_norm": 0.8631922602653503, "learning_rate": 2e-06, "loss": 0.1781, "step": 3964 }, { "epoch": 1.4789257739649384, "grad_norm": 0.7828360199928284, "learning_rate": 2e-06, "loss": 0.1852, "step": 3965 }, { "epoch": 1.4792987691160016, "grad_norm": 0.8146758079528809, "learning_rate": 2e-06, "loss": 0.1746, "step": 3966 }, { "epoch": 1.4796717642670645, "grad_norm": 0.8146464824676514, "learning_rate": 2e-06, "loss": 0.2015, "step": 3967 }, { "epoch": 1.4800447594181276, "grad_norm": 0.8769454956054688, "learning_rate": 2e-06, "loss": 0.1839, "step": 3968 }, { "epoch": 1.4804177545691906, "grad_norm": 0.9164703488349915, "learning_rate": 2e-06, "loss": 0.1464, "step": 3969 }, { "epoch": 1.4807907497202537, "grad_norm": 1.0191597938537598, "learning_rate": 2e-06, "loss": 0.1643, "step": 3970 }, { "epoch": 1.4811637448713166, "grad_norm": 0.8779909610748291, "learning_rate": 2e-06, "loss": 0.1738, "step": 3971 }, { "epoch": 1.4815367400223798, "grad_norm": 0.7600595355033875, "learning_rate": 2e-06, "loss": 0.1887, "step": 3972 }, { "epoch": 1.4819097351734427, "grad_norm": 0.7876225709915161, "learning_rate": 2e-06, "loss": 0.1714, "step": 3973 }, { "epoch": 1.4822827303245059, "grad_norm": 0.935255765914917, "learning_rate": 2e-06, "loss": 0.1862, "step": 3974 }, { "epoch": 1.4826557254755688, "grad_norm": 0.7261038422584534, "learning_rate": 2e-06, "loss": 0.1902, "step": 3975 }, { "epoch": 1.483028720626632, "grad_norm": 1.1240713596343994, "learning_rate": 2e-06, "loss": 0.1885, "step": 3976 }, { "epoch": 1.4834017157776949, "grad_norm": 0.8609089851379395, "learning_rate": 2e-06, "loss": 0.181, "step": 3977 }, { "epoch": 1.483774710928758, "grad_norm": 0.7416454553604126, "learning_rate": 2e-06, "loss": 0.2097, "step": 3978 }, { "epoch": 1.484147706079821, "grad_norm": 1.1434787511825562, "learning_rate": 2e-06, "loss": 0.1556, "step": 3979 }, { "epoch": 1.4845207012308839, "grad_norm": 0.8660809993743896, "learning_rate": 2e-06, "loss": 0.1729, "step": 3980 }, { "epoch": 1.484893696381947, "grad_norm": 0.8387376666069031, "learning_rate": 2e-06, "loss": 0.1845, "step": 3981 }, { "epoch": 1.4852666915330102, "grad_norm": 0.8776223659515381, "learning_rate": 2e-06, "loss": 0.1773, "step": 3982 }, { "epoch": 1.485639686684073, "grad_norm": 0.7732442617416382, "learning_rate": 2e-06, "loss": 0.1869, "step": 3983 }, { "epoch": 1.486012681835136, "grad_norm": 0.9409886598587036, "learning_rate": 2e-06, "loss": 0.1859, "step": 3984 }, { "epoch": 1.4863856769861992, "grad_norm": 0.828402578830719, "learning_rate": 2e-06, "loss": 0.1518, "step": 3985 }, { "epoch": 1.4867586721372623, "grad_norm": 0.9363305568695068, "learning_rate": 2e-06, "loss": 0.1714, "step": 3986 }, { "epoch": 1.4871316672883252, "grad_norm": 1.0529049634933472, "learning_rate": 2e-06, "loss": 0.192, "step": 3987 }, { "epoch": 1.4875046624393882, "grad_norm": 0.830339789390564, "learning_rate": 2e-06, "loss": 0.1736, "step": 3988 }, { "epoch": 1.4878776575904513, "grad_norm": 0.8205186724662781, "learning_rate": 2e-06, "loss": 0.194, "step": 3989 }, { "epoch": 1.4882506527415145, "grad_norm": 0.8250502347946167, "learning_rate": 2e-06, "loss": 0.2024, "step": 3990 }, { "epoch": 1.4886236478925774, "grad_norm": 0.7237592935562134, "learning_rate": 2e-06, "loss": 0.1877, "step": 3991 }, { "epoch": 1.4889966430436403, "grad_norm": 0.797916829586029, "learning_rate": 2e-06, "loss": 0.1722, "step": 3992 }, { "epoch": 1.4893696381947035, "grad_norm": 0.8628688454627991, "learning_rate": 2e-06, "loss": 0.1786, "step": 3993 }, { "epoch": 1.4897426333457666, "grad_norm": 0.7603047490119934, "learning_rate": 2e-06, "loss": 0.1972, "step": 3994 }, { "epoch": 1.4901156284968295, "grad_norm": 0.8417566418647766, "learning_rate": 2e-06, "loss": 0.1841, "step": 3995 }, { "epoch": 1.4904886236478925, "grad_norm": 1.3554086685180664, "learning_rate": 2e-06, "loss": 0.1744, "step": 3996 }, { "epoch": 1.4908616187989556, "grad_norm": 0.8719472885131836, "learning_rate": 2e-06, "loss": 0.1958, "step": 3997 }, { "epoch": 1.4912346139500188, "grad_norm": 0.9284753203392029, "learning_rate": 2e-06, "loss": 0.1525, "step": 3998 }, { "epoch": 1.4916076091010817, "grad_norm": 0.7234894633293152, "learning_rate": 2e-06, "loss": 0.1728, "step": 3999 }, { "epoch": 1.4919806042521446, "grad_norm": 0.8169152736663818, "learning_rate": 2e-06, "loss": 0.1767, "step": 4000 }, { "epoch": 1.4923535994032078, "grad_norm": 0.8446696400642395, "learning_rate": 2e-06, "loss": 0.171, "step": 4001 }, { "epoch": 1.492726594554271, "grad_norm": 0.8115003108978271, "learning_rate": 2e-06, "loss": 0.1786, "step": 4002 }, { "epoch": 1.4930995897053339, "grad_norm": 0.8695127964019775, "learning_rate": 2e-06, "loss": 0.1862, "step": 4003 }, { "epoch": 1.4934725848563968, "grad_norm": 0.7662029266357422, "learning_rate": 2e-06, "loss": 0.1626, "step": 4004 }, { "epoch": 1.49384558000746, "grad_norm": 0.815667986869812, "learning_rate": 2e-06, "loss": 0.1785, "step": 4005 }, { "epoch": 1.494218575158523, "grad_norm": 0.8651843070983887, "learning_rate": 2e-06, "loss": 0.195, "step": 4006 }, { "epoch": 1.494591570309586, "grad_norm": 0.7085371613502502, "learning_rate": 2e-06, "loss": 0.1883, "step": 4007 }, { "epoch": 1.494964565460649, "grad_norm": 0.9568456411361694, "learning_rate": 2e-06, "loss": 0.1797, "step": 4008 }, { "epoch": 1.495337560611712, "grad_norm": 0.8679647445678711, "learning_rate": 2e-06, "loss": 0.1975, "step": 4009 }, { "epoch": 1.495710555762775, "grad_norm": 0.7308457493782043, "learning_rate": 2e-06, "loss": 0.2176, "step": 4010 }, { "epoch": 1.4960835509138382, "grad_norm": 1.0001556873321533, "learning_rate": 2e-06, "loss": 0.2063, "step": 4011 }, { "epoch": 1.496456546064901, "grad_norm": 0.9359763264656067, "learning_rate": 2e-06, "loss": 0.1777, "step": 4012 }, { "epoch": 1.4968295412159642, "grad_norm": 0.7628212571144104, "learning_rate": 2e-06, "loss": 0.163, "step": 4013 }, { "epoch": 1.4972025363670272, "grad_norm": 0.9005221128463745, "learning_rate": 2e-06, "loss": 0.1771, "step": 4014 }, { "epoch": 1.4975755315180903, "grad_norm": 0.8648070096969604, "learning_rate": 2e-06, "loss": 0.1848, "step": 4015 }, { "epoch": 1.4979485266691532, "grad_norm": 0.7161102890968323, "learning_rate": 2e-06, "loss": 0.1871, "step": 4016 }, { "epoch": 1.4983215218202164, "grad_norm": 0.8180111646652222, "learning_rate": 2e-06, "loss": 0.1853, "step": 4017 }, { "epoch": 1.4986945169712793, "grad_norm": 0.81512451171875, "learning_rate": 2e-06, "loss": 0.186, "step": 4018 }, { "epoch": 1.4990675121223425, "grad_norm": 0.8718905448913574, "learning_rate": 2e-06, "loss": 0.1577, "step": 4019 }, { "epoch": 1.4994405072734054, "grad_norm": 0.8261348605155945, "learning_rate": 2e-06, "loss": 0.1699, "step": 4020 }, { "epoch": 1.4998135024244685, "grad_norm": 0.8237884044647217, "learning_rate": 2e-06, "loss": 0.1655, "step": 4021 }, { "epoch": 1.5001864975755315, "grad_norm": 0.748367965221405, "learning_rate": 2e-06, "loss": 0.163, "step": 4022 }, { "epoch": 1.5005594927265946, "grad_norm": 0.764103353023529, "learning_rate": 2e-06, "loss": 0.1478, "step": 4023 }, { "epoch": 1.5009324878776575, "grad_norm": 0.8322896361351013, "learning_rate": 2e-06, "loss": 0.1895, "step": 4024 }, { "epoch": 1.5013054830287205, "grad_norm": 0.6977496147155762, "learning_rate": 2e-06, "loss": 0.1651, "step": 4025 }, { "epoch": 1.5016784781797836, "grad_norm": 1.084789752960205, "learning_rate": 2e-06, "loss": 0.1993, "step": 4026 }, { "epoch": 1.5020514733308468, "grad_norm": 0.8549523949623108, "learning_rate": 2e-06, "loss": 0.1681, "step": 4027 }, { "epoch": 1.5024244684819097, "grad_norm": 1.017059326171875, "learning_rate": 2e-06, "loss": 0.1741, "step": 4028 }, { "epoch": 1.5027974636329726, "grad_norm": 0.7369011044502258, "learning_rate": 2e-06, "loss": 0.1862, "step": 4029 }, { "epoch": 1.5031704587840358, "grad_norm": 1.034924864768982, "learning_rate": 2e-06, "loss": 0.1714, "step": 4030 }, { "epoch": 1.503543453935099, "grad_norm": 0.9852278232574463, "learning_rate": 2e-06, "loss": 0.1987, "step": 4031 }, { "epoch": 1.5039164490861618, "grad_norm": 0.836482584476471, "learning_rate": 2e-06, "loss": 0.1853, "step": 4032 }, { "epoch": 1.5042894442372248, "grad_norm": 0.9395976662635803, "learning_rate": 2e-06, "loss": 0.1817, "step": 4033 }, { "epoch": 1.504662439388288, "grad_norm": 0.8545815944671631, "learning_rate": 2e-06, "loss": 0.1606, "step": 4034 }, { "epoch": 1.505035434539351, "grad_norm": 0.7125906348228455, "learning_rate": 2e-06, "loss": 0.2107, "step": 4035 }, { "epoch": 1.505408429690414, "grad_norm": 0.9237890839576721, "learning_rate": 2e-06, "loss": 0.1687, "step": 4036 }, { "epoch": 1.505781424841477, "grad_norm": 0.791635274887085, "learning_rate": 2e-06, "loss": 0.1733, "step": 4037 }, { "epoch": 1.50615441999254, "grad_norm": 0.8945311903953552, "learning_rate": 2e-06, "loss": 0.1971, "step": 4038 }, { "epoch": 1.5065274151436032, "grad_norm": 0.8263905048370361, "learning_rate": 2e-06, "loss": 0.1815, "step": 4039 }, { "epoch": 1.5069004102946661, "grad_norm": 0.824672281742096, "learning_rate": 2e-06, "loss": 0.148, "step": 4040 }, { "epoch": 1.507273405445729, "grad_norm": 0.8886070251464844, "learning_rate": 2e-06, "loss": 0.1744, "step": 4041 }, { "epoch": 1.5076464005967922, "grad_norm": 0.9601172208786011, "learning_rate": 2e-06, "loss": 0.1786, "step": 4042 }, { "epoch": 1.5080193957478554, "grad_norm": 0.8381677865982056, "learning_rate": 2e-06, "loss": 0.2008, "step": 4043 }, { "epoch": 1.5083923908989183, "grad_norm": 0.8513845801353455, "learning_rate": 2e-06, "loss": 0.1952, "step": 4044 }, { "epoch": 1.5087653860499812, "grad_norm": 1.0250705480575562, "learning_rate": 2e-06, "loss": 0.1612, "step": 4045 }, { "epoch": 1.5091383812010444, "grad_norm": 0.8059238195419312, "learning_rate": 2e-06, "loss": 0.1628, "step": 4046 }, { "epoch": 1.5095113763521075, "grad_norm": 0.7915655970573425, "learning_rate": 2e-06, "loss": 0.1886, "step": 4047 }, { "epoch": 1.5098843715031705, "grad_norm": 0.8210203647613525, "learning_rate": 2e-06, "loss": 0.1699, "step": 4048 }, { "epoch": 1.5102573666542334, "grad_norm": 0.8380529284477234, "learning_rate": 2e-06, "loss": 0.1692, "step": 4049 }, { "epoch": 1.5106303618052965, "grad_norm": 0.7879037857055664, "learning_rate": 2e-06, "loss": 0.1763, "step": 4050 }, { "epoch": 1.5110033569563597, "grad_norm": 0.8599416017532349, "learning_rate": 2e-06, "loss": 0.1985, "step": 4051 }, { "epoch": 1.5113763521074226, "grad_norm": 0.789882481098175, "learning_rate": 2e-06, "loss": 0.1726, "step": 4052 }, { "epoch": 1.5117493472584855, "grad_norm": 0.8177428841590881, "learning_rate": 2e-06, "loss": 0.2054, "step": 4053 }, { "epoch": 1.5121223424095487, "grad_norm": 0.8293223977088928, "learning_rate": 2e-06, "loss": 0.1769, "step": 4054 }, { "epoch": 1.5124953375606118, "grad_norm": 0.8421322107315063, "learning_rate": 2e-06, "loss": 0.2023, "step": 4055 }, { "epoch": 1.5128683327116748, "grad_norm": 0.7301773428916931, "learning_rate": 2e-06, "loss": 0.1961, "step": 4056 }, { "epoch": 1.5132413278627377, "grad_norm": 0.8003236055374146, "learning_rate": 2e-06, "loss": 0.1718, "step": 4057 }, { "epoch": 1.5136143230138008, "grad_norm": 0.7707826495170593, "learning_rate": 2e-06, "loss": 0.1642, "step": 4058 }, { "epoch": 1.513987318164864, "grad_norm": 0.6900650858879089, "learning_rate": 2e-06, "loss": 0.1903, "step": 4059 }, { "epoch": 1.514360313315927, "grad_norm": 0.9222540259361267, "learning_rate": 2e-06, "loss": 0.2017, "step": 4060 }, { "epoch": 1.5147333084669898, "grad_norm": 0.7647155523300171, "learning_rate": 2e-06, "loss": 0.2005, "step": 4061 }, { "epoch": 1.515106303618053, "grad_norm": 0.9496968388557434, "learning_rate": 2e-06, "loss": 0.1653, "step": 4062 }, { "epoch": 1.5154792987691161, "grad_norm": 0.9347925782203674, "learning_rate": 2e-06, "loss": 0.1837, "step": 4063 }, { "epoch": 1.515852293920179, "grad_norm": 1.0638681650161743, "learning_rate": 2e-06, "loss": 0.1733, "step": 4064 }, { "epoch": 1.516225289071242, "grad_norm": 0.9778865575790405, "learning_rate": 2e-06, "loss": 0.1864, "step": 4065 }, { "epoch": 1.5165982842223051, "grad_norm": 0.897054135799408, "learning_rate": 2e-06, "loss": 0.1679, "step": 4066 }, { "epoch": 1.5169712793733683, "grad_norm": 0.8093224763870239, "learning_rate": 2e-06, "loss": 0.1584, "step": 4067 }, { "epoch": 1.5173442745244312, "grad_norm": 1.1190472841262817, "learning_rate": 2e-06, "loss": 0.1531, "step": 4068 }, { "epoch": 1.5177172696754941, "grad_norm": 0.9715657830238342, "learning_rate": 2e-06, "loss": 0.1627, "step": 4069 }, { "epoch": 1.5180902648265573, "grad_norm": 0.8448849320411682, "learning_rate": 2e-06, "loss": 0.1734, "step": 4070 }, { "epoch": 1.5184632599776204, "grad_norm": 0.9134995341300964, "learning_rate": 2e-06, "loss": 0.1988, "step": 4071 }, { "epoch": 1.5188362551286834, "grad_norm": 0.765121579170227, "learning_rate": 2e-06, "loss": 0.164, "step": 4072 }, { "epoch": 1.5192092502797463, "grad_norm": 0.9313433766365051, "learning_rate": 2e-06, "loss": 0.1771, "step": 4073 }, { "epoch": 1.5195822454308094, "grad_norm": 0.7989755272865295, "learning_rate": 2e-06, "loss": 0.1891, "step": 4074 }, { "epoch": 1.5199552405818726, "grad_norm": 0.8798702359199524, "learning_rate": 2e-06, "loss": 0.1895, "step": 4075 }, { "epoch": 1.5203282357329355, "grad_norm": 0.738896906375885, "learning_rate": 2e-06, "loss": 0.1599, "step": 4076 }, { "epoch": 1.5207012308839984, "grad_norm": 0.7468244433403015, "learning_rate": 2e-06, "loss": 0.1925, "step": 4077 }, { "epoch": 1.5210742260350616, "grad_norm": 0.8433496356010437, "learning_rate": 2e-06, "loss": 0.1512, "step": 4078 }, { "epoch": 1.5214472211861247, "grad_norm": 0.652600884437561, "learning_rate": 2e-06, "loss": 0.1996, "step": 4079 }, { "epoch": 1.5218202163371877, "grad_norm": 0.833248496055603, "learning_rate": 2e-06, "loss": 0.1792, "step": 4080 }, { "epoch": 1.5221932114882506, "grad_norm": 0.9093927145004272, "learning_rate": 2e-06, "loss": 0.1904, "step": 4081 }, { "epoch": 1.5225662066393135, "grad_norm": 0.7221584916114807, "learning_rate": 2e-06, "loss": 0.1793, "step": 4082 }, { "epoch": 1.5229392017903767, "grad_norm": 0.8489587903022766, "learning_rate": 2e-06, "loss": 0.1459, "step": 4083 }, { "epoch": 1.5233121969414398, "grad_norm": 0.7454043030738831, "learning_rate": 2e-06, "loss": 0.2202, "step": 4084 }, { "epoch": 1.5236851920925027, "grad_norm": 0.8535373210906982, "learning_rate": 2e-06, "loss": 0.1761, "step": 4085 }, { "epoch": 1.5240581872435657, "grad_norm": 0.8221094608306885, "learning_rate": 2e-06, "loss": 0.1805, "step": 4086 }, { "epoch": 1.5244311823946288, "grad_norm": 0.8829692006111145, "learning_rate": 2e-06, "loss": 0.1736, "step": 4087 }, { "epoch": 1.524804177545692, "grad_norm": 0.9309015274047852, "learning_rate": 2e-06, "loss": 0.1823, "step": 4088 }, { "epoch": 1.525177172696755, "grad_norm": 0.8443466424942017, "learning_rate": 2e-06, "loss": 0.1859, "step": 4089 }, { "epoch": 1.5255501678478178, "grad_norm": 0.8748769164085388, "learning_rate": 2e-06, "loss": 0.17, "step": 4090 }, { "epoch": 1.525923162998881, "grad_norm": 0.940502405166626, "learning_rate": 2e-06, "loss": 0.1699, "step": 4091 }, { "epoch": 1.5262961581499441, "grad_norm": 0.7839881777763367, "learning_rate": 2e-06, "loss": 0.1901, "step": 4092 }, { "epoch": 1.526669153301007, "grad_norm": 0.7007580399513245, "learning_rate": 2e-06, "loss": 0.1712, "step": 4093 }, { "epoch": 1.52704214845207, "grad_norm": 0.7645153999328613, "learning_rate": 2e-06, "loss": 0.1849, "step": 4094 }, { "epoch": 1.5274151436031331, "grad_norm": 0.8005964756011963, "learning_rate": 2e-06, "loss": 0.161, "step": 4095 }, { "epoch": 1.5277881387541963, "grad_norm": 0.819484293460846, "learning_rate": 2e-06, "loss": 0.167, "step": 4096 }, { "epoch": 1.5281611339052592, "grad_norm": 0.8384400606155396, "learning_rate": 2e-06, "loss": 0.1639, "step": 4097 }, { "epoch": 1.5285341290563221, "grad_norm": 1.0326931476593018, "learning_rate": 2e-06, "loss": 0.1851, "step": 4098 }, { "epoch": 1.5289071242073853, "grad_norm": 1.5956941843032837, "learning_rate": 2e-06, "loss": 0.1602, "step": 4099 }, { "epoch": 1.5292801193584484, "grad_norm": 0.9917837381362915, "learning_rate": 2e-06, "loss": 0.1735, "step": 4100 }, { "epoch": 1.5296531145095114, "grad_norm": 0.8293807506561279, "learning_rate": 2e-06, "loss": 0.178, "step": 4101 }, { "epoch": 1.5300261096605743, "grad_norm": 0.8960368037223816, "learning_rate": 2e-06, "loss": 0.1878, "step": 4102 }, { "epoch": 1.5303991048116374, "grad_norm": 0.9151940941810608, "learning_rate": 2e-06, "loss": 0.1821, "step": 4103 }, { "epoch": 1.5307720999627006, "grad_norm": 0.8313994407653809, "learning_rate": 2e-06, "loss": 0.1644, "step": 4104 }, { "epoch": 1.5311450951137635, "grad_norm": 0.8595501184463501, "learning_rate": 2e-06, "loss": 0.1821, "step": 4105 }, { "epoch": 1.5315180902648264, "grad_norm": 0.7808237075805664, "learning_rate": 2e-06, "loss": 0.1705, "step": 4106 }, { "epoch": 1.5318910854158896, "grad_norm": 1.0719351768493652, "learning_rate": 2e-06, "loss": 0.1628, "step": 4107 }, { "epoch": 1.5322640805669527, "grad_norm": 0.8871902823448181, "learning_rate": 2e-06, "loss": 0.173, "step": 4108 }, { "epoch": 1.5326370757180157, "grad_norm": 0.8046426773071289, "learning_rate": 2e-06, "loss": 0.1742, "step": 4109 }, { "epoch": 1.5330100708690786, "grad_norm": 0.768656849861145, "learning_rate": 2e-06, "loss": 0.1793, "step": 4110 }, { "epoch": 1.5333830660201417, "grad_norm": 0.8042858839035034, "learning_rate": 2e-06, "loss": 0.1954, "step": 4111 }, { "epoch": 1.5337560611712049, "grad_norm": 0.7734361290931702, "learning_rate": 2e-06, "loss": 0.1939, "step": 4112 }, { "epoch": 1.5341290563222678, "grad_norm": 0.7532808184623718, "learning_rate": 2e-06, "loss": 0.1693, "step": 4113 }, { "epoch": 1.5345020514733307, "grad_norm": 0.7110929489135742, "learning_rate": 2e-06, "loss": 0.1757, "step": 4114 }, { "epoch": 1.534875046624394, "grad_norm": 0.9490069150924683, "learning_rate": 2e-06, "loss": 0.1526, "step": 4115 }, { "epoch": 1.535248041775457, "grad_norm": 1.196196436882019, "learning_rate": 2e-06, "loss": 0.1826, "step": 4116 }, { "epoch": 1.53562103692652, "grad_norm": 0.9747472405433655, "learning_rate": 2e-06, "loss": 0.1524, "step": 4117 }, { "epoch": 1.535994032077583, "grad_norm": 0.9050301909446716, "learning_rate": 2e-06, "loss": 0.1721, "step": 4118 }, { "epoch": 1.536367027228646, "grad_norm": 0.7541864514350891, "learning_rate": 2e-06, "loss": 0.1675, "step": 4119 }, { "epoch": 1.5367400223797092, "grad_norm": 0.8371217846870422, "learning_rate": 2e-06, "loss": 0.1813, "step": 4120 }, { "epoch": 1.5371130175307721, "grad_norm": 0.8529453277587891, "learning_rate": 2e-06, "loss": 0.1889, "step": 4121 }, { "epoch": 1.537486012681835, "grad_norm": 0.8753451704978943, "learning_rate": 2e-06, "loss": 0.1948, "step": 4122 }, { "epoch": 1.5378590078328982, "grad_norm": 0.8984821438789368, "learning_rate": 2e-06, "loss": 0.1668, "step": 4123 }, { "epoch": 1.5382320029839613, "grad_norm": 0.9407668709754944, "learning_rate": 2e-06, "loss": 0.1705, "step": 4124 }, { "epoch": 1.5386049981350243, "grad_norm": 0.9077891111373901, "learning_rate": 2e-06, "loss": 0.1739, "step": 4125 }, { "epoch": 1.5389779932860872, "grad_norm": 1.0767946243286133, "learning_rate": 2e-06, "loss": 0.1658, "step": 4126 }, { "epoch": 1.5393509884371503, "grad_norm": 0.8493524789810181, "learning_rate": 2e-06, "loss": 0.1848, "step": 4127 }, { "epoch": 1.5397239835882135, "grad_norm": 0.9382396936416626, "learning_rate": 2e-06, "loss": 0.1679, "step": 4128 }, { "epoch": 1.5400969787392764, "grad_norm": 1.0240694284439087, "learning_rate": 2e-06, "loss": 0.1586, "step": 4129 }, { "epoch": 1.5404699738903394, "grad_norm": 1.045308232307434, "learning_rate": 2e-06, "loss": 0.1547, "step": 4130 }, { "epoch": 1.5408429690414025, "grad_norm": 0.7285276651382446, "learning_rate": 2e-06, "loss": 0.1779, "step": 4131 }, { "epoch": 1.5412159641924656, "grad_norm": 0.9204332232475281, "learning_rate": 2e-06, "loss": 0.1642, "step": 4132 }, { "epoch": 1.5415889593435286, "grad_norm": 0.7884463667869568, "learning_rate": 2e-06, "loss": 0.177, "step": 4133 }, { "epoch": 1.5419619544945915, "grad_norm": 0.8083403706550598, "learning_rate": 2e-06, "loss": 0.1547, "step": 4134 }, { "epoch": 1.5423349496456547, "grad_norm": 0.8431960940361023, "learning_rate": 2e-06, "loss": 0.1642, "step": 4135 }, { "epoch": 1.5427079447967178, "grad_norm": 0.7796348333358765, "learning_rate": 2e-06, "loss": 0.1565, "step": 4136 }, { "epoch": 1.5430809399477807, "grad_norm": 0.7714481949806213, "learning_rate": 2e-06, "loss": 0.1858, "step": 4137 }, { "epoch": 1.5434539350988437, "grad_norm": 0.9359300136566162, "learning_rate": 2e-06, "loss": 0.1746, "step": 4138 }, { "epoch": 1.5438269302499068, "grad_norm": 1.1490548849105835, "learning_rate": 2e-06, "loss": 0.1909, "step": 4139 }, { "epoch": 1.54419992540097, "grad_norm": 0.7710203528404236, "learning_rate": 2e-06, "loss": 0.1708, "step": 4140 }, { "epoch": 1.5445729205520329, "grad_norm": 0.8320659399032593, "learning_rate": 2e-06, "loss": 0.1762, "step": 4141 }, { "epoch": 1.5449459157030958, "grad_norm": 0.8064866662025452, "learning_rate": 2e-06, "loss": 0.1566, "step": 4142 }, { "epoch": 1.5453189108541587, "grad_norm": 1.1656074523925781, "learning_rate": 2e-06, "loss": 0.1565, "step": 4143 }, { "epoch": 1.5456919060052219, "grad_norm": 0.9485105276107788, "learning_rate": 2e-06, "loss": 0.1773, "step": 4144 }, { "epoch": 1.546064901156285, "grad_norm": 0.7360901832580566, "learning_rate": 2e-06, "loss": 0.1572, "step": 4145 }, { "epoch": 1.546437896307348, "grad_norm": 0.9220475554466248, "learning_rate": 2e-06, "loss": 0.1915, "step": 4146 }, { "epoch": 1.5468108914584109, "grad_norm": 0.8714838624000549, "learning_rate": 2e-06, "loss": 0.1718, "step": 4147 }, { "epoch": 1.547183886609474, "grad_norm": 0.9447026252746582, "learning_rate": 2e-06, "loss": 0.1979, "step": 4148 }, { "epoch": 1.5475568817605372, "grad_norm": 0.8532106876373291, "learning_rate": 2e-06, "loss": 0.1501, "step": 4149 }, { "epoch": 1.5479298769116001, "grad_norm": 0.8114668726921082, "learning_rate": 2e-06, "loss": 0.1889, "step": 4150 }, { "epoch": 1.548302872062663, "grad_norm": 1.0346999168395996, "learning_rate": 2e-06, "loss": 0.196, "step": 4151 }, { "epoch": 1.5486758672137262, "grad_norm": 0.8173236846923828, "learning_rate": 2e-06, "loss": 0.2249, "step": 4152 }, { "epoch": 1.5490488623647893, "grad_norm": 0.7859200239181519, "learning_rate": 2e-06, "loss": 0.2018, "step": 4153 }, { "epoch": 1.5494218575158523, "grad_norm": 0.9604834318161011, "learning_rate": 2e-06, "loss": 0.1717, "step": 4154 }, { "epoch": 1.5497948526669152, "grad_norm": 0.9983347654342651, "learning_rate": 2e-06, "loss": 0.1764, "step": 4155 }, { "epoch": 1.5501678478179783, "grad_norm": 0.9057901501655579, "learning_rate": 2e-06, "loss": 0.1448, "step": 4156 }, { "epoch": 1.5505408429690415, "grad_norm": 0.8724414110183716, "learning_rate": 2e-06, "loss": 0.1648, "step": 4157 }, { "epoch": 1.5509138381201044, "grad_norm": 0.8353371620178223, "learning_rate": 2e-06, "loss": 0.1842, "step": 4158 }, { "epoch": 1.5512868332711673, "grad_norm": 0.8395655155181885, "learning_rate": 2e-06, "loss": 0.1868, "step": 4159 }, { "epoch": 1.5516598284222305, "grad_norm": 0.7200774550437927, "learning_rate": 2e-06, "loss": 0.176, "step": 4160 }, { "epoch": 1.5520328235732936, "grad_norm": 0.9477209448814392, "learning_rate": 2e-06, "loss": 0.1894, "step": 4161 }, { "epoch": 1.5524058187243566, "grad_norm": 0.8211243152618408, "learning_rate": 2e-06, "loss": 0.1734, "step": 4162 }, { "epoch": 1.5527788138754195, "grad_norm": 0.8006266951560974, "learning_rate": 2e-06, "loss": 0.1963, "step": 4163 }, { "epoch": 1.5531518090264826, "grad_norm": 0.922012209892273, "learning_rate": 2e-06, "loss": 0.1679, "step": 4164 }, { "epoch": 1.5535248041775458, "grad_norm": 0.9152402281761169, "learning_rate": 2e-06, "loss": 0.152, "step": 4165 }, { "epoch": 1.5538977993286087, "grad_norm": 1.0899714231491089, "learning_rate": 2e-06, "loss": 0.1698, "step": 4166 }, { "epoch": 1.5542707944796716, "grad_norm": 0.8580893278121948, "learning_rate": 2e-06, "loss": 0.1787, "step": 4167 }, { "epoch": 1.5546437896307348, "grad_norm": 0.8721203804016113, "learning_rate": 2e-06, "loss": 0.1753, "step": 4168 }, { "epoch": 1.555016784781798, "grad_norm": 0.9417023658752441, "learning_rate": 2e-06, "loss": 0.1584, "step": 4169 }, { "epoch": 1.5553897799328609, "grad_norm": 0.863758385181427, "learning_rate": 2e-06, "loss": 0.1877, "step": 4170 }, { "epoch": 1.5557627750839238, "grad_norm": 0.8195855617523193, "learning_rate": 2e-06, "loss": 0.1985, "step": 4171 }, { "epoch": 1.556135770234987, "grad_norm": 0.8696504831314087, "learning_rate": 2e-06, "loss": 0.1824, "step": 4172 }, { "epoch": 1.55650876538605, "grad_norm": 0.8101404905319214, "learning_rate": 2e-06, "loss": 0.1772, "step": 4173 }, { "epoch": 1.556881760537113, "grad_norm": 0.8952045440673828, "learning_rate": 2e-06, "loss": 0.1679, "step": 4174 }, { "epoch": 1.557254755688176, "grad_norm": 0.6562054753303528, "learning_rate": 2e-06, "loss": 0.1856, "step": 4175 }, { "epoch": 1.557627750839239, "grad_norm": 0.8031101822853088, "learning_rate": 2e-06, "loss": 0.2093, "step": 4176 }, { "epoch": 1.5580007459903022, "grad_norm": 0.9403236508369446, "learning_rate": 2e-06, "loss": 0.1628, "step": 4177 }, { "epoch": 1.5583737411413652, "grad_norm": 0.7881459593772888, "learning_rate": 2e-06, "loss": 0.1794, "step": 4178 }, { "epoch": 1.558746736292428, "grad_norm": 0.9663479328155518, "learning_rate": 2e-06, "loss": 0.1691, "step": 4179 }, { "epoch": 1.5591197314434913, "grad_norm": 0.7966075539588928, "learning_rate": 2e-06, "loss": 0.2058, "step": 4180 }, { "epoch": 1.5594927265945544, "grad_norm": 0.7796584367752075, "learning_rate": 2e-06, "loss": 0.1817, "step": 4181 }, { "epoch": 1.5598657217456173, "grad_norm": 0.8556807041168213, "learning_rate": 2e-06, "loss": 0.1796, "step": 4182 }, { "epoch": 1.5602387168966803, "grad_norm": 0.8008453249931335, "learning_rate": 2e-06, "loss": 0.1849, "step": 4183 }, { "epoch": 1.5606117120477434, "grad_norm": 0.716372013092041, "learning_rate": 2e-06, "loss": 0.1885, "step": 4184 }, { "epoch": 1.5609847071988066, "grad_norm": 0.7987371683120728, "learning_rate": 2e-06, "loss": 0.1732, "step": 4185 }, { "epoch": 1.5613577023498695, "grad_norm": 0.8166126012802124, "learning_rate": 2e-06, "loss": 0.1483, "step": 4186 }, { "epoch": 1.5617306975009324, "grad_norm": 1.037377953529358, "learning_rate": 2e-06, "loss": 0.177, "step": 4187 }, { "epoch": 1.5621036926519956, "grad_norm": 0.8251647353172302, "learning_rate": 2e-06, "loss": 0.1754, "step": 4188 }, { "epoch": 1.5624766878030587, "grad_norm": 0.9858704805374146, "learning_rate": 2e-06, "loss": 0.1591, "step": 4189 }, { "epoch": 1.5628496829541216, "grad_norm": 0.9428002834320068, "learning_rate": 2e-06, "loss": 0.1935, "step": 4190 }, { "epoch": 1.5632226781051846, "grad_norm": 0.7781723141670227, "learning_rate": 2e-06, "loss": 0.1982, "step": 4191 }, { "epoch": 1.5635956732562477, "grad_norm": 0.8529068827629089, "learning_rate": 2e-06, "loss": 0.1635, "step": 4192 }, { "epoch": 1.5639686684073109, "grad_norm": 0.8764315843582153, "learning_rate": 2e-06, "loss": 0.163, "step": 4193 }, { "epoch": 1.5643416635583738, "grad_norm": 0.8152859807014465, "learning_rate": 2e-06, "loss": 0.178, "step": 4194 }, { "epoch": 1.5647146587094367, "grad_norm": 0.8542141318321228, "learning_rate": 2e-06, "loss": 0.186, "step": 4195 }, { "epoch": 1.5650876538604999, "grad_norm": 0.8239805698394775, "learning_rate": 2e-06, "loss": 0.1674, "step": 4196 }, { "epoch": 1.565460649011563, "grad_norm": 0.7910576462745667, "learning_rate": 2e-06, "loss": 0.149, "step": 4197 }, { "epoch": 1.565833644162626, "grad_norm": 0.771645188331604, "learning_rate": 2e-06, "loss": 0.1737, "step": 4198 }, { "epoch": 1.5662066393136889, "grad_norm": 0.7176403403282166, "learning_rate": 2e-06, "loss": 0.1846, "step": 4199 }, { "epoch": 1.566579634464752, "grad_norm": 0.9715863466262817, "learning_rate": 2e-06, "loss": 0.1971, "step": 4200 }, { "epoch": 1.566952629615815, "grad_norm": 0.8816301226615906, "learning_rate": 2e-06, "loss": 0.1866, "step": 4201 }, { "epoch": 1.567325624766878, "grad_norm": 0.8639457821846008, "learning_rate": 2e-06, "loss": 0.1817, "step": 4202 }, { "epoch": 1.567698619917941, "grad_norm": 1.1565359830856323, "learning_rate": 2e-06, "loss": 0.1536, "step": 4203 }, { "epoch": 1.568071615069004, "grad_norm": 0.9970366358757019, "learning_rate": 2e-06, "loss": 0.1779, "step": 4204 }, { "epoch": 1.568444610220067, "grad_norm": 0.8794967532157898, "learning_rate": 2e-06, "loss": 0.1862, "step": 4205 }, { "epoch": 1.5688176053711302, "grad_norm": 0.9534955620765686, "learning_rate": 2e-06, "loss": 0.1949, "step": 4206 }, { "epoch": 1.5691906005221932, "grad_norm": 0.7404173612594604, "learning_rate": 2e-06, "loss": 0.1807, "step": 4207 }, { "epoch": 1.569563595673256, "grad_norm": 0.7081741690635681, "learning_rate": 2e-06, "loss": 0.1614, "step": 4208 }, { "epoch": 1.5699365908243192, "grad_norm": 0.7042449712753296, "learning_rate": 2e-06, "loss": 0.1778, "step": 4209 }, { "epoch": 1.5703095859753824, "grad_norm": 0.9761459231376648, "learning_rate": 2e-06, "loss": 0.176, "step": 4210 }, { "epoch": 1.5706825811264453, "grad_norm": 0.8450081944465637, "learning_rate": 2e-06, "loss": 0.2087, "step": 4211 }, { "epoch": 1.5710555762775082, "grad_norm": 0.7562037110328674, "learning_rate": 2e-06, "loss": 0.2003, "step": 4212 }, { "epoch": 1.5714285714285714, "grad_norm": 0.9084733724594116, "learning_rate": 2e-06, "loss": 0.1811, "step": 4213 }, { "epoch": 1.5718015665796345, "grad_norm": 0.8665415048599243, "learning_rate": 2e-06, "loss": 0.1648, "step": 4214 }, { "epoch": 1.5721745617306975, "grad_norm": 0.7405847311019897, "learning_rate": 2e-06, "loss": 0.1765, "step": 4215 }, { "epoch": 1.5725475568817604, "grad_norm": 0.7022413015365601, "learning_rate": 2e-06, "loss": 0.1793, "step": 4216 }, { "epoch": 1.5729205520328235, "grad_norm": 0.9128865599632263, "learning_rate": 2e-06, "loss": 0.1866, "step": 4217 }, { "epoch": 1.5732935471838867, "grad_norm": 0.90017169713974, "learning_rate": 2e-06, "loss": 0.1814, "step": 4218 }, { "epoch": 1.5736665423349496, "grad_norm": 0.8982779383659363, "learning_rate": 2e-06, "loss": 0.2007, "step": 4219 }, { "epoch": 1.5740395374860126, "grad_norm": 0.8461298942565918, "learning_rate": 2e-06, "loss": 0.2085, "step": 4220 }, { "epoch": 1.5744125326370757, "grad_norm": 1.2690719366073608, "learning_rate": 2e-06, "loss": 0.1793, "step": 4221 }, { "epoch": 1.5747855277881389, "grad_norm": 0.816475510597229, "learning_rate": 2e-06, "loss": 0.1872, "step": 4222 }, { "epoch": 1.5751585229392018, "grad_norm": 0.9388387203216553, "learning_rate": 2e-06, "loss": 0.187, "step": 4223 }, { "epoch": 1.5755315180902647, "grad_norm": 0.7660279870033264, "learning_rate": 2e-06, "loss": 0.1748, "step": 4224 }, { "epoch": 1.5759045132413279, "grad_norm": 0.9462069272994995, "learning_rate": 2e-06, "loss": 0.1715, "step": 4225 }, { "epoch": 1.576277508392391, "grad_norm": 0.8422399759292603, "learning_rate": 2e-06, "loss": 0.1796, "step": 4226 }, { "epoch": 1.576650503543454, "grad_norm": 0.8426529169082642, "learning_rate": 2e-06, "loss": 0.1865, "step": 4227 }, { "epoch": 1.5770234986945169, "grad_norm": 0.9053475856781006, "learning_rate": 2e-06, "loss": 0.1541, "step": 4228 }, { "epoch": 1.57739649384558, "grad_norm": 0.8059909343719482, "learning_rate": 2e-06, "loss": 0.1754, "step": 4229 }, { "epoch": 1.5777694889966432, "grad_norm": 0.7242786884307861, "learning_rate": 2e-06, "loss": 0.1727, "step": 4230 }, { "epoch": 1.578142484147706, "grad_norm": 1.0287177562713623, "learning_rate": 2e-06, "loss": 0.1866, "step": 4231 }, { "epoch": 1.578515479298769, "grad_norm": 0.9856280088424683, "learning_rate": 2e-06, "loss": 0.1561, "step": 4232 }, { "epoch": 1.5788884744498322, "grad_norm": 0.84781414270401, "learning_rate": 2e-06, "loss": 0.1836, "step": 4233 }, { "epoch": 1.5792614696008953, "grad_norm": 0.9134243726730347, "learning_rate": 2e-06, "loss": 0.1638, "step": 4234 }, { "epoch": 1.5796344647519582, "grad_norm": 0.8466085195541382, "learning_rate": 2e-06, "loss": 0.1714, "step": 4235 }, { "epoch": 1.5800074599030212, "grad_norm": 0.8140276074409485, "learning_rate": 2e-06, "loss": 0.1694, "step": 4236 }, { "epoch": 1.5803804550540843, "grad_norm": 0.7761430740356445, "learning_rate": 2e-06, "loss": 0.1612, "step": 4237 }, { "epoch": 1.5807534502051475, "grad_norm": 0.8349398970603943, "learning_rate": 2e-06, "loss": 0.1667, "step": 4238 }, { "epoch": 1.5811264453562104, "grad_norm": 0.9454677700996399, "learning_rate": 2e-06, "loss": 0.1866, "step": 4239 }, { "epoch": 1.5814994405072733, "grad_norm": 0.9404776692390442, "learning_rate": 2e-06, "loss": 0.1545, "step": 4240 }, { "epoch": 1.5818724356583365, "grad_norm": 0.9758716821670532, "learning_rate": 2e-06, "loss": 0.1638, "step": 4241 }, { "epoch": 1.5822454308093996, "grad_norm": 0.9842278361320496, "learning_rate": 2e-06, "loss": 0.1645, "step": 4242 }, { "epoch": 1.5826184259604625, "grad_norm": 0.8352392315864563, "learning_rate": 2e-06, "loss": 0.1567, "step": 4243 }, { "epoch": 1.5829914211115255, "grad_norm": 0.781416654586792, "learning_rate": 2e-06, "loss": 0.1861, "step": 4244 }, { "epoch": 1.5833644162625886, "grad_norm": 0.9165177345275879, "learning_rate": 2e-06, "loss": 0.179, "step": 4245 }, { "epoch": 1.5837374114136518, "grad_norm": 0.9267071485519409, "learning_rate": 2e-06, "loss": 0.1949, "step": 4246 }, { "epoch": 1.5841104065647147, "grad_norm": 0.7663930654525757, "learning_rate": 2e-06, "loss": 0.1766, "step": 4247 }, { "epoch": 1.5844834017157776, "grad_norm": 0.8594549298286438, "learning_rate": 2e-06, "loss": 0.1785, "step": 4248 }, { "epoch": 1.5848563968668408, "grad_norm": 0.826262354850769, "learning_rate": 2e-06, "loss": 0.1823, "step": 4249 }, { "epoch": 1.585229392017904, "grad_norm": 0.74913489818573, "learning_rate": 2e-06, "loss": 0.1796, "step": 4250 }, { "epoch": 1.5856023871689668, "grad_norm": 0.6929414868354797, "learning_rate": 2e-06, "loss": 0.1766, "step": 4251 }, { "epoch": 1.5859753823200298, "grad_norm": 0.907921314239502, "learning_rate": 2e-06, "loss": 0.1969, "step": 4252 }, { "epoch": 1.586348377471093, "grad_norm": 0.9806113839149475, "learning_rate": 2e-06, "loss": 0.1967, "step": 4253 }, { "epoch": 1.586721372622156, "grad_norm": 0.6829648017883301, "learning_rate": 2e-06, "loss": 0.1906, "step": 4254 }, { "epoch": 1.587094367773219, "grad_norm": 0.8354027271270752, "learning_rate": 2e-06, "loss": 0.1825, "step": 4255 }, { "epoch": 1.587467362924282, "grad_norm": 0.8098002076148987, "learning_rate": 2e-06, "loss": 0.1756, "step": 4256 }, { "epoch": 1.587840358075345, "grad_norm": 0.8393106460571289, "learning_rate": 2e-06, "loss": 0.1577, "step": 4257 }, { "epoch": 1.5882133532264082, "grad_norm": 0.9882705807685852, "learning_rate": 2e-06, "loss": 0.1746, "step": 4258 }, { "epoch": 1.5885863483774711, "grad_norm": 1.0626741647720337, "learning_rate": 2e-06, "loss": 0.1305, "step": 4259 }, { "epoch": 1.588959343528534, "grad_norm": 0.8969502449035645, "learning_rate": 2e-06, "loss": 0.1683, "step": 4260 }, { "epoch": 1.589332338679597, "grad_norm": 0.7872474193572998, "learning_rate": 2e-06, "loss": 0.1901, "step": 4261 }, { "epoch": 1.5897053338306601, "grad_norm": 0.7961575388908386, "learning_rate": 2e-06, "loss": 0.1744, "step": 4262 }, { "epoch": 1.5900783289817233, "grad_norm": 0.8536022901535034, "learning_rate": 2e-06, "loss": 0.1863, "step": 4263 }, { "epoch": 1.5904513241327862, "grad_norm": 0.673376739025116, "learning_rate": 2e-06, "loss": 0.1901, "step": 4264 }, { "epoch": 1.5908243192838492, "grad_norm": 0.8698404431343079, "learning_rate": 2e-06, "loss": 0.1764, "step": 4265 }, { "epoch": 1.5911973144349123, "grad_norm": 0.7666788101196289, "learning_rate": 2e-06, "loss": 0.1728, "step": 4266 }, { "epoch": 1.5915703095859755, "grad_norm": 1.1037095785140991, "learning_rate": 2e-06, "loss": 0.1607, "step": 4267 }, { "epoch": 1.5919433047370384, "grad_norm": 0.7549976110458374, "learning_rate": 2e-06, "loss": 0.1729, "step": 4268 }, { "epoch": 1.5923162998881013, "grad_norm": 0.7913274168968201, "learning_rate": 2e-06, "loss": 0.1901, "step": 4269 }, { "epoch": 1.5926892950391645, "grad_norm": 0.930164098739624, "learning_rate": 2e-06, "loss": 0.1791, "step": 4270 }, { "epoch": 1.5930622901902276, "grad_norm": 0.8598212599754333, "learning_rate": 2e-06, "loss": 0.1769, "step": 4271 }, { "epoch": 1.5934352853412905, "grad_norm": 1.0158264636993408, "learning_rate": 2e-06, "loss": 0.1931, "step": 4272 }, { "epoch": 1.5938082804923535, "grad_norm": 0.9230509400367737, "learning_rate": 2e-06, "loss": 0.182, "step": 4273 }, { "epoch": 1.5941812756434166, "grad_norm": 0.7212433815002441, "learning_rate": 2e-06, "loss": 0.1823, "step": 4274 }, { "epoch": 1.5945542707944798, "grad_norm": 0.7125661373138428, "learning_rate": 2e-06, "loss": 0.1975, "step": 4275 }, { "epoch": 1.5949272659455427, "grad_norm": 0.9372103214263916, "learning_rate": 2e-06, "loss": 0.1913, "step": 4276 }, { "epoch": 1.5953002610966056, "grad_norm": 0.8273377418518066, "learning_rate": 2e-06, "loss": 0.1714, "step": 4277 }, { "epoch": 1.5956732562476688, "grad_norm": 0.8350762724876404, "learning_rate": 2e-06, "loss": 0.1909, "step": 4278 }, { "epoch": 1.596046251398732, "grad_norm": 0.727899968624115, "learning_rate": 2e-06, "loss": 0.1653, "step": 4279 }, { "epoch": 1.5964192465497948, "grad_norm": 0.8208472728729248, "learning_rate": 2e-06, "loss": 0.1875, "step": 4280 }, { "epoch": 1.5967922417008578, "grad_norm": 0.7977333068847656, "learning_rate": 2e-06, "loss": 0.1769, "step": 4281 }, { "epoch": 1.597165236851921, "grad_norm": 0.7664821147918701, "learning_rate": 2e-06, "loss": 0.1946, "step": 4282 }, { "epoch": 1.597538232002984, "grad_norm": 0.9444142580032349, "learning_rate": 2e-06, "loss": 0.1647, "step": 4283 }, { "epoch": 1.597911227154047, "grad_norm": 0.7697938084602356, "learning_rate": 2e-06, "loss": 0.1674, "step": 4284 }, { "epoch": 1.59828422230511, "grad_norm": 0.7858242988586426, "learning_rate": 2e-06, "loss": 0.1807, "step": 4285 }, { "epoch": 1.598657217456173, "grad_norm": 0.8072312474250793, "learning_rate": 2e-06, "loss": 0.1618, "step": 4286 }, { "epoch": 1.5990302126072362, "grad_norm": 0.8654451966285706, "learning_rate": 2e-06, "loss": 0.1903, "step": 4287 }, { "epoch": 1.5994032077582991, "grad_norm": 0.9465362429618835, "learning_rate": 2e-06, "loss": 0.1434, "step": 4288 }, { "epoch": 1.599776202909362, "grad_norm": 0.7486302852630615, "learning_rate": 2e-06, "loss": 0.174, "step": 4289 }, { "epoch": 1.6001491980604252, "grad_norm": 1.0225789546966553, "learning_rate": 2e-06, "loss": 0.1893, "step": 4290 }, { "epoch": 1.6005221932114884, "grad_norm": 0.8358786106109619, "learning_rate": 2e-06, "loss": 0.167, "step": 4291 }, { "epoch": 1.6008951883625513, "grad_norm": 0.8483272790908813, "learning_rate": 2e-06, "loss": 0.1657, "step": 4292 }, { "epoch": 1.6012681835136142, "grad_norm": 0.767719030380249, "learning_rate": 2e-06, "loss": 0.1838, "step": 4293 }, { "epoch": 1.6016411786646774, "grad_norm": 0.9047721028327942, "learning_rate": 2e-06, "loss": 0.1598, "step": 4294 }, { "epoch": 1.6020141738157405, "grad_norm": 0.8195227384567261, "learning_rate": 2e-06, "loss": 0.1706, "step": 4295 }, { "epoch": 1.6023871689668034, "grad_norm": 0.8307429552078247, "learning_rate": 2e-06, "loss": 0.1874, "step": 4296 }, { "epoch": 1.6027601641178664, "grad_norm": 0.8061182498931885, "learning_rate": 2e-06, "loss": 0.1674, "step": 4297 }, { "epoch": 1.6031331592689295, "grad_norm": 0.7072155475616455, "learning_rate": 2e-06, "loss": 0.1741, "step": 4298 }, { "epoch": 1.6035061544199927, "grad_norm": 0.7601576447486877, "learning_rate": 2e-06, "loss": 0.1722, "step": 4299 }, { "epoch": 1.6038791495710556, "grad_norm": 0.7439846396446228, "learning_rate": 2e-06, "loss": 0.1967, "step": 4300 }, { "epoch": 1.6042521447221185, "grad_norm": 0.970917820930481, "learning_rate": 2e-06, "loss": 0.1737, "step": 4301 }, { "epoch": 1.6046251398731817, "grad_norm": 0.7217164635658264, "learning_rate": 2e-06, "loss": 0.1813, "step": 4302 }, { "epoch": 1.6049981350242448, "grad_norm": 0.7471009492874146, "learning_rate": 2e-06, "loss": 0.1822, "step": 4303 }, { "epoch": 1.6053711301753077, "grad_norm": 0.9827234148979187, "learning_rate": 2e-06, "loss": 0.2004, "step": 4304 }, { "epoch": 1.6057441253263707, "grad_norm": 0.778762698173523, "learning_rate": 2e-06, "loss": 0.1857, "step": 4305 }, { "epoch": 1.6061171204774338, "grad_norm": 0.9349126815795898, "learning_rate": 2e-06, "loss": 0.1798, "step": 4306 }, { "epoch": 1.606490115628497, "grad_norm": 0.9445522427558899, "learning_rate": 2e-06, "loss": 0.1413, "step": 4307 }, { "epoch": 1.60686311077956, "grad_norm": 0.8937616944313049, "learning_rate": 2e-06, "loss": 0.1677, "step": 4308 }, { "epoch": 1.6072361059306228, "grad_norm": 0.9323290586471558, "learning_rate": 2e-06, "loss": 0.1894, "step": 4309 }, { "epoch": 1.607609101081686, "grad_norm": 0.8489744663238525, "learning_rate": 2e-06, "loss": 0.1809, "step": 4310 }, { "epoch": 1.6079820962327491, "grad_norm": 0.9306292533874512, "learning_rate": 2e-06, "loss": 0.2172, "step": 4311 }, { "epoch": 1.608355091383812, "grad_norm": 0.7646037340164185, "learning_rate": 2e-06, "loss": 0.1838, "step": 4312 }, { "epoch": 1.608728086534875, "grad_norm": 0.8811163902282715, "learning_rate": 2e-06, "loss": 0.1868, "step": 4313 }, { "epoch": 1.6091010816859381, "grad_norm": 0.7292848825454712, "learning_rate": 2e-06, "loss": 0.1753, "step": 4314 }, { "epoch": 1.6094740768370013, "grad_norm": 0.851342499256134, "learning_rate": 2e-06, "loss": 0.1904, "step": 4315 }, { "epoch": 1.6098470719880642, "grad_norm": 0.6916496753692627, "learning_rate": 2e-06, "loss": 0.1628, "step": 4316 }, { "epoch": 1.6102200671391271, "grad_norm": 0.8984551429748535, "learning_rate": 2e-06, "loss": 0.1691, "step": 4317 }, { "epoch": 1.6105930622901903, "grad_norm": 1.1273778676986694, "learning_rate": 2e-06, "loss": 0.198, "step": 4318 }, { "epoch": 1.6109660574412534, "grad_norm": 0.8177773356437683, "learning_rate": 2e-06, "loss": 0.1784, "step": 4319 }, { "epoch": 1.6113390525923164, "grad_norm": 0.7793472409248352, "learning_rate": 2e-06, "loss": 0.1706, "step": 4320 }, { "epoch": 1.6117120477433793, "grad_norm": 1.0211999416351318, "learning_rate": 2e-06, "loss": 0.2137, "step": 4321 }, { "epoch": 1.6120850428944422, "grad_norm": 0.8139433860778809, "learning_rate": 2e-06, "loss": 0.1666, "step": 4322 }, { "epoch": 1.6124580380455054, "grad_norm": 0.812997579574585, "learning_rate": 2e-06, "loss": 0.2023, "step": 4323 }, { "epoch": 1.6128310331965685, "grad_norm": 0.7943370938301086, "learning_rate": 2e-06, "loss": 0.2042, "step": 4324 }, { "epoch": 1.6132040283476314, "grad_norm": 0.955883800983429, "learning_rate": 2e-06, "loss": 0.1578, "step": 4325 }, { "epoch": 1.6135770234986944, "grad_norm": 0.7162894606590271, "learning_rate": 2e-06, "loss": 0.1827, "step": 4326 }, { "epoch": 1.6139500186497575, "grad_norm": 0.7891223430633545, "learning_rate": 2e-06, "loss": 0.1798, "step": 4327 }, { "epoch": 1.6143230138008207, "grad_norm": 1.152029275894165, "learning_rate": 2e-06, "loss": 0.1728, "step": 4328 }, { "epoch": 1.6146960089518836, "grad_norm": 0.9768672585487366, "learning_rate": 2e-06, "loss": 0.1516, "step": 4329 }, { "epoch": 1.6150690041029465, "grad_norm": 0.8554395437240601, "learning_rate": 2e-06, "loss": 0.1989, "step": 4330 }, { "epoch": 1.6154419992540097, "grad_norm": 0.8623990416526794, "learning_rate": 2e-06, "loss": 0.1876, "step": 4331 }, { "epoch": 1.6158149944050728, "grad_norm": 0.6676610708236694, "learning_rate": 2e-06, "loss": 0.1448, "step": 4332 }, { "epoch": 1.6161879895561357, "grad_norm": 0.9673129916191101, "learning_rate": 2e-06, "loss": 0.1779, "step": 4333 }, { "epoch": 1.6165609847071987, "grad_norm": 0.6815115213394165, "learning_rate": 2e-06, "loss": 0.1979, "step": 4334 }, { "epoch": 1.6169339798582618, "grad_norm": 0.7966251969337463, "learning_rate": 2e-06, "loss": 0.1752, "step": 4335 }, { "epoch": 1.617306975009325, "grad_norm": 0.827973484992981, "learning_rate": 2e-06, "loss": 0.1608, "step": 4336 }, { "epoch": 1.617679970160388, "grad_norm": 0.7448579668998718, "learning_rate": 2e-06, "loss": 0.1853, "step": 4337 }, { "epoch": 1.6180529653114508, "grad_norm": 0.9215285181999207, "learning_rate": 2e-06, "loss": 0.1976, "step": 4338 }, { "epoch": 1.618425960462514, "grad_norm": 0.7312119603157043, "learning_rate": 2e-06, "loss": 0.2099, "step": 4339 }, { "epoch": 1.6187989556135771, "grad_norm": 0.8573967218399048, "learning_rate": 2e-06, "loss": 0.1653, "step": 4340 }, { "epoch": 1.61917195076464, "grad_norm": 0.8986856937408447, "learning_rate": 2e-06, "loss": 0.1587, "step": 4341 }, { "epoch": 1.619544945915703, "grad_norm": 0.9563603401184082, "learning_rate": 2e-06, "loss": 0.1709, "step": 4342 }, { "epoch": 1.6199179410667661, "grad_norm": 0.7621894478797913, "learning_rate": 2e-06, "loss": 0.1829, "step": 4343 }, { "epoch": 1.6202909362178293, "grad_norm": 0.9479605555534363, "learning_rate": 2e-06, "loss": 0.1693, "step": 4344 }, { "epoch": 1.6206639313688922, "grad_norm": 0.8961230516433716, "learning_rate": 2e-06, "loss": 0.1747, "step": 4345 }, { "epoch": 1.6210369265199551, "grad_norm": 0.9907352924346924, "learning_rate": 2e-06, "loss": 0.1909, "step": 4346 }, { "epoch": 1.6214099216710183, "grad_norm": 0.944377064704895, "learning_rate": 2e-06, "loss": 0.1686, "step": 4347 }, { "epoch": 1.6217829168220814, "grad_norm": 0.7828280925750732, "learning_rate": 2e-06, "loss": 0.1725, "step": 4348 }, { "epoch": 1.6221559119731443, "grad_norm": 1.0055699348449707, "learning_rate": 2e-06, "loss": 0.1774, "step": 4349 }, { "epoch": 1.6225289071242073, "grad_norm": 0.8583962321281433, "learning_rate": 2e-06, "loss": 0.1913, "step": 4350 }, { "epoch": 1.6229019022752704, "grad_norm": 0.8431967496871948, "learning_rate": 2e-06, "loss": 0.1818, "step": 4351 }, { "epoch": 1.6232748974263336, "grad_norm": 0.8116976618766785, "learning_rate": 2e-06, "loss": 0.1754, "step": 4352 }, { "epoch": 1.6236478925773965, "grad_norm": 0.7368567585945129, "learning_rate": 2e-06, "loss": 0.1963, "step": 4353 }, { "epoch": 1.6240208877284594, "grad_norm": 0.7526631951332092, "learning_rate": 2e-06, "loss": 0.1846, "step": 4354 }, { "epoch": 1.6243938828795226, "grad_norm": 0.7612675428390503, "learning_rate": 2e-06, "loss": 0.1754, "step": 4355 }, { "epoch": 1.6247668780305857, "grad_norm": 0.9434053301811218, "learning_rate": 2e-06, "loss": 0.1668, "step": 4356 }, { "epoch": 1.6251398731816487, "grad_norm": 1.0020055770874023, "learning_rate": 2e-06, "loss": 0.1581, "step": 4357 }, { "epoch": 1.6255128683327116, "grad_norm": 0.7867600321769714, "learning_rate": 2e-06, "loss": 0.1906, "step": 4358 }, { "epoch": 1.6258858634837747, "grad_norm": 0.8074172139167786, "learning_rate": 2e-06, "loss": 0.177, "step": 4359 }, { "epoch": 1.6262588586348379, "grad_norm": 0.6943619251251221, "learning_rate": 2e-06, "loss": 0.1697, "step": 4360 }, { "epoch": 1.6266318537859008, "grad_norm": 1.046207308769226, "learning_rate": 2e-06, "loss": 0.168, "step": 4361 }, { "epoch": 1.6270048489369637, "grad_norm": 1.0066949129104614, "learning_rate": 2e-06, "loss": 0.1736, "step": 4362 }, { "epoch": 1.6273778440880269, "grad_norm": 0.7832383513450623, "learning_rate": 2e-06, "loss": 0.1769, "step": 4363 }, { "epoch": 1.62775083923909, "grad_norm": 0.785958468914032, "learning_rate": 2e-06, "loss": 0.1647, "step": 4364 }, { "epoch": 1.628123834390153, "grad_norm": 0.7718594074249268, "learning_rate": 2e-06, "loss": 0.1754, "step": 4365 }, { "epoch": 1.6284968295412159, "grad_norm": 0.7607621550559998, "learning_rate": 2e-06, "loss": 0.173, "step": 4366 }, { "epoch": 1.628869824692279, "grad_norm": 1.0219069719314575, "learning_rate": 2e-06, "loss": 0.157, "step": 4367 }, { "epoch": 1.6292428198433422, "grad_norm": 0.8709717988967896, "learning_rate": 2e-06, "loss": 0.1774, "step": 4368 }, { "epoch": 1.629615814994405, "grad_norm": 0.8913688659667969, "learning_rate": 2e-06, "loss": 0.1842, "step": 4369 }, { "epoch": 1.629988810145468, "grad_norm": 0.6512132287025452, "learning_rate": 2e-06, "loss": 0.1839, "step": 4370 }, { "epoch": 1.6303618052965312, "grad_norm": 1.5993688106536865, "learning_rate": 2e-06, "loss": 0.1839, "step": 4371 }, { "epoch": 1.6307348004475943, "grad_norm": 0.8392807841300964, "learning_rate": 2e-06, "loss": 0.1821, "step": 4372 }, { "epoch": 1.6311077955986573, "grad_norm": 0.7421169877052307, "learning_rate": 2e-06, "loss": 0.1784, "step": 4373 }, { "epoch": 1.6314807907497202, "grad_norm": 0.8375647664070129, "learning_rate": 2e-06, "loss": 0.16, "step": 4374 }, { "epoch": 1.6318537859007833, "grad_norm": 0.7111315727233887, "learning_rate": 2e-06, "loss": 0.1664, "step": 4375 }, { "epoch": 1.6322267810518465, "grad_norm": 0.889406681060791, "learning_rate": 2e-06, "loss": 0.186, "step": 4376 }, { "epoch": 1.6325997762029094, "grad_norm": 0.8813725113868713, "learning_rate": 2e-06, "loss": 0.2013, "step": 4377 }, { "epoch": 1.6329727713539723, "grad_norm": 0.9086058735847473, "learning_rate": 2e-06, "loss": 0.1583, "step": 4378 }, { "epoch": 1.6333457665050353, "grad_norm": 0.8715556263923645, "learning_rate": 2e-06, "loss": 0.1828, "step": 4379 }, { "epoch": 1.6337187616560984, "grad_norm": 0.8489289879798889, "learning_rate": 2e-06, "loss": 0.1796, "step": 4380 }, { "epoch": 1.6340917568071616, "grad_norm": 0.8262899518013, "learning_rate": 2e-06, "loss": 0.1946, "step": 4381 }, { "epoch": 1.6344647519582245, "grad_norm": 0.7584642171859741, "learning_rate": 2e-06, "loss": 0.187, "step": 4382 }, { "epoch": 1.6348377471092874, "grad_norm": 0.8772071599960327, "learning_rate": 2e-06, "loss": 0.1615, "step": 4383 }, { "epoch": 1.6352107422603506, "grad_norm": 0.852628231048584, "learning_rate": 2e-06, "loss": 0.174, "step": 4384 }, { "epoch": 1.6355837374114137, "grad_norm": 0.7541442513465881, "learning_rate": 2e-06, "loss": 0.175, "step": 4385 }, { "epoch": 1.6359567325624766, "grad_norm": 0.8393694162368774, "learning_rate": 2e-06, "loss": 0.1557, "step": 4386 }, { "epoch": 1.6363297277135396, "grad_norm": 0.7746807336807251, "learning_rate": 2e-06, "loss": 0.1769, "step": 4387 }, { "epoch": 1.6367027228646027, "grad_norm": 0.8963153958320618, "learning_rate": 2e-06, "loss": 0.1892, "step": 4388 }, { "epoch": 1.6370757180156659, "grad_norm": 0.9870458245277405, "learning_rate": 2e-06, "loss": 0.1556, "step": 4389 }, { "epoch": 1.6374487131667288, "grad_norm": 0.9248251914978027, "learning_rate": 2e-06, "loss": 0.1761, "step": 4390 }, { "epoch": 1.6378217083177917, "grad_norm": 0.8071631789207458, "learning_rate": 2e-06, "loss": 0.1827, "step": 4391 }, { "epoch": 1.6381947034688549, "grad_norm": 0.8157825469970703, "learning_rate": 2e-06, "loss": 0.1835, "step": 4392 }, { "epoch": 1.638567698619918, "grad_norm": 0.8973947763442993, "learning_rate": 2e-06, "loss": 0.1886, "step": 4393 }, { "epoch": 1.638940693770981, "grad_norm": 0.8940775394439697, "learning_rate": 2e-06, "loss": 0.2037, "step": 4394 }, { "epoch": 1.6393136889220439, "grad_norm": 0.868441641330719, "learning_rate": 2e-06, "loss": 0.1741, "step": 4395 }, { "epoch": 1.639686684073107, "grad_norm": 0.8700555562973022, "learning_rate": 2e-06, "loss": 0.154, "step": 4396 }, { "epoch": 1.6400596792241702, "grad_norm": 0.7920747995376587, "learning_rate": 2e-06, "loss": 0.2065, "step": 4397 }, { "epoch": 1.640432674375233, "grad_norm": 0.984136164188385, "learning_rate": 2e-06, "loss": 0.1533, "step": 4398 }, { "epoch": 1.640805669526296, "grad_norm": 0.9366825819015503, "learning_rate": 2e-06, "loss": 0.1713, "step": 4399 }, { "epoch": 1.6411786646773592, "grad_norm": 0.8147644400596619, "learning_rate": 2e-06, "loss": 0.182, "step": 4400 }, { "epoch": 1.6415516598284223, "grad_norm": 0.847450852394104, "learning_rate": 2e-06, "loss": 0.1983, "step": 4401 }, { "epoch": 1.6419246549794853, "grad_norm": 0.8195078372955322, "learning_rate": 2e-06, "loss": 0.1863, "step": 4402 }, { "epoch": 1.6422976501305482, "grad_norm": 0.8404159545898438, "learning_rate": 2e-06, "loss": 0.1684, "step": 4403 }, { "epoch": 1.6426706452816113, "grad_norm": 0.8589555621147156, "learning_rate": 2e-06, "loss": 0.1574, "step": 4404 }, { "epoch": 1.6430436404326745, "grad_norm": 0.8383800387382507, "learning_rate": 2e-06, "loss": 0.1452, "step": 4405 }, { "epoch": 1.6434166355837374, "grad_norm": 0.9944324493408203, "learning_rate": 2e-06, "loss": 0.1598, "step": 4406 }, { "epoch": 1.6437896307348003, "grad_norm": 0.888216495513916, "learning_rate": 2e-06, "loss": 0.1838, "step": 4407 }, { "epoch": 1.6441626258858635, "grad_norm": 0.8049572706222534, "learning_rate": 2e-06, "loss": 0.1902, "step": 4408 }, { "epoch": 1.6445356210369266, "grad_norm": 0.8452509045600891, "learning_rate": 2e-06, "loss": 0.1786, "step": 4409 }, { "epoch": 1.6449086161879896, "grad_norm": 0.7759578227996826, "learning_rate": 2e-06, "loss": 0.154, "step": 4410 }, { "epoch": 1.6452816113390525, "grad_norm": 0.9143553376197815, "learning_rate": 2e-06, "loss": 0.1892, "step": 4411 }, { "epoch": 1.6456546064901156, "grad_norm": 0.7323988080024719, "learning_rate": 2e-06, "loss": 0.1962, "step": 4412 }, { "epoch": 1.6460276016411788, "grad_norm": 0.776404082775116, "learning_rate": 2e-06, "loss": 0.1998, "step": 4413 }, { "epoch": 1.6464005967922417, "grad_norm": 0.7980219125747681, "learning_rate": 2e-06, "loss": 0.1459, "step": 4414 }, { "epoch": 1.6467735919433046, "grad_norm": 0.7967458367347717, "learning_rate": 2e-06, "loss": 0.1818, "step": 4415 }, { "epoch": 1.6471465870943678, "grad_norm": 0.9949086904525757, "learning_rate": 2e-06, "loss": 0.2045, "step": 4416 }, { "epoch": 1.647519582245431, "grad_norm": 0.7620731592178345, "learning_rate": 2e-06, "loss": 0.1648, "step": 4417 }, { "epoch": 1.6478925773964939, "grad_norm": 0.8594008088111877, "learning_rate": 2e-06, "loss": 0.2051, "step": 4418 }, { "epoch": 1.6482655725475568, "grad_norm": 0.8209002614021301, "learning_rate": 2e-06, "loss": 0.1614, "step": 4419 }, { "epoch": 1.64863856769862, "grad_norm": 0.8702422380447388, "learning_rate": 2e-06, "loss": 0.1713, "step": 4420 }, { "epoch": 1.649011562849683, "grad_norm": 0.7059189677238464, "learning_rate": 2e-06, "loss": 0.1845, "step": 4421 }, { "epoch": 1.649384558000746, "grad_norm": 0.8209080100059509, "learning_rate": 2e-06, "loss": 0.1673, "step": 4422 }, { "epoch": 1.649757553151809, "grad_norm": 0.8208640813827515, "learning_rate": 2e-06, "loss": 0.1714, "step": 4423 }, { "epoch": 1.650130548302872, "grad_norm": 0.8005861639976501, "learning_rate": 2e-06, "loss": 0.1476, "step": 4424 }, { "epoch": 1.6505035434539352, "grad_norm": 0.7953968644142151, "learning_rate": 2e-06, "loss": 0.1551, "step": 4425 }, { "epoch": 1.6508765386049982, "grad_norm": 0.8134786486625671, "learning_rate": 2e-06, "loss": 0.2033, "step": 4426 }, { "epoch": 1.651249533756061, "grad_norm": 0.979403555393219, "learning_rate": 2e-06, "loss": 0.158, "step": 4427 }, { "epoch": 1.6516225289071242, "grad_norm": 0.689964771270752, "learning_rate": 2e-06, "loss": 0.2141, "step": 4428 }, { "epoch": 1.6519955240581874, "grad_norm": 1.0099163055419922, "learning_rate": 2e-06, "loss": 0.2016, "step": 4429 }, { "epoch": 1.6523685192092503, "grad_norm": 0.7711634635925293, "learning_rate": 2e-06, "loss": 0.1922, "step": 4430 }, { "epoch": 1.6527415143603132, "grad_norm": 1.098665475845337, "learning_rate": 2e-06, "loss": 0.1897, "step": 4431 }, { "epoch": 1.6531145095113764, "grad_norm": 0.752470850944519, "learning_rate": 2e-06, "loss": 0.2122, "step": 4432 }, { "epoch": 1.6534875046624395, "grad_norm": 0.7344213724136353, "learning_rate": 2e-06, "loss": 0.177, "step": 4433 }, { "epoch": 1.6538604998135025, "grad_norm": 0.9020958542823792, "learning_rate": 2e-06, "loss": 0.1795, "step": 4434 }, { "epoch": 1.6542334949645654, "grad_norm": 0.7775170803070068, "learning_rate": 2e-06, "loss": 0.1716, "step": 4435 }, { "epoch": 1.6546064901156285, "grad_norm": 0.8180723190307617, "learning_rate": 2e-06, "loss": 0.1889, "step": 4436 }, { "epoch": 1.6549794852666917, "grad_norm": 0.7936978936195374, "learning_rate": 2e-06, "loss": 0.1631, "step": 4437 }, { "epoch": 1.6553524804177546, "grad_norm": 0.9265734553337097, "learning_rate": 2e-06, "loss": 0.1806, "step": 4438 }, { "epoch": 1.6557254755688176, "grad_norm": 0.8929697275161743, "learning_rate": 2e-06, "loss": 0.1857, "step": 4439 }, { "epoch": 1.6560984707198805, "grad_norm": 0.659045398235321, "learning_rate": 2e-06, "loss": 0.1898, "step": 4440 }, { "epoch": 1.6564714658709436, "grad_norm": 0.7845157980918884, "learning_rate": 2e-06, "loss": 0.1746, "step": 4441 }, { "epoch": 1.6568444610220068, "grad_norm": 0.7323185801506042, "learning_rate": 2e-06, "loss": 0.1596, "step": 4442 }, { "epoch": 1.6572174561730697, "grad_norm": 1.0137544870376587, "learning_rate": 2e-06, "loss": 0.1799, "step": 4443 }, { "epoch": 1.6575904513241326, "grad_norm": 0.8316275477409363, "learning_rate": 2e-06, "loss": 0.1961, "step": 4444 }, { "epoch": 1.6579634464751958, "grad_norm": 0.8301703333854675, "learning_rate": 2e-06, "loss": 0.1697, "step": 4445 }, { "epoch": 1.658336441626259, "grad_norm": 0.7497019171714783, "learning_rate": 2e-06, "loss": 0.2001, "step": 4446 }, { "epoch": 1.6587094367773219, "grad_norm": 0.7632739543914795, "learning_rate": 2e-06, "loss": 0.1926, "step": 4447 }, { "epoch": 1.6590824319283848, "grad_norm": 0.7777487635612488, "learning_rate": 2e-06, "loss": 0.1884, "step": 4448 }, { "epoch": 1.659455427079448, "grad_norm": 0.8013160824775696, "learning_rate": 2e-06, "loss": 0.1615, "step": 4449 }, { "epoch": 1.659828422230511, "grad_norm": 0.7835131883621216, "learning_rate": 2e-06, "loss": 0.1921, "step": 4450 }, { "epoch": 1.660201417381574, "grad_norm": 0.778343915939331, "learning_rate": 2e-06, "loss": 0.1789, "step": 4451 }, { "epoch": 1.660574412532637, "grad_norm": 0.7589355707168579, "learning_rate": 2e-06, "loss": 0.1912, "step": 4452 }, { "epoch": 1.6609474076837, "grad_norm": 0.9490116834640503, "learning_rate": 2e-06, "loss": 0.1818, "step": 4453 }, { "epoch": 1.6613204028347632, "grad_norm": 0.7136086225509644, "learning_rate": 2e-06, "loss": 0.1861, "step": 4454 }, { "epoch": 1.6616933979858262, "grad_norm": 0.8387042284011841, "learning_rate": 2e-06, "loss": 0.182, "step": 4455 }, { "epoch": 1.662066393136889, "grad_norm": 0.7731079459190369, "learning_rate": 2e-06, "loss": 0.177, "step": 4456 }, { "epoch": 1.6624393882879522, "grad_norm": 0.9516328573226929, "learning_rate": 2e-06, "loss": 0.1456, "step": 4457 }, { "epoch": 1.6628123834390154, "grad_norm": 0.8131281137466431, "learning_rate": 2e-06, "loss": 0.1634, "step": 4458 }, { "epoch": 1.6631853785900783, "grad_norm": 0.8524467945098877, "learning_rate": 2e-06, "loss": 0.1807, "step": 4459 }, { "epoch": 1.6635583737411412, "grad_norm": 0.8568451404571533, "learning_rate": 2e-06, "loss": 0.1771, "step": 4460 }, { "epoch": 1.6639313688922044, "grad_norm": 1.0497304201126099, "learning_rate": 2e-06, "loss": 0.1585, "step": 4461 }, { "epoch": 1.6643043640432675, "grad_norm": 0.7996121048927307, "learning_rate": 2e-06, "loss": 0.1778, "step": 4462 }, { "epoch": 1.6646773591943305, "grad_norm": 0.8584092855453491, "learning_rate": 2e-06, "loss": 0.1703, "step": 4463 }, { "epoch": 1.6650503543453934, "grad_norm": 0.8358001112937927, "learning_rate": 2e-06, "loss": 0.1894, "step": 4464 }, { "epoch": 1.6654233494964565, "grad_norm": 0.9176812171936035, "learning_rate": 2e-06, "loss": 0.1743, "step": 4465 }, { "epoch": 1.6657963446475197, "grad_norm": 0.9725039601325989, "learning_rate": 2e-06, "loss": 0.1982, "step": 4466 }, { "epoch": 1.6661693397985826, "grad_norm": 0.834955096244812, "learning_rate": 2e-06, "loss": 0.1823, "step": 4467 }, { "epoch": 1.6665423349496455, "grad_norm": 0.7313769459724426, "learning_rate": 2e-06, "loss": 0.1743, "step": 4468 }, { "epoch": 1.6669153301007087, "grad_norm": 0.7799262404441833, "learning_rate": 2e-06, "loss": 0.1592, "step": 4469 }, { "epoch": 1.6672883252517718, "grad_norm": 0.6716504693031311, "learning_rate": 2e-06, "loss": 0.193, "step": 4470 }, { "epoch": 1.6676613204028348, "grad_norm": 0.9079967141151428, "learning_rate": 2e-06, "loss": 0.1648, "step": 4471 }, { "epoch": 1.6680343155538977, "grad_norm": 0.9285648465156555, "learning_rate": 2e-06, "loss": 0.1946, "step": 4472 }, { "epoch": 1.6684073107049608, "grad_norm": 0.7596743106842041, "learning_rate": 2e-06, "loss": 0.1759, "step": 4473 }, { "epoch": 1.668780305856024, "grad_norm": 0.9490578770637512, "learning_rate": 2e-06, "loss": 0.1841, "step": 4474 }, { "epoch": 1.669153301007087, "grad_norm": 0.6973186731338501, "learning_rate": 2e-06, "loss": 0.198, "step": 4475 }, { "epoch": 1.6695262961581498, "grad_norm": 0.7219462990760803, "learning_rate": 2e-06, "loss": 0.1861, "step": 4476 }, { "epoch": 1.669899291309213, "grad_norm": 0.9459484219551086, "learning_rate": 2e-06, "loss": 0.1816, "step": 4477 }, { "epoch": 1.6702722864602761, "grad_norm": 0.7649198770523071, "learning_rate": 2e-06, "loss": 0.1776, "step": 4478 }, { "epoch": 1.670645281611339, "grad_norm": 0.8370360732078552, "learning_rate": 2e-06, "loss": 0.1942, "step": 4479 }, { "epoch": 1.671018276762402, "grad_norm": 0.8398522734642029, "learning_rate": 2e-06, "loss": 0.1492, "step": 4480 }, { "epoch": 1.6713912719134651, "grad_norm": 0.8528311848640442, "learning_rate": 2e-06, "loss": 0.1829, "step": 4481 }, { "epoch": 1.6717642670645283, "grad_norm": 0.8527035713195801, "learning_rate": 2e-06, "loss": 0.162, "step": 4482 }, { "epoch": 1.6721372622155912, "grad_norm": 0.754700779914856, "learning_rate": 2e-06, "loss": 0.1841, "step": 4483 }, { "epoch": 1.6725102573666542, "grad_norm": 0.7440940737724304, "learning_rate": 2e-06, "loss": 0.1817, "step": 4484 }, { "epoch": 1.6728832525177173, "grad_norm": 0.7443313002586365, "learning_rate": 2e-06, "loss": 0.1893, "step": 4485 }, { "epoch": 1.6732562476687804, "grad_norm": 0.9470669627189636, "learning_rate": 2e-06, "loss": 0.1695, "step": 4486 }, { "epoch": 1.6736292428198434, "grad_norm": 0.9014724493026733, "learning_rate": 2e-06, "loss": 0.1577, "step": 4487 }, { "epoch": 1.6740022379709063, "grad_norm": 0.6782959699630737, "learning_rate": 2e-06, "loss": 0.1798, "step": 4488 }, { "epoch": 1.6743752331219695, "grad_norm": 0.6929476857185364, "learning_rate": 2e-06, "loss": 0.1785, "step": 4489 }, { "epoch": 1.6747482282730326, "grad_norm": 0.8314085006713867, "learning_rate": 2e-06, "loss": 0.1941, "step": 4490 }, { "epoch": 1.6751212234240955, "grad_norm": 0.8586918115615845, "learning_rate": 2e-06, "loss": 0.1534, "step": 4491 }, { "epoch": 1.6754942185751585, "grad_norm": 0.8705965280532837, "learning_rate": 2e-06, "loss": 0.1693, "step": 4492 }, { "epoch": 1.6758672137262216, "grad_norm": 0.825709342956543, "learning_rate": 2e-06, "loss": 0.1998, "step": 4493 }, { "epoch": 1.6762402088772848, "grad_norm": 0.8006658554077148, "learning_rate": 2e-06, "loss": 0.1862, "step": 4494 }, { "epoch": 1.6766132040283477, "grad_norm": 1.042004108428955, "learning_rate": 2e-06, "loss": 0.17, "step": 4495 }, { "epoch": 1.6769861991794106, "grad_norm": 0.8554074168205261, "learning_rate": 2e-06, "loss": 0.1777, "step": 4496 }, { "epoch": 1.6773591943304738, "grad_norm": 0.7592594027519226, "learning_rate": 2e-06, "loss": 0.1749, "step": 4497 }, { "epoch": 1.677732189481537, "grad_norm": 0.763516366481781, "learning_rate": 2e-06, "loss": 0.1892, "step": 4498 }, { "epoch": 1.6781051846325998, "grad_norm": 0.8051284551620483, "learning_rate": 2e-06, "loss": 0.1842, "step": 4499 }, { "epoch": 1.6784781797836628, "grad_norm": 0.8488687872886658, "learning_rate": 2e-06, "loss": 0.1495, "step": 4500 } ], "logging_steps": 1.0, "max_steps": 5362, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2786025402073088.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }