{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 966,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003105590062111801,
      "grad_norm": 60.46764647043296,
      "learning_rate": 5.154639175257732e-07,
      "loss": 11.059,
      "step": 1
    },
    {
      "epoch": 0.006211180124223602,
      "grad_norm": 60.76414526284177,
      "learning_rate": 1.0309278350515464e-06,
      "loss": 11.012,
      "step": 2
    },
    {
      "epoch": 0.009316770186335404,
      "grad_norm": 59.20725802433676,
      "learning_rate": 1.5463917525773197e-06,
      "loss": 11.1319,
      "step": 3
    },
    {
      "epoch": 0.012422360248447204,
      "grad_norm": 60.61459028364047,
      "learning_rate": 2.061855670103093e-06,
      "loss": 11.03,
      "step": 4
    },
    {
      "epoch": 0.015527950310559006,
      "grad_norm": 64.67058884756766,
      "learning_rate": 2.577319587628866e-06,
      "loss": 10.8306,
      "step": 5
    },
    {
      "epoch": 0.018633540372670808,
      "grad_norm": 70.79205232466896,
      "learning_rate": 3.0927835051546395e-06,
      "loss": 10.6598,
      "step": 6
    },
    {
      "epoch": 0.021739130434782608,
      "grad_norm": 98.53755227750479,
      "learning_rate": 3.608247422680412e-06,
      "loss": 9.4929,
      "step": 7
    },
    {
      "epoch": 0.024844720496894408,
      "grad_norm": 111.40861108613349,
      "learning_rate": 4.123711340206186e-06,
      "loss": 9.0865,
      "step": 8
    },
    {
      "epoch": 0.027950310559006212,
      "grad_norm": 127.60092525337744,
      "learning_rate": 4.639175257731959e-06,
      "loss": 8.316,
      "step": 9
    },
    {
      "epoch": 0.031055900621118012,
      "grad_norm": 57.38601512479266,
      "learning_rate": 5.154639175257732e-06,
      "loss": 3.5203,
      "step": 10
    },
    {
      "epoch": 0.034161490683229816,
      "grad_norm": 40.087017160526905,
      "learning_rate": 5.670103092783505e-06,
      "loss": 2.5941,
      "step": 11
    },
    {
      "epoch": 0.037267080745341616,
      "grad_norm": 36.371937459465364,
      "learning_rate": 6.185567010309279e-06,
      "loss": 2.3632,
      "step": 12
    },
    {
      "epoch": 0.040372670807453416,
      "grad_norm": 5.902085049785153,
      "learning_rate": 6.701030927835052e-06,
      "loss": 1.3097,
      "step": 13
    },
    {
      "epoch": 0.043478260869565216,
      "grad_norm": 4.395696518971964,
      "learning_rate": 7.216494845360824e-06,
      "loss": 1.2339,
      "step": 14
    },
    {
      "epoch": 0.046583850931677016,
      "grad_norm": 3.1604888794138524,
      "learning_rate": 7.731958762886599e-06,
      "loss": 1.1463,
      "step": 15
    },
    {
      "epoch": 0.049689440993788817,
      "grad_norm": 2.399444834184007,
      "learning_rate": 8.247422680412371e-06,
      "loss": 1.0723,
      "step": 16
    },
    {
      "epoch": 0.052795031055900624,
      "grad_norm": 1.5487302596438641,
      "learning_rate": 8.762886597938144e-06,
      "loss": 0.9467,
      "step": 17
    },
    {
      "epoch": 0.055900621118012424,
      "grad_norm": 80.99605354011946,
      "learning_rate": 9.278350515463918e-06,
      "loss": 0.9669,
      "step": 18
    },
    {
      "epoch": 0.059006211180124224,
      "grad_norm": 35.062984197987575,
      "learning_rate": 9.793814432989691e-06,
      "loss": 0.8903,
      "step": 19
    },
    {
      "epoch": 0.062111801242236024,
      "grad_norm": 1.7039403556284178,
      "learning_rate": 1.0309278350515464e-05,
      "loss": 0.8611,
      "step": 20
    },
    {
      "epoch": 0.06521739130434782,
      "grad_norm": 1.1286556630596418,
      "learning_rate": 1.0824742268041238e-05,
      "loss": 0.7956,
      "step": 21
    },
    {
      "epoch": 0.06832298136645963,
      "grad_norm": 0.8885421752095347,
      "learning_rate": 1.134020618556701e-05,
      "loss": 0.7944,
      "step": 22
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 0.7771123311111944,
      "learning_rate": 1.1855670103092783e-05,
      "loss": 0.7888,
      "step": 23
    },
    {
      "epoch": 0.07453416149068323,
      "grad_norm": 0.8290301807562498,
      "learning_rate": 1.2371134020618558e-05,
      "loss": 0.7524,
      "step": 24
    },
    {
      "epoch": 0.07763975155279502,
      "grad_norm": 0.9178800987434453,
      "learning_rate": 1.2886597938144329e-05,
      "loss": 0.7276,
      "step": 25
    },
    {
      "epoch": 0.08074534161490683,
      "grad_norm": 0.7280169831391284,
      "learning_rate": 1.3402061855670103e-05,
      "loss": 0.7049,
      "step": 26
    },
    {
      "epoch": 0.08385093167701864,
      "grad_norm": 0.590489381671068,
      "learning_rate": 1.3917525773195878e-05,
      "loss": 0.6846,
      "step": 27
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 0.6956647472162396,
      "learning_rate": 1.4432989690721649e-05,
      "loss": 0.6518,
      "step": 28
    },
    {
      "epoch": 0.09006211180124224,
      "grad_norm": 0.7233291715436561,
      "learning_rate": 1.4948453608247423e-05,
      "loss": 0.6575,
      "step": 29
    },
    {
      "epoch": 0.09316770186335403,
      "grad_norm": 0.6557463930133224,
      "learning_rate": 1.5463917525773197e-05,
      "loss": 0.6648,
      "step": 30
    },
    {
      "epoch": 0.09627329192546584,
      "grad_norm": 0.5940038812473861,
      "learning_rate": 1.5979381443298968e-05,
      "loss": 0.6414,
      "step": 31
    },
    {
      "epoch": 0.09937888198757763,
      "grad_norm": 0.514015705745489,
      "learning_rate": 1.6494845360824743e-05,
      "loss": 0.6178,
      "step": 32
    },
    {
      "epoch": 0.10248447204968944,
      "grad_norm": 0.5977361497140969,
      "learning_rate": 1.7010309278350517e-05,
      "loss": 0.6216,
      "step": 33
    },
    {
      "epoch": 0.10559006211180125,
      "grad_norm": 0.5377773183845758,
      "learning_rate": 1.7525773195876288e-05,
      "loss": 0.6195,
      "step": 34
    },
    {
      "epoch": 0.10869565217391304,
      "grad_norm": 0.40152764208172104,
      "learning_rate": 1.8041237113402062e-05,
      "loss": 0.5758,
      "step": 35
    },
    {
      "epoch": 0.11180124223602485,
      "grad_norm": 0.40244189444549017,
      "learning_rate": 1.8556701030927837e-05,
      "loss": 0.6178,
      "step": 36
    },
    {
      "epoch": 0.11490683229813664,
      "grad_norm": 0.49886656483811526,
      "learning_rate": 1.9072164948453608e-05,
      "loss": 0.6062,
      "step": 37
    },
    {
      "epoch": 0.11801242236024845,
      "grad_norm": 0.43178714425173426,
      "learning_rate": 1.9587628865979382e-05,
      "loss": 0.5929,
      "step": 38
    },
    {
      "epoch": 0.12111801242236025,
      "grad_norm": 0.37953785852942284,
      "learning_rate": 2.0103092783505157e-05,
      "loss": 0.57,
      "step": 39
    },
    {
      "epoch": 0.12422360248447205,
      "grad_norm": 0.3712229743609745,
      "learning_rate": 2.0618556701030927e-05,
      "loss": 0.5812,
      "step": 40
    },
    {
      "epoch": 0.12732919254658384,
      "grad_norm": 0.38350882873215847,
      "learning_rate": 2.1134020618556702e-05,
      "loss": 0.5714,
      "step": 41
    },
    {
      "epoch": 0.13043478260869565,
      "grad_norm": 0.4036659557430701,
      "learning_rate": 2.1649484536082476e-05,
      "loss": 0.5813,
      "step": 42
    },
    {
      "epoch": 0.13354037267080746,
      "grad_norm": 0.33097703493186653,
      "learning_rate": 2.2164948453608247e-05,
      "loss": 0.5537,
      "step": 43
    },
    {
      "epoch": 0.13664596273291926,
      "grad_norm": 0.339069211939581,
      "learning_rate": 2.268041237113402e-05,
      "loss": 0.57,
      "step": 44
    },
    {
      "epoch": 0.13975155279503104,
      "grad_norm": 0.34080115423530455,
      "learning_rate": 2.3195876288659796e-05,
      "loss": 0.5434,
      "step": 45
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 0.3309319915944845,
      "learning_rate": 2.3711340206185567e-05,
      "loss": 0.5436,
      "step": 46
    },
    {
      "epoch": 0.14596273291925466,
      "grad_norm": 0.36258498646852527,
      "learning_rate": 2.422680412371134e-05,
      "loss": 0.5372,
      "step": 47
    },
    {
      "epoch": 0.14906832298136646,
      "grad_norm": 0.3289309195150263,
      "learning_rate": 2.4742268041237116e-05,
      "loss": 0.5519,
      "step": 48
    },
    {
      "epoch": 0.15217391304347827,
      "grad_norm": 0.29200888913110107,
      "learning_rate": 2.5257731958762887e-05,
      "loss": 0.5269,
      "step": 49
    },
    {
      "epoch": 0.15527950310559005,
      "grad_norm": 0.2913726775318078,
      "learning_rate": 2.5773195876288658e-05,
      "loss": 0.5398,
      "step": 50
    },
    {
      "epoch": 0.15838509316770186,
      "grad_norm": 0.36183923103400334,
      "learning_rate": 2.6288659793814435e-05,
      "loss": 0.5313,
      "step": 51
    },
    {
      "epoch": 0.16149068322981366,
      "grad_norm": 0.289832432081365,
      "learning_rate": 2.6804123711340206e-05,
      "loss": 0.5294,
      "step": 52
    },
    {
      "epoch": 0.16459627329192547,
      "grad_norm": 0.28159321988499836,
      "learning_rate": 2.7319587628865977e-05,
      "loss": 0.5102,
      "step": 53
    },
    {
      "epoch": 0.16770186335403728,
      "grad_norm": 0.33289230730425107,
      "learning_rate": 2.7835051546391755e-05,
      "loss": 0.5325,
      "step": 54
    },
    {
      "epoch": 0.17080745341614906,
      "grad_norm": 0.2711500030362234,
      "learning_rate": 2.8350515463917526e-05,
      "loss": 0.5203,
      "step": 55
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 0.2675188946251961,
      "learning_rate": 2.8865979381443297e-05,
      "loss": 0.5224,
      "step": 56
    },
    {
      "epoch": 0.17701863354037267,
      "grad_norm": 0.26579895922328955,
      "learning_rate": 2.9381443298969075e-05,
      "loss": 0.5294,
      "step": 57
    },
    {
      "epoch": 0.18012422360248448,
      "grad_norm": 0.2489797381353846,
      "learning_rate": 2.9896907216494846e-05,
      "loss": 0.5111,
      "step": 58
    },
    {
      "epoch": 0.18322981366459629,
      "grad_norm": 0.26304984714934,
      "learning_rate": 3.0412371134020617e-05,
      "loss": 0.5063,
      "step": 59
    },
    {
      "epoch": 0.18633540372670807,
      "grad_norm": 0.29536218486713367,
      "learning_rate": 3.0927835051546395e-05,
      "loss": 0.5278,
      "step": 60
    },
    {
      "epoch": 0.18944099378881987,
      "grad_norm": 0.2629951129122119,
      "learning_rate": 3.1443298969072166e-05,
      "loss": 0.5066,
      "step": 61
    },
    {
      "epoch": 0.19254658385093168,
      "grad_norm": 0.324166573780215,
      "learning_rate": 3.1958762886597937e-05,
      "loss": 0.5054,
      "step": 62
    },
    {
      "epoch": 0.1956521739130435,
      "grad_norm": 0.2729720585938641,
      "learning_rate": 3.2474226804123714e-05,
      "loss": 0.5142,
      "step": 63
    },
    {
      "epoch": 0.19875776397515527,
      "grad_norm": 0.27422169347085695,
      "learning_rate": 3.2989690721649485e-05,
      "loss": 0.5119,
      "step": 64
    },
    {
      "epoch": 0.20186335403726707,
      "grad_norm": 0.26064941279629095,
      "learning_rate": 3.3505154639175256e-05,
      "loss": 0.5037,
      "step": 65
    },
    {
      "epoch": 0.20496894409937888,
      "grad_norm": 0.2589323970095713,
      "learning_rate": 3.4020618556701034e-05,
      "loss": 0.5181,
      "step": 66
    },
    {
      "epoch": 0.2080745341614907,
      "grad_norm": 0.2795495681392583,
      "learning_rate": 3.4536082474226805e-05,
      "loss": 0.5006,
      "step": 67
    },
    {
      "epoch": 0.2111801242236025,
      "grad_norm": 0.2785747261533415,
      "learning_rate": 3.5051546391752576e-05,
      "loss": 0.483,
      "step": 68
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 0.25302395243466885,
      "learning_rate": 3.5567010309278354e-05,
      "loss": 0.4883,
      "step": 69
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 0.2752883227640764,
      "learning_rate": 3.6082474226804125e-05,
      "loss": 0.5094,
      "step": 70
    },
    {
      "epoch": 0.2204968944099379,
      "grad_norm": 0.3024166121222451,
      "learning_rate": 3.6597938144329896e-05,
      "loss": 0.4881,
      "step": 71
    },
    {
      "epoch": 0.2236024844720497,
      "grad_norm": 0.3097948500444575,
      "learning_rate": 3.7113402061855674e-05,
      "loss": 0.4839,
      "step": 72
    },
    {
      "epoch": 0.2267080745341615,
      "grad_norm": 0.2876918544530116,
      "learning_rate": 3.7628865979381445e-05,
      "loss": 0.5144,
      "step": 73
    },
    {
      "epoch": 0.22981366459627328,
      "grad_norm": 0.3416229447277982,
      "learning_rate": 3.8144329896907216e-05,
      "loss": 0.4961,
      "step": 74
    },
    {
      "epoch": 0.2329192546583851,
      "grad_norm": 0.3199113220311117,
      "learning_rate": 3.865979381443299e-05,
      "loss": 0.473,
      "step": 75
    },
    {
      "epoch": 0.2360248447204969,
      "grad_norm": 0.3005248837372916,
      "learning_rate": 3.9175257731958764e-05,
      "loss": 0.4869,
      "step": 76
    },
    {
      "epoch": 0.2391304347826087,
      "grad_norm": 0.3020219962118337,
      "learning_rate": 3.9690721649484535e-05,
      "loss": 0.5047,
      "step": 77
    },
    {
      "epoch": 0.2422360248447205,
      "grad_norm": 0.29698825337519646,
      "learning_rate": 4.020618556701031e-05,
      "loss": 0.5022,
      "step": 78
    },
    {
      "epoch": 0.2453416149068323,
      "grad_norm": 0.3021333930392965,
      "learning_rate": 4.0721649484536084e-05,
      "loss": 0.4866,
      "step": 79
    },
    {
      "epoch": 0.2484472049689441,
      "grad_norm": 0.29250713103592757,
      "learning_rate": 4.1237113402061855e-05,
      "loss": 0.4896,
      "step": 80
    },
    {
      "epoch": 0.2515527950310559,
      "grad_norm": 0.27724800469538824,
      "learning_rate": 4.175257731958763e-05,
      "loss": 0.4836,
      "step": 81
    },
    {
      "epoch": 0.2546583850931677,
      "grad_norm": 0.3272751041798097,
      "learning_rate": 4.2268041237113404e-05,
      "loss": 0.5079,
      "step": 82
    },
    {
      "epoch": 0.2577639751552795,
      "grad_norm": 0.2875779003405876,
      "learning_rate": 4.2783505154639175e-05,
      "loss": 0.4822,
      "step": 83
    },
    {
      "epoch": 0.2608695652173913,
      "grad_norm": 0.351548025457743,
      "learning_rate": 4.329896907216495e-05,
      "loss": 0.4719,
      "step": 84
    },
    {
      "epoch": 0.2639751552795031,
      "grad_norm": 0.3104421632805538,
      "learning_rate": 4.3814432989690723e-05,
      "loss": 0.4985,
      "step": 85
    },
    {
      "epoch": 0.2670807453416149,
      "grad_norm": 0.29340838316836443,
      "learning_rate": 4.4329896907216494e-05,
      "loss": 0.4506,
      "step": 86
    },
    {
      "epoch": 0.2701863354037267,
      "grad_norm": 0.31888072280932184,
      "learning_rate": 4.484536082474227e-05,
      "loss": 0.4718,
      "step": 87
    },
    {
      "epoch": 0.2732919254658385,
      "grad_norm": 0.2881905604568596,
      "learning_rate": 4.536082474226804e-05,
      "loss": 0.4718,
      "step": 88
    },
    {
      "epoch": 0.27639751552795033,
      "grad_norm": 0.382391969622348,
      "learning_rate": 4.5876288659793814e-05,
      "loss": 0.489,
      "step": 89
    },
    {
      "epoch": 0.2795031055900621,
      "grad_norm": 0.28677795566141734,
      "learning_rate": 4.639175257731959e-05,
      "loss": 0.4625,
      "step": 90
    },
    {
      "epoch": 0.2826086956521739,
      "grad_norm": 0.44192895579293406,
      "learning_rate": 4.690721649484536e-05,
      "loss": 0.4901,
      "step": 91
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.36788344235249887,
      "learning_rate": 4.7422680412371134e-05,
      "loss": 0.4682,
      "step": 92
    },
    {
      "epoch": 0.2888198757763975,
      "grad_norm": 0.5143785581301379,
      "learning_rate": 4.793814432989691e-05,
      "loss": 0.4748,
      "step": 93
    },
    {
      "epoch": 0.2919254658385093,
      "grad_norm": 0.3714484820764116,
      "learning_rate": 4.845360824742268e-05,
      "loss": 0.4733,
      "step": 94
    },
    {
      "epoch": 0.2950310559006211,
      "grad_norm": 0.4411279949864707,
      "learning_rate": 4.8969072164948454e-05,
      "loss": 0.4719,
      "step": 95
    },
    {
      "epoch": 0.2981366459627329,
      "grad_norm": 0.4095900221196949,
      "learning_rate": 4.948453608247423e-05,
      "loss": 0.4679,
      "step": 96
    },
    {
      "epoch": 0.30124223602484473,
      "grad_norm": 0.3876387401039132,
      "learning_rate": 5e-05,
      "loss": 0.4727,
      "step": 97
    },
    {
      "epoch": 0.30434782608695654,
      "grad_norm": 0.35671507475673714,
      "learning_rate": 4.994246260069045e-05,
      "loss": 0.4582,
      "step": 98
    },
    {
      "epoch": 0.30745341614906835,
      "grad_norm": 0.40457113215141677,
      "learning_rate": 4.98849252013809e-05,
      "loss": 0.4817,
      "step": 99
    },
    {
      "epoch": 0.3105590062111801,
      "grad_norm": 0.40014058749708475,
      "learning_rate": 4.982738780207135e-05,
      "loss": 0.4486,
      "step": 100
    },
    {
      "epoch": 0.3136645962732919,
      "grad_norm": 0.4870121731575367,
      "learning_rate": 4.97698504027618e-05,
      "loss": 0.4663,
      "step": 101
    },
    {
      "epoch": 0.3167701863354037,
      "grad_norm": 0.4340851079572886,
      "learning_rate": 4.9712313003452246e-05,
      "loss": 0.4484,
      "step": 102
    },
    {
      "epoch": 0.3198757763975155,
      "grad_norm": 0.35686684080021636,
      "learning_rate": 4.9654775604142695e-05,
      "loss": 0.467,
      "step": 103
    },
    {
      "epoch": 0.32298136645962733,
      "grad_norm": 0.4494359291517841,
      "learning_rate": 4.9597238204833143e-05,
      "loss": 0.4694,
      "step": 104
    },
    {
      "epoch": 0.32608695652173914,
      "grad_norm": 0.4372407930618466,
      "learning_rate": 4.953970080552359e-05,
      "loss": 0.4648,
      "step": 105
    },
    {
      "epoch": 0.32919254658385094,
      "grad_norm": 0.34466736034003903,
      "learning_rate": 4.948216340621404e-05,
      "loss": 0.4444,
      "step": 106
    },
    {
      "epoch": 0.33229813664596275,
      "grad_norm": 0.4001800803927703,
      "learning_rate": 4.942462600690449e-05,
      "loss": 0.464,
      "step": 107
    },
    {
      "epoch": 0.33540372670807456,
      "grad_norm": 0.3577590335432523,
      "learning_rate": 4.936708860759494e-05,
      "loss": 0.4647,
      "step": 108
    },
    {
      "epoch": 0.3385093167701863,
      "grad_norm": 0.3827072494556767,
      "learning_rate": 4.930955120828539e-05,
      "loss": 0.4452,
      "step": 109
    },
    {
      "epoch": 0.3416149068322981,
      "grad_norm": 0.40554119841147346,
      "learning_rate": 4.9252013808975836e-05,
      "loss": 0.457,
      "step": 110
    },
    {
      "epoch": 0.3447204968944099,
      "grad_norm": 0.3980370218198526,
      "learning_rate": 4.9194476409666285e-05,
      "loss": 0.4566,
      "step": 111
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 0.38595447982147235,
      "learning_rate": 4.913693901035673e-05,
      "loss": 0.4436,
      "step": 112
    },
    {
      "epoch": 0.35093167701863354,
      "grad_norm": 0.3335566121887473,
      "learning_rate": 4.907940161104718e-05,
      "loss": 0.4525,
      "step": 113
    },
    {
      "epoch": 0.35403726708074534,
      "grad_norm": 0.44048069823182057,
      "learning_rate": 4.902186421173763e-05,
      "loss": 0.4775,
      "step": 114
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.3511836624614759,
      "learning_rate": 4.896432681242808e-05,
      "loss": 0.4529,
      "step": 115
    },
    {
      "epoch": 0.36024844720496896,
      "grad_norm": 0.40512550088435406,
      "learning_rate": 4.890678941311853e-05,
      "loss": 0.4856,
      "step": 116
    },
    {
      "epoch": 0.36335403726708076,
      "grad_norm": 0.4709820706303788,
      "learning_rate": 4.884925201380898e-05,
      "loss": 0.4613,
      "step": 117
    },
    {
      "epoch": 0.36645962732919257,
      "grad_norm": 0.3163807878418199,
      "learning_rate": 4.8791714614499426e-05,
      "loss": 0.476,
      "step": 118
    },
    {
      "epoch": 0.3695652173913043,
      "grad_norm": 0.421853544537181,
      "learning_rate": 4.8734177215189874e-05,
      "loss": 0.4675,
      "step": 119
    },
    {
      "epoch": 0.37267080745341613,
      "grad_norm": 0.37140388109626665,
      "learning_rate": 4.867663981588032e-05,
      "loss": 0.452,
      "step": 120
    },
    {
      "epoch": 0.37577639751552794,
      "grad_norm": 0.42352163355515543,
      "learning_rate": 4.861910241657077e-05,
      "loss": 0.4468,
      "step": 121
    },
    {
      "epoch": 0.37888198757763975,
      "grad_norm": 0.4144419361914004,
      "learning_rate": 4.856156501726122e-05,
      "loss": 0.4526,
      "step": 122
    },
    {
      "epoch": 0.38198757763975155,
      "grad_norm": 0.40675120816526916,
      "learning_rate": 4.850402761795167e-05,
      "loss": 0.4611,
      "step": 123
    },
    {
      "epoch": 0.38509316770186336,
      "grad_norm": 0.5826147735025056,
      "learning_rate": 4.844649021864212e-05,
      "loss": 0.4803,
      "step": 124
    },
    {
      "epoch": 0.38819875776397517,
      "grad_norm": 0.3282657199624206,
      "learning_rate": 4.838895281933257e-05,
      "loss": 0.4552,
      "step": 125
    },
    {
      "epoch": 0.391304347826087,
      "grad_norm": 0.5159501988757971,
      "learning_rate": 4.8331415420023015e-05,
      "loss": 0.4794,
      "step": 126
    },
    {
      "epoch": 0.3944099378881988,
      "grad_norm": 0.3620503849683116,
      "learning_rate": 4.8273878020713464e-05,
      "loss": 0.4631,
      "step": 127
    },
    {
      "epoch": 0.39751552795031053,
      "grad_norm": 0.4221189340341717,
      "learning_rate": 4.821634062140391e-05,
      "loss": 0.4696,
      "step": 128
    },
    {
      "epoch": 0.40062111801242234,
      "grad_norm": 0.46423436394369083,
      "learning_rate": 4.815880322209436e-05,
      "loss": 0.4573,
      "step": 129
    },
    {
      "epoch": 0.40372670807453415,
      "grad_norm": 0.4261777248289121,
      "learning_rate": 4.810126582278481e-05,
      "loss": 0.4608,
      "step": 130
    },
    {
      "epoch": 0.40683229813664595,
      "grad_norm": 0.45519667338748365,
      "learning_rate": 4.804372842347526e-05,
      "loss": 0.4621,
      "step": 131
    },
    {
      "epoch": 0.40993788819875776,
      "grad_norm": 0.4384463354130905,
      "learning_rate": 4.798619102416571e-05,
      "loss": 0.4656,
      "step": 132
    },
    {
      "epoch": 0.41304347826086957,
      "grad_norm": 0.41199291319131776,
      "learning_rate": 4.7928653624856157e-05,
      "loss": 0.4535,
      "step": 133
    },
    {
      "epoch": 0.4161490683229814,
      "grad_norm": 0.3655597225332361,
      "learning_rate": 4.7871116225546605e-05,
      "loss": 0.4501,
      "step": 134
    },
    {
      "epoch": 0.4192546583850932,
      "grad_norm": 0.44932133556116877,
      "learning_rate": 4.7813578826237054e-05,
      "loss": 0.4767,
      "step": 135
    },
    {
      "epoch": 0.422360248447205,
      "grad_norm": 0.3329354062585348,
      "learning_rate": 4.77560414269275e-05,
      "loss": 0.4455,
      "step": 136
    },
    {
      "epoch": 0.4254658385093168,
      "grad_norm": 0.45152077511616723,
      "learning_rate": 4.769850402761795e-05,
      "loss": 0.4623,
      "step": 137
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.3188549796798649,
      "learning_rate": 4.76409666283084e-05,
      "loss": 0.4304,
      "step": 138
    },
    {
      "epoch": 0.43167701863354035,
      "grad_norm": 0.39747649807961544,
      "learning_rate": 4.758342922899885e-05,
      "loss": 0.4486,
      "step": 139
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.2901963778324694,
      "learning_rate": 4.75258918296893e-05,
      "loss": 0.4485,
      "step": 140
    },
    {
      "epoch": 0.43788819875776397,
      "grad_norm": 0.5357034478107343,
      "learning_rate": 4.7468354430379746e-05,
      "loss": 0.4773,
      "step": 141
    },
    {
      "epoch": 0.4409937888198758,
      "grad_norm": 0.3892373232000147,
      "learning_rate": 4.7410817031070195e-05,
      "loss": 0.4408,
      "step": 142
    },
    {
      "epoch": 0.4440993788819876,
      "grad_norm": 0.45033922342477917,
      "learning_rate": 4.7353279631760644e-05,
      "loss": 0.4598,
      "step": 143
    },
    {
      "epoch": 0.4472049689440994,
      "grad_norm": 0.37908550777510663,
      "learning_rate": 4.729574223245109e-05,
      "loss": 0.4452,
      "step": 144
    },
    {
      "epoch": 0.4503105590062112,
      "grad_norm": 0.4290373855109045,
      "learning_rate": 4.723820483314154e-05,
      "loss": 0.4536,
      "step": 145
    },
    {
      "epoch": 0.453416149068323,
      "grad_norm": 0.35676947230487216,
      "learning_rate": 4.718066743383199e-05,
      "loss": 0.4648,
      "step": 146
    },
    {
      "epoch": 0.45652173913043476,
      "grad_norm": 0.33636058827665144,
      "learning_rate": 4.712313003452244e-05,
      "loss": 0.444,
      "step": 147
    },
    {
      "epoch": 0.45962732919254656,
      "grad_norm": 0.3823016634046083,
      "learning_rate": 4.706559263521289e-05,
      "loss": 0.4406,
      "step": 148
    },
    {
      "epoch": 0.46273291925465837,
      "grad_norm": 0.3818789119419192,
      "learning_rate": 4.700805523590334e-05,
      "loss": 0.4488,
      "step": 149
    },
    {
      "epoch": 0.4658385093167702,
      "grad_norm": 0.33345974040131937,
      "learning_rate": 4.6950517836593785e-05,
      "loss": 0.4647,
      "step": 150
    },
    {
      "epoch": 0.468944099378882,
      "grad_norm": 0.47073824185480967,
      "learning_rate": 4.689298043728424e-05,
      "loss": 0.4534,
      "step": 151
    },
    {
      "epoch": 0.4720496894409938,
      "grad_norm": 0.40070437909888434,
      "learning_rate": 4.683544303797468e-05,
      "loss": 0.4367,
      "step": 152
    },
    {
      "epoch": 0.4751552795031056,
      "grad_norm": 0.407305468388989,
      "learning_rate": 4.677790563866514e-05,
      "loss": 0.4415,
      "step": 153
    },
    {
      "epoch": 0.4782608695652174,
      "grad_norm": 0.4058611659098106,
      "learning_rate": 4.672036823935558e-05,
      "loss": 0.4576,
      "step": 154
    },
    {
      "epoch": 0.4813664596273292,
      "grad_norm": 0.3967515788115339,
      "learning_rate": 4.6662830840046035e-05,
      "loss": 0.4524,
      "step": 155
    },
    {
      "epoch": 0.484472049689441,
      "grad_norm": 0.4407590164610378,
      "learning_rate": 4.660529344073648e-05,
      "loss": 0.457,
      "step": 156
    },
    {
      "epoch": 0.48757763975155277,
      "grad_norm": 0.43880737794315955,
      "learning_rate": 4.654775604142693e-05,
      "loss": 0.4365,
      "step": 157
    },
    {
      "epoch": 0.4906832298136646,
      "grad_norm": 0.47864526006501984,
      "learning_rate": 4.6490218642117375e-05,
      "loss": 0.4479,
      "step": 158
    },
    {
      "epoch": 0.4937888198757764,
      "grad_norm": 0.4692672779398985,
      "learning_rate": 4.643268124280783e-05,
      "loss": 0.4546,
      "step": 159
    },
    {
      "epoch": 0.4968944099378882,
      "grad_norm": 0.4097305951007724,
      "learning_rate": 4.637514384349827e-05,
      "loss": 0.4355,
      "step": 160
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.43610759922666353,
      "learning_rate": 4.631760644418873e-05,
      "loss": 0.447,
      "step": 161
    },
    {
      "epoch": 0.5031055900621118,
      "grad_norm": 0.2978982430601787,
      "learning_rate": 4.626006904487917e-05,
      "loss": 0.4524,
      "step": 162
    },
    {
      "epoch": 0.5062111801242236,
      "grad_norm": 0.43653406806069966,
      "learning_rate": 4.6202531645569625e-05,
      "loss": 0.4171,
      "step": 163
    },
    {
      "epoch": 0.5093167701863354,
      "grad_norm": 0.40670821189566986,
      "learning_rate": 4.614499424626007e-05,
      "loss": 0.439,
      "step": 164
    },
    {
      "epoch": 0.5124223602484472,
      "grad_norm": 0.33901355170318703,
      "learning_rate": 4.608745684695052e-05,
      "loss": 0.4461,
      "step": 165
    },
    {
      "epoch": 0.515527950310559,
      "grad_norm": 0.43610331613751346,
      "learning_rate": 4.6029919447640965e-05,
      "loss": 0.4554,
      "step": 166
    },
    {
      "epoch": 0.5186335403726708,
      "grad_norm": 0.3625661313466411,
      "learning_rate": 4.597238204833142e-05,
      "loss": 0.4554,
      "step": 167
    },
    {
      "epoch": 0.5217391304347826,
      "grad_norm": 0.3394393399478139,
      "learning_rate": 4.591484464902186e-05,
      "loss": 0.4367,
      "step": 168
    },
    {
      "epoch": 0.5248447204968945,
      "grad_norm": 0.3588563348596153,
      "learning_rate": 4.585730724971232e-05,
      "loss": 0.4461,
      "step": 169
    },
    {
      "epoch": 0.5279503105590062,
      "grad_norm": 0.3802785353634964,
      "learning_rate": 4.579976985040276e-05,
      "loss": 0.4387,
      "step": 170
    },
    {
      "epoch": 0.531055900621118,
      "grad_norm": 0.3869023722709017,
      "learning_rate": 4.5742232451093215e-05,
      "loss": 0.4528,
      "step": 171
    },
    {
      "epoch": 0.5341614906832298,
      "grad_norm": 0.36676418356051843,
      "learning_rate": 4.568469505178366e-05,
      "loss": 0.4348,
      "step": 172
    },
    {
      "epoch": 0.5372670807453416,
      "grad_norm": 0.46126816544453725,
      "learning_rate": 4.562715765247411e-05,
      "loss": 0.4231,
      "step": 173
    },
    {
      "epoch": 0.5403726708074534,
      "grad_norm": 0.35343634631539705,
      "learning_rate": 4.556962025316456e-05,
      "loss": 0.4369,
      "step": 174
    },
    {
      "epoch": 0.5434782608695652,
      "grad_norm": 0.4549103689048508,
      "learning_rate": 4.551208285385501e-05,
      "loss": 0.4387,
      "step": 175
    },
    {
      "epoch": 0.546583850931677,
      "grad_norm": 0.4303714186336393,
      "learning_rate": 4.545454545454546e-05,
      "loss": 0.4546,
      "step": 176
    },
    {
      "epoch": 0.5496894409937888,
      "grad_norm": 0.4531267139678119,
      "learning_rate": 4.539700805523591e-05,
      "loss": 0.4356,
      "step": 177
    },
    {
      "epoch": 0.5527950310559007,
      "grad_norm": 0.42240540949166944,
      "learning_rate": 4.5339470655926356e-05,
      "loss": 0.4442,
      "step": 178
    },
    {
      "epoch": 0.5559006211180124,
      "grad_norm": 0.3163983623110262,
      "learning_rate": 4.5281933256616805e-05,
      "loss": 0.4255,
      "step": 179
    },
    {
      "epoch": 0.5590062111801242,
      "grad_norm": 0.37954620340652895,
      "learning_rate": 4.5224395857307253e-05,
      "loss": 0.4387,
      "step": 180
    },
    {
      "epoch": 0.562111801242236,
      "grad_norm": 0.33565801845470367,
      "learning_rate": 4.51668584579977e-05,
      "loss": 0.4415,
      "step": 181
    },
    {
      "epoch": 0.5652173913043478,
      "grad_norm": 0.3349864414277053,
      "learning_rate": 4.510932105868815e-05,
      "loss": 0.4082,
      "step": 182
    },
    {
      "epoch": 0.5683229813664596,
      "grad_norm": 0.439294679014343,
      "learning_rate": 4.50517836593786e-05,
      "loss": 0.446,
      "step": 183
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.28999671538444516,
      "learning_rate": 4.499424626006905e-05,
      "loss": 0.4103,
      "step": 184
    },
    {
      "epoch": 0.5745341614906833,
      "grad_norm": 0.40660335920713986,
      "learning_rate": 4.49367088607595e-05,
      "loss": 0.444,
      "step": 185
    },
    {
      "epoch": 0.577639751552795,
      "grad_norm": 0.3033161017839996,
      "learning_rate": 4.4879171461449946e-05,
      "loss": 0.435,
      "step": 186
    },
    {
      "epoch": 0.5807453416149069,
      "grad_norm": 0.30568413065453626,
      "learning_rate": 4.4821634062140395e-05,
      "loss": 0.4237,
      "step": 187
    },
    {
      "epoch": 0.5838509316770186,
      "grad_norm": 0.32587134975274057,
      "learning_rate": 4.476409666283084e-05,
      "loss": 0.4331,
      "step": 188
    },
    {
      "epoch": 0.5869565217391305,
      "grad_norm": 0.28290562376532075,
      "learning_rate": 4.470655926352129e-05,
      "loss": 0.4342,
      "step": 189
    },
    {
      "epoch": 0.5900621118012422,
      "grad_norm": 0.3630490197737241,
      "learning_rate": 4.464902186421174e-05,
      "loss": 0.4344,
      "step": 190
    },
    {
      "epoch": 0.593167701863354,
      "grad_norm": 0.3559890010930286,
      "learning_rate": 4.459148446490219e-05,
      "loss": 0.4498,
      "step": 191
    },
    {
      "epoch": 0.5962732919254659,
      "grad_norm": 0.3499252907427838,
      "learning_rate": 4.453394706559264e-05,
      "loss": 0.4507,
      "step": 192
    },
    {
      "epoch": 0.5993788819875776,
      "grad_norm": 0.33006303704048223,
      "learning_rate": 4.447640966628309e-05,
      "loss": 0.4281,
      "step": 193
    },
    {
      "epoch": 0.6024844720496895,
      "grad_norm": 0.36984061156296816,
      "learning_rate": 4.4418872266973536e-05,
      "loss": 0.4518,
      "step": 194
    },
    {
      "epoch": 0.6055900621118012,
      "grad_norm": 0.3439812296873207,
      "learning_rate": 4.4361334867663984e-05,
      "loss": 0.4448,
      "step": 195
    },
    {
      "epoch": 0.6086956521739131,
      "grad_norm": 0.30685626808124417,
      "learning_rate": 4.430379746835443e-05,
      "loss": 0.4377,
      "step": 196
    },
    {
      "epoch": 0.6118012422360248,
      "grad_norm": 0.3925848437000049,
      "learning_rate": 4.424626006904488e-05,
      "loss": 0.4396,
      "step": 197
    },
    {
      "epoch": 0.6149068322981367,
      "grad_norm": 0.32639373809266464,
      "learning_rate": 4.418872266973533e-05,
      "loss": 0.4321,
      "step": 198
    },
    {
      "epoch": 0.6180124223602484,
      "grad_norm": 0.376079541285074,
      "learning_rate": 4.413118527042578e-05,
      "loss": 0.4242,
      "step": 199
    },
    {
      "epoch": 0.6211180124223602,
      "grad_norm": 0.3749608850464733,
      "learning_rate": 4.407364787111623e-05,
      "loss": 0.4259,
      "step": 200
    },
    {
      "epoch": 0.6242236024844721,
      "grad_norm": 0.4461881134050382,
      "learning_rate": 4.401611047180668e-05,
      "loss": 0.4341,
      "step": 201
    },
    {
      "epoch": 0.6273291925465838,
      "grad_norm": 0.4877320414028972,
      "learning_rate": 4.3958573072497125e-05,
      "loss": 0.4344,
      "step": 202
    },
    {
      "epoch": 0.6304347826086957,
      "grad_norm": 0.4070659780535386,
      "learning_rate": 4.3901035673187574e-05,
      "loss": 0.4227,
      "step": 203
    },
    {
      "epoch": 0.6335403726708074,
      "grad_norm": 0.4635439998393952,
      "learning_rate": 4.384349827387802e-05,
      "loss": 0.4355,
      "step": 204
    },
    {
      "epoch": 0.6366459627329193,
      "grad_norm": 0.35952245913430025,
      "learning_rate": 4.378596087456847e-05,
      "loss": 0.423,
      "step": 205
    },
    {
      "epoch": 0.639751552795031,
      "grad_norm": 0.520771866846795,
      "learning_rate": 4.372842347525892e-05,
      "loss": 0.4306,
      "step": 206
    },
    {
      "epoch": 0.6428571428571429,
      "grad_norm": 0.3273697337468707,
      "learning_rate": 4.367088607594937e-05,
      "loss": 0.4324,
      "step": 207
    },
    {
      "epoch": 0.6459627329192547,
      "grad_norm": 0.4813614761483608,
      "learning_rate": 4.361334867663982e-05,
      "loss": 0.4478,
      "step": 208
    },
    {
      "epoch": 0.6490683229813664,
      "grad_norm": 0.3900984777507702,
      "learning_rate": 4.3555811277330267e-05,
      "loss": 0.4269,
      "step": 209
    },
    {
      "epoch": 0.6521739130434783,
      "grad_norm": 0.3853092679143466,
      "learning_rate": 4.3498273878020715e-05,
      "loss": 0.4407,
      "step": 210
    },
    {
      "epoch": 0.65527950310559,
      "grad_norm": 0.46225724309871613,
      "learning_rate": 4.3440736478711164e-05,
      "loss": 0.436,
      "step": 211
    },
    {
      "epoch": 0.6583850931677019,
      "grad_norm": 0.31651404685134377,
      "learning_rate": 4.338319907940161e-05,
      "loss": 0.4316,
      "step": 212
    },
    {
      "epoch": 0.6614906832298136,
      "grad_norm": 0.44516432018668023,
      "learning_rate": 4.332566168009206e-05,
      "loss": 0.4426,
      "step": 213
    },
    {
      "epoch": 0.6645962732919255,
      "grad_norm": 0.3462443744991128,
      "learning_rate": 4.326812428078251e-05,
      "loss": 0.4465,
      "step": 214
    },
    {
      "epoch": 0.6677018633540373,
      "grad_norm": 0.4436257780311306,
      "learning_rate": 4.321058688147296e-05,
      "loss": 0.4241,
      "step": 215
    },
    {
      "epoch": 0.6708074534161491,
      "grad_norm": 0.3788099950107418,
      "learning_rate": 4.315304948216341e-05,
      "loss": 0.4206,
      "step": 216
    },
    {
      "epoch": 0.6739130434782609,
      "grad_norm": 0.3667132129478159,
      "learning_rate": 4.3095512082853856e-05,
      "loss": 0.4336,
      "step": 217
    },
    {
      "epoch": 0.6770186335403726,
      "grad_norm": 0.43405694529571,
      "learning_rate": 4.3037974683544305e-05,
      "loss": 0.4285,
      "step": 218
    },
    {
      "epoch": 0.6801242236024845,
      "grad_norm": 0.37501605794405696,
      "learning_rate": 4.2980437284234754e-05,
      "loss": 0.4354,
      "step": 219
    },
    {
      "epoch": 0.6832298136645962,
      "grad_norm": 0.7491502232791192,
      "learning_rate": 4.29228998849252e-05,
      "loss": 0.4622,
      "step": 220
    },
    {
      "epoch": 0.6863354037267081,
      "grad_norm": 0.34683109305557713,
      "learning_rate": 4.286536248561565e-05,
      "loss": 0.4349,
      "step": 221
    },
    {
      "epoch": 0.6894409937888198,
      "grad_norm": 0.41649862939635707,
      "learning_rate": 4.28078250863061e-05,
      "loss": 0.4278,
      "step": 222
    },
    {
      "epoch": 0.6925465838509317,
      "grad_norm": 0.33273645633734766,
      "learning_rate": 4.275028768699655e-05,
      "loss": 0.4241,
      "step": 223
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 0.358638671370147,
      "learning_rate": 4.2692750287687e-05,
      "loss": 0.4294,
      "step": 224
    },
    {
      "epoch": 0.6987577639751553,
      "grad_norm": 0.3505002399312612,
      "learning_rate": 4.2635212888377446e-05,
      "loss": 0.4339,
      "step": 225
    },
    {
      "epoch": 0.7018633540372671,
      "grad_norm": 0.28967971081827765,
      "learning_rate": 4.2577675489067895e-05,
      "loss": 0.4433,
      "step": 226
    },
    {
      "epoch": 0.7049689440993789,
      "grad_norm": 0.3792183124094411,
      "learning_rate": 4.2520138089758344e-05,
      "loss": 0.4263,
      "step": 227
    },
    {
      "epoch": 0.7080745341614907,
      "grad_norm": 0.2915459102300122,
      "learning_rate": 4.246260069044879e-05,
      "loss": 0.4226,
      "step": 228
    },
    {
      "epoch": 0.7111801242236024,
      "grad_norm": 0.357404227614541,
      "learning_rate": 4.240506329113924e-05,
      "loss": 0.4183,
      "step": 229
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.33657909101352584,
      "learning_rate": 4.234752589182969e-05,
      "loss": 0.4383,
      "step": 230
    },
    {
      "epoch": 0.717391304347826,
      "grad_norm": 0.28799404238315757,
      "learning_rate": 4.228998849252014e-05,
      "loss": 0.4059,
      "step": 231
    },
    {
      "epoch": 0.7204968944099379,
      "grad_norm": 0.3559137710527895,
      "learning_rate": 4.223245109321059e-05,
      "loss": 0.431,
      "step": 232
    },
    {
      "epoch": 0.7236024844720497,
      "grad_norm": 0.3571859472649835,
      "learning_rate": 4.2174913693901036e-05,
      "loss": 0.4365,
      "step": 233
    },
    {
      "epoch": 0.7267080745341615,
      "grad_norm": 0.27866414620295615,
      "learning_rate": 4.2117376294591485e-05,
      "loss": 0.4164,
      "step": 234
    },
    {
      "epoch": 0.7298136645962733,
      "grad_norm": 0.35761820704128017,
      "learning_rate": 4.2059838895281933e-05,
      "loss": 0.4155,
      "step": 235
    },
    {
      "epoch": 0.7329192546583851,
      "grad_norm": 0.38239702778323204,
      "learning_rate": 4.200230149597238e-05,
      "loss": 0.4441,
      "step": 236
    },
    {
      "epoch": 0.7360248447204969,
      "grad_norm": 0.37338686711282476,
      "learning_rate": 4.194476409666283e-05,
      "loss": 0.4287,
      "step": 237
    },
    {
      "epoch": 0.7391304347826086,
      "grad_norm": 0.31078006795719737,
      "learning_rate": 4.188722669735328e-05,
      "loss": 0.4314,
      "step": 238
    },
    {
      "epoch": 0.7422360248447205,
      "grad_norm": 0.42962957316409206,
      "learning_rate": 4.182968929804373e-05,
      "loss": 0.4258,
      "step": 239
    },
    {
      "epoch": 0.7453416149068323,
      "grad_norm": 0.3531531884915285,
      "learning_rate": 4.177215189873418e-05,
      "loss": 0.4348,
      "step": 240
    },
    {
      "epoch": 0.7484472049689441,
      "grad_norm": 0.4645354016036932,
      "learning_rate": 4.1714614499424626e-05,
      "loss": 0.4204,
      "step": 241
    },
    {
      "epoch": 0.7515527950310559,
      "grad_norm": 1.09153721353785,
      "learning_rate": 4.1657077100115075e-05,
      "loss": 0.4386,
      "step": 242
    },
    {
      "epoch": 0.7546583850931677,
      "grad_norm": 0.32971689202723414,
      "learning_rate": 4.159953970080552e-05,
      "loss": 0.4286,
      "step": 243
    },
    {
      "epoch": 0.7577639751552795,
      "grad_norm": 0.47923594956031046,
      "learning_rate": 4.154200230149597e-05,
      "loss": 0.4355,
      "step": 244
    },
    {
      "epoch": 0.7608695652173914,
      "grad_norm": 0.3499125189435591,
      "learning_rate": 4.148446490218642e-05,
      "loss": 0.4363,
      "step": 245
    },
    {
      "epoch": 0.7639751552795031,
      "grad_norm": 0.3676637215847227,
      "learning_rate": 4.142692750287687e-05,
      "loss": 0.4351,
      "step": 246
    },
    {
      "epoch": 0.7670807453416149,
      "grad_norm": 0.3727821108079694,
      "learning_rate": 4.136939010356732e-05,
      "loss": 0.4418,
      "step": 247
    },
    {
      "epoch": 0.7701863354037267,
      "grad_norm": 0.3252006506678716,
      "learning_rate": 4.131185270425777e-05,
      "loss": 0.4158,
      "step": 248
    },
    {
      "epoch": 0.7732919254658385,
      "grad_norm": 0.6538129311302192,
      "learning_rate": 4.1254315304948216e-05,
      "loss": 0.457,
      "step": 249
    },
    {
      "epoch": 0.7763975155279503,
      "grad_norm": 0.33906627374077886,
      "learning_rate": 4.1196777905638664e-05,
      "loss": 0.4318,
      "step": 250
    },
    {
      "epoch": 0.7795031055900621,
      "grad_norm": 0.356301991033165,
      "learning_rate": 4.113924050632912e-05,
      "loss": 0.4236,
      "step": 251
    },
    {
      "epoch": 0.782608695652174,
      "grad_norm": 0.32783848540999616,
      "learning_rate": 4.108170310701956e-05,
      "loss": 0.4448,
      "step": 252
    },
    {
      "epoch": 0.7857142857142857,
      "grad_norm": 0.33633346589846297,
      "learning_rate": 4.102416570771002e-05,
      "loss": 0.4084,
      "step": 253
    },
    {
      "epoch": 0.7888198757763976,
      "grad_norm": 0.34262159693990346,
      "learning_rate": 4.096662830840046e-05,
      "loss": 0.4343,
      "step": 254
    },
    {
      "epoch": 0.7919254658385093,
      "grad_norm": 0.4238089460532713,
      "learning_rate": 4.0909090909090915e-05,
      "loss": 0.4197,
      "step": 255
    },
    {
      "epoch": 0.7950310559006211,
      "grad_norm": 0.34636542219919175,
      "learning_rate": 4.085155350978136e-05,
      "loss": 0.4216,
      "step": 256
    },
    {
      "epoch": 0.7981366459627329,
      "grad_norm": 0.5147966796611364,
      "learning_rate": 4.079401611047181e-05,
      "loss": 0.4335,
      "step": 257
    },
    {
      "epoch": 0.8012422360248447,
      "grad_norm": 0.3784633526026262,
      "learning_rate": 4.0736478711162254e-05,
      "loss": 0.4388,
      "step": 258
    },
    {
      "epoch": 0.8043478260869565,
      "grad_norm": 0.5353215946365089,
      "learning_rate": 4.067894131185271e-05,
      "loss": 0.4317,
      "step": 259
    },
    {
      "epoch": 0.8074534161490683,
      "grad_norm": 0.43051530301687313,
      "learning_rate": 4.062140391254315e-05,
      "loss": 0.4321,
      "step": 260
    },
    {
      "epoch": 0.8105590062111802,
      "grad_norm": 0.42796425153438244,
      "learning_rate": 4.056386651323361e-05,
      "loss": 0.4088,
      "step": 261
    },
    {
      "epoch": 0.8136645962732919,
      "grad_norm": 0.5934293854830046,
      "learning_rate": 4.050632911392405e-05,
      "loss": 0.4158,
      "step": 262
    },
    {
      "epoch": 0.8167701863354038,
      "grad_norm": 0.4393755394280156,
      "learning_rate": 4.0448791714614505e-05,
      "loss": 0.4469,
      "step": 263
    },
    {
      "epoch": 0.8198757763975155,
      "grad_norm": 0.4605347528048276,
      "learning_rate": 4.0391254315304947e-05,
      "loss": 0.4346,
      "step": 264
    },
    {
      "epoch": 0.8229813664596274,
      "grad_norm": 0.35259936117009355,
      "learning_rate": 4.03337169159954e-05,
      "loss": 0.4168,
      "step": 265
    },
    {
      "epoch": 0.8260869565217391,
      "grad_norm": 0.4804542899872928,
      "learning_rate": 4.0276179516685844e-05,
      "loss": 0.4302,
      "step": 266
    },
    {
      "epoch": 0.8291925465838509,
      "grad_norm": 0.49703176852970304,
      "learning_rate": 4.02186421173763e-05,
      "loss": 0.4376,
      "step": 267
    },
    {
      "epoch": 0.8322981366459627,
      "grad_norm": 0.32332376265052126,
      "learning_rate": 4.016110471806674e-05,
      "loss": 0.4151,
      "step": 268
    },
    {
      "epoch": 0.8354037267080745,
      "grad_norm": 0.3837962855801273,
      "learning_rate": 4.01035673187572e-05,
      "loss": 0.4165,
      "step": 269
    },
    {
      "epoch": 0.8385093167701864,
      "grad_norm": 0.3057885184710408,
      "learning_rate": 4.004602991944764e-05,
      "loss": 0.4193,
      "step": 270
    },
    {
      "epoch": 0.8416149068322981,
      "grad_norm": 0.33815716235605003,
      "learning_rate": 3.9988492520138094e-05,
      "loss": 0.4122,
      "step": 271
    },
    {
      "epoch": 0.84472049689441,
      "grad_norm": 0.35543954456463683,
      "learning_rate": 3.9930955120828536e-05,
      "loss": 0.4312,
      "step": 272
    },
    {
      "epoch": 0.8478260869565217,
      "grad_norm": 0.4061479720971117,
      "learning_rate": 3.987341772151899e-05,
      "loss": 0.4326,
      "step": 273
    },
    {
      "epoch": 0.8509316770186336,
      "grad_norm": 0.3293967556583535,
      "learning_rate": 3.9815880322209434e-05,
      "loss": 0.4162,
      "step": 274
    },
    {
      "epoch": 0.8540372670807453,
      "grad_norm": 0.32127496899850444,
      "learning_rate": 3.975834292289989e-05,
      "loss": 0.4064,
      "step": 275
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.3106744319229529,
      "learning_rate": 3.970080552359033e-05,
      "loss": 0.4219,
      "step": 276
    },
    {
      "epoch": 0.860248447204969,
      "grad_norm": 0.2851226156515557,
      "learning_rate": 3.964326812428079e-05,
      "loss": 0.4357,
      "step": 277
    },
    {
      "epoch": 0.8633540372670807,
      "grad_norm": 0.3367137774364346,
      "learning_rate": 3.958573072497123e-05,
      "loss": 0.4221,
      "step": 278
    },
    {
      "epoch": 0.8664596273291926,
      "grad_norm": 0.274716671666842,
      "learning_rate": 3.9528193325661684e-05,
      "loss": 0.4286,
      "step": 279
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.32476182770932666,
      "learning_rate": 3.9470655926352126e-05,
      "loss": 0.4156,
      "step": 280
    },
    {
      "epoch": 0.8726708074534162,
      "grad_norm": 0.34109454423469643,
      "learning_rate": 3.941311852704258e-05,
      "loss": 0.4133,
      "step": 281
    },
    {
      "epoch": 0.8757763975155279,
      "grad_norm": 0.35511307476273746,
      "learning_rate": 3.9355581127733024e-05,
      "loss": 0.4317,
      "step": 282
    },
    {
      "epoch": 0.8788819875776398,
      "grad_norm": 0.3270722625275185,
      "learning_rate": 3.929804372842348e-05,
      "loss": 0.4182,
      "step": 283
    },
    {
      "epoch": 0.8819875776397516,
      "grad_norm": 0.30707956127514435,
      "learning_rate": 3.924050632911392e-05,
      "loss": 0.4128,
      "step": 284
    },
    {
      "epoch": 0.8850931677018633,
      "grad_norm": 0.352987960191196,
      "learning_rate": 3.9182968929804377e-05,
      "loss": 0.4202,
      "step": 285
    },
    {
      "epoch": 0.8881987577639752,
      "grad_norm": 0.3209556725057783,
      "learning_rate": 3.912543153049482e-05,
      "loss": 0.4531,
      "step": 286
    },
    {
      "epoch": 0.8913043478260869,
      "grad_norm": 0.3424777350197383,
      "learning_rate": 3.9067894131185274e-05,
      "loss": 0.4261,
      "step": 287
    },
    {
      "epoch": 0.8944099378881988,
      "grad_norm": 0.36115235473805046,
      "learning_rate": 3.9010356731875716e-05,
      "loss": 0.4208,
      "step": 288
    },
    {
      "epoch": 0.8975155279503105,
      "grad_norm": 0.3345731728145184,
      "learning_rate": 3.895281933256617e-05,
      "loss": 0.4243,
      "step": 289
    },
    {
      "epoch": 0.9006211180124224,
      "grad_norm": 0.3479109694931497,
      "learning_rate": 3.8895281933256613e-05,
      "loss": 0.408,
      "step": 290
    },
    {
      "epoch": 0.9037267080745341,
      "grad_norm": 0.35901431270989403,
      "learning_rate": 3.883774453394707e-05,
      "loss": 0.4275,
      "step": 291
    },
    {
      "epoch": 0.906832298136646,
      "grad_norm": 0.33289357045170126,
      "learning_rate": 3.878020713463751e-05,
      "loss": 0.4078,
      "step": 292
    },
    {
      "epoch": 0.9099378881987578,
      "grad_norm": 0.33168510073705165,
      "learning_rate": 3.8722669735327966e-05,
      "loss": 0.4218,
      "step": 293
    },
    {
      "epoch": 0.9130434782608695,
      "grad_norm": 0.2975318289744658,
      "learning_rate": 3.866513233601841e-05,
      "loss": 0.4311,
      "step": 294
    },
    {
      "epoch": 0.9161490683229814,
      "grad_norm": 0.31426977572692477,
      "learning_rate": 3.8607594936708864e-05,
      "loss": 0.4297,
      "step": 295
    },
    {
      "epoch": 0.9192546583850931,
      "grad_norm": 0.3070483941031755,
      "learning_rate": 3.8550057537399306e-05,
      "loss": 0.4192,
      "step": 296
    },
    {
      "epoch": 0.922360248447205,
      "grad_norm": 0.2810848054459513,
      "learning_rate": 3.849252013808976e-05,
      "loss": 0.427,
      "step": 297
    },
    {
      "epoch": 0.9254658385093167,
      "grad_norm": 0.2991841633857078,
      "learning_rate": 3.84349827387802e-05,
      "loss": 0.4052,
      "step": 298
    },
    {
      "epoch": 0.9285714285714286,
      "grad_norm": 0.33847151615147736,
      "learning_rate": 3.837744533947066e-05,
      "loss": 0.419,
      "step": 299
    },
    {
      "epoch": 0.9316770186335404,
      "grad_norm": 0.29017927632864937,
      "learning_rate": 3.83199079401611e-05,
      "loss": 0.4235,
      "step": 300
    },
    {
      "epoch": 0.9347826086956522,
      "grad_norm": 0.32565509697744177,
      "learning_rate": 3.8262370540851556e-05,
      "loss": 0.4218,
      "step": 301
    },
    {
      "epoch": 0.937888198757764,
      "grad_norm": 0.31402325607805354,
      "learning_rate": 3.8204833141542005e-05,
      "loss": 0.4374,
      "step": 302
    },
    {
      "epoch": 0.9409937888198758,
      "grad_norm": 0.3147076556719568,
      "learning_rate": 3.8147295742232454e-05,
      "loss": 0.4155,
      "step": 303
    },
    {
      "epoch": 0.9440993788819876,
      "grad_norm": 0.29699738407713266,
      "learning_rate": 3.80897583429229e-05,
      "loss": 0.4111,
      "step": 304
    },
    {
      "epoch": 0.9472049689440993,
      "grad_norm": 0.2888210602850056,
      "learning_rate": 3.803222094361335e-05,
      "loss": 0.4221,
      "step": 305
    },
    {
      "epoch": 0.9503105590062112,
      "grad_norm": 0.2939573629666098,
      "learning_rate": 3.79746835443038e-05,
      "loss": 0.4032,
      "step": 306
    },
    {
      "epoch": 0.953416149068323,
      "grad_norm": 0.2962446654764285,
      "learning_rate": 3.791714614499425e-05,
      "loss": 0.4214,
      "step": 307
    },
    {
      "epoch": 0.9565217391304348,
      "grad_norm": 0.271891153920885,
      "learning_rate": 3.78596087456847e-05,
      "loss": 0.4198,
      "step": 308
    },
    {
      "epoch": 0.9596273291925466,
      "grad_norm": 0.32256951843172593,
      "learning_rate": 3.7802071346375146e-05,
      "loss": 0.4216,
      "step": 309
    },
    {
      "epoch": 0.9627329192546584,
      "grad_norm": 0.33232339921643056,
      "learning_rate": 3.7744533947065595e-05,
      "loss": 0.4177,
      "step": 310
    },
    {
      "epoch": 0.9658385093167702,
      "grad_norm": 0.35814851356254335,
      "learning_rate": 3.7686996547756043e-05,
      "loss": 0.425,
      "step": 311
    },
    {
      "epoch": 0.968944099378882,
      "grad_norm": 0.29938770364659023,
      "learning_rate": 3.762945914844649e-05,
      "loss": 0.4128,
      "step": 312
    },
    {
      "epoch": 0.9720496894409938,
      "grad_norm": 0.38739922253123726,
      "learning_rate": 3.757192174913694e-05,
      "loss": 0.4113,
      "step": 313
    },
    {
      "epoch": 0.9751552795031055,
      "grad_norm": 0.31386603107673766,
      "learning_rate": 3.751438434982739e-05,
      "loss": 0.4104,
      "step": 314
    },
    {
      "epoch": 0.9782608695652174,
      "grad_norm": 0.34687136495142834,
      "learning_rate": 3.745684695051784e-05,
      "loss": 0.4307,
      "step": 315
    },
    {
      "epoch": 0.9813664596273292,
      "grad_norm": 0.3492017123521989,
      "learning_rate": 3.739930955120829e-05,
      "loss": 0.4077,
      "step": 316
    },
    {
      "epoch": 0.984472049689441,
      "grad_norm": 0.29396206255406326,
      "learning_rate": 3.7341772151898736e-05,
      "loss": 0.4067,
      "step": 317
    },
    {
      "epoch": 0.9875776397515528,
      "grad_norm": 0.31882677984452723,
      "learning_rate": 3.7284234752589185e-05,
      "loss": 0.4207,
      "step": 318
    },
    {
      "epoch": 0.9906832298136646,
      "grad_norm": 0.37165416285954644,
      "learning_rate": 3.722669735327963e-05,
      "loss": 0.4339,
      "step": 319
    },
    {
      "epoch": 0.9937888198757764,
      "grad_norm": 0.3190088839703568,
      "learning_rate": 3.716915995397008e-05,
      "loss": 0.4079,
      "step": 320
    },
    {
      "epoch": 0.9968944099378882,
      "grad_norm": 0.3115319771959773,
      "learning_rate": 3.711162255466053e-05,
      "loss": 0.4322,
      "step": 321
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3044086608586031,
      "learning_rate": 3.705408515535098e-05,
      "loss": 0.4097,
      "step": 322
    },
    {
      "epoch": 1.0031055900621118,
      "grad_norm": 0.33417590278362963,
      "learning_rate": 3.699654775604143e-05,
      "loss": 0.3323,
      "step": 323
    },
    {
      "epoch": 1.0062111801242235,
      "grad_norm": 0.341573477224664,
      "learning_rate": 3.693901035673188e-05,
      "loss": 0.3571,
      "step": 324
    },
    {
      "epoch": 1.0093167701863355,
      "grad_norm": 0.27258326161115387,
      "learning_rate": 3.6881472957422326e-05,
      "loss": 0.3404,
      "step": 325
    },
    {
      "epoch": 1.0124223602484472,
      "grad_norm": 0.33991178542501627,
      "learning_rate": 3.6823935558112774e-05,
      "loss": 0.3493,
      "step": 326
    },
    {
      "epoch": 1.015527950310559,
      "grad_norm": 0.3446263251981706,
      "learning_rate": 3.676639815880322e-05,
      "loss": 0.3473,
      "step": 327
    },
    {
      "epoch": 1.0186335403726707,
      "grad_norm": 0.33801547973317314,
      "learning_rate": 3.670886075949367e-05,
      "loss": 0.3697,
      "step": 328
    },
    {
      "epoch": 1.0217391304347827,
      "grad_norm": 0.35908354782023477,
      "learning_rate": 3.665132336018412e-05,
      "loss": 0.3476,
      "step": 329
    },
    {
      "epoch": 1.0248447204968945,
      "grad_norm": 0.3234656105570385,
      "learning_rate": 3.659378596087457e-05,
      "loss": 0.3622,
      "step": 330
    },
    {
      "epoch": 1.0279503105590062,
      "grad_norm": 0.35587249506855595,
      "learning_rate": 3.653624856156502e-05,
      "loss": 0.3555,
      "step": 331
    },
    {
      "epoch": 1.031055900621118,
      "grad_norm": 0.31905169592308186,
      "learning_rate": 3.647871116225547e-05,
      "loss": 0.3461,
      "step": 332
    },
    {
      "epoch": 1.0341614906832297,
      "grad_norm": 0.36840310397083925,
      "learning_rate": 3.6421173762945915e-05,
      "loss": 0.3429,
      "step": 333
    },
    {
      "epoch": 1.0372670807453417,
      "grad_norm": 0.3651205860513462,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.3435,
      "step": 334
    },
    {
      "epoch": 1.0403726708074534,
      "grad_norm": 0.31066005439052724,
      "learning_rate": 3.630609896432681e-05,
      "loss": 0.3272,
      "step": 335
    },
    {
      "epoch": 1.0434782608695652,
      "grad_norm": 0.3759419584351618,
      "learning_rate": 3.624856156501726e-05,
      "loss": 0.3395,
      "step": 336
    },
    {
      "epoch": 1.046583850931677,
      "grad_norm": 0.3021549547887614,
      "learning_rate": 3.619102416570771e-05,
      "loss": 0.3417,
      "step": 337
    },
    {
      "epoch": 1.049689440993789,
      "grad_norm": 0.3205703918762732,
      "learning_rate": 3.613348676639816e-05,
      "loss": 0.3433,
      "step": 338
    },
    {
      "epoch": 1.0527950310559007,
      "grad_norm": 0.4534884210584356,
      "learning_rate": 3.607594936708861e-05,
      "loss": 0.3594,
      "step": 339
    },
    {
      "epoch": 1.0559006211180124,
      "grad_norm": 0.367415386580333,
      "learning_rate": 3.6018411967779057e-05,
      "loss": 0.3524,
      "step": 340
    },
    {
      "epoch": 1.0590062111801242,
      "grad_norm": 0.3127875635159284,
      "learning_rate": 3.5960874568469505e-05,
      "loss": 0.333,
      "step": 341
    },
    {
      "epoch": 1.062111801242236,
      "grad_norm": 0.4511553956189257,
      "learning_rate": 3.5903337169159954e-05,
      "loss": 0.3454,
      "step": 342
    },
    {
      "epoch": 1.065217391304348,
      "grad_norm": 0.27133796776358254,
      "learning_rate": 3.58457997698504e-05,
      "loss": 0.3307,
      "step": 343
    },
    {
      "epoch": 1.0683229813664596,
      "grad_norm": 0.37172783607468407,
      "learning_rate": 3.578826237054085e-05,
      "loss": 0.332,
      "step": 344
    },
    {
      "epoch": 1.0714285714285714,
      "grad_norm": 0.31903478698253923,
      "learning_rate": 3.57307249712313e-05,
      "loss": 0.3641,
      "step": 345
    },
    {
      "epoch": 1.0745341614906831,
      "grad_norm": 0.3590599821405197,
      "learning_rate": 3.567318757192175e-05,
      "loss": 0.3368,
      "step": 346
    },
    {
      "epoch": 1.0776397515527951,
      "grad_norm": 0.3228666493670707,
      "learning_rate": 3.56156501726122e-05,
      "loss": 0.3518,
      "step": 347
    },
    {
      "epoch": 1.0807453416149069,
      "grad_norm": 0.35040485427397144,
      "learning_rate": 3.5558112773302646e-05,
      "loss": 0.3567,
      "step": 348
    },
    {
      "epoch": 1.0838509316770186,
      "grad_norm": 0.3223473550373259,
      "learning_rate": 3.5500575373993095e-05,
      "loss": 0.3292,
      "step": 349
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 0.3162329124544906,
      "learning_rate": 3.5443037974683544e-05,
      "loss": 0.3386,
      "step": 350
    },
    {
      "epoch": 1.0900621118012421,
      "grad_norm": 0.35250805959488396,
      "learning_rate": 3.538550057537399e-05,
      "loss": 0.3286,
      "step": 351
    },
    {
      "epoch": 1.093167701863354,
      "grad_norm": 0.31027768437301634,
      "learning_rate": 3.532796317606444e-05,
      "loss": 0.3411,
      "step": 352
    },
    {
      "epoch": 1.0962732919254659,
      "grad_norm": 0.28606898633939265,
      "learning_rate": 3.52704257767549e-05,
      "loss": 0.3407,
      "step": 353
    },
    {
      "epoch": 1.0993788819875776,
      "grad_norm": 0.3579167421662421,
      "learning_rate": 3.521288837744534e-05,
      "loss": 0.3262,
      "step": 354
    },
    {
      "epoch": 1.1024844720496894,
      "grad_norm": 0.3402295001253341,
      "learning_rate": 3.5155350978135794e-05,
      "loss": 0.3324,
      "step": 355
    },
    {
      "epoch": 1.1055900621118013,
      "grad_norm": 0.31366685836024,
      "learning_rate": 3.5097813578826236e-05,
      "loss": 0.3463,
      "step": 356
    },
    {
      "epoch": 1.108695652173913,
      "grad_norm": 0.46838911104977027,
      "learning_rate": 3.504027617951669e-05,
      "loss": 0.3565,
      "step": 357
    },
    {
      "epoch": 1.1118012422360248,
      "grad_norm": 0.3060846523455061,
      "learning_rate": 3.4982738780207134e-05,
      "loss": 0.357,
      "step": 358
    },
    {
      "epoch": 1.1149068322981366,
      "grad_norm": 0.4392245103993425,
      "learning_rate": 3.492520138089759e-05,
      "loss": 0.3568,
      "step": 359
    },
    {
      "epoch": 1.1180124223602483,
      "grad_norm": 0.3916417909387617,
      "learning_rate": 3.486766398158803e-05,
      "loss": 0.3446,
      "step": 360
    },
    {
      "epoch": 1.1211180124223603,
      "grad_norm": 0.3501561418628378,
      "learning_rate": 3.4810126582278487e-05,
      "loss": 0.3282,
      "step": 361
    },
    {
      "epoch": 1.124223602484472,
      "grad_norm": 0.37454862360065444,
      "learning_rate": 3.475258918296893e-05,
      "loss": 0.3543,
      "step": 362
    },
    {
      "epoch": 1.1273291925465838,
      "grad_norm": 0.2884683302507566,
      "learning_rate": 3.4695051783659384e-05,
      "loss": 0.3337,
      "step": 363
    },
    {
      "epoch": 1.1304347826086956,
      "grad_norm": 0.3254717305148171,
      "learning_rate": 3.4637514384349826e-05,
      "loss": 0.3271,
      "step": 364
    },
    {
      "epoch": 1.1335403726708075,
      "grad_norm": 0.3256237761211695,
      "learning_rate": 3.457997698504028e-05,
      "loss": 0.3298,
      "step": 365
    },
    {
      "epoch": 1.1366459627329193,
      "grad_norm": 0.30981574585542065,
      "learning_rate": 3.4522439585730723e-05,
      "loss": 0.3685,
      "step": 366
    },
    {
      "epoch": 1.139751552795031,
      "grad_norm": 0.29936602875383006,
      "learning_rate": 3.446490218642118e-05,
      "loss": 0.3524,
      "step": 367
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 0.2961907533597477,
      "learning_rate": 3.440736478711162e-05,
      "loss": 0.3414,
      "step": 368
    },
    {
      "epoch": 1.1459627329192545,
      "grad_norm": 0.2898757967419472,
      "learning_rate": 3.4349827387802076e-05,
|
"loss": 0.3275, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.1490683229813665, |
|
"grad_norm": 0.35918811245436444, |
|
"learning_rate": 3.429228998849252e-05, |
|
"loss": 0.3502, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.1521739130434783, |
|
"grad_norm": 0.2775107307381104, |
|
"learning_rate": 3.4234752589182974e-05, |
|
"loss": 0.3409, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.15527950310559, |
|
"grad_norm": 0.2986400287100927, |
|
"learning_rate": 3.4177215189873416e-05, |
|
"loss": 0.3312, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.1583850931677018, |
|
"grad_norm": 0.33238801993955036, |
|
"learning_rate": 3.411967779056387e-05, |
|
"loss": 0.3443, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.1614906832298137, |
|
"grad_norm": 0.2893594359102009, |
|
"learning_rate": 3.406214039125431e-05, |
|
"loss": 0.3332, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.1645962732919255, |
|
"grad_norm": 0.32293840276637376, |
|
"learning_rate": 3.400460299194477e-05, |
|
"loss": 0.3354, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1677018633540373, |
|
"grad_norm": 0.27306219223391365, |
|
"learning_rate": 3.394706559263521e-05, |
|
"loss": 0.3209, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.170807453416149, |
|
"grad_norm": 0.3342500084639322, |
|
"learning_rate": 3.3889528193325666e-05, |
|
"loss": 0.3729, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.1739130434782608, |
|
"grad_norm": 0.2661392532196279, |
|
"learning_rate": 3.383199079401611e-05, |
|
"loss": 0.3383, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.1770186335403727, |
|
"grad_norm": 0.3386471665658259, |
|
"learning_rate": 3.3774453394706564e-05, |
|
"loss": 0.318, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.1801242236024845, |
|
"grad_norm": 0.3155587203894488, |
|
"learning_rate": 3.3716915995397006e-05, |
|
"loss": 0.3321, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1832298136645962, |
|
"grad_norm": 0.3451778286777197, |
|
"learning_rate": 3.365937859608746e-05, |
|
"loss": 0.361, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.186335403726708, |
|
"grad_norm": 0.3227976748273063, |
|
"learning_rate": 3.36018411967779e-05, |
|
"loss": 0.3349, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.18944099378882, |
|
"grad_norm": 0.320511150129644, |
|
"learning_rate": 3.354430379746836e-05, |
|
"loss": 0.3449, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.1925465838509317, |
|
"grad_norm": 0.31955908520280063, |
|
"learning_rate": 3.34867663981588e-05, |
|
"loss": 0.3351, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.1956521739130435, |
|
"grad_norm": 0.30633810764776365, |
|
"learning_rate": 3.3429228998849256e-05, |
|
"loss": 0.3275, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.1987577639751552, |
|
"grad_norm": 0.41299034529321954, |
|
"learning_rate": 3.33716915995397e-05, |
|
"loss": 0.3309, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.201863354037267, |
|
"grad_norm": 0.2750482509074482, |
|
"learning_rate": 3.3314154200230153e-05, |
|
"loss": 0.3398, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.204968944099379, |
|
"grad_norm": 0.3081268249974453, |
|
"learning_rate": 3.3256616800920595e-05, |
|
"loss": 0.3322, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.2080745341614907, |
|
"grad_norm": 0.3520674198029431, |
|
"learning_rate": 3.319907940161105e-05, |
|
"loss": 0.3663, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.2111801242236024, |
|
"grad_norm": 0.32565232106148584, |
|
"learning_rate": 3.314154200230149e-05, |
|
"loss": 0.343, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2142857142857142, |
|
"grad_norm": 0.2938812397405531, |
|
"learning_rate": 3.308400460299195e-05, |
|
"loss": 0.3378, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.2173913043478262, |
|
"grad_norm": 0.3141073779827861, |
|
"learning_rate": 3.302646720368239e-05, |
|
"loss": 0.3335, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.220496894409938, |
|
"grad_norm": 0.3418673255721663, |
|
"learning_rate": 3.2968929804372846e-05, |
|
"loss": 0.36, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.2236024844720497, |
|
"grad_norm": 0.24297614998734132, |
|
"learning_rate": 3.291139240506329e-05, |
|
"loss": 0.3387, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.2267080745341614, |
|
"grad_norm": 0.3267179467149504, |
|
"learning_rate": 3.285385500575374e-05, |
|
"loss": 0.3488, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.2298136645962732, |
|
"grad_norm": 0.3057560458812451, |
|
"learning_rate": 3.2796317606444185e-05, |
|
"loss": 0.3268, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.2329192546583851, |
|
"grad_norm": 0.3134897896860434, |
|
"learning_rate": 3.273878020713464e-05, |
|
"loss": 0.3459, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.236024844720497, |
|
"grad_norm": 0.3047314985401556, |
|
"learning_rate": 3.268124280782508e-05, |
|
"loss": 0.3291, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.2391304347826086, |
|
"grad_norm": 0.31348581848675783, |
|
"learning_rate": 3.262370540851554e-05, |
|
"loss": 0.3446, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.2422360248447206, |
|
"grad_norm": 0.3482328869260001, |
|
"learning_rate": 3.256616800920598e-05, |
|
"loss": 0.3561, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2453416149068324, |
|
"grad_norm": 0.31183834841742225, |
|
"learning_rate": 3.2508630609896436e-05, |
|
"loss": 0.3547, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.2484472049689441, |
|
"grad_norm": 0.3061676085086065, |
|
"learning_rate": 3.245109321058688e-05, |
|
"loss": 0.3595, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.2515527950310559, |
|
"grad_norm": 0.32549148328343397, |
|
"learning_rate": 3.239355581127733e-05, |
|
"loss": 0.3342, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.2546583850931676, |
|
"grad_norm": 0.30445969084522895, |
|
"learning_rate": 3.233601841196778e-05, |
|
"loss": 0.3242, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.2577639751552794, |
|
"grad_norm": 0.2742819629805248, |
|
"learning_rate": 3.227848101265823e-05, |
|
"loss": 0.3522, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.2608695652173914, |
|
"grad_norm": 0.32581875150876105, |
|
"learning_rate": 3.222094361334868e-05, |
|
"loss": 0.3429, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.263975155279503, |
|
"grad_norm": 0.2902255052156193, |
|
"learning_rate": 3.216340621403913e-05, |
|
"loss": 0.3369, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.2670807453416149, |
|
"grad_norm": 0.284761382807809, |
|
"learning_rate": 3.210586881472958e-05, |
|
"loss": 0.36, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.2701863354037268, |
|
"grad_norm": 0.3025552167032939, |
|
"learning_rate": 3.2048331415420025e-05, |
|
"loss": 0.3445, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.2732919254658386, |
|
"grad_norm": 0.3305696776607858, |
|
"learning_rate": 3.1990794016110474e-05, |
|
"loss": 0.3463, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.2763975155279503, |
|
"grad_norm": 0.3077574972549534, |
|
"learning_rate": 3.193325661680092e-05, |
|
"loss": 0.3594, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.279503105590062, |
|
"grad_norm": 0.27442755120830326, |
|
"learning_rate": 3.187571921749137e-05, |
|
"loss": 0.3362, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.2826086956521738, |
|
"grad_norm": 0.3038026451556641, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 0.3353, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 0.2758156658151106, |
|
"learning_rate": 3.176064441887227e-05, |
|
"loss": 0.337, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.2888198757763976, |
|
"grad_norm": 0.26613400787975794, |
|
"learning_rate": 3.170310701956272e-05, |
|
"loss": 0.3347, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.2919254658385093, |
|
"grad_norm": 0.30006243856469433, |
|
"learning_rate": 3.1645569620253167e-05, |
|
"loss": 0.3575, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.295031055900621, |
|
"grad_norm": 0.32225619437705794, |
|
"learning_rate": 3.1588032220943615e-05, |
|
"loss": 0.3404, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.298136645962733, |
|
"grad_norm": 0.2933513705620206, |
|
"learning_rate": 3.1530494821634064e-05, |
|
"loss": 0.3367, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.3012422360248448, |
|
"grad_norm": 0.34221232972865906, |
|
"learning_rate": 3.147295742232451e-05, |
|
"loss": 0.3507, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.3043478260869565, |
|
"grad_norm": 0.3207028944029123, |
|
"learning_rate": 3.141542002301496e-05, |
|
"loss": 0.339, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.3074534161490683, |
|
"grad_norm": 0.28691874649916205, |
|
"learning_rate": 3.135788262370541e-05, |
|
"loss": 0.3158, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.31055900621118, |
|
"grad_norm": 0.32572558244440175, |
|
"learning_rate": 3.130034522439586e-05, |
|
"loss": 0.3425, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.3136645962732918, |
|
"grad_norm": 0.2840181269830042, |
|
"learning_rate": 3.124280782508631e-05, |
|
"loss": 0.3446, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.3167701863354038, |
|
"grad_norm": 0.314090935226993, |
|
"learning_rate": 3.1185270425776756e-05, |
|
"loss": 0.3315, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.3198757763975155, |
|
"grad_norm": 0.31197822717588264, |
|
"learning_rate": 3.1127733026467205e-05, |
|
"loss": 0.3443, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.3229813664596273, |
|
"grad_norm": 0.2864210002126174, |
|
"learning_rate": 3.1070195627157654e-05, |
|
"loss": 0.3375, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.3260869565217392, |
|
"grad_norm": 0.25519688185589984, |
|
"learning_rate": 3.10126582278481e-05, |
|
"loss": 0.3384, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.329192546583851, |
|
"grad_norm": 0.2744740349540228, |
|
"learning_rate": 3.095512082853855e-05, |
|
"loss": 0.3383, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.3322981366459627, |
|
"grad_norm": 0.2607087924929348, |
|
"learning_rate": 3.0897583429229e-05, |
|
"loss": 0.3555, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.3354037267080745, |
|
"grad_norm": 0.27684287170228183, |
|
"learning_rate": 3.084004602991945e-05, |
|
"loss": 0.3261, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.3385093167701863, |
|
"grad_norm": 0.3095550998483706, |
|
"learning_rate": 3.07825086306099e-05, |
|
"loss": 0.3512, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.341614906832298, |
|
"grad_norm": 0.25842001969735057, |
|
"learning_rate": 3.0724971231300346e-05, |
|
"loss": 0.3296, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.34472049689441, |
|
"grad_norm": 0.30589083199518, |
|
"learning_rate": 3.0667433831990795e-05, |
|
"loss": 0.3329, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.3478260869565217, |
|
"grad_norm": 0.2825831249071207, |
|
"learning_rate": 3.0609896432681244e-05, |
|
"loss": 0.3403, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.3509316770186335, |
|
"grad_norm": 0.2849649494187899, |
|
"learning_rate": 3.055235903337169e-05, |
|
"loss": 0.3329, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.3540372670807455, |
|
"grad_norm": 0.31227992790240827, |
|
"learning_rate": 3.0494821634062144e-05, |
|
"loss": 0.3402, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.3571428571428572, |
|
"grad_norm": 0.28830226140066545, |
|
"learning_rate": 3.043728423475259e-05, |
|
"loss": 0.3343, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.360248447204969, |
|
"grad_norm": 0.2920144019191934, |
|
"learning_rate": 3.0379746835443042e-05, |
|
"loss": 0.3262, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.3633540372670807, |
|
"grad_norm": 0.25016168615415485, |
|
"learning_rate": 3.0322209436133487e-05, |
|
"loss": 0.3394, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.3664596273291925, |
|
"grad_norm": 0.30261081735444717, |
|
"learning_rate": 3.026467203682394e-05, |
|
"loss": 0.3462, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.3695652173913042, |
|
"grad_norm": 0.2881616381341832, |
|
"learning_rate": 3.0207134637514385e-05, |
|
"loss": 0.3318, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.3726708074534162, |
|
"grad_norm": 0.2880320213397424, |
|
"learning_rate": 3.0149597238204837e-05, |
|
"loss": 0.3467, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.375776397515528, |
|
"grad_norm": 0.27020350890941985, |
|
"learning_rate": 3.0092059838895282e-05, |
|
"loss": 0.332, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.3788819875776397, |
|
"grad_norm": 0.3103789819064371, |
|
"learning_rate": 3.0034522439585734e-05, |
|
"loss": 0.3557, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.3819875776397517, |
|
"grad_norm": 0.2689229148068124, |
|
"learning_rate": 2.997698504027618e-05, |
|
"loss": 0.3439, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.3850931677018634, |
|
"grad_norm": 0.3284067395525679, |
|
"learning_rate": 2.991944764096663e-05, |
|
"loss": 0.3361, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.3881987577639752, |
|
"grad_norm": 0.31384093062312546, |
|
"learning_rate": 2.9861910241657077e-05, |
|
"loss": 0.3397, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.391304347826087, |
|
"grad_norm": 0.2699369577142723, |
|
"learning_rate": 2.980437284234753e-05, |
|
"loss": 0.3586, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.3944099378881987, |
|
"grad_norm": 0.3036288617772187, |
|
"learning_rate": 2.9746835443037974e-05, |
|
"loss": 0.3565, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.3975155279503104, |
|
"grad_norm": 0.3124807578288405, |
|
"learning_rate": 2.9689298043728427e-05, |
|
"loss": 0.3419, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4006211180124224, |
|
"grad_norm": 0.2979650176094835, |
|
"learning_rate": 2.9631760644418872e-05, |
|
"loss": 0.348, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.4037267080745341, |
|
"grad_norm": 0.2681659506306783, |
|
"learning_rate": 2.9574223245109324e-05, |
|
"loss": 0.3299, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.406832298136646, |
|
"grad_norm": 0.32598633427460977, |
|
"learning_rate": 2.951668584579977e-05, |
|
"loss": 0.3386, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.4099378881987579, |
|
"grad_norm": 0.3247280401348239, |
|
"learning_rate": 2.945914844649022e-05, |
|
"loss": 0.3616, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.4130434782608696, |
|
"grad_norm": 0.2674177394891557, |
|
"learning_rate": 2.940161104718067e-05, |
|
"loss": 0.3477, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.4161490683229814, |
|
"grad_norm": 0.31284120066769544, |
|
"learning_rate": 2.934407364787112e-05, |
|
"loss": 0.3359, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.4192546583850931, |
|
"grad_norm": 0.29650681379778476, |
|
"learning_rate": 2.9286536248561568e-05, |
|
"loss": 0.3534, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.4223602484472049, |
|
"grad_norm": 0.28958329200728805, |
|
"learning_rate": 2.9228998849252016e-05, |
|
"loss": 0.3414, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.4254658385093169, |
|
"grad_norm": 0.3164618567976454, |
|
"learning_rate": 2.9171461449942465e-05, |
|
"loss": 0.351, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.32604862143805774, |
|
"learning_rate": 2.9113924050632914e-05, |
|
"loss": 0.3185, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.4316770186335404, |
|
"grad_norm": 0.2371091150488046, |
|
"learning_rate": 2.9056386651323363e-05, |
|
"loss": 0.3268, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.434782608695652, |
|
"grad_norm": 0.28836250577098943, |
|
"learning_rate": 2.899884925201381e-05, |
|
"loss": 0.3203, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.437888198757764, |
|
"grad_norm": 0.29935943849859553, |
|
"learning_rate": 2.894131185270426e-05, |
|
"loss": 0.3419, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.4409937888198758, |
|
"grad_norm": 0.2678768364941078, |
|
"learning_rate": 2.888377445339471e-05, |
|
"loss": 0.3423, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.4440993788819876, |
|
"grad_norm": 0.3004413989905001, |
|
"learning_rate": 2.8826237054085157e-05, |
|
"loss": 0.3448, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.4472049689440993, |
|
"grad_norm": 0.3437138642713499, |
|
"learning_rate": 2.8768699654775606e-05, |
|
"loss": 0.3624, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.450310559006211, |
|
"grad_norm": 0.27833054674558505, |
|
"learning_rate": 2.8711162255466055e-05, |
|
"loss": 0.3559, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.453416149068323, |
|
"grad_norm": 0.30426248134832284, |
|
"learning_rate": 2.8653624856156504e-05, |
|
"loss": 0.3409, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.4565217391304348, |
|
"grad_norm": 0.2884530747421473, |
|
"learning_rate": 2.8596087456846952e-05, |
|
"loss": 0.3543, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.4596273291925466, |
|
"grad_norm": 0.26674718010863235, |
|
"learning_rate": 2.85385500575374e-05, |
|
"loss": 0.3352, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.4627329192546583, |
|
"grad_norm": 0.24962947417256104, |
|
"learning_rate": 2.848101265822785e-05, |
|
"loss": 0.3331, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.4658385093167703, |
|
"grad_norm": 0.24321872392892266, |
|
"learning_rate": 2.84234752589183e-05, |
|
"loss": 0.3346, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.468944099378882, |
|
"grad_norm": 0.25806201920649635, |
|
"learning_rate": 2.8365937859608747e-05, |
|
"loss": 0.3433, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.4720496894409938, |
|
"grad_norm": 0.260107860168702, |
|
"learning_rate": 2.8308400460299196e-05, |
|
"loss": 0.3518, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.4751552795031055, |
|
"grad_norm": 0.27151545722001336, |
|
"learning_rate": 2.8250863060989645e-05, |
|
"loss": 0.3222, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.4782608695652173, |
|
"grad_norm": 0.2699064437677885, |
|
"learning_rate": 2.8193325661680093e-05, |
|
"loss": 0.3408, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.4813664596273293, |
|
"grad_norm": 0.2534825847738341, |
|
"learning_rate": 2.8135788262370542e-05, |
|
"loss": 0.3355, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.484472049689441, |
|
"grad_norm": 0.2596248018497863, |
|
"learning_rate": 2.807825086306099e-05, |
|
"loss": 0.3317, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.4875776397515528, |
|
"grad_norm": 0.22547358749920884, |
|
"learning_rate": 2.802071346375144e-05, |
|
"loss": 0.3172, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.4906832298136645, |
|
"grad_norm": 0.28156958226578077, |
|
"learning_rate": 2.796317606444189e-05, |
|
"loss": 0.3476, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.4937888198757765, |
|
"grad_norm": 0.26615285376164327, |
|
"learning_rate": 2.7905638665132337e-05, |
|
"loss": 0.3489, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.4968944099378882, |
|
"grad_norm": 0.246150116031317, |
|
"learning_rate": 2.7848101265822786e-05, |
|
"loss": 0.3464, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.25268560596400597, |
|
"learning_rate": 2.7790563866513235e-05, |
|
"loss": 0.3391, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.5031055900621118, |
|
"grad_norm": 0.2946205590355613, |
|
"learning_rate": 2.7733026467203683e-05, |
|
"loss": 0.3541, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.5062111801242235, |
|
"grad_norm": 0.2750424223242439, |
|
"learning_rate": 2.7675489067894132e-05, |
|
"loss": 0.3276, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.5093167701863353, |
|
"grad_norm": 0.28954598608369275, |
|
"learning_rate": 2.761795166858458e-05, |
|
"loss": 0.3554, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.5124223602484472, |
|
"grad_norm": 0.29461626033953947, |
|
"learning_rate": 2.756041426927503e-05, |
|
"loss": 0.3293, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.515527950310559, |
|
"grad_norm": 0.2407514728215296, |
|
"learning_rate": 2.7502876869965478e-05, |
|
"loss": 0.3263, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.518633540372671, |
|
"grad_norm": 0.24475815135162626, |
|
"learning_rate": 2.7445339470655927e-05, |
|
"loss": 0.3423, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.5217391304347827, |
|
"grad_norm": 0.32665261682040186, |
|
"learning_rate": 2.7387802071346376e-05, |
|
"loss": 0.3333, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.5248447204968945, |
|
"grad_norm": 0.2552003566164109, |
|
"learning_rate": 2.7330264672036824e-05, |
|
"loss": 0.3466, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.5279503105590062, |
|
"grad_norm": 0.2871320623730171, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.3447, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.531055900621118, |
|
"grad_norm": 0.2440639273175817, |
|
"learning_rate": 2.7215189873417722e-05, |
|
"loss": 0.338, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.5341614906832297, |
|
"grad_norm": 0.24115465120440344, |
|
"learning_rate": 2.715765247410817e-05, |
|
"loss": 0.3245, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.5372670807453415, |
|
"grad_norm": 0.2781064697786101, |
|
"learning_rate": 2.7100115074798623e-05, |
|
"loss": 0.3637, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.5403726708074534, |
|
"grad_norm": 0.27237179201858924, |
|
"learning_rate": 2.7042577675489068e-05, |
|
"loss": 0.353, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.5434782608695652, |
|
"grad_norm": 0.3018049050362612, |
|
"learning_rate": 2.698504027617952e-05, |
|
"loss": 0.33, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.5465838509316772, |
|
"grad_norm": 0.319532872255584, |
|
"learning_rate": 2.6927502876869965e-05, |
|
"loss": 0.341, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.549689440993789, |
|
"grad_norm": 0.23318500669833875, |
|
"learning_rate": 2.6869965477560418e-05, |
|
"loss": 0.3324, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.5527950310559007, |
|
"grad_norm": 0.3108509837550317, |
|
"learning_rate": 2.6812428078250863e-05, |
|
"loss": 0.3484, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5559006211180124, |
|
"grad_norm": 0.27432952612163103, |
|
"learning_rate": 2.6754890678941315e-05, |
|
"loss": 0.3301, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.5590062111801242, |
|
"grad_norm": 0.2474022932813197, |
|
"learning_rate": 2.669735327963176e-05, |
|
"loss": 0.3335, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.562111801242236, |
|
"grad_norm": 0.25918516414740417, |
|
"learning_rate": 2.6639815880322212e-05, |
|
"loss": 0.3531, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.5652173913043477, |
|
"grad_norm": 0.28631487498944946, |
|
"learning_rate": 2.6582278481012658e-05, |
|
"loss": 0.3461, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.5683229813664596, |
|
"grad_norm": 0.2541239393514543, |
|
"learning_rate": 2.652474108170311e-05, |
|
"loss": 0.3406, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.26793962508861174, |
|
"learning_rate": 2.646720368239356e-05, |
|
"loss": 0.3333, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.5745341614906834, |
|
"grad_norm": 0.30945895076026697, |
|
"learning_rate": 2.6409666283084007e-05, |
|
"loss": 0.3429, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.5776397515527951, |
|
"grad_norm": 0.2625598679112342, |
|
"learning_rate": 2.6352128883774456e-05, |
|
"loss": 0.3475, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.5807453416149069, |
|
"grad_norm": 0.3340128053196445, |
|
"learning_rate": 2.6294591484464905e-05, |
|
"loss": 0.3548, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.5838509316770186, |
|
"grad_norm": 0.33090740840028027, |
|
"learning_rate": 2.6237054085155354e-05, |
|
"loss": 0.3482, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.5869565217391304, |
|
"grad_norm": 0.25904205831808136, |
|
"learning_rate": 2.6179516685845802e-05, |
|
"loss": 0.3322, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.5900621118012421, |
|
"grad_norm": 0.3458888736647229, |
|
"learning_rate": 2.612197928653625e-05, |
|
"loss": 0.3466, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.5931677018633539, |
|
"grad_norm": 0.26139554234188184, |
|
"learning_rate": 2.60644418872267e-05, |
|
"loss": 0.3357, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.5962732919254659, |
|
"grad_norm": 0.24797420076401436, |
|
"learning_rate": 2.600690448791715e-05, |
|
"loss": 0.3263, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.5993788819875776, |
|
"grad_norm": 0.25127963694679545, |
|
"learning_rate": 2.5949367088607597e-05, |
|
"loss": 0.3541, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.6024844720496896, |
|
"grad_norm": 0.24350107098267543, |
|
"learning_rate": 2.5891829689298046e-05, |
|
"loss": 0.3332, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.6055900621118013, |
|
"grad_norm": 0.2597186201230917, |
|
"learning_rate": 2.5834292289988495e-05, |
|
"loss": 0.3357, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.608695652173913, |
|
"grad_norm": 0.2553977260875351, |
|
"learning_rate": 2.5776754890678943e-05, |
|
"loss": 0.3381, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.6118012422360248, |
|
"grad_norm": 0.2495485503111441, |
|
"learning_rate": 2.5719217491369392e-05, |
|
"loss": 0.3489, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.6149068322981366, |
|
"grad_norm": 0.2826237704718821, |
|
"learning_rate": 2.566168009205984e-05, |
|
"loss": 0.3269, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.6180124223602483, |
|
"grad_norm": 0.2907559187980417, |
|
"learning_rate": 2.560414269275029e-05, |
|
"loss": 0.353, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.62111801242236, |
|
"grad_norm": 0.30078662752184515, |
|
"learning_rate": 2.5546605293440738e-05, |
|
"loss": 0.3344, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.624223602484472, |
|
"grad_norm": 0.2494274026603714, |
|
"learning_rate": 2.5489067894131187e-05, |
|
"loss": 0.3262, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.6273291925465838, |
|
"grad_norm": 0.22856587280801138, |
|
"learning_rate": 2.5431530494821636e-05, |
|
"loss": 0.3316, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.6304347826086958, |
|
"grad_norm": 0.24524446266248454, |
|
"learning_rate": 2.5373993095512084e-05, |
|
"loss": 0.3254, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.6335403726708075, |
|
"grad_norm": 0.2781145066258604, |
|
"learning_rate": 2.5316455696202533e-05, |
|
"loss": 0.3343, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.6366459627329193, |
|
"grad_norm": 0.24971582793985952, |
|
"learning_rate": 2.5258918296892982e-05, |
|
"loss": 0.3423, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.639751552795031, |
|
"grad_norm": 0.2961483358525156, |
|
"learning_rate": 2.520138089758343e-05, |
|
"loss": 0.3554, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 0.30349368090110823, |
|
"learning_rate": 2.514384349827388e-05, |
|
"loss": 0.3563, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.6459627329192545, |
|
"grad_norm": 0.28292757074394537, |
|
"learning_rate": 2.5086306098964328e-05, |
|
"loss": 0.352, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.6490683229813663, |
|
"grad_norm": 0.25778656185495347, |
|
"learning_rate": 2.5028768699654777e-05, |
|
"loss": 0.3486, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.6521739130434783, |
|
"grad_norm": 0.32420346337090605, |
|
"learning_rate": 2.4971231300345226e-05, |
|
"loss": 0.3497, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.65527950310559, |
|
"grad_norm": 0.24803469539845557, |
|
"learning_rate": 2.4913693901035674e-05, |
|
"loss": 0.3325, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.658385093167702, |
|
"grad_norm": 0.23193714998127715, |
|
"learning_rate": 2.4856156501726123e-05, |
|
"loss": 0.3244, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.6614906832298137, |
|
"grad_norm": 0.31410082505061837, |
|
"learning_rate": 2.4798619102416572e-05, |
|
"loss": 0.3295, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.6645962732919255, |
|
"grad_norm": 0.29805963194310403, |
|
"learning_rate": 2.474108170310702e-05, |
|
"loss": 0.3576, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.6677018633540373, |
|
"grad_norm": 0.2773254453129355, |
|
"learning_rate": 2.468354430379747e-05, |
|
"loss": 0.3382, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.670807453416149, |
|
"grad_norm": 0.32678020135127306, |
|
"learning_rate": 2.4626006904487918e-05, |
|
"loss": 0.3196, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.6739130434782608, |
|
"grad_norm": 0.3166277691971712, |
|
"learning_rate": 2.4568469505178367e-05, |
|
"loss": 0.3567, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.6770186335403725, |
|
"grad_norm": 0.28823972531727493, |
|
"learning_rate": 2.4510932105868815e-05, |
|
"loss": 0.3303, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.6801242236024845, |
|
"grad_norm": 0.31416636195922193, |
|
"learning_rate": 2.4453394706559264e-05, |
|
"loss": 0.3468, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.6832298136645962, |
|
"grad_norm": 0.29389175839717274, |
|
"learning_rate": 2.4395857307249713e-05, |
|
"loss": 0.3334, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.6863354037267082, |
|
"grad_norm": 0.2574868658425901, |
|
"learning_rate": 2.433831990794016e-05, |
|
"loss": 0.3459, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.68944099378882, |
|
"grad_norm": 0.43013005229440787, |
|
"learning_rate": 2.428078250863061e-05, |
|
"loss": 0.3663, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.6925465838509317, |
|
"grad_norm": 0.29719384149686173, |
|
"learning_rate": 2.422324510932106e-05, |
|
"loss": 0.3227, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.6956521739130435, |
|
"grad_norm": 0.2630824870951196, |
|
"learning_rate": 2.4165707710011508e-05, |
|
"loss": 0.3334, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.6987577639751552, |
|
"grad_norm": 0.262646615576403, |
|
"learning_rate": 2.4108170310701956e-05, |
|
"loss": 0.3289, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.701863354037267, |
|
"grad_norm": 0.29464184604515603, |
|
"learning_rate": 2.4050632911392405e-05, |
|
"loss": 0.3606, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.704968944099379, |
|
"grad_norm": 0.270420959511805, |
|
"learning_rate": 2.3993095512082854e-05, |
|
"loss": 0.3344, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.7080745341614907, |
|
"grad_norm": 0.25692484125212045, |
|
"learning_rate": 2.3935558112773303e-05, |
|
"loss": 0.3487, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7111801242236024, |
|
"grad_norm": 0.2438708000844588, |
|
"learning_rate": 2.387802071346375e-05, |
|
"loss": 0.3368, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.3013744640384419, |
|
"learning_rate": 2.38204833141542e-05, |
|
"loss": 0.3374, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.7173913043478262, |
|
"grad_norm": 0.25432846986941376, |
|
"learning_rate": 2.376294591484465e-05, |
|
"loss": 0.3229, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.720496894409938, |
|
"grad_norm": 0.25904590947672523, |
|
"learning_rate": 2.3705408515535098e-05, |
|
"loss": 0.3446, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.7236024844720497, |
|
"grad_norm": 0.30606145603760704, |
|
"learning_rate": 2.3647871116225546e-05, |
|
"loss": 0.3341, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.7267080745341614, |
|
"grad_norm": 0.29538170031014566, |
|
"learning_rate": 2.3590333716915995e-05, |
|
"loss": 0.3289, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.7298136645962732, |
|
"grad_norm": 0.27134852683063904, |
|
"learning_rate": 2.3532796317606444e-05, |
|
"loss": 0.3547, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.7329192546583851, |
|
"grad_norm": 0.2825681002780479, |
|
"learning_rate": 2.3475258918296892e-05, |
|
"loss": 0.3379, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.736024844720497, |
|
"grad_norm": 0.2540853936510712, |
|
"learning_rate": 2.341772151898734e-05, |
|
"loss": 0.3382, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.7391304347826086, |
|
"grad_norm": 0.23898673149156113, |
|
"learning_rate": 2.336018411967779e-05, |
|
"loss": 0.3204, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.7422360248447206, |
|
"grad_norm": 0.28690037448179756, |
|
"learning_rate": 2.330264672036824e-05, |
|
"loss": 0.3475, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.7453416149068324, |
|
"grad_norm": 0.2517436769783244, |
|
"learning_rate": 2.3245109321058687e-05, |
|
"loss": 0.3336, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.7484472049689441, |
|
"grad_norm": 0.26052794049930406, |
|
"learning_rate": 2.3187571921749136e-05, |
|
"loss": 0.3659, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.7515527950310559, |
|
"grad_norm": 0.2520454393087574, |
|
"learning_rate": 2.3130034522439585e-05, |
|
"loss": 0.341, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.7546583850931676, |
|
"grad_norm": 0.24469475054712242, |
|
"learning_rate": 2.3072497123130034e-05, |
|
"loss": 0.3385, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.7577639751552794, |
|
"grad_norm": 0.2688171235493825, |
|
"learning_rate": 2.3014959723820482e-05, |
|
"loss": 0.3194, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.7608695652173914, |
|
"grad_norm": 0.24660650779589638, |
|
"learning_rate": 2.295742232451093e-05, |
|
"loss": 0.3414, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.763975155279503, |
|
"grad_norm": 0.24074948906029303, |
|
"learning_rate": 2.289988492520138e-05, |
|
"loss": 0.3487, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.7670807453416149, |
|
"grad_norm": 0.2683374003415654, |
|
"learning_rate": 2.284234752589183e-05, |
|
"loss": 0.3324, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.7701863354037268, |
|
"grad_norm": 0.2615920960522321, |
|
"learning_rate": 2.278481012658228e-05, |
|
"loss": 0.3609, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.7732919254658386, |
|
"grad_norm": 0.24217423401661245, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 0.3642, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.7763975155279503, |
|
"grad_norm": 0.3146547539059143, |
|
"learning_rate": 2.2669735327963178e-05, |
|
"loss": 0.3333, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.779503105590062, |
|
"grad_norm": 0.2562957388358894, |
|
"learning_rate": 2.2612197928653627e-05, |
|
"loss": 0.3446, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.7826086956521738, |
|
"grad_norm": 0.2514732345893343, |
|
"learning_rate": 2.2554660529344075e-05, |
|
"loss": 0.3313, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 0.27268825204355784, |
|
"learning_rate": 2.2497123130034524e-05, |
|
"loss": 0.3431, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.7888198757763976, |
|
"grad_norm": 0.24683018483720148, |
|
"learning_rate": 2.2439585730724973e-05, |
|
"loss": 0.3264, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.7919254658385093, |
|
"grad_norm": 0.2511343392474156, |
|
"learning_rate": 2.238204833141542e-05, |
|
"loss": 0.3231, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.795031055900621, |
|
"grad_norm": 0.29263014098541856, |
|
"learning_rate": 2.232451093210587e-05, |
|
"loss": 0.3425, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.798136645962733, |
|
"grad_norm": 0.26439452081008274, |
|
"learning_rate": 2.226697353279632e-05, |
|
"loss": 0.3404, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.8012422360248448, |
|
"grad_norm": 0.26624397225893237, |
|
"learning_rate": 2.2209436133486768e-05, |
|
"loss": 0.3521, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.8043478260869565, |
|
"grad_norm": 0.27231006655087864, |
|
"learning_rate": 2.2151898734177217e-05, |
|
"loss": 0.3499, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.8074534161490683, |
|
"grad_norm": 0.2717715300694685, |
|
"learning_rate": 2.2094361334867665e-05, |
|
"loss": 0.3488, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.81055900621118, |
|
"grad_norm": 0.2555398572654679, |
|
"learning_rate": 2.2036823935558114e-05, |
|
"loss": 0.3484, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.8136645962732918, |
|
"grad_norm": 0.27285873888872886, |
|
"learning_rate": 2.1979286536248563e-05, |
|
"loss": 0.3301, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.8167701863354038, |
|
"grad_norm": 0.25210730048319585, |
|
"learning_rate": 2.192174913693901e-05, |
|
"loss": 0.3424, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.8198757763975155, |
|
"grad_norm": 0.26842055467218623, |
|
"learning_rate": 2.186421173762946e-05, |
|
"loss": 0.3408, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.8229813664596275, |
|
"grad_norm": 0.3208770216327945, |
|
"learning_rate": 2.180667433831991e-05, |
|
"loss": 0.3312, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.8260869565217392, |
|
"grad_norm": 0.2559541162545561, |
|
"learning_rate": 2.1749136939010358e-05, |
|
"loss": 0.36, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.829192546583851, |
|
"grad_norm": 0.32134394732411636, |
|
"learning_rate": 2.1691599539700806e-05, |
|
"loss": 0.3394, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.8322981366459627, |
|
"grad_norm": 0.2708594663810051, |
|
"learning_rate": 2.1634062140391255e-05, |
|
"loss": 0.3431, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.8354037267080745, |
|
"grad_norm": 0.3010404719152366, |
|
"learning_rate": 2.1576524741081704e-05, |
|
"loss": 0.3417, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.8385093167701863, |
|
"grad_norm": 0.3070106173244936, |
|
"learning_rate": 2.1518987341772153e-05, |
|
"loss": 0.3388, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.841614906832298, |
|
"grad_norm": 0.24023699838734106, |
|
"learning_rate": 2.14614499424626e-05, |
|
"loss": 0.359, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.84472049689441, |
|
"grad_norm": 0.27420152661967667, |
|
"learning_rate": 2.140391254315305e-05, |
|
"loss": 0.3262, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.8478260869565217, |
|
"grad_norm": 0.3128089706224423, |
|
"learning_rate": 2.13463751438435e-05, |
|
"loss": 0.3284, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.8509316770186337, |
|
"grad_norm": 0.26784524761567324, |
|
"learning_rate": 2.1288837744533947e-05, |
|
"loss": 0.3337, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.8540372670807455, |
|
"grad_norm": 0.2897924996984458, |
|
"learning_rate": 2.1231300345224396e-05, |
|
"loss": 0.3325, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 0.25009521464769496, |
|
"learning_rate": 2.1173762945914845e-05, |
|
"loss": 0.3457, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.860248447204969, |
|
"grad_norm": 0.3055325339631166, |
|
"learning_rate": 2.1116225546605294e-05, |
|
"loss": 0.3506, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.8633540372670807, |
|
"grad_norm": 0.28613812992385934, |
|
"learning_rate": 2.1058688147295742e-05, |
|
"loss": 0.3609, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8664596273291925, |
|
"grad_norm": 0.2705173567449075, |
|
"learning_rate": 2.100115074798619e-05, |
|
"loss": 0.3259, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.8695652173913042, |
|
"grad_norm": 0.3180478776075474, |
|
"learning_rate": 2.094361334867664e-05, |
|
"loss": 0.3459, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.8726708074534162, |
|
"grad_norm": 0.2667088182720578, |
|
"learning_rate": 2.088607594936709e-05, |
|
"loss": 0.3415, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.875776397515528, |
|
"grad_norm": 0.2721287511996052, |
|
"learning_rate": 2.0828538550057537e-05, |
|
"loss": 0.3295, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.87888198757764, |
|
"grad_norm": 0.24692799289830528, |
|
"learning_rate": 2.0771001150747986e-05, |
|
"loss": 0.3389, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.8819875776397517, |
|
"grad_norm": 0.25562500894154333, |
|
"learning_rate": 2.0713463751438435e-05, |
|
"loss": 0.3319, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.8850931677018634, |
|
"grad_norm": 0.2788963760074411, |
|
"learning_rate": 2.0655926352128883e-05, |
|
"loss": 0.3285, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.8881987577639752, |
|
"grad_norm": 0.24657052891375197, |
|
"learning_rate": 2.0598388952819332e-05, |
|
"loss": 0.3457, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.891304347826087, |
|
"grad_norm": 0.32309129817645427, |
|
"learning_rate": 2.054085155350978e-05, |
|
"loss": 0.3403, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.8944099378881987, |
|
"grad_norm": 0.30175425766070024, |
|
"learning_rate": 2.048331415420023e-05, |
|
"loss": 0.3471, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.8975155279503104, |
|
"grad_norm": 0.26841376007608464, |
|
"learning_rate": 2.042577675489068e-05, |
|
"loss": 0.3375, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.9006211180124224, |
|
"grad_norm": 0.262662943207609, |
|
"learning_rate": 2.0368239355581127e-05, |
|
"loss": 0.3323, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.9037267080745341, |
|
"grad_norm": 0.2701013469116013, |
|
"learning_rate": 2.0310701956271576e-05, |
|
"loss": 0.3421, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.9068322981366461, |
|
"grad_norm": 0.2714596873719603, |
|
"learning_rate": 2.0253164556962025e-05, |
|
"loss": 0.3459, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.9099378881987579, |
|
"grad_norm": 0.27588772682551244, |
|
"learning_rate": 2.0195627157652473e-05, |
|
"loss": 0.3393, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.9130434782608696, |
|
"grad_norm": 0.2599899065726882, |
|
"learning_rate": 2.0138089758342922e-05, |
|
"loss": 0.3272, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.9161490683229814, |
|
"grad_norm": 0.29859390134967695, |
|
"learning_rate": 2.008055235903337e-05, |
|
"loss": 0.3406, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.9192546583850931, |
|
"grad_norm": 0.2506363801804046, |
|
"learning_rate": 2.002301495972382e-05, |
|
"loss": 0.3442, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.9223602484472049, |
|
"grad_norm": 0.27643958694894183, |
|
"learning_rate": 1.9965477560414268e-05, |
|
"loss": 0.3266, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.9254658385093166, |
|
"grad_norm": 0.24433788612177662, |
|
"learning_rate": 1.9907940161104717e-05, |
|
"loss": 0.3282, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 0.23745008386988042, |
|
"learning_rate": 1.9850402761795166e-05, |
|
"loss": 0.3362, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.9316770186335404, |
|
"grad_norm": 0.26121859150272697, |
|
"learning_rate": 1.9792865362485614e-05, |
|
"loss": 0.3286, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.9347826086956523, |
|
"grad_norm": 0.2593462473033886, |
|
"learning_rate": 1.9735327963176063e-05, |
|
"loss": 0.3277, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.937888198757764, |
|
"grad_norm": 0.251734596039316, |
|
"learning_rate": 1.9677790563866512e-05, |
|
"loss": 0.3243, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.9409937888198758, |
|
"grad_norm": 0.2796993503020773, |
|
"learning_rate": 1.962025316455696e-05, |
|
"loss": 0.341, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.9440993788819876, |
|
"grad_norm": 0.24405119567007771, |
|
"learning_rate": 1.956271576524741e-05, |
|
"loss": 0.3263, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.9472049689440993, |
|
"grad_norm": 0.2551104705286801, |
|
"learning_rate": 1.9505178365937858e-05, |
|
"loss": 0.3273, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.950310559006211, |
|
"grad_norm": 0.30160867369839583, |
|
"learning_rate": 1.9447640966628307e-05, |
|
"loss": 0.3461, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.9534161490683228, |
|
"grad_norm": 0.2307083280109175, |
|
"learning_rate": 1.9390103567318755e-05, |
|
"loss": 0.3195, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.9565217391304348, |
|
"grad_norm": 0.27856475014068843, |
|
"learning_rate": 1.9332566168009204e-05, |
|
"loss": 0.3534, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.9596273291925466, |
|
"grad_norm": 0.2422123663936176, |
|
"learning_rate": 1.9275028768699653e-05, |
|
"loss": 0.3444, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.9627329192546585, |
|
"grad_norm": 0.2508344560707552, |
|
"learning_rate": 1.92174913693901e-05, |
|
"loss": 0.3316, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.9658385093167703, |
|
"grad_norm": 0.25457679605852573, |
|
"learning_rate": 1.915995397008055e-05, |
|
"loss": 0.3511, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.968944099378882, |
|
"grad_norm": 0.255448027513179, |
|
"learning_rate": 1.9102416570771002e-05, |
|
"loss": 0.3417, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.9720496894409938, |
|
"grad_norm": 0.2655225587813165, |
|
"learning_rate": 1.904487917146145e-05, |
|
"loss": 0.3364, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.9751552795031055, |
|
"grad_norm": 0.2644532896395622, |
|
"learning_rate": 1.89873417721519e-05, |
|
"loss": 0.3439, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.9782608695652173, |
|
"grad_norm": 0.25431765900047304, |
|
"learning_rate": 1.892980437284235e-05, |
|
"loss": 0.3277, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.981366459627329, |
|
"grad_norm": 0.253925081047276, |
|
"learning_rate": 1.8872266973532797e-05, |
|
"loss": 0.345, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.984472049689441, |
|
"grad_norm": 0.2542103813230237, |
|
"learning_rate": 1.8814729574223246e-05, |
|
"loss": 0.3458, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.9875776397515528, |
|
"grad_norm": 0.298104123148457, |
|
"learning_rate": 1.8757192174913695e-05, |
|
"loss": 0.3278, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9906832298136647, |
|
"grad_norm": 0.2322373735825899, |
|
"learning_rate": 1.8699654775604144e-05, |
|
"loss": 0.3426, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.9937888198757765, |
|
"grad_norm": 0.24606988538470728, |
|
"learning_rate": 1.8642117376294592e-05, |
|
"loss": 0.3358, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.9968944099378882, |
|
"grad_norm": 0.2849652231363428, |
|
"learning_rate": 1.858457997698504e-05, |
|
"loss": 0.315, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.277308601131606, |
|
"learning_rate": 1.852704257767549e-05, |
|
"loss": 0.3275, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.0031055900621118, |
|
"grad_norm": 0.3117446232875321, |
|
"learning_rate": 1.846950517836594e-05, |
|
"loss": 0.2631, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.0062111801242235, |
|
"grad_norm": 0.27820371561408924, |
|
"learning_rate": 1.8411967779056387e-05, |
|
"loss": 0.26, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.0093167701863353, |
|
"grad_norm": 0.35212508445991075, |
|
"learning_rate": 1.8354430379746836e-05, |
|
"loss": 0.2589, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.012422360248447, |
|
"grad_norm": 0.29598296168936833, |
|
"learning_rate": 1.8296892980437285e-05, |
|
"loss": 0.2727, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.015527950310559, |
|
"grad_norm": 0.23748767777958518, |
|
"learning_rate": 1.8239355581127733e-05, |
|
"loss": 0.2603, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.018633540372671, |
|
"grad_norm": 0.3396040317332316, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.2494, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.0217391304347827, |
|
"grad_norm": 0.26773683268799814, |
|
"learning_rate": 1.812428078250863e-05, |
|
"loss": 0.2538, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.0248447204968945, |
|
"grad_norm": 0.26908218639639603, |
|
"learning_rate": 1.806674338319908e-05, |
|
"loss": 0.2636, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.027950310559006, |
|
"grad_norm": 0.2934635435841592, |
|
"learning_rate": 1.8009205983889528e-05, |
|
"loss": 0.25, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.031055900621118, |
|
"grad_norm": 0.2785089363356141, |
|
"learning_rate": 1.7951668584579977e-05, |
|
"loss": 0.2441, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.0341614906832297, |
|
"grad_norm": 0.2638349484890508, |
|
"learning_rate": 1.7894131185270426e-05, |
|
"loss": 0.2519, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.0372670807453415, |
|
"grad_norm": 0.2586235412884467, |
|
"learning_rate": 1.7836593785960874e-05, |
|
"loss": 0.2509, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.040372670807453, |
|
"grad_norm": 0.2472488808463837, |
|
"learning_rate": 1.7779056386651323e-05, |
|
"loss": 0.2538, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.0434782608695654, |
|
"grad_norm": 0.2795998982851747, |
|
"learning_rate": 1.7721518987341772e-05, |
|
"loss": 0.245, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.046583850931677, |
|
"grad_norm": 0.22761521829171, |
|
"learning_rate": 1.766398158803222e-05, |
|
"loss": 0.257, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.049689440993789, |
|
"grad_norm": 0.27491770536559856, |
|
"learning_rate": 1.760644418872267e-05, |
|
"loss": 0.2543, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.0527950310559007, |
|
"grad_norm": 0.2694008994446243, |
|
"learning_rate": 1.7548906789413118e-05, |
|
"loss": 0.2523, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.0559006211180124, |
|
"grad_norm": 0.2649790265269665, |
|
"learning_rate": 1.7491369390103567e-05, |
|
"loss": 0.2638, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.059006211180124, |
|
"grad_norm": 0.2592501818191762, |
|
"learning_rate": 1.7433831990794016e-05, |
|
"loss": 0.2577, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.062111801242236, |
|
"grad_norm": 0.2669822602469171, |
|
"learning_rate": 1.7376294591484464e-05, |
|
"loss": 0.2609, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.0652173913043477, |
|
"grad_norm": 0.24387894874703012, |
|
"learning_rate": 1.7318757192174913e-05, |
|
"loss": 0.2633, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.0683229813664594, |
|
"grad_norm": 0.2492896708129992, |
|
"learning_rate": 1.7261219792865362e-05, |
|
"loss": 0.2574, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.0714285714285716, |
|
"grad_norm": 0.23542048799701373, |
|
"learning_rate": 1.720368239355581e-05, |
|
"loss": 0.2472, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.0745341614906834, |
|
"grad_norm": 0.23318721989860372, |
|
"learning_rate": 1.714614499424626e-05, |
|
"loss": 0.2492, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.077639751552795, |
|
"grad_norm": 0.2184370669246145, |
|
"learning_rate": 1.7088607594936708e-05, |
|
"loss": 0.2614, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.080745341614907, |
|
"grad_norm": 0.23606974543337897, |
|
"learning_rate": 1.7031070195627157e-05, |
|
"loss": 0.2669, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.0838509316770186, |
|
"grad_norm": 0.24573655537821745, |
|
"learning_rate": 1.6973532796317605e-05, |
|
"loss": 0.2495, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.0869565217391304, |
|
"grad_norm": 0.22577283258104885, |
|
"learning_rate": 1.6915995397008054e-05, |
|
"loss": 0.2324, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.090062111801242, |
|
"grad_norm": 0.22880524187260692, |
|
"learning_rate": 1.6858457997698503e-05, |
|
"loss": 0.2454, |
|
"step": 673 |
|
    },
    {
      "epoch": 2.093167701863354,
      "grad_norm": 0.23649506425354394,
      "learning_rate": 1.680092059838895e-05,
      "loss": 0.2706,
      "step": 674
    },
    {
      "epoch": 2.0962732919254656,
      "grad_norm": 0.25403811052331426,
      "learning_rate": 1.67433831990794e-05,
      "loss": 0.2543,
      "step": 675
    },
    {
      "epoch": 2.099378881987578,
      "grad_norm": 0.2443521353581772,
      "learning_rate": 1.668584579976985e-05,
      "loss": 0.2484,
      "step": 676
    },
    {
      "epoch": 2.1024844720496896,
      "grad_norm": 0.21359909291298998,
      "learning_rate": 1.6628308400460298e-05,
      "loss": 0.242,
      "step": 677
    },
    {
      "epoch": 2.1055900621118013,
      "grad_norm": 0.2270460742418379,
      "learning_rate": 1.6570771001150746e-05,
      "loss": 0.248,
      "step": 678
    },
    {
      "epoch": 2.108695652173913,
      "grad_norm": 0.22643050215454086,
      "learning_rate": 1.6513233601841195e-05,
      "loss": 0.2477,
      "step": 679
    },
    {
      "epoch": 2.111801242236025,
      "grad_norm": 0.22299140134872011,
      "learning_rate": 1.6455696202531644e-05,
      "loss": 0.2568,
      "step": 680
    },
    {
      "epoch": 2.1149068322981366,
      "grad_norm": 0.21597401049687515,
      "learning_rate": 1.6398158803222093e-05,
      "loss": 0.2567,
      "step": 681
    },
    {
      "epoch": 2.1180124223602483,
      "grad_norm": 0.21424964627998483,
      "learning_rate": 1.634062140391254e-05,
      "loss": 0.2594,
      "step": 682
    },
    {
      "epoch": 2.12111801242236,
      "grad_norm": 0.22064934278360224,
      "learning_rate": 1.628308400460299e-05,
      "loss": 0.2548,
      "step": 683
    },
    {
      "epoch": 2.124223602484472,
      "grad_norm": 0.2226010291399242,
      "learning_rate": 1.622554660529344e-05,
      "loss": 0.2392,
      "step": 684
    },
    {
      "epoch": 2.127329192546584,
      "grad_norm": 0.23817934921154135,
      "learning_rate": 1.616800920598389e-05,
      "loss": 0.263,
      "step": 685
    },
    {
      "epoch": 2.130434782608696,
      "grad_norm": 0.21811012125800597,
      "learning_rate": 1.611047180667434e-05,
      "loss": 0.239,
      "step": 686
    },
    {
      "epoch": 2.1335403726708075,
      "grad_norm": 0.22669201592312113,
      "learning_rate": 1.605293440736479e-05,
      "loss": 0.2526,
      "step": 687
    },
    {
      "epoch": 2.1366459627329193,
      "grad_norm": 0.23305634956402152,
      "learning_rate": 1.5995397008055237e-05,
      "loss": 0.2685,
      "step": 688
    },
    {
      "epoch": 2.139751552795031,
      "grad_norm": 0.2115884014346869,
      "learning_rate": 1.5937859608745686e-05,
      "loss": 0.2393,
      "step": 689
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 0.2115627751194399,
      "learning_rate": 1.5880322209436135e-05,
      "loss": 0.251,
      "step": 690
    },
    {
      "epoch": 2.1459627329192545,
      "grad_norm": 0.21692908050798426,
      "learning_rate": 1.5822784810126583e-05,
      "loss": 0.2512,
      "step": 691
    },
    {
      "epoch": 2.1490683229813663,
      "grad_norm": 0.22462301830943301,
      "learning_rate": 1.5765247410817032e-05,
      "loss": 0.2511,
      "step": 692
    },
    {
      "epoch": 2.1521739130434785,
      "grad_norm": 0.19326801109974084,
      "learning_rate": 1.570771001150748e-05,
      "loss": 0.2452,
      "step": 693
    },
    {
      "epoch": 2.1552795031055902,
      "grad_norm": 0.23274032126488928,
      "learning_rate": 1.565017261219793e-05,
      "loss": 0.2529,
      "step": 694
    },
    {
      "epoch": 2.158385093167702,
      "grad_norm": 0.22582002907669432,
      "learning_rate": 1.5592635212888378e-05,
      "loss": 0.2597,
      "step": 695
    },
    {
      "epoch": 2.1614906832298137,
      "grad_norm": 0.21665150454694335,
      "learning_rate": 1.5535097813578827e-05,
      "loss": 0.258,
      "step": 696
    },
    {
      "epoch": 2.1645962732919255,
      "grad_norm": 0.21401338541621684,
      "learning_rate": 1.5477560414269276e-05,
      "loss": 0.2556,
      "step": 697
    },
    {
      "epoch": 2.1677018633540373,
      "grad_norm": 0.23527336018366451,
      "learning_rate": 1.5420023014959724e-05,
      "loss": 0.2582,
      "step": 698
    },
    {
      "epoch": 2.170807453416149,
      "grad_norm": 0.21445105954714194,
      "learning_rate": 1.5362485615650173e-05,
      "loss": 0.2503,
      "step": 699
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 0.21511080318136375,
      "learning_rate": 1.5304948216340622e-05,
      "loss": 0.2566,
      "step": 700
    },
    {
      "epoch": 2.1770186335403725,
      "grad_norm": 0.24111702768301724,
      "learning_rate": 1.5247410817031072e-05,
      "loss": 0.2425,
      "step": 701
    },
    {
      "epoch": 2.1801242236024843,
      "grad_norm": 0.21498468064553858,
      "learning_rate": 1.5189873417721521e-05,
      "loss": 0.2439,
      "step": 702
    },
    {
      "epoch": 2.1832298136645965,
      "grad_norm": 0.21967291818581178,
      "learning_rate": 1.513233601841197e-05,
      "loss": 0.2509,
      "step": 703
    },
    {
      "epoch": 2.186335403726708,
      "grad_norm": 0.2205935490599146,
      "learning_rate": 1.5074798619102418e-05,
      "loss": 0.2579,
      "step": 704
    },
    {
      "epoch": 2.18944099378882,
      "grad_norm": 0.23758914721632698,
      "learning_rate": 1.5017261219792867e-05,
      "loss": 0.2399,
      "step": 705
    },
    {
      "epoch": 2.1925465838509317,
      "grad_norm": 0.19571832530537867,
      "learning_rate": 1.4959723820483316e-05,
      "loss": 0.2404,
      "step": 706
    },
    {
      "epoch": 2.1956521739130435,
      "grad_norm": 0.20772523111005442,
      "learning_rate": 1.4902186421173765e-05,
      "loss": 0.2489,
      "step": 707
    },
    {
      "epoch": 2.198757763975155,
      "grad_norm": 0.2078388868457063,
      "learning_rate": 1.4844649021864213e-05,
      "loss": 0.2404,
      "step": 708
    },
    {
      "epoch": 2.201863354037267,
      "grad_norm": 0.24074640885647317,
      "learning_rate": 1.4787111622554662e-05,
      "loss": 0.2647,
      "step": 709
    },
    {
      "epoch": 2.2049689440993787,
      "grad_norm": 0.20937990276765678,
      "learning_rate": 1.472957422324511e-05,
      "loss": 0.2578,
      "step": 710
    },
    {
      "epoch": 2.208074534161491,
      "grad_norm": 0.2639807190802869,
      "learning_rate": 1.467203682393556e-05,
      "loss": 0.2607,
      "step": 711
    },
    {
      "epoch": 2.2111801242236027,
      "grad_norm": 0.26293955788698453,
      "learning_rate": 1.4614499424626008e-05,
      "loss": 0.2638,
      "step": 712
    },
    {
      "epoch": 2.2142857142857144,
      "grad_norm": 0.23828883015584687,
      "learning_rate": 1.4556962025316457e-05,
      "loss": 0.2577,
      "step": 713
    },
    {
      "epoch": 2.217391304347826,
      "grad_norm": 0.24740324327511762,
      "learning_rate": 1.4499424626006906e-05,
      "loss": 0.2603,
      "step": 714
    },
    {
      "epoch": 2.220496894409938,
      "grad_norm": 0.22582258369375163,
      "learning_rate": 1.4441887226697354e-05,
      "loss": 0.2442,
      "step": 715
    },
    {
      "epoch": 2.2236024844720497,
      "grad_norm": 0.24839008006413138,
      "learning_rate": 1.4384349827387803e-05,
      "loss": 0.2591,
      "step": 716
    },
    {
      "epoch": 2.2267080745341614,
      "grad_norm": 0.2507061092171656,
      "learning_rate": 1.4326812428078252e-05,
      "loss": 0.2525,
      "step": 717
    },
    {
      "epoch": 2.229813664596273,
      "grad_norm": 0.214855054431312,
      "learning_rate": 1.42692750287687e-05,
      "loss": 0.2436,
      "step": 718
    },
    {
      "epoch": 2.232919254658385,
      "grad_norm": 0.21592522701402342,
      "learning_rate": 1.421173762945915e-05,
      "loss": 0.2516,
      "step": 719
    },
    {
      "epoch": 2.2360248447204967,
      "grad_norm": 0.20915695199545198,
      "learning_rate": 1.4154200230149598e-05,
      "loss": 0.2597,
      "step": 720
    },
    {
      "epoch": 2.239130434782609,
      "grad_norm": 0.22903634190903957,
      "learning_rate": 1.4096662830840047e-05,
      "loss": 0.2667,
      "step": 721
    },
    {
      "epoch": 2.2422360248447206,
      "grad_norm": 0.21439993038647093,
      "learning_rate": 1.4039125431530495e-05,
      "loss": 0.2436,
      "step": 722
    },
    {
      "epoch": 2.2453416149068324,
      "grad_norm": 0.21937639860358657,
      "learning_rate": 1.3981588032220944e-05,
      "loss": 0.2458,
      "step": 723
    },
    {
      "epoch": 2.248447204968944,
      "grad_norm": 0.2013130017609961,
      "learning_rate": 1.3924050632911393e-05,
      "loss": 0.2491,
      "step": 724
    },
    {
      "epoch": 2.251552795031056,
      "grad_norm": 0.22887750081435682,
      "learning_rate": 1.3866513233601842e-05,
      "loss": 0.2441,
      "step": 725
    },
    {
      "epoch": 2.2546583850931676,
      "grad_norm": 0.2288064963841507,
      "learning_rate": 1.380897583429229e-05,
      "loss": 0.2418,
      "step": 726
    },
    {
      "epoch": 2.2577639751552794,
      "grad_norm": 0.23248918550222136,
      "learning_rate": 1.3751438434982739e-05,
      "loss": 0.2625,
      "step": 727
    },
    {
      "epoch": 2.260869565217391,
      "grad_norm": 0.2090094171369587,
      "learning_rate": 1.3693901035673188e-05,
      "loss": 0.2507,
      "step": 728
    },
    {
      "epoch": 2.2639751552795033,
      "grad_norm": 0.23154174958563464,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 0.2586,
      "step": 729
    },
    {
      "epoch": 2.267080745341615,
      "grad_norm": 0.24350218064576923,
      "learning_rate": 1.3578826237054085e-05,
      "loss": 0.263,
      "step": 730
    },
    {
      "epoch": 2.270186335403727,
      "grad_norm": 0.2278585941156764,
      "learning_rate": 1.3521288837744534e-05,
      "loss": 0.2555,
      "step": 731
    },
    {
      "epoch": 2.2732919254658386,
      "grad_norm": 0.20801276930170154,
      "learning_rate": 1.3463751438434983e-05,
      "loss": 0.2572,
      "step": 732
    },
    {
      "epoch": 2.2763975155279503,
      "grad_norm": 0.22919123007559652,
      "learning_rate": 1.3406214039125431e-05,
      "loss": 0.2582,
      "step": 733
    },
    {
      "epoch": 2.279503105590062,
      "grad_norm": 0.21268694380279451,
      "learning_rate": 1.334867663981588e-05,
      "loss": 0.2512,
      "step": 734
    },
    {
      "epoch": 2.282608695652174,
      "grad_norm": 0.2182606134520971,
      "learning_rate": 1.3291139240506329e-05,
      "loss": 0.2536,
      "step": 735
    },
    {
      "epoch": 2.2857142857142856,
      "grad_norm": 0.2177977754376004,
      "learning_rate": 1.323360184119678e-05,
      "loss": 0.2589,
      "step": 736
    },
    {
      "epoch": 2.2888198757763973,
      "grad_norm": 0.2079260936390528,
      "learning_rate": 1.3176064441887228e-05,
      "loss": 0.2445,
      "step": 737
    },
    {
      "epoch": 2.291925465838509,
      "grad_norm": 0.21654285079809454,
      "learning_rate": 1.3118527042577677e-05,
      "loss": 0.2492,
      "step": 738
    },
    {
      "epoch": 2.2950310559006213,
      "grad_norm": 0.22224222175484207,
      "learning_rate": 1.3060989643268126e-05,
      "loss": 0.2555,
      "step": 739
    },
    {
      "epoch": 2.298136645962733,
      "grad_norm": 0.2013544241929392,
      "learning_rate": 1.3003452243958574e-05,
      "loss": 0.2457,
      "step": 740
    },
    {
      "epoch": 2.301242236024845,
      "grad_norm": 0.21733404218015004,
      "learning_rate": 1.2945914844649023e-05,
      "loss": 0.2659,
      "step": 741
    },
    {
      "epoch": 2.3043478260869565,
      "grad_norm": 0.21179336140885693,
      "learning_rate": 1.2888377445339472e-05,
      "loss": 0.2426,
      "step": 742
    },
    {
      "epoch": 2.3074534161490683,
      "grad_norm": 0.2285599698694653,
      "learning_rate": 1.283084004602992e-05,
      "loss": 0.2429,
      "step": 743
    },
    {
      "epoch": 2.31055900621118,
      "grad_norm": 0.19835079918909265,
      "learning_rate": 1.2773302646720369e-05,
      "loss": 0.2489,
      "step": 744
    },
    {
      "epoch": 2.313664596273292,
      "grad_norm": 0.2298623252387309,
      "learning_rate": 1.2715765247410818e-05,
      "loss": 0.2655,
      "step": 745
    },
    {
      "epoch": 2.3167701863354035,
      "grad_norm": 0.23867880872639935,
      "learning_rate": 1.2658227848101267e-05,
      "loss": 0.2498,
      "step": 746
    },
    {
      "epoch": 2.3198757763975157,
      "grad_norm": 0.21037856832784158,
      "learning_rate": 1.2600690448791715e-05,
      "loss": 0.2589,
      "step": 747
    },
    {
      "epoch": 2.3229813664596275,
      "grad_norm": 0.24695028457966048,
      "learning_rate": 1.2543153049482164e-05,
      "loss": 0.2502,
      "step": 748
    },
    {
      "epoch": 2.3260869565217392,
      "grad_norm": 0.23360363557581765,
      "learning_rate": 1.2485615650172613e-05,
      "loss": 0.259,
      "step": 749
    },
    {
      "epoch": 2.329192546583851,
      "grad_norm": 0.22335503888847086,
      "learning_rate": 1.2428078250863062e-05,
      "loss": 0.2456,
      "step": 750
    },
    {
      "epoch": 2.3322981366459627,
      "grad_norm": 0.21231134626201825,
      "learning_rate": 1.237054085155351e-05,
      "loss": 0.26,
      "step": 751
    },
    {
      "epoch": 2.3354037267080745,
      "grad_norm": 0.20990198210516803,
      "learning_rate": 1.2313003452243959e-05,
      "loss": 0.2441,
      "step": 752
    },
    {
      "epoch": 2.3385093167701863,
      "grad_norm": 0.221067131454967,
      "learning_rate": 1.2255466052934408e-05,
      "loss": 0.2469,
      "step": 753
    },
    {
      "epoch": 2.341614906832298,
      "grad_norm": 0.22138406777470937,
      "learning_rate": 1.2197928653624856e-05,
      "loss": 0.261,
      "step": 754
    },
    {
      "epoch": 2.3447204968944098,
      "grad_norm": 0.21398489008508845,
      "learning_rate": 1.2140391254315305e-05,
      "loss": 0.2566,
      "step": 755
    },
    {
      "epoch": 2.3478260869565215,
      "grad_norm": 0.20448116831895594,
      "learning_rate": 1.2082853855005754e-05,
      "loss": 0.2598,
      "step": 756
    },
    {
      "epoch": 2.3509316770186337,
      "grad_norm": 0.21255766006062407,
      "learning_rate": 1.2025316455696203e-05,
      "loss": 0.2526,
      "step": 757
    },
    {
      "epoch": 2.3540372670807455,
      "grad_norm": 0.19087455271546003,
      "learning_rate": 1.1967779056386651e-05,
      "loss": 0.2537,
      "step": 758
    },
    {
      "epoch": 2.357142857142857,
      "grad_norm": 0.20379774772998854,
      "learning_rate": 1.19102416570771e-05,
      "loss": 0.2668,
      "step": 759
    },
    {
      "epoch": 2.360248447204969,
      "grad_norm": 0.19801295062012142,
      "learning_rate": 1.1852704257767549e-05,
      "loss": 0.2479,
      "step": 760
    },
    {
      "epoch": 2.3633540372670807,
      "grad_norm": 0.2053725094185451,
      "learning_rate": 1.1795166858457997e-05,
      "loss": 0.2597,
      "step": 761
    },
    {
      "epoch": 2.3664596273291925,
      "grad_norm": 0.19414430502845648,
      "learning_rate": 1.1737629459148446e-05,
      "loss": 0.2445,
      "step": 762
    },
    {
      "epoch": 2.369565217391304,
      "grad_norm": 0.20779479767313294,
      "learning_rate": 1.1680092059838895e-05,
      "loss": 0.2649,
      "step": 763
    },
    {
      "epoch": 2.372670807453416,
      "grad_norm": 0.20304929332054908,
      "learning_rate": 1.1622554660529344e-05,
      "loss": 0.2624,
      "step": 764
    },
    {
      "epoch": 2.375776397515528,
      "grad_norm": 0.20512146624367367,
      "learning_rate": 1.1565017261219792e-05,
      "loss": 0.2532,
      "step": 765
    },
    {
      "epoch": 2.37888198757764,
      "grad_norm": 0.1948376797912715,
      "learning_rate": 1.1507479861910241e-05,
      "loss": 0.2593,
      "step": 766
    },
    {
      "epoch": 2.3819875776397517,
      "grad_norm": 0.20111608619484334,
      "learning_rate": 1.144994246260069e-05,
      "loss": 0.2431,
      "step": 767
    },
    {
      "epoch": 2.3850931677018634,
      "grad_norm": 0.20424563225076126,
      "learning_rate": 1.139240506329114e-05,
      "loss": 0.239,
      "step": 768
    },
    {
      "epoch": 2.388198757763975,
      "grad_norm": 0.20385122820209117,
      "learning_rate": 1.1334867663981589e-05,
      "loss": 0.2519,
      "step": 769
    },
    {
      "epoch": 2.391304347826087,
      "grad_norm": 0.2169017997179514,
      "learning_rate": 1.1277330264672038e-05,
      "loss": 0.2599,
      "step": 770
    },
    {
      "epoch": 2.3944099378881987,
      "grad_norm": 0.20583351351917192,
      "learning_rate": 1.1219792865362486e-05,
      "loss": 0.2515,
      "step": 771
    },
    {
      "epoch": 2.3975155279503104,
      "grad_norm": 0.20864268761499544,
      "learning_rate": 1.1162255466052935e-05,
      "loss": 0.2674,
      "step": 772
    },
    {
      "epoch": 2.400621118012422,
      "grad_norm": 0.18352483617724127,
      "learning_rate": 1.1104718066743384e-05,
      "loss": 0.2517,
      "step": 773
    },
    {
      "epoch": 2.403726708074534,
      "grad_norm": 0.19458848397143083,
      "learning_rate": 1.1047180667433833e-05,
      "loss": 0.2348,
      "step": 774
    },
    {
      "epoch": 2.406832298136646,
      "grad_norm": 0.22085258658145707,
      "learning_rate": 1.0989643268124281e-05,
      "loss": 0.2626,
      "step": 775
    },
    {
      "epoch": 2.409937888198758,
      "grad_norm": 0.2244287112114885,
      "learning_rate": 1.093210586881473e-05,
      "loss": 0.2656,
      "step": 776
    },
    {
      "epoch": 2.4130434782608696,
      "grad_norm": 0.2064604218573695,
      "learning_rate": 1.0874568469505179e-05,
      "loss": 0.2474,
      "step": 777
    },
    {
      "epoch": 2.4161490683229814,
      "grad_norm": 0.2170623734624135,
      "learning_rate": 1.0817031070195628e-05,
      "loss": 0.2673,
      "step": 778
    },
    {
      "epoch": 2.419254658385093,
      "grad_norm": 0.21813795262022834,
      "learning_rate": 1.0759493670886076e-05,
      "loss": 0.2566,
      "step": 779
    },
    {
      "epoch": 2.422360248447205,
      "grad_norm": 0.20015983943955706,
      "learning_rate": 1.0701956271576525e-05,
      "loss": 0.2433,
      "step": 780
    },
    {
      "epoch": 2.4254658385093166,
      "grad_norm": 0.2518786075901923,
      "learning_rate": 1.0644418872266974e-05,
      "loss": 0.2542,
      "step": 781
    },
    {
      "epoch": 2.4285714285714284,
      "grad_norm": 0.2039696978745147,
      "learning_rate": 1.0586881472957422e-05,
      "loss": 0.2635,
      "step": 782
    },
    {
      "epoch": 2.4316770186335406,
      "grad_norm": 0.20193387084839037,
      "learning_rate": 1.0529344073647871e-05,
      "loss": 0.2529,
      "step": 783
    },
    {
      "epoch": 2.4347826086956523,
      "grad_norm": 0.22256582381404963,
      "learning_rate": 1.047180667433832e-05,
      "loss": 0.2502,
      "step": 784
    },
    {
      "epoch": 2.437888198757764,
      "grad_norm": 0.20375665724837322,
      "learning_rate": 1.0414269275028769e-05,
      "loss": 0.241,
      "step": 785
    },
    {
      "epoch": 2.440993788819876,
      "grad_norm": 0.2179110760676831,
      "learning_rate": 1.0356731875719217e-05,
      "loss": 0.2599,
      "step": 786
    },
    {
      "epoch": 2.4440993788819876,
      "grad_norm": 0.22202976810767208,
      "learning_rate": 1.0299194476409666e-05,
      "loss": 0.2505,
      "step": 787
    },
    {
      "epoch": 2.4472049689440993,
      "grad_norm": 0.42670457942092715,
      "learning_rate": 1.0241657077100115e-05,
      "loss": 0.241,
      "step": 788
    },
    {
      "epoch": 2.450310559006211,
      "grad_norm": 0.20784564321109833,
      "learning_rate": 1.0184119677790564e-05,
      "loss": 0.2567,
      "step": 789
    },
    {
      "epoch": 2.453416149068323,
      "grad_norm": 0.20121980240137796,
      "learning_rate": 1.0126582278481012e-05,
      "loss": 0.2451,
      "step": 790
    },
    {
      "epoch": 2.4565217391304346,
      "grad_norm": 0.21747971229319626,
      "learning_rate": 1.0069044879171461e-05,
      "loss": 0.2387,
      "step": 791
    },
    {
      "epoch": 2.4596273291925463,
      "grad_norm": 0.18957130652801002,
      "learning_rate": 1.001150747986191e-05,
      "loss": 0.2404,
      "step": 792
    },
    {
      "epoch": 2.4627329192546585,
      "grad_norm": 0.19623974528931779,
      "learning_rate": 9.953970080552358e-06,
      "loss": 0.2505,
      "step": 793
    },
    {
      "epoch": 2.4658385093167703,
      "grad_norm": 0.2090564420719582,
      "learning_rate": 9.896432681242807e-06,
      "loss": 0.259,
      "step": 794
    },
    {
      "epoch": 2.468944099378882,
      "grad_norm": 0.20995347548362167,
      "learning_rate": 9.838895281933256e-06,
      "loss": 0.2557,
      "step": 795
    },
    {
      "epoch": 2.472049689440994,
      "grad_norm": 0.21072680749655628,
      "learning_rate": 9.781357882623705e-06,
      "loss": 0.2507,
      "step": 796
    },
    {
      "epoch": 2.4751552795031055,
      "grad_norm": 0.2028138320185975,
      "learning_rate": 9.723820483314153e-06,
      "loss": 0.2428,
      "step": 797
    },
    {
      "epoch": 2.4782608695652173,
      "grad_norm": 0.203416816769087,
      "learning_rate": 9.666283084004602e-06,
      "loss": 0.2549,
      "step": 798
    },
    {
      "epoch": 2.481366459627329,
      "grad_norm": 0.2114980169350222,
      "learning_rate": 9.60874568469505e-06,
      "loss": 0.2544,
      "step": 799
    },
    {
      "epoch": 2.4844720496894412,
      "grad_norm": 0.1947781123063217,
      "learning_rate": 9.551208285385501e-06,
      "loss": 0.246,
      "step": 800
    },
    {
      "epoch": 2.487577639751553,
      "grad_norm": 0.2313621289649826,
      "learning_rate": 9.49367088607595e-06,
      "loss": 0.2688,
      "step": 801
    },
    {
      "epoch": 2.4906832298136647,
      "grad_norm": 0.2070540850596655,
      "learning_rate": 9.436133486766399e-06,
      "loss": 0.2594,
      "step": 802
    },
    {
      "epoch": 2.4937888198757765,
      "grad_norm": 0.21169469541077635,
      "learning_rate": 9.378596087456847e-06,
      "loss": 0.2493,
      "step": 803
    },
    {
      "epoch": 2.4968944099378882,
      "grad_norm": 0.19281802475760265,
      "learning_rate": 9.321058688147296e-06,
      "loss": 0.25,
      "step": 804
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.2175842957962285,
      "learning_rate": 9.263521288837745e-06,
      "loss": 0.2678,
      "step": 805
    },
    {
      "epoch": 2.5031055900621118,
      "grad_norm": 0.1942027851518837,
      "learning_rate": 9.205983889528194e-06,
      "loss": 0.2505,
      "step": 806
    },
    {
      "epoch": 2.5062111801242235,
      "grad_norm": 0.2119389750172559,
      "learning_rate": 9.148446490218642e-06,
      "loss": 0.2647,
      "step": 807
    },
    {
      "epoch": 2.5093167701863353,
      "grad_norm": 0.20993843490643438,
      "learning_rate": 9.090909090909091e-06,
      "loss": 0.2381,
      "step": 808
    },
    {
      "epoch": 2.512422360248447,
      "grad_norm": 0.20387329805308116,
      "learning_rate": 9.03337169159954e-06,
      "loss": 0.2404,
      "step": 809
    },
    {
      "epoch": 2.5155279503105588,
      "grad_norm": 0.206875715468925,
      "learning_rate": 8.975834292289988e-06,
      "loss": 0.2625,
      "step": 810
    },
    {
      "epoch": 2.518633540372671,
      "grad_norm": 0.20699195679204746,
      "learning_rate": 8.918296892980437e-06,
      "loss": 0.2496,
      "step": 811
    },
    {
      "epoch": 2.5217391304347827,
      "grad_norm": 0.20082335227786552,
      "learning_rate": 8.860759493670886e-06,
      "loss": 0.2472,
      "step": 812
    },
    {
      "epoch": 2.5248447204968945,
      "grad_norm": 0.20337421721934987,
      "learning_rate": 8.803222094361335e-06,
      "loss": 0.2465,
      "step": 813
    },
    {
      "epoch": 2.527950310559006,
      "grad_norm": 0.19690561472031543,
      "learning_rate": 8.745684695051783e-06,
      "loss": 0.2562,
      "step": 814
    },
    {
      "epoch": 2.531055900621118,
      "grad_norm": 0.20942292198434145,
      "learning_rate": 8.688147295742232e-06,
      "loss": 0.2448,
      "step": 815
    },
    {
      "epoch": 2.5341614906832297,
      "grad_norm": 0.22511418926211027,
      "learning_rate": 8.630609896432681e-06,
      "loss": 0.2585,
      "step": 816
    },
    {
      "epoch": 2.5372670807453415,
      "grad_norm": 0.21038192778136464,
      "learning_rate": 8.57307249712313e-06,
      "loss": 0.2463,
      "step": 817
    },
    {
      "epoch": 2.5403726708074537,
      "grad_norm": 0.18890075777071388,
      "learning_rate": 8.515535097813578e-06,
      "loss": 0.2521,
      "step": 818
    },
    {
      "epoch": 2.5434782608695654,
      "grad_norm": 0.21205002134781,
      "learning_rate": 8.457997698504027e-06,
      "loss": 0.2585,
      "step": 819
    },
    {
      "epoch": 2.546583850931677,
      "grad_norm": 0.1941024098027217,
      "learning_rate": 8.400460299194476e-06,
      "loss": 0.2566,
      "step": 820
    },
    {
      "epoch": 2.549689440993789,
      "grad_norm": 0.30349180360429645,
      "learning_rate": 8.342922899884924e-06,
      "loss": 0.2623,
      "step": 821
    },
    {
      "epoch": 2.5527950310559007,
      "grad_norm": 0.22803507573466544,
      "learning_rate": 8.285385500575373e-06,
      "loss": 0.2558,
      "step": 822
    },
    {
      "epoch": 2.5559006211180124,
      "grad_norm": 0.2020632346168216,
      "learning_rate": 8.227848101265822e-06,
      "loss": 0.2586,
      "step": 823
    },
    {
      "epoch": 2.559006211180124,
      "grad_norm": 0.19503633689058,
      "learning_rate": 8.17031070195627e-06,
      "loss": 0.2628,
      "step": 824
    },
    {
      "epoch": 2.562111801242236,
      "grad_norm": 0.19443407045409983,
      "learning_rate": 8.11277330264672e-06,
      "loss": 0.2492,
      "step": 825
    },
    {
      "epoch": 2.5652173913043477,
      "grad_norm": 0.20150007916652513,
      "learning_rate": 8.05523590333717e-06,
      "loss": 0.256,
      "step": 826
    },
    {
      "epoch": 2.5683229813664594,
      "grad_norm": 0.20193826865741932,
      "learning_rate": 7.997698504027619e-06,
      "loss": 0.2716,
      "step": 827
    },
    {
      "epoch": 2.571428571428571,
      "grad_norm": 0.19621787984261999,
      "learning_rate": 7.940161104718067e-06,
      "loss": 0.2443,
      "step": 828
    },
    {
      "epoch": 2.5745341614906834,
      "grad_norm": 0.19610522530135707,
      "learning_rate": 7.882623705408516e-06,
      "loss": 0.2562,
      "step": 829
    },
    {
      "epoch": 2.577639751552795,
      "grad_norm": 0.1980537399225623,
      "learning_rate": 7.825086306098965e-06,
      "loss": 0.2475,
      "step": 830
    },
    {
      "epoch": 2.580745341614907,
      "grad_norm": 0.19074805307763945,
      "learning_rate": 7.767548906789413e-06,
      "loss": 0.2557,
      "step": 831
    },
    {
      "epoch": 2.5838509316770186,
      "grad_norm": 0.19613067044699573,
      "learning_rate": 7.710011507479862e-06,
      "loss": 0.2633,
      "step": 832
    },
    {
      "epoch": 2.5869565217391304,
      "grad_norm": 0.19682931877320217,
      "learning_rate": 7.652474108170311e-06,
      "loss": 0.2378,
      "step": 833
    },
    {
      "epoch": 2.590062111801242,
      "grad_norm": 0.20053417585734873,
      "learning_rate": 7.5949367088607605e-06,
      "loss": 0.2498,
      "step": 834
    },
    {
      "epoch": 2.593167701863354,
      "grad_norm": 0.19178100866522357,
      "learning_rate": 7.537399309551209e-06,
      "loss": 0.2355,
      "step": 835
    },
    {
      "epoch": 2.596273291925466,
      "grad_norm": 0.2084827189783707,
      "learning_rate": 7.479861910241658e-06,
      "loss": 0.2764,
      "step": 836
    },
    {
      "epoch": 2.599378881987578,
      "grad_norm": 0.19540838307901068,
      "learning_rate": 7.422324510932107e-06,
      "loss": 0.2437,
      "step": 837
    },
    {
      "epoch": 2.6024844720496896,
      "grad_norm": 0.19587457349490991,
      "learning_rate": 7.364787111622555e-06,
      "loss": 0.2489,
      "step": 838
    },
    {
      "epoch": 2.6055900621118013,
      "grad_norm": 0.25075690817051544,
      "learning_rate": 7.307249712313004e-06,
      "loss": 0.2648,
      "step": 839
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 0.20743291534086578,
      "learning_rate": 7.249712313003453e-06,
      "loss": 0.2646,
      "step": 840
    },
    {
      "epoch": 2.611801242236025,
      "grad_norm": 0.21071395029449075,
      "learning_rate": 7.1921749136939016e-06,
      "loss": 0.2427,
      "step": 841
    },
    {
      "epoch": 2.6149068322981366,
      "grad_norm": 0.20235523726201224,
      "learning_rate": 7.13463751438435e-06,
      "loss": 0.2587,
      "step": 842
    },
    {
      "epoch": 2.6180124223602483,
      "grad_norm": 0.20149232436113795,
      "learning_rate": 7.077100115074799e-06,
      "loss": 0.2516,
      "step": 843
    },
    {
      "epoch": 2.62111801242236,
      "grad_norm": 0.21144648873433503,
      "learning_rate": 7.019562715765248e-06,
      "loss": 0.2582,
      "step": 844
    },
    {
      "epoch": 2.624223602484472,
      "grad_norm": 0.2162300937976304,
      "learning_rate": 6.9620253164556965e-06,
      "loss": 0.2556,
      "step": 845
    },
    {
      "epoch": 2.6273291925465836,
      "grad_norm": 0.21106771620646603,
      "learning_rate": 6.904487917146145e-06,
      "loss": 0.2558,
      "step": 846
    },
    {
      "epoch": 2.630434782608696,
      "grad_norm": 0.23609832773446915,
      "learning_rate": 6.846950517836594e-06,
      "loss": 0.2572,
      "step": 847
    },
    {
      "epoch": 2.6335403726708075,
      "grad_norm": 0.21122404379666423,
      "learning_rate": 6.789413118527043e-06,
      "loss": 0.2434,
      "step": 848
    },
    {
      "epoch": 2.6366459627329193,
      "grad_norm": 0.2015181805089703,
      "learning_rate": 6.731875719217491e-06,
      "loss": 0.2418,
      "step": 849
    },
    {
      "epoch": 2.639751552795031,
      "grad_norm": 0.20647243106844593,
      "learning_rate": 6.67433831990794e-06,
      "loss": 0.265,
      "step": 850
    },
    {
      "epoch": 2.642857142857143,
      "grad_norm": 0.2083640341120549,
      "learning_rate": 6.61680092059839e-06,
      "loss": 0.2531,
      "step": 851
    },
    {
      "epoch": 2.6459627329192545,
      "grad_norm": 0.20501908976688168,
      "learning_rate": 6.559263521288838e-06,
      "loss": 0.2514,
      "step": 852
    },
    {
      "epoch": 2.6490683229813663,
      "grad_norm": 0.19844284276810914,
      "learning_rate": 6.501726121979287e-06,
      "loss": 0.2644,
      "step": 853
    },
    {
      "epoch": 2.6521739130434785,
      "grad_norm": 0.21201237882135082,
      "learning_rate": 6.444188722669736e-06,
      "loss": 0.2568,
      "step": 854
    },
    {
      "epoch": 2.6552795031055902,
      "grad_norm": 0.22195301360518224,
      "learning_rate": 6.3866513233601846e-06,
      "loss": 0.261,
      "step": 855
    },
    {
      "epoch": 2.658385093167702,
      "grad_norm": 0.19287865061356418,
      "learning_rate": 6.329113924050633e-06,
      "loss": 0.2469,
      "step": 856
    },
    {
      "epoch": 2.6614906832298137,
      "grad_norm": 0.19640829139853255,
      "learning_rate": 6.271576524741082e-06,
      "loss": 0.2462,
      "step": 857
    },
    {
      "epoch": 2.6645962732919255,
      "grad_norm": 0.20101972350059313,
      "learning_rate": 6.214039125431531e-06,
      "loss": 0.255,
      "step": 858
    },
    {
      "epoch": 2.6677018633540373,
      "grad_norm": 0.2841326489307957,
      "learning_rate": 6.1565017261219795e-06,
      "loss": 0.2457,
      "step": 859
    },
    {
      "epoch": 2.670807453416149,
      "grad_norm": 0.18827454901664883,
      "learning_rate": 6.098964326812428e-06,
      "loss": 0.2427,
      "step": 860
    },
    {
      "epoch": 2.6739130434782608,
      "grad_norm": 0.20109847479853832,
      "learning_rate": 6.041426927502877e-06,
      "loss": 0.2402,
      "step": 861
    },
    {
      "epoch": 2.6770186335403725,
      "grad_norm": 0.1910402172602598,
      "learning_rate": 5.983889528193326e-06,
      "loss": 0.2627,
      "step": 862
    },
    {
      "epoch": 2.6801242236024843,
      "grad_norm": 0.1974312904693097,
      "learning_rate": 5.926352128883774e-06,
      "loss": 0.2625,
      "step": 863
    },
    {
      "epoch": 2.683229813664596,
      "grad_norm": 0.19911868656713894,
      "learning_rate": 5.868814729574223e-06,
      "loss": 0.2368,
      "step": 864
    },
    {
      "epoch": 2.686335403726708,
      "grad_norm": 0.21362726329843149,
      "learning_rate": 5.811277330264672e-06,
      "loss": 0.2534,
      "step": 865
    },
    {
      "epoch": 2.68944099378882,
      "grad_norm": 0.20941798902436187,
      "learning_rate": 5.7537399309551206e-06,
      "loss": 0.2454,
      "step": 866
    },
    {
      "epoch": 2.6925465838509317,
      "grad_norm": 0.19014291486371018,
      "learning_rate": 5.69620253164557e-06,
      "loss": 0.2446,
      "step": 867
    },
    {
      "epoch": 2.6956521739130435,
      "grad_norm": 0.19597012112115988,
      "learning_rate": 5.638665132336019e-06,
      "loss": 0.2537,
      "step": 868
    },
    {
      "epoch": 2.698757763975155,
      "grad_norm": 0.19714293851097728,
      "learning_rate": 5.581127733026468e-06,
      "loss": 0.2468,
      "step": 869
    },
    {
      "epoch": 2.701863354037267,
      "grad_norm": 0.19621178971442163,
      "learning_rate": 5.523590333716916e-06,
      "loss": 0.2507,
      "step": 870
    },
    {
      "epoch": 2.704968944099379,
      "grad_norm": 0.19491684844874946,
      "learning_rate": 5.466052934407365e-06,
      "loss": 0.2413,
      "step": 871
    },
    {
      "epoch": 2.708074534161491,
      "grad_norm": 0.27521581958829827,
      "learning_rate": 5.408515535097814e-06,
      "loss": 0.2633,
      "step": 872
    },
    {
      "epoch": 2.7111801242236027,
      "grad_norm": 0.2168313001961523,
      "learning_rate": 5.3509781357882625e-06,
      "loss": 0.264,
      "step": 873
    },
    {
      "epoch": 2.7142857142857144,
      "grad_norm": 0.18797210234683806,
      "learning_rate": 5.293440736478711e-06,
      "loss": 0.2447,
      "step": 874
    },
    {
      "epoch": 2.717391304347826,
      "grad_norm": 0.21084636753160527,
      "learning_rate": 5.23590333716916e-06,
      "loss": 0.2619,
      "step": 875
    },
    {
      "epoch": 2.720496894409938,
      "grad_norm": 0.20635684776280216,
      "learning_rate": 5.178365937859609e-06,
      "loss": 0.2583,
      "step": 876
    },
    {
      "epoch": 2.7236024844720497,
      "grad_norm": 0.19468296653400607,
      "learning_rate": 5.120828538550057e-06,
      "loss": 0.2492,
      "step": 877
    },
    {
      "epoch": 2.7267080745341614,
      "grad_norm": 0.20398048699150237,
      "learning_rate": 5.063291139240506e-06,
      "loss": 0.2549,
      "step": 878
    },
    {
      "epoch": 2.729813664596273,
      "grad_norm": 0.18689815600092072,
      "learning_rate": 5.005753739930955e-06,
      "loss": 0.2488,
      "step": 879
    },
    {
      "epoch": 2.732919254658385,
      "grad_norm": 0.20597588086540602,
      "learning_rate": 4.948216340621404e-06,
      "loss": 0.2667,
      "step": 880
    },
    {
      "epoch": 2.7360248447204967,
      "grad_norm": 0.196856493986424,
      "learning_rate": 4.890678941311852e-06,
      "loss": 0.2513,
      "step": 881
    },
    {
      "epoch": 2.7391304347826084,
      "grad_norm": 0.19482252545749987,
      "learning_rate": 4.833141542002301e-06,
      "loss": 0.2498,
      "step": 882
    },
    {
      "epoch": 2.7422360248447206,
      "grad_norm": 0.19795769225255558,
      "learning_rate": 4.775604142692751e-06,
      "loss": 0.2499,
      "step": 883
    },
    {
      "epoch": 2.7453416149068324,
      "grad_norm": 0.21689477537897567,
      "learning_rate": 4.718066743383199e-06,
      "loss": 0.284,
      "step": 884
    },
    {
      "epoch": 2.748447204968944,
      "grad_norm": 0.2418368942479182,
      "learning_rate": 4.660529344073648e-06,
      "loss": 0.2766,
      "step": 885
    },
    {
      "epoch": 2.751552795031056,
      "grad_norm": 0.20477977797718222,
      "learning_rate": 4.602991944764097e-06,
      "loss": 0.2401,
      "step": 886
    },
    {
      "epoch": 2.7546583850931676,
      "grad_norm": 0.2006217058218365,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.2471,
      "step": 887
    },
    {
      "epoch": 2.7577639751552794,
      "grad_norm": 0.20256868584609686,
      "learning_rate": 4.487917146144994e-06,
      "loss": 0.2441,
      "step": 888
    },
    {
      "epoch": 2.7608695652173916,
      "grad_norm": 0.21086545356411496,
      "learning_rate": 4.430379746835443e-06,
      "loss": 0.255,
      "step": 889
    },
    {
      "epoch": 2.7639751552795033,
      "grad_norm": 0.19012940644030216,
      "learning_rate": 4.372842347525892e-06,
      "loss": 0.2524,
      "step": 890
    },
    {
      "epoch": 2.767080745341615,
      "grad_norm": 0.20733606950697256,
      "learning_rate": 4.3153049482163404e-06,
      "loss": 0.2502,
      "step": 891
    },
    {
      "epoch": 2.770186335403727,
      "grad_norm": 0.19869202500390978,
      "learning_rate": 4.257767548906789e-06,
      "loss": 0.2488,
      "step": 892
    },
    {
      "epoch": 2.7732919254658386,
      "grad_norm": 0.21061575298666055,
      "learning_rate": 4.200230149597238e-06,
      "loss": 0.2535,
      "step": 893
    },
    {
      "epoch": 2.7763975155279503,
      "grad_norm": 0.1902154270690934,
      "learning_rate": 4.142692750287687e-06,
      "loss": 0.2523,
      "step": 894
    },
    {
      "epoch": 2.779503105590062,
      "grad_norm": 0.19515929958573747,
      "learning_rate": 4.085155350978135e-06,
      "loss": 0.2544,
      "step": 895
    },
    {
      "epoch": 2.782608695652174,
      "grad_norm": 0.19252012143661815,
      "learning_rate": 4.027617951668585e-06,
      "loss": 0.2638,
      "step": 896
    },
    {
      "epoch": 2.7857142857142856,
      "grad_norm": 0.1923327877416844,
      "learning_rate": 3.970080552359034e-06,
      "loss": 0.2462,
      "step": 897
    },
    {
      "epoch": 2.7888198757763973,
      "grad_norm": 0.18586501981252762,
      "learning_rate": 3.912543153049482e-06,
      "loss": 0.2403,
      "step": 898
    },
    {
      "epoch": 2.791925465838509,
      "grad_norm": 0.1986091973327919,
      "learning_rate": 3.855005753739931e-06,
      "loss": 0.2655,
      "step": 899
    },
    {
      "epoch": 2.795031055900621,
      "grad_norm": 0.18536496942596287,
      "learning_rate": 3.7974683544303802e-06,
      "loss": 0.2389,
      "step": 900
    },
    {
      "epoch": 2.798136645962733,
      "grad_norm": 0.19607218549803698,
      "learning_rate": 3.739930955120829e-06,
      "loss": 0.2542,
      "step": 901
    },
    {
      "epoch": 2.801242236024845,
      "grad_norm": 0.19944282694872204,
      "learning_rate": 3.6823935558112777e-06,
      "loss": 0.2434,
      "step": 902
    },
    {
      "epoch": 2.8043478260869565,
      "grad_norm": 0.1972448743409019,
      "learning_rate": 3.6248561565017264e-06,
      "loss": 0.247,
      "step": 903
    },
    {
      "epoch": 2.8074534161490683,
      "grad_norm": 0.19361398823404677,
      "learning_rate": 3.567318757192175e-06,
      "loss": 0.2597,
      "step": 904
    },
    {
      "epoch": 2.81055900621118,
      "grad_norm": 0.19293534537923737,
      "learning_rate": 3.509781357882624e-06,
      "loss": 0.2679,
      "step": 905
    },
    {
      "epoch": 2.813664596273292,
      "grad_norm": 0.1978927145961964,
      "learning_rate": 3.4522439585730726e-06,
      "loss": 0.2474,
      "step": 906
    },
    {
      "epoch": 2.816770186335404,
      "grad_norm": 0.18672700788585406,
      "learning_rate": 3.3947065592635213e-06,
      "loss": 0.2468,
      "step": 907
    },
    {
      "epoch": 2.8198757763975157,
      "grad_norm": 0.1856966835076563,
      "learning_rate": 3.33716915995397e-06,
      "loss": 0.2424,
      "step": 908
    },
    {
      "epoch": 2.8229813664596275,
      "grad_norm": 0.19224475733121915,
      "learning_rate": 3.279631760644419e-06,
      "loss": 0.2477,
      "step": 909
    },
    {
      "epoch": 2.8260869565217392,
      "grad_norm": 0.20470161040078505,
      "learning_rate": 3.222094361334868e-06,
      "loss": 0.2706,
      "step": 910
    },
    {
      "epoch": 2.829192546583851,
      "grad_norm": 0.19429220598035837,
      "learning_rate": 3.1645569620253167e-06,
      "loss": 0.2477,
      "step": 911
    },
    {
      "epoch": 2.8322981366459627,
      "grad_norm": 0.1894109295691752,
      "learning_rate": 3.1070195627157654e-06,
      "loss": 0.2528,
      "step": 912
    },
    {
      "epoch": 2.8354037267080745,
      "grad_norm": 0.18097305550473375,
      "learning_rate": 3.049482163406214e-06,
      "loss": 0.2559,
      "step": 913
    },
    {
      "epoch": 2.8385093167701863,
      "grad_norm": 0.19783393465985816,
      "learning_rate": 2.991944764096663e-06,
      "loss": 0.2594,
      "step": 914
    },
    {
      "epoch": 2.841614906832298,
      "grad_norm": 0.20897012225810746,
      "learning_rate": 2.9344073647871116e-06,
      "loss": 0.2653,
      "step": 915
    },
    {
      "epoch": 2.8447204968944098,
      "grad_norm": 0.1896928698309342,
      "learning_rate": 2.8768699654775603e-06,
      "loss": 0.2477,
      "step": 916
    },
    {
      "epoch": 2.8478260869565215,
      "grad_norm": 0.1784496457827597,
      "learning_rate": 2.8193325661680094e-06,
      "loss": 0.244,
      "step": 917
    },
    {
      "epoch": 2.8509316770186337,
      "grad_norm": 0.18587478842335634,
      "learning_rate": 2.761795166858458e-06,
      "loss": 0.25,
      "step": 918
    },
    {
      "epoch": 2.8540372670807455,
      "grad_norm": 0.20084898371613977,
      "learning_rate": 2.704257767548907e-06,
      "loss": 0.2577,
      "step": 919
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.17885954860642703,
      "learning_rate": 2.6467203682393556e-06,
      "loss": 0.2407,
      "step": 920
    },
    {
      "epoch": 2.860248447204969,
      "grad_norm": 0.18561208551570504,
      "learning_rate": 2.5891829689298043e-06,
      "loss": 0.2659,
      "step": 921
    },
    {
      "epoch": 2.8633540372670807,
      "grad_norm": 0.21932109217206247,
      "learning_rate": 2.531645569620253e-06,
      "loss": 0.2403,
      "step": 922
    },
    {
      "epoch": 2.8664596273291925,
      "grad_norm": 0.20030388225206663,
      "learning_rate": 2.474108170310702e-06,
      "loss": 0.2544,
      "step": 923
    },
    {
      "epoch": 2.869565217391304,
      "grad_norm": 0.20878429975095714,
      "learning_rate": 2.4165707710011505e-06,
      "loss": 0.2497,
      "step": 924
    },
    {
      "epoch": 2.8726708074534164,
      "grad_norm": 0.22186219053087963,
      "learning_rate": 2.3590333716915997e-06,
      "loss": 0.2672,
      "step": 925
    },
    {
      "epoch": 2.875776397515528,
      "grad_norm": 0.18672043459559956,
      "learning_rate": 2.3014959723820484e-06,
      "loss": 0.2485,
      "step": 926
    },
    {
      "epoch": 2.87888198757764,
      "grad_norm": 0.18051985217560826,
      "learning_rate": 2.243958573072497e-06,
      "loss": 0.2479,
      "step": 927
    },
    {
      "epoch": 2.8819875776397517,
      "grad_norm": 0.20846631011511568,
      "learning_rate": 2.186421173762946e-06,
      "loss": 0.2632,
      "step": 928
    },
    {
      "epoch": 2.8850931677018634,
      "grad_norm": 0.17696337319445454,
      "learning_rate": 2.1288837744533946e-06,
      "loss": 0.2288,
      "step": 929
    },
    {
      "epoch": 2.888198757763975,
      "grad_norm": 0.18293271617504872,
      "learning_rate": 2.0713463751438433e-06,
      "loss": 0.2495,
      "step": 930
    },
    {
      "epoch": 2.891304347826087,
      "grad_norm": 0.17853543153947618,
      "learning_rate": 2.0138089758342925e-06,
      "loss": 0.2468,
      "step": 931
    },
    {
      "epoch": 2.8944099378881987,
      "grad_norm": 0.18420177129422804,
      "learning_rate": 1.956271576524741e-06,
      "loss": 0.2538,
      "step": 932
    },
    {
      "epoch": 2.8975155279503104,
      "grad_norm": 0.18170230261287915,
      "learning_rate": 1.8987341772151901e-06,
      "loss": 0.2589,
      "step": 933
    },
    {
      "epoch": 2.900621118012422,
      "grad_norm": 0.18685594957937918,
      "learning_rate": 1.8411967779056388e-06,
      "loss": 0.2442,
      "step": 934
    },
    {
      "epoch": 2.903726708074534,
      "grad_norm": 0.18690296703530773,
      "learning_rate": 1.7836593785960876e-06,
      "loss": 0.2451,
      "step": 935
    },
    {
      "epoch": 2.906832298136646,
      "grad_norm": 0.20799939121665842,
      "learning_rate": 1.7261219792865363e-06,
      "loss": 0.2649,
      "step": 936
    },
    {
      "epoch": 2.909937888198758,
      "grad_norm": 0.18563245527227562,
      "learning_rate": 1.668584579976985e-06,
      "loss": 0.2473,
      "step": 937
    },
    {
      "epoch": 2.9130434782608696,
      "grad_norm": 0.18286501772853814,
      "learning_rate": 1.611047180667434e-06,
      "loss": 0.2486,
      "step": 938
    },
    {
      "epoch": 2.9161490683229814,
      "grad_norm": 0.17534488578059473,
      "learning_rate": 1.5535097813578827e-06,
      "loss": 0.259,
      "step": 939
    },
    {
      "epoch": 2.919254658385093,
      "grad_norm": 0.19817242903037158,
      "learning_rate": 1.4959723820483314e-06,
      "loss": 0.2428,
      "step": 940
    },
    {
      "epoch": 2.922360248447205,
      "grad_norm": 0.18335244034678858,
      "learning_rate": 1.4384349827387801e-06,
      "loss": 0.252,
      "step": 941
    },
    {
      "epoch": 2.9254658385093166,
      "grad_norm": 0.17672927011117798,
      "learning_rate": 1.380897583429229e-06,
      "loss": 0.239,
      "step": 942
    },
    {
      "epoch": 2.928571428571429,
      "grad_norm": 0.19756419475586987,
      "learning_rate": 1.3233601841196778e-06,
      "loss": 0.2481,
      "step": 943
    },
    {
      "epoch": 2.9316770186335406,
      "grad_norm": 0.18227787024732953,
      "learning_rate": 1.2658227848101265e-06,
      "loss": 0.2503,
      "step": 944
    },
    {
      "epoch": 2.9347826086956523,
      "grad_norm": 0.17546530423346965,
      "learning_rate": 1.2082853855005753e-06,
      "loss": 0.2435,
      "step": 945
    },
    {
      "epoch": 2.937888198757764,
      "grad_norm": 0.17977719939100784,
      "learning_rate": 1.1507479861910242e-06,
      "loss": 0.2462,
      "step": 946
    },
    {
      "epoch": 2.940993788819876,
      "grad_norm": 0.182411120857819,
      "learning_rate": 1.093210586881473e-06,
      "loss": 0.2508,
      "step": 947
    },
    {
      "epoch": 2.9440993788819876,
      "grad_norm": 0.19191429782536917,
      "learning_rate": 1.0356731875719217e-06,
      "loss": 0.2419,
      "step": 948
    },
    {
      "epoch": 2.9472049689440993,
      "grad_norm": 0.17465750941257832,
      "learning_rate": 9.781357882623706e-07,
      "loss": 0.2394,
      "step": 949
    },
    {
      "epoch": 2.950310559006211,
      "grad_norm": 0.17956692661649218,
      "learning_rate": 9.205983889528194e-07,
      "loss": 0.2478,
      "step": 950
    },
    {
      "epoch": 2.953416149068323,
      "grad_norm": 0.18930304144220808,
      "learning_rate": 8.630609896432681e-07,
      "loss": 0.2371,
      "step": 951
    },
    {
      "epoch": 2.9565217391304346,
      "grad_norm": 0.19245986180447752,
      "learning_rate": 8.05523590333717e-07,
      "loss": 0.2324,
      "step": 952
    },
    {
      "epoch": 2.9596273291925463,
      "grad_norm": 0.18874027199979995,
      "learning_rate": 7.479861910241657e-07,
      "loss": 0.2482,
      "step": 953
    },
    {
      "epoch": 2.9627329192546585,
      "grad_norm": 0.18606185656754726,
      "learning_rate": 6.904487917146145e-07,
      "loss": 0.2602,
      "step": 954
    },
    {
      "epoch": 2.9658385093167703,
      "grad_norm": 0.18622516429740096,
      "learning_rate": 6.329113924050633e-07,
      "loss": 0.2514,
      "step": 955
    },
    {
      "epoch": 2.968944099378882,
      "grad_norm": 0.1910726758431884,
      "learning_rate": 5.753739930955121e-07,
      "loss": 0.2526,
      "step": 956
    },
    {
      "epoch": 2.972049689440994,
      "grad_norm": 0.18715451349236073,
      "learning_rate": 5.178365937859608e-07,
      "loss": 0.2512,
      "step": 957
    },
    {
      "epoch": 2.9751552795031055,
      "grad_norm": 0.18279876224123887,
      "learning_rate": 4.602991944764097e-07,
      "loss": 0.2624,
      "step": 958
    },
    {
      "epoch": 2.9782608695652173,
      "grad_norm": 0.18112311672532813,
      "learning_rate": 4.027617951668585e-07,
      "loss": 0.261,
      "step": 959
    },
    {
      "epoch": 2.981366459627329,
      "grad_norm": 0.17566965768374485,
      "learning_rate": 3.4522439585730727e-07,
      "loss": 0.2437,
      "step": 960
    },
    {
      "epoch": 2.9844720496894412,
      "grad_norm": 0.19721806486553123,
      "learning_rate": 2.8768699654775605e-07,
      "loss": 0.2594,
      "step": 961
    },
    {
      "epoch": 2.987577639751553,
      "grad_norm": 0.19840508059977566,
      "learning_rate": 2.3014959723820486e-07,
      "loss": 0.2415,
      "step": 962
    },
    {
      "epoch": 2.9906832298136647,
      "grad_norm": 0.20273843178894588,
      "learning_rate": 1.7261219792865363e-07,
      "loss": 0.2523,
      "step": 963
    },
    {
      "epoch": 2.9937888198757765,
      "grad_norm": 0.19347512173901257,
      "learning_rate": 1.1507479861910243e-07,
      "loss": 0.256,
      "step": 964
    },
    {
      "epoch": 2.9968944099378882,
      "grad_norm": 0.17382060962656506,
      "learning_rate": 5.7537399309551214e-08,
      "loss": 0.2351,
      "step": 965
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.17740605884855634,
      "learning_rate": 0.0,
      "loss": 0.2395,
      "step": 966
    },
    {
      "epoch": 3.0,
      "step": 966,
      "total_flos": 8.211023406049526e+17,
      "train_loss": 0.4564525331862225,
      "train_runtime": 83597.7389,
      "train_samples_per_second": 0.185,
      "train_steps_per_second": 0.012
    }
  ],
  "logging_steps": 1,
  "max_steps": 966,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.211023406049526e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}