diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6030 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3535169785169785, + "eval_steps": 500, + "global_step": 500000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.2499999999999995e-06, + "loss": 7.4049, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 1.2499999999999999e-05, + "loss": 4.7791, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 1.875e-05, + "loss": 3.8115, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 2.4999999999999998e-05, + "loss": 3.3298, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 3.125e-05, + "loss": 3.0365, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 3.75e-05, + "loss": 2.8445, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 4.375e-05, + "loss": 2.7101, + "step": 3500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999999999999996e-05, + "loss": 2.6043, + "step": 4000 + }, + { + "epoch": 0.01, + "learning_rate": 5.625e-05, + "loss": 2.5286, + "step": 4500 + }, + { + "epoch": 0.01, + "learning_rate": 6.25e-05, + "loss": 2.467, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 6.874999999999999e-05, + "loss": 2.4192, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-05, + "loss": 2.3785, + "step": 6000 + }, + { + "epoch": 0.02, + "learning_rate": 8.124999999999998e-05, + "loss": 2.3456, + "step": 6500 + }, + { + "epoch": 0.02, + "learning_rate": 8.75e-05, + "loss": 2.3183, + "step": 7000 + }, + { + "epoch": 0.02, + "learning_rate": 9.374999999999999e-05, + "loss": 2.3005, + "step": 7500 + }, + { + "epoch": 0.02, + "learning_rate": 9.999999999999999e-05, + "loss": 2.2808, + "step": 8000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010625, + "loss": 2.2722, + "step": 8500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001125, + "loss": 2.2577, + "step": 9000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011874999999999999, + "loss": 2.2479, + "step": 9500 + }, + { + "epoch": 0.03, + "learning_rate": 0.000125, + "loss": 2.2452, + "step": 10000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013125, + "loss": 2.2419, + "step": 10500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013749999999999998, + "loss": 2.2345, + "step": 11000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00014375, + "loss": 2.2276, + "step": 11500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015, + "loss": 2.2285, + "step": 12000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015625, + "loss": 2.2178, + "step": 12500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016249999999999997, + "loss": 2.2214, + "step": 13000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016874999999999998, + "loss": 2.2269, + "step": 13500 + }, + { + "epoch": 0.04, + "learning_rate": 0.000175, + "loss": 2.2322, + "step": 14000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00018124999999999996, + "loss": 2.2292, + "step": 14500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00018749999999999998, + "loss": 2.2274, + "step": 15000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019375, + "loss": 2.232, + "step": 15500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999999999999998, + "loss": 2.2316, + "step": 16000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00020624999999999997, + "loss": 2.238, + "step": 16500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002125, + "loss": 2.2426, + "step": 17000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00021874999999999998, + "loss": 2.2408, + "step": 17500 + }, + { + "epoch": 0.05, + "learning_rate": 0.000225, + "loss": 2.2456, + "step": 18000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00023124999999999998, + "loss": 2.2473, + "step": 18500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00023749999999999997, + "loss": 2.2549, + "step": 19000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00024375, + "loss": 2.2542, + "step": 19500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00025, + "loss": 2.2604, + "step": 20000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00025624999999999997, + "loss": 2.2626, + "step": 20500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002625, + "loss": 2.2694, + "step": 21000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00026875, + "loss": 2.2724, + "step": 21500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00027499999999999996, + "loss": 2.2722, + "step": 22000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00028125, + "loss": 2.2807, + "step": 22500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002875, + "loss": 2.2846, + "step": 23000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029374999999999996, + "loss": 2.292, + "step": 23500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003, + "loss": 2.297, + "step": 24000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002996848739495798, + "loss": 2.3038, + "step": 24500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029936974789915966, + "loss": 2.2998, + "step": 25000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002990546218487395, + "loss": 2.2977, + "step": 25500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002987394957983193, + "loss": 2.2925, + "step": 26000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002984243697478991, + "loss": 2.2943, + "step": 26500 + }, + { + "epoch": 0.07, + "learning_rate": 0.000298109243697479, + "loss": 2.2899, + "step": 27000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002977941176470588, + "loss": 2.2899, + "step": 27500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002974789915966386, + "loss": 2.2806, + "step": 28000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002971638655462185, + "loss": 2.2823, + "step": 28500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002968487394957983, + "loss": 2.2795, + "step": 29000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029653361344537813, + "loss": 2.2736, + "step": 29500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029621848739495795, + "loss": 2.2704, + "step": 30000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002959033613445378, + "loss": 2.269, + "step": 30500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029558823529411763, + "loss": 2.2601, + "step": 31000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029527310924369745, + "loss": 2.2674, + "step": 31500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029495798319327727, + "loss": 2.2575, + "step": 32000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002946428571428571, + "loss": 2.2583, + "step": 32500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029432773109243696, + "loss": 2.2494, + "step": 33000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002940126050420168, + "loss": 2.2412, + "step": 33500 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002936974789915966, + "loss": 2.2453, + "step": 34000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002933823529411764, + "loss": 2.2401, + "step": 34500 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002930672268907563, + "loss": 2.2375, + "step": 35000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002927521008403361, + "loss": 2.2302, + "step": 35500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002924369747899159, + "loss": 2.2275, + "step": 36000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002921218487394958, + "loss": 2.223, + "step": 36500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002918067226890756, + "loss": 2.2252, + "step": 37000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002914915966386554, + "loss": 2.2238, + "step": 37500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029117647058823524, + "loss": 2.2206, + "step": 38000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002908613445378151, + "loss": 2.2162, + "step": 38500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029054621848739493, + "loss": 2.2141, + "step": 39000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029023109243697475, + "loss": 2.2215, + "step": 39500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002899159663865546, + "loss": 2.2107, + "step": 40000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028960084033613444, + "loss": 2.2125, + "step": 40500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028928571428571425, + "loss": 2.2086, + "step": 41000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028897058823529407, + "loss": 2.2097, + "step": 41500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028865546218487394, + "loss": 2.1996, + "step": 42000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028834033613445376, + "loss": 2.1972, + "step": 42500 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002880252100840336, + "loss": 2.1906, + "step": 43000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028771008403361345, + "loss": 2.2015, + "step": 43500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028739495798319327, + "loss": 2.1927, + "step": 44000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002870798319327731, + "loss": 2.1889, + "step": 44500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028676470588235296, + "loss": 2.1904, + "step": 45000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002864495798319328, + "loss": 2.1809, + "step": 45500 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002861344537815126, + "loss": 2.1777, + "step": 46000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002858193277310924, + "loss": 2.1772, + "step": 46500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002855042016806722, + "loss": 2.1714, + "step": 47000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028518907563025204, + "loss": 2.171, + "step": 47500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002848739495798319, + "loss": 2.1814, + "step": 48000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028455882352941173, + "loss": 2.1713, + "step": 48500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028424369747899155, + "loss": 2.168, + "step": 49000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028392857142857137, + "loss": 2.1635, + "step": 49500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028361344537815124, + "loss": 2.1689, + "step": 50000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028329831932773106, + "loss": 2.1614, + "step": 50500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002829831932773109, + "loss": 2.1594, + "step": 51000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028266806722689075, + "loss": 2.1585, + "step": 51500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028235294117647056, + "loss": 2.1639, + "step": 52000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002820378151260504, + "loss": 2.1574, + "step": 52500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028172268907563025, + "loss": 2.1512, + "step": 53000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028140756302521007, + "loss": 2.1571, + "step": 53500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002810924369747899, + "loss": 2.1513, + "step": 54000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002807773109243697, + "loss": 2.1503, + "step": 54500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002804621848739496, + "loss": 2.1462, + "step": 55000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002801470588235294, + "loss": 2.1449, + "step": 55500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002798319327731092, + "loss": 2.1383, + "step": 56000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002795168067226891, + "loss": 2.1431, + "step": 56500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002792016806722689, + "loss": 2.1349, + "step": 57000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002788865546218487, + "loss": 2.1423, + "step": 57500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00027857142857142854, + "loss": 2.1375, + "step": 58000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002782563025210084, + "loss": 2.1261, + "step": 58500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002779411764705882, + "loss": 2.1277, + "step": 59000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00027762605042016804, + "loss": 2.1204, + "step": 59500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002773109243697479, + "loss": 2.1277, + "step": 60000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00027699579831932773, + "loss": 2.1246, + "step": 60500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00027668067226890755, + "loss": 2.1207, + "step": 61000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00027636554621848737, + "loss": 2.1161, + "step": 61500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002760504201680672, + "loss": 2.1198, + "step": 62000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000275735294117647, + "loss": 2.1122, + "step": 62500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00027542016806722687, + "loss": 2.1113, + "step": 63000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002751050420168067, + "loss": 2.1164, + "step": 63500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002747899159663865, + "loss": 2.1096, + "step": 64000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002744747899159664, + "loss": 2.106, + "step": 64500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002741596638655462, + "loss": 2.1015, + "step": 65000 + }, + { + "epoch": 0.18, + "learning_rate": 0.000273844537815126, + "loss": 2.1041, + "step": 65500 + }, + { + "epoch": 0.18, + "learning_rate": 0.00027352941176470583, + "loss": 2.0991, + "step": 66000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002732142857142857, + "loss": 2.0956, + "step": 66500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002728991596638655, + "loss": 2.0945, + "step": 67000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00027258403361344534, + "loss": 2.0958, + "step": 67500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002722689075630252, + "loss": 2.1115, + "step": 68000 + }, + { + "epoch": 0.19, + "learning_rate": 0.000271953781512605, + "loss": 2.1205, + "step": 68500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00027163865546218484, + "loss": 2.1098, + "step": 69000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00027132352941176466, + "loss": 2.1046, + "step": 69500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00027100840336134453, + "loss": 2.1013, + "step": 70000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00027069327731092435, + "loss": 2.0989, + "step": 70500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00027037815126050417, + "loss": 2.0927, + "step": 71000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00027006302521008404, + "loss": 2.0827, + "step": 71500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00026974789915966386, + "loss": 2.0851, + "step": 72000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002694327731092437, + "loss": 2.0841, + "step": 72500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002691176470588235, + "loss": 2.0816, + "step": 73000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00026880252100840336, + "loss": 2.0761, + "step": 73500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002684873949579832, + "loss": 2.0745, + "step": 74000 + }, + { + "epoch": 0.2, + "learning_rate": 0.000268172268907563, + "loss": 2.0762, + "step": 74500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00026785714285714287, + "loss": 2.0704, + "step": 75000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002675420168067227, + "loss": 2.0737, + "step": 75500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002672268907563025, + "loss": 2.0687, + "step": 76000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002669117647058823, + "loss": 2.0743, + "step": 76500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00026659663865546214, + "loss": 2.0758, + "step": 77000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00026628151260504196, + "loss": 2.0677, + "step": 77500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00026596638655462183, + "loss": 2.0643, + "step": 78000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00026565126050420165, + "loss": 2.0588, + "step": 78500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00026533613445378146, + "loss": 2.0676, + "step": 79000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00026502100840336134, + "loss": 2.065, + "step": 79500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00026470588235294115, + "loss": 2.064, + "step": 80000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00026439075630252097, + "loss": 2.0579, + "step": 80500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002640756302521008, + "loss": 2.0614, + "step": 81000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00026376050420168066, + "loss": 2.0664, + "step": 81500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002634453781512605, + "loss": 2.0648, + "step": 82000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002631302521008403, + "loss": 2.0564, + "step": 82500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00026281512605042017, + "loss": 2.0569, + "step": 83000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002625, + "loss": 2.0517, + "step": 83500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002621848739495798, + "loss": 2.0453, + "step": 84000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002618697478991596, + "loss": 2.0447, + "step": 84500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002615546218487395, + "loss": 2.0431, + "step": 85000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002612394957983193, + "loss": 2.0414, + "step": 85500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002609243697478991, + "loss": 2.0381, + "step": 86000 + }, + { + "epoch": 0.23, + "learning_rate": 0.000260609243697479, + "loss": 2.0392, + "step": 86500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002602941176470588, + "loss": 2.0317, + "step": 87000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00025997899159663863, + "loss": 2.0338, + "step": 87500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00025966386554621845, + "loss": 2.0348, + "step": 88000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002593487394957983, + "loss": 2.0349, + "step": 88500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00025903361344537814, + "loss": 2.0295, + "step": 89000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00025871848739495796, + "loss": 2.0253, + "step": 89500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00025840336134453783, + "loss": 2.0272, + "step": 90000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00025808823529411764, + "loss": 2.0273, + "step": 90500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00025777310924369746, + "loss": 2.0295, + "step": 91000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002574579831932773, + "loss": 2.0284, + "step": 91500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002571428571428571, + "loss": 2.0335, + "step": 92000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002568277310924369, + "loss": 2.032, + "step": 92500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002565126050420168, + "loss": 2.0256, + "step": 93000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002561974789915966, + "loss": 2.0278, + "step": 93500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002558823529411764, + "loss": 2.0226, + "step": 94000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002555672268907563, + "loss": 2.0186, + "step": 94500 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002552521008403361, + "loss": 2.0111, + "step": 95000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00025493697478991593, + "loss": 2.0139, + "step": 95500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00025462184873949575, + "loss": 2.0092, + "step": 96000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002543067226890756, + "loss": 2.0139, + "step": 96500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00025399159663865543, + "loss": 2.0017, + "step": 97000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00025367647058823525, + "loss": 2.0061, + "step": 97500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002533613445378151, + "loss": 2.0049, + "step": 98000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00025304621848739494, + "loss": 2.0054, + "step": 98500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00025273109243697476, + "loss": 2.0045, + "step": 99000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002524159663865546, + "loss": 2.0043, + "step": 99500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00025210084033613445, + "loss": 2.0007, + "step": 100000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00025178571428571426, + "loss": 2.0047, + "step": 100500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002514705882352941, + "loss": 2.0061, + "step": 101000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00025115546218487395, + "loss": 2.0006, + "step": 101500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00025084033613445377, + "loss": 1.9995, + "step": 102000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002505252100840336, + "loss": 1.9997, + "step": 102500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00025021008403361346, + "loss": 1.9942, + "step": 103000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002498949579831933, + "loss": 1.9951, + "step": 103500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002495798319327731, + "loss": 1.9872, + "step": 104000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002492647058823529, + "loss": 1.9908, + "step": 104500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002489495798319328, + "loss": 1.9933, + "step": 105000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002486344537815126, + "loss": 1.9884, + "step": 105500 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002483193277310924, + "loss": 1.9885, + "step": 106000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00024800420168067224, + "loss": 1.9881, + "step": 106500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00024768907563025205, + "loss": 1.9842, + "step": 107000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00024737394957983187, + "loss": 1.9816, + "step": 107500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00024705882352941174, + "loss": 1.9772, + "step": 108000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00024674369747899156, + "loss": 1.9766, + "step": 108500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002464285714285714, + "loss": 1.9795, + "step": 109000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00024611344537815125, + "loss": 1.9754, + "step": 109500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00024579831932773107, + "loss": 1.9751, + "step": 110000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002454831932773109, + "loss": 1.9757, + "step": 110500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00024516806722689076, + "loss": 1.9755, + "step": 111000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002448529411764706, + "loss": 1.9753, + "step": 111500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002445378151260504, + "loss": 1.9657, + "step": 112000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002442226890756302, + "loss": 1.9663, + "step": 112500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00024390756302521005, + "loss": 1.9682, + "step": 113000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002435924369747899, + "loss": 1.971, + "step": 113500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00024327731092436971, + "loss": 1.9687, + "step": 114000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00024296218487394956, + "loss": 1.9826, + "step": 114500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002426470588235294, + "loss": 1.9643, + "step": 115000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00024233193277310922, + "loss": 1.9708, + "step": 115500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00024201680672268907, + "loss": 1.9628, + "step": 116000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00024170168067226888, + "loss": 1.9633, + "step": 116500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00024138655462184873, + "loss": 1.9607, + "step": 117000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00024107142857142857, + "loss": 1.9574, + "step": 117500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002407563025210084, + "loss": 1.9611, + "step": 118000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00024044117647058823, + "loss": 1.9575, + "step": 118500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00024012605042016805, + "loss": 1.9538, + "step": 119000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002398109243697479, + "loss": 1.9573, + "step": 119500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00023949579831932771, + "loss": 1.9601, + "step": 120000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00023918067226890756, + "loss": 1.9539, + "step": 120500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00023886554621848735, + "loss": 1.9532, + "step": 121000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002385504201680672, + "loss": 1.951, + "step": 121500 + }, + { + "epoch": 0.33, + "learning_rate": 0.000238235294117647, + "loss": 1.9554, + "step": 122000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00023792016806722686, + "loss": 1.9472, + "step": 122500 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002376050420168067, + "loss": 1.9462, + "step": 123000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00023728991596638652, + "loss": 1.9524, + "step": 123500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023697478991596636, + "loss": 1.9525, + "step": 124000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023665966386554618, + "loss": 1.9444, + "step": 124500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023634453781512602, + "loss": 1.9404, + "step": 125000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023602941176470587, + "loss": 1.9431, + "step": 125500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023571428571428569, + "loss": 1.943, + "step": 126000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023539915966386553, + "loss": 1.9404, + "step": 126500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00023508403361344535, + "loss": 1.9447, + "step": 127000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002347689075630252, + "loss": 1.9425, + "step": 127500 + }, + { + "epoch": 0.35, + "learning_rate": 0.000234453781512605, + "loss": 1.9376, + "step": 128000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00023413865546218485, + "loss": 1.9374, + "step": 128500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002338235294117647, + "loss": 1.9373, + "step": 129000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00023350840336134452, + "loss": 1.9364, + "step": 129500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00023319327731092436, + "loss": 1.9344, + "step": 130000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00023287815126050418, + "loss": 1.9299, + "step": 130500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00023256302521008402, + "loss": 1.9243, + "step": 131000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00023224789915966384, + "loss": 1.93, + "step": 131500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00023193277310924368, + "loss": 1.925, + "step": 132000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00023161764705882353, + "loss": 1.9256, + "step": 132500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00023130252100840335, + "loss": 1.9334, + "step": 133000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002309873949579832, + "loss": 1.9266, + "step": 133500 + }, + { + "epoch": 0.36, + "learning_rate": 0.000230672268907563, + "loss": 1.9338, + "step": 134000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00023035714285714285, + "loss": 1.9258, + "step": 134500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00023004201680672267, + "loss": 1.9253, + "step": 135000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00022972689075630252, + "loss": 1.9259, + "step": 135500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002294117647058823, + "loss": 1.9509, + "step": 136000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00022909663865546215, + "loss": 1.9366, + "step": 136500 + }, + { + "epoch": 0.37, + "learning_rate": 0.000228781512605042, + "loss": 1.9323, + "step": 137000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002284663865546218, + "loss": 1.9298, + "step": 137500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00022815126050420166, + "loss": 1.9611, + "step": 138000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00022783613445378147, + "loss": 1.9599, + "step": 138500 + }, + { + "epoch": 0.38, + "learning_rate": 0.00022752100840336132, + "loss": 1.9537, + "step": 139000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00022720588235294114, + "loss": 1.9308, + "step": 139500 + }, + { + "epoch": 0.38, + "learning_rate": 0.00022689075630252098, + "loss": 1.9208, + "step": 140000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00022657563025210083, + "loss": 1.9233, + "step": 140500 + }, + { + "epoch": 0.38, + "learning_rate": 0.00022626050420168064, + "loss": 1.9233, + "step": 141000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002259453781512605, + "loss": 1.927, + "step": 141500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002256302521008403, + "loss": 1.9331, + "step": 142000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00022531512605042015, + "loss": 1.9358, + "step": 142500 + }, + { + "epoch": 0.39, + "learning_rate": 0.000225, + "loss": 1.9415, + "step": 143000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002246848739495798, + "loss": 1.9427, + "step": 143500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00022436974789915966, + "loss": 1.9216, + "step": 144000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00022405462184873947, + "loss": 1.9196, + "step": 144500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00022373949579831932, + "loss": 1.9109, + "step": 145000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00022342436974789914, + "loss": 1.9166, + "step": 145500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00022310924369747898, + "loss": 1.9231, + "step": 146000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00022279411764705882, + "loss": 1.9091, + "step": 146500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00022247899159663864, + "loss": 1.9096, + "step": 147000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00022216386554621849, + "loss": 1.9107, + "step": 147500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002218487394957983, + "loss": 1.908, + "step": 148000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00022153361344537815, + "loss": 1.9052, + "step": 148500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00022121848739495797, + "loss": 1.9058, + "step": 149000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002209033613445378, + "loss": 1.9043, + "step": 149500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00022058823529411765, + "loss": 1.9049, + "step": 150000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00022027310924369745, + "loss": 1.9028, + "step": 150500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002199579831932773, + "loss": 1.9034, + "step": 151000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002196428571428571, + "loss": 1.8976, + "step": 151500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00021932773109243695, + "loss": 1.8957, + "step": 152000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00021901260504201677, + "loss": 1.895, + "step": 152500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00021869747899159661, + "loss": 1.8944, + "step": 153000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00021838235294117643, + "loss": 1.8904, + "step": 153500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00021806722689075628, + "loss": 1.8939, + "step": 154000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00021775210084033612, + "loss": 1.8921, + "step": 154500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00021743697478991594, + "loss": 1.8873, + "step": 155000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00021712184873949578, + "loss": 1.8907, + "step": 155500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002168067226890756, + "loss": 1.8898, + "step": 156000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00021649159663865544, + "loss": 1.8826, + "step": 156500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00021617647058823526, + "loss": 1.8896, + "step": 157000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002158613445378151, + "loss": 1.8836, + "step": 157500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00021554621848739495, + "loss": 1.9006, + "step": 158000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00021523109243697477, + "loss": 1.8949, + "step": 158500 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002149159663865546, + "loss": 1.8931, + "step": 159000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00021460084033613443, + "loss": 1.8886, + "step": 159500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00021428571428571427, + "loss": 1.892, + "step": 160000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002139705882352941, + "loss": 1.8935, + "step": 160500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021365546218487394, + "loss": 1.8977, + "step": 161000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021334033613445378, + "loss": 1.8891, + "step": 161500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002130252100840336, + "loss": 1.8846, + "step": 162000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021271008403361344, + "loss": 1.8816, + "step": 162500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021239495798319326, + "loss": 1.8791, + "step": 163000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002120798319327731, + "loss": 1.9027, + "step": 163500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021176470588235295, + "loss": 1.8873, + "step": 164000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021144957983193277, + "loss": 1.8795, + "step": 164500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002111344537815126, + "loss": 1.8775, + "step": 165000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002108193277310924, + "loss": 1.8799, + "step": 165500 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021050420168067225, + "loss": 1.8771, + "step": 166000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021018907563025206, + "loss": 1.8797, + "step": 166500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002098739495798319, + "loss": 1.8759, + "step": 167000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00020955882352941173, + "loss": 1.8742, + "step": 167500 + }, + { + "epoch": 0.45, + "learning_rate": 0.00020924369747899157, + "loss": 1.8691, + "step": 168000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002089285714285714, + "loss": 1.8702, + "step": 168500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020861344537815123, + "loss": 1.8689, + "step": 169000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020829831932773108, + "loss": 1.8701, + "step": 169500 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002079831932773109, + "loss": 1.8695, + "step": 170000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020766806722689074, + "loss": 1.8706, + "step": 170500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020735294117647056, + "loss": 1.8935, + "step": 171000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002070378151260504, + "loss": 1.9075, + "step": 171500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020672268907563025, + "loss": 1.8929, + "step": 172000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020640756302521006, + "loss": 1.8795, + "step": 172500 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002060924369747899, + "loss": 1.8845, + "step": 173000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020577731092436973, + "loss": 1.8762, + "step": 173500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020546218487394957, + "loss": 1.8745, + "step": 174000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002051470588235294, + "loss": 1.8751, + "step": 174500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020483193277310923, + "loss": 1.8746, + "step": 175000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00020451680672268908, + "loss": 1.8614, + "step": 175500 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002042016806722689, + "loss": 1.8634, + "step": 176000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00020388655462184874, + "loss": 1.8711, + "step": 176500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00020357142857142856, + "loss": 1.8648, + "step": 177000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002032563025210084, + "loss": 1.8615, + "step": 177500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00020294117647058822, + "loss": 1.8661, + "step": 178000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00020262605042016806, + "loss": 1.8616, + "step": 178500 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002023109243697479, + "loss": 1.8601, + "step": 179000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00020199579831932772, + "loss": 1.865, + "step": 179500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00020168067226890757, + "loss": 1.8664, + "step": 180000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00020136554621848736, + "loss": 1.8562, + "step": 180500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002010504201680672, + "loss": 1.8589, + "step": 181000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00020073529411764702, + "loss": 1.8583, + "step": 181500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00020042016806722687, + "loss": 1.8623, + "step": 182000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00020010504201680668, + "loss": 1.8561, + "step": 182500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019978991596638653, + "loss": 1.8537, + "step": 183000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019947478991596637, + "loss": 1.8622, + "step": 183500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001991596638655462, + "loss": 1.8568, + "step": 184000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019884453781512603, + "loss": 1.8543, + "step": 184500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019852941176470585, + "loss": 1.8528, + "step": 185000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001982142857142857, + "loss": 1.8509, + "step": 185500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001978991596638655, + "loss": 1.8489, + "step": 186000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019758403361344536, + "loss": 1.8483, + "step": 186500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001972689075630252, + "loss": 1.8487, + "step": 187000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019695378151260502, + "loss": 1.8416, + "step": 187500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019663865546218486, + "loss": 1.8497, + "step": 188000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019632352941176468, + "loss": 1.849, + "step": 188500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019600840336134453, + "loss": 1.8466, + "step": 189000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019569327731092434, + "loss": 1.8456, + "step": 189500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001953781512605042, + "loss": 1.8453, + "step": 190000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019506302521008403, + "loss": 1.8418, + "step": 190500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019474789915966385, + "loss": 1.8429, + "step": 191000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001944327731092437, + "loss": 1.8453, + "step": 191500 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001941176470588235, + "loss": 1.8458, + "step": 192000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019380252100840336, + "loss": 1.842, + "step": 192500 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001934873949579832, + "loss": 1.8381, + "step": 193000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019317226890756302, + "loss": 1.8374, + "step": 193500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019285714285714286, + "loss": 1.8359, + "step": 194000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019254201680672268, + "loss": 1.8336, + "step": 194500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019222689075630253, + "loss": 1.8365, + "step": 195000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019191176470588232, + "loss": 1.8391, + "step": 195500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019159663865546216, + "loss": 1.8431, + "step": 196000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019128151260504198, + "loss": 1.8361, + "step": 196500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019096638655462182, + "loss": 1.8583, + "step": 197000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019065126050420164, + "loss": 1.8656, + "step": 197500 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019033613445378148, + "loss": 1.8508, + "step": 198000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019002100840336133, + "loss": 1.8533, + "step": 198500 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018970588235294115, + "loss": 1.8416, + "step": 199000 + }, + { + "epoch": 0.54, + "learning_rate": 0.000189390756302521, + "loss": 1.8503, + "step": 199500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001890756302521008, + "loss": 1.844, + "step": 200000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018876050420168065, + "loss": 1.8389, + "step": 200500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001884453781512605, + "loss": 1.836, + "step": 201000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018813025210084031, + "loss": 1.8354, + "step": 201500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018781512605042016, + "loss": 1.833, + "step": 202000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018749999999999998, + "loss": 1.8308, + "step": 202500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018718487394957982, + "loss": 1.8314, + "step": 203000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018686974789915964, + "loss": 1.83, + "step": 203500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018655462184873948, + "loss": 1.8318, + "step": 204000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018623949579831933, + "loss": 1.8313, + "step": 204500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018592436974789915, + "loss": 1.8234, + "step": 205000 + }, + { + "epoch": 0.56, + "learning_rate": 0.000185609243697479, + "loss": 1.8286, + "step": 205500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001852941176470588, + "loss": 1.8246, + "step": 206000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018497899159663865, + "loss": 1.8213, + "step": 206500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018466386554621847, + "loss": 1.8237, + "step": 207000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018434873949579831, + "loss": 1.8247, + "step": 207500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018403361344537816, + "loss": 1.8215, + "step": 208000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018371848739495798, + "loss": 1.8174, + "step": 208500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018340336134453782, + "loss": 1.8215, + "step": 209000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018308823529411764, + "loss": 1.8168, + "step": 209500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018277310924369746, + "loss": 1.8186, + "step": 210000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018245798319327727, + "loss": 1.8229, + "step": 210500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018214285714285712, + "loss": 1.8165, + "step": 211000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018182773109243693, + "loss": 1.8173, + "step": 211500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018151260504201678, + "loss": 1.8115, + "step": 212000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018119747899159662, + "loss": 1.8157, + "step": 212500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018088235294117644, + "loss": 1.813, + "step": 213000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018056722689075629, + "loss": 1.8126, + "step": 213500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001802521008403361, + "loss": 1.808, + "step": 214000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017993697478991595, + "loss": 1.8116, + "step": 214500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017962184873949577, + "loss": 1.8165, + "step": 215000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001793067226890756, + "loss": 1.807, + "step": 215500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00017899159663865545, + "loss": 1.8051, + "step": 216000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017867647058823527, + "loss": 1.8081, + "step": 216500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017836134453781512, + "loss": 1.8084, + "step": 217000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017804621848739493, + "loss": 1.811, + "step": 217500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017773109243697478, + "loss": 1.8068, + "step": 218000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001774159663865546, + "loss": 1.805, + "step": 218500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017710084033613444, + "loss": 1.8042, + "step": 219000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017678571428571428, + "loss": 1.8004, + "step": 219500 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001764705882352941, + "loss": 1.8028, + "step": 220000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017615546218487395, + "loss": 1.8111, + "step": 220500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017584033613445376, + "loss": 1.8076, + "step": 221000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001755252100840336, + "loss": 1.8017, + "step": 221500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017521008403361345, + "loss": 1.802, + "step": 222000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017489495798319327, + "loss": 1.803, + "step": 222500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017457983193277312, + "loss": 1.8023, + "step": 223000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017426470588235293, + "loss": 1.7978, + "step": 223500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017394957983193278, + "loss": 1.7959, + "step": 224000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001736344537815126, + "loss": 1.7971, + "step": 224500 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001733193277310924, + "loss": 1.801, + "step": 225000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017300420168067223, + "loss": 1.7987, + "step": 225500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017268907563025207, + "loss": 1.7975, + "step": 226000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001723739495798319, + "loss": 1.7997, + "step": 226500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017205882352941174, + "loss": 1.7993, + "step": 227000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017174369747899158, + "loss": 1.8021, + "step": 227500 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001714285714285714, + "loss": 1.795, + "step": 228000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017111344537815124, + "loss": 1.7957, + "step": 228500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017079831932773106, + "loss": 1.7967, + "step": 229000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001704831932773109, + "loss": 1.7951, + "step": 229500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017016806722689075, + "loss": 1.7953, + "step": 230000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016985294117647057, + "loss": 1.7938, + "step": 230500 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001695378151260504, + "loss": 1.7925, + "step": 231000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016922268907563023, + "loss": 1.7944, + "step": 231500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016890756302521007, + "loss": 1.7897, + "step": 232000 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001685924369747899, + "loss": 1.7929, + "step": 232500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016827731092436974, + "loss": 1.7916, + "step": 233000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016796218487394958, + "loss": 1.7885, + "step": 233500 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001676470588235294, + "loss": 1.7971, + "step": 234000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016733193277310924, + "loss": 1.7871, + "step": 234500 + }, + { + "epoch": 0.64, + "learning_rate": 0.00016701680672268906, + "loss": 1.7862, + "step": 235000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001667016806722689, + "loss": 1.7858, + "step": 235500 + }, + { + "epoch": 0.64, + "learning_rate": 0.00016638655462184872, + "loss": 1.7859, + "step": 236000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00016607142857142857, + "loss": 1.7864, + "step": 236500 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001657563025210084, + "loss": 1.7869, + "step": 237000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00016544117647058823, + "loss": 1.7816, + "step": 237500 + }, + { + "epoch": 0.64, + "learning_rate": 0.00016512605042016807, + "loss": 1.7865, + "step": 238000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001648109243697479, + "loss": 1.7867, + "step": 238500 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016449579831932773, + "loss": 1.7868, + "step": 239000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016418067226890755, + "loss": 1.7795, + "step": 239500 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016386554621848737, + "loss": 1.7833, + "step": 240000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016355042016806719, + "loss": 1.7885, + "step": 240500 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016323529411764703, + "loss": 1.7886, + "step": 241000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016292016806722688, + "loss": 1.7865, + "step": 241500 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001626050420168067, + "loss": 1.787, + "step": 242000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016228991596638654, + "loss": 1.7791, + "step": 242500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016197478991596635, + "loss": 1.7786, + "step": 243000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001616596638655462, + "loss": 1.7819, + "step": 243500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016134453781512602, + "loss": 1.7766, + "step": 244000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016102941176470586, + "loss": 1.7766, + "step": 244500 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001607142857142857, + "loss": 1.7809, + "step": 245000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016039915966386552, + "loss": 1.7708, + "step": 245500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016008403361344537, + "loss": 1.7713, + "step": 246000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015976890756302519, + "loss": 1.7773, + "step": 246500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015945378151260503, + "loss": 1.7746, + "step": 247000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015913865546218485, + "loss": 1.7722, + "step": 247500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001588235294117647, + "loss": 1.7707, + "step": 248000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015850840336134454, + "loss": 1.7785, + "step": 248500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015819327731092435, + "loss": 1.7745, + "step": 249000 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001578781512605042, + "loss": 1.7746, + "step": 249500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015756302521008402, + "loss": 1.7706, + "step": 250000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015724789915966386, + "loss": 1.7689, + "step": 250500 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001569327731092437, + "loss": 1.7702, + "step": 251000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015661764705882352, + "loss": 1.7716, + "step": 251500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015630252100840337, + "loss": 1.7709, + "step": 252000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015598739495798318, + "loss": 1.7689, + "step": 252500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015567226890756303, + "loss": 1.7709, + "step": 253000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015535714285714285, + "loss": 1.7666, + "step": 253500 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001550420168067227, + "loss": 1.768, + "step": 254000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015472689075630254, + "loss": 1.7646, + "step": 254500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015441176470588233, + "loss": 1.7624, + "step": 255000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015409663865546214, + "loss": 1.7639, + "step": 255500 + }, + { + "epoch": 0.69, + "learning_rate": 0.000153781512605042, + "loss": 1.7684, + "step": 256000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015346638655462183, + "loss": 1.7621, + "step": 256500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015315126050420165, + "loss": 1.7632, + "step": 257000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001528361344537815, + "loss": 1.7617, + "step": 257500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001525210084033613, + "loss": 1.7616, + "step": 258000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015220588235294116, + "loss": 1.7607, + "step": 258500 + }, + { + "epoch": 0.7, + "learning_rate": 0.000151890756302521, + "loss": 1.7652, + "step": 259000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015157563025210082, + "loss": 1.7628, + "step": 259500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015126050420168066, + "loss": 1.7605, + "step": 260000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00015094537815126048, + "loss": 1.7624, + "step": 260500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00015063025210084032, + "loss": 1.7642, + "step": 261000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00015031512605042014, + "loss": 1.7577, + "step": 261500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00015, + "loss": 1.7586, + "step": 262000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014968487394957983, + "loss": 1.7582, + "step": 262500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014936974789915965, + "loss": 1.7578, + "step": 263000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001490546218487395, + "loss": 1.7638, + "step": 263500 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001487394957983193, + "loss": 1.7529, + "step": 264000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014842436974789916, + "loss": 1.7571, + "step": 264500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014810924369747897, + "loss": 1.7603, + "step": 265000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014779411764705882, + "loss": 1.7539, + "step": 265500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014747899159663863, + "loss": 1.7537, + "step": 266000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014716386554621848, + "loss": 1.7532, + "step": 266500 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001468487394957983, + "loss": 1.7529, + "step": 267000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014653361344537814, + "loss": 1.7504, + "step": 267500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014621848739495796, + "loss": 1.7513, + "step": 268000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001459033613445378, + "loss": 1.7492, + "step": 268500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014558823529411762, + "loss": 1.7476, + "step": 269000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014527310924369747, + "loss": 1.7557, + "step": 269500 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001449579831932773, + "loss": 1.7506, + "step": 270000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014464285714285713, + "loss": 1.7497, + "step": 270500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014432773109243697, + "loss": 1.7499, + "step": 271000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001440126050420168, + "loss": 1.7459, + "step": 271500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00014369747899159663, + "loss": 1.7445, + "step": 272000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00014338235294117648, + "loss": 1.7445, + "step": 272500 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001430672268907563, + "loss": 1.739, + "step": 273000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001427521008403361, + "loss": 1.7409, + "step": 273500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00014243697478991596, + "loss": 1.7429, + "step": 274000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00014212184873949578, + "loss": 1.741, + "step": 274500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00014180672268907562, + "loss": 1.7431, + "step": 275000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014149159663865544, + "loss": 1.745, + "step": 275500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014117647058823528, + "loss": 1.7412, + "step": 276000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014086134453781513, + "loss": 1.7372, + "step": 276500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014054621848739494, + "loss": 1.7369, + "step": 277000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001402310924369748, + "loss": 1.7373, + "step": 277500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001399159663865546, + "loss": 1.7404, + "step": 278000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013960084033613445, + "loss": 1.7424, + "step": 278500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013928571428571427, + "loss": 1.7359, + "step": 279000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001389705882352941, + "loss": 1.7376, + "step": 279500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013865546218487396, + "loss": 1.7362, + "step": 280000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013834033613445377, + "loss": 1.7405, + "step": 280500 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001380252100840336, + "loss": 1.7375, + "step": 281000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013771008403361344, + "loss": 1.7355, + "step": 281500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013739495798319325, + "loss": 1.733, + "step": 282000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001370798319327731, + "loss": 1.7353, + "step": 282500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013676470588235292, + "loss": 1.7307, + "step": 283000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013644957983193276, + "loss": 1.7362, + "step": 283500 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001361344537815126, + "loss": 1.7338, + "step": 284000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013581932773109242, + "loss": 1.732, + "step": 284500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013550420168067227, + "loss": 1.7284, + "step": 285000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013518907563025208, + "loss": 1.7307, + "step": 285500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013487394957983193, + "loss": 1.7303, + "step": 286000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00013455882352941175, + "loss": 1.7277, + "step": 286500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001342436974789916, + "loss": 1.7332, + "step": 287000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00013392857142857144, + "loss": 1.7255, + "step": 287500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00013361344537815125, + "loss": 1.7242, + "step": 288000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00013329831932773107, + "loss": 1.7248, + "step": 288500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00013298319327731091, + "loss": 1.731, + "step": 289000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00013266806722689073, + "loss": 1.7324, + "step": 289500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00013235294117647058, + "loss": 1.7314, + "step": 290000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001320378151260504, + "loss": 1.7325, + "step": 290500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00013172268907563024, + "loss": 1.7259, + "step": 291000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00013140756302521008, + "loss": 1.7271, + "step": 291500 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001310924369747899, + "loss": 1.7257, + "step": 292000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00013077731092436975, + "loss": 1.7267, + "step": 292500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00013046218487394956, + "loss": 1.7262, + "step": 293000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001301470588235294, + "loss": 1.7245, + "step": 293500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012983193277310922, + "loss": 1.7252, + "step": 294000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012951680672268907, + "loss": 1.7217, + "step": 294500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012920168067226891, + "loss": 1.7193, + "step": 295000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012888655462184873, + "loss": 1.7184, + "step": 295500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012857142857142855, + "loss": 1.7205, + "step": 296000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001282563025210084, + "loss": 1.7195, + "step": 296500 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001279411764705882, + "loss": 1.7195, + "step": 297000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012762605042016806, + "loss": 1.7126, + "step": 297500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012731092436974787, + "loss": 1.7158, + "step": 298000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012699579831932772, + "loss": 1.7137, + "step": 298500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012668067226890756, + "loss": 1.7154, + "step": 299000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012636554621848738, + "loss": 1.7192, + "step": 299500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012605042016806722, + "loss": 1.7155, + "step": 300000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012573529411764704, + "loss": 1.7106, + "step": 300500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012542016806722689, + "loss": 1.7127, + "step": 301000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012510504201680673, + "loss": 1.7103, + "step": 301500 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012478991596638655, + "loss": 1.7125, + "step": 302000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001244747899159664, + "loss": 1.7141, + "step": 302500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001241596638655462, + "loss": 1.7122, + "step": 303000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012384453781512603, + "loss": 1.7121, + "step": 303500 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012352941176470587, + "loss": 1.7106, + "step": 304000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001232142857142857, + "loss": 1.708, + "step": 304500 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012289915966386553, + "loss": 1.7086, + "step": 305000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012258403361344538, + "loss": 1.7096, + "step": 305500 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001222689075630252, + "loss": 1.7094, + "step": 306000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012195378151260503, + "loss": 1.7103, + "step": 306500 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012163865546218486, + "loss": 1.7038, + "step": 307000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001213235294117647, + "loss": 1.707, + "step": 307500 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012100840336134453, + "loss": 1.7094, + "step": 308000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012069327731092436, + "loss": 1.7078, + "step": 308500 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001203781512605042, + "loss": 1.7043, + "step": 309000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012006302521008403, + "loss": 1.7079, + "step": 309500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00011974789915966386, + "loss": 1.7025, + "step": 310000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00011943277310924367, + "loss": 1.7065, + "step": 310500 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001191176470588235, + "loss": 1.7047, + "step": 311000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00011880252100840335, + "loss": 1.6999, + "step": 311500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00011848739495798318, + "loss": 1.7018, + "step": 312000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011817226890756301, + "loss": 1.7037, + "step": 312500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011785714285714284, + "loss": 1.6972, + "step": 313000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011754201680672267, + "loss": 1.6998, + "step": 313500 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001172268907563025, + "loss": 1.7012, + "step": 314000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011691176470588235, + "loss": 1.6978, + "step": 314500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011659663865546218, + "loss": 1.7013, + "step": 315000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011628151260504201, + "loss": 1.6982, + "step": 315500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011596638655462184, + "loss": 1.699, + "step": 316000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011565126050420167, + "loss": 1.6956, + "step": 316500 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001153361344537815, + "loss": 1.7124, + "step": 317000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011502100840336134, + "loss": 1.7101, + "step": 317500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011470588235294115, + "loss": 1.7024, + "step": 318000 + }, + { + "epoch": 0.86, + "learning_rate": 0.000114390756302521, + "loss": 1.7011, + "step": 318500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011407563025210083, + "loss": 1.7009, + "step": 319000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011376050420168066, + "loss": 1.6989, + "step": 319500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011344537815126049, + "loss": 1.7002, + "step": 320000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011313025210084032, + "loss": 1.6989, + "step": 320500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011281512605042015, + "loss": 1.6977, + "step": 321000 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001125, + "loss": 1.6991, + "step": 321500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011218487394957983, + "loss": 1.6965, + "step": 322000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011186974789915966, + "loss": 1.6934, + "step": 322500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011155462184873949, + "loss": 1.6943, + "step": 323000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011123949579831932, + "loss": 1.6894, + "step": 323500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011092436974789915, + "loss": 1.6906, + "step": 324000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011060924369747898, + "loss": 1.7128, + "step": 324500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011029411764705883, + "loss": 1.6963, + "step": 325000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00010997899159663864, + "loss": 1.7206, + "step": 325500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00010966386554621848, + "loss": 1.7105, + "step": 326000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00010934873949579831, + "loss": 1.6997, + "step": 326500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00010903361344537814, + "loss": 1.7016, + "step": 327000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00010871848739495797, + "loss": 1.7026, + "step": 327500 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001084033613445378, + "loss": 1.6942, + "step": 328000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00010808823529411763, + "loss": 1.6964, + "step": 328500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00010777310924369748, + "loss": 1.696, + "step": 329000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001074579831932773, + "loss": 1.694, + "step": 329500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00010714285714285714, + "loss": 1.6878, + "step": 330000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00010682773109243697, + "loss": 1.6921, + "step": 330500 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001065126050420168, + "loss": 1.6871, + "step": 331000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00010619747899159663, + "loss": 1.6846, + "step": 331500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00010588235294117647, + "loss": 1.6895, + "step": 332000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001055672268907563, + "loss": 1.6855, + "step": 332500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00010525210084033612, + "loss": 1.6807, + "step": 333000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00010493697478991595, + "loss": 1.6864, + "step": 333500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00010462184873949579, + "loss": 1.681, + "step": 334000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010430672268907562, + "loss": 1.6913, + "step": 334500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010399159663865545, + "loss": 1.6789, + "step": 335000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010367647058823528, + "loss": 1.6939, + "step": 335500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010336134453781512, + "loss": 1.7741, + "step": 336000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010304621848739495, + "loss": 1.7619, + "step": 336500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010273109243697478, + "loss": 1.7091, + "step": 337000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010241596638655462, + "loss": 1.7116, + "step": 337500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00010210084033613445, + "loss": 1.7243, + "step": 338000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00010178571428571428, + "loss": 1.7255, + "step": 338500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00010147058823529411, + "loss": 1.715, + "step": 339000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00010115546218487395, + "loss": 1.6924, + "step": 339500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00010084033613445378, + "loss": 1.6824, + "step": 340000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001005252100840336, + "loss": 1.6885, + "step": 340500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00010021008403361343, + "loss": 1.6848, + "step": 341000 + }, + { + "epoch": 0.92, + "learning_rate": 9.989495798319326e-05, + "loss": 1.6831, + "step": 341500 + }, + { + "epoch": 0.93, + "learning_rate": 9.95798319327731e-05, + "loss": 1.6836, + "step": 342000 + }, + { + "epoch": 0.93, + "learning_rate": 9.926470588235293e-05, + "loss": 1.6765, + "step": 342500 + }, + { + "epoch": 0.93, + "learning_rate": 9.894957983193276e-05, + "loss": 1.6775, + "step": 343000 + }, + { + "epoch": 0.93, + "learning_rate": 9.86344537815126e-05, + "loss": 1.6854, + "step": 343500 + }, + { + "epoch": 0.93, + "learning_rate": 9.831932773109243e-05, + "loss": 1.6851, + "step": 344000 + }, + { + "epoch": 0.93, + "learning_rate": 9.800420168067226e-05, + "loss": 1.6761, + "step": 344500 + }, + { + "epoch": 0.93, + "learning_rate": 9.76890756302521e-05, + "loss": 1.6693, + "step": 345000 + }, + { + "epoch": 0.94, + "learning_rate": 9.737394957983193e-05, + "loss": 1.6734, + "step": 345500 + }, + { + "epoch": 0.94, + "learning_rate": 9.705882352941176e-05, + "loss": 1.6774, + "step": 346000 + }, + { + "epoch": 0.94, + "learning_rate": 9.67436974789916e-05, + "loss": 1.6749, + "step": 346500 + }, + { + "epoch": 0.94, + "learning_rate": 9.642857142857143e-05, + "loss": 1.6789, + "step": 347000 + }, + { + "epoch": 0.94, + "learning_rate": 9.611344537815126e-05, + "loss": 1.6758, + "step": 347500 + }, + { + "epoch": 0.94, + "learning_rate": 9.579831932773108e-05, + "loss": 1.6753, + "step": 348000 + }, + { + "epoch": 0.94, + "learning_rate": 9.548319327731091e-05, + "loss": 1.6737, + "step": 348500 + }, + { + "epoch": 0.94, + "learning_rate": 9.516806722689074e-05, + "loss": 1.6723, + "step": 349000 + }, + { + "epoch": 0.95, + "learning_rate": 9.485294117647057e-05, + "loss": 1.6752, + "step": 349500 + }, + { + "epoch": 0.95, + "learning_rate": 9.45378151260504e-05, + "loss": 1.6706, + "step": 350000 + }, + { + "epoch": 0.95, + "learning_rate": 9.422268907563025e-05, + "loss": 1.669, + "step": 350500 + }, + { + "epoch": 0.95, + "learning_rate": 9.390756302521008e-05, + "loss": 1.6694, + "step": 351000 + }, + { + "epoch": 0.95, + "learning_rate": 9.359243697478991e-05, + "loss": 1.6677, + "step": 351500 + }, + { + "epoch": 0.95, + "learning_rate": 9.327731092436974e-05, + "loss": 1.6709, + "step": 352000 + }, + { + "epoch": 0.95, + "learning_rate": 9.296218487394957e-05, + "loss": 1.6645, + "step": 352500 + }, + { + "epoch": 0.96, + "learning_rate": 9.26470588235294e-05, + "loss": 1.6648, + "step": 353000 + }, + { + "epoch": 0.96, + "learning_rate": 9.233193277310923e-05, + "loss": 1.6717, + "step": 353500 + }, + { + "epoch": 0.96, + "learning_rate": 9.201680672268908e-05, + "loss": 1.6614, + "step": 354000 + }, + { + "epoch": 0.96, + "learning_rate": 9.170168067226891e-05, + "loss": 1.664, + "step": 354500 + }, + { + "epoch": 0.96, + "learning_rate": 9.138655462184873e-05, + "loss": 1.6681, + "step": 355000 + }, + { + "epoch": 0.96, + "learning_rate": 9.107142857142856e-05, + "loss": 1.6683, + "step": 355500 + }, + { + "epoch": 0.96, + "learning_rate": 9.075630252100839e-05, + "loss": 1.6639, + "step": 356000 + }, + { + "epoch": 0.97, + "learning_rate": 9.044117647058822e-05, + "loss": 1.6637, + "step": 356500 + }, + { + "epoch": 0.97, + "learning_rate": 9.012605042016805e-05, + "loss": 1.6576, + "step": 357000 + }, + { + "epoch": 0.97, + "learning_rate": 8.981092436974788e-05, + "loss": 1.6616, + "step": 357500 + }, + { + "epoch": 0.97, + "learning_rate": 8.949579831932773e-05, + "loss": 1.6604, + "step": 358000 + }, + { + "epoch": 0.97, + "learning_rate": 8.918067226890756e-05, + "loss": 1.6611, + "step": 358500 + }, + { + "epoch": 0.97, + "learning_rate": 8.886554621848739e-05, + "loss": 1.6597, + "step": 359000 + }, + { + "epoch": 0.97, + "learning_rate": 8.855042016806722e-05, + "loss": 1.6613, + "step": 359500 + }, + { + "epoch": 0.97, + "learning_rate": 8.823529411764705e-05, + "loss": 1.6588, + "step": 360000 + }, + { + "epoch": 0.98, + "learning_rate": 8.792016806722688e-05, + "loss": 1.6573, + "step": 360500 + }, + { + "epoch": 0.98, + "learning_rate": 8.760504201680673e-05, + "loss": 1.6587, + "step": 361000 + }, + { + "epoch": 0.98, + "learning_rate": 8.728991596638656e-05, + "loss": 1.6581, + "step": 361500 + }, + { + "epoch": 0.98, + "learning_rate": 8.697478991596639e-05, + "loss": 1.6531, + "step": 362000 + }, + { + "epoch": 0.98, + "learning_rate": 8.66596638655462e-05, + "loss": 1.6542, + "step": 362500 + }, + { + "epoch": 0.98, + "learning_rate": 8.634453781512604e-05, + "loss": 1.6545, + "step": 363000 + }, + { + "epoch": 0.98, + "learning_rate": 8.602941176470587e-05, + "loss": 1.6519, + "step": 363500 + }, + { + "epoch": 0.99, + "learning_rate": 8.57142857142857e-05, + "loss": 1.6557, + "step": 364000 + }, + { + "epoch": 0.99, + "learning_rate": 8.539915966386553e-05, + "loss": 1.6518, + "step": 364500 + }, + { + "epoch": 0.99, + "learning_rate": 8.508403361344537e-05, + "loss": 1.6531, + "step": 365000 + }, + { + "epoch": 0.99, + "learning_rate": 8.47689075630252e-05, + "loss": 1.6481, + "step": 365500 + }, + { + "epoch": 0.99, + "learning_rate": 8.445378151260504e-05, + "loss": 1.6475, + "step": 366000 + }, + { + "epoch": 0.99, + "learning_rate": 8.413865546218487e-05, + "loss": 1.6491, + "step": 366500 + }, + { + "epoch": 0.99, + "learning_rate": 8.38235294117647e-05, + "loss": 1.6556, + "step": 367000 + }, + { + "epoch": 0.99, + "learning_rate": 8.350840336134453e-05, + "loss": 1.6472, + "step": 367500 + }, + { + "epoch": 1.0, + "learning_rate": 8.319327731092436e-05, + "loss": 1.6477, + "step": 368000 + }, + { + "epoch": 1.0, + "learning_rate": 8.28781512605042e-05, + "loss": 1.6496, + "step": 368500 + }, + { + "epoch": 1.0, + "learning_rate": 8.256302521008404e-05, + "loss": 1.6479, + "step": 369000 + }, + { + "epoch": 1.0, + "learning_rate": 8.224789915966387e-05, + "loss": 1.6492, + "step": 369500 + }, + { + "epoch": 1.0, + "learning_rate": 8.193277310924368e-05, + "loss": 1.6443, + "step": 370000 + }, + { + "epoch": 1.0, + "learning_rate": 8.161764705882352e-05, + "loss": 1.6443, + "step": 370500 + }, + { + "epoch": 1.0, + "learning_rate": 8.130252100840335e-05, + "loss": 1.6462, + "step": 371000 + }, + { + "epoch": 1.01, + "learning_rate": 8.098739495798318e-05, + "loss": 1.6454, + "step": 371500 + }, + { + "epoch": 1.01, + "learning_rate": 8.067226890756301e-05, + "loss": 1.6416, + "step": 372000 + }, + { + "epoch": 1.01, + "learning_rate": 8.035714285714285e-05, + "loss": 1.6433, + "step": 372500 + }, + { + "epoch": 1.01, + "learning_rate": 8.004201680672268e-05, + "loss": 1.6447, + "step": 373000 + }, + { + "epoch": 1.01, + "learning_rate": 7.972689075630251e-05, + "loss": 1.6454, + "step": 373500 + }, + { + "epoch": 1.01, + "learning_rate": 7.941176470588235e-05, + "loss": 1.6402, + "step": 374000 + }, + { + "epoch": 1.01, + "learning_rate": 7.909663865546218e-05, + "loss": 1.642, + "step": 374500 + }, + { + "epoch": 1.02, + "learning_rate": 7.878151260504201e-05, + "loss": 1.6401, + "step": 375000 + }, + { + "epoch": 1.02, + "learning_rate": 7.846638655462185e-05, + "loss": 1.6446, + "step": 375500 + }, + { + "epoch": 1.02, + "learning_rate": 7.815126050420168e-05, + "loss": 1.6374, + "step": 376000 + }, + { + "epoch": 1.02, + "learning_rate": 7.783613445378151e-05, + "loss": 1.6425, + "step": 376500 + }, + { + "epoch": 1.02, + "learning_rate": 7.752100840336135e-05, + "loss": 1.6418, + "step": 377000 + }, + { + "epoch": 1.02, + "learning_rate": 7.720588235294116e-05, + "loss": 1.6407, + "step": 377500 + }, + { + "epoch": 1.02, + "learning_rate": 7.6890756302521e-05, + "loss": 1.6373, + "step": 378000 + }, + { + "epoch": 1.02, + "learning_rate": 7.657563025210082e-05, + "loss": 1.6397, + "step": 378500 + }, + { + "epoch": 1.03, + "learning_rate": 7.626050420168066e-05, + "loss": 1.6408, + "step": 379000 + }, + { + "epoch": 1.03, + "learning_rate": 7.59453781512605e-05, + "loss": 1.6407, + "step": 379500 + }, + { + "epoch": 1.03, + "learning_rate": 7.563025210084033e-05, + "loss": 1.6397, + "step": 380000 + }, + { + "epoch": 1.03, + "learning_rate": 7.531512605042016e-05, + "loss": 1.6406, + "step": 380500 + }, + { + "epoch": 1.03, + "learning_rate": 7.5e-05, + "loss": 1.6399, + "step": 381000 + }, + { + "epoch": 1.03, + "learning_rate": 7.468487394957982e-05, + "loss": 1.6403, + "step": 381500 + }, + { + "epoch": 1.03, + "learning_rate": 7.436974789915966e-05, + "loss": 1.6394, + "step": 382000 + }, + { + "epoch": 1.04, + "learning_rate": 7.405462184873949e-05, + "loss": 1.6377, + "step": 382500 + }, + { + "epoch": 1.04, + "learning_rate": 7.373949579831932e-05, + "loss": 1.6365, + "step": 383000 + }, + { + "epoch": 1.04, + "learning_rate": 7.342436974789915e-05, + "loss": 1.6329, + "step": 383500 + }, + { + "epoch": 1.04, + "learning_rate": 7.310924369747898e-05, + "loss": 1.6361, + "step": 384000 + }, + { + "epoch": 1.04, + "learning_rate": 7.279411764705881e-05, + "loss": 1.6325, + "step": 384500 + }, + { + "epoch": 1.04, + "learning_rate": 7.247899159663865e-05, + "loss": 1.6347, + "step": 385000 + }, + { + "epoch": 1.04, + "learning_rate": 7.216386554621849e-05, + "loss": 1.6364, + "step": 385500 + }, + { + "epoch": 1.04, + "learning_rate": 7.184873949579832e-05, + "loss": 1.6293, + "step": 386000 + }, + { + "epoch": 1.05, + "learning_rate": 7.153361344537815e-05, + "loss": 1.6306, + "step": 386500 + }, + { + "epoch": 1.05, + "learning_rate": 7.121848739495798e-05, + "loss": 1.6308, + "step": 387000 + }, + { + "epoch": 1.05, + "learning_rate": 7.090336134453781e-05, + "loss": 1.6315, + "step": 387500 + }, + { + "epoch": 1.05, + "learning_rate": 7.058823529411764e-05, + "loss": 1.6326, + "step": 388000 + }, + { + "epoch": 1.05, + "learning_rate": 7.027310924369747e-05, + "loss": 1.6296, + "step": 388500 + }, + { + "epoch": 1.05, + "learning_rate": 6.99579831932773e-05, + "loss": 1.6332, + "step": 389000 + }, + { + "epoch": 1.05, + "learning_rate": 6.964285714285713e-05, + "loss": 1.6337, + "step": 389500 + }, + { + "epoch": 1.06, + "learning_rate": 6.932773109243698e-05, + "loss": 1.6279, + "step": 390000 + }, + { + "epoch": 1.06, + "learning_rate": 6.90126050420168e-05, + "loss": 1.6296, + "step": 390500 + }, + { + "epoch": 1.06, + "learning_rate": 6.869747899159663e-05, + "loss": 1.6244, + "step": 391000 + }, + { + "epoch": 1.06, + "learning_rate": 6.838235294117646e-05, + "loss": 1.6323, + "step": 391500 + }, + { + "epoch": 1.06, + "learning_rate": 6.80672268907563e-05, + "loss": 1.63, + "step": 392000 + }, + { + "epoch": 1.06, + "learning_rate": 6.775210084033613e-05, + "loss": 1.6253, + "step": 392500 + }, + { + "epoch": 1.06, + "learning_rate": 6.743697478991596e-05, + "loss": 1.623, + "step": 393000 + }, + { + "epoch": 1.07, + "learning_rate": 6.71218487394958e-05, + "loss": 1.6291, + "step": 393500 + }, + { + "epoch": 1.07, + "learning_rate": 6.680672268907563e-05, + "loss": 1.6264, + "step": 394000 + }, + { + "epoch": 1.07, + "learning_rate": 6.649159663865546e-05, + "loss": 1.6278, + "step": 394500 + }, + { + "epoch": 1.07, + "learning_rate": 6.617647058823529e-05, + "loss": 1.6274, + "step": 395000 + }, + { + "epoch": 1.07, + "learning_rate": 6.586134453781512e-05, + "loss": 1.6254, + "step": 395500 + }, + { + "epoch": 1.07, + "learning_rate": 6.554621848739495e-05, + "loss": 1.6237, + "step": 396000 + }, + { + "epoch": 1.07, + "learning_rate": 6.523109243697478e-05, + "loss": 1.6232, + "step": 396500 + }, + { + "epoch": 1.07, + "learning_rate": 6.491596638655461e-05, + "loss": 1.6204, + "step": 397000 + }, + { + "epoch": 1.08, + "learning_rate": 6.460084033613446e-05, + "loss": 1.6243, + "step": 397500 + }, + { + "epoch": 1.08, + "learning_rate": 6.428571428571427e-05, + "loss": 1.6234, + "step": 398000 + }, + { + "epoch": 1.08, + "learning_rate": 6.39705882352941e-05, + "loss": 1.6237, + "step": 398500 + }, + { + "epoch": 1.08, + "learning_rate": 6.365546218487394e-05, + "loss": 1.6239, + "step": 399000 + }, + { + "epoch": 1.08, + "learning_rate": 6.334033613445378e-05, + "loss": 1.6215, + "step": 399500 + }, + { + "epoch": 1.08, + "learning_rate": 6.302521008403361e-05, + "loss": 1.6229, + "step": 400000 + }, + { + "epoch": 1.08, + "learning_rate": 6.271008403361344e-05, + "loss": 1.6197, + "step": 400500 + }, + { + "epoch": 1.09, + "learning_rate": 6.239495798319327e-05, + "loss": 1.6183, + "step": 401000 + }, + { + "epoch": 1.09, + "learning_rate": 6.20798319327731e-05, + "loss": 1.6206, + "step": 401500 + }, + { + "epoch": 1.09, + "learning_rate": 6.176470588235294e-05, + "loss": 1.6182, + "step": 402000 + }, + { + "epoch": 1.09, + "learning_rate": 6.144957983193277e-05, + "loss": 1.621, + "step": 402500 + }, + { + "epoch": 1.09, + "learning_rate": 6.11344537815126e-05, + "loss": 1.6204, + "step": 403000 + }, + { + "epoch": 1.09, + "learning_rate": 6.081932773109243e-05, + "loss": 1.6213, + "step": 403500 + }, + { + "epoch": 1.09, + "learning_rate": 6.0504201680672267e-05, + "loss": 1.6187, + "step": 404000 + }, + { + "epoch": 1.09, + "learning_rate": 6.01890756302521e-05, + "loss": 1.617, + "step": 404500 + }, + { + "epoch": 1.1, + "learning_rate": 5.987394957983193e-05, + "loss": 1.6145, + "step": 405000 + }, + { + "epoch": 1.1, + "learning_rate": 5.955882352941175e-05, + "loss": 1.6146, + "step": 405500 + }, + { + "epoch": 1.1, + "learning_rate": 5.924369747899159e-05, + "loss": 1.615, + "step": 406000 + }, + { + "epoch": 1.1, + "learning_rate": 5.892857142857142e-05, + "loss": 1.6138, + "step": 406500 + }, + { + "epoch": 1.1, + "learning_rate": 5.861344537815125e-05, + "loss": 1.6158, + "step": 407000 + }, + { + "epoch": 1.1, + "learning_rate": 5.829831932773109e-05, + "loss": 1.6149, + "step": 407500 + }, + { + "epoch": 1.1, + "learning_rate": 5.798319327731092e-05, + "loss": 1.6198, + "step": 408000 + }, + { + "epoch": 1.11, + "learning_rate": 5.766806722689075e-05, + "loss": 1.6117, + "step": 408500 + }, + { + "epoch": 1.11, + "learning_rate": 5.7352941176470576e-05, + "loss": 1.6141, + "step": 409000 + }, + { + "epoch": 1.11, + "learning_rate": 5.7037815126050414e-05, + "loss": 1.6113, + "step": 409500 + }, + { + "epoch": 1.11, + "learning_rate": 5.6722689075630245e-05, + "loss": 1.6126, + "step": 410000 + }, + { + "epoch": 1.11, + "learning_rate": 5.6407563025210076e-05, + "loss": 1.612, + "step": 410500 + }, + { + "epoch": 1.11, + "learning_rate": 5.6092436974789914e-05, + "loss": 1.6067, + "step": 411000 + }, + { + "epoch": 1.11, + "learning_rate": 5.5777310924369745e-05, + "loss": 1.6128, + "step": 411500 + }, + { + "epoch": 1.12, + "learning_rate": 5.5462184873949576e-05, + "loss": 1.6148, + "step": 412000 + }, + { + "epoch": 1.12, + "learning_rate": 5.5147058823529414e-05, + "loss": 1.61, + "step": 412500 + }, + { + "epoch": 1.12, + "learning_rate": 5.483193277310924e-05, + "loss": 1.6094, + "step": 413000 + }, + { + "epoch": 1.12, + "learning_rate": 5.451680672268907e-05, + "loss": 1.6108, + "step": 413500 + }, + { + "epoch": 1.12, + "learning_rate": 5.42016806722689e-05, + "loss": 1.6029, + "step": 414000 + }, + { + "epoch": 1.12, + "learning_rate": 5.388655462184874e-05, + "loss": 1.608, + "step": 414500 + }, + { + "epoch": 1.12, + "learning_rate": 5.357142857142857e-05, + "loss": 1.6046, + "step": 415000 + }, + { + "epoch": 1.12, + "learning_rate": 5.32563025210084e-05, + "loss": 1.6096, + "step": 415500 + }, + { + "epoch": 1.13, + "learning_rate": 5.294117647058824e-05, + "loss": 1.6056, + "step": 416000 + }, + { + "epoch": 1.13, + "learning_rate": 5.262605042016806e-05, + "loss": 1.6055, + "step": 416500 + }, + { + "epoch": 1.13, + "learning_rate": 5.231092436974789e-05, + "loss": 1.6019, + "step": 417000 + }, + { + "epoch": 1.13, + "learning_rate": 5.1995798319327724e-05, + "loss": 1.603, + "step": 417500 + }, + { + "epoch": 1.13, + "learning_rate": 5.168067226890756e-05, + "loss": 1.6007, + "step": 418000 + }, + { + "epoch": 1.13, + "learning_rate": 5.136554621848739e-05, + "loss": 1.6038, + "step": 418500 + }, + { + "epoch": 1.13, + "learning_rate": 5.105042016806722e-05, + "loss": 1.6017, + "step": 419000 + }, + { + "epoch": 1.14, + "learning_rate": 5.0735294117647054e-05, + "loss": 1.6051, + "step": 419500 + }, + { + "epoch": 1.14, + "learning_rate": 5.042016806722689e-05, + "loss": 1.5992, + "step": 420000 + }, + { + "epoch": 1.14, + "learning_rate": 5.0105042016806716e-05, + "loss": 1.6022, + "step": 420500 + }, + { + "epoch": 1.14, + "learning_rate": 4.978991596638655e-05, + "loss": 1.6038, + "step": 421000 + }, + { + "epoch": 1.14, + "learning_rate": 4.947478991596638e-05, + "loss": 1.6019, + "step": 421500 + }, + { + "epoch": 1.14, + "learning_rate": 4.9159663865546216e-05, + "loss": 1.6006, + "step": 422000 + }, + { + "epoch": 1.14, + "learning_rate": 4.884453781512605e-05, + "loss": 1.6046, + "step": 422500 + }, + { + "epoch": 1.15, + "learning_rate": 4.852941176470588e-05, + "loss": 1.6049, + "step": 423000 + }, + { + "epoch": 1.15, + "learning_rate": 4.8214285714285716e-05, + "loss": 1.6004, + "step": 423500 + }, + { + "epoch": 1.15, + "learning_rate": 4.789915966386554e-05, + "loss": 1.6016, + "step": 424000 + }, + { + "epoch": 1.15, + "learning_rate": 4.758403361344537e-05, + "loss": 1.6024, + "step": 424500 + }, + { + "epoch": 1.15, + "learning_rate": 4.72689075630252e-05, + "loss": 1.5994, + "step": 425000 + }, + { + "epoch": 1.15, + "learning_rate": 4.695378151260504e-05, + "loss": 1.5989, + "step": 425500 + }, + { + "epoch": 1.15, + "learning_rate": 4.663865546218487e-05, + "loss": 1.599, + "step": 426000 + }, + { + "epoch": 1.15, + "learning_rate": 4.63235294117647e-05, + "loss": 1.5968, + "step": 426500 + }, + { + "epoch": 1.16, + "learning_rate": 4.600840336134454e-05, + "loss": 1.5968, + "step": 427000 + }, + { + "epoch": 1.16, + "learning_rate": 4.5693277310924364e-05, + "loss": 1.5981, + "step": 427500 + }, + { + "epoch": 1.16, + "learning_rate": 4.5378151260504195e-05, + "loss": 1.5961, + "step": 428000 + }, + { + "epoch": 1.16, + "learning_rate": 4.5063025210084026e-05, + "loss": 1.5967, + "step": 428500 + }, + { + "epoch": 1.16, + "learning_rate": 4.4747899159663864e-05, + "loss": 1.5963, + "step": 429000 + }, + { + "epoch": 1.16, + "learning_rate": 4.4432773109243695e-05, + "loss": 1.5937, + "step": 429500 + }, + { + "epoch": 1.16, + "learning_rate": 4.4117647058823526e-05, + "loss": 1.5963, + "step": 430000 + }, + { + "epoch": 1.17, + "learning_rate": 4.380252100840336e-05, + "loss": 1.5961, + "step": 430500 + }, + { + "epoch": 1.17, + "learning_rate": 4.3487394957983194e-05, + "loss": 1.5955, + "step": 431000 + }, + { + "epoch": 1.17, + "learning_rate": 4.317226890756302e-05, + "loss": 1.5905, + "step": 431500 + }, + { + "epoch": 1.17, + "learning_rate": 4.285714285714285e-05, + "loss": 1.5956, + "step": 432000 + }, + { + "epoch": 1.17, + "learning_rate": 4.254201680672269e-05, + "loss": 1.5938, + "step": 432500 + }, + { + "epoch": 1.17, + "learning_rate": 4.222689075630252e-05, + "loss": 1.5939, + "step": 433000 + }, + { + "epoch": 1.17, + "learning_rate": 4.191176470588235e-05, + "loss": 1.5919, + "step": 433500 + }, + { + "epoch": 1.17, + "learning_rate": 4.159663865546218e-05, + "loss": 1.5905, + "step": 434000 + }, + { + "epoch": 1.18, + "learning_rate": 4.128151260504202e-05, + "loss": 1.589, + "step": 434500 + }, + { + "epoch": 1.18, + "learning_rate": 4.096638655462184e-05, + "loss": 1.592, + "step": 435000 + }, + { + "epoch": 1.18, + "learning_rate": 4.065126050420167e-05, + "loss": 1.594, + "step": 435500 + }, + { + "epoch": 1.18, + "learning_rate": 4.0336134453781504e-05, + "loss": 1.5908, + "step": 436000 + }, + { + "epoch": 1.18, + "learning_rate": 4.002100840336134e-05, + "loss": 1.5876, + "step": 436500 + }, + { + "epoch": 1.18, + "learning_rate": 3.970588235294117e-05, + "loss": 1.5899, + "step": 437000 + }, + { + "epoch": 1.18, + "learning_rate": 3.9390756302521004e-05, + "loss": 1.5912, + "step": 437500 + }, + { + "epoch": 1.19, + "learning_rate": 3.907563025210084e-05, + "loss": 1.5885, + "step": 438000 + }, + { + "epoch": 1.19, + "learning_rate": 3.876050420168067e-05, + "loss": 1.5914, + "step": 438500 + }, + { + "epoch": 1.19, + "learning_rate": 3.84453781512605e-05, + "loss": 1.5905, + "step": 439000 + }, + { + "epoch": 1.19, + "learning_rate": 3.813025210084033e-05, + "loss": 1.5885, + "step": 439500 + }, + { + "epoch": 1.19, + "learning_rate": 3.7815126050420166e-05, + "loss": 1.5861, + "step": 440000 + }, + { + "epoch": 1.19, + "learning_rate": 3.75e-05, + "loss": 1.5877, + "step": 440500 + }, + { + "epoch": 1.19, + "learning_rate": 3.718487394957983e-05, + "loss": 1.5846, + "step": 441000 + }, + { + "epoch": 1.2, + "learning_rate": 3.686974789915966e-05, + "loss": 1.5875, + "step": 441500 + }, + { + "epoch": 1.2, + "learning_rate": 3.655462184873949e-05, + "loss": 1.5854, + "step": 442000 + }, + { + "epoch": 1.2, + "learning_rate": 3.623949579831933e-05, + "loss": 1.5824, + "step": 442500 + }, + { + "epoch": 1.2, + "learning_rate": 3.592436974789916e-05, + "loss": 1.5847, + "step": 443000 + }, + { + "epoch": 1.2, + "learning_rate": 3.560924369747899e-05, + "loss": 1.5848, + "step": 443500 + }, + { + "epoch": 1.2, + "learning_rate": 3.529411764705882e-05, + "loss": 1.5862, + "step": 444000 + }, + { + "epoch": 1.2, + "learning_rate": 3.497899159663865e-05, + "loss": 1.583, + "step": 444500 + }, + { + "epoch": 1.2, + "learning_rate": 3.466386554621849e-05, + "loss": 1.5854, + "step": 445000 + }, + { + "epoch": 1.21, + "learning_rate": 3.4348739495798313e-05, + "loss": 1.584, + "step": 445500 + }, + { + "epoch": 1.21, + "learning_rate": 3.403361344537815e-05, + "loss": 1.5825, + "step": 446000 + }, + { + "epoch": 1.21, + "learning_rate": 3.371848739495798e-05, + "loss": 1.5825, + "step": 446500 + }, + { + "epoch": 1.21, + "learning_rate": 3.340336134453781e-05, + "loss": 1.58, + "step": 447000 + }, + { + "epoch": 1.21, + "learning_rate": 3.3088235294117644e-05, + "loss": 1.5808, + "step": 447500 + }, + { + "epoch": 1.21, + "learning_rate": 3.2773109243697475e-05, + "loss": 1.5785, + "step": 448000 + }, + { + "epoch": 1.21, + "learning_rate": 3.2457983193277306e-05, + "loss": 1.5825, + "step": 448500 + }, + { + "epoch": 1.22, + "learning_rate": 3.214285714285714e-05, + "loss": 1.5783, + "step": 449000 + }, + { + "epoch": 1.22, + "learning_rate": 3.182773109243697e-05, + "loss": 1.5762, + "step": 449500 + }, + { + "epoch": 1.22, + "learning_rate": 3.1512605042016806e-05, + "loss": 1.5771, + "step": 450000 + }, + { + "epoch": 1.22, + "learning_rate": 3.119747899159664e-05, + "loss": 1.5822, + "step": 450500 + }, + { + "epoch": 1.22, + "learning_rate": 3.088235294117647e-05, + "loss": 1.5834, + "step": 451000 + }, + { + "epoch": 1.22, + "learning_rate": 3.05672268907563e-05, + "loss": 1.5763, + "step": 451500 + }, + { + "epoch": 1.22, + "learning_rate": 3.0252100840336133e-05, + "loss": 1.5771, + "step": 452000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9936974789915964e-05, + "loss": 1.5771, + "step": 452500 + }, + { + "epoch": 1.23, + "learning_rate": 2.9621848739495795e-05, + "loss": 1.5819, + "step": 453000 + }, + { + "epoch": 1.23, + "learning_rate": 2.9306722689075626e-05, + "loss": 1.5786, + "step": 453500 + }, + { + "epoch": 1.23, + "learning_rate": 2.899159663865546e-05, + "loss": 1.579, + "step": 454000 + }, + { + "epoch": 1.23, + "learning_rate": 2.8676470588235288e-05, + "loss": 1.5774, + "step": 454500 + }, + { + "epoch": 1.23, + "learning_rate": 2.8361344537815123e-05, + "loss": 1.5733, + "step": 455000 + }, + { + "epoch": 1.23, + "learning_rate": 2.8046218487394957e-05, + "loss": 1.5743, + "step": 455500 + }, + { + "epoch": 1.23, + "learning_rate": 2.7731092436974788e-05, + "loss": 1.5715, + "step": 456000 + }, + { + "epoch": 1.24, + "learning_rate": 2.741596638655462e-05, + "loss": 1.5767, + "step": 456500 + }, + { + "epoch": 1.24, + "learning_rate": 2.710084033613445e-05, + "loss": 1.5753, + "step": 457000 + }, + { + "epoch": 1.24, + "learning_rate": 2.6785714285714284e-05, + "loss": 1.5726, + "step": 457500 + }, + { + "epoch": 1.24, + "learning_rate": 2.647058823529412e-05, + "loss": 1.5729, + "step": 458000 + }, + { + "epoch": 1.24, + "learning_rate": 2.6155462184873946e-05, + "loss": 1.5735, + "step": 458500 + }, + { + "epoch": 1.24, + "learning_rate": 2.584033613445378e-05, + "loss": 1.5719, + "step": 459000 + }, + { + "epoch": 1.24, + "learning_rate": 2.552521008403361e-05, + "loss": 1.5673, + "step": 459500 + }, + { + "epoch": 1.25, + "learning_rate": 2.5210084033613446e-05, + "loss": 1.5746, + "step": 460000 + }, + { + "epoch": 1.25, + "learning_rate": 2.4894957983193274e-05, + "loss": 1.5715, + "step": 460500 + }, + { + "epoch": 1.25, + "learning_rate": 2.4579831932773108e-05, + "loss": 1.5698, + "step": 461000 + }, + { + "epoch": 1.25, + "learning_rate": 2.426470588235294e-05, + "loss": 1.569, + "step": 461500 + }, + { + "epoch": 1.25, + "learning_rate": 2.394957983193277e-05, + "loss": 1.5693, + "step": 462000 + }, + { + "epoch": 1.25, + "learning_rate": 2.36344537815126e-05, + "loss": 1.5718, + "step": 462500 + }, + { + "epoch": 1.25, + "learning_rate": 2.3319327731092435e-05, + "loss": 1.5704, + "step": 463000 + }, + { + "epoch": 1.25, + "learning_rate": 2.300420168067227e-05, + "loss": 1.566, + "step": 463500 + }, + { + "epoch": 1.26, + "learning_rate": 2.2689075630252097e-05, + "loss": 1.5702, + "step": 464000 + }, + { + "epoch": 1.26, + "learning_rate": 2.2373949579831932e-05, + "loss": 1.572, + "step": 464500 + }, + { + "epoch": 1.26, + "learning_rate": 2.2058823529411763e-05, + "loss": 1.5689, + "step": 465000 + }, + { + "epoch": 1.26, + "learning_rate": 2.1743697478991597e-05, + "loss": 1.5692, + "step": 465500 + }, + { + "epoch": 1.26, + "learning_rate": 2.1428571428571425e-05, + "loss": 1.5679, + "step": 466000 + }, + { + "epoch": 1.26, + "learning_rate": 2.111344537815126e-05, + "loss": 1.5645, + "step": 466500 + }, + { + "epoch": 1.26, + "learning_rate": 2.079831932773109e-05, + "loss": 1.5667, + "step": 467000 + }, + { + "epoch": 1.27, + "learning_rate": 2.048319327731092e-05, + "loss": 1.5659, + "step": 467500 + }, + { + "epoch": 1.27, + "learning_rate": 2.0168067226890752e-05, + "loss": 1.5628, + "step": 468000 + }, + { + "epoch": 1.27, + "learning_rate": 1.9852941176470586e-05, + "loss": 1.5622, + "step": 468500 + }, + { + "epoch": 1.27, + "learning_rate": 1.953781512605042e-05, + "loss": 1.5674, + "step": 469000 + }, + { + "epoch": 1.27, + "learning_rate": 1.922268907563025e-05, + "loss": 1.5645, + "step": 469500 + }, + { + "epoch": 1.27, + "learning_rate": 1.8907563025210083e-05, + "loss": 1.5647, + "step": 470000 + }, + { + "epoch": 1.27, + "learning_rate": 1.8592436974789914e-05, + "loss": 1.5641, + "step": 470500 + }, + { + "epoch": 1.28, + "learning_rate": 1.8277310924369745e-05, + "loss": 1.5656, + "step": 471000 + }, + { + "epoch": 1.28, + "learning_rate": 1.796218487394958e-05, + "loss": 1.5635, + "step": 471500 + }, + { + "epoch": 1.28, + "learning_rate": 1.764705882352941e-05, + "loss": 1.5612, + "step": 472000 + }, + { + "epoch": 1.28, + "learning_rate": 1.7331932773109245e-05, + "loss": 1.5619, + "step": 472500 + }, + { + "epoch": 1.28, + "learning_rate": 1.7016806722689076e-05, + "loss": 1.5643, + "step": 473000 + }, + { + "epoch": 1.28, + "learning_rate": 1.6701680672268907e-05, + "loss": 1.5607, + "step": 473500 + }, + { + "epoch": 1.28, + "learning_rate": 1.6386554621848738e-05, + "loss": 1.5642, + "step": 474000 + }, + { + "epoch": 1.28, + "learning_rate": 1.607142857142857e-05, + "loss": 1.5603, + "step": 474500 + }, + { + "epoch": 1.29, + "learning_rate": 1.5756302521008403e-05, + "loss": 1.5591, + "step": 475000 + }, + { + "epoch": 1.29, + "learning_rate": 1.5441176470588234e-05, + "loss": 1.5632, + "step": 475500 + }, + { + "epoch": 1.29, + "learning_rate": 1.5126050420168067e-05, + "loss": 1.5606, + "step": 476000 + }, + { + "epoch": 1.29, + "learning_rate": 1.4810924369747898e-05, + "loss": 1.5598, + "step": 476500 + }, + { + "epoch": 1.29, + "learning_rate": 1.449579831932773e-05, + "loss": 1.5616, + "step": 477000 + }, + { + "epoch": 1.29, + "learning_rate": 1.4180672268907561e-05, + "loss": 1.5609, + "step": 477500 + }, + { + "epoch": 1.29, + "learning_rate": 1.3865546218487394e-05, + "loss": 1.56, + "step": 478000 + }, + { + "epoch": 1.3, + "learning_rate": 1.3550420168067225e-05, + "loss": 1.5609, + "step": 478500 + }, + { + "epoch": 1.3, + "learning_rate": 1.323529411764706e-05, + "loss": 1.5603, + "step": 479000 + }, + { + "epoch": 1.3, + "learning_rate": 1.292016806722689e-05, + "loss": 1.5612, + "step": 479500 + }, + { + "epoch": 1.3, + "learning_rate": 1.2605042016806723e-05, + "loss": 1.5655, + "step": 480000 + }, + { + "epoch": 1.3, + "learning_rate": 1.2289915966386554e-05, + "loss": 1.5588, + "step": 480500 + }, + { + "epoch": 1.3, + "learning_rate": 1.1974789915966385e-05, + "loss": 1.561, + "step": 481000 + }, + { + "epoch": 1.3, + "learning_rate": 1.1659663865546218e-05, + "loss": 1.5585, + "step": 481500 + }, + { + "epoch": 1.3, + "learning_rate": 1.1344537815126049e-05, + "loss": 1.5569, + "step": 482000 + }, + { + "epoch": 1.31, + "learning_rate": 1.1029411764705881e-05, + "loss": 1.5576, + "step": 482500 + }, + { + "epoch": 1.31, + "learning_rate": 1.0714285714285712e-05, + "loss": 1.5551, + "step": 483000 + }, + { + "epoch": 1.31, + "learning_rate": 1.0399159663865545e-05, + "loss": 1.5576, + "step": 483500 + }, + { + "epoch": 1.31, + "learning_rate": 1.0084033613445376e-05, + "loss": 1.558, + "step": 484000 + }, + { + "epoch": 1.31, + "learning_rate": 9.76890756302521e-06, + "loss": 1.5595, + "step": 484500 + }, + { + "epoch": 1.31, + "learning_rate": 9.453781512605041e-06, + "loss": 1.5606, + "step": 485000 + }, + { + "epoch": 1.31, + "learning_rate": 9.138655462184872e-06, + "loss": 1.5629, + "step": 485500 + }, + { + "epoch": 1.32, + "learning_rate": 8.823529411764705e-06, + "loss": 1.5605, + "step": 486000 + }, + { + "epoch": 1.32, + "learning_rate": 8.508403361344538e-06, + "loss": 1.5582, + "step": 486500 + }, + { + "epoch": 1.32, + "learning_rate": 8.193277310924369e-06, + "loss": 1.5558, + "step": 487000 + }, + { + "epoch": 1.32, + "learning_rate": 7.878151260504201e-06, + "loss": 1.5562, + "step": 487500 + }, + { + "epoch": 1.32, + "learning_rate": 7.563025210084033e-06, + "loss": 1.559, + "step": 488000 + }, + { + "epoch": 1.32, + "learning_rate": 7.247899159663865e-06, + "loss": 1.5608, + "step": 488500 + }, + { + "epoch": 1.32, + "learning_rate": 6.932773109243697e-06, + "loss": 1.5595, + "step": 489000 + }, + { + "epoch": 1.33, + "learning_rate": 6.61764705882353e-06, + "loss": 1.5569, + "step": 489500 + }, + { + "epoch": 1.33, + "learning_rate": 6.3025210084033615e-06, + "loss": 1.5545, + "step": 490000 + }, + { + "epoch": 1.33, + "learning_rate": 5.9873949579831925e-06, + "loss": 1.557, + "step": 490500 + }, + { + "epoch": 1.33, + "learning_rate": 5.672268907563024e-06, + "loss": 1.5559, + "step": 491000 + }, + { + "epoch": 1.33, + "learning_rate": 5.357142857142856e-06, + "loss": 1.5577, + "step": 491500 + }, + { + "epoch": 1.33, + "learning_rate": 5.042016806722688e-06, + "loss": 1.5555, + "step": 492000 + }, + { + "epoch": 1.33, + "learning_rate": 4.726890756302521e-06, + "loss": 1.5554, + "step": 492500 + }, + { + "epoch": 1.33, + "learning_rate": 4.4117647058823526e-06, + "loss": 1.5552, + "step": 493000 + }, + { + "epoch": 1.34, + "learning_rate": 4.096638655462184e-06, + "loss": 1.5573, + "step": 493500 + }, + { + "epoch": 1.34, + "learning_rate": 3.7815126050420167e-06, + "loss": 1.5555, + "step": 494000 + }, + { + "epoch": 1.34, + "learning_rate": 3.4663865546218485e-06, + "loss": 1.5551, + "step": 494500 + }, + { + "epoch": 1.34, + "learning_rate": 3.1512605042016808e-06, + "loss": 1.5499, + "step": 495000 + }, + { + "epoch": 1.34, + "learning_rate": 2.836134453781512e-06, + "loss": 1.5534, + "step": 495500 + }, + { + "epoch": 1.34, + "learning_rate": 2.521008403361344e-06, + "loss": 1.5509, + "step": 496000 + }, + { + "epoch": 1.34, + "learning_rate": 2.2058823529411763e-06, + "loss": 1.5551, + "step": 496500 + }, + { + "epoch": 1.35, + "learning_rate": 1.8907563025210083e-06, + "loss": 1.5567, + "step": 497000 + }, + { + "epoch": 1.35, + "learning_rate": 1.5756302521008404e-06, + "loss": 1.5546, + "step": 497500 + }, + { + "epoch": 1.35, + "learning_rate": 1.260504201680672e-06, + "loss": 1.5567, + "step": 498000 + }, + { + "epoch": 1.35, + "learning_rate": 9.453781512605042e-07, + "loss": 1.5546, + "step": 498500 + }, + { + "epoch": 1.35, + "learning_rate": 6.30252100840336e-07, + "loss": 1.5575, + "step": 499000 + }, + { + "epoch": 1.35, + "learning_rate": 3.15126050420168e-07, + "loss": 1.5552, + "step": 499500 + }, + { + "epoch": 1.35, + "learning_rate": 0.0, + "loss": 1.552, + "step": 500000 + }, + { + "epoch": 1.35, + "step": 500000, + "total_flos": 8.422691657052488e+18, + "train_loss": 1.8320032868652343, + "train_runtime": 99252.1755, + "train_samples_per_second": 1289.644, + "train_steps_per_second": 5.038 + } + ], + "logging_steps": 500, + "max_steps": 500000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10000, + "total_flos": 8.422691657052488e+18, + "train_batch_size": 256, + "trial_name": null, + "trial_params": null +}