diff --git "a/checkpoint-6862662/trainer_state.json" "b/checkpoint-6862662/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-6862662/trainer_state.json" @@ -0,0 +1,82740 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 34.0, + "global_step": 6862662, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9998768845092475e-05, + "loss": 3.3856, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753025866639e-05, + "loss": 3.1861, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629167224031e-05, + "loss": 3.1416, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995053085814226e-05, + "loss": 3.0688, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999381449938814e-05, + "loss": 3.0467, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.999257591296206e-05, + "loss": 3.0166, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999133980370883e-05, + "loss": 3.0395, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990101217282745e-05, + "loss": 2.9982, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988862630856655e-05, + "loss": 2.9531, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987626521603424e-05, + "loss": 2.985, + "step": 5000 + }, + { + "epoch": 0.03, + "learning_rate": 4.998638793517734e-05, + "loss": 2.9528, + "step": 5500 + }, + { + "epoch": 0.03, + "learning_rate": 4.998514934875126e-05, + "loss": 2.9339, + "step": 6000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9983910762325175e-05, + "loss": 2.9353, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 4.998267217589909e-05, + "loss": 2.9251, + "step": 7000 + }, + { + "epoch": 0.04, + "learning_rate": 4.998143358947301e-05, + "loss": 2.9249, + "step": 7500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9980195003046926e-05, + "loss": 2.922, + "step": 8000 + }, + { + "epoch": 0.04, + "learning_rate": 4.997895641662084e-05, + "loss": 2.9129, + "step": 8500 + }, + { + "epoch": 0.04, + "learning_rate": 4.997771783019476e-05, + "loss": 2.8805, + "step": 9000 + }, + { + "epoch": 0.05, + "learning_rate": 4.997647924376868e-05, + "loss": 2.8891, + "step": 9500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9975240657342594e-05, + "loss": 2.8833, + "step": 10000 + }, + { + "epoch": 0.05, + "learning_rate": 4.997400207091651e-05, + "loss": 2.9069, + "step": 10500 + }, + { + "epoch": 0.05, + "learning_rate": 4.997276596166328e-05, + "loss": 2.8774, + "step": 11000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997152737523719e-05, + "loss": 2.8608, + "step": 11500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9970288788811106e-05, + "loss": 2.8853, + "step": 12000 + }, + { + "epoch": 0.06, + "learning_rate": 4.996905020238502e-05, + "loss": 2.854, + "step": 12500 + }, + { + "epoch": 0.06, + "learning_rate": 4.996781161595894e-05, + "loss": 2.8447, + "step": 13000 + }, + { + "epoch": 0.07, + "learning_rate": 4.996657550670571e-05, + "loss": 2.8466, + "step": 13500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9965336920279626e-05, + "loss": 2.8537, + "step": 14000 + }, + { + "epoch": 0.07, + "learning_rate": 4.996409833385354e-05, + "loss": 2.8332, + "step": 14500 + }, + { + "epoch": 0.07, + "learning_rate": 4.996285974742746e-05, + "loss": 2.8452, + "step": 15000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996162363817423e-05, + "loss": 2.8432, + "step": 15500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9960385051748146e-05, + "loss": 2.846, + "step": 16000 + }, + { + "epoch": 0.08, + "learning_rate": 4.995914646532206e-05, + "loss": 2.8418, + "step": 16500 + }, + { + "epoch": 0.08, + "learning_rate": 4.995790787889598e-05, + "loss": 2.8401, + "step": 17000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9956669292469896e-05, + "loss": 2.8253, + "step": 17500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9955430706043807e-05, + "loss": 2.8343, + "step": 18000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9954192119617723e-05, + "loss": 2.8231, + "step": 18500 + }, + { + "epoch": 0.09, + "learning_rate": 4.995295353319164e-05, + "loss": 2.7981, + "step": 19000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995171742393841e-05, + "loss": 2.8358, + "step": 19500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9950478837512326e-05, + "loss": 2.825, + "step": 20000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994924025108624e-05, + "loss": 2.7965, + "step": 20500 + }, + { + "epoch": 0.1, + "learning_rate": 4.994800166466016e-05, + "loss": 2.8224, + "step": 21000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9946768032579774e-05, + "loss": 2.8108, + "step": 21500 + }, + { + "epoch": 0.11, + "learning_rate": 4.994552944615369e-05, + "loss": 2.8076, + "step": 22000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994429085972761e-05, + "loss": 2.8013, + "step": 22500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9943052273301525e-05, + "loss": 2.818, + "step": 23000 + }, + { + "epoch": 0.12, + "learning_rate": 4.99418161640483e-05, + "loss": 2.8436, + "step": 23500 + }, + { + "epoch": 0.12, + "learning_rate": 4.994057757762222e-05, + "loss": 2.8163, + "step": 24000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993933899119613e-05, + "loss": 2.7991, + "step": 24500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9938100404770044e-05, + "loss": 2.7893, + "step": 25000 + }, + { + "epoch": 0.13, + "learning_rate": 4.993686181834396e-05, + "loss": 2.7931, + "step": 25500 + }, + { + "epoch": 0.13, + "learning_rate": 4.993562323191788e-05, + "loss": 2.8015, + "step": 26000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9934384645491795e-05, + "loss": 2.7764, + "step": 26500 + }, + { + "epoch": 0.13, + "learning_rate": 4.993314605906571e-05, + "loss": 2.7875, + "step": 27000 + }, + { + "epoch": 0.14, + "learning_rate": 4.993190747263963e-05, + "loss": 2.8176, + "step": 27500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9930668886213546e-05, + "loss": 2.7744, + "step": 28000 + }, + { + "epoch": 0.14, + "learning_rate": 4.992943029978746e-05, + "loss": 2.8011, + "step": 28500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9928194190534225e-05, + "loss": 2.7897, + "step": 29000 + }, + { + "epoch": 0.15, + "learning_rate": 4.992695560410814e-05, + "loss": 2.7773, + "step": 29500 + }, + { + "epoch": 0.15, + "learning_rate": 4.992571701768206e-05, + "loss": 2.7895, + "step": 30000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9924478431255976e-05, + "loss": 2.795, + "step": 30500 + }, + { + "epoch": 0.15, + "learning_rate": 4.992323984482989e-05, + "loss": 2.7796, + "step": 31000 + }, + { + "epoch": 0.16, + "learning_rate": 4.992200125840381e-05, + "loss": 2.7868, + "step": 31500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9920762671977726e-05, + "loss": 2.8024, + "step": 32000 + }, + { + "epoch": 0.16, + "learning_rate": 4.991952408555164e-05, + "loss": 2.7609, + "step": 32500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9918290453471264e-05, + "loss": 2.7815, + "step": 33000 + }, + { + "epoch": 0.17, + "learning_rate": 4.991705186704518e-05, + "loss": 2.7409, + "step": 33500 + }, + { + "epoch": 0.17, + "learning_rate": 4.99158132806191e-05, + "loss": 2.7607, + "step": 34000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9914574694193015e-05, + "loss": 2.7929, + "step": 34500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9913336107766925e-05, + "loss": 2.782, + "step": 35000 + }, + { + "epoch": 0.18, + "learning_rate": 4.99120999985137e-05, + "loss": 2.785, + "step": 35500 + }, + { + "epoch": 0.18, + "learning_rate": 4.991086141208762e-05, + "loss": 2.7697, + "step": 36000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9909622825661534e-05, + "loss": 2.7626, + "step": 36500 + }, + { + "epoch": 0.18, + "learning_rate": 4.99083867164083e-05, + "loss": 2.7679, + "step": 37000 + }, + { + "epoch": 0.19, + "learning_rate": 4.990714812998222e-05, + "loss": 2.7565, + "step": 37500 + }, + { + "epoch": 0.19, + "learning_rate": 4.990590954355614e-05, + "loss": 2.7848, + "step": 38000 + }, + { + "epoch": 0.19, + "learning_rate": 4.99046734343029e-05, + "loss": 2.7555, + "step": 38500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9903434847876816e-05, + "loss": 2.7742, + "step": 39000 + }, + { + "epoch": 0.2, + "learning_rate": 4.990219626145073e-05, + "loss": 2.7821, + "step": 39500 + }, + { + "epoch": 0.2, + "learning_rate": 4.990095767502465e-05, + "loss": 2.7616, + "step": 40000 + }, + { + "epoch": 0.2, + "learning_rate": 4.989971908859857e-05, + "loss": 2.7711, + "step": 40500 + }, + { + "epoch": 0.2, + "learning_rate": 4.9898480502172484e-05, + "loss": 2.7499, + "step": 41000 + }, + { + "epoch": 0.21, + "learning_rate": 4.98972419157464e-05, + "loss": 2.7549, + "step": 41500 + }, + { + "epoch": 0.21, + "learning_rate": 4.989600332932032e-05, + "loss": 2.7782, + "step": 42000 + }, + { + "epoch": 0.21, + "learning_rate": 4.9894764742894234e-05, + "loss": 2.748, + "step": 42500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9893528633641e-05, + "loss": 2.7325, + "step": 43000 + }, + { + "epoch": 0.22, + "learning_rate": 4.989229004721492e-05, + "loss": 2.7488, + "step": 43500 + }, + { + "epoch": 0.22, + "learning_rate": 4.989105146078884e-05, + "loss": 2.7606, + "step": 44000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9889812874362754e-05, + "loss": 2.7445, + "step": 44500 + }, + { + "epoch": 0.22, + "learning_rate": 4.988857428793667e-05, + "loss": 2.76, + "step": 45000 + }, + { + "epoch": 0.23, + "learning_rate": 4.988733570151059e-05, + "loss": 2.7375, + "step": 45500 + }, + { + "epoch": 0.23, + "learning_rate": 4.98860971150845e-05, + "loss": 2.7624, + "step": 46000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9884858528658415e-05, + "loss": 2.7588, + "step": 46500 + }, + { + "epoch": 0.23, + "learning_rate": 4.9883622419405184e-05, + "loss": 2.7508, + "step": 47000 + }, + { + "epoch": 0.24, + "learning_rate": 4.988238631015195e-05, + "loss": 2.7562, + "step": 47500 + }, + { + "epoch": 0.24, + "learning_rate": 4.988114772372587e-05, + "loss": 2.7546, + "step": 48000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9879909137299787e-05, + "loss": 2.7657, + "step": 48500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9878670550873703e-05, + "loss": 2.741, + "step": 49000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9877434441620465e-05, + "loss": 2.7813, + "step": 49500 + }, + { + "epoch": 0.25, + "learning_rate": 4.987619585519438e-05, + "loss": 2.7575, + "step": 50000 + }, + { + "epoch": 0.25, + "learning_rate": 4.98749572687683e-05, + "loss": 2.7419, + "step": 50500 + }, + { + "epoch": 0.25, + "learning_rate": 4.9873718682342216e-05, + "loss": 2.7422, + "step": 51000 + }, + { + "epoch": 0.26, + "learning_rate": 4.987248009591613e-05, + "loss": 2.7333, + "step": 51500 + }, + { + "epoch": 0.26, + "learning_rate": 4.987124150949005e-05, + "loss": 2.743, + "step": 52000 + }, + { + "epoch": 0.26, + "learning_rate": 4.987000540023682e-05, + "loss": 2.7217, + "step": 52500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9868766813810736e-05, + "loss": 2.7355, + "step": 53000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986752822738465e-05, + "loss": 2.7393, + "step": 53500 + }, + { + "epoch": 0.27, + "learning_rate": 4.986628964095857e-05, + "loss": 2.7279, + "step": 54000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986505105453249e-05, + "loss": 2.7535, + "step": 54500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9863812468106404e-05, + "loss": 2.7389, + "step": 55000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986257635885317e-05, + "loss": 2.7488, + "step": 55500 + }, + { + "epoch": 0.28, + "learning_rate": 4.986134024959994e-05, + "loss": 2.7452, + "step": 56000 + }, + { + "epoch": 0.28, + "learning_rate": 4.986010166317385e-05, + "loss": 2.7503, + "step": 56500 + }, + { + "epoch": 0.28, + "learning_rate": 4.985886307674777e-05, + "loss": 2.7213, + "step": 57000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9857624490321685e-05, + "loss": 2.7421, + "step": 57500 + }, + { + "epoch": 0.29, + "learning_rate": 4.98563859038956e-05, + "loss": 2.7295, + "step": 58000 + }, + { + "epoch": 0.29, + "learning_rate": 4.985514731746952e-05, + "loss": 2.7164, + "step": 58500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9853908731043436e-05, + "loss": 2.7262, + "step": 59000 + }, + { + "epoch": 0.29, + "learning_rate": 4.985267014461735e-05, + "loss": 2.734, + "step": 59500 + }, + { + "epoch": 0.3, + "learning_rate": 4.985143155819127e-05, + "loss": 2.7186, + "step": 60000 + }, + { + "epoch": 0.3, + "learning_rate": 4.985019297176519e-05, + "loss": 2.739, + "step": 60500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9848954385339104e-05, + "loss": 2.7356, + "step": 61000 + }, + { + "epoch": 0.3, + "learning_rate": 4.984771579891302e-05, + "loss": 2.7406, + "step": 61500 + }, + { + "epoch": 0.31, + "learning_rate": 4.984647968965979e-05, + "loss": 2.7167, + "step": 62000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9845241103233706e-05, + "loss": 2.6958, + "step": 62500 + }, + { + "epoch": 0.31, + "learning_rate": 4.984400499398047e-05, + "loss": 2.7223, + "step": 63000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9842766407554385e-05, + "loss": 2.7279, + "step": 63500 + }, + { + "epoch": 0.32, + "learning_rate": 4.98415278211283e-05, + "loss": 2.7186, + "step": 64000 + }, + { + "epoch": 0.32, + "learning_rate": 4.984029171187508e-05, + "loss": 2.7359, + "step": 64500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9839053125448995e-05, + "loss": 2.7288, + "step": 65000 + }, + { + "epoch": 0.32, + "learning_rate": 4.983781453902291e-05, + "loss": 2.7157, + "step": 65500 + }, + { + "epoch": 0.33, + "learning_rate": 4.983657595259682e-05, + "loss": 2.7194, + "step": 66000 + }, + { + "epoch": 0.33, + "learning_rate": 4.983533736617074e-05, + "loss": 2.7106, + "step": 66500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9834098779744656e-05, + "loss": 2.7313, + "step": 67000 + }, + { + "epoch": 0.33, + "learning_rate": 4.983286019331857e-05, + "loss": 2.7072, + "step": 67500 + }, + { + "epoch": 0.34, + "learning_rate": 4.983162160689249e-05, + "loss": 2.7392, + "step": 68000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9830383020466406e-05, + "loss": 2.7083, + "step": 68500 + }, + { + "epoch": 0.34, + "learning_rate": 4.982914691121317e-05, + "loss": 2.7407, + "step": 69000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9827908324787085e-05, + "loss": 2.7374, + "step": 69500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9826669738361e-05, + "loss": 2.7151, + "step": 70000 + }, + { + "epoch": 0.35, + "learning_rate": 4.982543115193492e-05, + "loss": 2.7057, + "step": 70500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9824192565508836e-05, + "loss": 2.7337, + "step": 71000 + }, + { + "epoch": 0.35, + "learning_rate": 4.982295397908275e-05, + "loss": 2.7017, + "step": 71500 + }, + { + "epoch": 0.36, + "learning_rate": 4.982171786982952e-05, + "loss": 2.7105, + "step": 72000 + }, + { + "epoch": 0.36, + "learning_rate": 4.982047928340344e-05, + "loss": 2.7399, + "step": 72500 + }, + { + "epoch": 0.36, + "learning_rate": 4.981924565132306e-05, + "loss": 2.707, + "step": 73000 + }, + { + "epoch": 0.36, + "learning_rate": 4.981800954206983e-05, + "loss": 2.7199, + "step": 73500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9816770955643745e-05, + "loss": 2.7107, + "step": 74000 + }, + { + "epoch": 0.37, + "learning_rate": 4.981553236921766e-05, + "loss": 2.7136, + "step": 74500 + }, + { + "epoch": 0.37, + "learning_rate": 4.981429378279158e-05, + "loss": 2.711, + "step": 75000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9813055196365496e-05, + "loss": 2.705, + "step": 75500 + }, + { + "epoch": 0.38, + "learning_rate": 4.981181908711226e-05, + "loss": 2.672, + "step": 76000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9810580500686175e-05, + "loss": 2.7155, + "step": 76500 + }, + { + "epoch": 0.38, + "learning_rate": 4.980934191426009e-05, + "loss": 2.7097, + "step": 77000 + }, + { + "epoch": 0.38, + "learning_rate": 4.980810332783401e-05, + "loss": 2.7019, + "step": 77500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9806864741407926e-05, + "loss": 2.7046, + "step": 78000 + }, + { + "epoch": 0.39, + "learning_rate": 4.980562615498184e-05, + "loss": 2.7231, + "step": 78500 + }, + { + "epoch": 0.39, + "learning_rate": 4.980438756855576e-05, + "loss": 2.7074, + "step": 79000 + }, + { + "epoch": 0.39, + "learning_rate": 4.980314898212968e-05, + "loss": 2.7098, + "step": 79500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9801910395703594e-05, + "loss": 2.7095, + "step": 80000 + }, + { + "epoch": 0.4, + "learning_rate": 4.980067180927751e-05, + "loss": 2.6981, + "step": 80500 + }, + { + "epoch": 0.4, + "learning_rate": 4.979943322285143e-05, + "loss": 2.7144, + "step": 81000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9798194636425344e-05, + "loss": 2.7018, + "step": 81500 + }, + { + "epoch": 0.41, + "learning_rate": 4.979695604999926e-05, + "loss": 2.7057, + "step": 82000 + }, + { + "epoch": 0.41, + "learning_rate": 4.979571746357318e-05, + "loss": 2.6865, + "step": 82500 + }, + { + "epoch": 0.41, + "learning_rate": 4.9794478877147095e-05, + "loss": 2.7394, + "step": 83000 + }, + { + "epoch": 0.41, + "learning_rate": 4.979324029072101e-05, + "loss": 2.7101, + "step": 83500 + }, + { + "epoch": 0.42, + "learning_rate": 4.979200170429493e-05, + "loss": 2.7016, + "step": 84000 + }, + { + "epoch": 0.42, + "learning_rate": 4.979076311786884e-05, + "loss": 2.6752, + "step": 84500 + }, + { + "epoch": 0.42, + "learning_rate": 4.9789524531442756e-05, + "loss": 2.7344, + "step": 85000 + }, + { + "epoch": 0.42, + "learning_rate": 4.978828594501667e-05, + "loss": 2.7173, + "step": 85500 + }, + { + "epoch": 0.43, + "learning_rate": 4.978704735859059e-05, + "loss": 2.7205, + "step": 86000 + }, + { + "epoch": 0.43, + "learning_rate": 4.978581124933736e-05, + "loss": 2.6739, + "step": 86500 + }, + { + "epoch": 0.43, + "learning_rate": 4.9784572662911276e-05, + "loss": 2.7142, + "step": 87000 + }, + { + "epoch": 0.43, + "learning_rate": 4.978333407648519e-05, + "loss": 2.6928, + "step": 87500 + }, + { + "epoch": 0.44, + "learning_rate": 4.97820954900591e-05, + "loss": 2.722, + "step": 88000 + }, + { + "epoch": 0.44, + "learning_rate": 4.978085690363302e-05, + "loss": 2.7314, + "step": 88500 + }, + { + "epoch": 0.44, + "learning_rate": 4.9779620794379795e-05, + "loss": 2.7161, + "step": 89000 + }, + { + "epoch": 0.44, + "learning_rate": 4.977838220795371e-05, + "loss": 2.7042, + "step": 89500 + }, + { + "epoch": 0.45, + "learning_rate": 4.977714362152763e-05, + "loss": 2.7171, + "step": 90000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9775905035101546e-05, + "loss": 2.7061, + "step": 90500 + }, + { + "epoch": 0.45, + "learning_rate": 4.977466892584831e-05, + "loss": 2.6724, + "step": 91000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9773430339422225e-05, + "loss": 2.6936, + "step": 91500 + }, + { + "epoch": 0.46, + "learning_rate": 4.977219175299614e-05, + "loss": 2.709, + "step": 92000 + }, + { + "epoch": 0.46, + "learning_rate": 4.977095316657006e-05, + "loss": 2.7078, + "step": 92500 + }, + { + "epoch": 0.46, + "learning_rate": 4.976971705731683e-05, + "loss": 2.6722, + "step": 93000 + }, + { + "epoch": 0.46, + "learning_rate": 4.9768478470890745e-05, + "loss": 2.6995, + "step": 93500 + }, + { + "epoch": 0.47, + "learning_rate": 4.976723988446466e-05, + "loss": 2.6995, + "step": 94000 + }, + { + "epoch": 0.47, + "learning_rate": 4.976600129803858e-05, + "loss": 2.6942, + "step": 94500 + }, + { + "epoch": 0.47, + "learning_rate": 4.9764762711612495e-05, + "loss": 2.69, + "step": 95000 + }, + { + "epoch": 0.47, + "learning_rate": 4.9763526602359264e-05, + "loss": 2.7196, + "step": 95500 + }, + { + "epoch": 0.48, + "learning_rate": 4.976228801593318e-05, + "loss": 2.6655, + "step": 96000 + }, + { + "epoch": 0.48, + "learning_rate": 4.97610494295071e-05, + "loss": 2.7035, + "step": 96500 + }, + { + "epoch": 0.48, + "learning_rate": 4.9759810843081015e-05, + "loss": 2.7225, + "step": 97000 + }, + { + "epoch": 0.48, + "learning_rate": 4.9758572256654925e-05, + "loss": 2.6971, + "step": 97500 + }, + { + "epoch": 0.49, + "learning_rate": 4.975733367022884e-05, + "loss": 2.7039, + "step": 98000 + }, + { + "epoch": 0.49, + "learning_rate": 4.975609508380276e-05, + "loss": 2.6814, + "step": 98500 + }, + { + "epoch": 0.49, + "learning_rate": 4.9754856497376676e-05, + "loss": 2.7068, + "step": 99000 + }, + { + "epoch": 0.49, + "learning_rate": 4.975361791095059e-05, + "loss": 2.6989, + "step": 99500 + }, + { + "epoch": 0.5, + "learning_rate": 4.975237932452451e-05, + "loss": 2.6904, + "step": 100000 + }, + { + "epoch": 0.5, + "learning_rate": 4.975114073809842e-05, + "loss": 2.6863, + "step": 100500 + }, + { + "epoch": 0.5, + "learning_rate": 4.974990215167234e-05, + "loss": 2.6785, + "step": 101000 + }, + { + "epoch": 0.5, + "learning_rate": 4.974866604241911e-05, + "loss": 2.6996, + "step": 101500 + }, + { + "epoch": 0.51, + "learning_rate": 4.974742993316588e-05, + "loss": 2.7096, + "step": 102000 + }, + { + "epoch": 0.51, + "learning_rate": 4.97461913467398e-05, + "loss": 2.6979, + "step": 102500 + }, + { + "epoch": 0.51, + "learning_rate": 4.9744952760313715e-05, + "loss": 2.6932, + "step": 103000 + }, + { + "epoch": 0.51, + "learning_rate": 4.974371417388763e-05, + "loss": 2.6986, + "step": 103500 + }, + { + "epoch": 0.52, + "learning_rate": 4.974247558746155e-05, + "loss": 2.6888, + "step": 104000 + }, + { + "epoch": 0.52, + "learning_rate": 4.974123700103546e-05, + "loss": 2.6854, + "step": 104500 + }, + { + "epoch": 0.52, + "learning_rate": 4.974000089178223e-05, + "loss": 2.697, + "step": 105000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9738762305356145e-05, + "loss": 2.6931, + "step": 105500 + }, + { + "epoch": 0.53, + "learning_rate": 4.973752371893006e-05, + "loss": 2.7098, + "step": 106000 + }, + { + "epoch": 0.53, + "learning_rate": 4.973628513250398e-05, + "loss": 2.6989, + "step": 106500 + }, + { + "epoch": 0.53, + "learning_rate": 4.9735046546077896e-05, + "loss": 2.7059, + "step": 107000 + }, + { + "epoch": 0.53, + "learning_rate": 4.9733810436824664e-05, + "loss": 2.6612, + "step": 107500 + }, + { + "epoch": 0.54, + "learning_rate": 4.973257185039858e-05, + "loss": 2.6945, + "step": 108000 + }, + { + "epoch": 0.54, + "learning_rate": 4.97313332639725e-05, + "loss": 2.6951, + "step": 108500 + }, + { + "epoch": 0.54, + "learning_rate": 4.9730094677546415e-05, + "loss": 2.6871, + "step": 109000 + }, + { + "epoch": 0.54, + "learning_rate": 4.972885609112033e-05, + "loss": 2.6909, + "step": 109500 + }, + { + "epoch": 0.54, + "learning_rate": 4.972761750469425e-05, + "loss": 2.6948, + "step": 110000 + }, + { + "epoch": 0.55, + "learning_rate": 4.972638139544101e-05, + "loss": 2.7037, + "step": 110500 + }, + { + "epoch": 0.55, + "learning_rate": 4.972514280901493e-05, + "loss": 2.6804, + "step": 111000 + }, + { + "epoch": 0.55, + "learning_rate": 4.9723904222588845e-05, + "loss": 2.6895, + "step": 111500 + }, + { + "epoch": 0.55, + "learning_rate": 4.972266563616276e-05, + "loss": 2.6878, + "step": 112000 + }, + { + "epoch": 0.56, + "learning_rate": 4.972142704973668e-05, + "loss": 2.6916, + "step": 112500 + }, + { + "epoch": 0.56, + "learning_rate": 4.9720188463310596e-05, + "loss": 2.6843, + "step": 113000 + }, + { + "epoch": 0.56, + "learning_rate": 4.9718952354057365e-05, + "loss": 2.6903, + "step": 113500 + }, + { + "epoch": 0.56, + "learning_rate": 4.971771376763128e-05, + "loss": 2.6777, + "step": 114000 + }, + { + "epoch": 0.57, + "learning_rate": 4.97164751812052e-05, + "loss": 2.7029, + "step": 114500 + }, + { + "epoch": 0.57, + "learning_rate": 4.9715236594779115e-05, + "loss": 2.7026, + "step": 115000 + }, + { + "epoch": 0.57, + "learning_rate": 4.971399800835303e-05, + "loss": 2.6807, + "step": 115500 + }, + { + "epoch": 0.57, + "learning_rate": 4.971275942192695e-05, + "loss": 2.6868, + "step": 116000 + }, + { + "epoch": 0.58, + "learning_rate": 4.9711520835500866e-05, + "loss": 2.6976, + "step": 116500 + }, + { + "epoch": 0.58, + "learning_rate": 4.971028472624763e-05, + "loss": 2.6817, + "step": 117000 + }, + { + "epoch": 0.58, + "learning_rate": 4.9709046139821545e-05, + "loss": 2.6925, + "step": 117500 + }, + { + "epoch": 0.58, + "learning_rate": 4.970780755339546e-05, + "loss": 2.6972, + "step": 118000 + }, + { + "epoch": 0.59, + "learning_rate": 4.970656896696938e-05, + "loss": 2.6853, + "step": 118500 + }, + { + "epoch": 0.59, + "learning_rate": 4.9705330380543296e-05, + "loss": 2.6735, + "step": 119000 + }, + { + "epoch": 0.59, + "learning_rate": 4.970409179411721e-05, + "loss": 2.702, + "step": 119500 + }, + { + "epoch": 0.59, + "learning_rate": 4.970285320769113e-05, + "loss": 2.6876, + "step": 120000 + }, + { + "epoch": 0.6, + "learning_rate": 4.970161462126505e-05, + "loss": 2.6829, + "step": 120500 + }, + { + "epoch": 0.6, + "learning_rate": 4.9700378512011815e-05, + "loss": 2.6596, + "step": 121000 + }, + { + "epoch": 0.6, + "learning_rate": 4.969913992558573e-05, + "loss": 2.6907, + "step": 121500 + }, + { + "epoch": 0.6, + "learning_rate": 4.969790133915965e-05, + "loss": 2.6968, + "step": 122000 + }, + { + "epoch": 0.61, + "learning_rate": 4.9696662752733566e-05, + "loss": 2.6804, + "step": 122500 + }, + { + "epoch": 0.61, + "learning_rate": 4.969542664348033e-05, + "loss": 2.6914, + "step": 123000 + }, + { + "epoch": 0.61, + "learning_rate": 4.96941905342271e-05, + "loss": 2.6812, + "step": 123500 + }, + { + "epoch": 0.61, + "learning_rate": 4.9692951947801014e-05, + "loss": 2.6607, + "step": 124000 + }, + { + "epoch": 0.62, + "learning_rate": 4.969171336137493e-05, + "loss": 2.686, + "step": 124500 + }, + { + "epoch": 0.62, + "learning_rate": 4.969047477494885e-05, + "loss": 2.6591, + "step": 125000 + }, + { + "epoch": 0.62, + "learning_rate": 4.9689236188522765e-05, + "loss": 2.6824, + "step": 125500 + }, + { + "epoch": 0.62, + "learning_rate": 4.968799760209668e-05, + "loss": 2.694, + "step": 126000 + }, + { + "epoch": 0.63, + "learning_rate": 4.968676149284345e-05, + "loss": 2.6881, + "step": 126500 + }, + { + "epoch": 0.63, + "learning_rate": 4.968552290641737e-05, + "loss": 2.7259, + "step": 127000 + }, + { + "epoch": 0.63, + "learning_rate": 4.9684284319991284e-05, + "loss": 2.68, + "step": 127500 + }, + { + "epoch": 0.63, + "learning_rate": 4.9683045733565195e-05, + "loss": 2.6683, + "step": 128000 + }, + { + "epoch": 0.64, + "learning_rate": 4.968180714713911e-05, + "loss": 2.6481, + "step": 128500 + }, + { + "epoch": 0.64, + "learning_rate": 4.968056856071303e-05, + "loss": 2.6635, + "step": 129000 + }, + { + "epoch": 0.64, + "learning_rate": 4.96793324514598e-05, + "loss": 2.6736, + "step": 129500 + }, + { + "epoch": 0.64, + "learning_rate": 4.9678093865033714e-05, + "loss": 2.7035, + "step": 130000 + }, + { + "epoch": 0.65, + "learning_rate": 4.967685527860763e-05, + "loss": 2.6893, + "step": 130500 + }, + { + "epoch": 0.65, + "learning_rate": 4.967561669218155e-05, + "loss": 2.6946, + "step": 131000 + }, + { + "epoch": 0.65, + "learning_rate": 4.9674378105755465e-05, + "loss": 2.6781, + "step": 131500 + }, + { + "epoch": 0.65, + "learning_rate": 4.967313951932938e-05, + "loss": 2.6633, + "step": 132000 + }, + { + "epoch": 0.66, + "learning_rate": 4.96719009329033e-05, + "loss": 2.6894, + "step": 132500 + }, + { + "epoch": 0.66, + "learning_rate": 4.9670662346477216e-05, + "loss": 2.6659, + "step": 133000 + }, + { + "epoch": 0.66, + "learning_rate": 4.966942376005113e-05, + "loss": 2.668, + "step": 133500 + }, + { + "epoch": 0.66, + "learning_rate": 4.966818517362505e-05, + "loss": 2.6745, + "step": 134000 + }, + { + "epoch": 0.67, + "learning_rate": 4.9666946587198966e-05, + "loss": 2.6407, + "step": 134500 + }, + { + "epoch": 0.67, + "learning_rate": 4.966571047794573e-05, + "loss": 2.6832, + "step": 135000 + }, + { + "epoch": 0.67, + "learning_rate": 4.96644743686925e-05, + "loss": 2.6761, + "step": 135500 + }, + { + "epoch": 0.67, + "learning_rate": 4.9663235782266414e-05, + "loss": 2.7023, + "step": 136000 + }, + { + "epoch": 0.68, + "learning_rate": 4.966199719584033e-05, + "loss": 2.6568, + "step": 136500 + }, + { + "epoch": 0.68, + "learning_rate": 4.966075860941425e-05, + "loss": 2.6889, + "step": 137000 + }, + { + "epoch": 0.68, + "learning_rate": 4.9659520022988165e-05, + "loss": 2.6811, + "step": 137500 + }, + { + "epoch": 0.68, + "learning_rate": 4.965828391373494e-05, + "loss": 2.6648, + "step": 138000 + }, + { + "epoch": 0.69, + "learning_rate": 4.965704532730885e-05, + "loss": 2.668, + "step": 138500 + }, + { + "epoch": 0.69, + "learning_rate": 4.965580674088277e-05, + "loss": 2.6594, + "step": 139000 + }, + { + "epoch": 0.69, + "learning_rate": 4.9654568154456685e-05, + "loss": 2.6537, + "step": 139500 + }, + { + "epoch": 0.69, + "learning_rate": 4.96533295680306e-05, + "loss": 2.7002, + "step": 140000 + }, + { + "epoch": 0.7, + "learning_rate": 4.965209345877737e-05, + "loss": 2.679, + "step": 140500 + }, + { + "epoch": 0.7, + "learning_rate": 4.965085487235129e-05, + "loss": 2.6813, + "step": 141000 + }, + { + "epoch": 0.7, + "learning_rate": 4.9649618763098056e-05, + "loss": 2.6977, + "step": 141500 + }, + { + "epoch": 0.7, + "learning_rate": 4.964838017667197e-05, + "loss": 2.6734, + "step": 142000 + }, + { + "epoch": 0.71, + "learning_rate": 4.964714159024589e-05, + "loss": 2.6827, + "step": 142500 + }, + { + "epoch": 0.71, + "learning_rate": 4.964590300381981e-05, + "loss": 2.6551, + "step": 143000 + }, + { + "epoch": 0.71, + "learning_rate": 4.9644664417393724e-05, + "loss": 2.6671, + "step": 143500 + }, + { + "epoch": 0.71, + "learning_rate": 4.964342583096764e-05, + "loss": 2.6576, + "step": 144000 + }, + { + "epoch": 0.72, + "learning_rate": 4.96421897217144e-05, + "loss": 2.6725, + "step": 144500 + }, + { + "epoch": 0.72, + "learning_rate": 4.964095113528832e-05, + "loss": 2.6767, + "step": 145000 + }, + { + "epoch": 0.72, + "learning_rate": 4.963971254886224e-05, + "loss": 2.6836, + "step": 145500 + }, + { + "epoch": 0.72, + "learning_rate": 4.9638473962436154e-05, + "loss": 2.6378, + "step": 146000 + }, + { + "epoch": 0.73, + "learning_rate": 4.963723537601007e-05, + "loss": 2.6721, + "step": 146500 + }, + { + "epoch": 0.73, + "learning_rate": 4.963599678958399e-05, + "loss": 2.7062, + "step": 147000 + }, + { + "epoch": 0.73, + "learning_rate": 4.9634758203157904e-05, + "loss": 2.6502, + "step": 147500 + }, + { + "epoch": 0.73, + "learning_rate": 4.9633519616731815e-05, + "loss": 2.6751, + "step": 148000 + }, + { + "epoch": 0.74, + "learning_rate": 4.963228103030573e-05, + "loss": 2.6675, + "step": 148500 + }, + { + "epoch": 0.74, + "learning_rate": 4.963104244387965e-05, + "loss": 2.6572, + "step": 149000 + }, + { + "epoch": 0.74, + "learning_rate": 4.9629803857453565e-05, + "loss": 2.6735, + "step": 149500 + }, + { + "epoch": 0.74, + "learning_rate": 4.962856774820034e-05, + "loss": 2.6621, + "step": 150000 + }, + { + "epoch": 0.75, + "learning_rate": 4.962732916177426e-05, + "loss": 2.6808, + "step": 150500 + }, + { + "epoch": 0.75, + "learning_rate": 4.962609057534817e-05, + "loss": 2.6627, + "step": 151000 + }, + { + "epoch": 0.75, + "learning_rate": 4.9624851988922085e-05, + "loss": 2.6605, + "step": 151500 + }, + { + "epoch": 0.75, + "learning_rate": 4.9623613402496e-05, + "loss": 2.6853, + "step": 152000 + }, + { + "epoch": 0.76, + "learning_rate": 4.962237481606992e-05, + "loss": 2.6536, + "step": 152500 + }, + { + "epoch": 0.76, + "learning_rate": 4.9621136229643836e-05, + "loss": 2.6741, + "step": 153000 + }, + { + "epoch": 0.76, + "learning_rate": 4.961989764321775e-05, + "loss": 2.6637, + "step": 153500 + }, + { + "epoch": 0.76, + "learning_rate": 4.9618666488310225e-05, + "loss": 2.65, + "step": 154000 + }, + { + "epoch": 0.77, + "learning_rate": 4.961742790188414e-05, + "loss": 2.6598, + "step": 154500 + }, + { + "epoch": 0.77, + "learning_rate": 4.961618931545806e-05, + "loss": 2.6615, + "step": 155000 + }, + { + "epoch": 0.77, + "learning_rate": 4.9614950729031976e-05, + "loss": 2.6592, + "step": 155500 + }, + { + "epoch": 0.77, + "learning_rate": 4.9613712142605886e-05, + "loss": 2.6707, + "step": 156000 + }, + { + "epoch": 0.78, + "learning_rate": 4.96124735561798e-05, + "loss": 2.6291, + "step": 156500 + }, + { + "epoch": 0.78, + "learning_rate": 4.961123496975372e-05, + "loss": 2.6811, + "step": 157000 + }, + { + "epoch": 0.78, + "learning_rate": 4.960999638332764e-05, + "loss": 2.6534, + "step": 157500 + }, + { + "epoch": 0.78, + "learning_rate": 4.9608757796901554e-05, + "loss": 2.6623, + "step": 158000 + }, + { + "epoch": 0.79, + "learning_rate": 4.960751921047547e-05, + "loss": 2.6498, + "step": 158500 + }, + { + "epoch": 0.79, + "learning_rate": 4.960628062404939e-05, + "loss": 2.6617, + "step": 159000 + }, + { + "epoch": 0.79, + "learning_rate": 4.9605042037623305e-05, + "loss": 2.6545, + "step": 159500 + }, + { + "epoch": 0.79, + "learning_rate": 4.960380345119722e-05, + "loss": 2.6516, + "step": 160000 + }, + { + "epoch": 0.8, + "learning_rate": 4.960256734194399e-05, + "loss": 2.6722, + "step": 160500 + }, + { + "epoch": 0.8, + "learning_rate": 4.960132875551791e-05, + "loss": 2.6724, + "step": 161000 + }, + { + "epoch": 0.8, + "learning_rate": 4.9600090169091824e-05, + "loss": 2.6304, + "step": 161500 + }, + { + "epoch": 0.8, + "learning_rate": 4.959885158266574e-05, + "loss": 2.6624, + "step": 162000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959761299623966e-05, + "loss": 2.6667, + "step": 162500 + }, + { + "epoch": 0.81, + "learning_rate": 4.9596374409813575e-05, + "loss": 2.6568, + "step": 163000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959513830056034e-05, + "loss": 2.6541, + "step": 163500 + }, + { + "epoch": 0.81, + "learning_rate": 4.9593899714134254e-05, + "loss": 2.6696, + "step": 164000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959266112770817e-05, + "loss": 2.6907, + "step": 164500 + }, + { + "epoch": 0.82, + "learning_rate": 4.959142254128209e-05, + "loss": 2.656, + "step": 165000 + }, + { + "epoch": 0.82, + "learning_rate": 4.9590183954856005e-05, + "loss": 2.6893, + "step": 165500 + }, + { + "epoch": 0.82, + "learning_rate": 4.9588950322775625e-05, + "loss": 2.6747, + "step": 166000 + }, + { + "epoch": 0.82, + "learning_rate": 4.958771173634954e-05, + "loss": 2.6528, + "step": 166500 + }, + { + "epoch": 0.83, + "learning_rate": 4.958647314992346e-05, + "loss": 2.6685, + "step": 167000 + }, + { + "epoch": 0.83, + "learning_rate": 4.9585234563497376e-05, + "loss": 2.6476, + "step": 167500 + }, + { + "epoch": 0.83, + "learning_rate": 4.958399597707129e-05, + "loss": 2.66, + "step": 168000 + }, + { + "epoch": 0.83, + "learning_rate": 4.958275739064521e-05, + "loss": 2.6539, + "step": 168500 + }, + { + "epoch": 0.84, + "learning_rate": 4.958151880421913e-05, + "loss": 2.6832, + "step": 169000 + }, + { + "epoch": 0.84, + "learning_rate": 4.958028021779304e-05, + "loss": 2.6409, + "step": 169500 + }, + { + "epoch": 0.84, + "learning_rate": 4.9579044108539806e-05, + "loss": 2.6694, + "step": 170000 + }, + { + "epoch": 0.84, + "learning_rate": 4.957780552211372e-05, + "loss": 2.6599, + "step": 170500 + }, + { + "epoch": 0.85, + "learning_rate": 4.957656693568764e-05, + "loss": 2.6693, + "step": 171000 + }, + { + "epoch": 0.85, + "learning_rate": 4.957532834926156e-05, + "loss": 2.6476, + "step": 171500 + }, + { + "epoch": 0.85, + "learning_rate": 4.9574089762835474e-05, + "loss": 2.6592, + "step": 172000 + }, + { + "epoch": 0.85, + "learning_rate": 4.957285117640939e-05, + "loss": 2.6631, + "step": 172500 + }, + { + "epoch": 0.86, + "learning_rate": 4.957161258998331e-05, + "loss": 2.6674, + "step": 173000 + }, + { + "epoch": 0.86, + "learning_rate": 4.9570374003557224e-05, + "loss": 2.6603, + "step": 173500 + }, + { + "epoch": 0.86, + "learning_rate": 4.956913541713114e-05, + "loss": 2.6461, + "step": 174000 + }, + { + "epoch": 0.86, + "learning_rate": 4.956789930787791e-05, + "loss": 2.6766, + "step": 174500 + }, + { + "epoch": 0.87, + "learning_rate": 4.956666072145183e-05, + "loss": 2.6398, + "step": 175000 + }, + { + "epoch": 0.87, + "learning_rate": 4.956542461219859e-05, + "loss": 2.6699, + "step": 175500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9564186025772506e-05, + "loss": 2.6702, + "step": 176000 + }, + { + "epoch": 0.87, + "learning_rate": 4.956294743934642e-05, + "loss": 2.6357, + "step": 176500 + }, + { + "epoch": 0.88, + "learning_rate": 4.956170885292034e-05, + "loss": 2.6474, + "step": 177000 + }, + { + "epoch": 0.88, + "learning_rate": 4.956047026649426e-05, + "loss": 2.6496, + "step": 177500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9559231680068174e-05, + "loss": 2.664, + "step": 178000 + }, + { + "epoch": 0.88, + "learning_rate": 4.955799557081494e-05, + "loss": 2.6964, + "step": 178500 + }, + { + "epoch": 0.89, + "learning_rate": 4.955675698438886e-05, + "loss": 2.6707, + "step": 179000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9555518397962776e-05, + "loss": 2.654, + "step": 179500 + }, + { + "epoch": 0.89, + "learning_rate": 4.955427981153669e-05, + "loss": 2.6789, + "step": 180000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9553043702283455e-05, + "loss": 2.6482, + "step": 180500 + }, + { + "epoch": 0.9, + "learning_rate": 4.955180511585737e-05, + "loss": 2.6471, + "step": 181000 + }, + { + "epoch": 0.9, + "learning_rate": 4.955056652943129e-05, + "loss": 2.6439, + "step": 181500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9549327943005206e-05, + "loss": 2.6462, + "step": 182000 + }, + { + "epoch": 0.9, + "learning_rate": 4.954808935657912e-05, + "loss": 2.6688, + "step": 182500 + }, + { + "epoch": 0.91, + "learning_rate": 4.954685077015304e-05, + "loss": 2.6614, + "step": 183000 + }, + { + "epoch": 0.91, + "learning_rate": 4.954561218372696e-05, + "loss": 2.673, + "step": 183500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9544373597300874e-05, + "loss": 2.6636, + "step": 184000 + }, + { + "epoch": 0.91, + "learning_rate": 4.954313748804764e-05, + "loss": 2.6533, + "step": 184500 + }, + { + "epoch": 0.92, + "learning_rate": 4.954189890162156e-05, + "loss": 2.6585, + "step": 185000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9540660315195477e-05, + "loss": 2.6397, + "step": 185500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9539421728769394e-05, + "loss": 2.6475, + "step": 186000 + }, + { + "epoch": 0.92, + "learning_rate": 4.953818314234331e-05, + "loss": 2.658, + "step": 186500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953694455591723e-05, + "loss": 2.6754, + "step": 187000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9535705969491144e-05, + "loss": 2.6488, + "step": 187500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953446738306506e-05, + "loss": 2.6816, + "step": 188000 + }, + { + "epoch": 0.93, + "learning_rate": 4.953322879663898e-05, + "loss": 2.6558, + "step": 188500 + }, + { + "epoch": 0.94, + "learning_rate": 4.953199268738574e-05, + "loss": 2.6288, + "step": 189000 + }, + { + "epoch": 0.94, + "learning_rate": 4.953075410095966e-05, + "loss": 2.6406, + "step": 189500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9529517991706426e-05, + "loss": 2.6695, + "step": 190000 + }, + { + "epoch": 0.94, + "learning_rate": 4.952827940528034e-05, + "loss": 2.6565, + "step": 190500 + }, + { + "epoch": 0.95, + "learning_rate": 4.952704081885426e-05, + "loss": 2.6577, + "step": 191000 + }, + { + "epoch": 0.95, + "learning_rate": 4.952580470960103e-05, + "loss": 2.6499, + "step": 191500 + }, + { + "epoch": 0.95, + "learning_rate": 4.95245686003478e-05, + "loss": 2.6423, + "step": 192000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9523330013921714e-05, + "loss": 2.6735, + "step": 192500 + }, + { + "epoch": 0.96, + "learning_rate": 4.952209142749563e-05, + "loss": 2.6441, + "step": 193000 + }, + { + "epoch": 0.96, + "learning_rate": 4.952085284106955e-05, + "loss": 2.6782, + "step": 193500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9519614254643465e-05, + "loss": 2.6542, + "step": 194000 + }, + { + "epoch": 0.96, + "learning_rate": 4.951837566821738e-05, + "loss": 2.6564, + "step": 194500 + }, + { + "epoch": 0.97, + "learning_rate": 4.95171370817913e-05, + "loss": 2.6682, + "step": 195000 + }, + { + "epoch": 0.97, + "learning_rate": 4.951589849536521e-05, + "loss": 2.6803, + "step": 195500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9514659908939126e-05, + "loss": 2.6768, + "step": 196000 + }, + { + "epoch": 0.97, + "learning_rate": 4.951342132251304e-05, + "loss": 2.652, + "step": 196500 + }, + { + "epoch": 0.98, + "learning_rate": 4.951218273608696e-05, + "loss": 2.6467, + "step": 197000 + }, + { + "epoch": 0.98, + "learning_rate": 4.951094414966088e-05, + "loss": 2.631, + "step": 197500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9509705563234794e-05, + "loss": 2.6418, + "step": 198000 + }, + { + "epoch": 0.98, + "learning_rate": 4.950846697680871e-05, + "loss": 2.6617, + "step": 198500 + }, + { + "epoch": 0.99, + "learning_rate": 4.950722839038263e-05, + "loss": 2.6553, + "step": 199000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9505989803956545e-05, + "loss": 2.6369, + "step": 199500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9504753694703307e-05, + "loss": 2.6282, + "step": 200000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9503515108277224e-05, + "loss": 2.65, + "step": 200500 + }, + { + "epoch": 1.0, + "learning_rate": 4.950227652185114e-05, + "loss": 2.6651, + "step": 201000 + }, + { + "epoch": 1.0, + "learning_rate": 4.950103793542506e-05, + "loss": 2.6763, + "step": 201500 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.6236883143982033, + "eval_accuracy_mlm": 0.5761070598072291, + "eval_accuracy_nsp": 0.8479990900497727, + "eval_loss": 2.567748546600342, + "eval_runtime": 145.8951, + "eval_samples_per_second": 1747.55, + "eval_steps_per_second": 72.819, + "step": 201843 + }, + { + "epoch": 1.0, + "learning_rate": 4.9499801826171826e-05, + "loss": 2.6422, + "step": 202000 + }, + { + "epoch": 1.0, + "learning_rate": 4.949856323974574e-05, + "loss": 2.6314, + "step": 202500 + }, + { + "epoch": 1.01, + "learning_rate": 4.949732465331966e-05, + "loss": 2.6069, + "step": 203000 + }, + { + "epoch": 1.01, + "learning_rate": 4.949608606689358e-05, + "loss": 2.621, + "step": 203500 + }, + { + "epoch": 1.01, + "learning_rate": 4.9494847480467494e-05, + "loss": 2.6315, + "step": 204000 + }, + { + "epoch": 1.01, + "learning_rate": 4.949361137121426e-05, + "loss": 2.6192, + "step": 204500 + }, + { + "epoch": 1.02, + "learning_rate": 4.949237278478818e-05, + "loss": 2.6241, + "step": 205000 + }, + { + "epoch": 1.02, + "learning_rate": 4.949113667553495e-05, + "loss": 2.6318, + "step": 205500 + }, + { + "epoch": 1.02, + "learning_rate": 4.9489898089108865e-05, + "loss": 2.6222, + "step": 206000 + }, + { + "epoch": 1.02, + "learning_rate": 4.948865950268278e-05, + "loss": 2.6478, + "step": 206500 + }, + { + "epoch": 1.03, + "learning_rate": 4.94874209162567e-05, + "loss": 2.6204, + "step": 207000 + }, + { + "epoch": 1.03, + "learning_rate": 4.9486182329830616e-05, + "loss": 2.6316, + "step": 207500 + }, + { + "epoch": 1.03, + "learning_rate": 4.9484946220577385e-05, + "loss": 2.6242, + "step": 208000 + }, + { + "epoch": 1.03, + "learning_rate": 4.94837076341513e-05, + "loss": 2.5915, + "step": 208500 + }, + { + "epoch": 1.04, + "learning_rate": 4.948246904772522e-05, + "loss": 2.6172, + "step": 209000 + }, + { + "epoch": 1.04, + "learning_rate": 4.9481230461299136e-05, + "loss": 2.6155, + "step": 209500 + }, + { + "epoch": 1.04, + "learning_rate": 4.947999187487305e-05, + "loss": 2.6064, + "step": 210000 + }, + { + "epoch": 1.04, + "learning_rate": 4.947875328844697e-05, + "loss": 2.6333, + "step": 210500 + }, + { + "epoch": 1.05, + "learning_rate": 4.947751470202088e-05, + "loss": 2.6295, + "step": 211000 + }, + { + "epoch": 1.05, + "learning_rate": 4.94762761155948e-05, + "loss": 2.6262, + "step": 211500 + }, + { + "epoch": 1.05, + "learning_rate": 4.9475037529168714e-05, + "loss": 2.598, + "step": 212000 + }, + { + "epoch": 1.05, + "learning_rate": 4.947379894274263e-05, + "loss": 2.6367, + "step": 212500 + }, + { + "epoch": 1.06, + "learning_rate": 4.947256035631655e-05, + "loss": 2.6183, + "step": 213000 + }, + { + "epoch": 1.06, + "learning_rate": 4.947132176989046e-05, + "loss": 2.6535, + "step": 213500 + }, + { + "epoch": 1.06, + "learning_rate": 4.9470083183464375e-05, + "loss": 2.6075, + "step": 214000 + }, + { + "epoch": 1.06, + "learning_rate": 4.946884707421114e-05, + "loss": 2.5924, + "step": 214500 + }, + { + "epoch": 1.07, + "learning_rate": 4.946760848778506e-05, + "loss": 2.6268, + "step": 215000 + }, + { + "epoch": 1.07, + "learning_rate": 4.946636990135898e-05, + "loss": 2.6207, + "step": 215500 + }, + { + "epoch": 1.07, + "learning_rate": 4.9465131314932894e-05, + "loss": 2.5849, + "step": 216000 + }, + { + "epoch": 1.07, + "learning_rate": 4.946389520567967e-05, + "loss": 2.6141, + "step": 216500 + }, + { + "epoch": 1.08, + "learning_rate": 4.946265661925359e-05, + "loss": 2.6114, + "step": 217000 + }, + { + "epoch": 1.08, + "learning_rate": 4.94614180328275e-05, + "loss": 2.6018, + "step": 217500 + }, + { + "epoch": 1.08, + "learning_rate": 4.9460179446401414e-05, + "loss": 2.6197, + "step": 218000 + }, + { + "epoch": 1.08, + "learning_rate": 4.945894085997533e-05, + "loss": 2.6252, + "step": 218500 + }, + { + "epoch": 1.09, + "learning_rate": 4.945770227354925e-05, + "loss": 2.6027, + "step": 219000 + }, + { + "epoch": 1.09, + "learning_rate": 4.9456463687123165e-05, + "loss": 2.6315, + "step": 219500 + }, + { + "epoch": 1.09, + "learning_rate": 4.9455225100697075e-05, + "loss": 2.6094, + "step": 220000 + }, + { + "epoch": 1.09, + "learning_rate": 4.9453988991443843e-05, + "loss": 2.6246, + "step": 220500 + }, + { + "epoch": 1.09, + "learning_rate": 4.945275040501776e-05, + "loss": 2.6303, + "step": 221000 + }, + { + "epoch": 1.1, + "learning_rate": 4.945151181859168e-05, + "loss": 2.6081, + "step": 221500 + }, + { + "epoch": 1.1, + "learning_rate": 4.9450273232165594e-05, + "loss": 2.6129, + "step": 222000 + }, + { + "epoch": 1.1, + "learning_rate": 4.944903464573951e-05, + "loss": 2.6229, + "step": 222500 + }, + { + "epoch": 1.1, + "learning_rate": 4.944780101365913e-05, + "loss": 2.594, + "step": 223000 + }, + { + "epoch": 1.11, + "learning_rate": 4.944656242723305e-05, + "loss": 2.621, + "step": 223500 + }, + { + "epoch": 1.11, + "learning_rate": 4.9445323840806966e-05, + "loss": 2.6097, + "step": 224000 + }, + { + "epoch": 1.11, + "learning_rate": 4.944408525438088e-05, + "loss": 2.6152, + "step": 224500 + }, + { + "epoch": 1.11, + "learning_rate": 4.94428466679548e-05, + "loss": 2.6206, + "step": 225000 + }, + { + "epoch": 1.12, + "learning_rate": 4.944161055870157e-05, + "loss": 2.6391, + "step": 225500 + }, + { + "epoch": 1.12, + "learning_rate": 4.9440371972275485e-05, + "loss": 2.6487, + "step": 226000 + }, + { + "epoch": 1.12, + "learning_rate": 4.94391333858494e-05, + "loss": 2.5981, + "step": 226500 + }, + { + "epoch": 1.12, + "learning_rate": 4.943789479942332e-05, + "loss": 2.6175, + "step": 227000 + }, + { + "epoch": 1.13, + "learning_rate": 4.9436656212997236e-05, + "loss": 2.6332, + "step": 227500 + }, + { + "epoch": 1.13, + "learning_rate": 4.9435420103744e-05, + "loss": 2.6234, + "step": 228000 + }, + { + "epoch": 1.13, + "learning_rate": 4.9434181517317915e-05, + "loss": 2.6201, + "step": 228500 + }, + { + "epoch": 1.13, + "learning_rate": 4.943294293089183e-05, + "loss": 2.6235, + "step": 229000 + }, + { + "epoch": 1.14, + "learning_rate": 4.943170434446575e-05, + "loss": 2.6339, + "step": 229500 + }, + { + "epoch": 1.14, + "learning_rate": 4.9430465758039666e-05, + "loss": 2.5977, + "step": 230000 + }, + { + "epoch": 1.14, + "learning_rate": 4.942922717161358e-05, + "loss": 2.612, + "step": 230500 + }, + { + "epoch": 1.14, + "learning_rate": 4.942799106236035e-05, + "loss": 2.6263, + "step": 231000 + }, + { + "epoch": 1.15, + "learning_rate": 4.942675247593427e-05, + "loss": 2.6239, + "step": 231500 + }, + { + "epoch": 1.15, + "learning_rate": 4.9425513889508185e-05, + "loss": 2.6085, + "step": 232000 + }, + { + "epoch": 1.15, + "learning_rate": 4.94242753030821e-05, + "loss": 2.6082, + "step": 232500 + }, + { + "epoch": 1.15, + "learning_rate": 4.942303671665602e-05, + "loss": 2.6257, + "step": 233000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9421798130229936e-05, + "loss": 2.6365, + "step": 233500 + }, + { + "epoch": 1.16, + "learning_rate": 4.942055954380385e-05, + "loss": 2.6226, + "step": 234000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9419323434550615e-05, + "loss": 2.6333, + "step": 234500 + }, + { + "epoch": 1.16, + "learning_rate": 4.941808484812453e-05, + "loss": 2.6214, + "step": 235000 + }, + { + "epoch": 1.17, + "learning_rate": 4.941684626169845e-05, + "loss": 2.614, + "step": 235500 + }, + { + "epoch": 1.17, + "learning_rate": 4.941561015244522e-05, + "loss": 2.5987, + "step": 236000 + }, + { + "epoch": 1.17, + "learning_rate": 4.9414371566019135e-05, + "loss": 2.6155, + "step": 236500 + }, + { + "epoch": 1.17, + "learning_rate": 4.941313297959305e-05, + "loss": 2.6424, + "step": 237000 + }, + { + "epoch": 1.18, + "learning_rate": 4.941189439316697e-05, + "loss": 2.6057, + "step": 237500 + }, + { + "epoch": 1.18, + "learning_rate": 4.9410655806740886e-05, + "loss": 2.6237, + "step": 238000 + }, + { + "epoch": 1.18, + "learning_rate": 4.94094172203148e-05, + "loss": 2.6184, + "step": 238500 + }, + { + "epoch": 1.18, + "learning_rate": 4.940818111106157e-05, + "loss": 2.6616, + "step": 239000 + }, + { + "epoch": 1.19, + "learning_rate": 4.940694252463549e-05, + "loss": 2.6191, + "step": 239500 + }, + { + "epoch": 1.19, + "learning_rate": 4.9405703938209405e-05, + "loss": 2.5849, + "step": 240000 + }, + { + "epoch": 1.19, + "learning_rate": 4.940446535178332e-05, + "loss": 2.6178, + "step": 240500 + }, + { + "epoch": 1.19, + "learning_rate": 4.940322676535723e-05, + "loss": 2.6466, + "step": 241000 + }, + { + "epoch": 1.2, + "learning_rate": 4.940198817893115e-05, + "loss": 2.6025, + "step": 241500 + }, + { + "epoch": 1.2, + "learning_rate": 4.940075206967792e-05, + "loss": 2.5923, + "step": 242000 + }, + { + "epoch": 1.2, + "learning_rate": 4.9399513483251835e-05, + "loss": 2.6381, + "step": 242500 + }, + { + "epoch": 1.2, + "learning_rate": 4.939827489682575e-05, + "loss": 2.5819, + "step": 243000 + }, + { + "epoch": 1.21, + "learning_rate": 4.939703631039967e-05, + "loss": 2.6115, + "step": 243500 + }, + { + "epoch": 1.21, + "learning_rate": 4.9395797723973586e-05, + "loss": 2.6289, + "step": 244000 + }, + { + "epoch": 1.21, + "learning_rate": 4.93945591375475e-05, + "loss": 2.5913, + "step": 244500 + }, + { + "epoch": 1.21, + "learning_rate": 4.939332302829427e-05, + "loss": 2.6194, + "step": 245000 + }, + { + "epoch": 1.22, + "learning_rate": 4.939208444186819e-05, + "loss": 2.6232, + "step": 245500 + }, + { + "epoch": 1.22, + "learning_rate": 4.9390845855442105e-05, + "loss": 2.6176, + "step": 246000 + }, + { + "epoch": 1.22, + "learning_rate": 4.938960726901602e-05, + "loss": 2.6158, + "step": 246500 + }, + { + "epoch": 1.22, + "learning_rate": 4.938836868258994e-05, + "loss": 2.6155, + "step": 247000 + }, + { + "epoch": 1.23, + "learning_rate": 4.9387130096163856e-05, + "loss": 2.6407, + "step": 247500 + }, + { + "epoch": 1.23, + "learning_rate": 4.9385891509737766e-05, + "loss": 2.6334, + "step": 248000 + }, + { + "epoch": 1.23, + "learning_rate": 4.938465292331168e-05, + "loss": 2.62, + "step": 248500 + }, + { + "epoch": 1.23, + "learning_rate": 4.93834143368856e-05, + "loss": 2.6266, + "step": 249000 + }, + { + "epoch": 1.24, + "learning_rate": 4.938217575045952e-05, + "loss": 2.612, + "step": 249500 + }, + { + "epoch": 1.24, + "learning_rate": 4.9380937164033434e-05, + "loss": 2.6358, + "step": 250000 + }, + { + "epoch": 1.24, + "learning_rate": 4.937969857760735e-05, + "loss": 2.6289, + "step": 250500 + }, + { + "epoch": 1.24, + "learning_rate": 4.937846246835412e-05, + "loss": 2.6102, + "step": 251000 + }, + { + "epoch": 1.25, + "learning_rate": 4.9377223881928037e-05, + "loss": 2.6004, + "step": 251500 + }, + { + "epoch": 1.25, + "learning_rate": 4.9375985295501954e-05, + "loss": 2.607, + "step": 252000 + }, + { + "epoch": 1.25, + "learning_rate": 4.937475166342157e-05, + "loss": 2.6224, + "step": 252500 + }, + { + "epoch": 1.25, + "learning_rate": 4.9373513076995484e-05, + "loss": 2.6319, + "step": 253000 + }, + { + "epoch": 1.26, + "learning_rate": 4.93722744905694e-05, + "loss": 2.6201, + "step": 253500 + }, + { + "epoch": 1.26, + "learning_rate": 4.937103590414332e-05, + "loss": 2.605, + "step": 254000 + }, + { + "epoch": 1.26, + "learning_rate": 4.9369797317717235e-05, + "loss": 2.6106, + "step": 254500 + }, + { + "epoch": 1.26, + "learning_rate": 4.936855873129115e-05, + "loss": 2.5989, + "step": 255000 + }, + { + "epoch": 1.27, + "learning_rate": 4.936732014486507e-05, + "loss": 2.6324, + "step": 255500 + }, + { + "epoch": 1.27, + "learning_rate": 4.9366081558438986e-05, + "loss": 2.615, + "step": 256000 + }, + { + "epoch": 1.27, + "learning_rate": 4.9364845449185755e-05, + "loss": 2.6136, + "step": 256500 + }, + { + "epoch": 1.27, + "learning_rate": 4.936360686275967e-05, + "loss": 2.6346, + "step": 257000 + }, + { + "epoch": 1.28, + "learning_rate": 4.936236827633359e-05, + "loss": 2.6109, + "step": 257500 + }, + { + "epoch": 1.28, + "learning_rate": 4.9361129689907506e-05, + "loss": 2.629, + "step": 258000 + }, + { + "epoch": 1.28, + "learning_rate": 4.935989110348142e-05, + "loss": 2.6382, + "step": 258500 + }, + { + "epoch": 1.28, + "learning_rate": 4.935865251705534e-05, + "loss": 2.652, + "step": 259000 + }, + { + "epoch": 1.29, + "learning_rate": 4.9357413930629256e-05, + "loss": 2.5949, + "step": 259500 + }, + { + "epoch": 1.29, + "learning_rate": 4.935617534420317e-05, + "loss": 2.6228, + "step": 260000 + }, + { + "epoch": 1.29, + "learning_rate": 4.935493675777709e-05, + "loss": 2.5946, + "step": 260500 + }, + { + "epoch": 1.29, + "learning_rate": 4.935369817135101e-05, + "loss": 2.6195, + "step": 261000 + }, + { + "epoch": 1.3, + "learning_rate": 4.935245958492492e-05, + "loss": 2.6184, + "step": 261500 + }, + { + "epoch": 1.3, + "learning_rate": 4.9351223475671686e-05, + "loss": 2.6138, + "step": 262000 + }, + { + "epoch": 1.3, + "learning_rate": 4.93499848892456e-05, + "loss": 2.5975, + "step": 262500 + }, + { + "epoch": 1.3, + "learning_rate": 4.934874630281952e-05, + "loss": 2.6182, + "step": 263000 + }, + { + "epoch": 1.31, + "learning_rate": 4.934750771639344e-05, + "loss": 2.6013, + "step": 263500 + }, + { + "epoch": 1.31, + "learning_rate": 4.9346269129967354e-05, + "loss": 2.6424, + "step": 264000 + }, + { + "epoch": 1.31, + "learning_rate": 4.934503302071412e-05, + "loss": 2.6397, + "step": 264500 + }, + { + "epoch": 1.31, + "learning_rate": 4.934379443428804e-05, + "loss": 2.6183, + "step": 265000 + }, + { + "epoch": 1.32, + "learning_rate": 4.9342555847861956e-05, + "loss": 2.6236, + "step": 265500 + }, + { + "epoch": 1.32, + "learning_rate": 4.934131726143587e-05, + "loss": 2.6169, + "step": 266000 + }, + { + "epoch": 1.32, + "learning_rate": 4.934007867500979e-05, + "loss": 2.6104, + "step": 266500 + }, + { + "epoch": 1.32, + "learning_rate": 4.933884008858371e-05, + "loss": 2.6056, + "step": 267000 + }, + { + "epoch": 1.33, + "learning_rate": 4.9337601502157624e-05, + "loss": 2.6044, + "step": 267500 + }, + { + "epoch": 1.33, + "learning_rate": 4.9336362915731534e-05, + "loss": 2.5988, + "step": 268000 + }, + { + "epoch": 1.33, + "learning_rate": 4.933512432930545e-05, + "loss": 2.64, + "step": 268500 + }, + { + "epoch": 1.33, + "learning_rate": 4.933388822005222e-05, + "loss": 2.6459, + "step": 269000 + }, + { + "epoch": 1.34, + "learning_rate": 4.933264963362614e-05, + "loss": 2.6069, + "step": 269500 + }, + { + "epoch": 1.34, + "learning_rate": 4.9331413524372906e-05, + "loss": 2.6332, + "step": 270000 + }, + { + "epoch": 1.34, + "learning_rate": 4.933017493794682e-05, + "loss": 2.6211, + "step": 270500 + }, + { + "epoch": 1.34, + "learning_rate": 4.932893635152074e-05, + "loss": 2.637, + "step": 271000 + }, + { + "epoch": 1.35, + "learning_rate": 4.9327697765094657e-05, + "loss": 2.6399, + "step": 271500 + }, + { + "epoch": 1.35, + "learning_rate": 4.9326459178668573e-05, + "loss": 2.6234, + "step": 272000 + }, + { + "epoch": 1.35, + "learning_rate": 4.9325223069415336e-05, + "loss": 2.6221, + "step": 272500 + }, + { + "epoch": 1.35, + "learning_rate": 4.932398448298925e-05, + "loss": 2.6133, + "step": 273000 + }, + { + "epoch": 1.36, + "learning_rate": 4.932274589656317e-05, + "loss": 2.614, + "step": 273500 + }, + { + "epoch": 1.36, + "learning_rate": 4.9321507310137086e-05, + "loss": 2.6026, + "step": 274000 + }, + { + "epoch": 1.36, + "learning_rate": 4.9320268723711e-05, + "loss": 2.5999, + "step": 274500 + }, + { + "epoch": 1.36, + "learning_rate": 4.931903013728492e-05, + "loss": 2.6374, + "step": 275000 + }, + { + "epoch": 1.36, + "learning_rate": 4.931779155085884e-05, + "loss": 2.6182, + "step": 275500 + }, + { + "epoch": 1.37, + "learning_rate": 4.9316552964432754e-05, + "loss": 2.6048, + "step": 276000 + }, + { + "epoch": 1.37, + "learning_rate": 4.931531685517952e-05, + "loss": 2.6308, + "step": 276500 + }, + { + "epoch": 1.37, + "learning_rate": 4.931408074592629e-05, + "loss": 2.6062, + "step": 277000 + }, + { + "epoch": 1.37, + "learning_rate": 4.931284215950021e-05, + "loss": 2.6164, + "step": 277500 + }, + { + "epoch": 1.38, + "learning_rate": 4.9311603573074126e-05, + "loss": 2.6278, + "step": 278000 + }, + { + "epoch": 1.38, + "learning_rate": 4.9310364986648036e-05, + "loss": 2.6067, + "step": 278500 + }, + { + "epoch": 1.38, + "learning_rate": 4.930912640022195e-05, + "loss": 2.6246, + "step": 279000 + }, + { + "epoch": 1.38, + "learning_rate": 4.930788781379587e-05, + "loss": 2.6262, + "step": 279500 + }, + { + "epoch": 1.39, + "learning_rate": 4.9306649227369786e-05, + "loss": 2.6116, + "step": 280000 + }, + { + "epoch": 1.39, + "learning_rate": 4.93054106409437e-05, + "loss": 2.6325, + "step": 280500 + }, + { + "epoch": 1.39, + "learning_rate": 4.930417453169047e-05, + "loss": 2.5979, + "step": 281000 + }, + { + "epoch": 1.39, + "learning_rate": 4.930293594526439e-05, + "loss": 2.6106, + "step": 281500 + }, + { + "epoch": 1.4, + "learning_rate": 4.9301699836011165e-05, + "loss": 2.6094, + "step": 282000 + }, + { + "epoch": 1.4, + "learning_rate": 4.9300461249585075e-05, + "loss": 2.6065, + "step": 282500 + }, + { + "epoch": 1.4, + "learning_rate": 4.929922266315899e-05, + "loss": 2.5977, + "step": 283000 + }, + { + "epoch": 1.4, + "learning_rate": 4.929798407673291e-05, + "loss": 2.6309, + "step": 283500 + }, + { + "epoch": 1.41, + "learning_rate": 4.9296745490306826e-05, + "loss": 2.6134, + "step": 284000 + }, + { + "epoch": 1.41, + "learning_rate": 4.929550690388074e-05, + "loss": 2.6412, + "step": 284500 + }, + { + "epoch": 1.41, + "learning_rate": 4.929426831745465e-05, + "loss": 2.6118, + "step": 285000 + }, + { + "epoch": 1.41, + "learning_rate": 4.929302973102857e-05, + "loss": 2.5835, + "step": 285500 + }, + { + "epoch": 1.42, + "learning_rate": 4.9291791144602487e-05, + "loss": 2.5993, + "step": 286000 + }, + { + "epoch": 1.42, + "learning_rate": 4.9290555035349255e-05, + "loss": 2.614, + "step": 286500 + }, + { + "epoch": 1.42, + "learning_rate": 4.928931892609603e-05, + "loss": 2.5988, + "step": 287000 + }, + { + "epoch": 1.42, + "learning_rate": 4.928808033966995e-05, + "loss": 2.6119, + "step": 287500 + }, + { + "epoch": 1.43, + "learning_rate": 4.9286841753243865e-05, + "loss": 2.6121, + "step": 288000 + }, + { + "epoch": 1.43, + "learning_rate": 4.928560316681778e-05, + "loss": 2.6205, + "step": 288500 + }, + { + "epoch": 1.43, + "learning_rate": 4.92843645803917e-05, + "loss": 2.6136, + "step": 289000 + }, + { + "epoch": 1.43, + "learning_rate": 4.928312847113846e-05, + "loss": 2.6125, + "step": 289500 + }, + { + "epoch": 1.44, + "learning_rate": 4.928188988471238e-05, + "loss": 2.6199, + "step": 290000 + }, + { + "epoch": 1.44, + "learning_rate": 4.9280651298286295e-05, + "loss": 2.612, + "step": 290500 + }, + { + "epoch": 1.44, + "learning_rate": 4.927941271186021e-05, + "loss": 2.5957, + "step": 291000 + }, + { + "epoch": 1.44, + "learning_rate": 4.927817412543413e-05, + "loss": 2.5938, + "step": 291500 + }, + { + "epoch": 1.45, + "learning_rate": 4.9276935539008045e-05, + "loss": 2.6138, + "step": 292000 + }, + { + "epoch": 1.45, + "learning_rate": 4.9275699429754814e-05, + "loss": 2.6162, + "step": 292500 + }, + { + "epoch": 1.45, + "learning_rate": 4.927446084332873e-05, + "loss": 2.6214, + "step": 293000 + }, + { + "epoch": 1.45, + "learning_rate": 4.927322225690265e-05, + "loss": 2.6009, + "step": 293500 + }, + { + "epoch": 1.46, + "learning_rate": 4.9271983670476565e-05, + "loss": 2.6031, + "step": 294000 + }, + { + "epoch": 1.46, + "learning_rate": 4.927074508405048e-05, + "loss": 2.5915, + "step": 294500 + }, + { + "epoch": 1.46, + "learning_rate": 4.92695064976244e-05, + "loss": 2.6289, + "step": 295000 + }, + { + "epoch": 1.46, + "learning_rate": 4.9268267911198316e-05, + "loss": 2.6141, + "step": 295500 + }, + { + "epoch": 1.47, + "learning_rate": 4.9267029324772226e-05, + "loss": 2.6233, + "step": 296000 + }, + { + "epoch": 1.47, + "learning_rate": 4.926579073834614e-05, + "loss": 2.6347, + "step": 296500 + }, + { + "epoch": 1.47, + "learning_rate": 4.926455215192006e-05, + "loss": 2.619, + "step": 297000 + }, + { + "epoch": 1.47, + "learning_rate": 4.926331604266683e-05, + "loss": 2.5973, + "step": 297500 + }, + { + "epoch": 1.48, + "learning_rate": 4.9262077456240745e-05, + "loss": 2.5999, + "step": 298000 + }, + { + "epoch": 1.48, + "learning_rate": 4.926083886981466e-05, + "loss": 2.6199, + "step": 298500 + }, + { + "epoch": 1.48, + "learning_rate": 4.925960028338857e-05, + "loss": 2.6028, + "step": 299000 + }, + { + "epoch": 1.48, + "learning_rate": 4.925836417413535e-05, + "loss": 2.5981, + "step": 299500 + }, + { + "epoch": 1.49, + "learning_rate": 4.925712806488211e-05, + "loss": 2.6159, + "step": 300000 + }, + { + "epoch": 1.49, + "learning_rate": 4.925588947845603e-05, + "loss": 2.6048, + "step": 300500 + }, + { + "epoch": 1.49, + "learning_rate": 4.9254650892029944e-05, + "loss": 2.5982, + "step": 301000 + }, + { + "epoch": 1.49, + "learning_rate": 4.925341230560386e-05, + "loss": 2.6199, + "step": 301500 + }, + { + "epoch": 1.5, + "learning_rate": 4.925217619635063e-05, + "loss": 2.6351, + "step": 302000 + }, + { + "epoch": 1.5, + "learning_rate": 4.925093760992455e-05, + "loss": 2.6151, + "step": 302500 + }, + { + "epoch": 1.5, + "learning_rate": 4.9249701500671315e-05, + "loss": 2.5925, + "step": 303000 + }, + { + "epoch": 1.5, + "learning_rate": 4.924846291424523e-05, + "loss": 2.5977, + "step": 303500 + }, + { + "epoch": 1.51, + "learning_rate": 4.924722432781915e-05, + "loss": 2.6201, + "step": 304000 + }, + { + "epoch": 1.51, + "learning_rate": 4.9245985741393066e-05, + "loss": 2.6132, + "step": 304500 + }, + { + "epoch": 1.51, + "learning_rate": 4.924474715496698e-05, + "loss": 2.6258, + "step": 305000 + }, + { + "epoch": 1.51, + "learning_rate": 4.92435085685409e-05, + "loss": 2.6223, + "step": 305500 + }, + { + "epoch": 1.52, + "learning_rate": 4.924226998211482e-05, + "loss": 2.6127, + "step": 306000 + }, + { + "epoch": 1.52, + "learning_rate": 4.924103139568873e-05, + "loss": 2.6071, + "step": 306500 + }, + { + "epoch": 1.52, + "learning_rate": 4.9239795286435496e-05, + "loss": 2.6147, + "step": 307000 + }, + { + "epoch": 1.52, + "learning_rate": 4.923855670000941e-05, + "loss": 2.6054, + "step": 307500 + }, + { + "epoch": 1.53, + "learning_rate": 4.923731811358333e-05, + "loss": 2.597, + "step": 308000 + }, + { + "epoch": 1.53, + "learning_rate": 4.9236082004330105e-05, + "loss": 2.599, + "step": 308500 + }, + { + "epoch": 1.53, + "learning_rate": 4.9234843417904016e-05, + "loss": 2.5992, + "step": 309000 + }, + { + "epoch": 1.53, + "learning_rate": 4.923360483147793e-05, + "loss": 2.6026, + "step": 309500 + }, + { + "epoch": 1.54, + "learning_rate": 4.923236624505185e-05, + "loss": 2.5938, + "step": 310000 + }, + { + "epoch": 1.54, + "learning_rate": 4.9231127658625766e-05, + "loss": 2.6023, + "step": 310500 + }, + { + "epoch": 1.54, + "learning_rate": 4.922988907219968e-05, + "loss": 2.586, + "step": 311000 + }, + { + "epoch": 1.54, + "learning_rate": 4.92286504857736e-05, + "loss": 2.6111, + "step": 311500 + }, + { + "epoch": 1.55, + "learning_rate": 4.922741189934752e-05, + "loss": 2.6103, + "step": 312000 + }, + { + "epoch": 1.55, + "learning_rate": 4.922617579009428e-05, + "loss": 2.598, + "step": 312500 + }, + { + "epoch": 1.55, + "learning_rate": 4.9224937203668196e-05, + "loss": 2.5982, + "step": 313000 + }, + { + "epoch": 1.55, + "learning_rate": 4.922369861724211e-05, + "loss": 2.6226, + "step": 313500 + }, + { + "epoch": 1.56, + "learning_rate": 4.922246250798889e-05, + "loss": 2.6292, + "step": 314000 + }, + { + "epoch": 1.56, + "learning_rate": 4.9221223921562806e-05, + "loss": 2.6045, + "step": 314500 + }, + { + "epoch": 1.56, + "learning_rate": 4.921998533513672e-05, + "loss": 2.596, + "step": 315000 + }, + { + "epoch": 1.56, + "learning_rate": 4.921874674871063e-05, + "loss": 2.5956, + "step": 315500 + }, + { + "epoch": 1.57, + "learning_rate": 4.921750816228455e-05, + "loss": 2.5923, + "step": 316000 + }, + { + "epoch": 1.57, + "learning_rate": 4.9216269575858467e-05, + "loss": 2.5815, + "step": 316500 + }, + { + "epoch": 1.57, + "learning_rate": 4.9215030989432383e-05, + "loss": 2.6302, + "step": 317000 + }, + { + "epoch": 1.57, + "learning_rate": 4.92137924030063e-05, + "loss": 2.5967, + "step": 317500 + }, + { + "epoch": 1.58, + "learning_rate": 4.921255381658022e-05, + "loss": 2.611, + "step": 318000 + }, + { + "epoch": 1.58, + "learning_rate": 4.9211315230154134e-05, + "loss": 2.5912, + "step": 318500 + }, + { + "epoch": 1.58, + "learning_rate": 4.921007664372805e-05, + "loss": 2.6341, + "step": 319000 + }, + { + "epoch": 1.58, + "learning_rate": 4.920883805730197e-05, + "loss": 2.5869, + "step": 319500 + }, + { + "epoch": 1.59, + "learning_rate": 4.920759947087588e-05, + "loss": 2.648, + "step": 320000 + }, + { + "epoch": 1.59, + "learning_rate": 4.9206360884449795e-05, + "loss": 2.6136, + "step": 320500 + }, + { + "epoch": 1.59, + "learning_rate": 4.9205124775196564e-05, + "loss": 2.5998, + "step": 321000 + }, + { + "epoch": 1.59, + "learning_rate": 4.920388618877048e-05, + "loss": 2.5955, + "step": 321500 + }, + { + "epoch": 1.6, + "learning_rate": 4.920265007951725e-05, + "loss": 2.5852, + "step": 322000 + }, + { + "epoch": 1.6, + "learning_rate": 4.920141149309117e-05, + "loss": 2.6069, + "step": 322500 + }, + { + "epoch": 1.6, + "learning_rate": 4.9200172906665084e-05, + "loss": 2.5923, + "step": 323000 + }, + { + "epoch": 1.6, + "learning_rate": 4.9198934320239e-05, + "loss": 2.6257, + "step": 323500 + }, + { + "epoch": 1.61, + "learning_rate": 4.919769573381292e-05, + "loss": 2.6118, + "step": 324000 + }, + { + "epoch": 1.61, + "learning_rate": 4.9196459624559686e-05, + "loss": 2.6192, + "step": 324500 + }, + { + "epoch": 1.61, + "learning_rate": 4.9195221038133596e-05, + "loss": 2.6028, + "step": 325000 + }, + { + "epoch": 1.61, + "learning_rate": 4.919398245170751e-05, + "loss": 2.6064, + "step": 325500 + }, + { + "epoch": 1.62, + "learning_rate": 4.919274386528143e-05, + "loss": 2.6158, + "step": 326000 + }, + { + "epoch": 1.62, + "learning_rate": 4.919150527885535e-05, + "loss": 2.5847, + "step": 326500 + }, + { + "epoch": 1.62, + "learning_rate": 4.9190266692429264e-05, + "loss": 2.5866, + "step": 327000 + }, + { + "epoch": 1.62, + "learning_rate": 4.918902810600318e-05, + "loss": 2.6172, + "step": 327500 + }, + { + "epoch": 1.63, + "learning_rate": 4.91877895195771e-05, + "loss": 2.6314, + "step": 328000 + }, + { + "epoch": 1.63, + "learning_rate": 4.918655341032387e-05, + "loss": 2.6196, + "step": 328500 + }, + { + "epoch": 1.63, + "learning_rate": 4.9185317301070636e-05, + "loss": 2.6346, + "step": 329000 + }, + { + "epoch": 1.63, + "learning_rate": 4.918407871464455e-05, + "loss": 2.6118, + "step": 329500 + }, + { + "epoch": 1.63, + "learning_rate": 4.918284012821847e-05, + "loss": 2.5716, + "step": 330000 + }, + { + "epoch": 1.64, + "learning_rate": 4.9181601541792386e-05, + "loss": 2.626, + "step": 330500 + }, + { + "epoch": 1.64, + "learning_rate": 4.9180362955366297e-05, + "loss": 2.6039, + "step": 331000 + }, + { + "epoch": 1.64, + "learning_rate": 4.9179124368940213e-05, + "loss": 2.5957, + "step": 331500 + }, + { + "epoch": 1.64, + "learning_rate": 4.917788825968699e-05, + "loss": 2.6115, + "step": 332000 + }, + { + "epoch": 1.65, + "learning_rate": 4.9176649673260906e-05, + "loss": 2.6013, + "step": 332500 + }, + { + "epoch": 1.65, + "learning_rate": 4.917541108683482e-05, + "loss": 2.6131, + "step": 333000 + }, + { + "epoch": 1.65, + "learning_rate": 4.917417250040874e-05, + "loss": 2.5955, + "step": 333500 + }, + { + "epoch": 1.65, + "learning_rate": 4.917293391398265e-05, + "loss": 2.615, + "step": 334000 + }, + { + "epoch": 1.66, + "learning_rate": 4.917169780472942e-05, + "loss": 2.5883, + "step": 334500 + }, + { + "epoch": 1.66, + "learning_rate": 4.9170459218303336e-05, + "loss": 2.6009, + "step": 335000 + }, + { + "epoch": 1.66, + "learning_rate": 4.916922063187725e-05, + "loss": 2.6009, + "step": 335500 + }, + { + "epoch": 1.66, + "learning_rate": 4.916798204545117e-05, + "loss": 2.6285, + "step": 336000 + }, + { + "epoch": 1.67, + "learning_rate": 4.916674593619794e-05, + "loss": 2.6007, + "step": 336500 + }, + { + "epoch": 1.67, + "learning_rate": 4.9165507349771855e-05, + "loss": 2.5952, + "step": 337000 + }, + { + "epoch": 1.67, + "learning_rate": 4.916426876334577e-05, + "loss": 2.5738, + "step": 337500 + }, + { + "epoch": 1.67, + "learning_rate": 4.916303017691969e-05, + "loss": 2.6101, + "step": 338000 + }, + { + "epoch": 1.68, + "learning_rate": 4.916179406766646e-05, + "loss": 2.5811, + "step": 338500 + }, + { + "epoch": 1.68, + "learning_rate": 4.9160555481240375e-05, + "loss": 2.5833, + "step": 339000 + }, + { + "epoch": 1.68, + "learning_rate": 4.915931689481429e-05, + "loss": 2.6013, + "step": 339500 + }, + { + "epoch": 1.68, + "learning_rate": 4.915807830838821e-05, + "loss": 2.6163, + "step": 340000 + }, + { + "epoch": 1.69, + "learning_rate": 4.9156839721962126e-05, + "loss": 2.5969, + "step": 340500 + }, + { + "epoch": 1.69, + "learning_rate": 4.9155601135536036e-05, + "loss": 2.5872, + "step": 341000 + }, + { + "epoch": 1.69, + "learning_rate": 4.9154365026282805e-05, + "loss": 2.5693, + "step": 341500 + }, + { + "epoch": 1.69, + "learning_rate": 4.915312643985672e-05, + "loss": 2.5994, + "step": 342000 + }, + { + "epoch": 1.7, + "learning_rate": 4.915188785343064e-05, + "loss": 2.6187, + "step": 342500 + }, + { + "epoch": 1.7, + "learning_rate": 4.9150649267004555e-05, + "loss": 2.5982, + "step": 343000 + }, + { + "epoch": 1.7, + "learning_rate": 4.914941068057847e-05, + "loss": 2.5968, + "step": 343500 + }, + { + "epoch": 1.7, + "learning_rate": 4.914817209415239e-05, + "loss": 2.5756, + "step": 344000 + }, + { + "epoch": 1.71, + "learning_rate": 4.9146933507726306e-05, + "loss": 2.5962, + "step": 344500 + }, + { + "epoch": 1.71, + "learning_rate": 4.914569492130022e-05, + "loss": 2.6133, + "step": 345000 + }, + { + "epoch": 1.71, + "learning_rate": 4.914445881204699e-05, + "loss": 2.6052, + "step": 345500 + }, + { + "epoch": 1.71, + "learning_rate": 4.914322022562091e-05, + "loss": 2.6143, + "step": 346000 + }, + { + "epoch": 1.72, + "learning_rate": 4.9141981639194826e-05, + "loss": 2.6077, + "step": 346500 + }, + { + "epoch": 1.72, + "learning_rate": 4.914074552994159e-05, + "loss": 2.6198, + "step": 347000 + }, + { + "epoch": 1.72, + "learning_rate": 4.9139506943515505e-05, + "loss": 2.6435, + "step": 347500 + }, + { + "epoch": 1.72, + "learning_rate": 4.913826835708942e-05, + "loss": 2.6145, + "step": 348000 + }, + { + "epoch": 1.73, + "learning_rate": 4.913702977066334e-05, + "loss": 2.5931, + "step": 348500 + }, + { + "epoch": 1.73, + "learning_rate": 4.9135791184237256e-05, + "loss": 2.6031, + "step": 349000 + }, + { + "epoch": 1.73, + "learning_rate": 4.913455259781117e-05, + "loss": 2.599, + "step": 349500 + }, + { + "epoch": 1.73, + "learning_rate": 4.913331401138509e-05, + "loss": 2.6163, + "step": 350000 + }, + { + "epoch": 1.74, + "learning_rate": 4.9132075424959006e-05, + "loss": 2.6104, + "step": 350500 + }, + { + "epoch": 1.74, + "learning_rate": 4.913083683853292e-05, + "loss": 2.6251, + "step": 351000 + }, + { + "epoch": 1.74, + "learning_rate": 4.912959825210684e-05, + "loss": 2.6044, + "step": 351500 + }, + { + "epoch": 1.74, + "learning_rate": 4.912835966568076e-05, + "loss": 2.6131, + "step": 352000 + }, + { + "epoch": 1.75, + "learning_rate": 4.9127121079254674e-05, + "loss": 2.5796, + "step": 352500 + }, + { + "epoch": 1.75, + "learning_rate": 4.9125882492828584e-05, + "loss": 2.6012, + "step": 353000 + }, + { + "epoch": 1.75, + "learning_rate": 4.912464638357536e-05, + "loss": 2.6166, + "step": 353500 + }, + { + "epoch": 1.75, + "learning_rate": 4.912340779714928e-05, + "loss": 2.6058, + "step": 354000 + }, + { + "epoch": 1.76, + "learning_rate": 4.912216921072319e-05, + "loss": 2.6076, + "step": 354500 + }, + { + "epoch": 1.76, + "learning_rate": 4.9120930624297104e-05, + "loss": 2.5742, + "step": 355000 + }, + { + "epoch": 1.76, + "learning_rate": 4.911969203787102e-05, + "loss": 2.5899, + "step": 355500 + }, + { + "epoch": 1.76, + "learning_rate": 4.911845592861779e-05, + "loss": 2.6106, + "step": 356000 + }, + { + "epoch": 1.77, + "learning_rate": 4.9117217342191706e-05, + "loss": 2.6205, + "step": 356500 + }, + { + "epoch": 1.77, + "learning_rate": 4.9115978755765623e-05, + "loss": 2.6254, + "step": 357000 + }, + { + "epoch": 1.77, + "learning_rate": 4.911474016933954e-05, + "loss": 2.6239, + "step": 357500 + }, + { + "epoch": 1.77, + "learning_rate": 4.911350158291346e-05, + "loss": 2.6001, + "step": 358000 + }, + { + "epoch": 1.78, + "learning_rate": 4.9112262996487374e-05, + "loss": 2.5861, + "step": 358500 + }, + { + "epoch": 1.78, + "learning_rate": 4.911102688723414e-05, + "loss": 2.6104, + "step": 359000 + }, + { + "epoch": 1.78, + "learning_rate": 4.910978830080806e-05, + "loss": 2.5978, + "step": 359500 + }, + { + "epoch": 1.78, + "learning_rate": 4.910854971438198e-05, + "loss": 2.5799, + "step": 360000 + }, + { + "epoch": 1.79, + "learning_rate": 4.910731360512874e-05, + "loss": 2.6136, + "step": 360500 + }, + { + "epoch": 1.79, + "learning_rate": 4.9106075018702656e-05, + "loss": 2.6105, + "step": 361000 + }, + { + "epoch": 1.79, + "learning_rate": 4.910483643227657e-05, + "loss": 2.5947, + "step": 361500 + }, + { + "epoch": 1.79, + "learning_rate": 4.910359784585049e-05, + "loss": 2.5731, + "step": 362000 + }, + { + "epoch": 1.8, + "learning_rate": 4.910236421377011e-05, + "loss": 2.6102, + "step": 362500 + }, + { + "epoch": 1.8, + "learning_rate": 4.910112562734403e-05, + "loss": 2.6048, + "step": 363000 + }, + { + "epoch": 1.8, + "learning_rate": 4.9099887040917944e-05, + "loss": 2.6181, + "step": 363500 + }, + { + "epoch": 1.8, + "learning_rate": 4.909864845449186e-05, + "loss": 2.5967, + "step": 364000 + }, + { + "epoch": 1.81, + "learning_rate": 4.909740986806577e-05, + "loss": 2.6098, + "step": 364500 + }, + { + "epoch": 1.81, + "learning_rate": 4.909617128163969e-05, + "loss": 2.6129, + "step": 365000 + }, + { + "epoch": 1.81, + "learning_rate": 4.9094932695213605e-05, + "loss": 2.5773, + "step": 365500 + }, + { + "epoch": 1.81, + "learning_rate": 4.9093696585960374e-05, + "loss": 2.6096, + "step": 366000 + }, + { + "epoch": 1.82, + "learning_rate": 4.909245799953429e-05, + "loss": 2.6181, + "step": 366500 + }, + { + "epoch": 1.82, + "learning_rate": 4.909121941310821e-05, + "loss": 2.5896, + "step": 367000 + }, + { + "epoch": 1.82, + "learning_rate": 4.9089980826682125e-05, + "loss": 2.5851, + "step": 367500 + }, + { + "epoch": 1.82, + "learning_rate": 4.90887447174289e-05, + "loss": 2.6158, + "step": 368000 + }, + { + "epoch": 1.83, + "learning_rate": 4.908750613100282e-05, + "loss": 2.5965, + "step": 368500 + }, + { + "epoch": 1.83, + "learning_rate": 4.908626754457673e-05, + "loss": 2.6232, + "step": 369000 + }, + { + "epoch": 1.83, + "learning_rate": 4.9085028958150644e-05, + "loss": 2.5897, + "step": 369500 + }, + { + "epoch": 1.83, + "learning_rate": 4.908379037172456e-05, + "loss": 2.577, + "step": 370000 + }, + { + "epoch": 1.84, + "learning_rate": 4.908255178529848e-05, + "loss": 2.6109, + "step": 370500 + }, + { + "epoch": 1.84, + "learning_rate": 4.9081313198872395e-05, + "loss": 2.602, + "step": 371000 + }, + { + "epoch": 1.84, + "learning_rate": 4.9080074612446305e-05, + "loss": 2.5674, + "step": 371500 + }, + { + "epoch": 1.84, + "learning_rate": 4.907883602602022e-05, + "loss": 2.5986, + "step": 372000 + }, + { + "epoch": 1.85, + "learning_rate": 4.907759743959414e-05, + "loss": 2.6051, + "step": 372500 + }, + { + "epoch": 1.85, + "learning_rate": 4.9076358853168056e-05, + "loss": 2.5999, + "step": 373000 + }, + { + "epoch": 1.85, + "learning_rate": 4.907512026674197e-05, + "loss": 2.6057, + "step": 373500 + }, + { + "epoch": 1.85, + "learning_rate": 4.907388168031589e-05, + "loss": 2.5942, + "step": 374000 + }, + { + "epoch": 1.86, + "learning_rate": 4.907264309388981e-05, + "loss": 2.6297, + "step": 374500 + }, + { + "epoch": 1.86, + "learning_rate": 4.9071404507463724e-05, + "loss": 2.5977, + "step": 375000 + }, + { + "epoch": 1.86, + "learning_rate": 4.907016592103764e-05, + "loss": 2.5997, + "step": 375500 + }, + { + "epoch": 1.86, + "learning_rate": 4.906892733461156e-05, + "loss": 2.6261, + "step": 376000 + }, + { + "epoch": 1.87, + "learning_rate": 4.906769370253118e-05, + "loss": 2.6139, + "step": 376500 + }, + { + "epoch": 1.87, + "learning_rate": 4.9066455116105095e-05, + "loss": 2.6229, + "step": 377000 + }, + { + "epoch": 1.87, + "learning_rate": 4.906521652967901e-05, + "loss": 2.6, + "step": 377500 + }, + { + "epoch": 1.87, + "learning_rate": 4.906397794325292e-05, + "loss": 2.5966, + "step": 378000 + }, + { + "epoch": 1.88, + "learning_rate": 4.906273935682684e-05, + "loss": 2.5734, + "step": 378500 + }, + { + "epoch": 1.88, + "learning_rate": 4.9061500770400756e-05, + "loss": 2.6094, + "step": 379000 + }, + { + "epoch": 1.88, + "learning_rate": 4.906026218397467e-05, + "loss": 2.5933, + "step": 379500 + }, + { + "epoch": 1.88, + "learning_rate": 4.905902359754859e-05, + "loss": 2.6042, + "step": 380000 + }, + { + "epoch": 1.89, + "learning_rate": 4.905778501112251e-05, + "loss": 2.6221, + "step": 380500 + }, + { + "epoch": 1.89, + "learning_rate": 4.9056548901869276e-05, + "loss": 2.5897, + "step": 381000 + }, + { + "epoch": 1.89, + "learning_rate": 4.905531279261605e-05, + "loss": 2.5985, + "step": 381500 + }, + { + "epoch": 1.89, + "learning_rate": 4.9054076683362813e-05, + "loss": 2.6131, + "step": 382000 + }, + { + "epoch": 1.9, + "learning_rate": 4.905283809693673e-05, + "loss": 2.6016, + "step": 382500 + }, + { + "epoch": 1.9, + "learning_rate": 4.905159951051065e-05, + "loss": 2.6079, + "step": 383000 + }, + { + "epoch": 1.9, + "learning_rate": 4.9050360924084564e-05, + "loss": 2.6093, + "step": 383500 + }, + { + "epoch": 1.9, + "learning_rate": 4.904912233765848e-05, + "loss": 2.6203, + "step": 384000 + }, + { + "epoch": 1.9, + "learning_rate": 4.90478837512324e-05, + "loss": 2.6072, + "step": 384500 + }, + { + "epoch": 1.91, + "learning_rate": 4.904664516480631e-05, + "loss": 2.5929, + "step": 385000 + }, + { + "epoch": 1.91, + "learning_rate": 4.9045406578380225e-05, + "loss": 2.6029, + "step": 385500 + }, + { + "epoch": 1.91, + "learning_rate": 4.9044170469127e-05, + "loss": 2.6103, + "step": 386000 + }, + { + "epoch": 1.91, + "learning_rate": 4.904293188270092e-05, + "loss": 2.5676, + "step": 386500 + }, + { + "epoch": 1.92, + "learning_rate": 4.9041693296274835e-05, + "loss": 2.6016, + "step": 387000 + }, + { + "epoch": 1.92, + "learning_rate": 4.904045470984875e-05, + "loss": 2.5875, + "step": 387500 + }, + { + "epoch": 1.92, + "learning_rate": 4.903921612342266e-05, + "loss": 2.6147, + "step": 388000 + }, + { + "epoch": 1.92, + "learning_rate": 4.903797753699658e-05, + "loss": 2.573, + "step": 388500 + }, + { + "epoch": 1.93, + "learning_rate": 4.903674142774335e-05, + "loss": 2.6102, + "step": 389000 + }, + { + "epoch": 1.93, + "learning_rate": 4.9035502841317264e-05, + "loss": 2.6018, + "step": 389500 + }, + { + "epoch": 1.93, + "learning_rate": 4.903426425489118e-05, + "loss": 2.5944, + "step": 390000 + }, + { + "epoch": 1.93, + "learning_rate": 4.90330256684651e-05, + "loss": 2.6172, + "step": 390500 + }, + { + "epoch": 1.94, + "learning_rate": 4.903178708203901e-05, + "loss": 2.5899, + "step": 391000 + }, + { + "epoch": 1.94, + "learning_rate": 4.9030548495612925e-05, + "loss": 2.5876, + "step": 391500 + }, + { + "epoch": 1.94, + "learning_rate": 4.902930990918684e-05, + "loss": 2.5859, + "step": 392000 + }, + { + "epoch": 1.94, + "learning_rate": 4.902807132276076e-05, + "loss": 2.586, + "step": 392500 + }, + { + "epoch": 1.95, + "learning_rate": 4.9026832736334676e-05, + "loss": 2.6084, + "step": 393000 + }, + { + "epoch": 1.95, + "learning_rate": 4.902559662708145e-05, + "loss": 2.6038, + "step": 393500 + }, + { + "epoch": 1.95, + "learning_rate": 4.9024360517828214e-05, + "loss": 2.5836, + "step": 394000 + }, + { + "epoch": 1.95, + "learning_rate": 4.902312193140213e-05, + "loss": 2.5929, + "step": 394500 + }, + { + "epoch": 1.96, + "learning_rate": 4.90218858221489e-05, + "loss": 2.5957, + "step": 395000 + }, + { + "epoch": 1.96, + "learning_rate": 4.9020647235722816e-05, + "loss": 2.582, + "step": 395500 + }, + { + "epoch": 1.96, + "learning_rate": 4.901940864929673e-05, + "loss": 2.6287, + "step": 396000 + }, + { + "epoch": 1.96, + "learning_rate": 4.901817006287065e-05, + "loss": 2.6059, + "step": 396500 + }, + { + "epoch": 1.97, + "learning_rate": 4.901693147644457e-05, + "loss": 2.5855, + "step": 397000 + }, + { + "epoch": 1.97, + "learning_rate": 4.9015692890018484e-05, + "loss": 2.5951, + "step": 397500 + }, + { + "epoch": 1.97, + "learning_rate": 4.90144543035924e-05, + "loss": 2.5817, + "step": 398000 + }, + { + "epoch": 1.97, + "learning_rate": 4.901321571716632e-05, + "loss": 2.6157, + "step": 398500 + }, + { + "epoch": 1.98, + "learning_rate": 4.9011977130740235e-05, + "loss": 2.596, + "step": 399000 + }, + { + "epoch": 1.98, + "learning_rate": 4.901073854431415e-05, + "loss": 2.5786, + "step": 399500 + }, + { + "epoch": 1.98, + "learning_rate": 4.900949995788807e-05, + "loss": 2.5946, + "step": 400000 + }, + { + "epoch": 1.98, + "learning_rate": 4.900826384863483e-05, + "loss": 2.6155, + "step": 400500 + }, + { + "epoch": 1.99, + "learning_rate": 4.900702526220875e-05, + "loss": 2.5889, + "step": 401000 + }, + { + "epoch": 1.99, + "learning_rate": 4.9005786675782665e-05, + "loss": 2.5796, + "step": 401500 + }, + { + "epoch": 1.99, + "learning_rate": 4.900454808935658e-05, + "loss": 2.5993, + "step": 402000 + }, + { + "epoch": 1.99, + "learning_rate": 4.900331198010335e-05, + "loss": 2.5894, + "step": 402500 + }, + { + "epoch": 2.0, + "learning_rate": 4.900207339367727e-05, + "loss": 2.6215, + "step": 403000 + }, + { + "epoch": 2.0, + "learning_rate": 4.9000834807251184e-05, + "loss": 2.5994, + "step": 403500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.6286475772528078, + "eval_accuracy_mlm": 0.5813388926339267, + "eval_accuracy_nsp": 0.8516075133648939, + "eval_loss": 2.524798631668091, + "eval_runtime": 146.063, + "eval_samples_per_second": 1745.542, + "eval_steps_per_second": 72.736, + "step": 403686 + }, + { + "epoch": 2.0, + "learning_rate": 4.89995962208251e-05, + "loss": 2.5887, + "step": 404000 + }, + { + "epoch": 2.0, + "learning_rate": 4.899835763439902e-05, + "loss": 2.5834, + "step": 404500 + }, + { + "epoch": 2.01, + "learning_rate": 4.8997119047972935e-05, + "loss": 2.5753, + "step": 405000 + }, + { + "epoch": 2.01, + "learning_rate": 4.8995882938719704e-05, + "loss": 2.5542, + "step": 405500 + }, + { + "epoch": 2.01, + "learning_rate": 4.8994644352293614e-05, + "loss": 2.5592, + "step": 406000 + }, + { + "epoch": 2.01, + "learning_rate": 4.899340576586753e-05, + "loss": 2.5818, + "step": 406500 + }, + { + "epoch": 2.02, + "learning_rate": 4.899216717944145e-05, + "loss": 2.5571, + "step": 407000 + }, + { + "epoch": 2.02, + "learning_rate": 4.8990928593015365e-05, + "loss": 2.5746, + "step": 407500 + }, + { + "epoch": 2.02, + "learning_rate": 4.8989692483762133e-05, + "loss": 2.5663, + "step": 408000 + }, + { + "epoch": 2.02, + "learning_rate": 4.898845389733605e-05, + "loss": 2.5716, + "step": 408500 + }, + { + "epoch": 2.03, + "learning_rate": 4.898721778808282e-05, + "loss": 2.5595, + "step": 409000 + }, + { + "epoch": 2.03, + "learning_rate": 4.8985979201656736e-05, + "loss": 2.5811, + "step": 409500 + }, + { + "epoch": 2.03, + "learning_rate": 4.898474061523065e-05, + "loss": 2.5488, + "step": 410000 + }, + { + "epoch": 2.03, + "learning_rate": 4.898350202880457e-05, + "loss": 2.5651, + "step": 410500 + }, + { + "epoch": 2.04, + "learning_rate": 4.898226344237849e-05, + "loss": 2.5839, + "step": 411000 + }, + { + "epoch": 2.04, + "learning_rate": 4.8981024855952404e-05, + "loss": 2.577, + "step": 411500 + }, + { + "epoch": 2.04, + "learning_rate": 4.897978626952632e-05, + "loss": 2.5854, + "step": 412000 + }, + { + "epoch": 2.04, + "learning_rate": 4.897855016027308e-05, + "loss": 2.5607, + "step": 412500 + }, + { + "epoch": 2.05, + "learning_rate": 4.8977311573847e-05, + "loss": 2.5672, + "step": 413000 + }, + { + "epoch": 2.05, + "learning_rate": 4.897607298742092e-05, + "loss": 2.5866, + "step": 413500 + }, + { + "epoch": 2.05, + "learning_rate": 4.8974834400994834e-05, + "loss": 2.6007, + "step": 414000 + }, + { + "epoch": 2.05, + "learning_rate": 4.897359581456875e-05, + "loss": 2.5648, + "step": 414500 + }, + { + "epoch": 2.06, + "learning_rate": 4.897235722814267e-05, + "loss": 2.5832, + "step": 415000 + }, + { + "epoch": 2.06, + "learning_rate": 4.8971118641716584e-05, + "loss": 2.5968, + "step": 415500 + }, + { + "epoch": 2.06, + "learning_rate": 4.896988253246335e-05, + "loss": 2.5548, + "step": 416000 + }, + { + "epoch": 2.06, + "learning_rate": 4.896864394603727e-05, + "loss": 2.5688, + "step": 416500 + }, + { + "epoch": 2.07, + "learning_rate": 4.896740535961119e-05, + "loss": 2.5836, + "step": 417000 + }, + { + "epoch": 2.07, + "learning_rate": 4.8966166773185104e-05, + "loss": 2.571, + "step": 417500 + }, + { + "epoch": 2.07, + "learning_rate": 4.896492818675902e-05, + "loss": 2.5721, + "step": 418000 + }, + { + "epoch": 2.07, + "learning_rate": 4.896368960033294e-05, + "loss": 2.561, + "step": 418500 + }, + { + "epoch": 2.08, + "learning_rate": 4.8962451013906855e-05, + "loss": 2.5737, + "step": 419000 + }, + { + "epoch": 2.08, + "learning_rate": 4.8961212427480765e-05, + "loss": 2.559, + "step": 419500 + }, + { + "epoch": 2.08, + "learning_rate": 4.895997384105468e-05, + "loss": 2.5928, + "step": 420000 + }, + { + "epoch": 2.08, + "learning_rate": 4.89587352546286e-05, + "loss": 2.5754, + "step": 420500 + }, + { + "epoch": 2.09, + "learning_rate": 4.895749914537537e-05, + "loss": 2.5631, + "step": 421000 + }, + { + "epoch": 2.09, + "learning_rate": 4.8956260558949285e-05, + "loss": 2.5715, + "step": 421500 + }, + { + "epoch": 2.09, + "learning_rate": 4.89550219725232e-05, + "loss": 2.5942, + "step": 422000 + }, + { + "epoch": 2.09, + "learning_rate": 4.895378338609712e-05, + "loss": 2.5579, + "step": 422500 + }, + { + "epoch": 2.1, + "learning_rate": 4.8952544799671035e-05, + "loss": 2.5612, + "step": 423000 + }, + { + "epoch": 2.1, + "learning_rate": 4.895130621324495e-05, + "loss": 2.5945, + "step": 423500 + }, + { + "epoch": 2.1, + "learning_rate": 4.895007010399172e-05, + "loss": 2.547, + "step": 424000 + }, + { + "epoch": 2.1, + "learning_rate": 4.894883151756564e-05, + "loss": 2.5908, + "step": 424500 + }, + { + "epoch": 2.11, + "learning_rate": 4.8947592931139555e-05, + "loss": 2.5855, + "step": 425000 + }, + { + "epoch": 2.11, + "learning_rate": 4.894635434471347e-05, + "loss": 2.5796, + "step": 425500 + }, + { + "epoch": 2.11, + "learning_rate": 4.894511575828739e-05, + "loss": 2.5654, + "step": 426000 + }, + { + "epoch": 2.11, + "learning_rate": 4.89438771718613e-05, + "loss": 2.5478, + "step": 426500 + }, + { + "epoch": 2.12, + "learning_rate": 4.8942638585435216e-05, + "loss": 2.5889, + "step": 427000 + }, + { + "epoch": 2.12, + "learning_rate": 4.894139999900913e-05, + "loss": 2.5634, + "step": 427500 + }, + { + "epoch": 2.12, + "learning_rate": 4.894016141258305e-05, + "loss": 2.5461, + "step": 428000 + }, + { + "epoch": 2.12, + "learning_rate": 4.893892530332982e-05, + "loss": 2.5366, + "step": 428500 + }, + { + "epoch": 2.13, + "learning_rate": 4.893768919407659e-05, + "loss": 2.5651, + "step": 429000 + }, + { + "epoch": 2.13, + "learning_rate": 4.8936450607650504e-05, + "loss": 2.5911, + "step": 429500 + }, + { + "epoch": 2.13, + "learning_rate": 4.893521202122442e-05, + "loss": 2.5801, + "step": 430000 + }, + { + "epoch": 2.13, + "learning_rate": 4.893397591197118e-05, + "loss": 2.5771, + "step": 430500 + }, + { + "epoch": 2.14, + "learning_rate": 4.89327373255451e-05, + "loss": 2.5864, + "step": 431000 + }, + { + "epoch": 2.14, + "learning_rate": 4.8931501216291876e-05, + "loss": 2.5512, + "step": 431500 + }, + { + "epoch": 2.14, + "learning_rate": 4.893026262986579e-05, + "loss": 2.5651, + "step": 432000 + }, + { + "epoch": 2.14, + "learning_rate": 4.89290240434397e-05, + "loss": 2.5588, + "step": 432500 + }, + { + "epoch": 2.15, + "learning_rate": 4.892778545701362e-05, + "loss": 2.5744, + "step": 433000 + }, + { + "epoch": 2.15, + "learning_rate": 4.892654687058754e-05, + "loss": 2.5913, + "step": 433500 + }, + { + "epoch": 2.15, + "learning_rate": 4.8925308284161454e-05, + "loss": 2.5761, + "step": 434000 + }, + { + "epoch": 2.15, + "learning_rate": 4.892406969773537e-05, + "loss": 2.5819, + "step": 434500 + }, + { + "epoch": 2.16, + "learning_rate": 4.892283111130929e-05, + "loss": 2.5598, + "step": 435000 + }, + { + "epoch": 2.16, + "learning_rate": 4.8921592524883204e-05, + "loss": 2.574, + "step": 435500 + }, + { + "epoch": 2.16, + "learning_rate": 4.892035393845712e-05, + "loss": 2.5842, + "step": 436000 + }, + { + "epoch": 2.16, + "learning_rate": 4.891911535203104e-05, + "loss": 2.5627, + "step": 436500 + }, + { + "epoch": 2.17, + "learning_rate": 4.8917876765604955e-05, + "loss": 2.5587, + "step": 437000 + }, + { + "epoch": 2.17, + "learning_rate": 4.891663817917887e-05, + "loss": 2.5872, + "step": 437500 + }, + { + "epoch": 2.17, + "learning_rate": 4.891539959275279e-05, + "loss": 2.5496, + "step": 438000 + }, + { + "epoch": 2.17, + "learning_rate": 4.891416348349955e-05, + "loss": 2.5884, + "step": 438500 + }, + { + "epoch": 2.17, + "learning_rate": 4.891292489707347e-05, + "loss": 2.5475, + "step": 439000 + }, + { + "epoch": 2.18, + "learning_rate": 4.8911686310647385e-05, + "loss": 2.5432, + "step": 439500 + }, + { + "epoch": 2.18, + "learning_rate": 4.8910450201394154e-05, + "loss": 2.5509, + "step": 440000 + }, + { + "epoch": 2.18, + "learning_rate": 4.890921161496807e-05, + "loss": 2.5689, + "step": 440500 + }, + { + "epoch": 2.18, + "learning_rate": 4.890797302854199e-05, + "loss": 2.5687, + "step": 441000 + }, + { + "epoch": 2.19, + "learning_rate": 4.8906734442115904e-05, + "loss": 2.5651, + "step": 441500 + }, + { + "epoch": 2.19, + "learning_rate": 4.890549585568982e-05, + "loss": 2.5761, + "step": 442000 + }, + { + "epoch": 2.19, + "learning_rate": 4.8904267177955146e-05, + "loss": 2.5642, + "step": 442500 + }, + { + "epoch": 2.19, + "learning_rate": 4.890302859152906e-05, + "loss": 2.5557, + "step": 443000 + }, + { + "epoch": 2.2, + "learning_rate": 4.890179000510298e-05, + "loss": 2.575, + "step": 443500 + }, + { + "epoch": 2.2, + "learning_rate": 4.890055141867689e-05, + "loss": 2.5907, + "step": 444000 + }, + { + "epoch": 2.2, + "learning_rate": 4.889931283225081e-05, + "loss": 2.5731, + "step": 444500 + }, + { + "epoch": 2.2, + "learning_rate": 4.8898074245824724e-05, + "loss": 2.5548, + "step": 445000 + }, + { + "epoch": 2.21, + "learning_rate": 4.889683565939864e-05, + "loss": 2.5467, + "step": 445500 + }, + { + "epoch": 2.21, + "learning_rate": 4.889559707297256e-05, + "loss": 2.592, + "step": 446000 + }, + { + "epoch": 2.21, + "learning_rate": 4.8894358486546475e-05, + "loss": 2.5996, + "step": 446500 + }, + { + "epoch": 2.21, + "learning_rate": 4.889311990012039e-05, + "loss": 2.5768, + "step": 447000 + }, + { + "epoch": 2.22, + "learning_rate": 4.889188131369431e-05, + "loss": 2.5667, + "step": 447500 + }, + { + "epoch": 2.22, + "learning_rate": 4.8890642727268225e-05, + "loss": 2.5699, + "step": 448000 + }, + { + "epoch": 2.22, + "learning_rate": 4.888940414084214e-05, + "loss": 2.5794, + "step": 448500 + }, + { + "epoch": 2.22, + "learning_rate": 4.888816555441606e-05, + "loss": 2.5778, + "step": 449000 + }, + { + "epoch": 2.23, + "learning_rate": 4.8886926967989976e-05, + "loss": 2.5842, + "step": 449500 + }, + { + "epoch": 2.23, + "learning_rate": 4.888568838156389e-05, + "loss": 2.5859, + "step": 450000 + }, + { + "epoch": 2.23, + "learning_rate": 4.888444979513781e-05, + "loss": 2.5658, + "step": 450500 + }, + { + "epoch": 2.23, + "learning_rate": 4.888321120871172e-05, + "loss": 2.5744, + "step": 451000 + }, + { + "epoch": 2.24, + "learning_rate": 4.888197262228564e-05, + "loss": 2.5542, + "step": 451500 + }, + { + "epoch": 2.24, + "learning_rate": 4.8880734035859554e-05, + "loss": 2.5659, + "step": 452000 + }, + { + "epoch": 2.24, + "learning_rate": 4.887949544943347e-05, + "loss": 2.5667, + "step": 452500 + }, + { + "epoch": 2.24, + "learning_rate": 4.887825686300739e-05, + "loss": 2.5916, + "step": 453000 + }, + { + "epoch": 2.25, + "learning_rate": 4.887702075375416e-05, + "loss": 2.5724, + "step": 453500 + }, + { + "epoch": 2.25, + "learning_rate": 4.8875782167328074e-05, + "loss": 2.5604, + "step": 454000 + }, + { + "epoch": 2.25, + "learning_rate": 4.887454358090199e-05, + "loss": 2.556, + "step": 454500 + }, + { + "epoch": 2.25, + "learning_rate": 4.887330499447591e-05, + "loss": 2.569, + "step": 455000 + }, + { + "epoch": 2.26, + "learning_rate": 4.8872066408049824e-05, + "loss": 2.5667, + "step": 455500 + }, + { + "epoch": 2.26, + "learning_rate": 4.887082782162374e-05, + "loss": 2.5849, + "step": 456000 + }, + { + "epoch": 2.26, + "learning_rate": 4.886958923519765e-05, + "loss": 2.5702, + "step": 456500 + }, + { + "epoch": 2.26, + "learning_rate": 4.886835064877157e-05, + "loss": 2.5687, + "step": 457000 + }, + { + "epoch": 2.27, + "learning_rate": 4.886711453951834e-05, + "loss": 2.5787, + "step": 457500 + }, + { + "epoch": 2.27, + "learning_rate": 4.8865875953092254e-05, + "loss": 2.5678, + "step": 458000 + }, + { + "epoch": 2.27, + "learning_rate": 4.886463736666617e-05, + "loss": 2.5795, + "step": 458500 + }, + { + "epoch": 2.27, + "learning_rate": 4.8863401257412947e-05, + "loss": 2.5772, + "step": 459000 + }, + { + "epoch": 2.28, + "learning_rate": 4.8862162670986864e-05, + "loss": 2.5546, + "step": 459500 + }, + { + "epoch": 2.28, + "learning_rate": 4.886092408456078e-05, + "loss": 2.5824, + "step": 460000 + }, + { + "epoch": 2.28, + "learning_rate": 4.885968549813469e-05, + "loss": 2.5854, + "step": 460500 + }, + { + "epoch": 2.28, + "learning_rate": 4.885844691170861e-05, + "loss": 2.5993, + "step": 461000 + }, + { + "epoch": 2.29, + "learning_rate": 4.8857208325282524e-05, + "loss": 2.5536, + "step": 461500 + }, + { + "epoch": 2.29, + "learning_rate": 4.885596973885644e-05, + "loss": 2.5607, + "step": 462000 + }, + { + "epoch": 2.29, + "learning_rate": 4.885473362960321e-05, + "loss": 2.5777, + "step": 462500 + }, + { + "epoch": 2.29, + "learning_rate": 4.885349504317713e-05, + "loss": 2.5489, + "step": 463000 + }, + { + "epoch": 2.3, + "learning_rate": 4.885225645675104e-05, + "loss": 2.5935, + "step": 463500 + }, + { + "epoch": 2.3, + "learning_rate": 4.8851017870324954e-05, + "loss": 2.5599, + "step": 464000 + }, + { + "epoch": 2.3, + "learning_rate": 4.884978176107173e-05, + "loss": 2.5644, + "step": 464500 + }, + { + "epoch": 2.3, + "learning_rate": 4.884854317464565e-05, + "loss": 2.5661, + "step": 465000 + }, + { + "epoch": 2.31, + "learning_rate": 4.8847304588219564e-05, + "loss": 2.5732, + "step": 465500 + }, + { + "epoch": 2.31, + "learning_rate": 4.884606600179348e-05, + "loss": 2.5702, + "step": 466000 + }, + { + "epoch": 2.31, + "learning_rate": 4.88448274153674e-05, + "loss": 2.5586, + "step": 466500 + }, + { + "epoch": 2.31, + "learning_rate": 4.884359130611416e-05, + "loss": 2.574, + "step": 467000 + }, + { + "epoch": 2.32, + "learning_rate": 4.8842352719688076e-05, + "loss": 2.5618, + "step": 467500 + }, + { + "epoch": 2.32, + "learning_rate": 4.884111413326199e-05, + "loss": 2.5526, + "step": 468000 + }, + { + "epoch": 2.32, + "learning_rate": 4.883987554683591e-05, + "loss": 2.5749, + "step": 468500 + }, + { + "epoch": 2.32, + "learning_rate": 4.883863696040983e-05, + "loss": 2.5694, + "step": 469000 + }, + { + "epoch": 2.33, + "learning_rate": 4.8837398373983744e-05, + "loss": 2.5888, + "step": 469500 + }, + { + "epoch": 2.33, + "learning_rate": 4.8836159787557654e-05, + "loss": 2.5595, + "step": 470000 + }, + { + "epoch": 2.33, + "learning_rate": 4.883492120113157e-05, + "loss": 2.5967, + "step": 470500 + }, + { + "epoch": 2.33, + "learning_rate": 4.883368261470549e-05, + "loss": 2.5484, + "step": 471000 + }, + { + "epoch": 2.34, + "learning_rate": 4.8832444028279405e-05, + "loss": 2.5715, + "step": 471500 + }, + { + "epoch": 2.34, + "learning_rate": 4.883120544185332e-05, + "loss": 2.5756, + "step": 472000 + }, + { + "epoch": 2.34, + "learning_rate": 4.882996685542724e-05, + "loss": 2.5513, + "step": 472500 + }, + { + "epoch": 2.34, + "learning_rate": 4.8828728269001156e-05, + "loss": 2.5952, + "step": 473000 + }, + { + "epoch": 2.35, + "learning_rate": 4.8827492159747925e-05, + "loss": 2.5728, + "step": 473500 + }, + { + "epoch": 2.35, + "learning_rate": 4.882625357332184e-05, + "loss": 2.577, + "step": 474000 + }, + { + "epoch": 2.35, + "learning_rate": 4.882501498689576e-05, + "loss": 2.6014, + "step": 474500 + }, + { + "epoch": 2.35, + "learning_rate": 4.882377887764253e-05, + "loss": 2.5732, + "step": 475000 + }, + { + "epoch": 2.36, + "learning_rate": 4.8822540291216444e-05, + "loss": 2.559, + "step": 475500 + }, + { + "epoch": 2.36, + "learning_rate": 4.8821301704790354e-05, + "loss": 2.5574, + "step": 476000 + }, + { + "epoch": 2.36, + "learning_rate": 4.882006311836427e-05, + "loss": 2.5518, + "step": 476500 + }, + { + "epoch": 2.36, + "learning_rate": 4.881882453193819e-05, + "loss": 2.5777, + "step": 477000 + }, + { + "epoch": 2.37, + "learning_rate": 4.8817585945512105e-05, + "loss": 2.5795, + "step": 477500 + }, + { + "epoch": 2.37, + "learning_rate": 4.881634735908602e-05, + "loss": 2.5723, + "step": 478000 + }, + { + "epoch": 2.37, + "learning_rate": 4.881510877265994e-05, + "loss": 2.5972, + "step": 478500 + }, + { + "epoch": 2.37, + "learning_rate": 4.8813872663406715e-05, + "loss": 2.5405, + "step": 479000 + }, + { + "epoch": 2.38, + "learning_rate": 4.8812634076980625e-05, + "loss": 2.6003, + "step": 479500 + }, + { + "epoch": 2.38, + "learning_rate": 4.881139549055454e-05, + "loss": 2.5426, + "step": 480000 + }, + { + "epoch": 2.38, + "learning_rate": 4.881015690412846e-05, + "loss": 2.587, + "step": 480500 + }, + { + "epoch": 2.38, + "learning_rate": 4.8808918317702376e-05, + "loss": 2.5803, + "step": 481000 + }, + { + "epoch": 2.39, + "learning_rate": 4.880767973127629e-05, + "loss": 2.5914, + "step": 481500 + }, + { + "epoch": 2.39, + "learning_rate": 4.880644114485021e-05, + "loss": 2.5622, + "step": 482000 + }, + { + "epoch": 2.39, + "learning_rate": 4.8805202558424126e-05, + "loss": 2.5813, + "step": 482500 + }, + { + "epoch": 2.39, + "learning_rate": 4.880396397199804e-05, + "loss": 2.5929, + "step": 483000 + }, + { + "epoch": 2.4, + "learning_rate": 4.8802727862744805e-05, + "loss": 2.5919, + "step": 483500 + }, + { + "epoch": 2.4, + "learning_rate": 4.880148927631872e-05, + "loss": 2.5565, + "step": 484000 + }, + { + "epoch": 2.4, + "learning_rate": 4.88002531670655e-05, + "loss": 2.5604, + "step": 484500 + }, + { + "epoch": 2.4, + "learning_rate": 4.8799014580639415e-05, + "loss": 2.5575, + "step": 485000 + }, + { + "epoch": 2.41, + "learning_rate": 4.8797775994213325e-05, + "loss": 2.5634, + "step": 485500 + }, + { + "epoch": 2.41, + "learning_rate": 4.879653740778724e-05, + "loss": 2.5762, + "step": 486000 + }, + { + "epoch": 2.41, + "learning_rate": 4.879529882136116e-05, + "loss": 2.5648, + "step": 486500 + }, + { + "epoch": 2.41, + "learning_rate": 4.8794060234935076e-05, + "loss": 2.5592, + "step": 487000 + }, + { + "epoch": 2.42, + "learning_rate": 4.879282164850899e-05, + "loss": 2.563, + "step": 487500 + }, + { + "epoch": 2.42, + "learning_rate": 4.879158553925576e-05, + "loss": 2.6136, + "step": 488000 + }, + { + "epoch": 2.42, + "learning_rate": 4.879034695282967e-05, + "loss": 2.5676, + "step": 488500 + }, + { + "epoch": 2.42, + "learning_rate": 4.878910836640359e-05, + "loss": 2.5787, + "step": 489000 + }, + { + "epoch": 2.43, + "learning_rate": 4.8787872257150364e-05, + "loss": 2.5805, + "step": 489500 + }, + { + "epoch": 2.43, + "learning_rate": 4.878663367072428e-05, + "loss": 2.5883, + "step": 490000 + }, + { + "epoch": 2.43, + "learning_rate": 4.87853950842982e-05, + "loss": 2.5649, + "step": 490500 + }, + { + "epoch": 2.43, + "learning_rate": 4.878415897504497e-05, + "loss": 2.5689, + "step": 491000 + }, + { + "epoch": 2.44, + "learning_rate": 4.878292038861888e-05, + "loss": 2.5621, + "step": 491500 + }, + { + "epoch": 2.44, + "learning_rate": 4.8781681802192794e-05, + "loss": 2.586, + "step": 492000 + }, + { + "epoch": 2.44, + "learning_rate": 4.878044321576671e-05, + "loss": 2.5845, + "step": 492500 + }, + { + "epoch": 2.44, + "learning_rate": 4.877920462934063e-05, + "loss": 2.5749, + "step": 493000 + }, + { + "epoch": 2.44, + "learning_rate": 4.8777966042914545e-05, + "loss": 2.5925, + "step": 493500 + }, + { + "epoch": 2.45, + "learning_rate": 4.877672745648846e-05, + "loss": 2.5829, + "step": 494000 + }, + { + "epoch": 2.45, + "learning_rate": 4.877549134723523e-05, + "loss": 2.5584, + "step": 494500 + }, + { + "epoch": 2.45, + "learning_rate": 4.877425276080915e-05, + "loss": 2.562, + "step": 495000 + }, + { + "epoch": 2.45, + "learning_rate": 4.8773014174383064e-05, + "loss": 2.5678, + "step": 495500 + }, + { + "epoch": 2.46, + "learning_rate": 4.877177558795698e-05, + "loss": 2.5516, + "step": 496000 + }, + { + "epoch": 2.46, + "learning_rate": 4.87705370015309e-05, + "loss": 2.567, + "step": 496500 + }, + { + "epoch": 2.46, + "learning_rate": 4.8769298415104815e-05, + "loss": 2.5885, + "step": 497000 + }, + { + "epoch": 2.46, + "learning_rate": 4.876805982867873e-05, + "loss": 2.5813, + "step": 497500 + }, + { + "epoch": 2.47, + "learning_rate": 4.876682124225264e-05, + "loss": 2.5801, + "step": 498000 + }, + { + "epoch": 2.47, + "learning_rate": 4.876558265582656e-05, + "loss": 2.567, + "step": 498500 + }, + { + "epoch": 2.47, + "learning_rate": 4.8764344069400476e-05, + "loss": 2.5659, + "step": 499000 + }, + { + "epoch": 2.47, + "learning_rate": 4.876310548297439e-05, + "loss": 2.572, + "step": 499500 + }, + { + "epoch": 2.48, + "learning_rate": 4.876186689654831e-05, + "loss": 2.5771, + "step": 500000 + }, + { + "epoch": 2.48, + "learning_rate": 4.876062831012223e-05, + "loss": 2.5942, + "step": 500500 + }, + { + "epoch": 2.48, + "learning_rate": 4.8759389723696144e-05, + "loss": 2.58, + "step": 501000 + }, + { + "epoch": 2.48, + "learning_rate": 4.875815113727006e-05, + "loss": 2.5474, + "step": 501500 + }, + { + "epoch": 2.49, + "learning_rate": 4.875691502801682e-05, + "loss": 2.5899, + "step": 502000 + }, + { + "epoch": 2.49, + "learning_rate": 4.87556789187636e-05, + "loss": 2.581, + "step": 502500 + }, + { + "epoch": 2.49, + "learning_rate": 4.8754440332337515e-05, + "loss": 2.5766, + "step": 503000 + }, + { + "epoch": 2.49, + "learning_rate": 4.875320174591143e-05, + "loss": 2.5461, + "step": 503500 + }, + { + "epoch": 2.5, + "learning_rate": 4.875196315948535e-05, + "loss": 2.5437, + "step": 504000 + }, + { + "epoch": 2.5, + "learning_rate": 4.875072457305926e-05, + "loss": 2.566, + "step": 504500 + }, + { + "epoch": 2.5, + "learning_rate": 4.8749485986633176e-05, + "loss": 2.5592, + "step": 505000 + }, + { + "epoch": 2.5, + "learning_rate": 4.8748249877379945e-05, + "loss": 2.5718, + "step": 505500 + }, + { + "epoch": 2.51, + "learning_rate": 4.874701129095386e-05, + "loss": 2.554, + "step": 506000 + }, + { + "epoch": 2.51, + "learning_rate": 4.874577270452778e-05, + "loss": 2.5343, + "step": 506500 + }, + { + "epoch": 2.51, + "learning_rate": 4.8744534118101696e-05, + "loss": 2.5618, + "step": 507000 + }, + { + "epoch": 2.51, + "learning_rate": 4.8743295531675606e-05, + "loss": 2.5622, + "step": 507500 + }, + { + "epoch": 2.52, + "learning_rate": 4.874205694524952e-05, + "loss": 2.5814, + "step": 508000 + }, + { + "epoch": 2.52, + "learning_rate": 4.874081835882344e-05, + "loss": 2.5951, + "step": 508500 + }, + { + "epoch": 2.52, + "learning_rate": 4.8739582249570215e-05, + "loss": 2.5568, + "step": 509000 + }, + { + "epoch": 2.52, + "learning_rate": 4.873834366314413e-05, + "loss": 2.5576, + "step": 509500 + }, + { + "epoch": 2.53, + "learning_rate": 4.873710507671805e-05, + "loss": 2.5658, + "step": 510000 + }, + { + "epoch": 2.53, + "learning_rate": 4.873586649029196e-05, + "loss": 2.5628, + "step": 510500 + }, + { + "epoch": 2.53, + "learning_rate": 4.8734630381038735e-05, + "loss": 2.5771, + "step": 511000 + }, + { + "epoch": 2.53, + "learning_rate": 4.8733391794612645e-05, + "loss": 2.5623, + "step": 511500 + }, + { + "epoch": 2.54, + "learning_rate": 4.873215320818656e-05, + "loss": 2.552, + "step": 512000 + }, + { + "epoch": 2.54, + "learning_rate": 4.873091462176048e-05, + "loss": 2.5771, + "step": 512500 + }, + { + "epoch": 2.54, + "learning_rate": 4.8729676035334396e-05, + "loss": 2.5683, + "step": 513000 + }, + { + "epoch": 2.54, + "learning_rate": 4.872843744890831e-05, + "loss": 2.5772, + "step": 513500 + }, + { + "epoch": 2.55, + "learning_rate": 4.872719886248222e-05, + "loss": 2.5589, + "step": 514000 + }, + { + "epoch": 2.55, + "learning_rate": 4.872596027605614e-05, + "loss": 2.5793, + "step": 514500 + }, + { + "epoch": 2.55, + "learning_rate": 4.872472168963006e-05, + "loss": 2.5705, + "step": 515000 + }, + { + "epoch": 2.55, + "learning_rate": 4.8723483103203974e-05, + "loss": 2.5768, + "step": 515500 + }, + { + "epoch": 2.56, + "learning_rate": 4.872224451677789e-05, + "loss": 2.5792, + "step": 516000 + }, + { + "epoch": 2.56, + "learning_rate": 4.872100593035181e-05, + "loss": 2.538, + "step": 516500 + }, + { + "epoch": 2.56, + "learning_rate": 4.8719767343925724e-05, + "loss": 2.5786, + "step": 517000 + }, + { + "epoch": 2.56, + "learning_rate": 4.871852875749964e-05, + "loss": 2.5611, + "step": 517500 + }, + { + "epoch": 2.57, + "learning_rate": 4.871729017107356e-05, + "loss": 2.5601, + "step": 518000 + }, + { + "epoch": 2.57, + "learning_rate": 4.8716051584647475e-05, + "loss": 2.5772, + "step": 518500 + }, + { + "epoch": 2.57, + "learning_rate": 4.8714817952567096e-05, + "loss": 2.5865, + "step": 519000 + }, + { + "epoch": 2.57, + "learning_rate": 4.871357936614101e-05, + "loss": 2.5592, + "step": 519500 + }, + { + "epoch": 2.58, + "learning_rate": 4.871234077971493e-05, + "loss": 2.5937, + "step": 520000 + }, + { + "epoch": 2.58, + "learning_rate": 4.871110219328885e-05, + "loss": 2.5601, + "step": 520500 + }, + { + "epoch": 2.58, + "learning_rate": 4.870986360686276e-05, + "loss": 2.5464, + "step": 521000 + }, + { + "epoch": 2.58, + "learning_rate": 4.8708625020436674e-05, + "loss": 2.5565, + "step": 521500 + }, + { + "epoch": 2.59, + "learning_rate": 4.870738643401059e-05, + "loss": 2.5702, + "step": 522000 + }, + { + "epoch": 2.59, + "learning_rate": 4.870614784758451e-05, + "loss": 2.5839, + "step": 522500 + }, + { + "epoch": 2.59, + "learning_rate": 4.8704911738331276e-05, + "loss": 2.5712, + "step": 523000 + }, + { + "epoch": 2.59, + "learning_rate": 4.8703673151905193e-05, + "loss": 2.5777, + "step": 523500 + }, + { + "epoch": 2.6, + "learning_rate": 4.870243704265197e-05, + "loss": 2.56, + "step": 524000 + }, + { + "epoch": 2.6, + "learning_rate": 4.8701198456225886e-05, + "loss": 2.5688, + "step": 524500 + }, + { + "epoch": 2.6, + "learning_rate": 4.8699959869799796e-05, + "loss": 2.5656, + "step": 525000 + }, + { + "epoch": 2.6, + "learning_rate": 4.8698723760546565e-05, + "loss": 2.5649, + "step": 525500 + }, + { + "epoch": 2.61, + "learning_rate": 4.869748517412048e-05, + "loss": 2.5436, + "step": 526000 + }, + { + "epoch": 2.61, + "learning_rate": 4.86962465876944e-05, + "loss": 2.5476, + "step": 526500 + }, + { + "epoch": 2.61, + "learning_rate": 4.8695008001268316e-05, + "loss": 2.5696, + "step": 527000 + }, + { + "epoch": 2.61, + "learning_rate": 4.869376941484223e-05, + "loss": 2.5495, + "step": 527500 + }, + { + "epoch": 2.62, + "learning_rate": 4.869253082841615e-05, + "loss": 2.5808, + "step": 528000 + }, + { + "epoch": 2.62, + "learning_rate": 4.8691292241990066e-05, + "loss": 2.5641, + "step": 528500 + }, + { + "epoch": 2.62, + "learning_rate": 4.869005365556398e-05, + "loss": 2.5714, + "step": 529000 + }, + { + "epoch": 2.62, + "learning_rate": 4.8688815069137894e-05, + "loss": 2.5913, + "step": 529500 + }, + { + "epoch": 2.63, + "learning_rate": 4.868757648271181e-05, + "loss": 2.5783, + "step": 530000 + }, + { + "epoch": 2.63, + "learning_rate": 4.868633789628573e-05, + "loss": 2.57, + "step": 530500 + }, + { + "epoch": 2.63, + "learning_rate": 4.86851017870325e-05, + "loss": 2.5547, + "step": 531000 + }, + { + "epoch": 2.63, + "learning_rate": 4.868386320060642e-05, + "loss": 2.5468, + "step": 531500 + }, + { + "epoch": 2.64, + "learning_rate": 4.868262461418033e-05, + "loss": 2.5864, + "step": 532000 + }, + { + "epoch": 2.64, + "learning_rate": 4.868138602775425e-05, + "loss": 2.5843, + "step": 532500 + }, + { + "epoch": 2.64, + "learning_rate": 4.8680147441328164e-05, + "loss": 2.5719, + "step": 533000 + }, + { + "epoch": 2.64, + "learning_rate": 4.867891133207493e-05, + "loss": 2.5732, + "step": 533500 + }, + { + "epoch": 2.65, + "learning_rate": 4.867767274564885e-05, + "loss": 2.5765, + "step": 534000 + }, + { + "epoch": 2.65, + "learning_rate": 4.8676434159222767e-05, + "loss": 2.5757, + "step": 534500 + }, + { + "epoch": 2.65, + "learning_rate": 4.8675195572796683e-05, + "loss": 2.5774, + "step": 535000 + }, + { + "epoch": 2.65, + "learning_rate": 4.8673956986370594e-05, + "loss": 2.554, + "step": 535500 + }, + { + "epoch": 2.66, + "learning_rate": 4.867272087711737e-05, + "loss": 2.5533, + "step": 536000 + }, + { + "epoch": 2.66, + "learning_rate": 4.8671482290691286e-05, + "loss": 2.5772, + "step": 536500 + }, + { + "epoch": 2.66, + "learning_rate": 4.86702437042652e-05, + "loss": 2.5513, + "step": 537000 + }, + { + "epoch": 2.66, + "learning_rate": 4.866900511783912e-05, + "loss": 2.5708, + "step": 537500 + }, + { + "epoch": 2.67, + "learning_rate": 4.866776653141304e-05, + "loss": 2.5684, + "step": 538000 + }, + { + "epoch": 2.67, + "learning_rate": 4.866652794498695e-05, + "loss": 2.5832, + "step": 538500 + }, + { + "epoch": 2.67, + "learning_rate": 4.8665291835733716e-05, + "loss": 2.5732, + "step": 539000 + }, + { + "epoch": 2.67, + "learning_rate": 4.866405324930763e-05, + "loss": 2.5757, + "step": 539500 + }, + { + "epoch": 2.68, + "learning_rate": 4.86628171400544e-05, + "loss": 2.5808, + "step": 540000 + }, + { + "epoch": 2.68, + "learning_rate": 4.866157855362832e-05, + "loss": 2.5977, + "step": 540500 + }, + { + "epoch": 2.68, + "learning_rate": 4.8660339967202236e-05, + "loss": 2.5599, + "step": 541000 + }, + { + "epoch": 2.68, + "learning_rate": 4.865910138077615e-05, + "loss": 2.5493, + "step": 541500 + }, + { + "epoch": 2.69, + "learning_rate": 4.865786279435007e-05, + "loss": 2.5864, + "step": 542000 + }, + { + "epoch": 2.69, + "learning_rate": 4.8656624207923986e-05, + "loss": 2.5611, + "step": 542500 + }, + { + "epoch": 2.69, + "learning_rate": 4.86553856214979e-05, + "loss": 2.5601, + "step": 543000 + }, + { + "epoch": 2.69, + "learning_rate": 4.865414703507182e-05, + "loss": 2.5715, + "step": 543500 + }, + { + "epoch": 2.7, + "learning_rate": 4.865291092581858e-05, + "loss": 2.5968, + "step": 544000 + }, + { + "epoch": 2.7, + "learning_rate": 4.86516723393925e-05, + "loss": 2.561, + "step": 544500 + }, + { + "epoch": 2.7, + "learning_rate": 4.8650433752966416e-05, + "loss": 2.593, + "step": 545000 + }, + { + "epoch": 2.7, + "learning_rate": 4.864919516654033e-05, + "loss": 2.5692, + "step": 545500 + }, + { + "epoch": 2.71, + "learning_rate": 4.864795658011425e-05, + "loss": 2.5501, + "step": 546000 + }, + { + "epoch": 2.71, + "learning_rate": 4.864671799368817e-05, + "loss": 2.5733, + "step": 546500 + }, + { + "epoch": 2.71, + "learning_rate": 4.8645481884434936e-05, + "loss": 2.5761, + "step": 547000 + }, + { + "epoch": 2.71, + "learning_rate": 4.864424329800885e-05, + "loss": 2.5604, + "step": 547500 + }, + { + "epoch": 2.71, + "learning_rate": 4.864300471158277e-05, + "loss": 2.5876, + "step": 548000 + }, + { + "epoch": 2.72, + "learning_rate": 4.8641766125156686e-05, + "loss": 2.5696, + "step": 548500 + }, + { + "epoch": 2.72, + "learning_rate": 4.86405275387306e-05, + "loss": 2.5717, + "step": 549000 + }, + { + "epoch": 2.72, + "learning_rate": 4.863928895230452e-05, + "loss": 2.5383, + "step": 549500 + }, + { + "epoch": 2.72, + "learning_rate": 4.863805284305128e-05, + "loss": 2.5773, + "step": 550000 + }, + { + "epoch": 2.73, + "learning_rate": 4.863681673379805e-05, + "loss": 2.5818, + "step": 550500 + }, + { + "epoch": 2.73, + "learning_rate": 4.863557814737197e-05, + "loss": 2.5148, + "step": 551000 + }, + { + "epoch": 2.73, + "learning_rate": 4.8634339560945885e-05, + "loss": 2.5935, + "step": 551500 + }, + { + "epoch": 2.73, + "learning_rate": 4.86331009745198e-05, + "loss": 2.5585, + "step": 552000 + }, + { + "epoch": 2.74, + "learning_rate": 4.863186238809372e-05, + "loss": 2.5496, + "step": 552500 + }, + { + "epoch": 2.74, + "learning_rate": 4.8630623801667636e-05, + "loss": 2.5555, + "step": 553000 + }, + { + "epoch": 2.74, + "learning_rate": 4.862938521524155e-05, + "loss": 2.547, + "step": 553500 + }, + { + "epoch": 2.74, + "learning_rate": 4.862814662881547e-05, + "loss": 2.567, + "step": 554000 + }, + { + "epoch": 2.75, + "learning_rate": 4.8626908042389387e-05, + "loss": 2.554, + "step": 554500 + }, + { + "epoch": 2.75, + "learning_rate": 4.8625669455963303e-05, + "loss": 2.5619, + "step": 555000 + }, + { + "epoch": 2.75, + "learning_rate": 4.862443582388292e-05, + "loss": 2.5697, + "step": 555500 + }, + { + "epoch": 2.75, + "learning_rate": 4.8623197237456834e-05, + "loss": 2.5523, + "step": 556000 + }, + { + "epoch": 2.76, + "learning_rate": 4.862195865103075e-05, + "loss": 2.568, + "step": 556500 + }, + { + "epoch": 2.76, + "learning_rate": 4.862072006460467e-05, + "loss": 2.5945, + "step": 557000 + }, + { + "epoch": 2.76, + "learning_rate": 4.861948643252429e-05, + "loss": 2.5638, + "step": 557500 + }, + { + "epoch": 2.76, + "learning_rate": 4.8618247846098206e-05, + "loss": 2.56, + "step": 558000 + }, + { + "epoch": 2.77, + "learning_rate": 4.861700925967212e-05, + "loss": 2.5682, + "step": 558500 + }, + { + "epoch": 2.77, + "learning_rate": 4.861577067324604e-05, + "loss": 2.5404, + "step": 559000 + }, + { + "epoch": 2.77, + "learning_rate": 4.8614532086819957e-05, + "loss": 2.5794, + "step": 559500 + }, + { + "epoch": 2.77, + "learning_rate": 4.8613293500393873e-05, + "loss": 2.5669, + "step": 560000 + }, + { + "epoch": 2.78, + "learning_rate": 4.861205491396779e-05, + "loss": 2.5423, + "step": 560500 + }, + { + "epoch": 2.78, + "learning_rate": 4.86108163275417e-05, + "loss": 2.5711, + "step": 561000 + }, + { + "epoch": 2.78, + "learning_rate": 4.860957774111562e-05, + "loss": 2.5841, + "step": 561500 + }, + { + "epoch": 2.78, + "learning_rate": 4.8608339154689534e-05, + "loss": 2.563, + "step": 562000 + }, + { + "epoch": 2.79, + "learning_rate": 4.860710056826345e-05, + "loss": 2.5802, + "step": 562500 + }, + { + "epoch": 2.79, + "learning_rate": 4.860586198183737e-05, + "loss": 2.5727, + "step": 563000 + }, + { + "epoch": 2.79, + "learning_rate": 4.8604625872584144e-05, + "loss": 2.5798, + "step": 563500 + }, + { + "epoch": 2.79, + "learning_rate": 4.860338728615806e-05, + "loss": 2.5647, + "step": 564000 + }, + { + "epoch": 2.8, + "learning_rate": 4.860214869973197e-05, + "loss": 2.5512, + "step": 564500 + }, + { + "epoch": 2.8, + "learning_rate": 4.860091011330589e-05, + "loss": 2.5654, + "step": 565000 + }, + { + "epoch": 2.8, + "learning_rate": 4.8599671526879805e-05, + "loss": 2.5424, + "step": 565500 + }, + { + "epoch": 2.8, + "learning_rate": 4.859843294045372e-05, + "loss": 2.554, + "step": 566000 + }, + { + "epoch": 2.81, + "learning_rate": 4.859719435402764e-05, + "loss": 2.5894, + "step": 566500 + }, + { + "epoch": 2.81, + "learning_rate": 4.8595955767601556e-05, + "loss": 2.551, + "step": 567000 + }, + { + "epoch": 2.81, + "learning_rate": 4.859471965834832e-05, + "loss": 2.5805, + "step": 567500 + }, + { + "epoch": 2.81, + "learning_rate": 4.8593481071922235e-05, + "loss": 2.5897, + "step": 568000 + }, + { + "epoch": 2.82, + "learning_rate": 4.859224248549615e-05, + "loss": 2.5625, + "step": 568500 + }, + { + "epoch": 2.82, + "learning_rate": 4.859100389907007e-05, + "loss": 2.5616, + "step": 569000 + }, + { + "epoch": 2.82, + "learning_rate": 4.8589765312643985e-05, + "loss": 2.5768, + "step": 569500 + }, + { + "epoch": 2.82, + "learning_rate": 4.858852920339076e-05, + "loss": 2.5554, + "step": 570000 + }, + { + "epoch": 2.83, + "learning_rate": 4.858729061696467e-05, + "loss": 2.5912, + "step": 570500 + }, + { + "epoch": 2.83, + "learning_rate": 4.858605203053859e-05, + "loss": 2.5492, + "step": 571000 + }, + { + "epoch": 2.83, + "learning_rate": 4.858481592128536e-05, + "loss": 2.5554, + "step": 571500 + }, + { + "epoch": 2.83, + "learning_rate": 4.8583577334859274e-05, + "loss": 2.5641, + "step": 572000 + }, + { + "epoch": 2.84, + "learning_rate": 4.858233874843319e-05, + "loss": 2.5709, + "step": 572500 + }, + { + "epoch": 2.84, + "learning_rate": 4.858110016200711e-05, + "loss": 2.5975, + "step": 573000 + }, + { + "epoch": 2.84, + "learning_rate": 4.857986157558102e-05, + "loss": 2.5723, + "step": 573500 + }, + { + "epoch": 2.84, + "learning_rate": 4.8578622989154935e-05, + "loss": 2.5557, + "step": 574000 + }, + { + "epoch": 2.85, + "learning_rate": 4.857738440272885e-05, + "loss": 2.538, + "step": 574500 + }, + { + "epoch": 2.85, + "learning_rate": 4.857614581630277e-05, + "loss": 2.5816, + "step": 575000 + }, + { + "epoch": 2.85, + "learning_rate": 4.8574907229876685e-05, + "loss": 2.5707, + "step": 575500 + }, + { + "epoch": 2.85, + "learning_rate": 4.85736686434506e-05, + "loss": 2.5592, + "step": 576000 + }, + { + "epoch": 2.86, + "learning_rate": 4.857243005702452e-05, + "loss": 2.5493, + "step": 576500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8571191470598436e-05, + "loss": 2.5495, + "step": 577000 + }, + { + "epoch": 2.86, + "learning_rate": 4.8569955361345205e-05, + "loss": 2.5573, + "step": 577500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8568719252091974e-05, + "loss": 2.5596, + "step": 578000 + }, + { + "epoch": 2.87, + "learning_rate": 4.856748066566589e-05, + "loss": 2.5427, + "step": 578500 + }, + { + "epoch": 2.87, + "learning_rate": 4.856624207923981e-05, + "loss": 2.5828, + "step": 579000 + }, + { + "epoch": 2.87, + "learning_rate": 4.8565003492813725e-05, + "loss": 2.5622, + "step": 579500 + }, + { + "epoch": 2.87, + "learning_rate": 4.8563764906387635e-05, + "loss": 2.5681, + "step": 580000 + }, + { + "epoch": 2.88, + "learning_rate": 4.856252631996155e-05, + "loss": 2.5767, + "step": 580500 + }, + { + "epoch": 2.88, + "learning_rate": 4.856128773353547e-05, + "loss": 2.5359, + "step": 581000 + }, + { + "epoch": 2.88, + "learning_rate": 4.8560049147109386e-05, + "loss": 2.5966, + "step": 581500 + }, + { + "epoch": 2.88, + "learning_rate": 4.85588105606833e-05, + "loss": 2.5783, + "step": 582000 + }, + { + "epoch": 2.89, + "learning_rate": 4.855757445143008e-05, + "loss": 2.563, + "step": 582500 + }, + { + "epoch": 2.89, + "learning_rate": 4.855633586500399e-05, + "loss": 2.5594, + "step": 583000 + }, + { + "epoch": 2.89, + "learning_rate": 4.855509975575076e-05, + "loss": 2.5744, + "step": 583500 + }, + { + "epoch": 2.89, + "learning_rate": 4.8553861169324674e-05, + "loss": 2.5556, + "step": 584000 + }, + { + "epoch": 2.9, + "learning_rate": 4.855262258289859e-05, + "loss": 2.5552, + "step": 584500 + }, + { + "epoch": 2.9, + "learning_rate": 4.855138399647251e-05, + "loss": 2.5402, + "step": 585000 + }, + { + "epoch": 2.9, + "learning_rate": 4.8550145410046425e-05, + "loss": 2.553, + "step": 585500 + }, + { + "epoch": 2.9, + "learning_rate": 4.8548906823620335e-05, + "loss": 2.5668, + "step": 586000 + }, + { + "epoch": 2.91, + "learning_rate": 4.854766823719425e-05, + "loss": 2.5855, + "step": 586500 + }, + { + "epoch": 2.91, + "learning_rate": 4.854643212794103e-05, + "loss": 2.5822, + "step": 587000 + }, + { + "epoch": 2.91, + "learning_rate": 4.8545193541514944e-05, + "loss": 2.5603, + "step": 587500 + }, + { + "epoch": 2.91, + "learning_rate": 4.854395495508886e-05, + "loss": 2.5564, + "step": 588000 + }, + { + "epoch": 2.92, + "learning_rate": 4.854271884583563e-05, + "loss": 2.5659, + "step": 588500 + }, + { + "epoch": 2.92, + "learning_rate": 4.854148025940955e-05, + "loss": 2.5704, + "step": 589000 + }, + { + "epoch": 2.92, + "learning_rate": 4.8540241672983464e-05, + "loss": 2.584, + "step": 589500 + }, + { + "epoch": 2.92, + "learning_rate": 4.8539003086557374e-05, + "loss": 2.5611, + "step": 590000 + }, + { + "epoch": 2.93, + "learning_rate": 4.853776697730414e-05, + "loss": 2.5538, + "step": 590500 + }, + { + "epoch": 2.93, + "learning_rate": 4.853652839087806e-05, + "loss": 2.5419, + "step": 591000 + }, + { + "epoch": 2.93, + "learning_rate": 4.853528980445198e-05, + "loss": 2.5637, + "step": 591500 + }, + { + "epoch": 2.93, + "learning_rate": 4.8534051218025894e-05, + "loss": 2.5852, + "step": 592000 + }, + { + "epoch": 2.94, + "learning_rate": 4.853281263159981e-05, + "loss": 2.5701, + "step": 592500 + }, + { + "epoch": 2.94, + "learning_rate": 4.853157404517373e-05, + "loss": 2.5697, + "step": 593000 + }, + { + "epoch": 2.94, + "learning_rate": 4.8530335458747644e-05, + "loss": 2.5454, + "step": 593500 + }, + { + "epoch": 2.94, + "learning_rate": 4.852909687232156e-05, + "loss": 2.5414, + "step": 594000 + }, + { + "epoch": 2.95, + "learning_rate": 4.852785828589548e-05, + "loss": 2.5856, + "step": 594500 + }, + { + "epoch": 2.95, + "learning_rate": 4.8526619699469395e-05, + "loss": 2.5922, + "step": 595000 + }, + { + "epoch": 2.95, + "learning_rate": 4.8525381113043305e-05, + "loss": 2.5762, + "step": 595500 + }, + { + "epoch": 2.95, + "learning_rate": 4.852414252661722e-05, + "loss": 2.5752, + "step": 596000 + }, + { + "epoch": 2.96, + "learning_rate": 4.852290394019114e-05, + "loss": 2.5849, + "step": 596500 + }, + { + "epoch": 2.96, + "learning_rate": 4.852166783093791e-05, + "loss": 2.5798, + "step": 597000 + }, + { + "epoch": 2.96, + "learning_rate": 4.8520429244511825e-05, + "loss": 2.5716, + "step": 597500 + }, + { + "epoch": 2.96, + "learning_rate": 4.8519193135258594e-05, + "loss": 2.559, + "step": 598000 + }, + { + "epoch": 2.97, + "learning_rate": 4.851795454883251e-05, + "loss": 2.5591, + "step": 598500 + }, + { + "epoch": 2.97, + "learning_rate": 4.851671596240643e-05, + "loss": 2.582, + "step": 599000 + }, + { + "epoch": 2.97, + "learning_rate": 4.8515477375980345e-05, + "loss": 2.581, + "step": 599500 + }, + { + "epoch": 2.97, + "learning_rate": 4.851423878955426e-05, + "loss": 2.5779, + "step": 600000 + }, + { + "epoch": 2.98, + "learning_rate": 4.851300268030103e-05, + "loss": 2.5746, + "step": 600500 + }, + { + "epoch": 2.98, + "learning_rate": 4.851176409387495e-05, + "loss": 2.5805, + "step": 601000 + }, + { + "epoch": 2.98, + "learning_rate": 4.8510525507448864e-05, + "loss": 2.5754, + "step": 601500 + }, + { + "epoch": 2.98, + "learning_rate": 4.850928692102278e-05, + "loss": 2.5709, + "step": 602000 + }, + { + "epoch": 2.98, + "learning_rate": 4.85080483345967e-05, + "loss": 2.5627, + "step": 602500 + }, + { + "epoch": 2.99, + "learning_rate": 4.8506809748170615e-05, + "loss": 2.5808, + "step": 603000 + }, + { + "epoch": 2.99, + "learning_rate": 4.8505571161744525e-05, + "loss": 2.5847, + "step": 603500 + }, + { + "epoch": 2.99, + "learning_rate": 4.850433257531844e-05, + "loss": 2.5929, + "step": 604000 + }, + { + "epoch": 2.99, + "learning_rate": 4.850309398889236e-05, + "loss": 2.5613, + "step": 604500 + }, + { + "epoch": 3.0, + "learning_rate": 4.850185787963913e-05, + "loss": 2.5579, + "step": 605000 + }, + { + "epoch": 3.0, + "learning_rate": 4.8500619293213045e-05, + "loss": 2.5798, + "step": 605500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.6309935793243545, + "eval_accuracy_mlm": 0.583263169880887, + "eval_accuracy_nsp": 0.856231786287207, + "eval_loss": 2.503361225128174, + "eval_runtime": 145.9652, + "eval_samples_per_second": 1746.711, + "eval_steps_per_second": 72.784, + "step": 605529 + }, + { + "epoch": 3.0, + "learning_rate": 4.8499383183959814e-05, + "loss": 2.5333, + "step": 606000 + }, + { + "epoch": 3.0, + "learning_rate": 4.849814459753373e-05, + "loss": 2.5139, + "step": 606500 + }, + { + "epoch": 3.01, + "learning_rate": 4.849690601110765e-05, + "loss": 2.5247, + "step": 607000 + }, + { + "epoch": 3.01, + "learning_rate": 4.849566990185441e-05, + "loss": 2.5196, + "step": 607500 + }, + { + "epoch": 3.01, + "learning_rate": 4.8494431315428326e-05, + "loss": 2.5171, + "step": 608000 + }, + { + "epoch": 3.01, + "learning_rate": 4.849319272900224e-05, + "loss": 2.5036, + "step": 608500 + }, + { + "epoch": 3.02, + "learning_rate": 4.849195414257616e-05, + "loss": 2.5135, + "step": 609000 + }, + { + "epoch": 3.02, + "learning_rate": 4.849071555615008e-05, + "loss": 2.5322, + "step": 609500 + }, + { + "epoch": 3.02, + "learning_rate": 4.8489476969723994e-05, + "loss": 2.5542, + "step": 610000 + }, + { + "epoch": 3.02, + "learning_rate": 4.848823838329791e-05, + "loss": 2.5499, + "step": 610500 + }, + { + "epoch": 3.03, + "learning_rate": 4.848699979687183e-05, + "loss": 2.5478, + "step": 611000 + }, + { + "epoch": 3.03, + "learning_rate": 4.84857636876186e-05, + "loss": 2.5333, + "step": 611500 + }, + { + "epoch": 3.03, + "learning_rate": 4.8484525101192514e-05, + "loss": 2.552, + "step": 612000 + }, + { + "epoch": 3.03, + "learning_rate": 4.848328651476643e-05, + "loss": 2.5408, + "step": 612500 + }, + { + "epoch": 3.04, + "learning_rate": 4.848204792834035e-05, + "loss": 2.5539, + "step": 613000 + }, + { + "epoch": 3.04, + "learning_rate": 4.8480809341914264e-05, + "loss": 2.5355, + "step": 613500 + }, + { + "epoch": 3.04, + "learning_rate": 4.847957075548818e-05, + "loss": 2.5418, + "step": 614000 + }, + { + "epoch": 3.04, + "learning_rate": 4.84783321690621e-05, + "loss": 2.5373, + "step": 614500 + }, + { + "epoch": 3.05, + "learning_rate": 4.847709605980886e-05, + "loss": 2.539, + "step": 615000 + }, + { + "epoch": 3.05, + "learning_rate": 4.847585747338278e-05, + "loss": 2.5402, + "step": 615500 + }, + { + "epoch": 3.05, + "learning_rate": 4.8474618886956694e-05, + "loss": 2.5299, + "step": 616000 + }, + { + "epoch": 3.05, + "learning_rate": 4.847338030053061e-05, + "loss": 2.5361, + "step": 616500 + }, + { + "epoch": 3.06, + "learning_rate": 4.847214171410453e-05, + "loss": 2.5464, + "step": 617000 + }, + { + "epoch": 3.06, + "learning_rate": 4.8470903127678445e-05, + "loss": 2.5522, + "step": 617500 + }, + { + "epoch": 3.06, + "learning_rate": 4.846966454125236e-05, + "loss": 2.5553, + "step": 618000 + }, + { + "epoch": 3.06, + "learning_rate": 4.846842595482628e-05, + "loss": 2.5591, + "step": 618500 + }, + { + "epoch": 3.07, + "learning_rate": 4.8467187368400196e-05, + "loss": 2.54, + "step": 619000 + }, + { + "epoch": 3.07, + "learning_rate": 4.846594878197411e-05, + "loss": 2.5405, + "step": 619500 + }, + { + "epoch": 3.07, + "learning_rate": 4.846471267272088e-05, + "loss": 2.5232, + "step": 620000 + }, + { + "epoch": 3.07, + "learning_rate": 4.84634740862948e-05, + "loss": 2.5347, + "step": 620500 + }, + { + "epoch": 3.08, + "learning_rate": 4.8462235499868715e-05, + "loss": 2.528, + "step": 621000 + }, + { + "epoch": 3.08, + "learning_rate": 4.846099939061548e-05, + "loss": 2.5322, + "step": 621500 + }, + { + "epoch": 3.08, + "learning_rate": 4.8459760804189394e-05, + "loss": 2.5317, + "step": 622000 + }, + { + "epoch": 3.08, + "learning_rate": 4.845852221776331e-05, + "loss": 2.5383, + "step": 622500 + }, + { + "epoch": 3.09, + "learning_rate": 4.845728363133723e-05, + "loss": 2.5493, + "step": 623000 + }, + { + "epoch": 3.09, + "learning_rate": 4.8456045044911145e-05, + "loss": 2.538, + "step": 623500 + }, + { + "epoch": 3.09, + "learning_rate": 4.845480645848506e-05, + "loss": 2.5492, + "step": 624000 + }, + { + "epoch": 3.09, + "learning_rate": 4.845356787205898e-05, + "loss": 2.5289, + "step": 624500 + }, + { + "epoch": 3.1, + "learning_rate": 4.8452329285632896e-05, + "loss": 2.566, + "step": 625000 + }, + { + "epoch": 3.1, + "learning_rate": 4.8451093176379665e-05, + "loss": 2.5407, + "step": 625500 + }, + { + "epoch": 3.1, + "learning_rate": 4.844985458995358e-05, + "loss": 2.5053, + "step": 626000 + }, + { + "epoch": 3.1, + "learning_rate": 4.84486160035275e-05, + "loss": 2.5606, + "step": 626500 + }, + { + "epoch": 3.11, + "learning_rate": 4.844737989427427e-05, + "loss": 2.5307, + "step": 627000 + }, + { + "epoch": 3.11, + "learning_rate": 4.844614130784818e-05, + "loss": 2.5339, + "step": 627500 + }, + { + "epoch": 3.11, + "learning_rate": 4.8444902721422094e-05, + "loss": 2.5584, + "step": 628000 + }, + { + "epoch": 3.11, + "learning_rate": 4.844366413499601e-05, + "loss": 2.5237, + "step": 628500 + }, + { + "epoch": 3.12, + "learning_rate": 4.844242554856993e-05, + "loss": 2.5553, + "step": 629000 + }, + { + "epoch": 3.12, + "learning_rate": 4.84411894393167e-05, + "loss": 2.5506, + "step": 629500 + }, + { + "epoch": 3.12, + "learning_rate": 4.8439950852890614e-05, + "loss": 2.5469, + "step": 630000 + }, + { + "epoch": 3.12, + "learning_rate": 4.843871226646453e-05, + "loss": 2.551, + "step": 630500 + }, + { + "epoch": 3.13, + "learning_rate": 4.843747368003845e-05, + "loss": 2.5408, + "step": 631000 + }, + { + "epoch": 3.13, + "learning_rate": 4.8436235093612365e-05, + "loss": 2.5641, + "step": 631500 + }, + { + "epoch": 3.13, + "learning_rate": 4.843499650718628e-05, + "loss": 2.5317, + "step": 632000 + }, + { + "epoch": 3.13, + "learning_rate": 4.84337579207602e-05, + "loss": 2.528, + "step": 632500 + }, + { + "epoch": 3.14, + "learning_rate": 4.8432519334334116e-05, + "loss": 2.5114, + "step": 633000 + }, + { + "epoch": 3.14, + "learning_rate": 4.843128074790803e-05, + "loss": 2.5307, + "step": 633500 + }, + { + "epoch": 3.14, + "learning_rate": 4.843004216148195e-05, + "loss": 2.5363, + "step": 634000 + }, + { + "epoch": 3.14, + "learning_rate": 4.8428803575055866e-05, + "loss": 2.5329, + "step": 634500 + }, + { + "epoch": 3.15, + "learning_rate": 4.842756498862978e-05, + "loss": 2.5189, + "step": 635000 + }, + { + "epoch": 3.15, + "learning_rate": 4.8426328879376545e-05, + "loss": 2.5262, + "step": 635500 + }, + { + "epoch": 3.15, + "learning_rate": 4.842509029295046e-05, + "loss": 2.5593, + "step": 636000 + }, + { + "epoch": 3.15, + "learning_rate": 4.842385418369723e-05, + "loss": 2.5337, + "step": 636500 + }, + { + "epoch": 3.16, + "learning_rate": 4.842261559727115e-05, + "loss": 2.5343, + "step": 637000 + }, + { + "epoch": 3.16, + "learning_rate": 4.8421377010845065e-05, + "loss": 2.5407, + "step": 637500 + }, + { + "epoch": 3.16, + "learning_rate": 4.842013842441898e-05, + "loss": 2.5609, + "step": 638000 + }, + { + "epoch": 3.16, + "learning_rate": 4.84188998379929e-05, + "loss": 2.5219, + "step": 638500 + }, + { + "epoch": 3.17, + "learning_rate": 4.8417661251566816e-05, + "loss": 2.5684, + "step": 639000 + }, + { + "epoch": 3.17, + "learning_rate": 4.841642266514073e-05, + "loss": 2.5422, + "step": 639500 + }, + { + "epoch": 3.17, + "learning_rate": 4.841518407871465e-05, + "loss": 2.5736, + "step": 640000 + }, + { + "epoch": 3.17, + "learning_rate": 4.841394796946142e-05, + "loss": 2.5538, + "step": 640500 + }, + { + "epoch": 3.18, + "learning_rate": 4.841270938303533e-05, + "loss": 2.5404, + "step": 641000 + }, + { + "epoch": 3.18, + "learning_rate": 4.8411470796609245e-05, + "loss": 2.5472, + "step": 641500 + }, + { + "epoch": 3.18, + "learning_rate": 4.841023221018316e-05, + "loss": 2.5434, + "step": 642000 + }, + { + "epoch": 3.18, + "learning_rate": 4.840899610092993e-05, + "loss": 2.5179, + "step": 642500 + }, + { + "epoch": 3.19, + "learning_rate": 4.840775751450385e-05, + "loss": 2.5491, + "step": 643000 + }, + { + "epoch": 3.19, + "learning_rate": 4.8406518928077765e-05, + "loss": 2.5372, + "step": 643500 + }, + { + "epoch": 3.19, + "learning_rate": 4.840528034165168e-05, + "loss": 2.544, + "step": 644000 + }, + { + "epoch": 3.19, + "learning_rate": 4.84040417552256e-05, + "loss": 2.5429, + "step": 644500 + }, + { + "epoch": 3.2, + "learning_rate": 4.8402803168799516e-05, + "loss": 2.5307, + "step": 645000 + }, + { + "epoch": 3.2, + "learning_rate": 4.840156458237343e-05, + "loss": 2.553, + "step": 645500 + }, + { + "epoch": 3.2, + "learning_rate": 4.84003284731202e-05, + "loss": 2.5597, + "step": 646000 + }, + { + "epoch": 3.2, + "learning_rate": 4.839908988669412e-05, + "loss": 2.5398, + "step": 646500 + }, + { + "epoch": 3.21, + "learning_rate": 4.8397851300268035e-05, + "loss": 2.5258, + "step": 647000 + }, + { + "epoch": 3.21, + "learning_rate": 4.8396612713841946e-05, + "loss": 2.525, + "step": 647500 + }, + { + "epoch": 3.21, + "learning_rate": 4.839537412741586e-05, + "loss": 2.5487, + "step": 648000 + }, + { + "epoch": 3.21, + "learning_rate": 4.839413801816263e-05, + "loss": 2.5557, + "step": 648500 + }, + { + "epoch": 3.22, + "learning_rate": 4.839289943173655e-05, + "loss": 2.5434, + "step": 649000 + }, + { + "epoch": 3.22, + "learning_rate": 4.8391660845310465e-05, + "loss": 2.5495, + "step": 649500 + }, + { + "epoch": 3.22, + "learning_rate": 4.839042225888438e-05, + "loss": 2.5504, + "step": 650000 + }, + { + "epoch": 3.22, + "learning_rate": 4.838918614963115e-05, + "loss": 2.5101, + "step": 650500 + }, + { + "epoch": 3.23, + "learning_rate": 4.838794756320507e-05, + "loss": 2.5414, + "step": 651000 + }, + { + "epoch": 3.23, + "learning_rate": 4.8386708976778985e-05, + "loss": 2.5399, + "step": 651500 + }, + { + "epoch": 3.23, + "learning_rate": 4.83854703903529e-05, + "loss": 2.5554, + "step": 652000 + }, + { + "epoch": 3.23, + "learning_rate": 4.8384234281099664e-05, + "loss": 2.5515, + "step": 652500 + }, + { + "epoch": 3.24, + "learning_rate": 4.838299569467358e-05, + "loss": 2.5307, + "step": 653000 + }, + { + "epoch": 3.24, + "learning_rate": 4.83817571082475e-05, + "loss": 2.539, + "step": 653500 + }, + { + "epoch": 3.24, + "learning_rate": 4.8380518521821415e-05, + "loss": 2.5284, + "step": 654000 + }, + { + "epoch": 3.24, + "learning_rate": 4.837928241256819e-05, + "loss": 2.5652, + "step": 654500 + }, + { + "epoch": 3.25, + "learning_rate": 4.837804382614211e-05, + "loss": 2.5605, + "step": 655000 + }, + { + "epoch": 3.25, + "learning_rate": 4.837680523971602e-05, + "loss": 2.5348, + "step": 655500 + }, + { + "epoch": 3.25, + "learning_rate": 4.8375569130462786e-05, + "loss": 2.5298, + "step": 656000 + }, + { + "epoch": 3.25, + "learning_rate": 4.83743305440367e-05, + "loss": 2.5437, + "step": 656500 + }, + { + "epoch": 3.26, + "learning_rate": 4.837309195761062e-05, + "loss": 2.5365, + "step": 657000 + }, + { + "epoch": 3.26, + "learning_rate": 4.837185337118454e-05, + "loss": 2.5457, + "step": 657500 + }, + { + "epoch": 3.26, + "learning_rate": 4.8370614784758454e-05, + "loss": 2.5365, + "step": 658000 + }, + { + "epoch": 3.26, + "learning_rate": 4.836937619833237e-05, + "loss": 2.5297, + "step": 658500 + }, + { + "epoch": 3.26, + "learning_rate": 4.836813761190628e-05, + "loss": 2.5214, + "step": 659000 + }, + { + "epoch": 3.27, + "learning_rate": 4.83668990254802e-05, + "loss": 2.5343, + "step": 659500 + }, + { + "epoch": 3.27, + "learning_rate": 4.8365660439054115e-05, + "loss": 2.535, + "step": 660000 + }, + { + "epoch": 3.27, + "learning_rate": 4.836442432980089e-05, + "loss": 2.5523, + "step": 660500 + }, + { + "epoch": 3.27, + "learning_rate": 4.836318574337481e-05, + "loss": 2.5577, + "step": 661000 + }, + { + "epoch": 3.28, + "learning_rate": 4.8361947156948724e-05, + "loss": 2.5796, + "step": 661500 + }, + { + "epoch": 3.28, + "learning_rate": 4.8360708570522634e-05, + "loss": 2.5263, + "step": 662000 + }, + { + "epoch": 3.28, + "learning_rate": 4.835946998409655e-05, + "loss": 2.535, + "step": 662500 + }, + { + "epoch": 3.28, + "learning_rate": 4.835823139767047e-05, + "loss": 2.5555, + "step": 663000 + }, + { + "epoch": 3.29, + "learning_rate": 4.835699528841724e-05, + "loss": 2.5393, + "step": 663500 + }, + { + "epoch": 3.29, + "learning_rate": 4.8355756701991154e-05, + "loss": 2.5416, + "step": 664000 + }, + { + "epoch": 3.29, + "learning_rate": 4.835451811556507e-05, + "loss": 2.5387, + "step": 664500 + }, + { + "epoch": 3.29, + "learning_rate": 4.835327952913898e-05, + "loss": 2.5604, + "step": 665000 + }, + { + "epoch": 3.3, + "learning_rate": 4.83520409427129e-05, + "loss": 2.565, + "step": 665500 + }, + { + "epoch": 3.3, + "learning_rate": 4.8350802356286815e-05, + "loss": 2.5364, + "step": 666000 + }, + { + "epoch": 3.3, + "learning_rate": 4.834956376986073e-05, + "loss": 2.5461, + "step": 666500 + }, + { + "epoch": 3.3, + "learning_rate": 4.834832518343465e-05, + "loss": 2.5692, + "step": 667000 + }, + { + "epoch": 3.31, + "learning_rate": 4.8347089074181424e-05, + "loss": 2.5424, + "step": 667500 + }, + { + "epoch": 3.31, + "learning_rate": 4.834585296492819e-05, + "loss": 2.557, + "step": 668000 + }, + { + "epoch": 3.31, + "learning_rate": 4.834461437850211e-05, + "loss": 2.5454, + "step": 668500 + }, + { + "epoch": 3.31, + "learning_rate": 4.834337579207602e-05, + "loss": 2.5403, + "step": 669000 + }, + { + "epoch": 3.32, + "learning_rate": 4.834213720564994e-05, + "loss": 2.5301, + "step": 669500 + }, + { + "epoch": 3.32, + "learning_rate": 4.8340898619223854e-05, + "loss": 2.5211, + "step": 670000 + }, + { + "epoch": 3.32, + "learning_rate": 4.833966003279777e-05, + "loss": 2.5401, + "step": 670500 + }, + { + "epoch": 3.32, + "learning_rate": 4.833842144637169e-05, + "loss": 2.541, + "step": 671000 + }, + { + "epoch": 3.33, + "learning_rate": 4.833718533711846e-05, + "loss": 2.5565, + "step": 671500 + }, + { + "epoch": 3.33, + "learning_rate": 4.8335946750692374e-05, + "loss": 2.5404, + "step": 672000 + }, + { + "epoch": 3.33, + "learning_rate": 4.833470816426629e-05, + "loss": 2.567, + "step": 672500 + }, + { + "epoch": 3.33, + "learning_rate": 4.833346957784021e-05, + "loss": 2.5351, + "step": 673000 + }, + { + "epoch": 3.34, + "learning_rate": 4.8332230991414124e-05, + "loss": 2.5467, + "step": 673500 + }, + { + "epoch": 3.34, + "learning_rate": 4.833099240498804e-05, + "loss": 2.5485, + "step": 674000 + }, + { + "epoch": 3.34, + "learning_rate": 4.832975381856195e-05, + "loss": 2.5353, + "step": 674500 + }, + { + "epoch": 3.34, + "learning_rate": 4.832851523213587e-05, + "loss": 2.5495, + "step": 675000 + }, + { + "epoch": 3.35, + "learning_rate": 4.832728160005549e-05, + "loss": 2.5532, + "step": 675500 + }, + { + "epoch": 3.35, + "learning_rate": 4.8326043013629406e-05, + "loss": 2.5324, + "step": 676000 + }, + { + "epoch": 3.35, + "learning_rate": 4.832480442720332e-05, + "loss": 2.538, + "step": 676500 + }, + { + "epoch": 3.35, + "learning_rate": 4.832356584077724e-05, + "loss": 2.5226, + "step": 677000 + }, + { + "epoch": 3.36, + "learning_rate": 4.832232973152401e-05, + "loss": 2.5422, + "step": 677500 + }, + { + "epoch": 3.36, + "learning_rate": 4.8321091145097926e-05, + "loss": 2.5271, + "step": 678000 + }, + { + "epoch": 3.36, + "learning_rate": 4.831985255867184e-05, + "loss": 2.5118, + "step": 678500 + }, + { + "epoch": 3.36, + "learning_rate": 4.831861397224576e-05, + "loss": 2.5408, + "step": 679000 + }, + { + "epoch": 3.37, + "learning_rate": 4.831737786299252e-05, + "loss": 2.5397, + "step": 679500 + }, + { + "epoch": 3.37, + "learning_rate": 4.831613927656644e-05, + "loss": 2.5553, + "step": 680000 + }, + { + "epoch": 3.37, + "learning_rate": 4.8314900690140355e-05, + "loss": 2.5429, + "step": 680500 + }, + { + "epoch": 3.37, + "learning_rate": 4.831366210371427e-05, + "loss": 2.5535, + "step": 681000 + }, + { + "epoch": 3.38, + "learning_rate": 4.831242351728819e-05, + "loss": 2.5647, + "step": 681500 + }, + { + "epoch": 3.38, + "learning_rate": 4.8311184930862106e-05, + "loss": 2.5757, + "step": 682000 + }, + { + "epoch": 3.38, + "learning_rate": 4.830994634443602e-05, + "loss": 2.5574, + "step": 682500 + }, + { + "epoch": 3.38, + "learning_rate": 4.830870775800994e-05, + "loss": 2.53, + "step": 683000 + }, + { + "epoch": 3.39, + "learning_rate": 4.830746917158386e-05, + "loss": 2.5279, + "step": 683500 + }, + { + "epoch": 3.39, + "learning_rate": 4.8306233062330626e-05, + "loss": 2.5323, + "step": 684000 + }, + { + "epoch": 3.39, + "learning_rate": 4.830499447590454e-05, + "loss": 2.5433, + "step": 684500 + }, + { + "epoch": 3.39, + "learning_rate": 4.830375588947846e-05, + "loss": 2.5779, + "step": 685000 + }, + { + "epoch": 3.4, + "learning_rate": 4.8302517303052376e-05, + "loss": 2.5304, + "step": 685500 + }, + { + "epoch": 3.4, + "learning_rate": 4.830128119379914e-05, + "loss": 2.5579, + "step": 686000 + }, + { + "epoch": 3.4, + "learning_rate": 4.8300045084545914e-05, + "loss": 2.5429, + "step": 686500 + }, + { + "epoch": 3.4, + "learning_rate": 4.829880649811983e-05, + "loss": 2.5222, + "step": 687000 + }, + { + "epoch": 3.41, + "learning_rate": 4.829756791169374e-05, + "loss": 2.5562, + "step": 687500 + }, + { + "epoch": 3.41, + "learning_rate": 4.829632932526766e-05, + "loss": 2.579, + "step": 688000 + }, + { + "epoch": 3.41, + "learning_rate": 4.8295090738841575e-05, + "loss": 2.5623, + "step": 688500 + }, + { + "epoch": 3.41, + "learning_rate": 4.829385215241549e-05, + "loss": 2.543, + "step": 689000 + }, + { + "epoch": 3.42, + "learning_rate": 4.829261356598941e-05, + "loss": 2.5659, + "step": 689500 + }, + { + "epoch": 3.42, + "learning_rate": 4.8291374979563326e-05, + "loss": 2.5464, + "step": 690000 + }, + { + "epoch": 3.42, + "learning_rate": 4.829013639313724e-05, + "loss": 2.529, + "step": 690500 + }, + { + "epoch": 3.42, + "learning_rate": 4.828889780671116e-05, + "loss": 2.5534, + "step": 691000 + }, + { + "epoch": 3.43, + "learning_rate": 4.828765922028508e-05, + "loss": 2.5394, + "step": 691500 + }, + { + "epoch": 3.43, + "learning_rate": 4.8286420633858994e-05, + "loss": 2.5359, + "step": 692000 + }, + { + "epoch": 3.43, + "learning_rate": 4.8285184524605756e-05, + "loss": 2.5425, + "step": 692500 + }, + { + "epoch": 3.43, + "learning_rate": 4.828394593817967e-05, + "loss": 2.5602, + "step": 693000 + }, + { + "epoch": 3.44, + "learning_rate": 4.828270735175359e-05, + "loss": 2.5164, + "step": 693500 + }, + { + "epoch": 3.44, + "learning_rate": 4.8281468765327506e-05, + "loss": 2.5582, + "step": 694000 + }, + { + "epoch": 3.44, + "learning_rate": 4.8280232656074275e-05, + "loss": 2.5522, + "step": 694500 + }, + { + "epoch": 3.44, + "learning_rate": 4.827899406964819e-05, + "loss": 2.5528, + "step": 695000 + }, + { + "epoch": 3.45, + "learning_rate": 4.827775548322211e-05, + "loss": 2.5459, + "step": 695500 + }, + { + "epoch": 3.45, + "learning_rate": 4.8276516896796026e-05, + "loss": 2.5493, + "step": 696000 + }, + { + "epoch": 3.45, + "learning_rate": 4.827527831036994e-05, + "loss": 2.5516, + "step": 696500 + }, + { + "epoch": 3.45, + "learning_rate": 4.827403972394386e-05, + "loss": 2.5535, + "step": 697000 + }, + { + "epoch": 3.46, + "learning_rate": 4.827280113751778e-05, + "loss": 2.5477, + "step": 697500 + }, + { + "epoch": 3.46, + "learning_rate": 4.8271562551091694e-05, + "loss": 2.5291, + "step": 698000 + }, + { + "epoch": 3.46, + "learning_rate": 4.827032396466561e-05, + "loss": 2.5557, + "step": 698500 + }, + { + "epoch": 3.46, + "learning_rate": 4.826908537823953e-05, + "loss": 2.5559, + "step": 699000 + }, + { + "epoch": 3.47, + "learning_rate": 4.8267846791813444e-05, + "loss": 2.5445, + "step": 699500 + }, + { + "epoch": 3.47, + "learning_rate": 4.8266610682560206e-05, + "loss": 2.5345, + "step": 700000 + }, + { + "epoch": 3.47, + "learning_rate": 4.8265374573306975e-05, + "loss": 2.563, + "step": 700500 + }, + { + "epoch": 3.47, + "learning_rate": 4.826413598688089e-05, + "loss": 2.5561, + "step": 701000 + }, + { + "epoch": 3.48, + "learning_rate": 4.826289740045481e-05, + "loss": 2.5687, + "step": 701500 + }, + { + "epoch": 3.48, + "learning_rate": 4.8261658814028726e-05, + "loss": 2.544, + "step": 702000 + }, + { + "epoch": 3.48, + "learning_rate": 4.826042022760264e-05, + "loss": 2.5362, + "step": 702500 + }, + { + "epoch": 3.48, + "learning_rate": 4.825918164117656e-05, + "loss": 2.5309, + "step": 703000 + }, + { + "epoch": 3.49, + "learning_rate": 4.825794305475048e-05, + "loss": 2.5421, + "step": 703500 + }, + { + "epoch": 3.49, + "learning_rate": 4.8256704468324394e-05, + "loss": 2.5295, + "step": 704000 + }, + { + "epoch": 3.49, + "learning_rate": 4.825546588189831e-05, + "loss": 2.5354, + "step": 704500 + }, + { + "epoch": 3.49, + "learning_rate": 4.825422977264508e-05, + "loss": 2.5441, + "step": 705000 + }, + { + "epoch": 3.5, + "learning_rate": 4.8252991186218996e-05, + "loss": 2.5388, + "step": 705500 + }, + { + "epoch": 3.5, + "learning_rate": 4.8251752599792907e-05, + "loss": 2.5489, + "step": 706000 + }, + { + "epoch": 3.5, + "learning_rate": 4.8250514013366824e-05, + "loss": 2.5435, + "step": 706500 + }, + { + "epoch": 3.5, + "learning_rate": 4.824927790411359e-05, + "loss": 2.5567, + "step": 707000 + }, + { + "epoch": 3.51, + "learning_rate": 4.824803931768751e-05, + "loss": 2.5394, + "step": 707500 + }, + { + "epoch": 3.51, + "learning_rate": 4.8246800731261426e-05, + "loss": 2.5111, + "step": 708000 + }, + { + "epoch": 3.51, + "learning_rate": 4.824556214483534e-05, + "loss": 2.5513, + "step": 708500 + }, + { + "epoch": 3.51, + "learning_rate": 4.824432355840926e-05, + "loss": 2.5387, + "step": 709000 + }, + { + "epoch": 3.52, + "learning_rate": 4.824308497198318e-05, + "loss": 2.5602, + "step": 709500 + }, + { + "epoch": 3.52, + "learning_rate": 4.8241846385557094e-05, + "loss": 2.549, + "step": 710000 + }, + { + "epoch": 3.52, + "learning_rate": 4.824060779913101e-05, + "loss": 2.5252, + "step": 710500 + }, + { + "epoch": 3.52, + "learning_rate": 4.823937416705063e-05, + "loss": 2.5328, + "step": 711000 + }, + { + "epoch": 3.53, + "learning_rate": 4.82381380577974e-05, + "loss": 2.522, + "step": 711500 + }, + { + "epoch": 3.53, + "learning_rate": 4.823689947137132e-05, + "loss": 2.5387, + "step": 712000 + }, + { + "epoch": 3.53, + "learning_rate": 4.8235660884945234e-05, + "loss": 2.5568, + "step": 712500 + }, + { + "epoch": 3.53, + "learning_rate": 4.823442229851915e-05, + "loss": 2.5318, + "step": 713000 + }, + { + "epoch": 3.53, + "learning_rate": 4.823318371209307e-05, + "loss": 2.5472, + "step": 713500 + }, + { + "epoch": 3.54, + "learning_rate": 4.8231945125666985e-05, + "loss": 2.5452, + "step": 714000 + }, + { + "epoch": 3.54, + "learning_rate": 4.823070901641375e-05, + "loss": 2.5537, + "step": 714500 + }, + { + "epoch": 3.54, + "learning_rate": 4.8229470429987664e-05, + "loss": 2.5685, + "step": 715000 + }, + { + "epoch": 3.54, + "learning_rate": 4.822823184356158e-05, + "loss": 2.5505, + "step": 715500 + }, + { + "epoch": 3.55, + "learning_rate": 4.82269932571355e-05, + "loss": 2.5497, + "step": 716000 + }, + { + "epoch": 3.55, + "learning_rate": 4.8225754670709415e-05, + "loss": 2.529, + "step": 716500 + }, + { + "epoch": 3.55, + "learning_rate": 4.822451608428333e-05, + "loss": 2.5528, + "step": 717000 + }, + { + "epoch": 3.55, + "learning_rate": 4.822327749785725e-05, + "loss": 2.5036, + "step": 717500 + }, + { + "epoch": 3.56, + "learning_rate": 4.8222038911431166e-05, + "loss": 2.5104, + "step": 718000 + }, + { + "epoch": 3.56, + "learning_rate": 4.822080032500508e-05, + "loss": 2.5557, + "step": 718500 + }, + { + "epoch": 3.56, + "learning_rate": 4.821956173857899e-05, + "loss": 2.5172, + "step": 719000 + }, + { + "epoch": 3.56, + "learning_rate": 4.821832562932577e-05, + "loss": 2.5314, + "step": 719500 + }, + { + "epoch": 3.57, + "learning_rate": 4.821708952007254e-05, + "loss": 2.5631, + "step": 720000 + }, + { + "epoch": 3.57, + "learning_rate": 4.821585093364645e-05, + "loss": 2.5495, + "step": 720500 + }, + { + "epoch": 3.57, + "learning_rate": 4.8214612347220364e-05, + "loss": 2.5225, + "step": 721000 + }, + { + "epoch": 3.57, + "learning_rate": 4.821337376079428e-05, + "loss": 2.5358, + "step": 721500 + }, + { + "epoch": 3.58, + "learning_rate": 4.82121351743682e-05, + "loss": 2.5573, + "step": 722000 + }, + { + "epoch": 3.58, + "learning_rate": 4.8210896587942115e-05, + "loss": 2.5512, + "step": 722500 + }, + { + "epoch": 3.58, + "learning_rate": 4.820965800151603e-05, + "loss": 2.5373, + "step": 723000 + }, + { + "epoch": 3.58, + "learning_rate": 4.820841941508995e-05, + "loss": 2.5567, + "step": 723500 + }, + { + "epoch": 3.59, + "learning_rate": 4.8207180828663866e-05, + "loss": 2.5367, + "step": 724000 + }, + { + "epoch": 3.59, + "learning_rate": 4.820594224223778e-05, + "loss": 2.558, + "step": 724500 + }, + { + "epoch": 3.59, + "learning_rate": 4.820470613298455e-05, + "loss": 2.5263, + "step": 725000 + }, + { + "epoch": 3.59, + "learning_rate": 4.820346754655847e-05, + "loss": 2.5248, + "step": 725500 + }, + { + "epoch": 3.6, + "learning_rate": 4.8202228960132385e-05, + "loss": 2.5235, + "step": 726000 + }, + { + "epoch": 3.6, + "learning_rate": 4.82009903737063e-05, + "loss": 2.5575, + "step": 726500 + }, + { + "epoch": 3.6, + "learning_rate": 4.819975178728022e-05, + "loss": 2.5402, + "step": 727000 + }, + { + "epoch": 3.6, + "learning_rate": 4.8198513200854136e-05, + "loss": 2.5331, + "step": 727500 + }, + { + "epoch": 3.61, + "learning_rate": 4.8197274614428046e-05, + "loss": 2.5494, + "step": 728000 + }, + { + "epoch": 3.61, + "learning_rate": 4.819603602800196e-05, + "loss": 2.5456, + "step": 728500 + }, + { + "epoch": 3.61, + "learning_rate": 4.819479991874873e-05, + "loss": 2.5465, + "step": 729000 + }, + { + "epoch": 3.61, + "learning_rate": 4.819356133232265e-05, + "loss": 2.5497, + "step": 729500 + }, + { + "epoch": 3.62, + "learning_rate": 4.8192322745896566e-05, + "loss": 2.5535, + "step": 730000 + }, + { + "epoch": 3.62, + "learning_rate": 4.819108415947048e-05, + "loss": 2.5379, + "step": 730500 + }, + { + "epoch": 3.62, + "learning_rate": 4.81898455730444e-05, + "loss": 2.5503, + "step": 731000 + }, + { + "epoch": 3.62, + "learning_rate": 4.818860698661831e-05, + "loss": 2.5508, + "step": 731500 + }, + { + "epoch": 3.63, + "learning_rate": 4.818736840019223e-05, + "loss": 2.5412, + "step": 732000 + }, + { + "epoch": 3.63, + "learning_rate": 4.8186129813766144e-05, + "loss": 2.5646, + "step": 732500 + }, + { + "epoch": 3.63, + "learning_rate": 4.818489370451292e-05, + "loss": 2.554, + "step": 733000 + }, + { + "epoch": 3.63, + "learning_rate": 4.8183655118086836e-05, + "loss": 2.5507, + "step": 733500 + }, + { + "epoch": 3.64, + "learning_rate": 4.818241653166075e-05, + "loss": 2.5684, + "step": 734000 + }, + { + "epoch": 3.64, + "learning_rate": 4.818117794523466e-05, + "loss": 2.5564, + "step": 734500 + }, + { + "epoch": 3.64, + "learning_rate": 4.817993935880858e-05, + "loss": 2.5357, + "step": 735000 + }, + { + "epoch": 3.64, + "learning_rate": 4.81787057267282e-05, + "loss": 2.5358, + "step": 735500 + }, + { + "epoch": 3.65, + "learning_rate": 4.817746961747497e-05, + "loss": 2.5506, + "step": 736000 + }, + { + "epoch": 3.65, + "learning_rate": 4.8176231031048887e-05, + "loss": 2.5443, + "step": 736500 + }, + { + "epoch": 3.65, + "learning_rate": 4.8174992444622804e-05, + "loss": 2.5271, + "step": 737000 + }, + { + "epoch": 3.65, + "learning_rate": 4.817375633536957e-05, + "loss": 2.5738, + "step": 737500 + }, + { + "epoch": 3.66, + "learning_rate": 4.817251774894348e-05, + "loss": 2.5456, + "step": 738000 + }, + { + "epoch": 3.66, + "learning_rate": 4.81712791625174e-05, + "loss": 2.5431, + "step": 738500 + }, + { + "epoch": 3.66, + "learning_rate": 4.8170040576091316e-05, + "loss": 2.5176, + "step": 739000 + }, + { + "epoch": 3.66, + "learning_rate": 4.816880198966523e-05, + "loss": 2.5471, + "step": 739500 + }, + { + "epoch": 3.67, + "learning_rate": 4.816756340323915e-05, + "loss": 2.5422, + "step": 740000 + }, + { + "epoch": 3.67, + "learning_rate": 4.816632481681307e-05, + "loss": 2.5587, + "step": 740500 + }, + { + "epoch": 3.67, + "learning_rate": 4.8165086230386984e-05, + "loss": 2.5371, + "step": 741000 + }, + { + "epoch": 3.67, + "learning_rate": 4.81638476439609e-05, + "loss": 2.5467, + "step": 741500 + }, + { + "epoch": 3.68, + "learning_rate": 4.816260905753482e-05, + "loss": 2.5384, + "step": 742000 + }, + { + "epoch": 3.68, + "learning_rate": 4.8161370471108735e-05, + "loss": 2.5456, + "step": 742500 + }, + { + "epoch": 3.68, + "learning_rate": 4.816013188468265e-05, + "loss": 2.5356, + "step": 743000 + }, + { + "epoch": 3.68, + "learning_rate": 4.815889329825657e-05, + "loss": 2.5309, + "step": 743500 + }, + { + "epoch": 3.69, + "learning_rate": 4.815765718900334e-05, + "loss": 2.5272, + "step": 744000 + }, + { + "epoch": 3.69, + "learning_rate": 4.8156418602577254e-05, + "loss": 2.5665, + "step": 744500 + }, + { + "epoch": 3.69, + "learning_rate": 4.8155182493324016e-05, + "loss": 2.5413, + "step": 745000 + }, + { + "epoch": 3.69, + "learning_rate": 4.815394390689793e-05, + "loss": 2.568, + "step": 745500 + }, + { + "epoch": 3.7, + "learning_rate": 4.815270532047185e-05, + "loss": 2.5461, + "step": 746000 + }, + { + "epoch": 3.7, + "learning_rate": 4.815146673404577e-05, + "loss": 2.5391, + "step": 746500 + }, + { + "epoch": 3.7, + "learning_rate": 4.8150228147619684e-05, + "loss": 2.5244, + "step": 747000 + }, + { + "epoch": 3.7, + "learning_rate": 4.81489895611936e-05, + "loss": 2.544, + "step": 747500 + }, + { + "epoch": 3.71, + "learning_rate": 4.814775097476752e-05, + "loss": 2.5462, + "step": 748000 + }, + { + "epoch": 3.71, + "learning_rate": 4.8146512388341435e-05, + "loss": 2.5465, + "step": 748500 + }, + { + "epoch": 3.71, + "learning_rate": 4.814527380191535e-05, + "loss": 2.5428, + "step": 749000 + }, + { + "epoch": 3.71, + "learning_rate": 4.814403769266212e-05, + "loss": 2.555, + "step": 749500 + }, + { + "epoch": 3.72, + "learning_rate": 4.814280158340889e-05, + "loss": 2.563, + "step": 750000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8141562996982806e-05, + "loss": 2.5506, + "step": 750500 + }, + { + "epoch": 3.72, + "learning_rate": 4.8140324410556717e-05, + "loss": 2.5451, + "step": 751000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8139085824130633e-05, + "loss": 2.544, + "step": 751500 + }, + { + "epoch": 3.73, + "learning_rate": 4.813784971487741e-05, + "loss": 2.5365, + "step": 752000 + }, + { + "epoch": 3.73, + "learning_rate": 4.8136611128451326e-05, + "loss": 2.5606, + "step": 752500 + }, + { + "epoch": 3.73, + "learning_rate": 4.813537254202524e-05, + "loss": 2.5512, + "step": 753000 + }, + { + "epoch": 3.73, + "learning_rate": 4.813413395559916e-05, + "loss": 2.5219, + "step": 753500 + }, + { + "epoch": 3.74, + "learning_rate": 4.813289784634593e-05, + "loss": 2.5419, + "step": 754000 + }, + { + "epoch": 3.74, + "learning_rate": 4.813166173709269e-05, + "loss": 2.5459, + "step": 754500 + }, + { + "epoch": 3.74, + "learning_rate": 4.813042315066661e-05, + "loss": 2.5357, + "step": 755000 + }, + { + "epoch": 3.74, + "learning_rate": 4.8129184564240525e-05, + "loss": 2.5289, + "step": 755500 + }, + { + "epoch": 3.75, + "learning_rate": 4.812794597781444e-05, + "loss": 2.5534, + "step": 756000 + }, + { + "epoch": 3.75, + "learning_rate": 4.812670739138836e-05, + "loss": 2.5353, + "step": 756500 + }, + { + "epoch": 3.75, + "learning_rate": 4.8125468804962275e-05, + "loss": 2.5421, + "step": 757000 + }, + { + "epoch": 3.75, + "learning_rate": 4.8124232695709044e-05, + "loss": 2.5251, + "step": 757500 + }, + { + "epoch": 3.76, + "learning_rate": 4.812299410928296e-05, + "loss": 2.5294, + "step": 758000 + }, + { + "epoch": 3.76, + "learning_rate": 4.812175552285688e-05, + "loss": 2.5252, + "step": 758500 + }, + { + "epoch": 3.76, + "learning_rate": 4.8120516936430795e-05, + "loss": 2.5331, + "step": 759000 + }, + { + "epoch": 3.76, + "learning_rate": 4.811927835000471e-05, + "loss": 2.5705, + "step": 759500 + }, + { + "epoch": 3.77, + "learning_rate": 4.811803976357863e-05, + "loss": 2.5623, + "step": 760000 + }, + { + "epoch": 3.77, + "learning_rate": 4.8116801177152546e-05, + "loss": 2.5405, + "step": 760500 + }, + { + "epoch": 3.77, + "learning_rate": 4.811556259072646e-05, + "loss": 2.525, + "step": 761000 + }, + { + "epoch": 3.77, + "learning_rate": 4.811432400430038e-05, + "loss": 2.5315, + "step": 761500 + }, + { + "epoch": 3.78, + "learning_rate": 4.811308541787429e-05, + "loss": 2.5486, + "step": 762000 + }, + { + "epoch": 3.78, + "learning_rate": 4.811184683144821e-05, + "loss": 2.549, + "step": 762500 + }, + { + "epoch": 3.78, + "learning_rate": 4.8110608245022124e-05, + "loss": 2.5359, + "step": 763000 + }, + { + "epoch": 3.78, + "learning_rate": 4.810936965859604e-05, + "loss": 2.5533, + "step": 763500 + }, + { + "epoch": 3.79, + "learning_rate": 4.810813354934281e-05, + "loss": 2.5451, + "step": 764000 + }, + { + "epoch": 3.79, + "learning_rate": 4.8106894962916726e-05, + "loss": 2.5645, + "step": 764500 + }, + { + "epoch": 3.79, + "learning_rate": 4.810565637649064e-05, + "loss": 2.5328, + "step": 765000 + }, + { + "epoch": 3.79, + "learning_rate": 4.810441779006456e-05, + "loss": 2.538, + "step": 765500 + }, + { + "epoch": 3.8, + "learning_rate": 4.810318168081133e-05, + "loss": 2.5277, + "step": 766000 + }, + { + "epoch": 3.8, + "learning_rate": 4.8101943094385246e-05, + "loss": 2.5509, + "step": 766500 + }, + { + "epoch": 3.8, + "learning_rate": 4.810070450795916e-05, + "loss": 2.558, + "step": 767000 + }, + { + "epoch": 3.8, + "learning_rate": 4.809946592153308e-05, + "loss": 2.5482, + "step": 767500 + }, + { + "epoch": 3.8, + "learning_rate": 4.8098227335107e-05, + "loss": 2.5564, + "step": 768000 + }, + { + "epoch": 3.81, + "learning_rate": 4.809699122585376e-05, + "loss": 2.5368, + "step": 768500 + }, + { + "epoch": 3.81, + "learning_rate": 4.809575511660053e-05, + "loss": 2.5808, + "step": 769000 + }, + { + "epoch": 3.81, + "learning_rate": 4.8094516530174444e-05, + "loss": 2.5585, + "step": 769500 + }, + { + "epoch": 3.81, + "learning_rate": 4.809327794374836e-05, + "loss": 2.5754, + "step": 770000 + }, + { + "epoch": 3.82, + "learning_rate": 4.809203935732228e-05, + "loss": 2.5389, + "step": 770500 + }, + { + "epoch": 3.82, + "learning_rate": 4.8090800770896195e-05, + "loss": 2.5298, + "step": 771000 + }, + { + "epoch": 3.82, + "learning_rate": 4.808956218447011e-05, + "loss": 2.5495, + "step": 771500 + }, + { + "epoch": 3.82, + "learning_rate": 4.808832359804403e-05, + "loss": 2.5579, + "step": 772000 + }, + { + "epoch": 3.83, + "learning_rate": 4.808708748879079e-05, + "loss": 2.5362, + "step": 772500 + }, + { + "epoch": 3.83, + "learning_rate": 4.808584890236471e-05, + "loss": 2.5724, + "step": 773000 + }, + { + "epoch": 3.83, + "learning_rate": 4.8084610315938625e-05, + "loss": 2.5317, + "step": 773500 + }, + { + "epoch": 3.83, + "learning_rate": 4.808337172951254e-05, + "loss": 2.5207, + "step": 774000 + }, + { + "epoch": 3.84, + "learning_rate": 4.808213314308646e-05, + "loss": 2.5329, + "step": 774500 + }, + { + "epoch": 3.84, + "learning_rate": 4.8080894556660376e-05, + "loss": 2.5449, + "step": 775000 + }, + { + "epoch": 3.84, + "learning_rate": 4.807965597023429e-05, + "loss": 2.5359, + "step": 775500 + }, + { + "epoch": 3.84, + "learning_rate": 4.807841986098106e-05, + "loss": 2.5733, + "step": 776000 + }, + { + "epoch": 3.85, + "learning_rate": 4.807718375172783e-05, + "loss": 2.543, + "step": 776500 + }, + { + "epoch": 3.85, + "learning_rate": 4.807594516530175e-05, + "loss": 2.5573, + "step": 777000 + }, + { + "epoch": 3.85, + "learning_rate": 4.8074706578875664e-05, + "loss": 2.5554, + "step": 777500 + }, + { + "epoch": 3.85, + "learning_rate": 4.807346799244958e-05, + "loss": 2.5289, + "step": 778000 + }, + { + "epoch": 3.86, + "learning_rate": 4.807222940602349e-05, + "loss": 2.5436, + "step": 778500 + }, + { + "epoch": 3.86, + "learning_rate": 4.807099081959741e-05, + "loss": 2.5695, + "step": 779000 + }, + { + "epoch": 3.86, + "learning_rate": 4.8069752233171325e-05, + "loss": 2.5339, + "step": 779500 + }, + { + "epoch": 3.86, + "learning_rate": 4.806851364674524e-05, + "loss": 2.5585, + "step": 780000 + }, + { + "epoch": 3.87, + "learning_rate": 4.806727506031916e-05, + "loss": 2.5488, + "step": 780500 + }, + { + "epoch": 3.87, + "learning_rate": 4.8066036473893076e-05, + "loss": 2.55, + "step": 781000 + }, + { + "epoch": 3.87, + "learning_rate": 4.806479788746699e-05, + "loss": 2.524, + "step": 781500 + }, + { + "epoch": 3.87, + "learning_rate": 4.806355930104091e-05, + "loss": 2.5214, + "step": 782000 + }, + { + "epoch": 3.88, + "learning_rate": 4.806232071461483e-05, + "loss": 2.5534, + "step": 782500 + }, + { + "epoch": 3.88, + "learning_rate": 4.8061082128188744e-05, + "loss": 2.5238, + "step": 783000 + }, + { + "epoch": 3.88, + "learning_rate": 4.805984354176266e-05, + "loss": 2.5444, + "step": 783500 + }, + { + "epoch": 3.88, + "learning_rate": 4.805860495533658e-05, + "loss": 2.5237, + "step": 784000 + }, + { + "epoch": 3.89, + "learning_rate": 4.8057366368910494e-05, + "loss": 2.5258, + "step": 784500 + }, + { + "epoch": 3.89, + "learning_rate": 4.8056127782484404e-05, + "loss": 2.529, + "step": 785000 + }, + { + "epoch": 3.89, + "learning_rate": 4.805488919605832e-05, + "loss": 2.5469, + "step": 785500 + }, + { + "epoch": 3.89, + "learning_rate": 4.805365060963224e-05, + "loss": 2.5389, + "step": 786000 + }, + { + "epoch": 3.9, + "learning_rate": 4.8052414500379014e-05, + "loss": 2.5637, + "step": 786500 + }, + { + "epoch": 3.9, + "learning_rate": 4.805117591395293e-05, + "loss": 2.5438, + "step": 787000 + }, + { + "epoch": 3.9, + "learning_rate": 4.804993732752685e-05, + "loss": 2.5465, + "step": 787500 + }, + { + "epoch": 3.9, + "learning_rate": 4.804869874110076e-05, + "loss": 2.5289, + "step": 788000 + }, + { + "epoch": 3.91, + "learning_rate": 4.804746263184753e-05, + "loss": 2.5684, + "step": 788500 + }, + { + "epoch": 3.91, + "learning_rate": 4.8046226522594296e-05, + "loss": 2.5539, + "step": 789000 + }, + { + "epoch": 3.91, + "learning_rate": 4.804498793616821e-05, + "loss": 2.5358, + "step": 789500 + }, + { + "epoch": 3.91, + "learning_rate": 4.804374934974213e-05, + "loss": 2.5412, + "step": 790000 + }, + { + "epoch": 3.92, + "learning_rate": 4.8042510763316046e-05, + "loss": 2.5593, + "step": 790500 + }, + { + "epoch": 3.92, + "learning_rate": 4.804127217688996e-05, + "loss": 2.5245, + "step": 791000 + }, + { + "epoch": 3.92, + "learning_rate": 4.804003359046388e-05, + "loss": 2.5548, + "step": 791500 + }, + { + "epoch": 3.92, + "learning_rate": 4.80387950040378e-05, + "loss": 2.5387, + "step": 792000 + }, + { + "epoch": 3.93, + "learning_rate": 4.8037556417611714e-05, + "loss": 2.529, + "step": 792500 + }, + { + "epoch": 3.93, + "learning_rate": 4.8036320308358476e-05, + "loss": 2.5248, + "step": 793000 + }, + { + "epoch": 3.93, + "learning_rate": 4.803508172193239e-05, + "loss": 2.5407, + "step": 793500 + }, + { + "epoch": 3.93, + "learning_rate": 4.803384313550631e-05, + "loss": 2.5353, + "step": 794000 + }, + { + "epoch": 3.94, + "learning_rate": 4.803260454908023e-05, + "loss": 2.5249, + "step": 794500 + }, + { + "epoch": 3.94, + "learning_rate": 4.8031368439826996e-05, + "loss": 2.5362, + "step": 795000 + }, + { + "epoch": 3.94, + "learning_rate": 4.803012985340091e-05, + "loss": 2.5309, + "step": 795500 + }, + { + "epoch": 3.94, + "learning_rate": 4.802889126697483e-05, + "loss": 2.5317, + "step": 796000 + }, + { + "epoch": 3.95, + "learning_rate": 4.8027652680548746e-05, + "loss": 2.5299, + "step": 796500 + }, + { + "epoch": 3.95, + "learning_rate": 4.8026416571295515e-05, + "loss": 2.5359, + "step": 797000 + }, + { + "epoch": 3.95, + "learning_rate": 4.8025182939215136e-05, + "loss": 2.5481, + "step": 797500 + }, + { + "epoch": 3.95, + "learning_rate": 4.802394435278905e-05, + "loss": 2.5442, + "step": 798000 + }, + { + "epoch": 3.96, + "learning_rate": 4.802270576636297e-05, + "loss": 2.5507, + "step": 798500 + }, + { + "epoch": 3.96, + "learning_rate": 4.802146717993689e-05, + "loss": 2.5488, + "step": 799000 + }, + { + "epoch": 3.96, + "learning_rate": 4.8020228593510804e-05, + "loss": 2.5537, + "step": 799500 + }, + { + "epoch": 3.96, + "learning_rate": 4.801899000708472e-05, + "loss": 2.5368, + "step": 800000 + }, + { + "epoch": 3.97, + "learning_rate": 4.801775142065864e-05, + "loss": 2.5376, + "step": 800500 + }, + { + "epoch": 3.97, + "learning_rate": 4.8016512834232554e-05, + "loss": 2.5168, + "step": 801000 + }, + { + "epoch": 3.97, + "learning_rate": 4.8015276724979317e-05, + "loss": 2.5168, + "step": 801500 + }, + { + "epoch": 3.97, + "learning_rate": 4.8014038138553233e-05, + "loss": 2.5698, + "step": 802000 + }, + { + "epoch": 3.98, + "learning_rate": 4.801279955212715e-05, + "loss": 2.5456, + "step": 802500 + }, + { + "epoch": 3.98, + "learning_rate": 4.801156096570107e-05, + "loss": 2.5533, + "step": 803000 + }, + { + "epoch": 3.98, + "learning_rate": 4.8010322379274984e-05, + "loss": 2.5442, + "step": 803500 + }, + { + "epoch": 3.98, + "learning_rate": 4.80090837928489e-05, + "loss": 2.5533, + "step": 804000 + }, + { + "epoch": 3.99, + "learning_rate": 4.800784768359567e-05, + "loss": 2.5392, + "step": 804500 + }, + { + "epoch": 3.99, + "learning_rate": 4.800661157434244e-05, + "loss": 2.5661, + "step": 805000 + }, + { + "epoch": 3.99, + "learning_rate": 4.8005372987916356e-05, + "loss": 2.5666, + "step": 805500 + }, + { + "epoch": 3.99, + "learning_rate": 4.800413440149027e-05, + "loss": 2.5285, + "step": 806000 + }, + { + "epoch": 4.0, + "learning_rate": 4.800289581506418e-05, + "loss": 2.5249, + "step": 806500 + }, + { + "epoch": 4.0, + "learning_rate": 4.800165970581095e-05, + "loss": 2.5377, + "step": 807000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6340747980022936, + "eval_accuracy_mlm": 0.5867373422775914, + "eval_accuracy_nsp": 0.8572672468906766, + "eval_loss": 2.4791698455810547, + "eval_runtime": 145.9611, + "eval_samples_per_second": 1746.76, + "eval_steps_per_second": 72.787, + "step": 807372 + }, + { + "epoch": 4.0, + "learning_rate": 4.800042111938487e-05, + "loss": 2.5294, + "step": 807500 + }, + { + "epoch": 4.0, + "learning_rate": 4.7999182532958785e-05, + "loss": 2.4976, + "step": 808000 + }, + { + "epoch": 4.01, + "learning_rate": 4.79979439465327e-05, + "loss": 2.5014, + "step": 808500 + }, + { + "epoch": 4.01, + "learning_rate": 4.799670536010662e-05, + "loss": 2.5063, + "step": 809000 + }, + { + "epoch": 4.01, + "learning_rate": 4.7995466773680536e-05, + "loss": 2.5132, + "step": 809500 + }, + { + "epoch": 4.01, + "learning_rate": 4.799422818725445e-05, + "loss": 2.5018, + "step": 810000 + }, + { + "epoch": 4.02, + "learning_rate": 4.799298960082837e-05, + "loss": 2.5117, + "step": 810500 + }, + { + "epoch": 4.02, + "learning_rate": 4.799175101440229e-05, + "loss": 2.5232, + "step": 811000 + }, + { + "epoch": 4.02, + "learning_rate": 4.7990514905149056e-05, + "loss": 2.5043, + "step": 811500 + }, + { + "epoch": 4.02, + "learning_rate": 4.798927631872297e-05, + "loss": 2.5114, + "step": 812000 + }, + { + "epoch": 4.03, + "learning_rate": 4.798803773229689e-05, + "loss": 2.5253, + "step": 812500 + }, + { + "epoch": 4.03, + "learning_rate": 4.7986799145870807e-05, + "loss": 2.5181, + "step": 813000 + }, + { + "epoch": 4.03, + "learning_rate": 4.798556055944472e-05, + "loss": 2.5291, + "step": 813500 + }, + { + "epoch": 4.03, + "learning_rate": 4.7984321973018634e-05, + "loss": 2.499, + "step": 814000 + }, + { + "epoch": 4.04, + "learning_rate": 4.798308338659255e-05, + "loss": 2.5222, + "step": 814500 + }, + { + "epoch": 4.04, + "learning_rate": 4.798184480016647e-05, + "loss": 2.5236, + "step": 815000 + }, + { + "epoch": 4.04, + "learning_rate": 4.7980606213740384e-05, + "loss": 2.5314, + "step": 815500 + }, + { + "epoch": 4.04, + "learning_rate": 4.797937010448715e-05, + "loss": 2.5234, + "step": 816000 + }, + { + "epoch": 4.05, + "learning_rate": 4.797813151806107e-05, + "loss": 2.4951, + "step": 816500 + }, + { + "epoch": 4.05, + "learning_rate": 4.797689293163499e-05, + "loss": 2.529, + "step": 817000 + }, + { + "epoch": 4.05, + "learning_rate": 4.7975654345208904e-05, + "loss": 2.5055, + "step": 817500 + }, + { + "epoch": 4.05, + "learning_rate": 4.797441575878282e-05, + "loss": 2.4981, + "step": 818000 + }, + { + "epoch": 4.06, + "learning_rate": 4.797317964952959e-05, + "loss": 2.512, + "step": 818500 + }, + { + "epoch": 4.06, + "learning_rate": 4.797194106310351e-05, + "loss": 2.515, + "step": 819000 + }, + { + "epoch": 4.06, + "learning_rate": 4.7970702476677424e-05, + "loss": 2.4995, + "step": 819500 + }, + { + "epoch": 4.06, + "learning_rate": 4.7969463890251334e-05, + "loss": 2.5222, + "step": 820000 + }, + { + "epoch": 4.07, + "learning_rate": 4.79682277809981e-05, + "loss": 2.5096, + "step": 820500 + }, + { + "epoch": 4.07, + "learning_rate": 4.796698919457202e-05, + "loss": 2.5198, + "step": 821000 + }, + { + "epoch": 4.07, + "learning_rate": 4.796575308531879e-05, + "loss": 2.5011, + "step": 821500 + }, + { + "epoch": 4.07, + "learning_rate": 4.7964514498892705e-05, + "loss": 2.5134, + "step": 822000 + }, + { + "epoch": 4.07, + "learning_rate": 4.796327591246662e-05, + "loss": 2.486, + "step": 822500 + }, + { + "epoch": 4.08, + "learning_rate": 4.796203732604054e-05, + "loss": 2.5452, + "step": 823000 + }, + { + "epoch": 4.08, + "learning_rate": 4.7960798739614456e-05, + "loss": 2.5249, + "step": 823500 + }, + { + "epoch": 4.08, + "learning_rate": 4.795956015318837e-05, + "loss": 2.5277, + "step": 824000 + }, + { + "epoch": 4.08, + "learning_rate": 4.795832156676229e-05, + "loss": 2.517, + "step": 824500 + }, + { + "epoch": 4.09, + "learning_rate": 4.795708298033621e-05, + "loss": 2.5276, + "step": 825000 + }, + { + "epoch": 4.09, + "learning_rate": 4.7955844393910124e-05, + "loss": 2.5128, + "step": 825500 + }, + { + "epoch": 4.09, + "learning_rate": 4.7954610761829744e-05, + "loss": 2.5389, + "step": 826000 + }, + { + "epoch": 4.09, + "learning_rate": 4.795337217540366e-05, + "loss": 2.5206, + "step": 826500 + }, + { + "epoch": 4.1, + "learning_rate": 4.795213358897757e-05, + "loss": 2.5004, + "step": 827000 + }, + { + "epoch": 4.1, + "learning_rate": 4.795089747972435e-05, + "loss": 2.5165, + "step": 827500 + }, + { + "epoch": 4.1, + "learning_rate": 4.794965889329826e-05, + "loss": 2.5113, + "step": 828000 + }, + { + "epoch": 4.1, + "learning_rate": 4.7948420306872174e-05, + "loss": 2.4997, + "step": 828500 + }, + { + "epoch": 4.11, + "learning_rate": 4.794718172044609e-05, + "loss": 2.5323, + "step": 829000 + }, + { + "epoch": 4.11, + "learning_rate": 4.794594313402001e-05, + "loss": 2.5239, + "step": 829500 + }, + { + "epoch": 4.11, + "learning_rate": 4.794470454759392e-05, + "loss": 2.5154, + "step": 830000 + }, + { + "epoch": 4.11, + "learning_rate": 4.7943465961167835e-05, + "loss": 2.5174, + "step": 830500 + }, + { + "epoch": 4.12, + "learning_rate": 4.794222737474175e-05, + "loss": 2.5047, + "step": 831000 + }, + { + "epoch": 4.12, + "learning_rate": 4.794098878831567e-05, + "loss": 2.5309, + "step": 831500 + }, + { + "epoch": 4.12, + "learning_rate": 4.7939750201889586e-05, + "loss": 2.5058, + "step": 832000 + }, + { + "epoch": 4.12, + "learning_rate": 4.793851409263636e-05, + "loss": 2.5134, + "step": 832500 + }, + { + "epoch": 4.13, + "learning_rate": 4.793727550621028e-05, + "loss": 2.5142, + "step": 833000 + }, + { + "epoch": 4.13, + "learning_rate": 4.793603691978419e-05, + "loss": 2.5138, + "step": 833500 + }, + { + "epoch": 4.13, + "learning_rate": 4.7934798333358106e-05, + "loss": 2.5339, + "step": 834000 + }, + { + "epoch": 4.13, + "learning_rate": 4.793355974693202e-05, + "loss": 2.5123, + "step": 834500 + }, + { + "epoch": 4.14, + "learning_rate": 4.793232363767879e-05, + "loss": 2.5296, + "step": 835000 + }, + { + "epoch": 4.14, + "learning_rate": 4.793108752842556e-05, + "loss": 2.5339, + "step": 835500 + }, + { + "epoch": 4.14, + "learning_rate": 4.792984894199948e-05, + "loss": 2.538, + "step": 836000 + }, + { + "epoch": 4.14, + "learning_rate": 4.7928610355573394e-05, + "loss": 2.5208, + "step": 836500 + }, + { + "epoch": 4.15, + "learning_rate": 4.792737176914731e-05, + "loss": 2.4823, + "step": 837000 + }, + { + "epoch": 4.15, + "learning_rate": 4.792613318272123e-05, + "loss": 2.5293, + "step": 837500 + }, + { + "epoch": 4.15, + "learning_rate": 4.7924894596295145e-05, + "loss": 2.5136, + "step": 838000 + }, + { + "epoch": 4.15, + "learning_rate": 4.792365600986906e-05, + "loss": 2.497, + "step": 838500 + }, + { + "epoch": 4.16, + "learning_rate": 4.792241742344298e-05, + "loss": 2.5301, + "step": 839000 + }, + { + "epoch": 4.16, + "learning_rate": 4.792117883701689e-05, + "loss": 2.5154, + "step": 839500 + }, + { + "epoch": 4.16, + "learning_rate": 4.7919940250590806e-05, + "loss": 2.5086, + "step": 840000 + }, + { + "epoch": 4.16, + "learning_rate": 4.791870166416472e-05, + "loss": 2.506, + "step": 840500 + }, + { + "epoch": 4.17, + "learning_rate": 4.791746307773864e-05, + "loss": 2.5198, + "step": 841000 + }, + { + "epoch": 4.17, + "learning_rate": 4.7916224491312556e-05, + "loss": 2.5042, + "step": 841500 + }, + { + "epoch": 4.17, + "learning_rate": 4.791499085923218e-05, + "loss": 2.5122, + "step": 842000 + }, + { + "epoch": 4.17, + "learning_rate": 4.7913752272806094e-05, + "loss": 2.5176, + "step": 842500 + }, + { + "epoch": 4.18, + "learning_rate": 4.791251368638001e-05, + "loss": 2.5354, + "step": 843000 + }, + { + "epoch": 4.18, + "learning_rate": 4.791127509995393e-05, + "loss": 2.515, + "step": 843500 + }, + { + "epoch": 4.18, + "learning_rate": 4.7910036513527845e-05, + "loss": 2.5099, + "step": 844000 + }, + { + "epoch": 4.18, + "learning_rate": 4.790879792710176e-05, + "loss": 2.5267, + "step": 844500 + }, + { + "epoch": 4.19, + "learning_rate": 4.790755934067568e-05, + "loss": 2.5115, + "step": 845000 + }, + { + "epoch": 4.19, + "learning_rate": 4.7906320754249596e-05, + "loss": 2.5051, + "step": 845500 + }, + { + "epoch": 4.19, + "learning_rate": 4.7905082167823506e-05, + "loss": 2.5294, + "step": 846000 + }, + { + "epoch": 4.19, + "learning_rate": 4.790384605857028e-05, + "loss": 2.5381, + "step": 846500 + }, + { + "epoch": 4.2, + "learning_rate": 4.79026074721442e-05, + "loss": 2.509, + "step": 847000 + }, + { + "epoch": 4.2, + "learning_rate": 4.7901368885718115e-05, + "loss": 2.5301, + "step": 847500 + }, + { + "epoch": 4.2, + "learning_rate": 4.7900130299292025e-05, + "loss": 2.5011, + "step": 848000 + }, + { + "epoch": 4.2, + "learning_rate": 4.789889171286594e-05, + "loss": 2.5179, + "step": 848500 + }, + { + "epoch": 4.21, + "learning_rate": 4.789765312643986e-05, + "loss": 2.5437, + "step": 849000 + }, + { + "epoch": 4.21, + "learning_rate": 4.789641701718663e-05, + "loss": 2.5317, + "step": 849500 + }, + { + "epoch": 4.21, + "learning_rate": 4.78951809079334e-05, + "loss": 2.5221, + "step": 850000 + }, + { + "epoch": 4.21, + "learning_rate": 4.7893942321507314e-05, + "loss": 2.5152, + "step": 850500 + }, + { + "epoch": 4.22, + "learning_rate": 4.789270621225408e-05, + "loss": 2.5534, + "step": 851000 + }, + { + "epoch": 4.22, + "learning_rate": 4.789146762582799e-05, + "loss": 2.5463, + "step": 851500 + }, + { + "epoch": 4.22, + "learning_rate": 4.789022903940191e-05, + "loss": 2.528, + "step": 852000 + }, + { + "epoch": 4.22, + "learning_rate": 4.7888990452975827e-05, + "loss": 2.5066, + "step": 852500 + }, + { + "epoch": 4.23, + "learning_rate": 4.7887751866549744e-05, + "loss": 2.5129, + "step": 853000 + }, + { + "epoch": 4.23, + "learning_rate": 4.788651328012366e-05, + "loss": 2.5264, + "step": 853500 + }, + { + "epoch": 4.23, + "learning_rate": 4.788527469369758e-05, + "loss": 2.5151, + "step": 854000 + }, + { + "epoch": 4.23, + "learning_rate": 4.7884036107271494e-05, + "loss": 2.5214, + "step": 854500 + }, + { + "epoch": 4.24, + "learning_rate": 4.788279752084541e-05, + "loss": 2.5097, + "step": 855000 + }, + { + "epoch": 4.24, + "learning_rate": 4.788155893441933e-05, + "loss": 2.5109, + "step": 855500 + }, + { + "epoch": 4.24, + "learning_rate": 4.7880320347993245e-05, + "loss": 2.522, + "step": 856000 + }, + { + "epoch": 4.24, + "learning_rate": 4.7879084238740014e-05, + "loss": 2.5233, + "step": 856500 + }, + { + "epoch": 4.25, + "learning_rate": 4.787784565231393e-05, + "loss": 2.5345, + "step": 857000 + }, + { + "epoch": 4.25, + "learning_rate": 4.787660706588785e-05, + "loss": 2.5216, + "step": 857500 + }, + { + "epoch": 4.25, + "learning_rate": 4.7875368479461765e-05, + "loss": 2.565, + "step": 858000 + }, + { + "epoch": 4.25, + "learning_rate": 4.787412989303568e-05, + "loss": 2.5278, + "step": 858500 + }, + { + "epoch": 4.26, + "learning_rate": 4.7872893783782444e-05, + "loss": 2.5257, + "step": 859000 + }, + { + "epoch": 4.26, + "learning_rate": 4.787165519735636e-05, + "loss": 2.5161, + "step": 859500 + }, + { + "epoch": 4.26, + "learning_rate": 4.787041661093028e-05, + "loss": 2.5183, + "step": 860000 + }, + { + "epoch": 4.26, + "learning_rate": 4.7869178024504194e-05, + "loss": 2.52, + "step": 860500 + }, + { + "epoch": 4.27, + "learning_rate": 4.786793943807811e-05, + "loss": 2.5174, + "step": 861000 + }, + { + "epoch": 4.27, + "learning_rate": 4.786670085165203e-05, + "loss": 2.5502, + "step": 861500 + }, + { + "epoch": 4.27, + "learning_rate": 4.78654647423988e-05, + "loss": 2.5172, + "step": 862000 + }, + { + "epoch": 4.27, + "learning_rate": 4.7864226155972714e-05, + "loss": 2.5307, + "step": 862500 + }, + { + "epoch": 4.28, + "learning_rate": 4.786298756954663e-05, + "loss": 2.5203, + "step": 863000 + }, + { + "epoch": 4.28, + "learning_rate": 4.786174898312055e-05, + "loss": 2.528, + "step": 863500 + }, + { + "epoch": 4.28, + "learning_rate": 4.7860510396694465e-05, + "loss": 2.5293, + "step": 864000 + }, + { + "epoch": 4.28, + "learning_rate": 4.785927181026838e-05, + "loss": 2.5237, + "step": 864500 + }, + { + "epoch": 4.29, + "learning_rate": 4.78580332238423e-05, + "loss": 2.5579, + "step": 865000 + }, + { + "epoch": 4.29, + "learning_rate": 4.7856794637416216e-05, + "loss": 2.5067, + "step": 865500 + }, + { + "epoch": 4.29, + "learning_rate": 4.785555605099013e-05, + "loss": 2.5152, + "step": 866000 + }, + { + "epoch": 4.29, + "learning_rate": 4.785431746456405e-05, + "loss": 2.5302, + "step": 866500 + }, + { + "epoch": 4.3, + "learning_rate": 4.785308135531081e-05, + "loss": 2.5225, + "step": 867000 + }, + { + "epoch": 4.3, + "learning_rate": 4.785184524605758e-05, + "loss": 2.507, + "step": 867500 + }, + { + "epoch": 4.3, + "learning_rate": 4.78506066596315e-05, + "loss": 2.5427, + "step": 868000 + }, + { + "epoch": 4.3, + "learning_rate": 4.7849370550378266e-05, + "loss": 2.508, + "step": 868500 + }, + { + "epoch": 4.31, + "learning_rate": 4.784813196395218e-05, + "loss": 2.5365, + "step": 869000 + }, + { + "epoch": 4.31, + "learning_rate": 4.78468933775261e-05, + "loss": 2.5121, + "step": 869500 + }, + { + "epoch": 4.31, + "learning_rate": 4.784565479110002e-05, + "loss": 2.519, + "step": 870000 + }, + { + "epoch": 4.31, + "learning_rate": 4.7844416204673934e-05, + "loss": 2.5295, + "step": 870500 + }, + { + "epoch": 4.32, + "learning_rate": 4.784317761824785e-05, + "loss": 2.5398, + "step": 871000 + }, + { + "epoch": 4.32, + "learning_rate": 4.784193903182176e-05, + "loss": 2.5191, + "step": 871500 + }, + { + "epoch": 4.32, + "learning_rate": 4.784070044539568e-05, + "loss": 2.523, + "step": 872000 + }, + { + "epoch": 4.32, + "learning_rate": 4.7839461858969595e-05, + "loss": 2.5589, + "step": 872500 + }, + { + "epoch": 4.33, + "learning_rate": 4.783822327254351e-05, + "loss": 2.5345, + "step": 873000 + }, + { + "epoch": 4.33, + "learning_rate": 4.783698468611743e-05, + "loss": 2.5171, + "step": 873500 + }, + { + "epoch": 4.33, + "learning_rate": 4.7835746099691345e-05, + "loss": 2.5048, + "step": 874000 + }, + { + "epoch": 4.33, + "learning_rate": 4.783450751326526e-05, + "loss": 2.5152, + "step": 874500 + }, + { + "epoch": 4.34, + "learning_rate": 4.783327140401203e-05, + "loss": 2.5189, + "step": 875000 + }, + { + "epoch": 4.34, + "learning_rate": 4.783203281758595e-05, + "loss": 2.506, + "step": 875500 + }, + { + "epoch": 4.34, + "learning_rate": 4.7830794231159865e-05, + "loss": 2.5281, + "step": 876000 + }, + { + "epoch": 4.34, + "learning_rate": 4.782955564473378e-05, + "loss": 2.5282, + "step": 876500 + }, + { + "epoch": 4.34, + "learning_rate": 4.782831953548055e-05, + "loss": 2.4956, + "step": 877000 + }, + { + "epoch": 4.35, + "learning_rate": 4.782708094905447e-05, + "loss": 2.5356, + "step": 877500 + }, + { + "epoch": 4.35, + "learning_rate": 4.7825842362628385e-05, + "loss": 2.5127, + "step": 878000 + }, + { + "epoch": 4.35, + "learning_rate": 4.7824603776202295e-05, + "loss": 2.536, + "step": 878500 + }, + { + "epoch": 4.35, + "learning_rate": 4.782336518977621e-05, + "loss": 2.5432, + "step": 879000 + }, + { + "epoch": 4.36, + "learning_rate": 4.782212660335013e-05, + "loss": 2.5164, + "step": 879500 + }, + { + "epoch": 4.36, + "learning_rate": 4.78208904940969e-05, + "loss": 2.5096, + "step": 880000 + }, + { + "epoch": 4.36, + "learning_rate": 4.7819651907670814e-05, + "loss": 2.5279, + "step": 880500 + }, + { + "epoch": 4.36, + "learning_rate": 4.781841332124473e-05, + "loss": 2.5284, + "step": 881000 + }, + { + "epoch": 4.37, + "learning_rate": 4.781717473481865e-05, + "loss": 2.496, + "step": 881500 + }, + { + "epoch": 4.37, + "learning_rate": 4.7815936148392565e-05, + "loss": 2.5169, + "step": 882000 + }, + { + "epoch": 4.37, + "learning_rate": 4.781469756196648e-05, + "loss": 2.5015, + "step": 882500 + }, + { + "epoch": 4.37, + "learning_rate": 4.781346145271325e-05, + "loss": 2.5389, + "step": 883000 + }, + { + "epoch": 4.38, + "learning_rate": 4.781222286628717e-05, + "loss": 2.5235, + "step": 883500 + }, + { + "epoch": 4.38, + "learning_rate": 4.7810984279861085e-05, + "loss": 2.5245, + "step": 884000 + }, + { + "epoch": 4.38, + "learning_rate": 4.780974817060785e-05, + "loss": 2.5214, + "step": 884500 + }, + { + "epoch": 4.38, + "learning_rate": 4.780851206135462e-05, + "loss": 2.5422, + "step": 885000 + }, + { + "epoch": 4.39, + "learning_rate": 4.780727347492854e-05, + "loss": 2.5327, + "step": 885500 + }, + { + "epoch": 4.39, + "learning_rate": 4.7806034888502456e-05, + "loss": 2.5164, + "step": 886000 + }, + { + "epoch": 4.39, + "learning_rate": 4.780479630207637e-05, + "loss": 2.5537, + "step": 886500 + }, + { + "epoch": 4.39, + "learning_rate": 4.780355771565028e-05, + "loss": 2.513, + "step": 887000 + }, + { + "epoch": 4.4, + "learning_rate": 4.78023191292242e-05, + "loss": 2.5196, + "step": 887500 + }, + { + "epoch": 4.4, + "learning_rate": 4.780108054279812e-05, + "loss": 2.5014, + "step": 888000 + }, + { + "epoch": 4.4, + "learning_rate": 4.7799841956372034e-05, + "loss": 2.5319, + "step": 888500 + }, + { + "epoch": 4.4, + "learning_rate": 4.779860336994595e-05, + "loss": 2.5317, + "step": 889000 + }, + { + "epoch": 4.41, + "learning_rate": 4.779736478351987e-05, + "loss": 2.5447, + "step": 889500 + }, + { + "epoch": 4.41, + "learning_rate": 4.7796126197093785e-05, + "loss": 2.513, + "step": 890000 + }, + { + "epoch": 4.41, + "learning_rate": 4.77948876106677e-05, + "loss": 2.5189, + "step": 890500 + }, + { + "epoch": 4.41, + "learning_rate": 4.7793651501414464e-05, + "loss": 2.5308, + "step": 891000 + }, + { + "epoch": 4.42, + "learning_rate": 4.779241291498838e-05, + "loss": 2.5264, + "step": 891500 + }, + { + "epoch": 4.42, + "learning_rate": 4.77911743285623e-05, + "loss": 2.5147, + "step": 892000 + }, + { + "epoch": 4.42, + "learning_rate": 4.7789935742136215e-05, + "loss": 2.5411, + "step": 892500 + }, + { + "epoch": 4.42, + "learning_rate": 4.7788702110055835e-05, + "loss": 2.496, + "step": 893000 + }, + { + "epoch": 4.43, + "learning_rate": 4.778746352362975e-05, + "loss": 2.5043, + "step": 893500 + }, + { + "epoch": 4.43, + "learning_rate": 4.778622493720367e-05, + "loss": 2.51, + "step": 894000 + }, + { + "epoch": 4.43, + "learning_rate": 4.7784986350777586e-05, + "loss": 2.518, + "step": 894500 + }, + { + "epoch": 4.43, + "learning_rate": 4.77837477643515e-05, + "loss": 2.5077, + "step": 895000 + }, + { + "epoch": 4.44, + "learning_rate": 4.778250917792542e-05, + "loss": 2.512, + "step": 895500 + }, + { + "epoch": 4.44, + "learning_rate": 4.778127306867219e-05, + "loss": 2.5042, + "step": 896000 + }, + { + "epoch": 4.44, + "learning_rate": 4.7780034482246106e-05, + "loss": 2.5105, + "step": 896500 + }, + { + "epoch": 4.44, + "learning_rate": 4.777879589582002e-05, + "loss": 2.5393, + "step": 897000 + }, + { + "epoch": 4.45, + "learning_rate": 4.777755730939394e-05, + "loss": 2.5374, + "step": 897500 + }, + { + "epoch": 4.45, + "learning_rate": 4.7776318722967856e-05, + "loss": 2.4974, + "step": 898000 + }, + { + "epoch": 4.45, + "learning_rate": 4.7775080136541773e-05, + "loss": 2.5217, + "step": 898500 + }, + { + "epoch": 4.45, + "learning_rate": 4.777384155011569e-05, + "loss": 2.5425, + "step": 899000 + }, + { + "epoch": 4.46, + "learning_rate": 4.77726029636896e-05, + "loss": 2.5461, + "step": 899500 + }, + { + "epoch": 4.46, + "learning_rate": 4.777136933160922e-05, + "loss": 2.5432, + "step": 900000 + }, + { + "epoch": 4.46, + "learning_rate": 4.777013074518314e-05, + "loss": 2.5105, + "step": 900500 + }, + { + "epoch": 4.46, + "learning_rate": 4.7768892158757055e-05, + "loss": 2.5198, + "step": 901000 + }, + { + "epoch": 4.47, + "learning_rate": 4.7767656049503824e-05, + "loss": 2.5226, + "step": 901500 + }, + { + "epoch": 4.47, + "learning_rate": 4.776641746307774e-05, + "loss": 2.5397, + "step": 902000 + }, + { + "epoch": 4.47, + "learning_rate": 4.776517887665166e-05, + "loss": 2.5173, + "step": 902500 + }, + { + "epoch": 4.47, + "learning_rate": 4.7763940290225575e-05, + "loss": 2.5123, + "step": 903000 + }, + { + "epoch": 4.48, + "learning_rate": 4.776270170379949e-05, + "loss": 2.5164, + "step": 903500 + }, + { + "epoch": 4.48, + "learning_rate": 4.776146311737341e-05, + "loss": 2.5277, + "step": 904000 + }, + { + "epoch": 4.48, + "learning_rate": 4.7760224530947325e-05, + "loss": 2.5085, + "step": 904500 + }, + { + "epoch": 4.48, + "learning_rate": 4.775898594452124e-05, + "loss": 2.5297, + "step": 905000 + }, + { + "epoch": 4.49, + "learning_rate": 4.775774735809516e-05, + "loss": 2.5393, + "step": 905500 + }, + { + "epoch": 4.49, + "learning_rate": 4.775651124884192e-05, + "loss": 2.5383, + "step": 906000 + }, + { + "epoch": 4.49, + "learning_rate": 4.775527266241584e-05, + "loss": 2.5199, + "step": 906500 + }, + { + "epoch": 4.49, + "learning_rate": 4.7754034075989755e-05, + "loss": 2.5323, + "step": 907000 + }, + { + "epoch": 4.5, + "learning_rate": 4.775279548956367e-05, + "loss": 2.5211, + "step": 907500 + }, + { + "epoch": 4.5, + "learning_rate": 4.775155690313759e-05, + "loss": 2.5245, + "step": 908000 + }, + { + "epoch": 4.5, + "learning_rate": 4.7750318316711506e-05, + "loss": 2.5016, + "step": 908500 + }, + { + "epoch": 4.5, + "learning_rate": 4.774907973028542e-05, + "loss": 2.5191, + "step": 909000 + }, + { + "epoch": 4.51, + "learning_rate": 4.774784114385934e-05, + "loss": 2.5471, + "step": 909500 + }, + { + "epoch": 4.51, + "learning_rate": 4.774660255743326e-05, + "loss": 2.5372, + "step": 910000 + }, + { + "epoch": 4.51, + "learning_rate": 4.7745363971007174e-05, + "loss": 2.52, + "step": 910500 + }, + { + "epoch": 4.51, + "learning_rate": 4.774412538458109e-05, + "loss": 2.512, + "step": 911000 + }, + { + "epoch": 4.52, + "learning_rate": 4.774288679815501e-05, + "loss": 2.5325, + "step": 911500 + }, + { + "epoch": 4.52, + "learning_rate": 4.774164821172892e-05, + "loss": 2.5194, + "step": 912000 + }, + { + "epoch": 4.52, + "learning_rate": 4.7740409625302835e-05, + "loss": 2.5473, + "step": 912500 + }, + { + "epoch": 4.52, + "learning_rate": 4.773917103887675e-05, + "loss": 2.5345, + "step": 913000 + }, + { + "epoch": 4.53, + "learning_rate": 4.773793245245067e-05, + "loss": 2.5285, + "step": 913500 + }, + { + "epoch": 4.53, + "learning_rate": 4.7736693866024585e-05, + "loss": 2.5442, + "step": 914000 + }, + { + "epoch": 4.53, + "learning_rate": 4.7735457756771354e-05, + "loss": 2.5358, + "step": 914500 + }, + { + "epoch": 4.53, + "learning_rate": 4.773421917034527e-05, + "loss": 2.5193, + "step": 915000 + }, + { + "epoch": 4.54, + "learning_rate": 4.773298306109204e-05, + "loss": 2.5213, + "step": 915500 + }, + { + "epoch": 4.54, + "learning_rate": 4.773174447466596e-05, + "loss": 2.5143, + "step": 916000 + }, + { + "epoch": 4.54, + "learning_rate": 4.7730505888239874e-05, + "loss": 2.507, + "step": 916500 + }, + { + "epoch": 4.54, + "learning_rate": 4.772926977898664e-05, + "loss": 2.5088, + "step": 917000 + }, + { + "epoch": 4.55, + "learning_rate": 4.772803119256056e-05, + "loss": 2.538, + "step": 917500 + }, + { + "epoch": 4.55, + "learning_rate": 4.7726792606134476e-05, + "loss": 2.5344, + "step": 918000 + }, + { + "epoch": 4.55, + "learning_rate": 4.7725554019708393e-05, + "loss": 2.5217, + "step": 918500 + }, + { + "epoch": 4.55, + "learning_rate": 4.772431543328231e-05, + "loss": 2.5035, + "step": 919000 + }, + { + "epoch": 4.56, + "learning_rate": 4.772307684685623e-05, + "loss": 2.5022, + "step": 919500 + }, + { + "epoch": 4.56, + "learning_rate": 4.772183826043014e-05, + "loss": 2.535, + "step": 920000 + }, + { + "epoch": 4.56, + "learning_rate": 4.7720599674004054e-05, + "loss": 2.481, + "step": 920500 + }, + { + "epoch": 4.56, + "learning_rate": 4.771936356475082e-05, + "loss": 2.5258, + "step": 921000 + }, + { + "epoch": 4.57, + "learning_rate": 4.771812497832474e-05, + "loss": 2.5533, + "step": 921500 + }, + { + "epoch": 4.57, + "learning_rate": 4.771688639189866e-05, + "loss": 2.5407, + "step": 922000 + }, + { + "epoch": 4.57, + "learning_rate": 4.7715647805472574e-05, + "loss": 2.5222, + "step": 922500 + }, + { + "epoch": 4.57, + "learning_rate": 4.771440921904649e-05, + "loss": 2.5202, + "step": 923000 + }, + { + "epoch": 4.58, + "learning_rate": 4.7713178064138963e-05, + "loss": 2.5326, + "step": 923500 + }, + { + "epoch": 4.58, + "learning_rate": 4.771193947771288e-05, + "loss": 2.5148, + "step": 924000 + }, + { + "epoch": 4.58, + "learning_rate": 4.77107008912868e-05, + "loss": 2.5138, + "step": 924500 + }, + { + "epoch": 4.58, + "learning_rate": 4.770946230486071e-05, + "loss": 2.4793, + "step": 925000 + }, + { + "epoch": 4.59, + "learning_rate": 4.7708223718434624e-05, + "loss": 2.5133, + "step": 925500 + }, + { + "epoch": 4.59, + "learning_rate": 4.770698513200854e-05, + "loss": 2.5307, + "step": 926000 + }, + { + "epoch": 4.59, + "learning_rate": 4.770574902275532e-05, + "loss": 2.5068, + "step": 926500 + }, + { + "epoch": 4.59, + "learning_rate": 4.7704510436329234e-05, + "loss": 2.528, + "step": 927000 + }, + { + "epoch": 4.6, + "learning_rate": 4.7703271849903144e-05, + "loss": 2.5122, + "step": 927500 + }, + { + "epoch": 4.6, + "learning_rate": 4.770203326347706e-05, + "loss": 2.5372, + "step": 928000 + }, + { + "epoch": 4.6, + "learning_rate": 4.770079467705098e-05, + "loss": 2.5282, + "step": 928500 + }, + { + "epoch": 4.6, + "learning_rate": 4.7699556090624895e-05, + "loss": 2.5162, + "step": 929000 + }, + { + "epoch": 4.61, + "learning_rate": 4.769831750419881e-05, + "loss": 2.5279, + "step": 929500 + }, + { + "epoch": 4.61, + "learning_rate": 4.769707891777272e-05, + "loss": 2.5302, + "step": 930000 + }, + { + "epoch": 4.61, + "learning_rate": 4.769584033134664e-05, + "loss": 2.5265, + "step": 930500 + }, + { + "epoch": 4.61, + "learning_rate": 4.7694601744920556e-05, + "loss": 2.5167, + "step": 931000 + }, + { + "epoch": 4.61, + "learning_rate": 4.769336315849447e-05, + "loss": 2.5033, + "step": 931500 + }, + { + "epoch": 4.62, + "learning_rate": 4.769212457206839e-05, + "loss": 2.5239, + "step": 932000 + }, + { + "epoch": 4.62, + "learning_rate": 4.769088846281516e-05, + "loss": 2.5485, + "step": 932500 + }, + { + "epoch": 4.62, + "learning_rate": 4.7689649876389075e-05, + "loss": 2.5165, + "step": 933000 + }, + { + "epoch": 4.62, + "learning_rate": 4.768841376713585e-05, + "loss": 2.5295, + "step": 933500 + }, + { + "epoch": 4.63, + "learning_rate": 4.768717518070977e-05, + "loss": 2.5282, + "step": 934000 + }, + { + "epoch": 4.63, + "learning_rate": 4.768593659428368e-05, + "loss": 2.505, + "step": 934500 + }, + { + "epoch": 4.63, + "learning_rate": 4.768470048503045e-05, + "loss": 2.5145, + "step": 935000 + }, + { + "epoch": 4.63, + "learning_rate": 4.7683461898604364e-05, + "loss": 2.5239, + "step": 935500 + }, + { + "epoch": 4.64, + "learning_rate": 4.768222331217828e-05, + "loss": 2.512, + "step": 936000 + }, + { + "epoch": 4.64, + "learning_rate": 4.76809847257522e-05, + "loss": 2.5265, + "step": 936500 + }, + { + "epoch": 4.64, + "learning_rate": 4.7679746139326114e-05, + "loss": 2.528, + "step": 937000 + }, + { + "epoch": 4.64, + "learning_rate": 4.7678507552900025e-05, + "loss": 2.5295, + "step": 937500 + }, + { + "epoch": 4.65, + "learning_rate": 4.767726896647394e-05, + "loss": 2.4952, + "step": 938000 + }, + { + "epoch": 4.65, + "learning_rate": 4.767603038004786e-05, + "loss": 2.5174, + "step": 938500 + }, + { + "epoch": 4.65, + "learning_rate": 4.7674791793621775e-05, + "loss": 2.5244, + "step": 939000 + }, + { + "epoch": 4.65, + "learning_rate": 4.767355320719569e-05, + "loss": 2.5318, + "step": 939500 + }, + { + "epoch": 4.66, + "learning_rate": 4.767231462076961e-05, + "loss": 2.5375, + "step": 940000 + }, + { + "epoch": 4.66, + "learning_rate": 4.7671078511516385e-05, + "loss": 2.5271, + "step": 940500 + }, + { + "epoch": 4.66, + "learning_rate": 4.7669839925090295e-05, + "loss": 2.5511, + "step": 941000 + }, + { + "epoch": 4.66, + "learning_rate": 4.766860133866421e-05, + "loss": 2.5225, + "step": 941500 + }, + { + "epoch": 4.67, + "learning_rate": 4.766736275223813e-05, + "loss": 2.5055, + "step": 942000 + }, + { + "epoch": 4.67, + "learning_rate": 4.7666124165812046e-05, + "loss": 2.5206, + "step": 942500 + }, + { + "epoch": 4.67, + "learning_rate": 4.766488557938596e-05, + "loss": 2.5182, + "step": 943000 + }, + { + "epoch": 4.67, + "learning_rate": 4.766364947013273e-05, + "loss": 2.5254, + "step": 943500 + }, + { + "epoch": 4.68, + "learning_rate": 4.766241088370664e-05, + "loss": 2.4945, + "step": 944000 + }, + { + "epoch": 4.68, + "learning_rate": 4.766117229728056e-05, + "loss": 2.5185, + "step": 944500 + }, + { + "epoch": 4.68, + "learning_rate": 4.7659933710854476e-05, + "loss": 2.5111, + "step": 945000 + }, + { + "epoch": 4.68, + "learning_rate": 4.765869512442839e-05, + "loss": 2.5186, + "step": 945500 + }, + { + "epoch": 4.69, + "learning_rate": 4.765745653800231e-05, + "loss": 2.5001, + "step": 946000 + }, + { + "epoch": 4.69, + "learning_rate": 4.7656217951576226e-05, + "loss": 2.5132, + "step": 946500 + }, + { + "epoch": 4.69, + "learning_rate": 4.7654981842322995e-05, + "loss": 2.5195, + "step": 947000 + }, + { + "epoch": 4.69, + "learning_rate": 4.765374325589691e-05, + "loss": 2.5179, + "step": 947500 + }, + { + "epoch": 4.7, + "learning_rate": 4.765250466947083e-05, + "loss": 2.5338, + "step": 948000 + }, + { + "epoch": 4.7, + "learning_rate": 4.7651266083044746e-05, + "loss": 2.5614, + "step": 948500 + }, + { + "epoch": 4.7, + "learning_rate": 4.7650029973791515e-05, + "loss": 2.5336, + "step": 949000 + }, + { + "epoch": 4.7, + "learning_rate": 4.764879138736543e-05, + "loss": 2.4901, + "step": 949500 + }, + { + "epoch": 4.71, + "learning_rate": 4.76475552781122e-05, + "loss": 2.5283, + "step": 950000 + }, + { + "epoch": 4.71, + "learning_rate": 4.764631669168612e-05, + "loss": 2.5171, + "step": 950500 + }, + { + "epoch": 4.71, + "learning_rate": 4.764508058243288e-05, + "loss": 2.5353, + "step": 951000 + }, + { + "epoch": 4.71, + "learning_rate": 4.7643841996006796e-05, + "loss": 2.5246, + "step": 951500 + }, + { + "epoch": 4.72, + "learning_rate": 4.764260340958071e-05, + "loss": 2.5173, + "step": 952000 + }, + { + "epoch": 4.72, + "learning_rate": 4.764136482315463e-05, + "loss": 2.5125, + "step": 952500 + }, + { + "epoch": 4.72, + "learning_rate": 4.764012623672855e-05, + "loss": 2.5237, + "step": 953000 + }, + { + "epoch": 4.72, + "learning_rate": 4.7638887650302464e-05, + "loss": 2.5033, + "step": 953500 + }, + { + "epoch": 4.73, + "learning_rate": 4.763764906387638e-05, + "loss": 2.5233, + "step": 954000 + }, + { + "epoch": 4.73, + "learning_rate": 4.76364104774503e-05, + "loss": 2.5408, + "step": 954500 + }, + { + "epoch": 4.73, + "learning_rate": 4.7635171891024215e-05, + "loss": 2.5335, + "step": 955000 + }, + { + "epoch": 4.73, + "learning_rate": 4.763393330459813e-05, + "loss": 2.5481, + "step": 955500 + }, + { + "epoch": 4.74, + "learning_rate": 4.763269471817205e-05, + "loss": 2.5219, + "step": 956000 + }, + { + "epoch": 4.74, + "learning_rate": 4.763145613174596e-05, + "loss": 2.5032, + "step": 956500 + }, + { + "epoch": 4.74, + "learning_rate": 4.7630217545319876e-05, + "loss": 2.5114, + "step": 957000 + }, + { + "epoch": 4.74, + "learning_rate": 4.762897895889379e-05, + "loss": 2.5286, + "step": 957500 + }, + { + "epoch": 4.75, + "learning_rate": 4.762774284964057e-05, + "loss": 2.5163, + "step": 958000 + }, + { + "epoch": 4.75, + "learning_rate": 4.7626504263214485e-05, + "loss": 2.5127, + "step": 958500 + }, + { + "epoch": 4.75, + "learning_rate": 4.76252656767884e-05, + "loss": 2.5466, + "step": 959000 + }, + { + "epoch": 4.75, + "learning_rate": 4.762402709036231e-05, + "loss": 2.5094, + "step": 959500 + }, + { + "epoch": 4.76, + "learning_rate": 4.762278850393623e-05, + "loss": 2.5276, + "step": 960000 + }, + { + "epoch": 4.76, + "learning_rate": 4.7621549917510146e-05, + "loss": 2.5316, + "step": 960500 + }, + { + "epoch": 4.76, + "learning_rate": 4.762031133108406e-05, + "loss": 2.5051, + "step": 961000 + }, + { + "epoch": 4.76, + "learning_rate": 4.761907522183083e-05, + "loss": 2.495, + "step": 961500 + }, + { + "epoch": 4.77, + "learning_rate": 4.761783663540475e-05, + "loss": 2.5125, + "step": 962000 + }, + { + "epoch": 4.77, + "learning_rate": 4.761659804897866e-05, + "loss": 2.5293, + "step": 962500 + }, + { + "epoch": 4.77, + "learning_rate": 4.7615359462552576e-05, + "loss": 2.5661, + "step": 963000 + }, + { + "epoch": 4.77, + "learning_rate": 4.761412087612649e-05, + "loss": 2.5403, + "step": 963500 + }, + { + "epoch": 4.78, + "learning_rate": 4.761288228970041e-05, + "loss": 2.4998, + "step": 964000 + }, + { + "epoch": 4.78, + "learning_rate": 4.761164370327433e-05, + "loss": 2.5193, + "step": 964500 + }, + { + "epoch": 4.78, + "learning_rate": 4.7610405116848244e-05, + "loss": 2.5213, + "step": 965000 + }, + { + "epoch": 4.78, + "learning_rate": 4.760916653042216e-05, + "loss": 2.5163, + "step": 965500 + }, + { + "epoch": 4.79, + "learning_rate": 4.760792794399608e-05, + "loss": 2.5139, + "step": 966000 + }, + { + "epoch": 4.79, + "learning_rate": 4.7606689357569994e-05, + "loss": 2.5008, + "step": 966500 + }, + { + "epoch": 4.79, + "learning_rate": 4.760545324831676e-05, + "loss": 2.5123, + "step": 967000 + }, + { + "epoch": 4.79, + "learning_rate": 4.760421466189068e-05, + "loss": 2.519, + "step": 967500 + }, + { + "epoch": 4.8, + "learning_rate": 4.76029760754646e-05, + "loss": 2.5064, + "step": 968000 + }, + { + "epoch": 4.8, + "learning_rate": 4.7601739966211366e-05, + "loss": 2.5199, + "step": 968500 + }, + { + "epoch": 4.8, + "learning_rate": 4.7600501379785276e-05, + "loss": 2.5431, + "step": 969000 + }, + { + "epoch": 4.8, + "learning_rate": 4.759926279335919e-05, + "loss": 2.5357, + "step": 969500 + }, + { + "epoch": 4.81, + "learning_rate": 4.759802420693311e-05, + "loss": 2.5141, + "step": 970000 + }, + { + "epoch": 4.81, + "learning_rate": 4.759678562050703e-05, + "loss": 2.5247, + "step": 970500 + }, + { + "epoch": 4.81, + "learning_rate": 4.7595547034080944e-05, + "loss": 2.5246, + "step": 971000 + }, + { + "epoch": 4.81, + "learning_rate": 4.759430844765486e-05, + "loss": 2.5307, + "step": 971500 + }, + { + "epoch": 4.82, + "learning_rate": 4.759307233840163e-05, + "loss": 2.5291, + "step": 972000 + }, + { + "epoch": 4.82, + "learning_rate": 4.7591833751975546e-05, + "loss": 2.5249, + "step": 972500 + }, + { + "epoch": 4.82, + "learning_rate": 4.759059516554946e-05, + "loss": 2.5034, + "step": 973000 + }, + { + "epoch": 4.82, + "learning_rate": 4.758935657912338e-05, + "loss": 2.4944, + "step": 973500 + }, + { + "epoch": 4.83, + "learning_rate": 4.75881179926973e-05, + "loss": 2.5173, + "step": 974000 + }, + { + "epoch": 4.83, + "learning_rate": 4.7586881883444066e-05, + "loss": 2.5288, + "step": 974500 + }, + { + "epoch": 4.83, + "learning_rate": 4.7585643297017976e-05, + "loss": 2.5382, + "step": 975000 + }, + { + "epoch": 4.83, + "learning_rate": 4.758440471059189e-05, + "loss": 2.5082, + "step": 975500 + }, + { + "epoch": 4.84, + "learning_rate": 4.758316612416581e-05, + "loss": 2.4987, + "step": 976000 + }, + { + "epoch": 4.84, + "learning_rate": 4.758192753773973e-05, + "loss": 2.5288, + "step": 976500 + }, + { + "epoch": 4.84, + "learning_rate": 4.7580693905659354e-05, + "loss": 2.5066, + "step": 977000 + }, + { + "epoch": 4.84, + "learning_rate": 4.757945531923327e-05, + "loss": 2.5299, + "step": 977500 + }, + { + "epoch": 4.85, + "learning_rate": 4.757821673280718e-05, + "loss": 2.5367, + "step": 978000 + }, + { + "epoch": 4.85, + "learning_rate": 4.75769781463811e-05, + "loss": 2.5034, + "step": 978500 + }, + { + "epoch": 4.85, + "learning_rate": 4.7575739559955015e-05, + "loss": 2.5142, + "step": 979000 + }, + { + "epoch": 4.85, + "learning_rate": 4.757450097352893e-05, + "loss": 2.5349, + "step": 979500 + }, + { + "epoch": 4.86, + "learning_rate": 4.757326238710285e-05, + "loss": 2.5567, + "step": 980000 + }, + { + "epoch": 4.86, + "learning_rate": 4.7572023800676766e-05, + "loss": 2.5057, + "step": 980500 + }, + { + "epoch": 4.86, + "learning_rate": 4.757078521425068e-05, + "loss": 2.5152, + "step": 981000 + }, + { + "epoch": 4.86, + "learning_rate": 4.756954662782459e-05, + "loss": 2.5161, + "step": 981500 + }, + { + "epoch": 4.87, + "learning_rate": 4.756830804139851e-05, + "loss": 2.4941, + "step": 982000 + }, + { + "epoch": 4.87, + "learning_rate": 4.756706945497243e-05, + "loss": 2.5139, + "step": 982500 + }, + { + "epoch": 4.87, + "learning_rate": 4.7565830868546344e-05, + "loss": 2.5407, + "step": 983000 + }, + { + "epoch": 4.87, + "learning_rate": 4.756459475929312e-05, + "loss": 2.5312, + "step": 983500 + }, + { + "epoch": 4.88, + "learning_rate": 4.7563356172867036e-05, + "loss": 2.5106, + "step": 984000 + }, + { + "epoch": 4.88, + "learning_rate": 4.756211758644095e-05, + "loss": 2.5268, + "step": 984500 + }, + { + "epoch": 4.88, + "learning_rate": 4.7560879000014864e-05, + "loss": 2.537, + "step": 985000 + }, + { + "epoch": 4.88, + "learning_rate": 4.755964041358878e-05, + "loss": 2.5343, + "step": 985500 + }, + { + "epoch": 4.88, + "learning_rate": 4.75584018271627e-05, + "loss": 2.5492, + "step": 986000 + }, + { + "epoch": 4.89, + "learning_rate": 4.7557163240736614e-05, + "loss": 2.5359, + "step": 986500 + }, + { + "epoch": 4.89, + "learning_rate": 4.755592465431053e-05, + "loss": 2.5285, + "step": 987000 + }, + { + "epoch": 4.89, + "learning_rate": 4.755468854505729e-05, + "loss": 2.5059, + "step": 987500 + }, + { + "epoch": 4.89, + "learning_rate": 4.755344995863121e-05, + "loss": 2.5226, + "step": 988000 + }, + { + "epoch": 4.9, + "learning_rate": 4.755221137220513e-05, + "loss": 2.5191, + "step": 988500 + }, + { + "epoch": 4.9, + "learning_rate": 4.75509752629519e-05, + "loss": 2.5028, + "step": 989000 + }, + { + "epoch": 4.9, + "learning_rate": 4.754973915369867e-05, + "loss": 2.5224, + "step": 989500 + }, + { + "epoch": 4.9, + "learning_rate": 4.7548503044445434e-05, + "loss": 2.5351, + "step": 990000 + }, + { + "epoch": 4.91, + "learning_rate": 4.754726445801935e-05, + "loss": 2.5295, + "step": 990500 + }, + { + "epoch": 4.91, + "learning_rate": 4.754602587159327e-05, + "loss": 2.5256, + "step": 991000 + }, + { + "epoch": 4.91, + "learning_rate": 4.7544787285167184e-05, + "loss": 2.5175, + "step": 991500 + }, + { + "epoch": 4.91, + "learning_rate": 4.75435486987411e-05, + "loss": 2.5368, + "step": 992000 + }, + { + "epoch": 4.92, + "learning_rate": 4.754231011231502e-05, + "loss": 2.5184, + "step": 992500 + }, + { + "epoch": 4.92, + "learning_rate": 4.7541071525888935e-05, + "loss": 2.5307, + "step": 993000 + }, + { + "epoch": 4.92, + "learning_rate": 4.753983293946285e-05, + "loss": 2.5217, + "step": 993500 + }, + { + "epoch": 4.92, + "learning_rate": 4.753859435303677e-05, + "loss": 2.5294, + "step": 994000 + }, + { + "epoch": 4.93, + "learning_rate": 4.753735824378354e-05, + "loss": 2.505, + "step": 994500 + }, + { + "epoch": 4.93, + "learning_rate": 4.7536119657357455e-05, + "loss": 2.5285, + "step": 995000 + }, + { + "epoch": 4.93, + "learning_rate": 4.753488107093137e-05, + "loss": 2.5311, + "step": 995500 + }, + { + "epoch": 4.93, + "learning_rate": 4.753364248450529e-05, + "loss": 2.5233, + "step": 996000 + }, + { + "epoch": 4.94, + "learning_rate": 4.7532403898079206e-05, + "loss": 2.5115, + "step": 996500 + }, + { + "epoch": 4.94, + "learning_rate": 4.753116531165312e-05, + "loss": 2.5477, + "step": 997000 + }, + { + "epoch": 4.94, + "learning_rate": 4.7529929202399885e-05, + "loss": 2.5459, + "step": 997500 + }, + { + "epoch": 4.94, + "learning_rate": 4.75286906159738e-05, + "loss": 2.5032, + "step": 998000 + }, + { + "epoch": 4.95, + "learning_rate": 4.752745202954772e-05, + "loss": 2.506, + "step": 998500 + }, + { + "epoch": 4.95, + "learning_rate": 4.7526213443121635e-05, + "loss": 2.5271, + "step": 999000 + }, + { + "epoch": 4.95, + "learning_rate": 4.7524977333868404e-05, + "loss": 2.5142, + "step": 999500 + }, + { + "epoch": 4.95, + "learning_rate": 4.752373874744232e-05, + "loss": 2.5061, + "step": 1000000 + }, + { + "epoch": 4.96, + "learning_rate": 4.752250016101624e-05, + "loss": 2.5104, + "step": 1000500 + }, + { + "epoch": 4.96, + "learning_rate": 4.7521261574590155e-05, + "loss": 2.5104, + "step": 1001000 + }, + { + "epoch": 4.96, + "learning_rate": 4.752002298816407e-05, + "loss": 2.5243, + "step": 1001500 + }, + { + "epoch": 4.96, + "learning_rate": 4.751878440173799e-05, + "loss": 2.5186, + "step": 1002000 + }, + { + "epoch": 4.97, + "learning_rate": 4.7517545815311906e-05, + "loss": 2.52, + "step": 1002500 + }, + { + "epoch": 4.97, + "learning_rate": 4.751630722888582e-05, + "loss": 2.5164, + "step": 1003000 + }, + { + "epoch": 4.97, + "learning_rate": 4.751506864245974e-05, + "loss": 2.5276, + "step": 1003500 + }, + { + "epoch": 4.97, + "learning_rate": 4.7513830056033656e-05, + "loss": 2.5109, + "step": 1004000 + }, + { + "epoch": 4.98, + "learning_rate": 4.751259146960757e-05, + "loss": 2.5262, + "step": 1004500 + }, + { + "epoch": 4.98, + "learning_rate": 4.7511352883181484e-05, + "loss": 2.5065, + "step": 1005000 + }, + { + "epoch": 4.98, + "learning_rate": 4.751011677392825e-05, + "loss": 2.5033, + "step": 1005500 + }, + { + "epoch": 4.98, + "learning_rate": 4.750887818750217e-05, + "loss": 2.5218, + "step": 1006000 + }, + { + "epoch": 4.99, + "learning_rate": 4.7507639601076086e-05, + "loss": 2.5046, + "step": 1006500 + }, + { + "epoch": 4.99, + "learning_rate": 4.750640101465e-05, + "loss": 2.5356, + "step": 1007000 + }, + { + "epoch": 4.99, + "learning_rate": 4.750516490539677e-05, + "loss": 2.5308, + "step": 1007500 + }, + { + "epoch": 4.99, + "learning_rate": 4.750392879614354e-05, + "loss": 2.5266, + "step": 1008000 + }, + { + "epoch": 5.0, + "learning_rate": 4.750269020971745e-05, + "loss": 2.5191, + "step": 1008500 + }, + { + "epoch": 5.0, + "learning_rate": 4.750145162329137e-05, + "loss": 2.5071, + "step": 1009000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.635423503873417, + "eval_accuracy_mlm": 0.588502945831283, + "eval_accuracy_nsp": 0.8568240383747975, + "eval_loss": 2.4675350189208984, + "eval_runtime": 146.0196, + "eval_samples_per_second": 1746.06, + "eval_steps_per_second": 72.757, + "step": 1009215 + }, + { + "epoch": 5.0, + "learning_rate": 4.7500213036865285e-05, + "loss": 2.5021, + "step": 1009500 + }, + { + "epoch": 5.0, + "learning_rate": 4.74989744504392e-05, + "loss": 2.4848, + "step": 1010000 + }, + { + "epoch": 5.01, + "learning_rate": 4.749773586401312e-05, + "loss": 2.4841, + "step": 1010500 + }, + { + "epoch": 5.01, + "learning_rate": 4.7496497277587036e-05, + "loss": 2.4968, + "step": 1011000 + }, + { + "epoch": 5.01, + "learning_rate": 4.749525869116095e-05, + "loss": 2.4742, + "step": 1011500 + }, + { + "epoch": 5.01, + "learning_rate": 4.749402010473487e-05, + "loss": 2.5161, + "step": 1012000 + }, + { + "epoch": 5.02, + "learning_rate": 4.7492781518308786e-05, + "loss": 2.496, + "step": 1012500 + }, + { + "epoch": 5.02, + "learning_rate": 4.74915429318827e-05, + "loss": 2.4761, + "step": 1013000 + }, + { + "epoch": 5.02, + "learning_rate": 4.749030434545662e-05, + "loss": 2.514, + "step": 1013500 + }, + { + "epoch": 5.02, + "learning_rate": 4.748906823620339e-05, + "loss": 2.4858, + "step": 1014000 + }, + { + "epoch": 5.03, + "learning_rate": 4.7487829649777306e-05, + "loss": 2.5104, + "step": 1014500 + }, + { + "epoch": 5.03, + "learning_rate": 4.7486593540524075e-05, + "loss": 2.4841, + "step": 1015000 + }, + { + "epoch": 5.03, + "learning_rate": 4.7485354954097985e-05, + "loss": 2.4851, + "step": 1015500 + }, + { + "epoch": 5.03, + "learning_rate": 4.748411884484476e-05, + "loss": 2.4796, + "step": 1016000 + }, + { + "epoch": 5.04, + "learning_rate": 4.748288025841867e-05, + "loss": 2.512, + "step": 1016500 + }, + { + "epoch": 5.04, + "learning_rate": 4.7481644149165446e-05, + "loss": 2.4949, + "step": 1017000 + }, + { + "epoch": 5.04, + "learning_rate": 4.748040556273936e-05, + "loss": 2.4787, + "step": 1017500 + }, + { + "epoch": 5.04, + "learning_rate": 4.747916697631328e-05, + "loss": 2.4854, + "step": 1018000 + }, + { + "epoch": 5.05, + "learning_rate": 4.74779283898872e-05, + "loss": 2.505, + "step": 1018500 + }, + { + "epoch": 5.05, + "learning_rate": 4.7476689803461114e-05, + "loss": 2.4824, + "step": 1019000 + }, + { + "epoch": 5.05, + "learning_rate": 4.7475451217035024e-05, + "loss": 2.5006, + "step": 1019500 + }, + { + "epoch": 5.05, + "learning_rate": 4.747421263060894e-05, + "loss": 2.4915, + "step": 1020000 + }, + { + "epoch": 5.06, + "learning_rate": 4.747297652135571e-05, + "loss": 2.5021, + "step": 1020500 + }, + { + "epoch": 5.06, + "learning_rate": 4.747173793492963e-05, + "loss": 2.4845, + "step": 1021000 + }, + { + "epoch": 5.06, + "learning_rate": 4.7470499348503544e-05, + "loss": 2.4846, + "step": 1021500 + }, + { + "epoch": 5.06, + "learning_rate": 4.746926076207746e-05, + "loss": 2.4703, + "step": 1022000 + }, + { + "epoch": 5.07, + "learning_rate": 4.746802465282423e-05, + "loss": 2.5154, + "step": 1022500 + }, + { + "epoch": 5.07, + "learning_rate": 4.7466786066398146e-05, + "loss": 2.4895, + "step": 1023000 + }, + { + "epoch": 5.07, + "learning_rate": 4.746554747997206e-05, + "loss": 2.4909, + "step": 1023500 + }, + { + "epoch": 5.07, + "learning_rate": 4.746430889354598e-05, + "loss": 2.4931, + "step": 1024000 + }, + { + "epoch": 5.08, + "learning_rate": 4.74630703071199e-05, + "loss": 2.4837, + "step": 1024500 + }, + { + "epoch": 5.08, + "learning_rate": 4.746183667503951e-05, + "loss": 2.499, + "step": 1025000 + }, + { + "epoch": 5.08, + "learning_rate": 4.746059808861343e-05, + "loss": 2.5291, + "step": 1025500 + }, + { + "epoch": 5.08, + "learning_rate": 4.7459359502187345e-05, + "loss": 2.4858, + "step": 1026000 + }, + { + "epoch": 5.09, + "learning_rate": 4.745812091576126e-05, + "loss": 2.4969, + "step": 1026500 + }, + { + "epoch": 5.09, + "learning_rate": 4.745688232933518e-05, + "loss": 2.4516, + "step": 1027000 + }, + { + "epoch": 5.09, + "learning_rate": 4.7455643742909096e-05, + "loss": 2.5106, + "step": 1027500 + }, + { + "epoch": 5.09, + "learning_rate": 4.745440515648301e-05, + "loss": 2.4804, + "step": 1028000 + }, + { + "epoch": 5.1, + "learning_rate": 4.745316657005693e-05, + "loss": 2.4969, + "step": 1028500 + }, + { + "epoch": 5.1, + "learning_rate": 4.7451927983630846e-05, + "loss": 2.5142, + "step": 1029000 + }, + { + "epoch": 5.1, + "learning_rate": 4.745068939720476e-05, + "loss": 2.4816, + "step": 1029500 + }, + { + "epoch": 5.1, + "learning_rate": 4.744945081077868e-05, + "loss": 2.4882, + "step": 1030000 + }, + { + "epoch": 5.11, + "learning_rate": 4.74482122243526e-05, + "loss": 2.4912, + "step": 1030500 + }, + { + "epoch": 5.11, + "learning_rate": 4.7446973637926514e-05, + "loss": 2.4795, + "step": 1031000 + }, + { + "epoch": 5.11, + "learning_rate": 4.744573505150043e-05, + "loss": 2.498, + "step": 1031500 + }, + { + "epoch": 5.11, + "learning_rate": 4.744449646507434e-05, + "loss": 2.5025, + "step": 1032000 + }, + { + "epoch": 5.12, + "learning_rate": 4.744325787864826e-05, + "loss": 2.4867, + "step": 1032500 + }, + { + "epoch": 5.12, + "learning_rate": 4.7442019292222175e-05, + "loss": 2.5047, + "step": 1033000 + }, + { + "epoch": 5.12, + "learning_rate": 4.744078070579609e-05, + "loss": 2.493, + "step": 1033500 + }, + { + "epoch": 5.12, + "learning_rate": 4.743954211937001e-05, + "loss": 2.4766, + "step": 1034000 + }, + { + "epoch": 5.13, + "learning_rate": 4.7438303532943926e-05, + "loss": 2.4811, + "step": 1034500 + }, + { + "epoch": 5.13, + "learning_rate": 4.743706742369069e-05, + "loss": 2.4792, + "step": 1035000 + }, + { + "epoch": 5.13, + "learning_rate": 4.7435828837264605e-05, + "loss": 2.4694, + "step": 1035500 + }, + { + "epoch": 5.13, + "learning_rate": 4.743459272801138e-05, + "loss": 2.4567, + "step": 1036000 + }, + { + "epoch": 5.14, + "learning_rate": 4.7433359095930994e-05, + "loss": 2.4914, + "step": 1036500 + }, + { + "epoch": 5.14, + "learning_rate": 4.743212050950491e-05, + "loss": 2.4963, + "step": 1037000 + }, + { + "epoch": 5.14, + "learning_rate": 4.743088192307883e-05, + "loss": 2.5135, + "step": 1037500 + }, + { + "epoch": 5.14, + "learning_rate": 4.7429643336652745e-05, + "loss": 2.5065, + "step": 1038000 + }, + { + "epoch": 5.15, + "learning_rate": 4.742840475022666e-05, + "loss": 2.5155, + "step": 1038500 + }, + { + "epoch": 5.15, + "learning_rate": 4.742716616380058e-05, + "loss": 2.5172, + "step": 1039000 + }, + { + "epoch": 5.15, + "learning_rate": 4.7425927577374496e-05, + "loss": 2.4787, + "step": 1039500 + }, + { + "epoch": 5.15, + "learning_rate": 4.742468899094841e-05, + "loss": 2.4847, + "step": 1040000 + }, + { + "epoch": 5.15, + "learning_rate": 4.742345040452233e-05, + "loss": 2.5204, + "step": 1040500 + }, + { + "epoch": 5.16, + "learning_rate": 4.742221181809625e-05, + "loss": 2.4876, + "step": 1041000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7420973231670164e-05, + "loss": 2.5044, + "step": 1041500 + }, + { + "epoch": 5.16, + "learning_rate": 4.741973464524408e-05, + "loss": 2.4813, + "step": 1042000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7418496058818e-05, + "loss": 2.4927, + "step": 1042500 + }, + { + "epoch": 5.17, + "learning_rate": 4.7417257472391914e-05, + "loss": 2.5097, + "step": 1043000 + }, + { + "epoch": 5.17, + "learning_rate": 4.741601888596583e-05, + "loss": 2.5111, + "step": 1043500 + }, + { + "epoch": 5.17, + "learning_rate": 4.741478277671259e-05, + "loss": 2.5181, + "step": 1044000 + }, + { + "epoch": 5.17, + "learning_rate": 4.741354419028651e-05, + "loss": 2.5167, + "step": 1044500 + }, + { + "epoch": 5.18, + "learning_rate": 4.741230560386043e-05, + "loss": 2.5013, + "step": 1045000 + }, + { + "epoch": 5.18, + "learning_rate": 4.7411067017434344e-05, + "loss": 2.516, + "step": 1045500 + }, + { + "epoch": 5.18, + "learning_rate": 4.740982843100826e-05, + "loss": 2.4898, + "step": 1046000 + }, + { + "epoch": 5.18, + "learning_rate": 4.740858984458218e-05, + "loss": 2.5233, + "step": 1046500 + }, + { + "epoch": 5.19, + "learning_rate": 4.7407351258156095e-05, + "loss": 2.5021, + "step": 1047000 + }, + { + "epoch": 5.19, + "learning_rate": 4.7406115148902864e-05, + "loss": 2.4977, + "step": 1047500 + }, + { + "epoch": 5.19, + "learning_rate": 4.740487656247678e-05, + "loss": 2.5229, + "step": 1048000 + }, + { + "epoch": 5.19, + "learning_rate": 4.74036379760507e-05, + "loss": 2.5149, + "step": 1048500 + }, + { + "epoch": 5.2, + "learning_rate": 4.7402399389624615e-05, + "loss": 2.5227, + "step": 1049000 + }, + { + "epoch": 5.2, + "learning_rate": 4.740116328037138e-05, + "loss": 2.5052, + "step": 1049500 + }, + { + "epoch": 5.2, + "learning_rate": 4.7399924693945293e-05, + "loss": 2.5153, + "step": 1050000 + }, + { + "epoch": 5.2, + "learning_rate": 4.739868610751921e-05, + "loss": 2.5106, + "step": 1050500 + }, + { + "epoch": 5.21, + "learning_rate": 4.739744752109313e-05, + "loss": 2.4979, + "step": 1051000 + }, + { + "epoch": 5.21, + "learning_rate": 4.7396208934667044e-05, + "loss": 2.4952, + "step": 1051500 + }, + { + "epoch": 5.21, + "learning_rate": 4.739497034824096e-05, + "loss": 2.5124, + "step": 1052000 + }, + { + "epoch": 5.21, + "learning_rate": 4.739373176181488e-05, + "loss": 2.4948, + "step": 1052500 + }, + { + "epoch": 5.22, + "learning_rate": 4.7392493175388795e-05, + "loss": 2.5208, + "step": 1053000 + }, + { + "epoch": 5.22, + "learning_rate": 4.7391257066135564e-05, + "loss": 2.4947, + "step": 1053500 + }, + { + "epoch": 5.22, + "learning_rate": 4.7390023434055185e-05, + "loss": 2.505, + "step": 1054000 + }, + { + "epoch": 5.22, + "learning_rate": 4.7388784847629095e-05, + "loss": 2.5084, + "step": 1054500 + }, + { + "epoch": 5.23, + "learning_rate": 4.738754626120301e-05, + "loss": 2.5115, + "step": 1055000 + }, + { + "epoch": 5.23, + "learning_rate": 4.738630767477693e-05, + "loss": 2.5227, + "step": 1055500 + }, + { + "epoch": 5.23, + "learning_rate": 4.7385069088350846e-05, + "loss": 2.5089, + "step": 1056000 + }, + { + "epoch": 5.23, + "learning_rate": 4.738383050192476e-05, + "loss": 2.5291, + "step": 1056500 + }, + { + "epoch": 5.24, + "learning_rate": 4.738259191549868e-05, + "loss": 2.5091, + "step": 1057000 + }, + { + "epoch": 5.24, + "learning_rate": 4.7381353329072596e-05, + "loss": 2.4981, + "step": 1057500 + }, + { + "epoch": 5.24, + "learning_rate": 4.738011474264651e-05, + "loss": 2.4948, + "step": 1058000 + }, + { + "epoch": 5.24, + "learning_rate": 4.737887615622043e-05, + "loss": 2.5134, + "step": 1058500 + }, + { + "epoch": 5.25, + "learning_rate": 4.737763756979435e-05, + "loss": 2.528, + "step": 1059000 + }, + { + "epoch": 5.25, + "learning_rate": 4.7376398983368264e-05, + "loss": 2.5002, + "step": 1059500 + }, + { + "epoch": 5.25, + "learning_rate": 4.737516287411503e-05, + "loss": 2.4989, + "step": 1060000 + }, + { + "epoch": 5.25, + "learning_rate": 4.737392428768895e-05, + "loss": 2.5081, + "step": 1060500 + }, + { + "epoch": 5.26, + "learning_rate": 4.737268570126287e-05, + "loss": 2.4678, + "step": 1061000 + }, + { + "epoch": 5.26, + "learning_rate": 4.7371447114836784e-05, + "loss": 2.5176, + "step": 1061500 + }, + { + "epoch": 5.26, + "learning_rate": 4.73702085284107e-05, + "loss": 2.5324, + "step": 1062000 + }, + { + "epoch": 5.26, + "learning_rate": 4.736897489633032e-05, + "loss": 2.4977, + "step": 1062500 + }, + { + "epoch": 5.27, + "learning_rate": 4.736773630990424e-05, + "loss": 2.4846, + "step": 1063000 + }, + { + "epoch": 5.27, + "learning_rate": 4.7366497723478155e-05, + "loss": 2.4992, + "step": 1063500 + }, + { + "epoch": 5.27, + "learning_rate": 4.7365259137052065e-05, + "loss": 2.5183, + "step": 1064000 + }, + { + "epoch": 5.27, + "learning_rate": 4.736402055062598e-05, + "loss": 2.5053, + "step": 1064500 + }, + { + "epoch": 5.28, + "learning_rate": 4.73627819641999e-05, + "loss": 2.5262, + "step": 1065000 + }, + { + "epoch": 5.28, + "learning_rate": 4.7361543377773816e-05, + "loss": 2.515, + "step": 1065500 + }, + { + "epoch": 5.28, + "learning_rate": 4.736030479134773e-05, + "loss": 2.4938, + "step": 1066000 + }, + { + "epoch": 5.28, + "learning_rate": 4.735906620492165e-05, + "loss": 2.5149, + "step": 1066500 + }, + { + "epoch": 5.29, + "learning_rate": 4.735782761849557e-05, + "loss": 2.5019, + "step": 1067000 + }, + { + "epoch": 5.29, + "learning_rate": 4.735659150924233e-05, + "loss": 2.5112, + "step": 1067500 + }, + { + "epoch": 5.29, + "learning_rate": 4.7355352922816246e-05, + "loss": 2.5042, + "step": 1068000 + }, + { + "epoch": 5.29, + "learning_rate": 4.735411681356302e-05, + "loss": 2.5022, + "step": 1068500 + }, + { + "epoch": 5.3, + "learning_rate": 4.735287822713694e-05, + "loss": 2.5294, + "step": 1069000 + }, + { + "epoch": 5.3, + "learning_rate": 4.7351639640710855e-05, + "loss": 2.4906, + "step": 1069500 + }, + { + "epoch": 5.3, + "learning_rate": 4.735040105428477e-05, + "loss": 2.497, + "step": 1070000 + }, + { + "epoch": 5.3, + "learning_rate": 4.734916246785868e-05, + "loss": 2.5093, + "step": 1070500 + }, + { + "epoch": 5.31, + "learning_rate": 4.73479238814326e-05, + "loss": 2.4948, + "step": 1071000 + }, + { + "epoch": 5.31, + "learning_rate": 4.7346685295006516e-05, + "loss": 2.5087, + "step": 1071500 + }, + { + "epoch": 5.31, + "learning_rate": 4.734544670858043e-05, + "loss": 2.5076, + "step": 1072000 + }, + { + "epoch": 5.31, + "learning_rate": 4.734420812215435e-05, + "loss": 2.4856, + "step": 1072500 + }, + { + "epoch": 5.32, + "learning_rate": 4.734296953572827e-05, + "loss": 2.4979, + "step": 1073000 + }, + { + "epoch": 5.32, + "learning_rate": 4.7341730949302184e-05, + "loss": 2.4767, + "step": 1073500 + }, + { + "epoch": 5.32, + "learning_rate": 4.7340494840048946e-05, + "loss": 2.4792, + "step": 1074000 + }, + { + "epoch": 5.32, + "learning_rate": 4.733925625362286e-05, + "loss": 2.4931, + "step": 1074500 + }, + { + "epoch": 5.33, + "learning_rate": 4.733801766719678e-05, + "loss": 2.4973, + "step": 1075000 + }, + { + "epoch": 5.33, + "learning_rate": 4.73367790807707e-05, + "loss": 2.4988, + "step": 1075500 + }, + { + "epoch": 5.33, + "learning_rate": 4.7335540494344614e-05, + "loss": 2.4931, + "step": 1076000 + }, + { + "epoch": 5.33, + "learning_rate": 4.733430190791853e-05, + "loss": 2.5197, + "step": 1076500 + }, + { + "epoch": 5.34, + "learning_rate": 4.733306332149245e-05, + "loss": 2.4846, + "step": 1077000 + }, + { + "epoch": 5.34, + "learning_rate": 4.7331824735066364e-05, + "loss": 2.5041, + "step": 1077500 + }, + { + "epoch": 5.34, + "learning_rate": 4.733058862581313e-05, + "loss": 2.4894, + "step": 1078000 + }, + { + "epoch": 5.34, + "learning_rate": 4.732935003938705e-05, + "loss": 2.5004, + "step": 1078500 + }, + { + "epoch": 5.35, + "learning_rate": 4.732811145296097e-05, + "loss": 2.5042, + "step": 1079000 + }, + { + "epoch": 5.35, + "learning_rate": 4.7326875343707736e-05, + "loss": 2.4997, + "step": 1079500 + }, + { + "epoch": 5.35, + "learning_rate": 4.732563675728165e-05, + "loss": 2.5063, + "step": 1080000 + }, + { + "epoch": 5.35, + "learning_rate": 4.732439817085556e-05, + "loss": 2.5086, + "step": 1080500 + }, + { + "epoch": 5.36, + "learning_rate": 4.732315958442948e-05, + "loss": 2.4812, + "step": 1081000 + }, + { + "epoch": 5.36, + "learning_rate": 4.73219209980034e-05, + "loss": 2.4943, + "step": 1081500 + }, + { + "epoch": 5.36, + "learning_rate": 4.7320682411577314e-05, + "loss": 2.5031, + "step": 1082000 + }, + { + "epoch": 5.36, + "learning_rate": 4.731944382515123e-05, + "loss": 2.5015, + "step": 1082500 + }, + { + "epoch": 5.37, + "learning_rate": 4.731820523872515e-05, + "loss": 2.5043, + "step": 1083000 + }, + { + "epoch": 5.37, + "learning_rate": 4.7316966652299064e-05, + "loss": 2.5235, + "step": 1083500 + }, + { + "epoch": 5.37, + "learning_rate": 4.731572806587298e-05, + "loss": 2.491, + "step": 1084000 + }, + { + "epoch": 5.37, + "learning_rate": 4.73144894794469e-05, + "loss": 2.4956, + "step": 1084500 + }, + { + "epoch": 5.38, + "learning_rate": 4.7313250893020815e-05, + "loss": 2.5086, + "step": 1085000 + }, + { + "epoch": 5.38, + "learning_rate": 4.731201230659473e-05, + "loss": 2.534, + "step": 1085500 + }, + { + "epoch": 5.38, + "learning_rate": 4.73107761973415e-05, + "loss": 2.5147, + "step": 1086000 + }, + { + "epoch": 5.38, + "learning_rate": 4.730953761091542e-05, + "loss": 2.5247, + "step": 1086500 + }, + { + "epoch": 5.39, + "learning_rate": 4.7308299024489335e-05, + "loss": 2.5079, + "step": 1087000 + }, + { + "epoch": 5.39, + "learning_rate": 4.730706043806325e-05, + "loss": 2.5156, + "step": 1087500 + }, + { + "epoch": 5.39, + "learning_rate": 4.730582185163717e-05, + "loss": 2.4809, + "step": 1088000 + }, + { + "epoch": 5.39, + "learning_rate": 4.730458574238393e-05, + "loss": 2.5121, + "step": 1088500 + }, + { + "epoch": 5.4, + "learning_rate": 4.730334715595785e-05, + "loss": 2.5155, + "step": 1089000 + }, + { + "epoch": 5.4, + "learning_rate": 4.7302108569531765e-05, + "loss": 2.4882, + "step": 1089500 + }, + { + "epoch": 5.4, + "learning_rate": 4.730086998310568e-05, + "loss": 2.4941, + "step": 1090000 + }, + { + "epoch": 5.4, + "learning_rate": 4.72996313966796e-05, + "loss": 2.4765, + "step": 1090500 + }, + { + "epoch": 5.41, + "learning_rate": 4.7298392810253515e-05, + "loss": 2.4786, + "step": 1091000 + }, + { + "epoch": 5.41, + "learning_rate": 4.729715422382743e-05, + "loss": 2.5199, + "step": 1091500 + }, + { + "epoch": 5.41, + "learning_rate": 4.72959181145742e-05, + "loss": 2.4885, + "step": 1092000 + }, + { + "epoch": 5.41, + "learning_rate": 4.729468200532097e-05, + "loss": 2.523, + "step": 1092500 + }, + { + "epoch": 5.42, + "learning_rate": 4.729344589606774e-05, + "loss": 2.5936, + "step": 1093000 + }, + { + "epoch": 5.42, + "learning_rate": 4.7292207309641656e-05, + "loss": 2.5146, + "step": 1093500 + }, + { + "epoch": 5.42, + "learning_rate": 4.729096872321557e-05, + "loss": 2.5291, + "step": 1094000 + }, + { + "epoch": 5.42, + "learning_rate": 4.728973013678949e-05, + "loss": 2.5103, + "step": 1094500 + }, + { + "epoch": 5.43, + "learning_rate": 4.7288491550363406e-05, + "loss": 2.5062, + "step": 1095000 + }, + { + "epoch": 5.43, + "learning_rate": 4.728725791828302e-05, + "loss": 2.5114, + "step": 1095500 + }, + { + "epoch": 5.43, + "learning_rate": 4.728602180902979e-05, + "loss": 2.5239, + "step": 1096000 + }, + { + "epoch": 5.43, + "learning_rate": 4.7284783222603706e-05, + "loss": 2.5374, + "step": 1096500 + }, + { + "epoch": 5.43, + "learning_rate": 4.728354463617762e-05, + "loss": 2.5018, + "step": 1097000 + }, + { + "epoch": 5.44, + "learning_rate": 4.728230604975154e-05, + "loss": 2.4786, + "step": 1097500 + }, + { + "epoch": 5.44, + "learning_rate": 4.7281069940498316e-05, + "loss": 2.5172, + "step": 1098000 + }, + { + "epoch": 5.44, + "learning_rate": 4.727983135407223e-05, + "loss": 2.5365, + "step": 1098500 + }, + { + "epoch": 5.44, + "learning_rate": 4.727859276764614e-05, + "loss": 2.6215, + "step": 1099000 + }, + { + "epoch": 5.45, + "learning_rate": 4.727735418122006e-05, + "loss": 2.5575, + "step": 1099500 + }, + { + "epoch": 5.45, + "learning_rate": 4.7276115594793977e-05, + "loss": 2.549, + "step": 1100000 + }, + { + "epoch": 5.45, + "learning_rate": 4.7274877008367893e-05, + "loss": 2.538, + "step": 1100500 + }, + { + "epoch": 5.45, + "learning_rate": 4.727364089911466e-05, + "loss": 2.698, + "step": 1101000 + }, + { + "epoch": 5.46, + "learning_rate": 4.727240231268858e-05, + "loss": 2.6845, + "step": 1101500 + }, + { + "epoch": 5.46, + "learning_rate": 4.727116372626249e-05, + "loss": 2.6379, + "step": 1102000 + }, + { + "epoch": 5.46, + "learning_rate": 4.7269925139836406e-05, + "loss": 2.5858, + "step": 1102500 + }, + { + "epoch": 5.46, + "learning_rate": 4.726868655341032e-05, + "loss": 2.5711, + "step": 1103000 + }, + { + "epoch": 5.47, + "learning_rate": 4.7267452921329944e-05, + "loss": 2.6267, + "step": 1103500 + }, + { + "epoch": 5.47, + "learning_rate": 4.726621681207671e-05, + "loss": 2.6757, + "step": 1104000 + }, + { + "epoch": 5.47, + "learning_rate": 4.726497822565063e-05, + "loss": 2.6265, + "step": 1104500 + }, + { + "epoch": 5.47, + "learning_rate": 4.7263739639224547e-05, + "loss": 2.6267, + "step": 1105000 + }, + { + "epoch": 5.48, + "learning_rate": 4.7262501052798463e-05, + "loss": 2.6607, + "step": 1105500 + }, + { + "epoch": 5.48, + "learning_rate": 4.726126246637238e-05, + "loss": 2.5848, + "step": 1106000 + }, + { + "epoch": 5.48, + "learning_rate": 4.72600238799463e-05, + "loss": 2.5908, + "step": 1106500 + }, + { + "epoch": 5.48, + "learning_rate": 4.7258785293520214e-05, + "loss": 2.6656, + "step": 1107000 + }, + { + "epoch": 5.49, + "learning_rate": 4.725754918426698e-05, + "loss": 2.5915, + "step": 1107500 + }, + { + "epoch": 5.49, + "learning_rate": 4.72563105978409e-05, + "loss": 2.6518, + "step": 1108000 + }, + { + "epoch": 5.49, + "learning_rate": 4.725507201141482e-05, + "loss": 2.617, + "step": 1108500 + }, + { + "epoch": 5.49, + "learning_rate": 4.725383342498873e-05, + "loss": 2.6337, + "step": 1109000 + }, + { + "epoch": 5.5, + "learning_rate": 4.7252594838562644e-05, + "loss": 2.5885, + "step": 1109500 + }, + { + "epoch": 5.5, + "learning_rate": 4.725135625213656e-05, + "loss": 2.5465, + "step": 1110000 + }, + { + "epoch": 5.5, + "learning_rate": 4.725011766571048e-05, + "loss": 2.5822, + "step": 1110500 + }, + { + "epoch": 5.5, + "learning_rate": 4.7248879079284395e-05, + "loss": 2.6102, + "step": 1111000 + }, + { + "epoch": 5.51, + "learning_rate": 4.724764049285831e-05, + "loss": 2.6242, + "step": 1111500 + }, + { + "epoch": 5.51, + "learning_rate": 4.724640438360508e-05, + "loss": 2.6015, + "step": 1112000 + }, + { + "epoch": 5.51, + "learning_rate": 4.7245165797179e-05, + "loss": 2.5667, + "step": 1112500 + }, + { + "epoch": 5.51, + "learning_rate": 4.7243927210752914e-05, + "loss": 2.5967, + "step": 1113000 + }, + { + "epoch": 5.52, + "learning_rate": 4.724268862432683e-05, + "loss": 2.591, + "step": 1113500 + }, + { + "epoch": 5.52, + "learning_rate": 4.72414525150736e-05, + "loss": 2.6024, + "step": 1114000 + }, + { + "epoch": 5.52, + "learning_rate": 4.724021392864752e-05, + "loss": 2.6239, + "step": 1114500 + }, + { + "epoch": 5.52, + "learning_rate": 4.7238975342221434e-05, + "loss": 2.6354, + "step": 1115000 + }, + { + "epoch": 5.53, + "learning_rate": 4.723773675579535e-05, + "loss": 2.6408, + "step": 1115500 + }, + { + "epoch": 5.53, + "learning_rate": 4.723649816936926e-05, + "loss": 2.6031, + "step": 1116000 + }, + { + "epoch": 5.53, + "learning_rate": 4.723525958294318e-05, + "loss": 2.6564, + "step": 1116500 + }, + { + "epoch": 5.53, + "learning_rate": 4.7234020996517095e-05, + "loss": 2.5826, + "step": 1117000 + }, + { + "epoch": 5.54, + "learning_rate": 4.723278241009101e-05, + "loss": 2.583, + "step": 1117500 + }, + { + "epoch": 5.54, + "learning_rate": 4.723154382366493e-05, + "loss": 2.5424, + "step": 1118000 + }, + { + "epoch": 5.54, + "learning_rate": 4.7230305237238846e-05, + "loss": 2.5366, + "step": 1118500 + }, + { + "epoch": 5.54, + "learning_rate": 4.722906665081276e-05, + "loss": 2.5275, + "step": 1119000 + }, + { + "epoch": 5.55, + "learning_rate": 4.722782806438668e-05, + "loss": 2.546, + "step": 1119500 + }, + { + "epoch": 5.55, + "learning_rate": 4.7226589477960596e-05, + "loss": 2.5238, + "step": 1120000 + }, + { + "epoch": 5.55, + "learning_rate": 4.7225350891534513e-05, + "loss": 2.535, + "step": 1120500 + }, + { + "epoch": 5.55, + "learning_rate": 4.7224112305108424e-05, + "loss": 2.52, + "step": 1121000 + }, + { + "epoch": 5.56, + "learning_rate": 4.722287371868234e-05, + "loss": 2.5252, + "step": 1121500 + }, + { + "epoch": 5.56, + "learning_rate": 4.722163513225626e-05, + "loss": 2.544, + "step": 1122000 + }, + { + "epoch": 5.56, + "learning_rate": 4.7220396545830174e-05, + "loss": 2.5424, + "step": 1122500 + }, + { + "epoch": 5.56, + "learning_rate": 4.721915795940409e-05, + "loss": 2.5489, + "step": 1123000 + }, + { + "epoch": 5.57, + "learning_rate": 4.721791937297801e-05, + "loss": 2.5057, + "step": 1123500 + }, + { + "epoch": 5.57, + "learning_rate": 4.7216680786551925e-05, + "loss": 2.5233, + "step": 1124000 + }, + { + "epoch": 5.57, + "learning_rate": 4.721544220012584e-05, + "loss": 2.53, + "step": 1124500 + }, + { + "epoch": 5.57, + "learning_rate": 4.721420361369976e-05, + "loss": 2.5091, + "step": 1125000 + }, + { + "epoch": 5.58, + "learning_rate": 4.7212965027273676e-05, + "loss": 2.5253, + "step": 1125500 + }, + { + "epoch": 5.58, + "learning_rate": 4.7211728918020445e-05, + "loss": 2.5188, + "step": 1126000 + }, + { + "epoch": 5.58, + "learning_rate": 4.721049033159436e-05, + "loss": 2.522, + "step": 1126500 + }, + { + "epoch": 5.58, + "learning_rate": 4.720925669951398e-05, + "loss": 2.5627, + "step": 1127000 + }, + { + "epoch": 5.59, + "learning_rate": 4.72080181130879e-05, + "loss": 2.526, + "step": 1127500 + }, + { + "epoch": 5.59, + "learning_rate": 4.7206779526661816e-05, + "loss": 2.5394, + "step": 1128000 + }, + { + "epoch": 5.59, + "learning_rate": 4.720554094023573e-05, + "loss": 2.5182, + "step": 1128500 + }, + { + "epoch": 5.59, + "learning_rate": 4.720430235380965e-05, + "loss": 2.568, + "step": 1129000 + }, + { + "epoch": 5.6, + "learning_rate": 4.720306624455641e-05, + "loss": 2.5795, + "step": 1129500 + }, + { + "epoch": 5.6, + "learning_rate": 4.720182765813033e-05, + "loss": 2.6119, + "step": 1130000 + }, + { + "epoch": 5.6, + "learning_rate": 4.7200589071704246e-05, + "loss": 2.5682, + "step": 1130500 + }, + { + "epoch": 5.6, + "learning_rate": 4.719935048527816e-05, + "loss": 2.5436, + "step": 1131000 + }, + { + "epoch": 5.61, + "learning_rate": 4.719811189885208e-05, + "loss": 2.5482, + "step": 1131500 + }, + { + "epoch": 5.61, + "learning_rate": 4.7196873312426e-05, + "loss": 2.5595, + "step": 1132000 + }, + { + "epoch": 5.61, + "learning_rate": 4.7195634725999914e-05, + "loss": 2.5303, + "step": 1132500 + }, + { + "epoch": 5.61, + "learning_rate": 4.719439861674668e-05, + "loss": 2.5571, + "step": 1133000 + }, + { + "epoch": 5.62, + "learning_rate": 4.71931600303206e-05, + "loss": 2.5494, + "step": 1133500 + }, + { + "epoch": 5.62, + "learning_rate": 4.7191921443894516e-05, + "loss": 2.5194, + "step": 1134000 + }, + { + "epoch": 5.62, + "learning_rate": 4.719068285746843e-05, + "loss": 2.5303, + "step": 1134500 + }, + { + "epoch": 5.62, + "learning_rate": 4.718944427104235e-05, + "loss": 2.5406, + "step": 1135000 + }, + { + "epoch": 5.63, + "learning_rate": 4.718820568461627e-05, + "loss": 2.5231, + "step": 1135500 + }, + { + "epoch": 5.63, + "learning_rate": 4.7186967098190184e-05, + "loss": 2.5372, + "step": 1136000 + }, + { + "epoch": 5.63, + "learning_rate": 4.7185728511764094e-05, + "loss": 2.5714, + "step": 1136500 + }, + { + "epoch": 5.63, + "learning_rate": 4.718448992533801e-05, + "loss": 2.5142, + "step": 1137000 + }, + { + "epoch": 5.64, + "learning_rate": 4.718325381608478e-05, + "loss": 2.5527, + "step": 1137500 + }, + { + "epoch": 5.64, + "learning_rate": 4.71820152296587e-05, + "loss": 2.574, + "step": 1138000 + }, + { + "epoch": 5.64, + "learning_rate": 4.7180776643232614e-05, + "loss": 2.5483, + "step": 1138500 + }, + { + "epoch": 5.64, + "learning_rate": 4.717953805680653e-05, + "loss": 2.5275, + "step": 1139000 + }, + { + "epoch": 5.65, + "learning_rate": 4.717829947038044e-05, + "loss": 2.5176, + "step": 1139500 + }, + { + "epoch": 5.65, + "learning_rate": 4.717706088395436e-05, + "loss": 2.5255, + "step": 1140000 + }, + { + "epoch": 5.65, + "learning_rate": 4.717582477470113e-05, + "loss": 2.5277, + "step": 1140500 + }, + { + "epoch": 5.65, + "learning_rate": 4.717458618827505e-05, + "loss": 2.5264, + "step": 1141000 + }, + { + "epoch": 5.66, + "learning_rate": 4.717334760184897e-05, + "loss": 2.5172, + "step": 1141500 + }, + { + "epoch": 5.66, + "learning_rate": 4.7172109015422884e-05, + "loss": 2.5394, + "step": 1142000 + }, + { + "epoch": 5.66, + "learning_rate": 4.71708704289968e-05, + "loss": 2.4941, + "step": 1142500 + }, + { + "epoch": 5.66, + "learning_rate": 4.716963184257071e-05, + "loss": 2.5234, + "step": 1143000 + }, + { + "epoch": 5.67, + "learning_rate": 4.716839325614463e-05, + "loss": 2.5233, + "step": 1143500 + }, + { + "epoch": 5.67, + "learning_rate": 4.71671571468914e-05, + "loss": 2.5376, + "step": 1144000 + }, + { + "epoch": 5.67, + "learning_rate": 4.7165918560465314e-05, + "loss": 2.5352, + "step": 1144500 + }, + { + "epoch": 5.67, + "learning_rate": 4.716467997403923e-05, + "loss": 2.5087, + "step": 1145000 + }, + { + "epoch": 5.68, + "learning_rate": 4.716344138761315e-05, + "loss": 2.5188, + "step": 1145500 + }, + { + "epoch": 5.68, + "learning_rate": 4.716220280118706e-05, + "loss": 2.5193, + "step": 1146000 + }, + { + "epoch": 5.68, + "learning_rate": 4.7160964214760975e-05, + "loss": 2.5462, + "step": 1146500 + }, + { + "epoch": 5.68, + "learning_rate": 4.71597305826806e-05, + "loss": 2.5272, + "step": 1147000 + }, + { + "epoch": 5.69, + "learning_rate": 4.715849199625452e-05, + "loss": 2.5139, + "step": 1147500 + }, + { + "epoch": 5.69, + "learning_rate": 4.7157253409828436e-05, + "loss": 2.5406, + "step": 1148000 + }, + { + "epoch": 5.69, + "learning_rate": 4.715601482340235e-05, + "loss": 2.5199, + "step": 1148500 + }, + { + "epoch": 5.69, + "learning_rate": 4.715477623697627e-05, + "loss": 2.4929, + "step": 1149000 + }, + { + "epoch": 5.7, + "learning_rate": 4.715354012772303e-05, + "loss": 2.5136, + "step": 1149500 + }, + { + "epoch": 5.7, + "learning_rate": 4.715230154129695e-05, + "loss": 2.5388, + "step": 1150000 + }, + { + "epoch": 5.7, + "learning_rate": 4.7151062954870866e-05, + "loss": 2.5158, + "step": 1150500 + }, + { + "epoch": 5.7, + "learning_rate": 4.714982436844478e-05, + "loss": 2.4921, + "step": 1151000 + }, + { + "epoch": 5.7, + "learning_rate": 4.71485857820187e-05, + "loss": 2.5272, + "step": 1151500 + }, + { + "epoch": 5.71, + "learning_rate": 4.714734719559262e-05, + "loss": 2.5284, + "step": 1152000 + }, + { + "epoch": 5.71, + "learning_rate": 4.7146108609166534e-05, + "loss": 2.5297, + "step": 1152500 + }, + { + "epoch": 5.71, + "learning_rate": 4.714487002274045e-05, + "loss": 2.5186, + "step": 1153000 + }, + { + "epoch": 5.71, + "learning_rate": 4.714363143631437e-05, + "loss": 2.4969, + "step": 1153500 + }, + { + "epoch": 5.72, + "learning_rate": 4.7142392849888284e-05, + "loss": 2.5141, + "step": 1154000 + }, + { + "epoch": 5.72, + "learning_rate": 4.71411542634622e-05, + "loss": 2.5465, + "step": 1154500 + }, + { + "epoch": 5.72, + "learning_rate": 4.713991567703612e-05, + "loss": 2.5263, + "step": 1155000 + }, + { + "epoch": 5.72, + "learning_rate": 4.713867709061003e-05, + "loss": 2.5282, + "step": 1155500 + }, + { + "epoch": 5.73, + "learning_rate": 4.713744345852965e-05, + "loss": 2.5718, + "step": 1156000 + }, + { + "epoch": 5.73, + "learning_rate": 4.7136204872103566e-05, + "loss": 2.5614, + "step": 1156500 + }, + { + "epoch": 5.73, + "learning_rate": 4.7134968762850335e-05, + "loss": 2.5299, + "step": 1157000 + }, + { + "epoch": 5.73, + "learning_rate": 4.713373017642425e-05, + "loss": 2.5281, + "step": 1157500 + }, + { + "epoch": 5.74, + "learning_rate": 4.713249158999817e-05, + "loss": 2.503, + "step": 1158000 + }, + { + "epoch": 5.74, + "learning_rate": 4.7131253003572086e-05, + "loss": 2.5163, + "step": 1158500 + }, + { + "epoch": 5.74, + "learning_rate": 4.7130014417146e-05, + "loss": 2.5324, + "step": 1159000 + }, + { + "epoch": 5.74, + "learning_rate": 4.712877583071992e-05, + "loss": 2.522, + "step": 1159500 + }, + { + "epoch": 5.75, + "learning_rate": 4.7127537244293836e-05, + "loss": 2.55, + "step": 1160000 + }, + { + "epoch": 5.75, + "learning_rate": 4.712629865786775e-05, + "loss": 2.5301, + "step": 1160500 + }, + { + "epoch": 5.75, + "learning_rate": 4.712506007144167e-05, + "loss": 2.5052, + "step": 1161000 + }, + { + "epoch": 5.75, + "learning_rate": 4.712382148501559e-05, + "loss": 2.4877, + "step": 1161500 + }, + { + "epoch": 5.76, + "learning_rate": 4.7122582898589504e-05, + "loss": 2.5126, + "step": 1162000 + }, + { + "epoch": 5.76, + "learning_rate": 4.712134431216342e-05, + "loss": 2.5242, + "step": 1162500 + }, + { + "epoch": 5.76, + "learning_rate": 4.712010572573734e-05, + "loss": 2.5206, + "step": 1163000 + }, + { + "epoch": 5.76, + "learning_rate": 4.711886713931125e-05, + "loss": 2.5255, + "step": 1163500 + }, + { + "epoch": 5.77, + "learning_rate": 4.711763103005802e-05, + "loss": 2.5042, + "step": 1164000 + }, + { + "epoch": 5.77, + "learning_rate": 4.7116392443631934e-05, + "loss": 2.5521, + "step": 1164500 + }, + { + "epoch": 5.77, + "learning_rate": 4.711515385720585e-05, + "loss": 2.5334, + "step": 1165000 + }, + { + "epoch": 5.77, + "learning_rate": 4.711391527077977e-05, + "loss": 2.536, + "step": 1165500 + }, + { + "epoch": 5.78, + "learning_rate": 4.7112676684353685e-05, + "loss": 2.5293, + "step": 1166000 + }, + { + "epoch": 5.78, + "learning_rate": 4.71114380979276e-05, + "loss": 2.5166, + "step": 1166500 + }, + { + "epoch": 5.78, + "learning_rate": 4.711019951150152e-05, + "loss": 2.4953, + "step": 1167000 + }, + { + "epoch": 5.78, + "learning_rate": 4.7108960925075435e-05, + "loss": 2.4882, + "step": 1167500 + }, + { + "epoch": 5.79, + "learning_rate": 4.7107724815822204e-05, + "loss": 2.5227, + "step": 1168000 + }, + { + "epoch": 5.79, + "learning_rate": 4.710648622939612e-05, + "loss": 2.514, + "step": 1168500 + }, + { + "epoch": 5.79, + "learning_rate": 4.710524764297004e-05, + "loss": 2.5371, + "step": 1169000 + }, + { + "epoch": 5.79, + "learning_rate": 4.7104009056543955e-05, + "loss": 2.5194, + "step": 1169500 + }, + { + "epoch": 5.8, + "learning_rate": 4.7102770470117865e-05, + "loss": 2.5025, + "step": 1170000 + }, + { + "epoch": 5.8, + "learning_rate": 4.710153188369178e-05, + "loss": 2.5248, + "step": 1170500 + }, + { + "epoch": 5.8, + "learning_rate": 4.71002932972657e-05, + "loss": 2.4913, + "step": 1171000 + }, + { + "epoch": 5.8, + "learning_rate": 4.7099054710839616e-05, + "loss": 2.5103, + "step": 1171500 + }, + { + "epoch": 5.81, + "learning_rate": 4.709781612441353e-05, + "loss": 2.4856, + "step": 1172000 + }, + { + "epoch": 5.81, + "learning_rate": 4.709657753798744e-05, + "loss": 2.536, + "step": 1172500 + }, + { + "epoch": 5.81, + "learning_rate": 4.709534142873422e-05, + "loss": 2.4982, + "step": 1173000 + }, + { + "epoch": 5.81, + "learning_rate": 4.7094102842308136e-05, + "loss": 2.5312, + "step": 1173500 + }, + { + "epoch": 5.82, + "learning_rate": 4.7092866733054904e-05, + "loss": 2.4911, + "step": 1174000 + }, + { + "epoch": 5.82, + "learning_rate": 4.709162814662882e-05, + "loss": 2.5099, + "step": 1174500 + }, + { + "epoch": 5.82, + "learning_rate": 4.709038956020274e-05, + "loss": 2.5238, + "step": 1175000 + }, + { + "epoch": 5.82, + "learning_rate": 4.7089150973776655e-05, + "loss": 2.5279, + "step": 1175500 + }, + { + "epoch": 5.83, + "learning_rate": 4.708791486452342e-05, + "loss": 2.5004, + "step": 1176000 + }, + { + "epoch": 5.83, + "learning_rate": 4.7086676278097334e-05, + "loss": 2.5314, + "step": 1176500 + }, + { + "epoch": 5.83, + "learning_rate": 4.708543769167125e-05, + "loss": 2.5071, + "step": 1177000 + }, + { + "epoch": 5.83, + "learning_rate": 4.708419910524517e-05, + "loss": 2.5197, + "step": 1177500 + }, + { + "epoch": 5.84, + "learning_rate": 4.7082960518819085e-05, + "loss": 2.5249, + "step": 1178000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7081721932393e-05, + "loss": 2.5054, + "step": 1178500 + }, + { + "epoch": 5.84, + "learning_rate": 4.708048334596692e-05, + "loss": 2.5106, + "step": 1179000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7079244759540836e-05, + "loss": 2.5198, + "step": 1179500 + }, + { + "epoch": 5.85, + "learning_rate": 4.7078008650287604e-05, + "loss": 2.5138, + "step": 1180000 + }, + { + "epoch": 5.85, + "learning_rate": 4.707677006386152e-05, + "loss": 2.5156, + "step": 1180500 + }, + { + "epoch": 5.85, + "learning_rate": 4.707553147743544e-05, + "loss": 2.5207, + "step": 1181000 + }, + { + "epoch": 5.85, + "learning_rate": 4.7074292891009355e-05, + "loss": 2.4657, + "step": 1181500 + }, + { + "epoch": 5.86, + "learning_rate": 4.707305430458327e-05, + "loss": 2.4906, + "step": 1182000 + }, + { + "epoch": 5.86, + "learning_rate": 4.707181571815719e-05, + "loss": 2.4839, + "step": 1182500 + }, + { + "epoch": 5.86, + "learning_rate": 4.7070577131731106e-05, + "loss": 2.514, + "step": 1183000 + }, + { + "epoch": 5.86, + "learning_rate": 4.7069338545305016e-05, + "loss": 2.4829, + "step": 1183500 + }, + { + "epoch": 5.87, + "learning_rate": 4.706809995887893e-05, + "loss": 2.5311, + "step": 1184000 + }, + { + "epoch": 5.87, + "learning_rate": 4.706686137245285e-05, + "loss": 2.5153, + "step": 1184500 + }, + { + "epoch": 5.87, + "learning_rate": 4.706562774037247e-05, + "loss": 2.5128, + "step": 1185000 + }, + { + "epoch": 5.87, + "learning_rate": 4.706438915394639e-05, + "loss": 2.5017, + "step": 1185500 + }, + { + "epoch": 5.88, + "learning_rate": 4.7063153044693156e-05, + "loss": 2.5261, + "step": 1186000 + }, + { + "epoch": 5.88, + "learning_rate": 4.7061914458267073e-05, + "loss": 2.5201, + "step": 1186500 + }, + { + "epoch": 5.88, + "learning_rate": 4.7060675871840984e-05, + "loss": 2.5188, + "step": 1187000 + }, + { + "epoch": 5.88, + "learning_rate": 4.70594372854149e-05, + "loss": 2.534, + "step": 1187500 + }, + { + "epoch": 5.89, + "learning_rate": 4.705819869898882e-05, + "loss": 2.5313, + "step": 1188000 + }, + { + "epoch": 5.89, + "learning_rate": 4.7056960112562734e-05, + "loss": 2.5164, + "step": 1188500 + }, + { + "epoch": 5.89, + "learning_rate": 4.705572152613665e-05, + "loss": 2.5065, + "step": 1189000 + }, + { + "epoch": 5.89, + "learning_rate": 4.705448293971057e-05, + "loss": 2.5141, + "step": 1189500 + }, + { + "epoch": 5.9, + "learning_rate": 4.7053244353284485e-05, + "loss": 2.4913, + "step": 1190000 + }, + { + "epoch": 5.9, + "learning_rate": 4.70520057668584e-05, + "loss": 2.5257, + "step": 1190500 + }, + { + "epoch": 5.9, + "learning_rate": 4.705076965760517e-05, + "loss": 2.5149, + "step": 1191000 + }, + { + "epoch": 5.9, + "learning_rate": 4.704953107117909e-05, + "loss": 2.524, + "step": 1191500 + }, + { + "epoch": 5.91, + "learning_rate": 4.7048292484753005e-05, + "loss": 2.5101, + "step": 1192000 + }, + { + "epoch": 5.91, + "learning_rate": 4.704705389832692e-05, + "loss": 2.5176, + "step": 1192500 + }, + { + "epoch": 5.91, + "learning_rate": 4.704581531190084e-05, + "loss": 2.5155, + "step": 1193000 + }, + { + "epoch": 5.91, + "learning_rate": 4.7044576725474756e-05, + "loss": 2.4957, + "step": 1193500 + }, + { + "epoch": 5.92, + "learning_rate": 4.704333813904867e-05, + "loss": 2.5396, + "step": 1194000 + }, + { + "epoch": 5.92, + "learning_rate": 4.7042102029795434e-05, + "loss": 2.5241, + "step": 1194500 + }, + { + "epoch": 5.92, + "learning_rate": 4.704086344336935e-05, + "loss": 2.5134, + "step": 1195000 + }, + { + "epoch": 5.92, + "learning_rate": 4.703962485694327e-05, + "loss": 2.5014, + "step": 1195500 + }, + { + "epoch": 5.93, + "learning_rate": 4.7038386270517185e-05, + "loss": 2.5075, + "step": 1196000 + }, + { + "epoch": 5.93, + "learning_rate": 4.70371476840911e-05, + "loss": 2.5256, + "step": 1196500 + }, + { + "epoch": 5.93, + "learning_rate": 4.703590909766502e-05, + "loss": 2.4918, + "step": 1197000 + }, + { + "epoch": 5.93, + "learning_rate": 4.7034670511238936e-05, + "loss": 2.4843, + "step": 1197500 + }, + { + "epoch": 5.94, + "learning_rate": 4.703343192481285e-05, + "loss": 2.5463, + "step": 1198000 + }, + { + "epoch": 5.94, + "learning_rate": 4.703219333838677e-05, + "loss": 2.4987, + "step": 1198500 + }, + { + "epoch": 5.94, + "learning_rate": 4.703095475196068e-05, + "loss": 2.5182, + "step": 1199000 + }, + { + "epoch": 5.94, + "learning_rate": 4.702972111988031e-05, + "loss": 2.5022, + "step": 1199500 + }, + { + "epoch": 5.95, + "learning_rate": 4.7028482533454224e-05, + "loss": 2.5201, + "step": 1200000 + }, + { + "epoch": 5.95, + "learning_rate": 4.7027243947028135e-05, + "loss": 2.5196, + "step": 1200500 + }, + { + "epoch": 5.95, + "learning_rate": 4.702600536060205e-05, + "loss": 2.5223, + "step": 1201000 + }, + { + "epoch": 5.95, + "learning_rate": 4.702476677417597e-05, + "loss": 2.5153, + "step": 1201500 + }, + { + "epoch": 5.96, + "learning_rate": 4.7023528187749885e-05, + "loss": 2.5415, + "step": 1202000 + }, + { + "epoch": 5.96, + "learning_rate": 4.70222896013238e-05, + "loss": 2.529, + "step": 1202500 + }, + { + "epoch": 5.96, + "learning_rate": 4.702105349207057e-05, + "loss": 2.5326, + "step": 1203000 + }, + { + "epoch": 5.96, + "learning_rate": 4.701981490564449e-05, + "loss": 2.5434, + "step": 1203500 + }, + { + "epoch": 5.97, + "learning_rate": 4.7018576319218405e-05, + "loss": 2.5482, + "step": 1204000 + }, + { + "epoch": 5.97, + "learning_rate": 4.701733773279232e-05, + "loss": 2.5158, + "step": 1204500 + }, + { + "epoch": 5.97, + "learning_rate": 4.701609914636624e-05, + "loss": 2.5229, + "step": 1205000 + }, + { + "epoch": 5.97, + "learning_rate": 4.7014860559940156e-05, + "loss": 2.5232, + "step": 1205500 + }, + { + "epoch": 5.97, + "learning_rate": 4.7013624450686925e-05, + "loss": 2.5345, + "step": 1206000 + }, + { + "epoch": 5.98, + "learning_rate": 4.701238586426084e-05, + "loss": 2.4912, + "step": 1206500 + }, + { + "epoch": 5.98, + "learning_rate": 4.701114727783475e-05, + "loss": 2.5206, + "step": 1207000 + }, + { + "epoch": 5.98, + "learning_rate": 4.700990869140867e-05, + "loss": 2.4784, + "step": 1207500 + }, + { + "epoch": 5.98, + "learning_rate": 4.7008670104982586e-05, + "loss": 2.4862, + "step": 1208000 + }, + { + "epoch": 5.99, + "learning_rate": 4.70074315185565e-05, + "loss": 2.5107, + "step": 1208500 + }, + { + "epoch": 5.99, + "learning_rate": 4.700619293213042e-05, + "loss": 2.5165, + "step": 1209000 + }, + { + "epoch": 5.99, + "learning_rate": 4.700495682287719e-05, + "loss": 2.5188, + "step": 1209500 + }, + { + "epoch": 5.99, + "learning_rate": 4.7003718236451105e-05, + "loss": 2.4935, + "step": 1210000 + }, + { + "epoch": 6.0, + "learning_rate": 4.700247965002502e-05, + "loss": 2.5275, + "step": 1210500 + }, + { + "epoch": 6.0, + "learning_rate": 4.700124106359894e-05, + "loss": 2.5212, + "step": 1211000 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6358168518016798, + "eval_accuracy_mlm": 0.5887641252682907, + "eval_accuracy_nsp": 0.8578281213842226, + "eval_loss": 2.467965841293335, + "eval_runtime": 145.9334, + "eval_samples_per_second": 1747.092, + "eval_steps_per_second": 72.8, + "step": 1211058 + }, + { + "epoch": 6.0, + "learning_rate": 4.7000002477172856e-05, + "loss": 2.4902, + "step": 1211500 + }, + { + "epoch": 6.0, + "learning_rate": 4.699876389074677e-05, + "loss": 2.493, + "step": 1212000 + }, + { + "epoch": 6.01, + "learning_rate": 4.699752530432069e-05, + "loss": 2.5038, + "step": 1212500 + }, + { + "epoch": 6.01, + "learning_rate": 4.699628671789461e-05, + "loss": 2.4571, + "step": 1213000 + }, + { + "epoch": 6.01, + "learning_rate": 4.6995048131468524e-05, + "loss": 2.4688, + "step": 1213500 + }, + { + "epoch": 6.01, + "learning_rate": 4.699380954504244e-05, + "loss": 2.5051, + "step": 1214000 + }, + { + "epoch": 6.02, + "learning_rate": 4.699257095861635e-05, + "loss": 2.4776, + "step": 1214500 + }, + { + "epoch": 6.02, + "learning_rate": 4.699133237219027e-05, + "loss": 2.4888, + "step": 1215000 + }, + { + "epoch": 6.02, + "learning_rate": 4.6990096262937036e-05, + "loss": 2.4839, + "step": 1215500 + }, + { + "epoch": 6.02, + "learning_rate": 4.698885767651095e-05, + "loss": 2.4875, + "step": 1216000 + }, + { + "epoch": 6.03, + "learning_rate": 4.698762156725772e-05, + "loss": 2.5075, + "step": 1216500 + }, + { + "epoch": 6.03, + "learning_rate": 4.698638545800449e-05, + "loss": 2.4875, + "step": 1217000 + }, + { + "epoch": 6.03, + "learning_rate": 4.698514687157841e-05, + "loss": 2.4917, + "step": 1217500 + }, + { + "epoch": 6.03, + "learning_rate": 4.6983908285152325e-05, + "loss": 2.4866, + "step": 1218000 + }, + { + "epoch": 6.04, + "learning_rate": 4.698266969872624e-05, + "loss": 2.5019, + "step": 1218500 + }, + { + "epoch": 6.04, + "learning_rate": 4.698143111230016e-05, + "loss": 2.4532, + "step": 1219000 + }, + { + "epoch": 6.04, + "learning_rate": 4.6980192525874076e-05, + "loss": 2.4699, + "step": 1219500 + }, + { + "epoch": 6.04, + "learning_rate": 4.697895393944799e-05, + "loss": 2.5117, + "step": 1220000 + }, + { + "epoch": 6.05, + "learning_rate": 4.69777153530219e-05, + "loss": 2.4897, + "step": 1220500 + }, + { + "epoch": 6.05, + "learning_rate": 4.697647676659582e-05, + "loss": 2.4558, + "step": 1221000 + }, + { + "epoch": 6.05, + "learning_rate": 4.6975238180169737e-05, + "loss": 2.4775, + "step": 1221500 + }, + { + "epoch": 6.05, + "learning_rate": 4.6973999593743653e-05, + "loss": 2.4828, + "step": 1222000 + }, + { + "epoch": 6.06, + "learning_rate": 4.697276100731757e-05, + "loss": 2.487, + "step": 1222500 + }, + { + "epoch": 6.06, + "learning_rate": 4.697152242089149e-05, + "loss": 2.4936, + "step": 1223000 + }, + { + "epoch": 6.06, + "learning_rate": 4.6970283834465404e-05, + "loss": 2.489, + "step": 1223500 + }, + { + "epoch": 6.06, + "learning_rate": 4.6969045248039314e-05, + "loss": 2.4504, + "step": 1224000 + }, + { + "epoch": 6.07, + "learning_rate": 4.696780666161323e-05, + "loss": 2.4844, + "step": 1224500 + }, + { + "epoch": 6.07, + "learning_rate": 4.696656807518715e-05, + "loss": 2.4735, + "step": 1225000 + }, + { + "epoch": 6.07, + "learning_rate": 4.6965329488761065e-05, + "loss": 2.4685, + "step": 1225500 + }, + { + "epoch": 6.07, + "learning_rate": 4.696409090233498e-05, + "loss": 2.4795, + "step": 1226000 + }, + { + "epoch": 6.08, + "learning_rate": 4.69628523159089e-05, + "loss": 2.4918, + "step": 1226500 + }, + { + "epoch": 6.08, + "learning_rate": 4.696161620665567e-05, + "loss": 2.4753, + "step": 1227000 + }, + { + "epoch": 6.08, + "learning_rate": 4.696038009740244e-05, + "loss": 2.4695, + "step": 1227500 + }, + { + "epoch": 6.08, + "learning_rate": 4.6959141510976354e-05, + "loss": 2.4831, + "step": 1228000 + }, + { + "epoch": 6.09, + "learning_rate": 4.695790292455027e-05, + "loss": 2.4936, + "step": 1228500 + }, + { + "epoch": 6.09, + "learning_rate": 4.695666433812419e-05, + "loss": 2.4876, + "step": 1229000 + }, + { + "epoch": 6.09, + "learning_rate": 4.6955425751698104e-05, + "loss": 2.4676, + "step": 1229500 + }, + { + "epoch": 6.09, + "learning_rate": 4.695418716527202e-05, + "loss": 2.4873, + "step": 1230000 + }, + { + "epoch": 6.1, + "learning_rate": 4.695295105601879e-05, + "loss": 2.4812, + "step": 1230500 + }, + { + "epoch": 6.1, + "learning_rate": 4.695171246959271e-05, + "loss": 2.4859, + "step": 1231000 + }, + { + "epoch": 6.1, + "learning_rate": 4.6950473883166624e-05, + "loss": 2.4744, + "step": 1231500 + }, + { + "epoch": 6.1, + "learning_rate": 4.694923529674054e-05, + "loss": 2.4792, + "step": 1232000 + }, + { + "epoch": 6.11, + "learning_rate": 4.694799918748731e-05, + "loss": 2.4847, + "step": 1232500 + }, + { + "epoch": 6.11, + "learning_rate": 4.694676060106123e-05, + "loss": 2.5086, + "step": 1233000 + }, + { + "epoch": 6.11, + "learning_rate": 4.6945522014635144e-05, + "loss": 2.4921, + "step": 1233500 + }, + { + "epoch": 6.11, + "learning_rate": 4.6944283428209054e-05, + "loss": 2.471, + "step": 1234000 + }, + { + "epoch": 6.12, + "learning_rate": 4.694304484178297e-05, + "loss": 2.4858, + "step": 1234500 + }, + { + "epoch": 6.12, + "learning_rate": 4.694180625535689e-05, + "loss": 2.4519, + "step": 1235000 + }, + { + "epoch": 6.12, + "learning_rate": 4.6940567668930805e-05, + "loss": 2.4916, + "step": 1235500 + }, + { + "epoch": 6.12, + "learning_rate": 4.693932908250472e-05, + "loss": 2.4775, + "step": 1236000 + }, + { + "epoch": 6.13, + "learning_rate": 4.693809049607863e-05, + "loss": 2.483, + "step": 1236500 + }, + { + "epoch": 6.13, + "learning_rate": 4.693685190965255e-05, + "loss": 2.4979, + "step": 1237000 + }, + { + "epoch": 6.13, + "learning_rate": 4.6935613323226465e-05, + "loss": 2.4822, + "step": 1237500 + }, + { + "epoch": 6.13, + "learning_rate": 4.693437473680038e-05, + "loss": 2.4846, + "step": 1238000 + }, + { + "epoch": 6.14, + "learning_rate": 4.69331361503743e-05, + "loss": 2.4769, + "step": 1238500 + }, + { + "epoch": 6.14, + "learning_rate": 4.6931900041121075e-05, + "loss": 2.5055, + "step": 1239000 + }, + { + "epoch": 6.14, + "learning_rate": 4.6930661454694985e-05, + "loss": 2.4656, + "step": 1239500 + }, + { + "epoch": 6.14, + "learning_rate": 4.69294228682689e-05, + "loss": 2.5074, + "step": 1240000 + }, + { + "epoch": 6.15, + "learning_rate": 4.692818428184282e-05, + "loss": 2.4729, + "step": 1240500 + }, + { + "epoch": 6.15, + "learning_rate": 4.692694817258959e-05, + "loss": 2.4935, + "step": 1241000 + }, + { + "epoch": 6.15, + "learning_rate": 4.6925709586163505e-05, + "loss": 2.4919, + "step": 1241500 + }, + { + "epoch": 6.15, + "learning_rate": 4.692447099973742e-05, + "loss": 2.4865, + "step": 1242000 + }, + { + "epoch": 6.16, + "learning_rate": 4.692323241331134e-05, + "loss": 2.4812, + "step": 1242500 + }, + { + "epoch": 6.16, + "learning_rate": 4.6921993826885255e-05, + "loss": 2.483, + "step": 1243000 + }, + { + "epoch": 6.16, + "learning_rate": 4.6920757717632024e-05, + "loss": 2.4902, + "step": 1243500 + }, + { + "epoch": 6.16, + "learning_rate": 4.691951913120594e-05, + "loss": 2.47, + "step": 1244000 + }, + { + "epoch": 6.17, + "learning_rate": 4.691828054477986e-05, + "loss": 2.4547, + "step": 1244500 + }, + { + "epoch": 6.17, + "learning_rate": 4.6917041958353775e-05, + "loss": 2.4744, + "step": 1245000 + }, + { + "epoch": 6.17, + "learning_rate": 4.691580337192769e-05, + "loss": 2.4984, + "step": 1245500 + }, + { + "epoch": 6.17, + "learning_rate": 4.69145647855016e-05, + "loss": 2.4811, + "step": 1246000 + }, + { + "epoch": 6.18, + "learning_rate": 4.691332619907552e-05, + "loss": 2.4872, + "step": 1246500 + }, + { + "epoch": 6.18, + "learning_rate": 4.6912090089822295e-05, + "loss": 2.481, + "step": 1247000 + }, + { + "epoch": 6.18, + "learning_rate": 4.691085150339621e-05, + "loss": 2.4718, + "step": 1247500 + }, + { + "epoch": 6.18, + "learning_rate": 4.690961291697012e-05, + "loss": 2.4848, + "step": 1248000 + }, + { + "epoch": 6.19, + "learning_rate": 4.690837433054404e-05, + "loss": 2.483, + "step": 1248500 + }, + { + "epoch": 6.19, + "learning_rate": 4.6907135744117956e-05, + "loss": 2.4704, + "step": 1249000 + }, + { + "epoch": 6.19, + "learning_rate": 4.690589715769187e-05, + "loss": 2.4682, + "step": 1249500 + }, + { + "epoch": 6.19, + "learning_rate": 4.690466104843864e-05, + "loss": 2.4647, + "step": 1250000 + }, + { + "epoch": 6.2, + "learning_rate": 4.690342493918541e-05, + "loss": 2.4835, + "step": 1250500 + }, + { + "epoch": 6.2, + "learning_rate": 4.690218635275933e-05, + "loss": 2.4615, + "step": 1251000 + }, + { + "epoch": 6.2, + "learning_rate": 4.6900947766333244e-05, + "loss": 2.4774, + "step": 1251500 + }, + { + "epoch": 6.2, + "learning_rate": 4.689970917990716e-05, + "loss": 2.4772, + "step": 1252000 + }, + { + "epoch": 6.21, + "learning_rate": 4.689847059348108e-05, + "loss": 2.4977, + "step": 1252500 + }, + { + "epoch": 6.21, + "learning_rate": 4.6897232007054995e-05, + "loss": 2.5088, + "step": 1253000 + }, + { + "epoch": 6.21, + "learning_rate": 4.689599342062891e-05, + "loss": 2.4723, + "step": 1253500 + }, + { + "epoch": 6.21, + "learning_rate": 4.689475483420283e-05, + "loss": 2.4965, + "step": 1254000 + }, + { + "epoch": 6.22, + "learning_rate": 4.689351624777674e-05, + "loss": 2.4822, + "step": 1254500 + }, + { + "epoch": 6.22, + "learning_rate": 4.689228013852351e-05, + "loss": 2.5036, + "step": 1255000 + }, + { + "epoch": 6.22, + "learning_rate": 4.6891041552097424e-05, + "loss": 2.4874, + "step": 1255500 + }, + { + "epoch": 6.22, + "learning_rate": 4.688980544284419e-05, + "loss": 2.4798, + "step": 1256000 + }, + { + "epoch": 6.23, + "learning_rate": 4.688856685641811e-05, + "loss": 2.4956, + "step": 1256500 + }, + { + "epoch": 6.23, + "learning_rate": 4.688733074716488e-05, + "loss": 2.4933, + "step": 1257000 + }, + { + "epoch": 6.23, + "learning_rate": 4.6886092160738796e-05, + "loss": 2.5024, + "step": 1257500 + }, + { + "epoch": 6.23, + "learning_rate": 4.6884853574312706e-05, + "loss": 2.4811, + "step": 1258000 + }, + { + "epoch": 6.24, + "learning_rate": 4.688361498788662e-05, + "loss": 2.4707, + "step": 1258500 + }, + { + "epoch": 6.24, + "learning_rate": 4.688237887863339e-05, + "loss": 2.4857, + "step": 1259000 + }, + { + "epoch": 6.24, + "learning_rate": 4.688114029220731e-05, + "loss": 2.5269, + "step": 1259500 + }, + { + "epoch": 6.24, + "learning_rate": 4.6879901705781226e-05, + "loss": 2.4841, + "step": 1260000 + }, + { + "epoch": 6.24, + "learning_rate": 4.687866311935514e-05, + "loss": 2.5034, + "step": 1260500 + }, + { + "epoch": 6.25, + "learning_rate": 4.687742453292906e-05, + "loss": 2.4715, + "step": 1261000 + }, + { + "epoch": 6.25, + "learning_rate": 4.6876185946502976e-05, + "loss": 2.4924, + "step": 1261500 + }, + { + "epoch": 6.25, + "learning_rate": 4.6874947360076893e-05, + "loss": 2.5025, + "step": 1262000 + }, + { + "epoch": 6.25, + "learning_rate": 4.687370877365081e-05, + "loss": 2.4942, + "step": 1262500 + }, + { + "epoch": 6.26, + "learning_rate": 4.687247018722473e-05, + "loss": 2.4852, + "step": 1263000 + }, + { + "epoch": 6.26, + "learning_rate": 4.6871231600798644e-05, + "loss": 2.4815, + "step": 1263500 + }, + { + "epoch": 6.26, + "learning_rate": 4.686999301437256e-05, + "loss": 2.4833, + "step": 1264000 + }, + { + "epoch": 6.26, + "learning_rate": 4.686875690511932e-05, + "loss": 2.5097, + "step": 1264500 + }, + { + "epoch": 6.27, + "learning_rate": 4.686751831869324e-05, + "loss": 2.5067, + "step": 1265000 + }, + { + "epoch": 6.27, + "learning_rate": 4.686627973226716e-05, + "loss": 2.5029, + "step": 1265500 + }, + { + "epoch": 6.27, + "learning_rate": 4.6865041145841074e-05, + "loss": 2.4842, + "step": 1266000 + }, + { + "epoch": 6.27, + "learning_rate": 4.686380255941499e-05, + "loss": 2.4757, + "step": 1266500 + }, + { + "epoch": 6.28, + "learning_rate": 4.686256397298891e-05, + "loss": 2.4729, + "step": 1267000 + }, + { + "epoch": 6.28, + "learning_rate": 4.6861325386562825e-05, + "loss": 2.4995, + "step": 1267500 + }, + { + "epoch": 6.28, + "learning_rate": 4.686008680013674e-05, + "loss": 2.5044, + "step": 1268000 + }, + { + "epoch": 6.28, + "learning_rate": 4.685884821371066e-05, + "loss": 2.483, + "step": 1268500 + }, + { + "epoch": 6.29, + "learning_rate": 4.685761458163028e-05, + "loss": 2.5019, + "step": 1269000 + }, + { + "epoch": 6.29, + "learning_rate": 4.6856375995204196e-05, + "loss": 2.5024, + "step": 1269500 + }, + { + "epoch": 6.29, + "learning_rate": 4.685513740877811e-05, + "loss": 2.4707, + "step": 1270000 + }, + { + "epoch": 6.29, + "learning_rate": 4.685389882235203e-05, + "loss": 2.4647, + "step": 1270500 + }, + { + "epoch": 6.3, + "learning_rate": 4.68526627130988e-05, + "loss": 2.4922, + "step": 1271000 + }, + { + "epoch": 6.3, + "learning_rate": 4.685142412667271e-05, + "loss": 2.5003, + "step": 1271500 + }, + { + "epoch": 6.3, + "learning_rate": 4.6850185540246626e-05, + "loss": 2.4678, + "step": 1272000 + }, + { + "epoch": 6.3, + "learning_rate": 4.684894695382054e-05, + "loss": 2.4832, + "step": 1272500 + }, + { + "epoch": 6.31, + "learning_rate": 4.684771084456732e-05, + "loss": 2.4824, + "step": 1273000 + }, + { + "epoch": 6.31, + "learning_rate": 4.6846472258141235e-05, + "loss": 2.4748, + "step": 1273500 + }, + { + "epoch": 6.31, + "learning_rate": 4.684523367171515e-05, + "loss": 2.5172, + "step": 1274000 + }, + { + "epoch": 6.31, + "learning_rate": 4.684399508528906e-05, + "loss": 2.4525, + "step": 1274500 + }, + { + "epoch": 6.32, + "learning_rate": 4.684275649886298e-05, + "loss": 2.4783, + "step": 1275000 + }, + { + "epoch": 6.32, + "learning_rate": 4.6841517912436896e-05, + "loss": 2.5077, + "step": 1275500 + }, + { + "epoch": 6.32, + "learning_rate": 4.6840281803183665e-05, + "loss": 2.4907, + "step": 1276000 + }, + { + "epoch": 6.32, + "learning_rate": 4.683904321675758e-05, + "loss": 2.464, + "step": 1276500 + }, + { + "epoch": 6.33, + "learning_rate": 4.68378046303315e-05, + "loss": 2.4991, + "step": 1277000 + }, + { + "epoch": 6.33, + "learning_rate": 4.6836566043905416e-05, + "loss": 2.5031, + "step": 1277500 + }, + { + "epoch": 6.33, + "learning_rate": 4.6835327457479326e-05, + "loss": 2.4927, + "step": 1278000 + }, + { + "epoch": 6.33, + "learning_rate": 4.683408887105324e-05, + "loss": 2.4785, + "step": 1278500 + }, + { + "epoch": 6.34, + "learning_rate": 4.683285028462716e-05, + "loss": 2.465, + "step": 1279000 + }, + { + "epoch": 6.34, + "learning_rate": 4.683161169820108e-05, + "loss": 2.4818, + "step": 1279500 + }, + { + "epoch": 6.34, + "learning_rate": 4.6830373111774994e-05, + "loss": 2.5001, + "step": 1280000 + }, + { + "epoch": 6.34, + "learning_rate": 4.682913452534891e-05, + "loss": 2.4687, + "step": 1280500 + }, + { + "epoch": 6.35, + "learning_rate": 4.682789841609568e-05, + "loss": 2.4916, + "step": 1281000 + }, + { + "epoch": 6.35, + "learning_rate": 4.6826659829669596e-05, + "loss": 2.4898, + "step": 1281500 + }, + { + "epoch": 6.35, + "learning_rate": 4.682542124324351e-05, + "loss": 2.4705, + "step": 1282000 + }, + { + "epoch": 6.35, + "learning_rate": 4.682418265681743e-05, + "loss": 2.4925, + "step": 1282500 + }, + { + "epoch": 6.36, + "learning_rate": 4.682294407039135e-05, + "loss": 2.4848, + "step": 1283000 + }, + { + "epoch": 6.36, + "learning_rate": 4.6821705483965264e-05, + "loss": 2.4765, + "step": 1283500 + }, + { + "epoch": 6.36, + "learning_rate": 4.6820469374712026e-05, + "loss": 2.4905, + "step": 1284000 + }, + { + "epoch": 6.36, + "learning_rate": 4.681923078828594e-05, + "loss": 2.4852, + "step": 1284500 + }, + { + "epoch": 6.37, + "learning_rate": 4.681799220185986e-05, + "loss": 2.4962, + "step": 1285000 + }, + { + "epoch": 6.37, + "learning_rate": 4.681675361543378e-05, + "loss": 2.4493, + "step": 1285500 + }, + { + "epoch": 6.37, + "learning_rate": 4.6815515029007694e-05, + "loss": 2.4891, + "step": 1286000 + }, + { + "epoch": 6.37, + "learning_rate": 4.681427644258161e-05, + "loss": 2.4824, + "step": 1286500 + }, + { + "epoch": 6.38, + "learning_rate": 4.681303785615553e-05, + "loss": 2.4763, + "step": 1287000 + }, + { + "epoch": 6.38, + "learning_rate": 4.6811799269729445e-05, + "loss": 2.5012, + "step": 1287500 + }, + { + "epoch": 6.38, + "learning_rate": 4.681056068330336e-05, + "loss": 2.4999, + "step": 1288000 + }, + { + "epoch": 6.38, + "learning_rate": 4.680932209687728e-05, + "loss": 2.4742, + "step": 1288500 + }, + { + "epoch": 6.39, + "learning_rate": 4.680808598762405e-05, + "loss": 2.4774, + "step": 1289000 + }, + { + "epoch": 6.39, + "learning_rate": 4.6806847401197964e-05, + "loss": 2.4746, + "step": 1289500 + }, + { + "epoch": 6.39, + "learning_rate": 4.680560881477188e-05, + "loss": 2.4768, + "step": 1290000 + }, + { + "epoch": 6.39, + "learning_rate": 4.68043702283458e-05, + "loss": 2.4646, + "step": 1290500 + }, + { + "epoch": 6.4, + "learning_rate": 4.6803131641919715e-05, + "loss": 2.4499, + "step": 1291000 + }, + { + "epoch": 6.4, + "learning_rate": 4.680189553266648e-05, + "loss": 2.4654, + "step": 1291500 + }, + { + "epoch": 6.4, + "learning_rate": 4.6800656946240394e-05, + "loss": 2.4719, + "step": 1292000 + }, + { + "epoch": 6.4, + "learning_rate": 4.679941835981431e-05, + "loss": 2.4862, + "step": 1292500 + }, + { + "epoch": 6.41, + "learning_rate": 4.6798182250561087e-05, + "loss": 2.4977, + "step": 1293000 + }, + { + "epoch": 6.41, + "learning_rate": 4.679694614130785e-05, + "loss": 2.4832, + "step": 1293500 + }, + { + "epoch": 6.41, + "learning_rate": 4.6795707554881766e-05, + "loss": 2.5195, + "step": 1294000 + }, + { + "epoch": 6.41, + "learning_rate": 4.679446896845568e-05, + "loss": 2.4994, + "step": 1294500 + }, + { + "epoch": 6.42, + "learning_rate": 4.67932303820296e-05, + "loss": 2.4854, + "step": 1295000 + }, + { + "epoch": 6.42, + "learning_rate": 4.6791991795603516e-05, + "loss": 2.4757, + "step": 1295500 + }, + { + "epoch": 6.42, + "learning_rate": 4.679075320917743e-05, + "loss": 2.5, + "step": 1296000 + }, + { + "epoch": 6.42, + "learning_rate": 4.678951462275134e-05, + "loss": 2.4907, + "step": 1296500 + }, + { + "epoch": 6.43, + "learning_rate": 4.678827603632526e-05, + "loss": 2.5016, + "step": 1297000 + }, + { + "epoch": 6.43, + "learning_rate": 4.678703744989918e-05, + "loss": 2.5061, + "step": 1297500 + }, + { + "epoch": 6.43, + "learning_rate": 4.6785798863473094e-05, + "loss": 2.4797, + "step": 1298000 + }, + { + "epoch": 6.43, + "learning_rate": 4.678456027704701e-05, + "loss": 2.5274, + "step": 1298500 + }, + { + "epoch": 6.44, + "learning_rate": 4.678332169062093e-05, + "loss": 2.489, + "step": 1299000 + }, + { + "epoch": 6.44, + "learning_rate": 4.6782083104194845e-05, + "loss": 2.4759, + "step": 1299500 + }, + { + "epoch": 6.44, + "learning_rate": 4.6780846994941614e-05, + "loss": 2.4618, + "step": 1300000 + }, + { + "epoch": 6.44, + "learning_rate": 4.677961088568838e-05, + "loss": 2.4962, + "step": 1300500 + }, + { + "epoch": 6.45, + "learning_rate": 4.67783722992623e-05, + "loss": 2.485, + "step": 1301000 + }, + { + "epoch": 6.45, + "learning_rate": 4.6777133712836216e-05, + "loss": 2.4917, + "step": 1301500 + }, + { + "epoch": 6.45, + "learning_rate": 4.677589512641013e-05, + "loss": 2.4872, + "step": 1302000 + }, + { + "epoch": 6.45, + "learning_rate": 4.677465653998405e-05, + "loss": 2.4978, + "step": 1302500 + }, + { + "epoch": 6.46, + "learning_rate": 4.677342043073082e-05, + "loss": 2.486, + "step": 1303000 + }, + { + "epoch": 6.46, + "learning_rate": 4.6772181844304736e-05, + "loss": 2.5053, + "step": 1303500 + }, + { + "epoch": 6.46, + "learning_rate": 4.6770945735051505e-05, + "loss": 2.4912, + "step": 1304000 + }, + { + "epoch": 6.46, + "learning_rate": 4.676970714862542e-05, + "loss": 2.4926, + "step": 1304500 + }, + { + "epoch": 6.47, + "learning_rate": 4.676846856219934e-05, + "loss": 2.4839, + "step": 1305000 + }, + { + "epoch": 6.47, + "learning_rate": 4.6767229975773256e-05, + "loss": 2.4895, + "step": 1305500 + }, + { + "epoch": 6.47, + "learning_rate": 4.6765991389347166e-05, + "loss": 2.5066, + "step": 1306000 + }, + { + "epoch": 6.47, + "learning_rate": 4.6764755280093935e-05, + "loss": 2.4824, + "step": 1306500 + }, + { + "epoch": 6.48, + "learning_rate": 4.676351669366785e-05, + "loss": 2.482, + "step": 1307000 + }, + { + "epoch": 6.48, + "learning_rate": 4.676227810724177e-05, + "loss": 2.4795, + "step": 1307500 + }, + { + "epoch": 6.48, + "learning_rate": 4.6761039520815685e-05, + "loss": 2.5152, + "step": 1308000 + }, + { + "epoch": 6.48, + "learning_rate": 4.67598009343896e-05, + "loss": 2.4962, + "step": 1308500 + }, + { + "epoch": 6.49, + "learning_rate": 4.675856234796352e-05, + "loss": 2.499, + "step": 1309000 + }, + { + "epoch": 6.49, + "learning_rate": 4.6757323761537436e-05, + "loss": 2.5016, + "step": 1309500 + }, + { + "epoch": 6.49, + "learning_rate": 4.675608517511135e-05, + "loss": 2.5232, + "step": 1310000 + }, + { + "epoch": 6.49, + "learning_rate": 4.675484658868527e-05, + "loss": 2.4946, + "step": 1310500 + }, + { + "epoch": 6.5, + "learning_rate": 4.675361047943204e-05, + "loss": 2.4769, + "step": 1311000 + }, + { + "epoch": 6.5, + "learning_rate": 4.6752371893005956e-05, + "loss": 2.4588, + "step": 1311500 + }, + { + "epoch": 6.5, + "learning_rate": 4.675113578375272e-05, + "loss": 2.514, + "step": 1312000 + }, + { + "epoch": 6.5, + "learning_rate": 4.6749897197326635e-05, + "loss": 2.5118, + "step": 1312500 + }, + { + "epoch": 6.51, + "learning_rate": 4.674865861090055e-05, + "loss": 2.4955, + "step": 1313000 + }, + { + "epoch": 6.51, + "learning_rate": 4.674742002447447e-05, + "loss": 2.4849, + "step": 1313500 + }, + { + "epoch": 6.51, + "learning_rate": 4.6746181438048385e-05, + "loss": 2.4894, + "step": 1314000 + }, + { + "epoch": 6.51, + "learning_rate": 4.67449428516223e-05, + "loss": 2.4816, + "step": 1314500 + }, + { + "epoch": 6.51, + "learning_rate": 4.674370426519622e-05, + "loss": 2.4829, + "step": 1315000 + }, + { + "epoch": 6.52, + "learning_rate": 4.6742465678770136e-05, + "loss": 2.4969, + "step": 1315500 + }, + { + "epoch": 6.52, + "learning_rate": 4.6741229569516905e-05, + "loss": 2.4945, + "step": 1316000 + }, + { + "epoch": 6.52, + "learning_rate": 4.673999346026367e-05, + "loss": 2.4751, + "step": 1316500 + }, + { + "epoch": 6.52, + "learning_rate": 4.6738754873837584e-05, + "loss": 2.5224, + "step": 1317000 + }, + { + "epoch": 6.53, + "learning_rate": 4.67375162874115e-05, + "loss": 2.5149, + "step": 1317500 + }, + { + "epoch": 6.53, + "learning_rate": 4.673627770098542e-05, + "loss": 2.5144, + "step": 1318000 + }, + { + "epoch": 6.53, + "learning_rate": 4.6735039114559335e-05, + "loss": 2.4687, + "step": 1318500 + }, + { + "epoch": 6.53, + "learning_rate": 4.673380052813325e-05, + "loss": 2.4861, + "step": 1319000 + }, + { + "epoch": 6.54, + "learning_rate": 4.673256441888002e-05, + "loss": 2.5114, + "step": 1319500 + }, + { + "epoch": 6.54, + "learning_rate": 4.673132583245394e-05, + "loss": 2.482, + "step": 1320000 + }, + { + "epoch": 6.54, + "learning_rate": 4.6730087246027854e-05, + "loss": 2.5231, + "step": 1320500 + }, + { + "epoch": 6.54, + "learning_rate": 4.672885113677462e-05, + "loss": 2.511, + "step": 1321000 + }, + { + "epoch": 6.55, + "learning_rate": 4.672761502752139e-05, + "loss": 2.512, + "step": 1321500 + }, + { + "epoch": 6.55, + "learning_rate": 4.672637644109531e-05, + "loss": 2.5054, + "step": 1322000 + }, + { + "epoch": 6.55, + "learning_rate": 4.6725137854669226e-05, + "loss": 2.502, + "step": 1322500 + }, + { + "epoch": 6.55, + "learning_rate": 4.6723901745415995e-05, + "loss": 2.4838, + "step": 1323000 + }, + { + "epoch": 6.56, + "learning_rate": 4.672266315898991e-05, + "loss": 2.5259, + "step": 1323500 + }, + { + "epoch": 6.56, + "learning_rate": 4.672142457256383e-05, + "loss": 2.5036, + "step": 1324000 + }, + { + "epoch": 6.56, + "learning_rate": 4.6720185986137745e-05, + "loss": 2.5052, + "step": 1324500 + }, + { + "epoch": 6.56, + "learning_rate": 4.671894739971166e-05, + "loss": 2.4959, + "step": 1325000 + }, + { + "epoch": 6.57, + "learning_rate": 4.671770881328558e-05, + "loss": 2.5092, + "step": 1325500 + }, + { + "epoch": 6.57, + "learning_rate": 4.6716470226859496e-05, + "loss": 2.5141, + "step": 1326000 + }, + { + "epoch": 6.57, + "learning_rate": 4.671523164043341e-05, + "loss": 2.5202, + "step": 1326500 + }, + { + "epoch": 6.57, + "learning_rate": 4.671399305400732e-05, + "loss": 2.4919, + "step": 1327000 + }, + { + "epoch": 6.58, + "learning_rate": 4.671275446758124e-05, + "loss": 2.4807, + "step": 1327500 + }, + { + "epoch": 6.58, + "learning_rate": 4.671151588115516e-05, + "loss": 2.4948, + "step": 1328000 + }, + { + "epoch": 6.58, + "learning_rate": 4.6710277294729074e-05, + "loss": 2.51, + "step": 1328500 + }, + { + "epoch": 6.58, + "learning_rate": 4.670903870830299e-05, + "loss": 2.4887, + "step": 1329000 + }, + { + "epoch": 6.59, + "learning_rate": 4.67078001218769e-05, + "loss": 2.4943, + "step": 1329500 + }, + { + "epoch": 6.59, + "learning_rate": 4.670656153545082e-05, + "loss": 2.5102, + "step": 1330000 + }, + { + "epoch": 6.59, + "learning_rate": 4.6705322949024735e-05, + "loss": 2.4674, + "step": 1330500 + }, + { + "epoch": 6.59, + "learning_rate": 4.670408436259865e-05, + "loss": 2.4995, + "step": 1331000 + }, + { + "epoch": 6.6, + "learning_rate": 4.670285073051828e-05, + "loss": 2.5186, + "step": 1331500 + }, + { + "epoch": 6.6, + "learning_rate": 4.6701612144092196e-05, + "loss": 2.485, + "step": 1332000 + }, + { + "epoch": 6.6, + "learning_rate": 4.670037355766611e-05, + "loss": 2.487, + "step": 1332500 + }, + { + "epoch": 6.6, + "learning_rate": 4.669913497124003e-05, + "loss": 2.5017, + "step": 1333000 + }, + { + "epoch": 6.61, + "learning_rate": 4.669789886198679e-05, + "loss": 2.4821, + "step": 1333500 + }, + { + "epoch": 6.61, + "learning_rate": 4.669666027556071e-05, + "loss": 2.4815, + "step": 1334000 + }, + { + "epoch": 6.61, + "learning_rate": 4.6695421689134626e-05, + "loss": 2.4971, + "step": 1334500 + }, + { + "epoch": 6.61, + "learning_rate": 4.669418310270854e-05, + "loss": 2.5461, + "step": 1335000 + }, + { + "epoch": 6.62, + "learning_rate": 4.669294451628246e-05, + "loss": 2.514, + "step": 1335500 + }, + { + "epoch": 6.62, + "learning_rate": 4.669170592985638e-05, + "loss": 2.5117, + "step": 1336000 + }, + { + "epoch": 6.62, + "learning_rate": 4.6690467343430294e-05, + "loss": 2.5189, + "step": 1336500 + }, + { + "epoch": 6.62, + "learning_rate": 4.668922875700421e-05, + "loss": 2.4931, + "step": 1337000 + }, + { + "epoch": 6.63, + "learning_rate": 4.668799017057813e-05, + "loss": 2.5089, + "step": 1337500 + }, + { + "epoch": 6.63, + "learning_rate": 4.668675158415204e-05, + "loss": 2.5183, + "step": 1338000 + }, + { + "epoch": 6.63, + "learning_rate": 4.6685512997725955e-05, + "loss": 2.5247, + "step": 1338500 + }, + { + "epoch": 6.63, + "learning_rate": 4.668427441129987e-05, + "loss": 2.4885, + "step": 1339000 + }, + { + "epoch": 6.64, + "learning_rate": 4.668303830204665e-05, + "loss": 2.5007, + "step": 1339500 + }, + { + "epoch": 6.64, + "learning_rate": 4.6681799715620564e-05, + "loss": 2.483, + "step": 1340000 + }, + { + "epoch": 6.64, + "learning_rate": 4.668056112919448e-05, + "loss": 2.5025, + "step": 1340500 + }, + { + "epoch": 6.64, + "learning_rate": 4.667932254276839e-05, + "loss": 2.4848, + "step": 1341000 + }, + { + "epoch": 6.65, + "learning_rate": 4.667808395634231e-05, + "loss": 2.5055, + "step": 1341500 + }, + { + "epoch": 6.65, + "learning_rate": 4.6676845369916225e-05, + "loss": 2.4936, + "step": 1342000 + }, + { + "epoch": 6.65, + "learning_rate": 4.6675609260662994e-05, + "loss": 2.5113, + "step": 1342500 + }, + { + "epoch": 6.65, + "learning_rate": 4.667437315140976e-05, + "loss": 2.4663, + "step": 1343000 + }, + { + "epoch": 6.66, + "learning_rate": 4.667313456498368e-05, + "loss": 2.4992, + "step": 1343500 + }, + { + "epoch": 6.66, + "learning_rate": 4.66718959785576e-05, + "loss": 2.4944, + "step": 1344000 + }, + { + "epoch": 6.66, + "learning_rate": 4.6670657392131514e-05, + "loss": 2.4973, + "step": 1344500 + }, + { + "epoch": 6.66, + "learning_rate": 4.666941880570543e-05, + "loss": 2.469, + "step": 1345000 + }, + { + "epoch": 6.67, + "learning_rate": 4.666818021927935e-05, + "loss": 2.5108, + "step": 1345500 + }, + { + "epoch": 6.67, + "learning_rate": 4.6666941632853264e-05, + "loss": 2.4903, + "step": 1346000 + }, + { + "epoch": 6.67, + "learning_rate": 4.666570304642718e-05, + "loss": 2.4975, + "step": 1346500 + }, + { + "epoch": 6.67, + "learning_rate": 4.66644644600011e-05, + "loss": 2.5192, + "step": 1347000 + }, + { + "epoch": 6.68, + "learning_rate": 4.666322587357501e-05, + "loss": 2.5035, + "step": 1347500 + }, + { + "epoch": 6.68, + "learning_rate": 4.666198976432178e-05, + "loss": 2.4976, + "step": 1348000 + }, + { + "epoch": 6.68, + "learning_rate": 4.6660751177895694e-05, + "loss": 2.5002, + "step": 1348500 + }, + { + "epoch": 6.68, + "learning_rate": 4.665951506864246e-05, + "loss": 2.5158, + "step": 1349000 + }, + { + "epoch": 6.69, + "learning_rate": 4.665827648221638e-05, + "loss": 2.4809, + "step": 1349500 + }, + { + "epoch": 6.69, + "learning_rate": 4.66570378957903e-05, + "loss": 2.4874, + "step": 1350000 + }, + { + "epoch": 6.69, + "learning_rate": 4.6655799309364214e-05, + "loss": 2.5005, + "step": 1350500 + }, + { + "epoch": 6.69, + "learning_rate": 4.665456072293813e-05, + "loss": 2.4906, + "step": 1351000 + }, + { + "epoch": 6.7, + "learning_rate": 4.665332213651205e-05, + "loss": 2.5107, + "step": 1351500 + }, + { + "epoch": 6.7, + "learning_rate": 4.6652083550085964e-05, + "loss": 2.4649, + "step": 1352000 + }, + { + "epoch": 6.7, + "learning_rate": 4.665084496365988e-05, + "loss": 2.4886, + "step": 1352500 + }, + { + "epoch": 6.7, + "learning_rate": 4.66496063772338e-05, + "loss": 2.5207, + "step": 1353000 + }, + { + "epoch": 6.71, + "learning_rate": 4.664837026798056e-05, + "loss": 2.4898, + "step": 1353500 + }, + { + "epoch": 6.71, + "learning_rate": 4.664713168155448e-05, + "loss": 2.5024, + "step": 1354000 + }, + { + "epoch": 6.71, + "learning_rate": 4.6645893095128394e-05, + "loss": 2.517, + "step": 1354500 + }, + { + "epoch": 6.71, + "learning_rate": 4.664465450870231e-05, + "loss": 2.4937, + "step": 1355000 + }, + { + "epoch": 6.72, + "learning_rate": 4.664341592227623e-05, + "loss": 2.4872, + "step": 1355500 + }, + { + "epoch": 6.72, + "learning_rate": 4.6642177335850145e-05, + "loss": 2.5017, + "step": 1356000 + }, + { + "epoch": 6.72, + "learning_rate": 4.6640938749424055e-05, + "loss": 2.5051, + "step": 1356500 + }, + { + "epoch": 6.72, + "learning_rate": 4.663970016299797e-05, + "loss": 2.477, + "step": 1357000 + }, + { + "epoch": 6.73, + "learning_rate": 4.663846157657189e-05, + "loss": 2.5128, + "step": 1357500 + }, + { + "epoch": 6.73, + "learning_rate": 4.6637222990145806e-05, + "loss": 2.4908, + "step": 1358000 + }, + { + "epoch": 6.73, + "learning_rate": 4.663598440371972e-05, + "loss": 2.4832, + "step": 1358500 + }, + { + "epoch": 6.73, + "learning_rate": 4.66347482944665e-05, + "loss": 2.4844, + "step": 1359000 + }, + { + "epoch": 6.74, + "learning_rate": 4.6633509708040415e-05, + "loss": 2.5207, + "step": 1359500 + }, + { + "epoch": 6.74, + "learning_rate": 4.663227359878718e-05, + "loss": 2.496, + "step": 1360000 + }, + { + "epoch": 6.74, + "learning_rate": 4.6631035012361094e-05, + "loss": 2.5076, + "step": 1360500 + }, + { + "epoch": 6.74, + "learning_rate": 4.662979642593501e-05, + "loss": 2.4935, + "step": 1361000 + }, + { + "epoch": 6.75, + "learning_rate": 4.662855783950893e-05, + "loss": 2.4879, + "step": 1361500 + }, + { + "epoch": 6.75, + "learning_rate": 4.6627319253082845e-05, + "loss": 2.4986, + "step": 1362000 + }, + { + "epoch": 6.75, + "learning_rate": 4.6626083143829614e-05, + "loss": 2.515, + "step": 1362500 + }, + { + "epoch": 6.75, + "learning_rate": 4.662484455740353e-05, + "loss": 2.501, + "step": 1363000 + }, + { + "epoch": 6.76, + "learning_rate": 4.662360597097745e-05, + "loss": 2.4869, + "step": 1363500 + }, + { + "epoch": 6.76, + "learning_rate": 4.6622367384551365e-05, + "loss": 2.4833, + "step": 1364000 + }, + { + "epoch": 6.76, + "learning_rate": 4.662113127529813e-05, + "loss": 2.4934, + "step": 1364500 + }, + { + "epoch": 6.76, + "learning_rate": 4.6619895166044896e-05, + "loss": 2.48, + "step": 1365000 + }, + { + "epoch": 6.77, + "learning_rate": 4.661865657961881e-05, + "loss": 2.4729, + "step": 1365500 + }, + { + "epoch": 6.77, + "learning_rate": 4.661741799319273e-05, + "loss": 2.4902, + "step": 1366000 + }, + { + "epoch": 6.77, + "learning_rate": 4.6616179406766646e-05, + "loss": 2.5141, + "step": 1366500 + }, + { + "epoch": 6.77, + "learning_rate": 4.661494082034056e-05, + "loss": 2.4894, + "step": 1367000 + }, + { + "epoch": 6.78, + "learning_rate": 4.661370223391448e-05, + "loss": 2.4918, + "step": 1367500 + }, + { + "epoch": 6.78, + "learning_rate": 4.66124636474884e-05, + "loss": 2.5045, + "step": 1368000 + }, + { + "epoch": 6.78, + "learning_rate": 4.6611225061062314e-05, + "loss": 2.4838, + "step": 1368500 + }, + { + "epoch": 6.78, + "learning_rate": 4.660998647463623e-05, + "loss": 2.487, + "step": 1369000 + }, + { + "epoch": 6.78, + "learning_rate": 4.660874788821015e-05, + "loss": 2.5039, + "step": 1369500 + }, + { + "epoch": 6.79, + "learning_rate": 4.6607509301784065e-05, + "loss": 2.5063, + "step": 1370000 + }, + { + "epoch": 6.79, + "learning_rate": 4.660627071535798e-05, + "loss": 2.5151, + "step": 1370500 + }, + { + "epoch": 6.79, + "learning_rate": 4.6605039560450454e-05, + "loss": 2.4945, + "step": 1371000 + }, + { + "epoch": 6.79, + "learning_rate": 4.660380097402437e-05, + "loss": 2.4963, + "step": 1371500 + }, + { + "epoch": 6.8, + "learning_rate": 4.660256238759829e-05, + "loss": 2.5327, + "step": 1372000 + }, + { + "epoch": 6.8, + "learning_rate": 4.660132627834505e-05, + "loss": 2.4942, + "step": 1372500 + }, + { + "epoch": 6.8, + "learning_rate": 4.660008769191897e-05, + "loss": 2.512, + "step": 1373000 + }, + { + "epoch": 6.8, + "learning_rate": 4.6598849105492884e-05, + "loss": 2.4764, + "step": 1373500 + }, + { + "epoch": 6.81, + "learning_rate": 4.65976105190668e-05, + "loss": 2.4803, + "step": 1374000 + }, + { + "epoch": 6.81, + "learning_rate": 4.659637193264072e-05, + "loss": 2.4855, + "step": 1374500 + }, + { + "epoch": 6.81, + "learning_rate": 4.6595133346214635e-05, + "loss": 2.4853, + "step": 1375000 + }, + { + "epoch": 6.81, + "learning_rate": 4.6593897236961404e-05, + "loss": 2.4791, + "step": 1375500 + }, + { + "epoch": 6.82, + "learning_rate": 4.659265865053532e-05, + "loss": 2.4887, + "step": 1376000 + }, + { + "epoch": 6.82, + "learning_rate": 4.659142006410924e-05, + "loss": 2.4982, + "step": 1376500 + }, + { + "epoch": 6.82, + "learning_rate": 4.6590181477683154e-05, + "loss": 2.5017, + "step": 1377000 + }, + { + "epoch": 6.82, + "learning_rate": 4.658894536842992e-05, + "loss": 2.5206, + "step": 1377500 + }, + { + "epoch": 6.83, + "learning_rate": 4.658770678200384e-05, + "loss": 2.4919, + "step": 1378000 + }, + { + "epoch": 6.83, + "learning_rate": 4.658646819557776e-05, + "loss": 2.5036, + "step": 1378500 + }, + { + "epoch": 6.83, + "learning_rate": 4.658522960915167e-05, + "loss": 2.4691, + "step": 1379000 + }, + { + "epoch": 6.83, + "learning_rate": 4.6583991022725584e-05, + "loss": 2.4853, + "step": 1379500 + }, + { + "epoch": 6.84, + "learning_rate": 4.65827524362995e-05, + "loss": 2.5148, + "step": 1380000 + }, + { + "epoch": 6.84, + "learning_rate": 4.658151384987342e-05, + "loss": 2.5151, + "step": 1380500 + }, + { + "epoch": 6.84, + "learning_rate": 4.6580275263447335e-05, + "loss": 2.4836, + "step": 1381000 + }, + { + "epoch": 6.84, + "learning_rate": 4.657903667702125e-05, + "loss": 2.4898, + "step": 1381500 + }, + { + "epoch": 6.85, + "learning_rate": 4.657779809059517e-05, + "loss": 2.4917, + "step": 1382000 + }, + { + "epoch": 6.85, + "learning_rate": 4.657655950416908e-05, + "loss": 2.5036, + "step": 1382500 + }, + { + "epoch": 6.85, + "learning_rate": 4.6575320917742996e-05, + "loss": 2.4992, + "step": 1383000 + }, + { + "epoch": 6.85, + "learning_rate": 4.657408233131691e-05, + "loss": 2.502, + "step": 1383500 + }, + { + "epoch": 6.86, + "learning_rate": 4.657284374489083e-05, + "loss": 2.4944, + "step": 1384000 + }, + { + "epoch": 6.86, + "learning_rate": 4.657160515846475e-05, + "loss": 2.4917, + "step": 1384500 + }, + { + "epoch": 6.86, + "learning_rate": 4.6570366572038664e-05, + "loss": 2.4888, + "step": 1385000 + }, + { + "epoch": 6.86, + "learning_rate": 4.656912798561258e-05, + "loss": 2.485, + "step": 1385500 + }, + { + "epoch": 6.87, + "learning_rate": 4.65678893991865e-05, + "loss": 2.4949, + "step": 1386000 + }, + { + "epoch": 6.87, + "learning_rate": 4.6566653289933266e-05, + "loss": 2.4753, + "step": 1386500 + }, + { + "epoch": 6.87, + "learning_rate": 4.656541470350718e-05, + "loss": 2.5044, + "step": 1387000 + }, + { + "epoch": 6.87, + "learning_rate": 4.65641761170811e-05, + "loss": 2.5088, + "step": 1387500 + }, + { + "epoch": 6.88, + "learning_rate": 4.656293753065502e-05, + "loss": 2.4922, + "step": 1388000 + }, + { + "epoch": 6.88, + "learning_rate": 4.6561698944228934e-05, + "loss": 2.5054, + "step": 1388500 + }, + { + "epoch": 6.88, + "learning_rate": 4.656046035780285e-05, + "loss": 2.4948, + "step": 1389000 + }, + { + "epoch": 6.88, + "learning_rate": 4.655922177137677e-05, + "loss": 2.5002, + "step": 1389500 + }, + { + "epoch": 6.89, + "learning_rate": 4.655798566212353e-05, + "loss": 2.4978, + "step": 1390000 + }, + { + "epoch": 6.89, + "learning_rate": 4.655674707569745e-05, + "loss": 2.5017, + "step": 1390500 + }, + { + "epoch": 6.89, + "learning_rate": 4.6555508489271364e-05, + "loss": 2.4925, + "step": 1391000 + }, + { + "epoch": 6.89, + "learning_rate": 4.655426990284528e-05, + "loss": 2.4932, + "step": 1391500 + }, + { + "epoch": 6.9, + "learning_rate": 4.655303379359205e-05, + "loss": 2.5058, + "step": 1392000 + }, + { + "epoch": 6.9, + "learning_rate": 4.6551795207165966e-05, + "loss": 2.4799, + "step": 1392500 + }, + { + "epoch": 6.9, + "learning_rate": 4.655055662073988e-05, + "loss": 2.5225, + "step": 1393000 + }, + { + "epoch": 6.9, + "learning_rate": 4.65493180343138e-05, + "loss": 2.4859, + "step": 1393500 + }, + { + "epoch": 6.91, + "learning_rate": 4.654807944788772e-05, + "loss": 2.521, + "step": 1394000 + }, + { + "epoch": 6.91, + "learning_rate": 4.6546840861461634e-05, + "loss": 2.4728, + "step": 1394500 + }, + { + "epoch": 6.91, + "learning_rate": 4.6545604752208396e-05, + "loss": 2.4708, + "step": 1395000 + }, + { + "epoch": 6.91, + "learning_rate": 4.654436616578231e-05, + "loss": 2.4922, + "step": 1395500 + }, + { + "epoch": 6.92, + "learning_rate": 4.654312757935623e-05, + "loss": 2.4731, + "step": 1396000 + }, + { + "epoch": 6.92, + "learning_rate": 4.654188899293015e-05, + "loss": 2.4955, + "step": 1396500 + }, + { + "epoch": 6.92, + "learning_rate": 4.6540650406504064e-05, + "loss": 2.4996, + "step": 1397000 + }, + { + "epoch": 6.92, + "learning_rate": 4.653941182007798e-05, + "loss": 2.4988, + "step": 1397500 + }, + { + "epoch": 6.93, + "learning_rate": 4.65381732336519e-05, + "loss": 2.476, + "step": 1398000 + }, + { + "epoch": 6.93, + "learning_rate": 4.6536934647225815e-05, + "loss": 2.487, + "step": 1398500 + }, + { + "epoch": 6.93, + "learning_rate": 4.6535698537972583e-05, + "loss": 2.4695, + "step": 1399000 + }, + { + "epoch": 6.93, + "learning_rate": 4.65344599515465e-05, + "loss": 2.4825, + "step": 1399500 + }, + { + "epoch": 6.94, + "learning_rate": 4.653322136512042e-05, + "loss": 2.4803, + "step": 1400000 + }, + { + "epoch": 6.94, + "learning_rate": 4.6531982778694334e-05, + "loss": 2.4772, + "step": 1400500 + }, + { + "epoch": 6.94, + "learning_rate": 4.653074419226825e-05, + "loss": 2.4992, + "step": 1401000 + }, + { + "epoch": 6.94, + "learning_rate": 4.652950560584217e-05, + "loss": 2.4755, + "step": 1401500 + }, + { + "epoch": 6.95, + "learning_rate": 4.6528267019416085e-05, + "loss": 2.486, + "step": 1402000 + }, + { + "epoch": 6.95, + "learning_rate": 4.652703091016285e-05, + "loss": 2.5144, + "step": 1402500 + }, + { + "epoch": 6.95, + "learning_rate": 4.652579480090962e-05, + "loss": 2.4947, + "step": 1403000 + }, + { + "epoch": 6.95, + "learning_rate": 4.652455621448354e-05, + "loss": 2.4977, + "step": 1403500 + }, + { + "epoch": 6.96, + "learning_rate": 4.652331762805745e-05, + "loss": 2.508, + "step": 1404000 + }, + { + "epoch": 6.96, + "learning_rate": 4.652207904163137e-05, + "loss": 2.5197, + "step": 1404500 + }, + { + "epoch": 6.96, + "learning_rate": 4.6520840455205284e-05, + "loss": 2.4885, + "step": 1405000 + }, + { + "epoch": 6.96, + "learning_rate": 4.65196018687792e-05, + "loss": 2.4976, + "step": 1405500 + }, + { + "epoch": 6.97, + "learning_rate": 4.651836328235312e-05, + "loss": 2.4476, + "step": 1406000 + }, + { + "epoch": 6.97, + "learning_rate": 4.6517124695927034e-05, + "loss": 2.5003, + "step": 1406500 + }, + { + "epoch": 6.97, + "learning_rate": 4.651588610950095e-05, + "loss": 2.4807, + "step": 1407000 + }, + { + "epoch": 6.97, + "learning_rate": 4.651464752307487e-05, + "loss": 2.5, + "step": 1407500 + }, + { + "epoch": 6.98, + "learning_rate": 4.6513408936648785e-05, + "loss": 2.5051, + "step": 1408000 + }, + { + "epoch": 6.98, + "learning_rate": 4.65121703502227e-05, + "loss": 2.4952, + "step": 1408500 + }, + { + "epoch": 6.98, + "learning_rate": 4.651093176379662e-05, + "loss": 2.4864, + "step": 1409000 + }, + { + "epoch": 6.98, + "learning_rate": 4.650969565454338e-05, + "loss": 2.4935, + "step": 1409500 + }, + { + "epoch": 6.99, + "learning_rate": 4.65084570681173e-05, + "loss": 2.5068, + "step": 1410000 + }, + { + "epoch": 6.99, + "learning_rate": 4.650722095886407e-05, + "loss": 2.5097, + "step": 1410500 + }, + { + "epoch": 6.99, + "learning_rate": 4.6505982372437984e-05, + "loss": 2.4744, + "step": 1411000 + }, + { + "epoch": 6.99, + "learning_rate": 4.65047437860119e-05, + "loss": 2.4966, + "step": 1411500 + }, + { + "epoch": 7.0, + "learning_rate": 4.650350519958582e-05, + "loss": 2.4788, + "step": 1412000 + }, + { + "epoch": 7.0, + "learning_rate": 4.6502266613159735e-05, + "loss": 2.4876, + "step": 1412500 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6390809050019882, + "eval_accuracy_mlm": 0.5919321078827258, + "eval_accuracy_nsp": 0.8611933683454986, + "eval_loss": 2.4429469108581543, + "eval_runtime": 145.8536, + "eval_samples_per_second": 1748.048, + "eval_steps_per_second": 72.84, + "step": 1412901 + }, + { + "epoch": 7.0, + "learning_rate": 4.650102802673365e-05, + "loss": 2.4845, + "step": 1413000 + }, + { + "epoch": 7.0, + "learning_rate": 4.649978944030757e-05, + "loss": 2.4572, + "step": 1413500 + }, + { + "epoch": 7.01, + "learning_rate": 4.6498550853881485e-05, + "loss": 2.4539, + "step": 1414000 + }, + { + "epoch": 7.01, + "learning_rate": 4.6497314744628254e-05, + "loss": 2.4568, + "step": 1414500 + }, + { + "epoch": 7.01, + "learning_rate": 4.6496076158202164e-05, + "loss": 2.4616, + "step": 1415000 + }, + { + "epoch": 7.01, + "learning_rate": 4.649483757177608e-05, + "loss": 2.4551, + "step": 1415500 + }, + { + "epoch": 7.02, + "learning_rate": 4.649360146252286e-05, + "loss": 2.4791, + "step": 1416000 + }, + { + "epoch": 7.02, + "learning_rate": 4.649236287609677e-05, + "loss": 2.481, + "step": 1416500 + }, + { + "epoch": 7.02, + "learning_rate": 4.6491124289670684e-05, + "loss": 2.4784, + "step": 1417000 + }, + { + "epoch": 7.02, + "learning_rate": 4.64898857032446e-05, + "loss": 2.467, + "step": 1417500 + }, + { + "epoch": 7.03, + "learning_rate": 4.648864711681852e-05, + "loss": 2.4805, + "step": 1418000 + }, + { + "epoch": 7.03, + "learning_rate": 4.6487408530392435e-05, + "loss": 2.4275, + "step": 1418500 + }, + { + "epoch": 7.03, + "learning_rate": 4.648616994396635e-05, + "loss": 2.4766, + "step": 1419000 + }, + { + "epoch": 7.03, + "learning_rate": 4.648493135754027e-05, + "loss": 2.4559, + "step": 1419500 + }, + { + "epoch": 7.04, + "learning_rate": 4.6483692771114185e-05, + "loss": 2.5089, + "step": 1420000 + }, + { + "epoch": 7.04, + "learning_rate": 4.64824541846881e-05, + "loss": 2.4865, + "step": 1420500 + }, + { + "epoch": 7.04, + "learning_rate": 4.648121559826202e-05, + "loss": 2.4508, + "step": 1421000 + }, + { + "epoch": 7.04, + "learning_rate": 4.6479977011835936e-05, + "loss": 2.4655, + "step": 1421500 + }, + { + "epoch": 7.05, + "learning_rate": 4.647873842540985e-05, + "loss": 2.4671, + "step": 1422000 + }, + { + "epoch": 7.05, + "learning_rate": 4.647749983898377e-05, + "loss": 2.4695, + "step": 1422500 + }, + { + "epoch": 7.05, + "learning_rate": 4.647626372973053e-05, + "loss": 2.4488, + "step": 1423000 + }, + { + "epoch": 7.05, + "learning_rate": 4.647502514330445e-05, + "loss": 2.4521, + "step": 1423500 + }, + { + "epoch": 7.05, + "learning_rate": 4.647378903405122e-05, + "loss": 2.4709, + "step": 1424000 + }, + { + "epoch": 7.06, + "learning_rate": 4.647255540197084e-05, + "loss": 2.4587, + "step": 1424500 + }, + { + "epoch": 7.06, + "learning_rate": 4.6471316815544755e-05, + "loss": 2.4739, + "step": 1425000 + }, + { + "epoch": 7.06, + "learning_rate": 4.647007822911867e-05, + "loss": 2.4977, + "step": 1425500 + }, + { + "epoch": 7.06, + "learning_rate": 4.646883964269259e-05, + "loss": 2.4725, + "step": 1426000 + }, + { + "epoch": 7.07, + "learning_rate": 4.6467601056266506e-05, + "loss": 2.4716, + "step": 1426500 + }, + { + "epoch": 7.07, + "learning_rate": 4.646636246984042e-05, + "loss": 2.4874, + "step": 1427000 + }, + { + "epoch": 7.07, + "learning_rate": 4.646512388341434e-05, + "loss": 2.4674, + "step": 1427500 + }, + { + "epoch": 7.07, + "learning_rate": 4.646388529698826e-05, + "loss": 2.4617, + "step": 1428000 + }, + { + "epoch": 7.08, + "learning_rate": 4.6462646710562174e-05, + "loss": 2.4703, + "step": 1428500 + }, + { + "epoch": 7.08, + "learning_rate": 4.6461408124136084e-05, + "loss": 2.4612, + "step": 1429000 + }, + { + "epoch": 7.08, + "learning_rate": 4.646016953771e-05, + "loss": 2.4505, + "step": 1429500 + }, + { + "epoch": 7.08, + "learning_rate": 4.645893095128392e-05, + "loss": 2.4645, + "step": 1430000 + }, + { + "epoch": 7.09, + "learning_rate": 4.6457692364857835e-05, + "loss": 2.4425, + "step": 1430500 + }, + { + "epoch": 7.09, + "learning_rate": 4.645645377843175e-05, + "loss": 2.4478, + "step": 1431000 + }, + { + "epoch": 7.09, + "learning_rate": 4.645521519200567e-05, + "loss": 2.4645, + "step": 1431500 + }, + { + "epoch": 7.09, + "learning_rate": 4.6453979082752444e-05, + "loss": 2.4471, + "step": 1432000 + }, + { + "epoch": 7.1, + "learning_rate": 4.6452740496326354e-05, + "loss": 2.4378, + "step": 1432500 + }, + { + "epoch": 7.1, + "learning_rate": 4.645150190990027e-05, + "loss": 2.4872, + "step": 1433000 + }, + { + "epoch": 7.1, + "learning_rate": 4.645026332347419e-05, + "loss": 2.4629, + "step": 1433500 + }, + { + "epoch": 7.1, + "learning_rate": 4.6449024737048105e-05, + "loss": 2.4646, + "step": 1434000 + }, + { + "epoch": 7.11, + "learning_rate": 4.644778615062202e-05, + "loss": 2.4917, + "step": 1434500 + }, + { + "epoch": 7.11, + "learning_rate": 4.644654756419594e-05, + "loss": 2.4704, + "step": 1435000 + }, + { + "epoch": 7.11, + "learning_rate": 4.644530897776985e-05, + "loss": 2.4439, + "step": 1435500 + }, + { + "epoch": 7.11, + "learning_rate": 4.6444070391343766e-05, + "loss": 2.458, + "step": 1436000 + }, + { + "epoch": 7.12, + "learning_rate": 4.6442834282090535e-05, + "loss": 2.4867, + "step": 1436500 + }, + { + "epoch": 7.12, + "learning_rate": 4.644159569566445e-05, + "loss": 2.4849, + "step": 1437000 + }, + { + "epoch": 7.12, + "learning_rate": 4.644035710923837e-05, + "loss": 2.4636, + "step": 1437500 + }, + { + "epoch": 7.12, + "learning_rate": 4.6439118522812286e-05, + "loss": 2.4742, + "step": 1438000 + }, + { + "epoch": 7.13, + "learning_rate": 4.643788736790476e-05, + "loss": 2.4822, + "step": 1438500 + }, + { + "epoch": 7.13, + "learning_rate": 4.6436648781478675e-05, + "loss": 2.4591, + "step": 1439000 + }, + { + "epoch": 7.13, + "learning_rate": 4.643541019505259e-05, + "loss": 2.4727, + "step": 1439500 + }, + { + "epoch": 7.13, + "learning_rate": 4.643417408579936e-05, + "loss": 2.4814, + "step": 1440000 + }, + { + "epoch": 7.14, + "learning_rate": 4.643293549937328e-05, + "loss": 2.4816, + "step": 1440500 + }, + { + "epoch": 7.14, + "learning_rate": 4.6431696912947195e-05, + "loss": 2.4745, + "step": 1441000 + }, + { + "epoch": 7.14, + "learning_rate": 4.643045832652111e-05, + "loss": 2.467, + "step": 1441500 + }, + { + "epoch": 7.14, + "learning_rate": 4.642921974009503e-05, + "loss": 2.4839, + "step": 1442000 + }, + { + "epoch": 7.15, + "learning_rate": 4.6427981153668946e-05, + "loss": 2.4538, + "step": 1442500 + }, + { + "epoch": 7.15, + "learning_rate": 4.6426742567242856e-05, + "loss": 2.4355, + "step": 1443000 + }, + { + "epoch": 7.15, + "learning_rate": 4.642550398081677e-05, + "loss": 2.479, + "step": 1443500 + }, + { + "epoch": 7.15, + "learning_rate": 4.642426539439069e-05, + "loss": 2.4619, + "step": 1444000 + }, + { + "epoch": 7.16, + "learning_rate": 4.642302680796461e-05, + "loss": 2.4853, + "step": 1444500 + }, + { + "epoch": 7.16, + "learning_rate": 4.6421788221538524e-05, + "loss": 2.4502, + "step": 1445000 + }, + { + "epoch": 7.16, + "learning_rate": 4.642054963511244e-05, + "loss": 2.4703, + "step": 1445500 + }, + { + "epoch": 7.16, + "learning_rate": 4.641931104868636e-05, + "loss": 2.4679, + "step": 1446000 + }, + { + "epoch": 7.17, + "learning_rate": 4.6418072462260274e-05, + "loss": 2.4749, + "step": 1446500 + }, + { + "epoch": 7.17, + "learning_rate": 4.641683387583419e-05, + "loss": 2.4873, + "step": 1447000 + }, + { + "epoch": 7.17, + "learning_rate": 4.641559528940811e-05, + "loss": 2.4585, + "step": 1447500 + }, + { + "epoch": 7.17, + "learning_rate": 4.641435670298202e-05, + "loss": 2.4642, + "step": 1448000 + }, + { + "epoch": 7.18, + "learning_rate": 4.6413120593728794e-05, + "loss": 2.4696, + "step": 1448500 + }, + { + "epoch": 7.18, + "learning_rate": 4.641188200730271e-05, + "loss": 2.4597, + "step": 1449000 + }, + { + "epoch": 7.18, + "learning_rate": 4.641064342087663e-05, + "loss": 2.4897, + "step": 1449500 + }, + { + "epoch": 7.18, + "learning_rate": 4.640940731162339e-05, + "loss": 2.4707, + "step": 1450000 + }, + { + "epoch": 7.19, + "learning_rate": 4.640816872519731e-05, + "loss": 2.4917, + "step": 1450500 + }, + { + "epoch": 7.19, + "learning_rate": 4.6406930138771224e-05, + "loss": 2.4877, + "step": 1451000 + }, + { + "epoch": 7.19, + "learning_rate": 4.640569155234514e-05, + "loss": 2.4417, + "step": 1451500 + }, + { + "epoch": 7.19, + "learning_rate": 4.640445544309191e-05, + "loss": 2.4632, + "step": 1452000 + }, + { + "epoch": 7.2, + "learning_rate": 4.6403216856665826e-05, + "loss": 2.5164, + "step": 1452500 + }, + { + "epoch": 7.2, + "learning_rate": 4.640197827023974e-05, + "loss": 2.476, + "step": 1453000 + }, + { + "epoch": 7.2, + "learning_rate": 4.640073968381366e-05, + "loss": 2.4726, + "step": 1453500 + }, + { + "epoch": 7.2, + "learning_rate": 4.639950357456043e-05, + "loss": 2.4642, + "step": 1454000 + }, + { + "epoch": 7.21, + "learning_rate": 4.6398264988134346e-05, + "loss": 2.4863, + "step": 1454500 + }, + { + "epoch": 7.21, + "learning_rate": 4.639702640170826e-05, + "loss": 2.4524, + "step": 1455000 + }, + { + "epoch": 7.21, + "learning_rate": 4.639578781528218e-05, + "loss": 2.4725, + "step": 1455500 + }, + { + "epoch": 7.21, + "learning_rate": 4.639455170602894e-05, + "loss": 2.4505, + "step": 1456000 + }, + { + "epoch": 7.22, + "learning_rate": 4.639331311960286e-05, + "loss": 2.4743, + "step": 1456500 + }, + { + "epoch": 7.22, + "learning_rate": 4.6392074533176776e-05, + "loss": 2.4581, + "step": 1457000 + }, + { + "epoch": 7.22, + "learning_rate": 4.639083594675069e-05, + "loss": 2.4725, + "step": 1457500 + }, + { + "epoch": 7.22, + "learning_rate": 4.638959736032461e-05, + "loss": 2.4704, + "step": 1458000 + }, + { + "epoch": 7.23, + "learning_rate": 4.6388358773898526e-05, + "loss": 2.4718, + "step": 1458500 + }, + { + "epoch": 7.23, + "learning_rate": 4.638712018747244e-05, + "loss": 2.4867, + "step": 1459000 + }, + { + "epoch": 7.23, + "learning_rate": 4.638588407821921e-05, + "loss": 2.4673, + "step": 1459500 + }, + { + "epoch": 7.23, + "learning_rate": 4.638464549179313e-05, + "loss": 2.4954, + "step": 1460000 + }, + { + "epoch": 7.24, + "learning_rate": 4.6383406905367046e-05, + "loss": 2.4766, + "step": 1460500 + }, + { + "epoch": 7.24, + "learning_rate": 4.638216831894096e-05, + "loss": 2.4681, + "step": 1461000 + }, + { + "epoch": 7.24, + "learning_rate": 4.638092973251488e-05, + "loss": 2.4931, + "step": 1461500 + }, + { + "epoch": 7.24, + "learning_rate": 4.63796911460888e-05, + "loss": 2.4671, + "step": 1462000 + }, + { + "epoch": 7.25, + "learning_rate": 4.6378452559662714e-05, + "loss": 2.4901, + "step": 1462500 + }, + { + "epoch": 7.25, + "learning_rate": 4.637721397323663e-05, + "loss": 2.4586, + "step": 1463000 + }, + { + "epoch": 7.25, + "learning_rate": 4.637597538681054e-05, + "loss": 2.4707, + "step": 1463500 + }, + { + "epoch": 7.25, + "learning_rate": 4.637474175473016e-05, + "loss": 2.4735, + "step": 1464000 + }, + { + "epoch": 7.26, + "learning_rate": 4.637350316830408e-05, + "loss": 2.4678, + "step": 1464500 + }, + { + "epoch": 7.26, + "learning_rate": 4.6372264581877995e-05, + "loss": 2.485, + "step": 1465000 + }, + { + "epoch": 7.26, + "learning_rate": 4.637102599545191e-05, + "loss": 2.4831, + "step": 1465500 + }, + { + "epoch": 7.26, + "learning_rate": 4.636978740902583e-05, + "loss": 2.4813, + "step": 1466000 + }, + { + "epoch": 7.27, + "learning_rate": 4.6368548822599746e-05, + "loss": 2.4825, + "step": 1466500 + }, + { + "epoch": 7.27, + "learning_rate": 4.6367312713346515e-05, + "loss": 2.4867, + "step": 1467000 + }, + { + "epoch": 7.27, + "learning_rate": 4.6366074126920425e-05, + "loss": 2.4784, + "step": 1467500 + }, + { + "epoch": 7.27, + "learning_rate": 4.636483554049434e-05, + "loss": 2.4478, + "step": 1468000 + }, + { + "epoch": 7.28, + "learning_rate": 4.636359695406826e-05, + "loss": 2.4608, + "step": 1468500 + }, + { + "epoch": 7.28, + "learning_rate": 4.6362358367642176e-05, + "loss": 2.4759, + "step": 1469000 + }, + { + "epoch": 7.28, + "learning_rate": 4.636111978121609e-05, + "loss": 2.4748, + "step": 1469500 + }, + { + "epoch": 7.28, + "learning_rate": 4.635988119479001e-05, + "loss": 2.4616, + "step": 1470000 + }, + { + "epoch": 7.29, + "learning_rate": 4.635864260836393e-05, + "loss": 2.4663, + "step": 1470500 + }, + { + "epoch": 7.29, + "learning_rate": 4.6357404021937844e-05, + "loss": 2.4643, + "step": 1471000 + }, + { + "epoch": 7.29, + "learning_rate": 4.635616543551176e-05, + "loss": 2.482, + "step": 1471500 + }, + { + "epoch": 7.29, + "learning_rate": 4.635492932625853e-05, + "loss": 2.4848, + "step": 1472000 + }, + { + "epoch": 7.3, + "learning_rate": 4.6353690739832446e-05, + "loss": 2.4771, + "step": 1472500 + }, + { + "epoch": 7.3, + "learning_rate": 4.635245215340636e-05, + "loss": 2.4642, + "step": 1473000 + }, + { + "epoch": 7.3, + "learning_rate": 4.635121356698028e-05, + "loss": 2.4742, + "step": 1473500 + }, + { + "epoch": 7.3, + "learning_rate": 4.63499749805542e-05, + "loss": 2.4884, + "step": 1474000 + }, + { + "epoch": 7.31, + "learning_rate": 4.6348736394128114e-05, + "loss": 2.4658, + "step": 1474500 + }, + { + "epoch": 7.31, + "learning_rate": 4.634749780770203e-05, + "loss": 2.49, + "step": 1475000 + }, + { + "epoch": 7.31, + "learning_rate": 4.634625922127595e-05, + "loss": 2.4885, + "step": 1475500 + }, + { + "epoch": 7.31, + "learning_rate": 4.634502558919557e-05, + "loss": 2.4919, + "step": 1476000 + }, + { + "epoch": 7.32, + "learning_rate": 4.634378700276948e-05, + "loss": 2.4588, + "step": 1476500 + }, + { + "epoch": 7.32, + "learning_rate": 4.6342548416343396e-05, + "loss": 2.4474, + "step": 1477000 + }, + { + "epoch": 7.32, + "learning_rate": 4.634130982991731e-05, + "loss": 2.4867, + "step": 1477500 + }, + { + "epoch": 7.32, + "learning_rate": 4.634007124349123e-05, + "loss": 2.4749, + "step": 1478000 + }, + { + "epoch": 7.33, + "learning_rate": 4.6338832657065146e-05, + "loss": 2.4603, + "step": 1478500 + }, + { + "epoch": 7.33, + "learning_rate": 4.633759407063906e-05, + "loss": 2.4551, + "step": 1479000 + }, + { + "epoch": 7.33, + "learning_rate": 4.633635548421298e-05, + "loss": 2.4716, + "step": 1479500 + }, + { + "epoch": 7.33, + "learning_rate": 4.633511937495974e-05, + "loss": 2.4879, + "step": 1480000 + }, + { + "epoch": 7.33, + "learning_rate": 4.633388078853366e-05, + "loss": 2.4664, + "step": 1480500 + }, + { + "epoch": 7.34, + "learning_rate": 4.6332642202107576e-05, + "loss": 2.4596, + "step": 1481000 + }, + { + "epoch": 7.34, + "learning_rate": 4.633140361568149e-05, + "loss": 2.5133, + "step": 1481500 + }, + { + "epoch": 7.34, + "learning_rate": 4.633016502925541e-05, + "loss": 2.4767, + "step": 1482000 + }, + { + "epoch": 7.34, + "learning_rate": 4.632892644282933e-05, + "loss": 2.4489, + "step": 1482500 + }, + { + "epoch": 7.35, + "learning_rate": 4.6327687856403244e-05, + "loss": 2.4446, + "step": 1483000 + }, + { + "epoch": 7.35, + "learning_rate": 4.632644926997716e-05, + "loss": 2.4838, + "step": 1483500 + }, + { + "epoch": 7.35, + "learning_rate": 4.632521563789679e-05, + "loss": 2.4423, + "step": 1484000 + }, + { + "epoch": 7.35, + "learning_rate": 4.63239770514707e-05, + "loss": 2.4774, + "step": 1484500 + }, + { + "epoch": 7.36, + "learning_rate": 4.6322738465044615e-05, + "loss": 2.4497, + "step": 1485000 + }, + { + "epoch": 7.36, + "learning_rate": 4.632149987861853e-05, + "loss": 2.4882, + "step": 1485500 + }, + { + "epoch": 7.36, + "learning_rate": 4.63202637693653e-05, + "loss": 2.4747, + "step": 1486000 + }, + { + "epoch": 7.36, + "learning_rate": 4.631902518293922e-05, + "loss": 2.4873, + "step": 1486500 + }, + { + "epoch": 7.37, + "learning_rate": 4.6317786596513135e-05, + "loss": 2.4719, + "step": 1487000 + }, + { + "epoch": 7.37, + "learning_rate": 4.631654801008705e-05, + "loss": 2.466, + "step": 1487500 + }, + { + "epoch": 7.37, + "learning_rate": 4.631530942366097e-05, + "loss": 2.5012, + "step": 1488000 + }, + { + "epoch": 7.37, + "learning_rate": 4.6314070837234886e-05, + "loss": 2.4666, + "step": 1488500 + }, + { + "epoch": 7.38, + "learning_rate": 4.6312832250808796e-05, + "loss": 2.476, + "step": 1489000 + }, + { + "epoch": 7.38, + "learning_rate": 4.631159366438271e-05, + "loss": 2.4908, + "step": 1489500 + }, + { + "epoch": 7.38, + "learning_rate": 4.631035507795663e-05, + "loss": 2.4713, + "step": 1490000 + }, + { + "epoch": 7.38, + "learning_rate": 4.630911649153055e-05, + "loss": 2.4714, + "step": 1490500 + }, + { + "epoch": 7.39, + "learning_rate": 4.6307877905104464e-05, + "loss": 2.4822, + "step": 1491000 + }, + { + "epoch": 7.39, + "learning_rate": 4.630663931867838e-05, + "loss": 2.468, + "step": 1491500 + }, + { + "epoch": 7.39, + "learning_rate": 4.63054007322523e-05, + "loss": 2.4597, + "step": 1492000 + }, + { + "epoch": 7.39, + "learning_rate": 4.6304162145826214e-05, + "loss": 2.4886, + "step": 1492500 + }, + { + "epoch": 7.4, + "learning_rate": 4.630292355940013e-05, + "loss": 2.4621, + "step": 1493000 + }, + { + "epoch": 7.4, + "learning_rate": 4.630168745014689e-05, + "loss": 2.4916, + "step": 1493500 + }, + { + "epoch": 7.4, + "learning_rate": 4.630044886372081e-05, + "loss": 2.5105, + "step": 1494000 + }, + { + "epoch": 7.4, + "learning_rate": 4.629921027729473e-05, + "loss": 2.4965, + "step": 1494500 + }, + { + "epoch": 7.41, + "learning_rate": 4.62979741680415e-05, + "loss": 2.4962, + "step": 1495000 + }, + { + "epoch": 7.41, + "learning_rate": 4.629673558161541e-05, + "loss": 2.4789, + "step": 1495500 + }, + { + "epoch": 7.41, + "learning_rate": 4.629549947236219e-05, + "loss": 2.4785, + "step": 1496000 + }, + { + "epoch": 7.41, + "learning_rate": 4.6294260885936105e-05, + "loss": 2.4689, + "step": 1496500 + }, + { + "epoch": 7.42, + "learning_rate": 4.629302229951002e-05, + "loss": 2.5094, + "step": 1497000 + }, + { + "epoch": 7.42, + "learning_rate": 4.629178371308394e-05, + "loss": 2.4664, + "step": 1497500 + }, + { + "epoch": 7.42, + "learning_rate": 4.629054512665785e-05, + "loss": 2.4711, + "step": 1498000 + }, + { + "epoch": 7.42, + "learning_rate": 4.6289306540231766e-05, + "loss": 2.4982, + "step": 1498500 + }, + { + "epoch": 7.43, + "learning_rate": 4.628806795380568e-05, + "loss": 2.4652, + "step": 1499000 + }, + { + "epoch": 7.43, + "learning_rate": 4.62868293673796e-05, + "loss": 2.464, + "step": 1499500 + }, + { + "epoch": 7.43, + "learning_rate": 4.628559078095352e-05, + "loss": 2.4816, + "step": 1500000 + }, + { + "epoch": 7.43, + "learning_rate": 4.628435219452743e-05, + "loss": 2.461, + "step": 1500500 + }, + { + "epoch": 7.44, + "learning_rate": 4.6283113608101344e-05, + "loss": 2.4604, + "step": 1501000 + }, + { + "epoch": 7.44, + "learning_rate": 4.628187502167526e-05, + "loss": 2.4845, + "step": 1501500 + }, + { + "epoch": 7.44, + "learning_rate": 4.628063643524918e-05, + "loss": 2.4844, + "step": 1502000 + }, + { + "epoch": 7.44, + "learning_rate": 4.6279397848823095e-05, + "loss": 2.4942, + "step": 1502500 + }, + { + "epoch": 7.45, + "learning_rate": 4.627815926239701e-05, + "loss": 2.4517, + "step": 1503000 + }, + { + "epoch": 7.45, + "learning_rate": 4.627692067597093e-05, + "loss": 2.4613, + "step": 1503500 + }, + { + "epoch": 7.45, + "learning_rate": 4.62756845667177e-05, + "loss": 2.4785, + "step": 1504000 + }, + { + "epoch": 7.45, + "learning_rate": 4.6274445980291615e-05, + "loss": 2.4746, + "step": 1504500 + }, + { + "epoch": 7.46, + "learning_rate": 4.627320739386553e-05, + "loss": 2.4834, + "step": 1505000 + }, + { + "epoch": 7.46, + "learning_rate": 4.627196880743945e-05, + "loss": 2.5137, + "step": 1505500 + }, + { + "epoch": 7.46, + "learning_rate": 4.6270730221013365e-05, + "loss": 2.4748, + "step": 1506000 + }, + { + "epoch": 7.46, + "learning_rate": 4.626949163458728e-05, + "loss": 2.459, + "step": 1506500 + }, + { + "epoch": 7.47, + "learning_rate": 4.62682530481612e-05, + "loss": 2.4794, + "step": 1507000 + }, + { + "epoch": 7.47, + "learning_rate": 4.6267014461735116e-05, + "loss": 2.4742, + "step": 1507500 + }, + { + "epoch": 7.47, + "learning_rate": 4.626577587530903e-05, + "loss": 2.4774, + "step": 1508000 + }, + { + "epoch": 7.47, + "learning_rate": 4.626453728888295e-05, + "loss": 2.4662, + "step": 1508500 + }, + { + "epoch": 7.48, + "learning_rate": 4.626330117962971e-05, + "loss": 2.4627, + "step": 1509000 + }, + { + "epoch": 7.48, + "learning_rate": 4.626206259320363e-05, + "loss": 2.4786, + "step": 1509500 + }, + { + "epoch": 7.48, + "learning_rate": 4.62608264839504e-05, + "loss": 2.4726, + "step": 1510000 + }, + { + "epoch": 7.48, + "learning_rate": 4.6259590374697173e-05, + "loss": 2.4954, + "step": 1510500 + }, + { + "epoch": 7.49, + "learning_rate": 4.6258351788271084e-05, + "loss": 2.4493, + "step": 1511000 + }, + { + "epoch": 7.49, + "learning_rate": 4.6257113201845e-05, + "loss": 2.4704, + "step": 1511500 + }, + { + "epoch": 7.49, + "learning_rate": 4.625587461541892e-05, + "loss": 2.4773, + "step": 1512000 + }, + { + "epoch": 7.49, + "learning_rate": 4.6254636028992834e-05, + "loss": 2.4571, + "step": 1512500 + }, + { + "epoch": 7.5, + "learning_rate": 4.62533999197396e-05, + "loss": 2.4826, + "step": 1513000 + }, + { + "epoch": 7.5, + "learning_rate": 4.625216133331352e-05, + "loss": 2.4711, + "step": 1513500 + }, + { + "epoch": 7.5, + "learning_rate": 4.625092274688743e-05, + "loss": 2.5154, + "step": 1514000 + }, + { + "epoch": 7.5, + "learning_rate": 4.624968416046135e-05, + "loss": 2.4689, + "step": 1514500 + }, + { + "epoch": 7.51, + "learning_rate": 4.624844805120812e-05, + "loss": 2.4558, + "step": 1515000 + }, + { + "epoch": 7.51, + "learning_rate": 4.624720946478204e-05, + "loss": 2.489, + "step": 1515500 + }, + { + "epoch": 7.51, + "learning_rate": 4.624597087835596e-05, + "loss": 2.4775, + "step": 1516000 + }, + { + "epoch": 7.51, + "learning_rate": 4.6244732291929874e-05, + "loss": 2.5, + "step": 1516500 + }, + { + "epoch": 7.52, + "learning_rate": 4.6243496182676636e-05, + "loss": 2.5144, + "step": 1517000 + }, + { + "epoch": 7.52, + "learning_rate": 4.624225759625055e-05, + "loss": 2.4776, + "step": 1517500 + }, + { + "epoch": 7.52, + "learning_rate": 4.624101900982447e-05, + "loss": 2.5141, + "step": 1518000 + }, + { + "epoch": 7.52, + "learning_rate": 4.6239780423398386e-05, + "loss": 2.4846, + "step": 1518500 + }, + { + "epoch": 7.53, + "learning_rate": 4.62385418369723e-05, + "loss": 2.4617, + "step": 1519000 + }, + { + "epoch": 7.53, + "learning_rate": 4.623730325054622e-05, + "loss": 2.4419, + "step": 1519500 + }, + { + "epoch": 7.53, + "learning_rate": 4.623606466412014e-05, + "loss": 2.4669, + "step": 1520000 + }, + { + "epoch": 7.53, + "learning_rate": 4.623482607769405e-05, + "loss": 2.4905, + "step": 1520500 + }, + { + "epoch": 7.54, + "learning_rate": 4.6233587491267964e-05, + "loss": 2.4972, + "step": 1521000 + }, + { + "epoch": 7.54, + "learning_rate": 4.623234890484188e-05, + "loss": 2.4975, + "step": 1521500 + }, + { + "epoch": 7.54, + "learning_rate": 4.62311103184158e-05, + "loss": 2.4753, + "step": 1522000 + }, + { + "epoch": 7.54, + "learning_rate": 4.6229871731989715e-05, + "loss": 2.4884, + "step": 1522500 + }, + { + "epoch": 7.55, + "learning_rate": 4.622863314556363e-05, + "loss": 2.4908, + "step": 1523000 + }, + { + "epoch": 7.55, + "learning_rate": 4.62273970363104e-05, + "loss": 2.4883, + "step": 1523500 + }, + { + "epoch": 7.55, + "learning_rate": 4.622615844988432e-05, + "loss": 2.48, + "step": 1524000 + }, + { + "epoch": 7.55, + "learning_rate": 4.6224919863458235e-05, + "loss": 2.4724, + "step": 1524500 + }, + { + "epoch": 7.56, + "learning_rate": 4.622368127703215e-05, + "loss": 2.4895, + "step": 1525000 + }, + { + "epoch": 7.56, + "learning_rate": 4.622244516777892e-05, + "loss": 2.4562, + "step": 1525500 + }, + { + "epoch": 7.56, + "learning_rate": 4.622120658135284e-05, + "loss": 2.4682, + "step": 1526000 + }, + { + "epoch": 7.56, + "learning_rate": 4.621996799492675e-05, + "loss": 2.4951, + "step": 1526500 + }, + { + "epoch": 7.57, + "learning_rate": 4.6218729408500664e-05, + "loss": 2.4807, + "step": 1527000 + }, + { + "epoch": 7.57, + "learning_rate": 4.621749082207458e-05, + "loss": 2.51, + "step": 1527500 + }, + { + "epoch": 7.57, + "learning_rate": 4.621625471282136e-05, + "loss": 2.465, + "step": 1528000 + }, + { + "epoch": 7.57, + "learning_rate": 4.621501860356812e-05, + "loss": 2.4833, + "step": 1528500 + }, + { + "epoch": 7.58, + "learning_rate": 4.6213780017142036e-05, + "loss": 2.4647, + "step": 1529000 + }, + { + "epoch": 7.58, + "learning_rate": 4.621254143071595e-05, + "loss": 2.461, + "step": 1529500 + }, + { + "epoch": 7.58, + "learning_rate": 4.621130284428987e-05, + "loss": 2.4943, + "step": 1530000 + }, + { + "epoch": 7.58, + "learning_rate": 4.6210064257863787e-05, + "loss": 2.4836, + "step": 1530500 + }, + { + "epoch": 7.59, + "learning_rate": 4.6208825671437704e-05, + "loss": 2.4815, + "step": 1531000 + }, + { + "epoch": 7.59, + "learning_rate": 4.620758956218447e-05, + "loss": 2.4911, + "step": 1531500 + }, + { + "epoch": 7.59, + "learning_rate": 4.620635097575839e-05, + "loss": 2.5123, + "step": 1532000 + }, + { + "epoch": 7.59, + "learning_rate": 4.6205112389332306e-05, + "loss": 2.4855, + "step": 1532500 + }, + { + "epoch": 7.6, + "learning_rate": 4.620387380290622e-05, + "loss": 2.4682, + "step": 1533000 + }, + { + "epoch": 7.6, + "learning_rate": 4.620263521648014e-05, + "loss": 2.4504, + "step": 1533500 + }, + { + "epoch": 7.6, + "learning_rate": 4.620139663005406e-05, + "loss": 2.4983, + "step": 1534000 + }, + { + "epoch": 7.6, + "learning_rate": 4.6200158043627974e-05, + "loss": 2.444, + "step": 1534500 + }, + { + "epoch": 7.6, + "learning_rate": 4.619891945720189e-05, + "loss": 2.4729, + "step": 1535000 + }, + { + "epoch": 7.61, + "learning_rate": 4.619768087077581e-05, + "loss": 2.4705, + "step": 1535500 + }, + { + "epoch": 7.61, + "learning_rate": 4.619644476152257e-05, + "loss": 2.4669, + "step": 1536000 + }, + { + "epoch": 7.61, + "learning_rate": 4.619520617509649e-05, + "loss": 2.4638, + "step": 1536500 + }, + { + "epoch": 7.61, + "learning_rate": 4.6193967588670404e-05, + "loss": 2.4943, + "step": 1537000 + }, + { + "epoch": 7.62, + "learning_rate": 4.6192733956590024e-05, + "loss": 2.4793, + "step": 1537500 + }, + { + "epoch": 7.62, + "learning_rate": 4.619149537016394e-05, + "loss": 2.4636, + "step": 1538000 + }, + { + "epoch": 7.62, + "learning_rate": 4.619025678373786e-05, + "loss": 2.4792, + "step": 1538500 + }, + { + "epoch": 7.62, + "learning_rate": 4.6189018197311775e-05, + "loss": 2.4809, + "step": 1539000 + }, + { + "epoch": 7.63, + "learning_rate": 4.618777961088569e-05, + "loss": 2.4845, + "step": 1539500 + }, + { + "epoch": 7.63, + "learning_rate": 4.618654102445961e-05, + "loss": 2.4399, + "step": 1540000 + }, + { + "epoch": 7.63, + "learning_rate": 4.6185302438033526e-05, + "loss": 2.4847, + "step": 1540500 + }, + { + "epoch": 7.63, + "learning_rate": 4.618406385160744e-05, + "loss": 2.4646, + "step": 1541000 + }, + { + "epoch": 7.64, + "learning_rate": 4.618282526518136e-05, + "loss": 2.4743, + "step": 1541500 + }, + { + "epoch": 7.64, + "learning_rate": 4.618158667875527e-05, + "loss": 2.4826, + "step": 1542000 + }, + { + "epoch": 7.64, + "learning_rate": 4.618034809232919e-05, + "loss": 2.5099, + "step": 1542500 + }, + { + "epoch": 7.64, + "learning_rate": 4.6179109505903104e-05, + "loss": 2.478, + "step": 1543000 + }, + { + "epoch": 7.65, + "learning_rate": 4.617787091947702e-05, + "loss": 2.4752, + "step": 1543500 + }, + { + "epoch": 7.65, + "learning_rate": 4.617663481022379e-05, + "loss": 2.4662, + "step": 1544000 + }, + { + "epoch": 7.65, + "learning_rate": 4.6175396223797706e-05, + "loss": 2.48, + "step": 1544500 + }, + { + "epoch": 7.65, + "learning_rate": 4.617415763737162e-05, + "loss": 2.498, + "step": 1545000 + }, + { + "epoch": 7.66, + "learning_rate": 4.617291905094554e-05, + "loss": 2.4554, + "step": 1545500 + }, + { + "epoch": 7.66, + "learning_rate": 4.617168046451946e-05, + "loss": 2.481, + "step": 1546000 + }, + { + "epoch": 7.66, + "learning_rate": 4.6170444355266226e-05, + "loss": 2.4825, + "step": 1546500 + }, + { + "epoch": 7.66, + "learning_rate": 4.616920576884014e-05, + "loss": 2.4594, + "step": 1547000 + }, + { + "epoch": 7.67, + "learning_rate": 4.616796718241406e-05, + "loss": 2.4847, + "step": 1547500 + }, + { + "epoch": 7.67, + "learning_rate": 4.616672859598798e-05, + "loss": 2.4623, + "step": 1548000 + }, + { + "epoch": 7.67, + "learning_rate": 4.616549000956189e-05, + "loss": 2.4656, + "step": 1548500 + }, + { + "epoch": 7.67, + "learning_rate": 4.6164251423135804e-05, + "loss": 2.4649, + "step": 1549000 + }, + { + "epoch": 7.68, + "learning_rate": 4.616301531388257e-05, + "loss": 2.474, + "step": 1549500 + }, + { + "epoch": 7.68, + "learning_rate": 4.616177672745649e-05, + "loss": 2.4778, + "step": 1550000 + }, + { + "epoch": 7.68, + "learning_rate": 4.616054061820326e-05, + "loss": 2.4788, + "step": 1550500 + }, + { + "epoch": 7.68, + "learning_rate": 4.6159302031777175e-05, + "loss": 2.4836, + "step": 1551000 + }, + { + "epoch": 7.69, + "learning_rate": 4.615806344535109e-05, + "loss": 2.4977, + "step": 1551500 + }, + { + "epoch": 7.69, + "learning_rate": 4.615682733609786e-05, + "loss": 2.4888, + "step": 1552000 + }, + { + "epoch": 7.69, + "learning_rate": 4.615558874967177e-05, + "loss": 2.4875, + "step": 1552500 + }, + { + "epoch": 7.69, + "learning_rate": 4.61543551175914e-05, + "loss": 2.503, + "step": 1553000 + }, + { + "epoch": 7.7, + "learning_rate": 4.6153116531165316e-05, + "loss": 2.5001, + "step": 1553500 + }, + { + "epoch": 7.7, + "learning_rate": 4.615187794473923e-05, + "loss": 2.4949, + "step": 1554000 + }, + { + "epoch": 7.7, + "learning_rate": 4.615063935831315e-05, + "loss": 2.4934, + "step": 1554500 + }, + { + "epoch": 7.7, + "learning_rate": 4.6149400771887066e-05, + "loss": 2.4785, + "step": 1555000 + }, + { + "epoch": 7.71, + "learning_rate": 4.614816218546098e-05, + "loss": 2.4767, + "step": 1555500 + }, + { + "epoch": 7.71, + "learning_rate": 4.61469235990349e-05, + "loss": 2.4847, + "step": 1556000 + }, + { + "epoch": 7.71, + "learning_rate": 4.614568501260881e-05, + "loss": 2.5089, + "step": 1556500 + }, + { + "epoch": 7.71, + "learning_rate": 4.614444642618273e-05, + "loss": 2.4748, + "step": 1557000 + }, + { + "epoch": 7.72, + "learning_rate": 4.6143207839756644e-05, + "loss": 2.4601, + "step": 1557500 + }, + { + "epoch": 7.72, + "learning_rate": 4.614196925333056e-05, + "loss": 2.5098, + "step": 1558000 + }, + { + "epoch": 7.72, + "learning_rate": 4.614073066690447e-05, + "loss": 2.4963, + "step": 1558500 + }, + { + "epoch": 7.72, + "learning_rate": 4.613949208047839e-05, + "loss": 2.4725, + "step": 1559000 + }, + { + "epoch": 7.73, + "learning_rate": 4.6138253494052305e-05, + "loss": 2.4686, + "step": 1559500 + }, + { + "epoch": 7.73, + "learning_rate": 4.613701490762622e-05, + "loss": 2.4655, + "step": 1560000 + }, + { + "epoch": 7.73, + "learning_rate": 4.613577632120014e-05, + "loss": 2.4731, + "step": 1560500 + }, + { + "epoch": 7.73, + "learning_rate": 4.6134537734774056e-05, + "loss": 2.4864, + "step": 1561000 + }, + { + "epoch": 7.74, + "learning_rate": 4.613329914834797e-05, + "loss": 2.5237, + "step": 1561500 + }, + { + "epoch": 7.74, + "learning_rate": 4.613206056192189e-05, + "loss": 2.4653, + "step": 1562000 + }, + { + "epoch": 7.74, + "learning_rate": 4.613082197549581e-05, + "loss": 2.4584, + "step": 1562500 + }, + { + "epoch": 7.74, + "learning_rate": 4.6129583389069724e-05, + "loss": 2.4773, + "step": 1563000 + }, + { + "epoch": 7.75, + "learning_rate": 4.612834727981649e-05, + "loss": 2.4909, + "step": 1563500 + }, + { + "epoch": 7.75, + "learning_rate": 4.612711117056326e-05, + "loss": 2.4713, + "step": 1564000 + }, + { + "epoch": 7.75, + "learning_rate": 4.612587258413718e-05, + "loss": 2.4771, + "step": 1564500 + }, + { + "epoch": 7.75, + "learning_rate": 4.6124633997711095e-05, + "loss": 2.4854, + "step": 1565000 + }, + { + "epoch": 7.76, + "learning_rate": 4.6123397888457864e-05, + "loss": 2.4965, + "step": 1565500 + }, + { + "epoch": 7.76, + "learning_rate": 4.612215930203178e-05, + "loss": 2.4689, + "step": 1566000 + }, + { + "epoch": 7.76, + "learning_rate": 4.61209207156057e-05, + "loss": 2.4708, + "step": 1566500 + }, + { + "epoch": 7.76, + "learning_rate": 4.6119682129179615e-05, + "loss": 2.4845, + "step": 1567000 + }, + { + "epoch": 7.77, + "learning_rate": 4.611844354275353e-05, + "loss": 2.473, + "step": 1567500 + }, + { + "epoch": 7.77, + "learning_rate": 4.611720495632744e-05, + "loss": 2.4954, + "step": 1568000 + }, + { + "epoch": 7.77, + "learning_rate": 4.611596636990136e-05, + "loss": 2.4764, + "step": 1568500 + }, + { + "epoch": 7.77, + "learning_rate": 4.6114727783475276e-05, + "loss": 2.4517, + "step": 1569000 + }, + { + "epoch": 7.78, + "learning_rate": 4.611348919704919e-05, + "loss": 2.4814, + "step": 1569500 + }, + { + "epoch": 7.78, + "learning_rate": 4.611225061062311e-05, + "loss": 2.4804, + "step": 1570000 + }, + { + "epoch": 7.78, + "learning_rate": 4.6111012024197027e-05, + "loss": 2.4678, + "step": 1570500 + }, + { + "epoch": 7.78, + "learning_rate": 4.6109773437770943e-05, + "loss": 2.4607, + "step": 1571000 + }, + { + "epoch": 7.79, + "learning_rate": 4.610853485134486e-05, + "loss": 2.4835, + "step": 1571500 + }, + { + "epoch": 7.79, + "learning_rate": 4.610729874209162e-05, + "loss": 2.4995, + "step": 1572000 + }, + { + "epoch": 7.79, + "learning_rate": 4.610606015566554e-05, + "loss": 2.4387, + "step": 1572500 + }, + { + "epoch": 7.79, + "learning_rate": 4.6104824046412315e-05, + "loss": 2.451, + "step": 1573000 + }, + { + "epoch": 7.8, + "learning_rate": 4.610358545998623e-05, + "loss": 2.4665, + "step": 1573500 + }, + { + "epoch": 7.8, + "learning_rate": 4.610234687356015e-05, + "loss": 2.4839, + "step": 1574000 + }, + { + "epoch": 7.8, + "learning_rate": 4.610110828713406e-05, + "loss": 2.4874, + "step": 1574500 + }, + { + "epoch": 7.8, + "learning_rate": 4.6099869700707976e-05, + "loss": 2.4841, + "step": 1575000 + }, + { + "epoch": 7.81, + "learning_rate": 4.609863111428189e-05, + "loss": 2.4755, + "step": 1575500 + }, + { + "epoch": 7.81, + "learning_rate": 4.609739252785581e-05, + "loss": 2.4554, + "step": 1576000 + }, + { + "epoch": 7.81, + "learning_rate": 4.609615394142973e-05, + "loss": 2.4956, + "step": 1576500 + }, + { + "epoch": 7.81, + "learning_rate": 4.6094915355003644e-05, + "loss": 2.4772, + "step": 1577000 + }, + { + "epoch": 7.82, + "learning_rate": 4.609367676857756e-05, + "loss": 2.4684, + "step": 1577500 + }, + { + "epoch": 7.82, + "learning_rate": 4.609244065932433e-05, + "loss": 2.4815, + "step": 1578000 + }, + { + "epoch": 7.82, + "learning_rate": 4.6091202072898246e-05, + "loss": 2.4774, + "step": 1578500 + }, + { + "epoch": 7.82, + "learning_rate": 4.6089963486472156e-05, + "loss": 2.4714, + "step": 1579000 + }, + { + "epoch": 7.83, + "learning_rate": 4.608872490004607e-05, + "loss": 2.4645, + "step": 1579500 + }, + { + "epoch": 7.83, + "learning_rate": 4.608748631361999e-05, + "loss": 2.485, + "step": 1580000 + }, + { + "epoch": 7.83, + "learning_rate": 4.608624772719391e-05, + "loss": 2.4805, + "step": 1580500 + }, + { + "epoch": 7.83, + "learning_rate": 4.6085009140767824e-05, + "loss": 2.4852, + "step": 1581000 + }, + { + "epoch": 7.84, + "learning_rate": 4.608377055434174e-05, + "loss": 2.4744, + "step": 1581500 + }, + { + "epoch": 7.84, + "learning_rate": 4.608253196791566e-05, + "loss": 2.4791, + "step": 1582000 + }, + { + "epoch": 7.84, + "learning_rate": 4.608129585866243e-05, + "loss": 2.4555, + "step": 1582500 + }, + { + "epoch": 7.84, + "learning_rate": 4.60800597494092e-05, + "loss": 2.4637, + "step": 1583000 + }, + { + "epoch": 7.85, + "learning_rate": 4.607882116298311e-05, + "loss": 2.4863, + "step": 1583500 + }, + { + "epoch": 7.85, + "learning_rate": 4.607758257655703e-05, + "loss": 2.4752, + "step": 1584000 + }, + { + "epoch": 7.85, + "learning_rate": 4.6076343990130946e-05, + "loss": 2.4594, + "step": 1584500 + }, + { + "epoch": 7.85, + "learning_rate": 4.6075107880877715e-05, + "loss": 2.4782, + "step": 1585000 + }, + { + "epoch": 7.86, + "learning_rate": 4.607386929445163e-05, + "loss": 2.4713, + "step": 1585500 + }, + { + "epoch": 7.86, + "learning_rate": 4.607263070802555e-05, + "loss": 2.4576, + "step": 1586000 + }, + { + "epoch": 7.86, + "learning_rate": 4.607139459877232e-05, + "loss": 2.4664, + "step": 1586500 + }, + { + "epoch": 7.86, + "learning_rate": 4.6070156012346235e-05, + "loss": 2.4834, + "step": 1587000 + }, + { + "epoch": 7.87, + "learning_rate": 4.606891742592015e-05, + "loss": 2.5103, + "step": 1587500 + }, + { + "epoch": 7.87, + "learning_rate": 4.6067681316666914e-05, + "loss": 2.4813, + "step": 1588000 + }, + { + "epoch": 7.87, + "learning_rate": 4.606644273024083e-05, + "loss": 2.4637, + "step": 1588500 + }, + { + "epoch": 7.87, + "learning_rate": 4.606520414381475e-05, + "loss": 2.4753, + "step": 1589000 + }, + { + "epoch": 7.87, + "learning_rate": 4.6063965557388665e-05, + "loss": 2.4819, + "step": 1589500 + }, + { + "epoch": 7.88, + "learning_rate": 4.606272697096258e-05, + "loss": 2.4511, + "step": 1590000 + }, + { + "epoch": 7.88, + "learning_rate": 4.60614883845365e-05, + "loss": 2.4717, + "step": 1590500 + }, + { + "epoch": 7.88, + "learning_rate": 4.6060249798110415e-05, + "loss": 2.489, + "step": 1591000 + }, + { + "epoch": 7.88, + "learning_rate": 4.605901121168433e-05, + "loss": 2.4643, + "step": 1591500 + }, + { + "epoch": 7.89, + "learning_rate": 4.605777262525825e-05, + "loss": 2.4807, + "step": 1592000 + }, + { + "epoch": 7.89, + "learning_rate": 4.605653651600502e-05, + "loss": 2.474, + "step": 1592500 + }, + { + "epoch": 7.89, + "learning_rate": 4.6055297929578935e-05, + "loss": 2.4841, + "step": 1593000 + }, + { + "epoch": 7.89, + "learning_rate": 4.605405934315285e-05, + "loss": 2.4805, + "step": 1593500 + }, + { + "epoch": 7.9, + "learning_rate": 4.605282075672677e-05, + "loss": 2.4747, + "step": 1594000 + }, + { + "epoch": 7.9, + "learning_rate": 4.6051582170300686e-05, + "loss": 2.4757, + "step": 1594500 + }, + { + "epoch": 7.9, + "learning_rate": 4.60503435838746e-05, + "loss": 2.4795, + "step": 1595000 + }, + { + "epoch": 7.9, + "learning_rate": 4.604910499744852e-05, + "loss": 2.4725, + "step": 1595500 + }, + { + "epoch": 7.91, + "learning_rate": 4.604786641102243e-05, + "loss": 2.4809, + "step": 1596000 + }, + { + "epoch": 7.91, + "learning_rate": 4.604662782459635e-05, + "loss": 2.4966, + "step": 1596500 + }, + { + "epoch": 7.91, + "learning_rate": 4.6045389238170264e-05, + "loss": 2.4485, + "step": 1597000 + }, + { + "epoch": 7.91, + "learning_rate": 4.604415065174418e-05, + "loss": 2.4603, + "step": 1597500 + }, + { + "epoch": 7.92, + "learning_rate": 4.60429120653181e-05, + "loss": 2.4999, + "step": 1598000 + }, + { + "epoch": 7.92, + "learning_rate": 4.6041673478892014e-05, + "loss": 2.4641, + "step": 1598500 + }, + { + "epoch": 7.92, + "learning_rate": 4.604043736963878e-05, + "loss": 2.473, + "step": 1599000 + }, + { + "epoch": 7.92, + "learning_rate": 4.603919878321269e-05, + "loss": 2.4771, + "step": 1599500 + }, + { + "epoch": 7.93, + "learning_rate": 4.603796267395947e-05, + "loss": 2.4965, + "step": 1600000 + }, + { + "epoch": 7.93, + "learning_rate": 4.6036724087533386e-05, + "loss": 2.4797, + "step": 1600500 + }, + { + "epoch": 7.93, + "learning_rate": 4.60354855011073e-05, + "loss": 2.4846, + "step": 1601000 + }, + { + "epoch": 7.93, + "learning_rate": 4.603424691468122e-05, + "loss": 2.4528, + "step": 1601500 + }, + { + "epoch": 7.94, + "learning_rate": 4.6033008328255137e-05, + "loss": 2.4811, + "step": 1602000 + }, + { + "epoch": 7.94, + "learning_rate": 4.603176974182905e-05, + "loss": 2.4997, + "step": 1602500 + }, + { + "epoch": 7.94, + "learning_rate": 4.6030533632575816e-05, + "loss": 2.4839, + "step": 1603000 + }, + { + "epoch": 7.94, + "learning_rate": 4.602929504614973e-05, + "loss": 2.4663, + "step": 1603500 + }, + { + "epoch": 7.95, + "learning_rate": 4.60280589368965e-05, + "loss": 2.4979, + "step": 1604000 + }, + { + "epoch": 7.95, + "learning_rate": 4.602682035047042e-05, + "loss": 2.4584, + "step": 1604500 + }, + { + "epoch": 7.95, + "learning_rate": 4.6025581764044335e-05, + "loss": 2.4736, + "step": 1605000 + }, + { + "epoch": 7.95, + "learning_rate": 4.602434317761825e-05, + "loss": 2.4942, + "step": 1605500 + }, + { + "epoch": 7.96, + "learning_rate": 4.602310459119217e-05, + "loss": 2.4717, + "step": 1606000 + }, + { + "epoch": 7.96, + "learning_rate": 4.6021866004766086e-05, + "loss": 2.4614, + "step": 1606500 + }, + { + "epoch": 7.96, + "learning_rate": 4.602062741834e-05, + "loss": 2.4844, + "step": 1607000 + }, + { + "epoch": 7.96, + "learning_rate": 4.601938883191392e-05, + "loss": 2.469, + "step": 1607500 + }, + { + "epoch": 7.97, + "learning_rate": 4.601815272266068e-05, + "loss": 2.48, + "step": 1608000 + }, + { + "epoch": 7.97, + "learning_rate": 4.60169141362346e-05, + "loss": 2.4687, + "step": 1608500 + }, + { + "epoch": 7.97, + "learning_rate": 4.6015675549808516e-05, + "loss": 2.4747, + "step": 1609000 + }, + { + "epoch": 7.97, + "learning_rate": 4.601443696338243e-05, + "loss": 2.4615, + "step": 1609500 + }, + { + "epoch": 7.98, + "learning_rate": 4.601319837695635e-05, + "loss": 2.5101, + "step": 1610000 + }, + { + "epoch": 7.98, + "learning_rate": 4.6011959790530266e-05, + "loss": 2.4775, + "step": 1610500 + }, + { + "epoch": 7.98, + "learning_rate": 4.6010723681277035e-05, + "loss": 2.4492, + "step": 1611000 + }, + { + "epoch": 7.98, + "learning_rate": 4.600948509485095e-05, + "loss": 2.49, + "step": 1611500 + }, + { + "epoch": 7.99, + "learning_rate": 4.600824650842487e-05, + "loss": 2.4681, + "step": 1612000 + }, + { + "epoch": 7.99, + "learning_rate": 4.6007007921998786e-05, + "loss": 2.4747, + "step": 1612500 + }, + { + "epoch": 7.99, + "learning_rate": 4.60057693355727e-05, + "loss": 2.4882, + "step": 1613000 + }, + { + "epoch": 7.99, + "learning_rate": 4.600453074914662e-05, + "loss": 2.4759, + "step": 1613500 + }, + { + "epoch": 8.0, + "learning_rate": 4.600329216272054e-05, + "loss": 2.4905, + "step": 1614000 + }, + { + "epoch": 8.0, + "learning_rate": 4.6002053576294454e-05, + "loss": 2.4694, + "step": 1614500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6397272831364085, + "eval_accuracy_mlm": 0.59286278093972, + "eval_accuracy_nsp": 0.8609541141909092, + "eval_loss": 2.4369935989379883, + "eval_runtime": 147.379, + "eval_samples_per_second": 1729.955, + "eval_steps_per_second": 72.086, + "step": 1614744 + }, + { + "epoch": 8.0, + "learning_rate": 4.6000814989868364e-05, + "loss": 2.4571, + "step": 1615000 + }, + { + "epoch": 8.0, + "learning_rate": 4.599957640344228e-05, + "loss": 2.4342, + "step": 1615500 + }, + { + "epoch": 8.01, + "learning_rate": 4.599834029418905e-05, + "loss": 2.4282, + "step": 1616000 + }, + { + "epoch": 8.01, + "learning_rate": 4.599710418493582e-05, + "loss": 2.4413, + "step": 1616500 + }, + { + "epoch": 8.01, + "learning_rate": 4.5995865598509735e-05, + "loss": 2.4362, + "step": 1617000 + }, + { + "epoch": 8.01, + "learning_rate": 4.599462701208365e-05, + "loss": 2.4393, + "step": 1617500 + }, + { + "epoch": 8.02, + "learning_rate": 4.599338842565757e-05, + "loss": 2.4598, + "step": 1618000 + }, + { + "epoch": 8.02, + "learning_rate": 4.599215231640434e-05, + "loss": 2.4666, + "step": 1618500 + }, + { + "epoch": 8.02, + "learning_rate": 4.5990913729978255e-05, + "loss": 2.4484, + "step": 1619000 + }, + { + "epoch": 8.02, + "learning_rate": 4.598967514355217e-05, + "loss": 2.4482, + "step": 1619500 + }, + { + "epoch": 8.03, + "learning_rate": 4.598843655712609e-05, + "loss": 2.4395, + "step": 1620000 + }, + { + "epoch": 8.03, + "learning_rate": 4.598720044787285e-05, + "loss": 2.4311, + "step": 1620500 + }, + { + "epoch": 8.03, + "learning_rate": 4.598596186144677e-05, + "loss": 2.438, + "step": 1621000 + }, + { + "epoch": 8.03, + "learning_rate": 4.5984723275020685e-05, + "loss": 2.4487, + "step": 1621500 + }, + { + "epoch": 8.04, + "learning_rate": 4.59834846885946e-05, + "loss": 2.4532, + "step": 1622000 + }, + { + "epoch": 8.04, + "learning_rate": 4.598224610216852e-05, + "loss": 2.4735, + "step": 1622500 + }, + { + "epoch": 8.04, + "learning_rate": 4.5981007515742436e-05, + "loss": 2.4447, + "step": 1623000 + }, + { + "epoch": 8.04, + "learning_rate": 4.597976892931635e-05, + "loss": 2.4488, + "step": 1623500 + }, + { + "epoch": 8.05, + "learning_rate": 4.597853034289027e-05, + "loss": 2.437, + "step": 1624000 + }, + { + "epoch": 8.05, + "learning_rate": 4.597729423363704e-05, + "loss": 2.4128, + "step": 1624500 + }, + { + "epoch": 8.05, + "learning_rate": 4.5976055647210955e-05, + "loss": 2.4574, + "step": 1625000 + }, + { + "epoch": 8.05, + "learning_rate": 4.5974822015130576e-05, + "loss": 2.4699, + "step": 1625500 + }, + { + "epoch": 8.06, + "learning_rate": 4.597358342870449e-05, + "loss": 2.4535, + "step": 1626000 + }, + { + "epoch": 8.06, + "learning_rate": 4.597234484227841e-05, + "loss": 2.463, + "step": 1626500 + }, + { + "epoch": 8.06, + "learning_rate": 4.5971106255852327e-05, + "loss": 2.477, + "step": 1627000 + }, + { + "epoch": 8.06, + "learning_rate": 4.5969867669426244e-05, + "loss": 2.4738, + "step": 1627500 + }, + { + "epoch": 8.07, + "learning_rate": 4.5968629083000154e-05, + "loss": 2.4388, + "step": 1628000 + }, + { + "epoch": 8.07, + "learning_rate": 4.596739049657407e-05, + "loss": 2.4269, + "step": 1628500 + }, + { + "epoch": 8.07, + "learning_rate": 4.596615191014799e-05, + "loss": 2.453, + "step": 1629000 + }, + { + "epoch": 8.07, + "learning_rate": 4.5964913323721904e-05, + "loss": 2.4559, + "step": 1629500 + }, + { + "epoch": 8.08, + "learning_rate": 4.596367473729582e-05, + "loss": 2.4375, + "step": 1630000 + }, + { + "epoch": 8.08, + "learning_rate": 4.596243615086974e-05, + "loss": 2.4509, + "step": 1630500 + }, + { + "epoch": 8.08, + "learning_rate": 4.5961197564443655e-05, + "loss": 2.4442, + "step": 1631000 + }, + { + "epoch": 8.08, + "learning_rate": 4.595995897801757e-05, + "loss": 2.4474, + "step": 1631500 + }, + { + "epoch": 8.09, + "learning_rate": 4.595872039159149e-05, + "loss": 2.4552, + "step": 1632000 + }, + { + "epoch": 8.09, + "learning_rate": 4.5957481805165406e-05, + "loss": 2.4695, + "step": 1632500 + }, + { + "epoch": 8.09, + "learning_rate": 4.595624321873932e-05, + "loss": 2.4425, + "step": 1633000 + }, + { + "epoch": 8.09, + "learning_rate": 4.595500463231324e-05, + "loss": 2.4627, + "step": 1633500 + }, + { + "epoch": 8.1, + "learning_rate": 4.595376852306e-05, + "loss": 2.4539, + "step": 1634000 + }, + { + "epoch": 8.1, + "learning_rate": 4.595252993663392e-05, + "loss": 2.4307, + "step": 1634500 + }, + { + "epoch": 8.1, + "learning_rate": 4.5951291350207836e-05, + "loss": 2.446, + "step": 1635000 + }, + { + "epoch": 8.1, + "learning_rate": 4.595005276378175e-05, + "loss": 2.4706, + "step": 1635500 + }, + { + "epoch": 8.11, + "learning_rate": 4.594881417735567e-05, + "loss": 2.4612, + "step": 1636000 + }, + { + "epoch": 8.11, + "learning_rate": 4.594757806810244e-05, + "loss": 2.4764, + "step": 1636500 + }, + { + "epoch": 8.11, + "learning_rate": 4.5946339481676355e-05, + "loss": 2.4155, + "step": 1637000 + }, + { + "epoch": 8.11, + "learning_rate": 4.594510337242312e-05, + "loss": 2.4596, + "step": 1637500 + }, + { + "epoch": 8.12, + "learning_rate": 4.5943864785997034e-05, + "loss": 2.4651, + "step": 1638000 + }, + { + "epoch": 8.12, + "learning_rate": 4.594262619957095e-05, + "loss": 2.4594, + "step": 1638500 + }, + { + "epoch": 8.12, + "learning_rate": 4.594138761314487e-05, + "loss": 2.4223, + "step": 1639000 + }, + { + "epoch": 8.12, + "learning_rate": 4.5940149026718785e-05, + "loss": 2.4587, + "step": 1639500 + }, + { + "epoch": 8.13, + "learning_rate": 4.59389104402927e-05, + "loss": 2.4586, + "step": 1640000 + }, + { + "epoch": 8.13, + "learning_rate": 4.593767185386662e-05, + "loss": 2.4567, + "step": 1640500 + }, + { + "epoch": 8.13, + "learning_rate": 4.5936433267440536e-05, + "loss": 2.4287, + "step": 1641000 + }, + { + "epoch": 8.13, + "learning_rate": 4.593519468101445e-05, + "loss": 2.4658, + "step": 1641500 + }, + { + "epoch": 8.14, + "learning_rate": 4.593395609458837e-05, + "loss": 2.4436, + "step": 1642000 + }, + { + "epoch": 8.14, + "learning_rate": 4.593271750816229e-05, + "loss": 2.4596, + "step": 1642500 + }, + { + "epoch": 8.14, + "learning_rate": 4.5931478921736204e-05, + "loss": 2.4541, + "step": 1643000 + }, + { + "epoch": 8.14, + "learning_rate": 4.593024281248297e-05, + "loss": 2.4404, + "step": 1643500 + }, + { + "epoch": 8.14, + "learning_rate": 4.592900422605689e-05, + "loss": 2.4754, + "step": 1644000 + }, + { + "epoch": 8.15, + "learning_rate": 4.592776811680365e-05, + "loss": 2.4406, + "step": 1644500 + }, + { + "epoch": 8.15, + "learning_rate": 4.592652953037757e-05, + "loss": 2.4571, + "step": 1645000 + }, + { + "epoch": 8.15, + "learning_rate": 4.5925290943951485e-05, + "loss": 2.4375, + "step": 1645500 + }, + { + "epoch": 8.15, + "learning_rate": 4.59240523575254e-05, + "loss": 2.4535, + "step": 1646000 + }, + { + "epoch": 8.16, + "learning_rate": 4.592281377109932e-05, + "loss": 2.4388, + "step": 1646500 + }, + { + "epoch": 8.16, + "learning_rate": 4.5921575184673236e-05, + "loss": 2.4679, + "step": 1647000 + }, + { + "epoch": 8.16, + "learning_rate": 4.592033659824715e-05, + "loss": 2.443, + "step": 1647500 + }, + { + "epoch": 8.16, + "learning_rate": 4.591909801182107e-05, + "loss": 2.449, + "step": 1648000 + }, + { + "epoch": 8.17, + "learning_rate": 4.591786190256784e-05, + "loss": 2.463, + "step": 1648500 + }, + { + "epoch": 8.17, + "learning_rate": 4.5916623316141756e-05, + "loss": 2.4809, + "step": 1649000 + }, + { + "epoch": 8.17, + "learning_rate": 4.591538472971567e-05, + "loss": 2.4569, + "step": 1649500 + }, + { + "epoch": 8.17, + "learning_rate": 4.591414614328959e-05, + "loss": 2.4376, + "step": 1650000 + }, + { + "epoch": 8.18, + "learning_rate": 4.5912907556863506e-05, + "loss": 2.4565, + "step": 1650500 + }, + { + "epoch": 8.18, + "learning_rate": 4.591166897043742e-05, + "loss": 2.448, + "step": 1651000 + }, + { + "epoch": 8.18, + "learning_rate": 4.591043038401134e-05, + "loss": 2.4753, + "step": 1651500 + }, + { + "epoch": 8.18, + "learning_rate": 4.590919179758526e-05, + "loss": 2.4302, + "step": 1652000 + }, + { + "epoch": 8.19, + "learning_rate": 4.5907953211159174e-05, + "loss": 2.4589, + "step": 1652500 + }, + { + "epoch": 8.19, + "learning_rate": 4.590671462473309e-05, + "loss": 2.454, + "step": 1653000 + }, + { + "epoch": 8.19, + "learning_rate": 4.590547603830701e-05, + "loss": 2.442, + "step": 1653500 + }, + { + "epoch": 8.19, + "learning_rate": 4.590423992905377e-05, + "loss": 2.4509, + "step": 1654000 + }, + { + "epoch": 8.2, + "learning_rate": 4.590300134262769e-05, + "loss": 2.4578, + "step": 1654500 + }, + { + "epoch": 8.2, + "learning_rate": 4.5901765233374456e-05, + "loss": 2.4688, + "step": 1655000 + }, + { + "epoch": 8.2, + "learning_rate": 4.590052664694837e-05, + "loss": 2.4823, + "step": 1655500 + }, + { + "epoch": 8.2, + "learning_rate": 4.589928806052229e-05, + "loss": 2.4619, + "step": 1656000 + }, + { + "epoch": 8.21, + "learning_rate": 4.5898049474096207e-05, + "loss": 2.443, + "step": 1656500 + }, + { + "epoch": 8.21, + "learning_rate": 4.5896810887670123e-05, + "loss": 2.4455, + "step": 1657000 + }, + { + "epoch": 8.21, + "learning_rate": 4.5895574778416885e-05, + "loss": 2.4619, + "step": 1657500 + }, + { + "epoch": 8.21, + "learning_rate": 4.589433866916366e-05, + "loss": 2.4513, + "step": 1658000 + }, + { + "epoch": 8.22, + "learning_rate": 4.589310008273758e-05, + "loss": 2.4317, + "step": 1658500 + }, + { + "epoch": 8.22, + "learning_rate": 4.5891861496311495e-05, + "loss": 2.4583, + "step": 1659000 + }, + { + "epoch": 8.22, + "learning_rate": 4.5890625387058264e-05, + "loss": 2.4789, + "step": 1659500 + }, + { + "epoch": 8.22, + "learning_rate": 4.588938680063218e-05, + "loss": 2.4496, + "step": 1660000 + }, + { + "epoch": 8.23, + "learning_rate": 4.58881482142061e-05, + "loss": 2.4006, + "step": 1660500 + }, + { + "epoch": 8.23, + "learning_rate": 4.5886909627780015e-05, + "loss": 2.4678, + "step": 1661000 + }, + { + "epoch": 8.23, + "learning_rate": 4.588567104135393e-05, + "loss": 2.4779, + "step": 1661500 + }, + { + "epoch": 8.23, + "learning_rate": 4.588443245492784e-05, + "loss": 2.4671, + "step": 1662000 + }, + { + "epoch": 8.24, + "learning_rate": 4.588319386850176e-05, + "loss": 2.457, + "step": 1662500 + }, + { + "epoch": 8.24, + "learning_rate": 4.5881955282075675e-05, + "loss": 2.466, + "step": 1663000 + }, + { + "epoch": 8.24, + "learning_rate": 4.588071669564959e-05, + "loss": 2.4845, + "step": 1663500 + }, + { + "epoch": 8.24, + "learning_rate": 4.587947810922351e-05, + "loss": 2.4621, + "step": 1664000 + }, + { + "epoch": 8.25, + "learning_rate": 4.587823952279742e-05, + "loss": 2.455, + "step": 1664500 + }, + { + "epoch": 8.25, + "learning_rate": 4.5877000936371336e-05, + "loss": 2.4329, + "step": 1665000 + }, + { + "epoch": 8.25, + "learning_rate": 4.587576234994525e-05, + "loss": 2.4424, + "step": 1665500 + }, + { + "epoch": 8.25, + "learning_rate": 4.587452376351917e-05, + "loss": 2.4667, + "step": 1666000 + }, + { + "epoch": 8.26, + "learning_rate": 4.587328765426594e-05, + "loss": 2.4531, + "step": 1666500 + }, + { + "epoch": 8.26, + "learning_rate": 4.5872049067839856e-05, + "loss": 2.4527, + "step": 1667000 + }, + { + "epoch": 8.26, + "learning_rate": 4.587081048141377e-05, + "loss": 2.4457, + "step": 1667500 + }, + { + "epoch": 8.26, + "learning_rate": 4.586957189498769e-05, + "loss": 2.4557, + "step": 1668000 + }, + { + "epoch": 8.27, + "learning_rate": 4.586833330856161e-05, + "loss": 2.4627, + "step": 1668500 + }, + { + "epoch": 8.27, + "learning_rate": 4.5867094722135524e-05, + "loss": 2.4349, + "step": 1669000 + }, + { + "epoch": 8.27, + "learning_rate": 4.586585613570944e-05, + "loss": 2.4301, + "step": 1669500 + }, + { + "epoch": 8.27, + "learning_rate": 4.586461754928336e-05, + "loss": 2.4527, + "step": 1670000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5863378962857274e-05, + "loss": 2.4991, + "step": 1670500 + }, + { + "epoch": 8.28, + "learning_rate": 4.5862142853604037e-05, + "loss": 2.4676, + "step": 1671000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5860904267177953e-05, + "loss": 2.4682, + "step": 1671500 + }, + { + "epoch": 8.28, + "learning_rate": 4.585966568075187e-05, + "loss": 2.4547, + "step": 1672000 + }, + { + "epoch": 8.29, + "learning_rate": 4.585842709432579e-05, + "loss": 2.454, + "step": 1672500 + }, + { + "epoch": 8.29, + "learning_rate": 4.5857190985072556e-05, + "loss": 2.4367, + "step": 1673000 + }, + { + "epoch": 8.29, + "learning_rate": 4.585595239864647e-05, + "loss": 2.4829, + "step": 1673500 + }, + { + "epoch": 8.29, + "learning_rate": 4.585471381222039e-05, + "loss": 2.4399, + "step": 1674000 + }, + { + "epoch": 8.3, + "learning_rate": 4.585347522579431e-05, + "loss": 2.4632, + "step": 1674500 + }, + { + "epoch": 8.3, + "learning_rate": 4.5852236639368224e-05, + "loss": 2.4526, + "step": 1675000 + }, + { + "epoch": 8.3, + "learning_rate": 4.585099805294214e-05, + "loss": 2.4805, + "step": 1675500 + }, + { + "epoch": 8.3, + "learning_rate": 4.584976194368891e-05, + "loss": 2.4633, + "step": 1676000 + }, + { + "epoch": 8.31, + "learning_rate": 4.5848523357262826e-05, + "loss": 2.4558, + "step": 1676500 + }, + { + "epoch": 8.31, + "learning_rate": 4.5847284770836743e-05, + "loss": 2.4494, + "step": 1677000 + }, + { + "epoch": 8.31, + "learning_rate": 4.584604618441066e-05, + "loss": 2.4412, + "step": 1677500 + }, + { + "epoch": 8.31, + "learning_rate": 4.584481255233028e-05, + "loss": 2.4841, + "step": 1678000 + }, + { + "epoch": 8.32, + "learning_rate": 4.58435739659042e-05, + "loss": 2.4545, + "step": 1678500 + }, + { + "epoch": 8.32, + "learning_rate": 4.5842335379478115e-05, + "loss": 2.4488, + "step": 1679000 + }, + { + "epoch": 8.32, + "learning_rate": 4.584109679305203e-05, + "loss": 2.454, + "step": 1679500 + }, + { + "epoch": 8.32, + "learning_rate": 4.583985820662595e-05, + "loss": 2.4556, + "step": 1680000 + }, + { + "epoch": 8.33, + "learning_rate": 4.5838619620199866e-05, + "loss": 2.4793, + "step": 1680500 + }, + { + "epoch": 8.33, + "learning_rate": 4.5837381033773776e-05, + "loss": 2.4807, + "step": 1681000 + }, + { + "epoch": 8.33, + "learning_rate": 4.583614244734769e-05, + "loss": 2.4468, + "step": 1681500 + }, + { + "epoch": 8.33, + "learning_rate": 4.583490386092161e-05, + "loss": 2.4587, + "step": 1682000 + }, + { + "epoch": 8.34, + "learning_rate": 4.583366775166838e-05, + "loss": 2.4789, + "step": 1682500 + }, + { + "epoch": 8.34, + "learning_rate": 4.583243164241515e-05, + "loss": 2.4712, + "step": 1683000 + }, + { + "epoch": 8.34, + "learning_rate": 4.5831193055989064e-05, + "loss": 2.4732, + "step": 1683500 + }, + { + "epoch": 8.34, + "learning_rate": 4.582995446956298e-05, + "loss": 2.4527, + "step": 1684000 + }, + { + "epoch": 8.35, + "learning_rate": 4.582871836030975e-05, + "loss": 2.4415, + "step": 1684500 + }, + { + "epoch": 8.35, + "learning_rate": 4.582747977388367e-05, + "loss": 2.4473, + "step": 1685000 + }, + { + "epoch": 8.35, + "learning_rate": 4.582624118745758e-05, + "loss": 2.4513, + "step": 1685500 + }, + { + "epoch": 8.35, + "learning_rate": 4.5825002601031494e-05, + "loss": 2.4484, + "step": 1686000 + }, + { + "epoch": 8.36, + "learning_rate": 4.582376649177826e-05, + "loss": 2.454, + "step": 1686500 + }, + { + "epoch": 8.36, + "learning_rate": 4.582252790535218e-05, + "loss": 2.4445, + "step": 1687000 + }, + { + "epoch": 8.36, + "learning_rate": 4.58212893189261e-05, + "loss": 2.4324, + "step": 1687500 + }, + { + "epoch": 8.36, + "learning_rate": 4.5820050732500014e-05, + "loss": 2.4387, + "step": 1688000 + }, + { + "epoch": 8.37, + "learning_rate": 4.581881214607393e-05, + "loss": 2.4302, + "step": 1688500 + }, + { + "epoch": 8.37, + "learning_rate": 4.581757355964785e-05, + "loss": 2.4632, + "step": 1689000 + }, + { + "epoch": 8.37, + "learning_rate": 4.5816334973221764e-05, + "loss": 2.4692, + "step": 1689500 + }, + { + "epoch": 8.37, + "learning_rate": 4.581509638679568e-05, + "loss": 2.4576, + "step": 1690000 + }, + { + "epoch": 8.38, + "learning_rate": 4.58138578003696e-05, + "loss": 2.454, + "step": 1690500 + }, + { + "epoch": 8.38, + "learning_rate": 4.5812619213943515e-05, + "loss": 2.4575, + "step": 1691000 + }, + { + "epoch": 8.38, + "learning_rate": 4.581138062751743e-05, + "loss": 2.4608, + "step": 1691500 + }, + { + "epoch": 8.38, + "learning_rate": 4.581014204109135e-05, + "loss": 2.4546, + "step": 1692000 + }, + { + "epoch": 8.39, + "learning_rate": 4.580890593183811e-05, + "loss": 2.4915, + "step": 1692500 + }, + { + "epoch": 8.39, + "learning_rate": 4.580766734541203e-05, + "loss": 2.4625, + "step": 1693000 + }, + { + "epoch": 8.39, + "learning_rate": 4.5806428758985945e-05, + "loss": 2.4287, + "step": 1693500 + }, + { + "epoch": 8.39, + "learning_rate": 4.580519017255986e-05, + "loss": 2.45, + "step": 1694000 + }, + { + "epoch": 8.4, + "learning_rate": 4.580395406330663e-05, + "loss": 2.4605, + "step": 1694500 + }, + { + "epoch": 8.4, + "learning_rate": 4.580271547688055e-05, + "loss": 2.4412, + "step": 1695000 + }, + { + "epoch": 8.4, + "learning_rate": 4.5801476890454464e-05, + "loss": 2.4722, + "step": 1695500 + }, + { + "epoch": 8.4, + "learning_rate": 4.580023830402838e-05, + "loss": 2.4585, + "step": 1696000 + }, + { + "epoch": 8.41, + "learning_rate": 4.57989997176023e-05, + "loss": 2.4503, + "step": 1696500 + }, + { + "epoch": 8.41, + "learning_rate": 4.579776360834907e-05, + "loss": 2.4558, + "step": 1697000 + }, + { + "epoch": 8.41, + "learning_rate": 4.5796525021922984e-05, + "loss": 2.4635, + "step": 1697500 + }, + { + "epoch": 8.41, + "learning_rate": 4.5795288912669746e-05, + "loss": 2.4488, + "step": 1698000 + }, + { + "epoch": 8.41, + "learning_rate": 4.579405032624366e-05, + "loss": 2.4235, + "step": 1698500 + }, + { + "epoch": 8.42, + "learning_rate": 4.579281173981758e-05, + "loss": 2.471, + "step": 1699000 + }, + { + "epoch": 8.42, + "learning_rate": 4.5791575630564356e-05, + "loss": 2.4608, + "step": 1699500 + }, + { + "epoch": 8.42, + "learning_rate": 4.579033704413827e-05, + "loss": 2.4558, + "step": 1700000 + }, + { + "epoch": 8.42, + "learning_rate": 4.578909845771218e-05, + "loss": 2.4506, + "step": 1700500 + }, + { + "epoch": 8.43, + "learning_rate": 4.57878598712861e-05, + "loss": 2.4858, + "step": 1701000 + }, + { + "epoch": 8.43, + "learning_rate": 4.5786621284860016e-05, + "loss": 2.4645, + "step": 1701500 + }, + { + "epoch": 8.43, + "learning_rate": 4.5785385175606785e-05, + "loss": 2.4548, + "step": 1702000 + }, + { + "epoch": 8.43, + "learning_rate": 4.57841465891807e-05, + "loss": 2.4721, + "step": 1702500 + }, + { + "epoch": 8.44, + "learning_rate": 4.578290800275462e-05, + "loss": 2.4628, + "step": 1703000 + }, + { + "epoch": 8.44, + "learning_rate": 4.578166941632853e-05, + "loss": 2.4656, + "step": 1703500 + }, + { + "epoch": 8.44, + "learning_rate": 4.5780430829902446e-05, + "loss": 2.4542, + "step": 1704000 + }, + { + "epoch": 8.44, + "learning_rate": 4.577919224347636e-05, + "loss": 2.4838, + "step": 1704500 + }, + { + "epoch": 8.45, + "learning_rate": 4.577795365705028e-05, + "loss": 2.4511, + "step": 1705000 + }, + { + "epoch": 8.45, + "learning_rate": 4.57767150706242e-05, + "loss": 2.4663, + "step": 1705500 + }, + { + "epoch": 8.45, + "learning_rate": 4.5775476484198114e-05, + "loss": 2.4657, + "step": 1706000 + }, + { + "epoch": 8.45, + "learning_rate": 4.577423789777203e-05, + "loss": 2.4396, + "step": 1706500 + }, + { + "epoch": 8.46, + "learning_rate": 4.577299931134595e-05, + "loss": 2.4544, + "step": 1707000 + }, + { + "epoch": 8.46, + "learning_rate": 4.5771760724919865e-05, + "loss": 2.4735, + "step": 1707500 + }, + { + "epoch": 8.46, + "learning_rate": 4.577052213849378e-05, + "loss": 2.4564, + "step": 1708000 + }, + { + "epoch": 8.46, + "learning_rate": 4.57692835520677e-05, + "loss": 2.4455, + "step": 1708500 + }, + { + "epoch": 8.47, + "learning_rate": 4.576804744281447e-05, + "loss": 2.4593, + "step": 1709000 + }, + { + "epoch": 8.47, + "learning_rate": 4.5766808856388384e-05, + "loss": 2.468, + "step": 1709500 + }, + { + "epoch": 8.47, + "learning_rate": 4.57655702699623e-05, + "loss": 2.4607, + "step": 1710000 + }, + { + "epoch": 8.47, + "learning_rate": 4.576433168353622e-05, + "loss": 2.4671, + "step": 1710500 + }, + { + "epoch": 8.48, + "learning_rate": 4.5763093097110135e-05, + "loss": 2.4854, + "step": 1711000 + }, + { + "epoch": 8.48, + "learning_rate": 4.576185451068405e-05, + "loss": 2.4792, + "step": 1711500 + }, + { + "epoch": 8.48, + "learning_rate": 4.576061592425797e-05, + "loss": 2.4591, + "step": 1712000 + }, + { + "epoch": 8.48, + "learning_rate": 4.575937733783188e-05, + "loss": 2.464, + "step": 1712500 + }, + { + "epoch": 8.49, + "learning_rate": 4.5758138751405796e-05, + "loss": 2.4626, + "step": 1713000 + }, + { + "epoch": 8.49, + "learning_rate": 4.575690016497971e-05, + "loss": 2.4861, + "step": 1713500 + }, + { + "epoch": 8.49, + "learning_rate": 4.5755666532899334e-05, + "loss": 2.4691, + "step": 1714000 + }, + { + "epoch": 8.49, + "learning_rate": 4.575442794647325e-05, + "loss": 2.4656, + "step": 1714500 + }, + { + "epoch": 8.5, + "learning_rate": 4.575318936004717e-05, + "loss": 2.4999, + "step": 1715000 + }, + { + "epoch": 8.5, + "learning_rate": 4.5751950773621084e-05, + "loss": 2.4376, + "step": 1715500 + }, + { + "epoch": 8.5, + "learning_rate": 4.5750712187195e-05, + "loss": 2.4519, + "step": 1716000 + }, + { + "epoch": 8.5, + "learning_rate": 4.5749476077941763e-05, + "loss": 2.4505, + "step": 1716500 + }, + { + "epoch": 8.51, + "learning_rate": 4.574823749151568e-05, + "loss": 2.4783, + "step": 1717000 + }, + { + "epoch": 8.51, + "learning_rate": 4.57469989050896e-05, + "loss": 2.437, + "step": 1717500 + }, + { + "epoch": 8.51, + "learning_rate": 4.5745760318663514e-05, + "loss": 2.4739, + "step": 1718000 + }, + { + "epoch": 8.51, + "learning_rate": 4.574452420941029e-05, + "loss": 2.4648, + "step": 1718500 + }, + { + "epoch": 8.52, + "learning_rate": 4.574328562298421e-05, + "loss": 2.4603, + "step": 1719000 + }, + { + "epoch": 8.52, + "learning_rate": 4.574204703655812e-05, + "loss": 2.4343, + "step": 1719500 + }, + { + "epoch": 8.52, + "learning_rate": 4.5740808450132034e-05, + "loss": 2.4705, + "step": 1720000 + }, + { + "epoch": 8.52, + "learning_rate": 4.573956986370595e-05, + "loss": 2.4513, + "step": 1720500 + }, + { + "epoch": 8.53, + "learning_rate": 4.573833127727987e-05, + "loss": 2.4702, + "step": 1721000 + }, + { + "epoch": 8.53, + "learning_rate": 4.5737092690853785e-05, + "loss": 2.4558, + "step": 1721500 + }, + { + "epoch": 8.53, + "learning_rate": 4.57358541044277e-05, + "loss": 2.4708, + "step": 1722000 + }, + { + "epoch": 8.53, + "learning_rate": 4.573461551800162e-05, + "loss": 2.4531, + "step": 1722500 + }, + { + "epoch": 8.54, + "learning_rate": 4.5733376931575535e-05, + "loss": 2.4685, + "step": 1723000 + }, + { + "epoch": 8.54, + "learning_rate": 4.573213834514945e-05, + "loss": 2.4622, + "step": 1723500 + }, + { + "epoch": 8.54, + "learning_rate": 4.573089975872337e-05, + "loss": 2.4663, + "step": 1724000 + }, + { + "epoch": 8.54, + "learning_rate": 4.572966364947013e-05, + "loss": 2.4865, + "step": 1724500 + }, + { + "epoch": 8.55, + "learning_rate": 4.572842506304405e-05, + "loss": 2.4635, + "step": 1725000 + }, + { + "epoch": 8.55, + "learning_rate": 4.5727186476617965e-05, + "loss": 2.4426, + "step": 1725500 + }, + { + "epoch": 8.55, + "learning_rate": 4.572594789019188e-05, + "loss": 2.4789, + "step": 1726000 + }, + { + "epoch": 8.55, + "learning_rate": 4.57247093037658e-05, + "loss": 2.4653, + "step": 1726500 + }, + { + "epoch": 8.56, + "learning_rate": 4.5723470717339716e-05, + "loss": 2.4574, + "step": 1727000 + }, + { + "epoch": 8.56, + "learning_rate": 4.572223213091363e-05, + "loss": 2.447, + "step": 1727500 + }, + { + "epoch": 8.56, + "learning_rate": 4.572099354448755e-05, + "loss": 2.4368, + "step": 1728000 + }, + { + "epoch": 8.56, + "learning_rate": 4.571975495806147e-05, + "loss": 2.4754, + "step": 1728500 + }, + { + "epoch": 8.57, + "learning_rate": 4.5718516371635384e-05, + "loss": 2.4884, + "step": 1729000 + }, + { + "epoch": 8.57, + "learning_rate": 4.57172777852093e-05, + "loss": 2.4793, + "step": 1729500 + }, + { + "epoch": 8.57, + "learning_rate": 4.571603919878322e-05, + "loss": 2.4638, + "step": 1730000 + }, + { + "epoch": 8.57, + "learning_rate": 4.5714803089529986e-05, + "loss": 2.4545, + "step": 1730500 + }, + { + "epoch": 8.58, + "learning_rate": 4.57135645031039e-05, + "loss": 2.4627, + "step": 1731000 + }, + { + "epoch": 8.58, + "learning_rate": 4.571232591667782e-05, + "loss": 2.46, + "step": 1731500 + }, + { + "epoch": 8.58, + "learning_rate": 4.571108733025174e-05, + "loss": 2.4664, + "step": 1732000 + }, + { + "epoch": 8.58, + "learning_rate": 4.570985369817135e-05, + "loss": 2.468, + "step": 1732500 + }, + { + "epoch": 8.59, + "learning_rate": 4.570861511174527e-05, + "loss": 2.4468, + "step": 1733000 + }, + { + "epoch": 8.59, + "learning_rate": 4.5707376525319185e-05, + "loss": 2.4756, + "step": 1733500 + }, + { + "epoch": 8.59, + "learning_rate": 4.57061379388931e-05, + "loss": 2.4659, + "step": 1734000 + }, + { + "epoch": 8.59, + "learning_rate": 4.570489935246702e-05, + "loss": 2.4663, + "step": 1734500 + }, + { + "epoch": 8.6, + "learning_rate": 4.570366324321379e-05, + "loss": 2.4561, + "step": 1735000 + }, + { + "epoch": 8.6, + "learning_rate": 4.5702424656787704e-05, + "loss": 2.4384, + "step": 1735500 + }, + { + "epoch": 8.6, + "learning_rate": 4.5701186070361615e-05, + "loss": 2.4622, + "step": 1736000 + }, + { + "epoch": 8.6, + "learning_rate": 4.569994748393553e-05, + "loss": 2.4654, + "step": 1736500 + }, + { + "epoch": 8.61, + "learning_rate": 4.569870889750945e-05, + "loss": 2.4673, + "step": 1737000 + }, + { + "epoch": 8.61, + "learning_rate": 4.5697472788256224e-05, + "loss": 2.4546, + "step": 1737500 + }, + { + "epoch": 8.61, + "learning_rate": 4.569623667900299e-05, + "loss": 2.4721, + "step": 1738000 + }, + { + "epoch": 8.61, + "learning_rate": 4.569499809257691e-05, + "loss": 2.4357, + "step": 1738500 + }, + { + "epoch": 8.62, + "learning_rate": 4.569375950615083e-05, + "loss": 2.4688, + "step": 1739000 + }, + { + "epoch": 8.62, + "learning_rate": 4.5692520919724744e-05, + "loss": 2.4617, + "step": 1739500 + }, + { + "epoch": 8.62, + "learning_rate": 4.569128233329866e-05, + "loss": 2.4783, + "step": 1740000 + }, + { + "epoch": 8.62, + "learning_rate": 4.569004374687257e-05, + "loss": 2.4474, + "step": 1740500 + }, + { + "epoch": 8.63, + "learning_rate": 4.568880516044649e-05, + "loss": 2.476, + "step": 1741000 + }, + { + "epoch": 8.63, + "learning_rate": 4.5687566574020405e-05, + "loss": 2.4684, + "step": 1741500 + }, + { + "epoch": 8.63, + "learning_rate": 4.568632798759432e-05, + "loss": 2.4567, + "step": 1742000 + }, + { + "epoch": 8.63, + "learning_rate": 4.568508940116824e-05, + "loss": 2.4522, + "step": 1742500 + }, + { + "epoch": 8.64, + "learning_rate": 4.568385081474215e-05, + "loss": 2.4613, + "step": 1743000 + }, + { + "epoch": 8.64, + "learning_rate": 4.5682612228316065e-05, + "loss": 2.481, + "step": 1743500 + }, + { + "epoch": 8.64, + "learning_rate": 4.568137611906284e-05, + "loss": 2.4624, + "step": 1744000 + }, + { + "epoch": 8.64, + "learning_rate": 4.568013753263675e-05, + "loss": 2.4642, + "step": 1744500 + }, + { + "epoch": 8.65, + "learning_rate": 4.567889894621067e-05, + "loss": 2.4412, + "step": 1745000 + }, + { + "epoch": 8.65, + "learning_rate": 4.5677660359784585e-05, + "loss": 2.4577, + "step": 1745500 + }, + { + "epoch": 8.65, + "learning_rate": 4.56764217733585e-05, + "loss": 2.4878, + "step": 1746000 + }, + { + "epoch": 8.65, + "learning_rate": 4.567518566410528e-05, + "loss": 2.4542, + "step": 1746500 + }, + { + "epoch": 8.66, + "learning_rate": 4.567394707767919e-05, + "loss": 2.466, + "step": 1747000 + }, + { + "epoch": 8.66, + "learning_rate": 4.5672708491253105e-05, + "loss": 2.4531, + "step": 1747500 + }, + { + "epoch": 8.66, + "learning_rate": 4.567146990482702e-05, + "loss": 2.4706, + "step": 1748000 + }, + { + "epoch": 8.66, + "learning_rate": 4.567023131840094e-05, + "loss": 2.4771, + "step": 1748500 + }, + { + "epoch": 8.67, + "learning_rate": 4.5668992731974855e-05, + "loss": 2.4667, + "step": 1749000 + }, + { + "epoch": 8.67, + "learning_rate": 4.5667754145548766e-05, + "loss": 2.4672, + "step": 1749500 + }, + { + "epoch": 8.67, + "learning_rate": 4.566651803629554e-05, + "loss": 2.4299, + "step": 1750000 + }, + { + "epoch": 8.67, + "learning_rate": 4.566527944986945e-05, + "loss": 2.4456, + "step": 1750500 + }, + { + "epoch": 8.68, + "learning_rate": 4.566404334061623e-05, + "loss": 2.4839, + "step": 1751000 + }, + { + "epoch": 8.68, + "learning_rate": 4.5662804754190144e-05, + "loss": 2.453, + "step": 1751500 + }, + { + "epoch": 8.68, + "learning_rate": 4.5661568644936906e-05, + "loss": 2.4527, + "step": 1752000 + }, + { + "epoch": 8.68, + "learning_rate": 4.566033005851082e-05, + "loss": 2.4506, + "step": 1752500 + }, + { + "epoch": 8.68, + "learning_rate": 4.565909147208474e-05, + "loss": 2.4345, + "step": 1753000 + }, + { + "epoch": 8.69, + "learning_rate": 4.565785288565866e-05, + "loss": 2.4588, + "step": 1753500 + }, + { + "epoch": 8.69, + "learning_rate": 4.5656614299232574e-05, + "loss": 2.4735, + "step": 1754000 + }, + { + "epoch": 8.69, + "learning_rate": 4.565537571280649e-05, + "loss": 2.4679, + "step": 1754500 + }, + { + "epoch": 8.69, + "learning_rate": 4.565413712638041e-05, + "loss": 2.4841, + "step": 1755000 + }, + { + "epoch": 8.7, + "learning_rate": 4.5652898539954324e-05, + "loss": 2.4871, + "step": 1755500 + }, + { + "epoch": 8.7, + "learning_rate": 4.565165995352824e-05, + "loss": 2.4499, + "step": 1756000 + }, + { + "epoch": 8.7, + "learning_rate": 4.565042136710216e-05, + "loss": 2.4511, + "step": 1756500 + }, + { + "epoch": 8.7, + "learning_rate": 4.564918278067607e-05, + "loss": 2.4915, + "step": 1757000 + }, + { + "epoch": 8.71, + "learning_rate": 4.5647944194249985e-05, + "loss": 2.4732, + "step": 1757500 + }, + { + "epoch": 8.71, + "learning_rate": 4.56467056078239e-05, + "loss": 2.4507, + "step": 1758000 + }, + { + "epoch": 8.71, + "learning_rate": 4.564546702139782e-05, + "loss": 2.4399, + "step": 1758500 + }, + { + "epoch": 8.71, + "learning_rate": 4.5644228434971736e-05, + "loss": 2.4765, + "step": 1759000 + }, + { + "epoch": 8.72, + "learning_rate": 4.564299480289136e-05, + "loss": 2.4684, + "step": 1759500 + }, + { + "epoch": 8.72, + "learning_rate": 4.5641756216465274e-05, + "loss": 2.4655, + "step": 1760000 + }, + { + "epoch": 8.72, + "learning_rate": 4.564051763003919e-05, + "loss": 2.4645, + "step": 1760500 + }, + { + "epoch": 8.72, + "learning_rate": 4.563927904361311e-05, + "loss": 2.4831, + "step": 1761000 + }, + { + "epoch": 8.73, + "learning_rate": 4.5638040457187025e-05, + "loss": 2.4761, + "step": 1761500 + }, + { + "epoch": 8.73, + "learning_rate": 4.563680187076094e-05, + "loss": 2.4824, + "step": 1762000 + }, + { + "epoch": 8.73, + "learning_rate": 4.563556328433486e-05, + "loss": 2.4461, + "step": 1762500 + }, + { + "epoch": 8.73, + "learning_rate": 4.563432469790877e-05, + "loss": 2.4469, + "step": 1763000 + }, + { + "epoch": 8.74, + "learning_rate": 4.5633086111482685e-05, + "loss": 2.4606, + "step": 1763500 + }, + { + "epoch": 8.74, + "learning_rate": 4.56318475250566e-05, + "loss": 2.4706, + "step": 1764000 + }, + { + "epoch": 8.74, + "learning_rate": 4.563061141580338e-05, + "loss": 2.4447, + "step": 1764500 + }, + { + "epoch": 8.74, + "learning_rate": 4.562937530655014e-05, + "loss": 2.4601, + "step": 1765000 + }, + { + "epoch": 8.75, + "learning_rate": 4.562813672012406e-05, + "loss": 2.4485, + "step": 1765500 + }, + { + "epoch": 8.75, + "learning_rate": 4.5626898133697974e-05, + "loss": 2.4624, + "step": 1766000 + }, + { + "epoch": 8.75, + "learning_rate": 4.562565954727189e-05, + "loss": 2.5026, + "step": 1766500 + }, + { + "epoch": 8.75, + "learning_rate": 4.562442096084581e-05, + "loss": 2.4555, + "step": 1767000 + }, + { + "epoch": 8.76, + "learning_rate": 4.5623182374419725e-05, + "loss": 2.4672, + "step": 1767500 + }, + { + "epoch": 8.76, + "learning_rate": 4.562194378799364e-05, + "loss": 2.4704, + "step": 1768000 + }, + { + "epoch": 8.76, + "learning_rate": 4.5620712633086114e-05, + "loss": 2.5025, + "step": 1768500 + }, + { + "epoch": 8.76, + "learning_rate": 4.561947404666003e-05, + "loss": 2.4781, + "step": 1769000 + }, + { + "epoch": 8.77, + "learning_rate": 4.561823546023395e-05, + "loss": 2.4675, + "step": 1769500 + }, + { + "epoch": 8.77, + "learning_rate": 4.561699687380786e-05, + "loss": 2.4574, + "step": 1770000 + }, + { + "epoch": 8.77, + "learning_rate": 4.5615758287381775e-05, + "loss": 2.4715, + "step": 1770500 + }, + { + "epoch": 8.77, + "learning_rate": 4.561451970095569e-05, + "loss": 2.4416, + "step": 1771000 + }, + { + "epoch": 8.78, + "learning_rate": 4.561328111452961e-05, + "loss": 2.4696, + "step": 1771500 + }, + { + "epoch": 8.78, + "learning_rate": 4.5612042528103526e-05, + "loss": 2.4609, + "step": 1772000 + }, + { + "epoch": 8.78, + "learning_rate": 4.561080394167744e-05, + "loss": 2.4613, + "step": 1772500 + }, + { + "epoch": 8.78, + "learning_rate": 4.560956535525136e-05, + "loss": 2.4924, + "step": 1773000 + }, + { + "epoch": 8.79, + "learning_rate": 4.560832676882528e-05, + "loss": 2.4574, + "step": 1773500 + }, + { + "epoch": 8.79, + "learning_rate": 4.5607088182399194e-05, + "loss": 2.479, + "step": 1774000 + }, + { + "epoch": 8.79, + "learning_rate": 4.560584959597311e-05, + "loss": 2.4495, + "step": 1774500 + }, + { + "epoch": 8.79, + "learning_rate": 4.560461100954703e-05, + "loss": 2.4675, + "step": 1775000 + }, + { + "epoch": 8.8, + "learning_rate": 4.5603372423120944e-05, + "loss": 2.472, + "step": 1775500 + }, + { + "epoch": 8.8, + "learning_rate": 4.560213631386771e-05, + "loss": 2.4506, + "step": 1776000 + }, + { + "epoch": 8.8, + "learning_rate": 4.560089772744163e-05, + "loss": 2.453, + "step": 1776500 + }, + { + "epoch": 8.8, + "learning_rate": 4.559965914101555e-05, + "loss": 2.4638, + "step": 1777000 + }, + { + "epoch": 8.81, + "learning_rate": 4.559842055458946e-05, + "loss": 2.4947, + "step": 1777500 + }, + { + "epoch": 8.81, + "learning_rate": 4.5597181968163374e-05, + "loss": 2.4435, + "step": 1778000 + }, + { + "epoch": 8.81, + "learning_rate": 4.559594338173729e-05, + "loss": 2.4739, + "step": 1778500 + }, + { + "epoch": 8.81, + "learning_rate": 4.559470727248406e-05, + "loss": 2.435, + "step": 1779000 + }, + { + "epoch": 8.82, + "learning_rate": 4.559346868605798e-05, + "loss": 2.4844, + "step": 1779500 + }, + { + "epoch": 8.82, + "learning_rate": 4.5592230099631894e-05, + "loss": 2.4432, + "step": 1780000 + }, + { + "epoch": 8.82, + "learning_rate": 4.559099151320581e-05, + "loss": 2.4639, + "step": 1780500 + }, + { + "epoch": 8.82, + "learning_rate": 4.558975540395258e-05, + "loss": 2.4533, + "step": 1781000 + }, + { + "epoch": 8.83, + "learning_rate": 4.558851929469935e-05, + "loss": 2.4545, + "step": 1781500 + }, + { + "epoch": 8.83, + "learning_rate": 4.5587280708273265e-05, + "loss": 2.4481, + "step": 1782000 + }, + { + "epoch": 8.83, + "learning_rate": 4.5586042121847175e-05, + "loss": 2.4613, + "step": 1782500 + }, + { + "epoch": 8.83, + "learning_rate": 4.558480353542109e-05, + "loss": 2.4775, + "step": 1783000 + }, + { + "epoch": 8.84, + "learning_rate": 4.558356494899501e-05, + "loss": 2.4433, + "step": 1783500 + }, + { + "epoch": 8.84, + "learning_rate": 4.5582326362568926e-05, + "loss": 2.4653, + "step": 1784000 + }, + { + "epoch": 8.84, + "learning_rate": 4.558108777614284e-05, + "loss": 2.4877, + "step": 1784500 + }, + { + "epoch": 8.84, + "learning_rate": 4.557984918971676e-05, + "loss": 2.4707, + "step": 1785000 + }, + { + "epoch": 8.85, + "learning_rate": 4.557861060329068e-05, + "loss": 2.4879, + "step": 1785500 + }, + { + "epoch": 8.85, + "learning_rate": 4.5577372016864594e-05, + "loss": 2.4454, + "step": 1786000 + }, + { + "epoch": 8.85, + "learning_rate": 4.557613343043851e-05, + "loss": 2.458, + "step": 1786500 + }, + { + "epoch": 8.85, + "learning_rate": 4.557489732118528e-05, + "loss": 2.4683, + "step": 1787000 + }, + { + "epoch": 8.86, + "learning_rate": 4.5573658734759196e-05, + "loss": 2.4623, + "step": 1787500 + }, + { + "epoch": 8.86, + "learning_rate": 4.5572420148333113e-05, + "loss": 2.4821, + "step": 1788000 + }, + { + "epoch": 8.86, + "learning_rate": 4.557118156190703e-05, + "loss": 2.456, + "step": 1788500 + }, + { + "epoch": 8.86, + "learning_rate": 4.556994297548095e-05, + "loss": 2.4476, + "step": 1789000 + }, + { + "epoch": 8.87, + "learning_rate": 4.5568704389054864e-05, + "loss": 2.4679, + "step": 1789500 + }, + { + "epoch": 8.87, + "learning_rate": 4.556746580262878e-05, + "loss": 2.4734, + "step": 1790000 + }, + { + "epoch": 8.87, + "learning_rate": 4.55662272162027e-05, + "loss": 2.4734, + "step": 1790500 + }, + { + "epoch": 8.87, + "learning_rate": 4.556498862977661e-05, + "loss": 2.4945, + "step": 1791000 + }, + { + "epoch": 8.88, + "learning_rate": 4.5563750043350525e-05, + "loss": 2.4307, + "step": 1791500 + }, + { + "epoch": 8.88, + "learning_rate": 4.5562513934097294e-05, + "loss": 2.4534, + "step": 1792000 + }, + { + "epoch": 8.88, + "learning_rate": 4.556127534767121e-05, + "loss": 2.4577, + "step": 1792500 + }, + { + "epoch": 8.88, + "learning_rate": 4.556003676124513e-05, + "loss": 2.4634, + "step": 1793000 + }, + { + "epoch": 8.89, + "learning_rate": 4.5558798174819045e-05, + "loss": 2.4808, + "step": 1793500 + }, + { + "epoch": 8.89, + "learning_rate": 4.555755958839296e-05, + "loss": 2.4439, + "step": 1794000 + }, + { + "epoch": 8.89, + "learning_rate": 4.555632100196688e-05, + "loss": 2.4453, + "step": 1794500 + }, + { + "epoch": 8.89, + "learning_rate": 4.555508489271365e-05, + "loss": 2.442, + "step": 1795000 + }, + { + "epoch": 8.9, + "learning_rate": 4.5553846306287564e-05, + "loss": 2.4453, + "step": 1795500 + }, + { + "epoch": 8.9, + "learning_rate": 4.555260771986148e-05, + "loss": 2.4428, + "step": 1796000 + }, + { + "epoch": 8.9, + "learning_rate": 4.555137161060824e-05, + "loss": 2.48, + "step": 1796500 + }, + { + "epoch": 8.9, + "learning_rate": 4.555013302418216e-05, + "loss": 2.4658, + "step": 1797000 + }, + { + "epoch": 8.91, + "learning_rate": 4.554889443775608e-05, + "loss": 2.4606, + "step": 1797500 + }, + { + "epoch": 8.91, + "learning_rate": 4.5547655851329994e-05, + "loss": 2.449, + "step": 1798000 + }, + { + "epoch": 8.91, + "learning_rate": 4.554641726490391e-05, + "loss": 2.4647, + "step": 1798500 + }, + { + "epoch": 8.91, + "learning_rate": 4.554517867847783e-05, + "loss": 2.4644, + "step": 1799000 + }, + { + "epoch": 8.92, + "learning_rate": 4.5543940092051745e-05, + "loss": 2.4737, + "step": 1799500 + }, + { + "epoch": 8.92, + "learning_rate": 4.554270150562566e-05, + "loss": 2.491, + "step": 1800000 + }, + { + "epoch": 8.92, + "learning_rate": 4.554146291919958e-05, + "loss": 2.4842, + "step": 1800500 + }, + { + "epoch": 8.92, + "learning_rate": 4.55402292871192e-05, + "loss": 2.4706, + "step": 1801000 + }, + { + "epoch": 8.93, + "learning_rate": 4.553899070069311e-05, + "loss": 2.4853, + "step": 1801500 + }, + { + "epoch": 8.93, + "learning_rate": 4.5537752114267026e-05, + "loss": 2.4699, + "step": 1802000 + }, + { + "epoch": 8.93, + "learning_rate": 4.5536513527840943e-05, + "loss": 2.4815, + "step": 1802500 + }, + { + "epoch": 8.93, + "learning_rate": 4.553527494141486e-05, + "loss": 2.458, + "step": 1803000 + }, + { + "epoch": 8.94, + "learning_rate": 4.553403635498878e-05, + "loss": 2.4418, + "step": 1803500 + }, + { + "epoch": 8.94, + "learning_rate": 4.5532797768562694e-05, + "loss": 2.4691, + "step": 1804000 + }, + { + "epoch": 8.94, + "learning_rate": 4.553155918213661e-05, + "loss": 2.4687, + "step": 1804500 + }, + { + "epoch": 8.94, + "learning_rate": 4.553032059571053e-05, + "loss": 2.4617, + "step": 1805000 + }, + { + "epoch": 8.95, + "learning_rate": 4.55290844864573e-05, + "loss": 2.491, + "step": 1805500 + }, + { + "epoch": 8.95, + "learning_rate": 4.5527845900031214e-05, + "loss": 2.4481, + "step": 1806000 + }, + { + "epoch": 8.95, + "learning_rate": 4.552660731360513e-05, + "loss": 2.447, + "step": 1806500 + }, + { + "epoch": 8.95, + "learning_rate": 4.552536872717905e-05, + "loss": 2.4697, + "step": 1807000 + }, + { + "epoch": 8.95, + "learning_rate": 4.5524132617925816e-05, + "loss": 2.4696, + "step": 1807500 + }, + { + "epoch": 8.96, + "learning_rate": 4.5522894031499727e-05, + "loss": 2.4883, + "step": 1808000 + }, + { + "epoch": 8.96, + "learning_rate": 4.5521655445073644e-05, + "loss": 2.429, + "step": 1808500 + }, + { + "epoch": 8.96, + "learning_rate": 4.552041685864756e-05, + "loss": 2.4636, + "step": 1809000 + }, + { + "epoch": 8.96, + "learning_rate": 4.5519180749394336e-05, + "loss": 2.4663, + "step": 1809500 + }, + { + "epoch": 8.97, + "learning_rate": 4.5517944640141105e-05, + "loss": 2.4526, + "step": 1810000 + }, + { + "epoch": 8.97, + "learning_rate": 4.551670605371502e-05, + "loss": 2.4824, + "step": 1810500 + }, + { + "epoch": 8.97, + "learning_rate": 4.551546746728894e-05, + "loss": 2.4671, + "step": 1811000 + }, + { + "epoch": 8.97, + "learning_rate": 4.5514228880862856e-05, + "loss": 2.4767, + "step": 1811500 + }, + { + "epoch": 8.98, + "learning_rate": 4.551299029443677e-05, + "loss": 2.4661, + "step": 1812000 + }, + { + "epoch": 8.98, + "learning_rate": 4.551175170801068e-05, + "loss": 2.4633, + "step": 1812500 + }, + { + "epoch": 8.98, + "learning_rate": 4.55105131215846e-05, + "loss": 2.4379, + "step": 1813000 + }, + { + "epoch": 8.98, + "learning_rate": 4.5509274535158517e-05, + "loss": 2.4667, + "step": 1813500 + }, + { + "epoch": 8.99, + "learning_rate": 4.5508035948732434e-05, + "loss": 2.4808, + "step": 1814000 + }, + { + "epoch": 8.99, + "learning_rate": 4.55067998394792e-05, + "loss": 2.4708, + "step": 1814500 + }, + { + "epoch": 8.99, + "learning_rate": 4.550556125305312e-05, + "loss": 2.4644, + "step": 1815000 + }, + { + "epoch": 8.99, + "learning_rate": 4.5504322666627036e-05, + "loss": 2.4861, + "step": 1815500 + }, + { + "epoch": 9.0, + "learning_rate": 4.550308408020095e-05, + "loss": 2.4598, + "step": 1816000 + }, + { + "epoch": 9.0, + "learning_rate": 4.550184549377487e-05, + "loss": 2.4818, + "step": 1816500 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.6403843342409302, + "eval_accuracy_mlm": 0.5934790457919371, + "eval_accuracy_nsp": 0.8615189108837107, + "eval_loss": 2.434657096862793, + "eval_runtime": 145.7954, + "eval_samples_per_second": 1748.745, + "eval_steps_per_second": 72.869, + "step": 1816587 + }, + { + "epoch": 9.0, + "learning_rate": 4.550060938452164e-05, + "loss": 2.424, + "step": 1817000 + }, + { + "epoch": 9.0, + "learning_rate": 4.5499370798095556e-05, + "loss": 2.426, + "step": 1817500 + }, + { + "epoch": 9.01, + "learning_rate": 4.549813221166947e-05, + "loss": 2.4208, + "step": 1818000 + }, + { + "epoch": 9.01, + "learning_rate": 4.549689362524339e-05, + "loss": 2.4265, + "step": 1818500 + }, + { + "epoch": 9.01, + "learning_rate": 4.549565751599015e-05, + "loss": 2.4424, + "step": 1819000 + }, + { + "epoch": 9.01, + "learning_rate": 4.549441892956407e-05, + "loss": 2.4181, + "step": 1819500 + }, + { + "epoch": 9.02, + "learning_rate": 4.5493180343137986e-05, + "loss": 2.4305, + "step": 1820000 + }, + { + "epoch": 9.02, + "learning_rate": 4.54919417567119e-05, + "loss": 2.4542, + "step": 1820500 + }, + { + "epoch": 9.02, + "learning_rate": 4.549070317028582e-05, + "loss": 2.4495, + "step": 1821000 + }, + { + "epoch": 9.02, + "learning_rate": 4.5489464583859736e-05, + "loss": 2.4497, + "step": 1821500 + }, + { + "epoch": 9.03, + "learning_rate": 4.5488228474606505e-05, + "loss": 2.442, + "step": 1822000 + }, + { + "epoch": 9.03, + "learning_rate": 4.548698988818042e-05, + "loss": 2.4593, + "step": 1822500 + }, + { + "epoch": 9.03, + "learning_rate": 4.548575130175434e-05, + "loss": 2.4229, + "step": 1823000 + }, + { + "epoch": 9.03, + "learning_rate": 4.5484512715328256e-05, + "loss": 2.4115, + "step": 1823500 + }, + { + "epoch": 9.04, + "learning_rate": 4.548327412890217e-05, + "loss": 2.4466, + "step": 1824000 + }, + { + "epoch": 9.04, + "learning_rate": 4.548203554247609e-05, + "loss": 2.4517, + "step": 1824500 + }, + { + "epoch": 9.04, + "learning_rate": 4.548079695605001e-05, + "loss": 2.4276, + "step": 1825000 + }, + { + "epoch": 9.04, + "learning_rate": 4.5479558369623924e-05, + "loss": 2.4327, + "step": 1825500 + }, + { + "epoch": 9.05, + "learning_rate": 4.5478319783197834e-05, + "loss": 2.4248, + "step": 1826000 + }, + { + "epoch": 9.05, + "learning_rate": 4.547708119677175e-05, + "loss": 2.4232, + "step": 1826500 + }, + { + "epoch": 9.05, + "learning_rate": 4.547584508751852e-05, + "loss": 2.4336, + "step": 1827000 + }, + { + "epoch": 9.05, + "learning_rate": 4.547460897826529e-05, + "loss": 2.4489, + "step": 1827500 + }, + { + "epoch": 9.06, + "learning_rate": 4.5473370391839205e-05, + "loss": 2.4391, + "step": 1828000 + }, + { + "epoch": 9.06, + "learning_rate": 4.5472134282585974e-05, + "loss": 2.4429, + "step": 1828500 + }, + { + "epoch": 9.06, + "learning_rate": 4.5470895696159884e-05, + "loss": 2.4328, + "step": 1829000 + }, + { + "epoch": 9.06, + "learning_rate": 4.54696571097338e-05, + "loss": 2.4022, + "step": 1829500 + }, + { + "epoch": 9.07, + "learning_rate": 4.546841852330772e-05, + "loss": 2.4167, + "step": 1830000 + }, + { + "epoch": 9.07, + "learning_rate": 4.5467179936881635e-05, + "loss": 2.4667, + "step": 1830500 + }, + { + "epoch": 9.07, + "learning_rate": 4.546594135045555e-05, + "loss": 2.4303, + "step": 1831000 + }, + { + "epoch": 9.07, + "learning_rate": 4.546470276402947e-05, + "loss": 2.4245, + "step": 1831500 + }, + { + "epoch": 9.08, + "learning_rate": 4.5463464177603386e-05, + "loss": 2.4439, + "step": 1832000 + }, + { + "epoch": 9.08, + "learning_rate": 4.54622255911773e-05, + "loss": 2.447, + "step": 1832500 + }, + { + "epoch": 9.08, + "learning_rate": 4.546098700475122e-05, + "loss": 2.4434, + "step": 1833000 + }, + { + "epoch": 9.08, + "learning_rate": 4.5459748418325137e-05, + "loss": 2.4311, + "step": 1833500 + }, + { + "epoch": 9.09, + "learning_rate": 4.5458509831899053e-05, + "loss": 2.4513, + "step": 1834000 + }, + { + "epoch": 9.09, + "learning_rate": 4.545727372264582e-05, + "loss": 2.4607, + "step": 1834500 + }, + { + "epoch": 9.09, + "learning_rate": 4.545603513621974e-05, + "loss": 2.4291, + "step": 1835000 + }, + { + "epoch": 9.09, + "learning_rate": 4.5454796549793656e-05, + "loss": 2.4439, + "step": 1835500 + }, + { + "epoch": 9.1, + "learning_rate": 4.545355796336757e-05, + "loss": 2.4741, + "step": 1836000 + }, + { + "epoch": 9.1, + "learning_rate": 4.5452321854114335e-05, + "loss": 2.4591, + "step": 1836500 + }, + { + "epoch": 9.1, + "learning_rate": 4.545108326768825e-05, + "loss": 2.4535, + "step": 1837000 + }, + { + "epoch": 9.1, + "learning_rate": 4.544984468126217e-05, + "loss": 2.4464, + "step": 1837500 + }, + { + "epoch": 9.11, + "learning_rate": 4.5448606094836086e-05, + "loss": 2.4405, + "step": 1838000 + }, + { + "epoch": 9.11, + "learning_rate": 4.544736750841e-05, + "loss": 2.4423, + "step": 1838500 + }, + { + "epoch": 9.11, + "learning_rate": 4.544612892198392e-05, + "loss": 2.4383, + "step": 1839000 + }, + { + "epoch": 9.11, + "learning_rate": 4.544489033555784e-05, + "loss": 2.4421, + "step": 1839500 + }, + { + "epoch": 9.12, + "learning_rate": 4.5443651749131754e-05, + "loss": 2.4476, + "step": 1840000 + }, + { + "epoch": 9.12, + "learning_rate": 4.544241316270567e-05, + "loss": 2.4121, + "step": 1840500 + }, + { + "epoch": 9.12, + "learning_rate": 4.544117457627959e-05, + "loss": 2.4078, + "step": 1841000 + }, + { + "epoch": 9.12, + "learning_rate": 4.5439935989853504e-05, + "loss": 2.4349, + "step": 1841500 + }, + { + "epoch": 9.13, + "learning_rate": 4.543869988060027e-05, + "loss": 2.4562, + "step": 1842000 + }, + { + "epoch": 9.13, + "learning_rate": 4.543746129417419e-05, + "loss": 2.404, + "step": 1842500 + }, + { + "epoch": 9.13, + "learning_rate": 4.543622270774811e-05, + "loss": 2.4584, + "step": 1843000 + }, + { + "epoch": 9.13, + "learning_rate": 4.543498659849487e-05, + "loss": 2.4404, + "step": 1843500 + }, + { + "epoch": 9.14, + "learning_rate": 4.5433748012068786e-05, + "loss": 2.4358, + "step": 1844000 + }, + { + "epoch": 9.14, + "learning_rate": 4.54325094256427e-05, + "loss": 2.4061, + "step": 1844500 + }, + { + "epoch": 9.14, + "learning_rate": 4.543127083921662e-05, + "loss": 2.4233, + "step": 1845000 + }, + { + "epoch": 9.14, + "learning_rate": 4.543003472996339e-05, + "loss": 2.4524, + "step": 1845500 + }, + { + "epoch": 9.15, + "learning_rate": 4.542879862071016e-05, + "loss": 2.4392, + "step": 1846000 + }, + { + "epoch": 9.15, + "learning_rate": 4.5427562511456926e-05, + "loss": 2.4372, + "step": 1846500 + }, + { + "epoch": 9.15, + "learning_rate": 4.542632392503084e-05, + "loss": 2.4474, + "step": 1847000 + }, + { + "epoch": 9.15, + "learning_rate": 4.542508533860476e-05, + "loss": 2.4468, + "step": 1847500 + }, + { + "epoch": 9.16, + "learning_rate": 4.542384675217868e-05, + "loss": 2.4432, + "step": 1848000 + }, + { + "epoch": 9.16, + "learning_rate": 4.5422608165752594e-05, + "loss": 2.4501, + "step": 1848500 + }, + { + "epoch": 9.16, + "learning_rate": 4.5421369579326504e-05, + "loss": 2.4495, + "step": 1849000 + }, + { + "epoch": 9.16, + "learning_rate": 4.542013099290042e-05, + "loss": 2.4418, + "step": 1849500 + }, + { + "epoch": 9.17, + "learning_rate": 4.541889240647434e-05, + "loss": 2.4484, + "step": 1850000 + }, + { + "epoch": 9.17, + "learning_rate": 4.5417653820048255e-05, + "loss": 2.4385, + "step": 1850500 + }, + { + "epoch": 9.17, + "learning_rate": 4.541641523362217e-05, + "loss": 2.4258, + "step": 1851000 + }, + { + "epoch": 9.17, + "learning_rate": 4.541517664719609e-05, + "loss": 2.4504, + "step": 1851500 + }, + { + "epoch": 9.18, + "learning_rate": 4.5413938060770006e-05, + "loss": 2.458, + "step": 1852000 + }, + { + "epoch": 9.18, + "learning_rate": 4.541269947434392e-05, + "loss": 2.4314, + "step": 1852500 + }, + { + "epoch": 9.18, + "learning_rate": 4.541146088791784e-05, + "loss": 2.436, + "step": 1853000 + }, + { + "epoch": 9.18, + "learning_rate": 4.5410222301491757e-05, + "loss": 2.426, + "step": 1853500 + }, + { + "epoch": 9.19, + "learning_rate": 4.5408983715065673e-05, + "loss": 2.435, + "step": 1854000 + }, + { + "epoch": 9.19, + "learning_rate": 4.540774512863959e-05, + "loss": 2.4389, + "step": 1854500 + }, + { + "epoch": 9.19, + "learning_rate": 4.540650901938636e-05, + "loss": 2.461, + "step": 1855000 + }, + { + "epoch": 9.19, + "learning_rate": 4.5405270432960276e-05, + "loss": 2.4472, + "step": 1855500 + }, + { + "epoch": 9.2, + "learning_rate": 4.540403432370704e-05, + "loss": 2.4388, + "step": 1856000 + }, + { + "epoch": 9.2, + "learning_rate": 4.5402795737280955e-05, + "loss": 2.4427, + "step": 1856500 + }, + { + "epoch": 9.2, + "learning_rate": 4.540155715085487e-05, + "loss": 2.4563, + "step": 1857000 + }, + { + "epoch": 9.2, + "learning_rate": 4.540032104160165e-05, + "loss": 2.4632, + "step": 1857500 + }, + { + "epoch": 9.21, + "learning_rate": 4.539908245517556e-05, + "loss": 2.4482, + "step": 1858000 + }, + { + "epoch": 9.21, + "learning_rate": 4.5397843868749475e-05, + "loss": 2.4332, + "step": 1858500 + }, + { + "epoch": 9.21, + "learning_rate": 4.539660528232339e-05, + "loss": 2.4423, + "step": 1859000 + }, + { + "epoch": 9.21, + "learning_rate": 4.539536669589731e-05, + "loss": 2.4584, + "step": 1859500 + }, + { + "epoch": 9.22, + "learning_rate": 4.5394128109471225e-05, + "loss": 2.4123, + "step": 1860000 + }, + { + "epoch": 9.22, + "learning_rate": 4.539288952304514e-05, + "loss": 2.4395, + "step": 1860500 + }, + { + "epoch": 9.22, + "learning_rate": 4.539165093661906e-05, + "loss": 2.4833, + "step": 1861000 + }, + { + "epoch": 9.22, + "learning_rate": 4.5390412350192976e-05, + "loss": 2.4595, + "step": 1861500 + }, + { + "epoch": 9.22, + "learning_rate": 4.538917376376689e-05, + "loss": 2.4364, + "step": 1862000 + }, + { + "epoch": 9.23, + "learning_rate": 4.538793517734081e-05, + "loss": 2.4205, + "step": 1862500 + }, + { + "epoch": 9.23, + "learning_rate": 4.538669659091472e-05, + "loss": 2.4233, + "step": 1863000 + }, + { + "epoch": 9.23, + "learning_rate": 4.538545800448864e-05, + "loss": 2.4394, + "step": 1863500 + }, + { + "epoch": 9.23, + "learning_rate": 4.5384219418062554e-05, + "loss": 2.4386, + "step": 1864000 + }, + { + "epoch": 9.24, + "learning_rate": 4.538298330880932e-05, + "loss": 2.4466, + "step": 1864500 + }, + { + "epoch": 9.24, + "learning_rate": 4.538174472238324e-05, + "loss": 2.4483, + "step": 1865000 + }, + { + "epoch": 9.24, + "learning_rate": 4.538050613595716e-05, + "loss": 2.454, + "step": 1865500 + }, + { + "epoch": 9.24, + "learning_rate": 4.5379267549531074e-05, + "loss": 2.4675, + "step": 1866000 + }, + { + "epoch": 9.25, + "learning_rate": 4.537803144027784e-05, + "loss": 2.4575, + "step": 1866500 + }, + { + "epoch": 9.25, + "learning_rate": 4.537679285385176e-05, + "loss": 2.4484, + "step": 1867000 + }, + { + "epoch": 9.25, + "learning_rate": 4.5375554267425676e-05, + "loss": 2.4436, + "step": 1867500 + }, + { + "epoch": 9.25, + "learning_rate": 4.537431568099959e-05, + "loss": 2.4271, + "step": 1868000 + }, + { + "epoch": 9.26, + "learning_rate": 4.537307709457351e-05, + "loss": 2.4645, + "step": 1868500 + }, + { + "epoch": 9.26, + "learning_rate": 4.537183850814743e-05, + "loss": 2.4648, + "step": 1869000 + }, + { + "epoch": 9.26, + "learning_rate": 4.537059992172134e-05, + "loss": 2.4415, + "step": 1869500 + }, + { + "epoch": 9.26, + "learning_rate": 4.5369363812468106e-05, + "loss": 2.4357, + "step": 1870000 + }, + { + "epoch": 9.27, + "learning_rate": 4.536812522604202e-05, + "loss": 2.4403, + "step": 1870500 + }, + { + "epoch": 9.27, + "learning_rate": 4.536688663961594e-05, + "loss": 2.4475, + "step": 1871000 + }, + { + "epoch": 9.27, + "learning_rate": 4.536564805318986e-05, + "loss": 2.4376, + "step": 1871500 + }, + { + "epoch": 9.27, + "learning_rate": 4.5364411943936626e-05, + "loss": 2.4186, + "step": 1872000 + }, + { + "epoch": 9.28, + "learning_rate": 4.536317335751054e-05, + "loss": 2.4547, + "step": 1872500 + }, + { + "epoch": 9.28, + "learning_rate": 4.536193477108446e-05, + "loss": 2.4273, + "step": 1873000 + }, + { + "epoch": 9.28, + "learning_rate": 4.5360696184658376e-05, + "loss": 2.453, + "step": 1873500 + }, + { + "epoch": 9.28, + "learning_rate": 4.5359457598232293e-05, + "loss": 2.4315, + "step": 1874000 + }, + { + "epoch": 9.29, + "learning_rate": 4.535821901180621e-05, + "loss": 2.4556, + "step": 1874500 + }, + { + "epoch": 9.29, + "learning_rate": 4.535698042538013e-05, + "loss": 2.4386, + "step": 1875000 + }, + { + "epoch": 9.29, + "learning_rate": 4.5355741838954044e-05, + "loss": 2.4383, + "step": 1875500 + }, + { + "epoch": 9.29, + "learning_rate": 4.535450325252796e-05, + "loss": 2.4127, + "step": 1876000 + }, + { + "epoch": 9.3, + "learning_rate": 4.535326466610187e-05, + "loss": 2.4339, + "step": 1876500 + }, + { + "epoch": 9.3, + "learning_rate": 4.535202607967579e-05, + "loss": 2.4542, + "step": 1877000 + }, + { + "epoch": 9.3, + "learning_rate": 4.5350787493249705e-05, + "loss": 2.4275, + "step": 1877500 + }, + { + "epoch": 9.3, + "learning_rate": 4.5349551383996474e-05, + "loss": 2.4308, + "step": 1878000 + }, + { + "epoch": 9.31, + "learning_rate": 4.534831279757039e-05, + "loss": 2.4147, + "step": 1878500 + }, + { + "epoch": 9.31, + "learning_rate": 4.534707421114431e-05, + "loss": 2.4667, + "step": 1879000 + }, + { + "epoch": 9.31, + "learning_rate": 4.5345835624718225e-05, + "loss": 2.4236, + "step": 1879500 + }, + { + "epoch": 9.31, + "learning_rate": 4.534459703829214e-05, + "loss": 2.4343, + "step": 1880000 + }, + { + "epoch": 9.32, + "learning_rate": 4.534335845186606e-05, + "loss": 2.4451, + "step": 1880500 + }, + { + "epoch": 9.32, + "learning_rate": 4.5342119865439975e-05, + "loss": 2.4541, + "step": 1881000 + }, + { + "epoch": 9.32, + "learning_rate": 4.534088127901389e-05, + "loss": 2.4212, + "step": 1881500 + }, + { + "epoch": 9.32, + "learning_rate": 4.533964516976066e-05, + "loss": 2.4321, + "step": 1882000 + }, + { + "epoch": 9.33, + "learning_rate": 4.533840658333458e-05, + "loss": 2.4481, + "step": 1882500 + }, + { + "epoch": 9.33, + "learning_rate": 4.533716799690849e-05, + "loss": 2.4062, + "step": 1883000 + }, + { + "epoch": 9.33, + "learning_rate": 4.5335929410482405e-05, + "loss": 2.4301, + "step": 1883500 + }, + { + "epoch": 9.33, + "learning_rate": 4.533469082405632e-05, + "loss": 2.4163, + "step": 1884000 + }, + { + "epoch": 9.34, + "learning_rate": 4.533345471480309e-05, + "loss": 2.4449, + "step": 1884500 + }, + { + "epoch": 9.34, + "learning_rate": 4.533221612837701e-05, + "loss": 2.4703, + "step": 1885000 + }, + { + "epoch": 9.34, + "learning_rate": 4.5330977541950925e-05, + "loss": 2.4336, + "step": 1885500 + }, + { + "epoch": 9.34, + "learning_rate": 4.5329741432697694e-05, + "loss": 2.4546, + "step": 1886000 + }, + { + "epoch": 9.35, + "learning_rate": 4.532850284627161e-05, + "loss": 2.4623, + "step": 1886500 + }, + { + "epoch": 9.35, + "learning_rate": 4.532726425984553e-05, + "loss": 2.4571, + "step": 1887000 + }, + { + "epoch": 9.35, + "learning_rate": 4.5326025673419444e-05, + "loss": 2.4461, + "step": 1887500 + }, + { + "epoch": 9.35, + "learning_rate": 4.532478708699336e-05, + "loss": 2.4349, + "step": 1888000 + }, + { + "epoch": 9.36, + "learning_rate": 4.532354850056728e-05, + "loss": 2.4261, + "step": 1888500 + }, + { + "epoch": 9.36, + "learning_rate": 4.5322309914141195e-05, + "loss": 2.4164, + "step": 1889000 + }, + { + "epoch": 9.36, + "learning_rate": 4.532107132771511e-05, + "loss": 2.462, + "step": 1889500 + }, + { + "epoch": 9.36, + "learning_rate": 4.5319835218461874e-05, + "loss": 2.4433, + "step": 1890000 + }, + { + "epoch": 9.37, + "learning_rate": 4.531859663203579e-05, + "loss": 2.4574, + "step": 1890500 + }, + { + "epoch": 9.37, + "learning_rate": 4.531735804560971e-05, + "loss": 2.4477, + "step": 1891000 + }, + { + "epoch": 9.37, + "learning_rate": 4.5316119459183625e-05, + "loss": 2.4444, + "step": 1891500 + }, + { + "epoch": 9.37, + "learning_rate": 4.531488087275754e-05, + "loss": 2.4569, + "step": 1892000 + }, + { + "epoch": 9.38, + "learning_rate": 4.531364476350431e-05, + "loss": 2.4576, + "step": 1892500 + }, + { + "epoch": 9.38, + "learning_rate": 4.531240617707823e-05, + "loss": 2.4366, + "step": 1893000 + }, + { + "epoch": 9.38, + "learning_rate": 4.5311167590652145e-05, + "loss": 2.455, + "step": 1893500 + }, + { + "epoch": 9.38, + "learning_rate": 4.530992900422606e-05, + "loss": 2.4634, + "step": 1894000 + }, + { + "epoch": 9.39, + "learning_rate": 4.530869041779998e-05, + "loss": 2.4578, + "step": 1894500 + }, + { + "epoch": 9.39, + "learning_rate": 4.530745430854674e-05, + "loss": 2.4391, + "step": 1895000 + }, + { + "epoch": 9.39, + "learning_rate": 4.530621572212066e-05, + "loss": 2.4376, + "step": 1895500 + }, + { + "epoch": 9.39, + "learning_rate": 4.5304977135694574e-05, + "loss": 2.4466, + "step": 1896000 + }, + { + "epoch": 9.4, + "learning_rate": 4.530373854926849e-05, + "loss": 2.429, + "step": 1896500 + }, + { + "epoch": 9.4, + "learning_rate": 4.530249996284241e-05, + "loss": 2.4456, + "step": 1897000 + }, + { + "epoch": 9.4, + "learning_rate": 4.5301261376416325e-05, + "loss": 2.4541, + "step": 1897500 + }, + { + "epoch": 9.4, + "learning_rate": 4.530002278999024e-05, + "loss": 2.4651, + "step": 1898000 + }, + { + "epoch": 9.41, + "learning_rate": 4.529878668073701e-05, + "loss": 2.4453, + "step": 1898500 + }, + { + "epoch": 9.41, + "learning_rate": 4.529754809431093e-05, + "loss": 2.4451, + "step": 1899000 + }, + { + "epoch": 9.41, + "learning_rate": 4.5296309507884845e-05, + "loss": 2.4266, + "step": 1899500 + }, + { + "epoch": 9.41, + "learning_rate": 4.529507092145876e-05, + "loss": 2.455, + "step": 1900000 + }, + { + "epoch": 9.42, + "learning_rate": 4.529383233503268e-05, + "loss": 2.4657, + "step": 1900500 + }, + { + "epoch": 9.42, + "learning_rate": 4.5292593748606595e-05, + "loss": 2.4469, + "step": 1901000 + }, + { + "epoch": 9.42, + "learning_rate": 4.529135516218051e-05, + "loss": 2.4411, + "step": 1901500 + }, + { + "epoch": 9.42, + "learning_rate": 4.5290121530100126e-05, + "loss": 2.4473, + "step": 1902000 + }, + { + "epoch": 9.43, + "learning_rate": 4.528888294367404e-05, + "loss": 2.4457, + "step": 1902500 + }, + { + "epoch": 9.43, + "learning_rate": 4.528764435724796e-05, + "loss": 2.4583, + "step": 1903000 + }, + { + "epoch": 9.43, + "learning_rate": 4.528640577082188e-05, + "loss": 2.4524, + "step": 1903500 + }, + { + "epoch": 9.43, + "learning_rate": 4.5285167184395794e-05, + "loss": 2.4532, + "step": 1904000 + }, + { + "epoch": 9.44, + "learning_rate": 4.528392859796971e-05, + "loss": 2.4809, + "step": 1904500 + }, + { + "epoch": 9.44, + "learning_rate": 4.528269001154363e-05, + "loss": 2.4212, + "step": 1905000 + }, + { + "epoch": 9.44, + "learning_rate": 4.5281451425117545e-05, + "loss": 2.4673, + "step": 1905500 + }, + { + "epoch": 9.44, + "learning_rate": 4.528021283869146e-05, + "loss": 2.4051, + "step": 1906000 + }, + { + "epoch": 9.45, + "learning_rate": 4.527897425226538e-05, + "loss": 2.4195, + "step": 1906500 + }, + { + "epoch": 9.45, + "learning_rate": 4.5277735665839296e-05, + "loss": 2.4623, + "step": 1907000 + }, + { + "epoch": 9.45, + "learning_rate": 4.527649955658606e-05, + "loss": 2.4388, + "step": 1907500 + }, + { + "epoch": 9.45, + "learning_rate": 4.5275260970159975e-05, + "loss": 2.4585, + "step": 1908000 + }, + { + "epoch": 9.46, + "learning_rate": 4.527402238373389e-05, + "loss": 2.4361, + "step": 1908500 + }, + { + "epoch": 9.46, + "learning_rate": 4.527278379730781e-05, + "loss": 2.442, + "step": 1909000 + }, + { + "epoch": 9.46, + "learning_rate": 4.5271545210881725e-05, + "loss": 2.4635, + "step": 1909500 + }, + { + "epoch": 9.46, + "learning_rate": 4.527030662445564e-05, + "loss": 2.4437, + "step": 1910000 + }, + { + "epoch": 9.47, + "learning_rate": 4.526906803802956e-05, + "loss": 2.4507, + "step": 1910500 + }, + { + "epoch": 9.47, + "learning_rate": 4.5267829451603476e-05, + "loss": 2.4323, + "step": 1911000 + }, + { + "epoch": 9.47, + "learning_rate": 4.526659086517739e-05, + "loss": 2.45, + "step": 1911500 + }, + { + "epoch": 9.47, + "learning_rate": 4.526535227875131e-05, + "loss": 2.4588, + "step": 1912000 + }, + { + "epoch": 9.48, + "learning_rate": 4.526411369232523e-05, + "loss": 2.4592, + "step": 1912500 + }, + { + "epoch": 9.48, + "learning_rate": 4.5262875105899144e-05, + "loss": 2.4493, + "step": 1913000 + }, + { + "epoch": 9.48, + "learning_rate": 4.526163899664591e-05, + "loss": 2.4565, + "step": 1913500 + }, + { + "epoch": 9.48, + "learning_rate": 4.5260402887392675e-05, + "loss": 2.4562, + "step": 1914000 + }, + { + "epoch": 9.49, + "learning_rate": 4.525916430096659e-05, + "loss": 2.463, + "step": 1914500 + }, + { + "epoch": 9.49, + "learning_rate": 4.525792819171336e-05, + "loss": 2.4384, + "step": 1915000 + }, + { + "epoch": 9.49, + "learning_rate": 4.525668960528728e-05, + "loss": 2.429, + "step": 1915500 + }, + { + "epoch": 9.49, + "learning_rate": 4.5255451018861194e-05, + "loss": 2.4265, + "step": 1916000 + }, + { + "epoch": 9.5, + "learning_rate": 4.525421243243511e-05, + "loss": 2.457, + "step": 1916500 + }, + { + "epoch": 9.5, + "learning_rate": 4.525297384600903e-05, + "loss": 2.4622, + "step": 1917000 + }, + { + "epoch": 9.5, + "learning_rate": 4.52517377367558e-05, + "loss": 2.4689, + "step": 1917500 + }, + { + "epoch": 9.5, + "learning_rate": 4.5250499150329714e-05, + "loss": 2.4288, + "step": 1918000 + }, + { + "epoch": 9.5, + "learning_rate": 4.524926304107648e-05, + "loss": 2.454, + "step": 1918500 + }, + { + "epoch": 9.51, + "learning_rate": 4.52480244546504e-05, + "loss": 2.4701, + "step": 1919000 + }, + { + "epoch": 9.51, + "learning_rate": 4.5246785868224317e-05, + "loss": 2.4518, + "step": 1919500 + }, + { + "epoch": 9.51, + "learning_rate": 4.5245547281798233e-05, + "loss": 2.4624, + "step": 1920000 + }, + { + "epoch": 9.51, + "learning_rate": 4.5244308695372144e-05, + "loss": 2.4445, + "step": 1920500 + }, + { + "epoch": 9.52, + "learning_rate": 4.524307010894606e-05, + "loss": 2.4495, + "step": 1921000 + }, + { + "epoch": 9.52, + "learning_rate": 4.524183152251998e-05, + "loss": 2.4558, + "step": 1921500 + }, + { + "epoch": 9.52, + "learning_rate": 4.5240592936093894e-05, + "loss": 2.4547, + "step": 1922000 + }, + { + "epoch": 9.52, + "learning_rate": 4.523935682684067e-05, + "loss": 2.441, + "step": 1922500 + }, + { + "epoch": 9.53, + "learning_rate": 4.523811824041459e-05, + "loss": 2.4597, + "step": 1923000 + }, + { + "epoch": 9.53, + "learning_rate": 4.5236879653988504e-05, + "loss": 2.444, + "step": 1923500 + }, + { + "epoch": 9.53, + "learning_rate": 4.523564602190812e-05, + "loss": 2.4567, + "step": 1924000 + }, + { + "epoch": 9.53, + "learning_rate": 4.5234407435482035e-05, + "loss": 2.4353, + "step": 1924500 + }, + { + "epoch": 9.54, + "learning_rate": 4.523316884905595e-05, + "loss": 2.4716, + "step": 1925000 + }, + { + "epoch": 9.54, + "learning_rate": 4.523193026262987e-05, + "loss": 2.4343, + "step": 1925500 + }, + { + "epoch": 9.54, + "learning_rate": 4.5230691676203785e-05, + "loss": 2.4463, + "step": 1926000 + }, + { + "epoch": 9.54, + "learning_rate": 4.52294530897777e-05, + "loss": 2.438, + "step": 1926500 + }, + { + "epoch": 9.55, + "learning_rate": 4.522821450335162e-05, + "loss": 2.4469, + "step": 1927000 + }, + { + "epoch": 9.55, + "learning_rate": 4.5226975916925536e-05, + "loss": 2.4329, + "step": 1927500 + }, + { + "epoch": 9.55, + "learning_rate": 4.522573733049945e-05, + "loss": 2.4451, + "step": 1928000 + }, + { + "epoch": 9.55, + "learning_rate": 4.522449874407337e-05, + "loss": 2.4711, + "step": 1928500 + }, + { + "epoch": 9.56, + "learning_rate": 4.522326015764729e-05, + "loss": 2.4351, + "step": 1929000 + }, + { + "epoch": 9.56, + "learning_rate": 4.5222021571221204e-05, + "loss": 2.4218, + "step": 1929500 + }, + { + "epoch": 9.56, + "learning_rate": 4.5220782984795114e-05, + "loss": 2.437, + "step": 1930000 + }, + { + "epoch": 9.56, + "learning_rate": 4.521954439836903e-05, + "loss": 2.4446, + "step": 1930500 + }, + { + "epoch": 9.57, + "learning_rate": 4.52183082891158e-05, + "loss": 2.4568, + "step": 1931000 + }, + { + "epoch": 9.57, + "learning_rate": 4.521706970268972e-05, + "loss": 2.4456, + "step": 1931500 + }, + { + "epoch": 9.57, + "learning_rate": 4.5215831116263634e-05, + "loss": 2.435, + "step": 1932000 + }, + { + "epoch": 9.57, + "learning_rate": 4.521459252983755e-05, + "loss": 2.4542, + "step": 1932500 + }, + { + "epoch": 9.58, + "learning_rate": 4.521335394341146e-05, + "loss": 2.4418, + "step": 1933000 + }, + { + "epoch": 9.58, + "learning_rate": 4.521211535698538e-05, + "loss": 2.4507, + "step": 1933500 + }, + { + "epoch": 9.58, + "learning_rate": 4.5210876770559295e-05, + "loss": 2.4544, + "step": 1934000 + }, + { + "epoch": 9.58, + "learning_rate": 4.520963818413321e-05, + "loss": 2.4652, + "step": 1934500 + }, + { + "epoch": 9.59, + "learning_rate": 4.520840207487999e-05, + "loss": 2.4531, + "step": 1935000 + }, + { + "epoch": 9.59, + "learning_rate": 4.5207163488453904e-05, + "loss": 2.4594, + "step": 1935500 + }, + { + "epoch": 9.59, + "learning_rate": 4.520592490202782e-05, + "loss": 2.453, + "step": 1936000 + }, + { + "epoch": 9.59, + "learning_rate": 4.5204693747120294e-05, + "loss": 2.4411, + "step": 1936500 + }, + { + "epoch": 9.6, + "learning_rate": 4.5203455160694204e-05, + "loss": 2.4588, + "step": 1937000 + }, + { + "epoch": 9.6, + "learning_rate": 4.520221657426812e-05, + "loss": 2.4649, + "step": 1937500 + }, + { + "epoch": 9.6, + "learning_rate": 4.520097798784204e-05, + "loss": 2.4384, + "step": 1938000 + }, + { + "epoch": 9.6, + "learning_rate": 4.5199739401415955e-05, + "loss": 2.4294, + "step": 1938500 + }, + { + "epoch": 9.61, + "learning_rate": 4.519850081498987e-05, + "loss": 2.4468, + "step": 1939000 + }, + { + "epoch": 9.61, + "learning_rate": 4.519726222856379e-05, + "loss": 2.451, + "step": 1939500 + }, + { + "epoch": 9.61, + "learning_rate": 4.5196023642137705e-05, + "loss": 2.4349, + "step": 1940000 + }, + { + "epoch": 9.61, + "learning_rate": 4.519478505571162e-05, + "loss": 2.439, + "step": 1940500 + }, + { + "epoch": 9.62, + "learning_rate": 4.519354646928554e-05, + "loss": 2.4513, + "step": 1941000 + }, + { + "epoch": 9.62, + "learning_rate": 4.519230788285945e-05, + "loss": 2.4451, + "step": 1941500 + }, + { + "epoch": 9.62, + "learning_rate": 4.5191069296433366e-05, + "loss": 2.4526, + "step": 1942000 + }, + { + "epoch": 9.62, + "learning_rate": 4.518983071000728e-05, + "loss": 2.4393, + "step": 1942500 + }, + { + "epoch": 9.63, + "learning_rate": 4.51885921235812e-05, + "loss": 2.4453, + "step": 1943000 + }, + { + "epoch": 9.63, + "learning_rate": 4.518735353715512e-05, + "loss": 2.4257, + "step": 1943500 + }, + { + "epoch": 9.63, + "learning_rate": 4.5186114950729034e-05, + "loss": 2.4565, + "step": 1944000 + }, + { + "epoch": 9.63, + "learning_rate": 4.51848788414758e-05, + "loss": 2.4816, + "step": 1944500 + }, + { + "epoch": 9.64, + "learning_rate": 4.518364273222257e-05, + "loss": 2.4714, + "step": 1945000 + }, + { + "epoch": 9.64, + "learning_rate": 4.518240662296934e-05, + "loss": 2.4386, + "step": 1945500 + }, + { + "epoch": 9.64, + "learning_rate": 4.518116803654326e-05, + "loss": 2.4524, + "step": 1946000 + }, + { + "epoch": 9.64, + "learning_rate": 4.517992945011717e-05, + "loss": 2.4481, + "step": 1946500 + }, + { + "epoch": 9.65, + "learning_rate": 4.5178690863691084e-05, + "loss": 2.4431, + "step": 1947000 + }, + { + "epoch": 9.65, + "learning_rate": 4.5177452277265e-05, + "loss": 2.4608, + "step": 1947500 + }, + { + "epoch": 9.65, + "learning_rate": 4.517621369083892e-05, + "loss": 2.4459, + "step": 1948000 + }, + { + "epoch": 9.65, + "learning_rate": 4.5174977581585694e-05, + "loss": 2.4347, + "step": 1948500 + }, + { + "epoch": 9.66, + "learning_rate": 4.517373899515961e-05, + "loss": 2.4307, + "step": 1949000 + }, + { + "epoch": 9.66, + "learning_rate": 4.517250040873352e-05, + "loss": 2.4474, + "step": 1949500 + }, + { + "epoch": 9.66, + "learning_rate": 4.517126182230744e-05, + "loss": 2.4829, + "step": 1950000 + }, + { + "epoch": 9.66, + "learning_rate": 4.5170023235881355e-05, + "loss": 2.4657, + "step": 1950500 + }, + { + "epoch": 9.67, + "learning_rate": 4.516878464945527e-05, + "loss": 2.4238, + "step": 1951000 + }, + { + "epoch": 9.67, + "learning_rate": 4.516754854020204e-05, + "loss": 2.4664, + "step": 1951500 + }, + { + "epoch": 9.67, + "learning_rate": 4.516630995377596e-05, + "loss": 2.4432, + "step": 1952000 + }, + { + "epoch": 9.67, + "learning_rate": 4.516507136734987e-05, + "loss": 2.4409, + "step": 1952500 + }, + { + "epoch": 9.68, + "learning_rate": 4.5163832780923785e-05, + "loss": 2.4568, + "step": 1953000 + }, + { + "epoch": 9.68, + "learning_rate": 4.51625941944977e-05, + "loss": 2.4436, + "step": 1953500 + }, + { + "epoch": 9.68, + "learning_rate": 4.516135560807162e-05, + "loss": 2.4369, + "step": 1954000 + }, + { + "epoch": 9.68, + "learning_rate": 4.5160117021645535e-05, + "loss": 2.4344, + "step": 1954500 + }, + { + "epoch": 9.69, + "learning_rate": 4.515887843521945e-05, + "loss": 2.4531, + "step": 1955000 + }, + { + "epoch": 9.69, + "learning_rate": 4.515764232596622e-05, + "loss": 2.4491, + "step": 1955500 + }, + { + "epoch": 9.69, + "learning_rate": 4.515640373954014e-05, + "loss": 2.4332, + "step": 1956000 + }, + { + "epoch": 9.69, + "learning_rate": 4.5155165153114055e-05, + "loss": 2.4424, + "step": 1956500 + }, + { + "epoch": 9.7, + "learning_rate": 4.515392656668797e-05, + "loss": 2.4608, + "step": 1957000 + }, + { + "epoch": 9.7, + "learning_rate": 4.515268798026189e-05, + "loss": 2.444, + "step": 1957500 + }, + { + "epoch": 9.7, + "learning_rate": 4.5151449393835806e-05, + "loss": 2.4751, + "step": 1958000 + }, + { + "epoch": 9.7, + "learning_rate": 4.5150213284582574e-05, + "loss": 2.4599, + "step": 1958500 + }, + { + "epoch": 9.71, + "learning_rate": 4.5148974698156485e-05, + "loss": 2.4676, + "step": 1959000 + }, + { + "epoch": 9.71, + "learning_rate": 4.51477361117304e-05, + "loss": 2.4294, + "step": 1959500 + }, + { + "epoch": 9.71, + "learning_rate": 4.514649752530432e-05, + "loss": 2.4494, + "step": 1960000 + }, + { + "epoch": 9.71, + "learning_rate": 4.5145263893223946e-05, + "loss": 2.4253, + "step": 1960500 + }, + { + "epoch": 9.72, + "learning_rate": 4.514402530679786e-05, + "loss": 2.4498, + "step": 1961000 + }, + { + "epoch": 9.72, + "learning_rate": 4.514278672037178e-05, + "loss": 2.4453, + "step": 1961500 + }, + { + "epoch": 9.72, + "learning_rate": 4.51415481339457e-05, + "loss": 2.4619, + "step": 1962000 + }, + { + "epoch": 9.72, + "learning_rate": 4.514030954751961e-05, + "loss": 2.4357, + "step": 1962500 + }, + { + "epoch": 9.73, + "learning_rate": 4.5139070961093524e-05, + "loss": 2.4439, + "step": 1963000 + }, + { + "epoch": 9.73, + "learning_rate": 4.513783485184029e-05, + "loss": 2.4647, + "step": 1963500 + }, + { + "epoch": 9.73, + "learning_rate": 4.513659626541421e-05, + "loss": 2.4701, + "step": 1964000 + }, + { + "epoch": 9.73, + "learning_rate": 4.5135357678988126e-05, + "loss": 2.4486, + "step": 1964500 + }, + { + "epoch": 9.74, + "learning_rate": 4.5134119092562043e-05, + "loss": 2.4749, + "step": 1965000 + }, + { + "epoch": 9.74, + "learning_rate": 4.513288050613596e-05, + "loss": 2.4462, + "step": 1965500 + }, + { + "epoch": 9.74, + "learning_rate": 4.513164191970988e-05, + "loss": 2.452, + "step": 1966000 + }, + { + "epoch": 9.74, + "learning_rate": 4.5130403333283794e-05, + "loss": 2.4414, + "step": 1966500 + }, + { + "epoch": 9.75, + "learning_rate": 4.512916474685771e-05, + "loss": 2.4396, + "step": 1967000 + }, + { + "epoch": 9.75, + "learning_rate": 4.512792616043163e-05, + "loss": 2.4279, + "step": 1967500 + }, + { + "epoch": 9.75, + "learning_rate": 4.512668757400554e-05, + "loss": 2.4615, + "step": 1968000 + }, + { + "epoch": 9.75, + "learning_rate": 4.5125451464752314e-05, + "loss": 2.4715, + "step": 1968500 + }, + { + "epoch": 9.76, + "learning_rate": 4.512421287832623e-05, + "loss": 2.4397, + "step": 1969000 + }, + { + "epoch": 9.76, + "learning_rate": 4.512297429190014e-05, + "loss": 2.4726, + "step": 1969500 + }, + { + "epoch": 9.76, + "learning_rate": 4.512173570547406e-05, + "loss": 2.4296, + "step": 1970000 + }, + { + "epoch": 9.76, + "learning_rate": 4.5120497119047975e-05, + "loss": 2.4543, + "step": 1970500 + }, + { + "epoch": 9.77, + "learning_rate": 4.511925853262189e-05, + "loss": 2.4283, + "step": 1971000 + }, + { + "epoch": 9.77, + "learning_rate": 4.511801994619581e-05, + "loss": 2.4433, + "step": 1971500 + }, + { + "epoch": 9.77, + "learning_rate": 4.511678135976972e-05, + "loss": 2.431, + "step": 1972000 + }, + { + "epoch": 9.77, + "learning_rate": 4.5115542773343636e-05, + "loss": 2.4314, + "step": 1972500 + }, + { + "epoch": 9.77, + "learning_rate": 4.511430666409041e-05, + "loss": 2.4454, + "step": 1973000 + }, + { + "epoch": 9.78, + "learning_rate": 4.511306807766433e-05, + "loss": 2.4631, + "step": 1973500 + }, + { + "epoch": 9.78, + "learning_rate": 4.5111829491238245e-05, + "loss": 2.4612, + "step": 1974000 + }, + { + "epoch": 9.78, + "learning_rate": 4.5110590904812155e-05, + "loss": 2.4322, + "step": 1974500 + }, + { + "epoch": 9.78, + "learning_rate": 4.510935231838607e-05, + "loss": 2.4259, + "step": 1975000 + }, + { + "epoch": 9.79, + "learning_rate": 4.510811620913285e-05, + "loss": 2.4497, + "step": 1975500 + }, + { + "epoch": 9.79, + "learning_rate": 4.510687762270676e-05, + "loss": 2.4183, + "step": 1976000 + }, + { + "epoch": 9.79, + "learning_rate": 4.5105639036280675e-05, + "loss": 2.4897, + "step": 1976500 + }, + { + "epoch": 9.79, + "learning_rate": 4.510440044985459e-05, + "loss": 2.4587, + "step": 1977000 + }, + { + "epoch": 9.8, + "learning_rate": 4.510316434060136e-05, + "loss": 2.4401, + "step": 1977500 + }, + { + "epoch": 9.8, + "learning_rate": 4.510192575417528e-05, + "loss": 2.4357, + "step": 1978000 + }, + { + "epoch": 9.8, + "learning_rate": 4.5100687167749194e-05, + "loss": 2.4482, + "step": 1978500 + }, + { + "epoch": 9.8, + "learning_rate": 4.509944858132311e-05, + "loss": 2.4574, + "step": 1979000 + }, + { + "epoch": 9.81, + "learning_rate": 4.509821247206988e-05, + "loss": 2.4316, + "step": 1979500 + }, + { + "epoch": 9.81, + "learning_rate": 4.50969738856438e-05, + "loss": 2.4638, + "step": 1980000 + }, + { + "epoch": 9.81, + "learning_rate": 4.5095735299217714e-05, + "loss": 2.4208, + "step": 1980500 + }, + { + "epoch": 9.81, + "learning_rate": 4.509449671279163e-05, + "loss": 2.4633, + "step": 1981000 + }, + { + "epoch": 9.82, + "learning_rate": 4.5093263080711245e-05, + "loss": 2.4471, + "step": 1981500 + }, + { + "epoch": 9.82, + "learning_rate": 4.509202697145802e-05, + "loss": 2.4612, + "step": 1982000 + }, + { + "epoch": 9.82, + "learning_rate": 4.509078838503194e-05, + "loss": 2.4536, + "step": 1982500 + }, + { + "epoch": 9.82, + "learning_rate": 4.50895522757787e-05, + "loss": 2.4203, + "step": 1983000 + }, + { + "epoch": 9.83, + "learning_rate": 4.5088313689352616e-05, + "loss": 2.4689, + "step": 1983500 + }, + { + "epoch": 9.83, + "learning_rate": 4.508707510292653e-05, + "loss": 2.4635, + "step": 1984000 + }, + { + "epoch": 9.83, + "learning_rate": 4.508583651650045e-05, + "loss": 2.4427, + "step": 1984500 + }, + { + "epoch": 9.83, + "learning_rate": 4.508459793007437e-05, + "loss": 2.4488, + "step": 1985000 + }, + { + "epoch": 9.84, + "learning_rate": 4.5083359343648284e-05, + "loss": 2.4687, + "step": 1985500 + }, + { + "epoch": 9.84, + "learning_rate": 4.50821207572222e-05, + "loss": 2.4482, + "step": 1986000 + }, + { + "epoch": 9.84, + "learning_rate": 4.508088217079612e-05, + "loss": 2.4457, + "step": 1986500 + }, + { + "epoch": 9.84, + "learning_rate": 4.5079643584370035e-05, + "loss": 2.4492, + "step": 1987000 + }, + { + "epoch": 9.85, + "learning_rate": 4.5078404997943945e-05, + "loss": 2.468, + "step": 1987500 + }, + { + "epoch": 9.85, + "learning_rate": 4.507716888869072e-05, + "loss": 2.4591, + "step": 1988000 + }, + { + "epoch": 9.85, + "learning_rate": 4.507593030226464e-05, + "loss": 2.4664, + "step": 1988500 + }, + { + "epoch": 9.85, + "learning_rate": 4.5074691715838554e-05, + "loss": 2.4511, + "step": 1989000 + }, + { + "epoch": 9.86, + "learning_rate": 4.507345312941247e-05, + "loss": 2.4497, + "step": 1989500 + }, + { + "epoch": 9.86, + "learning_rate": 4.507221454298639e-05, + "loss": 2.4432, + "step": 1990000 + }, + { + "epoch": 9.86, + "learning_rate": 4.50709759565603e-05, + "loss": 2.4519, + "step": 1990500 + }, + { + "epoch": 9.86, + "learning_rate": 4.5069737370134215e-05, + "loss": 2.4516, + "step": 1991000 + }, + { + "epoch": 9.87, + "learning_rate": 4.506849878370813e-05, + "loss": 2.4417, + "step": 1991500 + }, + { + "epoch": 9.87, + "learning_rate": 4.506726019728205e-05, + "loss": 2.464, + "step": 1992000 + }, + { + "epoch": 9.87, + "learning_rate": 4.5066021610855966e-05, + "loss": 2.4332, + "step": 1992500 + }, + { + "epoch": 9.87, + "learning_rate": 4.5064783024429876e-05, + "loss": 2.4693, + "step": 1993000 + }, + { + "epoch": 9.88, + "learning_rate": 4.506354443800379e-05, + "loss": 2.4477, + "step": 1993500 + }, + { + "epoch": 9.88, + "learning_rate": 4.506230585157771e-05, + "loss": 2.4455, + "step": 1994000 + }, + { + "epoch": 9.88, + "learning_rate": 4.506106974232448e-05, + "loss": 2.4597, + "step": 1994500 + }, + { + "epoch": 9.88, + "learning_rate": 4.5059833633071255e-05, + "loss": 2.446, + "step": 1995000 + }, + { + "epoch": 9.89, + "learning_rate": 4.505859504664517e-05, + "loss": 2.4577, + "step": 1995500 + }, + { + "epoch": 9.89, + "learning_rate": 4.505735646021909e-05, + "loss": 2.4678, + "step": 1996000 + }, + { + "epoch": 9.89, + "learning_rate": 4.5056117873793005e-05, + "loss": 2.4562, + "step": 1996500 + }, + { + "epoch": 9.89, + "learning_rate": 4.5054879287366916e-05, + "loss": 2.4657, + "step": 1997000 + }, + { + "epoch": 9.9, + "learning_rate": 4.505364070094083e-05, + "loss": 2.433, + "step": 1997500 + }, + { + "epoch": 9.9, + "learning_rate": 4.505240211451475e-05, + "loss": 2.4243, + "step": 1998000 + }, + { + "epoch": 9.9, + "learning_rate": 4.5051163528088666e-05, + "loss": 2.4324, + "step": 1998500 + }, + { + "epoch": 9.9, + "learning_rate": 4.504992494166258e-05, + "loss": 2.4494, + "step": 1999000 + }, + { + "epoch": 9.91, + "learning_rate": 4.50486863552365e-05, + "loss": 2.4347, + "step": 1999500 + }, + { + "epoch": 9.91, + "learning_rate": 4.504744776881041e-05, + "loss": 2.4345, + "step": 2000000 + }, + { + "epoch": 9.91, + "learning_rate": 4.504620918238433e-05, + "loss": 2.4564, + "step": 2000500 + }, + { + "epoch": 9.91, + "learning_rate": 4.5044973073131096e-05, + "loss": 2.4482, + "step": 2001000 + }, + { + "epoch": 9.92, + "learning_rate": 4.504373448670501e-05, + "loss": 2.4592, + "step": 2001500 + }, + { + "epoch": 9.92, + "learning_rate": 4.504249590027893e-05, + "loss": 2.4584, + "step": 2002000 + }, + { + "epoch": 9.92, + "learning_rate": 4.504125731385285e-05, + "loss": 2.4937, + "step": 2002500 + }, + { + "epoch": 9.92, + "learning_rate": 4.5040018727426764e-05, + "loss": 2.4234, + "step": 2003000 + }, + { + "epoch": 9.93, + "learning_rate": 4.503878014100068e-05, + "loss": 2.4364, + "step": 2003500 + }, + { + "epoch": 9.93, + "learning_rate": 4.50375415545746e-05, + "loss": 2.4408, + "step": 2004000 + }, + { + "epoch": 9.93, + "learning_rate": 4.5036302968148515e-05, + "loss": 2.4403, + "step": 2004500 + }, + { + "epoch": 9.93, + "learning_rate": 4.503506685889528e-05, + "loss": 2.4583, + "step": 2005000 + }, + { + "epoch": 9.94, + "learning_rate": 4.503383074964205e-05, + "loss": 2.4657, + "step": 2005500 + }, + { + "epoch": 9.94, + "learning_rate": 4.503259216321597e-05, + "loss": 2.4634, + "step": 2006000 + }, + { + "epoch": 9.94, + "learning_rate": 4.503135357678988e-05, + "loss": 2.4743, + "step": 2006500 + }, + { + "epoch": 9.94, + "learning_rate": 4.5030114990363796e-05, + "loss": 2.4383, + "step": 2007000 + }, + { + "epoch": 9.95, + "learning_rate": 4.502887640393771e-05, + "loss": 2.4592, + "step": 2007500 + }, + { + "epoch": 9.95, + "learning_rate": 4.502764029468449e-05, + "loss": 2.458, + "step": 2008000 + }, + { + "epoch": 9.95, + "learning_rate": 4.5026401708258406e-05, + "loss": 2.4377, + "step": 2008500 + }, + { + "epoch": 9.95, + "learning_rate": 4.502516312183232e-05, + "loss": 2.4445, + "step": 2009000 + }, + { + "epoch": 9.96, + "learning_rate": 4.502392453540623e-05, + "loss": 2.4318, + "step": 2009500 + }, + { + "epoch": 9.96, + "learning_rate": 4.502268594898015e-05, + "loss": 2.4583, + "step": 2010000 + }, + { + "epoch": 9.96, + "learning_rate": 4.502144983972692e-05, + "loss": 2.4609, + "step": 2010500 + }, + { + "epoch": 9.96, + "learning_rate": 4.5020211253300835e-05, + "loss": 2.4503, + "step": 2011000 + }, + { + "epoch": 9.97, + "learning_rate": 4.501897266687475e-05, + "loss": 2.4607, + "step": 2011500 + }, + { + "epoch": 9.97, + "learning_rate": 4.501773408044867e-05, + "loss": 2.4503, + "step": 2012000 + }, + { + "epoch": 9.97, + "learning_rate": 4.501649549402258e-05, + "loss": 2.4397, + "step": 2012500 + }, + { + "epoch": 9.97, + "learning_rate": 4.5015256907596496e-05, + "loss": 2.4582, + "step": 2013000 + }, + { + "epoch": 9.98, + "learning_rate": 4.501401832117041e-05, + "loss": 2.438, + "step": 2013500 + }, + { + "epoch": 9.98, + "learning_rate": 4.501277973474433e-05, + "loss": 2.462, + "step": 2014000 + }, + { + "epoch": 9.98, + "learning_rate": 4.5011543625491106e-05, + "loss": 2.4676, + "step": 2014500 + }, + { + "epoch": 9.98, + "learning_rate": 4.501030751623787e-05, + "loss": 2.4411, + "step": 2015000 + }, + { + "epoch": 9.99, + "learning_rate": 4.5009068929811785e-05, + "loss": 2.4521, + "step": 2015500 + }, + { + "epoch": 9.99, + "learning_rate": 4.50078303433857e-05, + "loss": 2.4713, + "step": 2016000 + }, + { + "epoch": 9.99, + "learning_rate": 4.500659175695962e-05, + "loss": 2.432, + "step": 2016500 + }, + { + "epoch": 9.99, + "learning_rate": 4.5005353170533535e-05, + "loss": 2.4529, + "step": 2017000 + }, + { + "epoch": 10.0, + "learning_rate": 4.500411458410745e-05, + "loss": 2.4451, + "step": 2017500 + }, + { + "epoch": 10.0, + "learning_rate": 4.500287847485422e-05, + "loss": 2.4712, + "step": 2018000 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.6423499793751883, + "eval_accuracy_mlm": 0.5954648160103292, + "eval_accuracy_nsp": 0.8633937221278716, + "eval_loss": 2.427269697189331, + "eval_runtime": 146.0474, + "eval_samples_per_second": 1745.728, + "eval_steps_per_second": 72.743, + "step": 2018430 + }, + { + "epoch": 10.0, + "learning_rate": 4.500164236560099e-05, + "loss": 2.4274, + "step": 2018500 + }, + { + "epoch": 10.0, + "learning_rate": 4.500040377917491e-05, + "loss": 2.4295, + "step": 2019000 + }, + { + "epoch": 10.01, + "learning_rate": 4.4999165192748824e-05, + "loss": 2.3972, + "step": 2019500 + }, + { + "epoch": 10.01, + "learning_rate": 4.499792660632274e-05, + "loss": 2.4053, + "step": 2020000 + }, + { + "epoch": 10.01, + "learning_rate": 4.499668801989666e-05, + "loss": 2.4178, + "step": 2020500 + }, + { + "epoch": 10.01, + "learning_rate": 4.499544943347057e-05, + "loss": 2.4151, + "step": 2021000 + }, + { + "epoch": 10.02, + "learning_rate": 4.4994210847044485e-05, + "loss": 2.4157, + "step": 2021500 + }, + { + "epoch": 10.02, + "learning_rate": 4.49929722606184e-05, + "loss": 2.4145, + "step": 2022000 + }, + { + "epoch": 10.02, + "learning_rate": 4.499173367419232e-05, + "loss": 2.4341, + "step": 2022500 + }, + { + "epoch": 10.02, + "learning_rate": 4.499050004211194e-05, + "loss": 2.4384, + "step": 2023000 + }, + { + "epoch": 10.03, + "learning_rate": 4.4989261455685856e-05, + "loss": 2.4209, + "step": 2023500 + }, + { + "epoch": 10.03, + "learning_rate": 4.498802286925977e-05, + "loss": 2.4392, + "step": 2024000 + }, + { + "epoch": 10.03, + "learning_rate": 4.498678428283369e-05, + "loss": 2.4234, + "step": 2024500 + }, + { + "epoch": 10.03, + "learning_rate": 4.498554569640761e-05, + "loss": 2.412, + "step": 2025000 + }, + { + "epoch": 10.04, + "learning_rate": 4.4984307109981524e-05, + "loss": 2.4119, + "step": 2025500 + }, + { + "epoch": 10.04, + "learning_rate": 4.498306852355544e-05, + "loss": 2.4244, + "step": 2026000 + }, + { + "epoch": 10.04, + "learning_rate": 4.498182993712936e-05, + "loss": 2.4498, + "step": 2026500 + }, + { + "epoch": 10.04, + "learning_rate": 4.4980591350703275e-05, + "loss": 2.4358, + "step": 2027000 + }, + { + "epoch": 10.04, + "learning_rate": 4.497935276427719e-05, + "loss": 2.4245, + "step": 2027500 + }, + { + "epoch": 10.05, + "learning_rate": 4.497811913219681e-05, + "loss": 2.4284, + "step": 2028000 + }, + { + "epoch": 10.05, + "learning_rate": 4.497688054577073e-05, + "loss": 2.4131, + "step": 2028500 + }, + { + "epoch": 10.05, + "learning_rate": 4.497564443651749e-05, + "loss": 2.4187, + "step": 2029000 + }, + { + "epoch": 10.05, + "learning_rate": 4.497440585009141e-05, + "loss": 2.4189, + "step": 2029500 + }, + { + "epoch": 10.06, + "learning_rate": 4.4973167263665325e-05, + "loss": 2.4364, + "step": 2030000 + }, + { + "epoch": 10.06, + "learning_rate": 4.497192867723924e-05, + "loss": 2.4455, + "step": 2030500 + }, + { + "epoch": 10.06, + "learning_rate": 4.497069009081316e-05, + "loss": 2.4064, + "step": 2031000 + }, + { + "epoch": 10.06, + "learning_rate": 4.4969451504387076e-05, + "loss": 2.4481, + "step": 2031500 + }, + { + "epoch": 10.07, + "learning_rate": 4.4968212917960986e-05, + "loss": 2.4151, + "step": 2032000 + }, + { + "epoch": 10.07, + "learning_rate": 4.49669743315349e-05, + "loss": 2.4254, + "step": 2032500 + }, + { + "epoch": 10.07, + "learning_rate": 4.496573574510882e-05, + "loss": 2.428, + "step": 2033000 + }, + { + "epoch": 10.07, + "learning_rate": 4.496449715868274e-05, + "loss": 2.4263, + "step": 2033500 + }, + { + "epoch": 10.08, + "learning_rate": 4.4963258572256654e-05, + "loss": 2.4114, + "step": 2034000 + }, + { + "epoch": 10.08, + "learning_rate": 4.496201998583057e-05, + "loss": 2.4332, + "step": 2034500 + }, + { + "epoch": 10.08, + "learning_rate": 4.496078139940449e-05, + "loss": 2.4325, + "step": 2035000 + }, + { + "epoch": 10.08, + "learning_rate": 4.4959545290151257e-05, + "loss": 2.4009, + "step": 2035500 + }, + { + "epoch": 10.09, + "learning_rate": 4.4958306703725173e-05, + "loss": 2.435, + "step": 2036000 + }, + { + "epoch": 10.09, + "learning_rate": 4.495706811729909e-05, + "loss": 2.4103, + "step": 2036500 + }, + { + "epoch": 10.09, + "learning_rate": 4.495582953087301e-05, + "loss": 2.4359, + "step": 2037000 + }, + { + "epoch": 10.09, + "learning_rate": 4.4954590944446924e-05, + "loss": 2.4256, + "step": 2037500 + }, + { + "epoch": 10.1, + "learning_rate": 4.495335235802084e-05, + "loss": 2.4242, + "step": 2038000 + }, + { + "epoch": 10.1, + "learning_rate": 4.495211377159476e-05, + "loss": 2.4267, + "step": 2038500 + }, + { + "epoch": 10.1, + "learning_rate": 4.4950875185168675e-05, + "loss": 2.4368, + "step": 2039000 + }, + { + "epoch": 10.1, + "learning_rate": 4.4949641553088296e-05, + "loss": 2.4211, + "step": 2039500 + }, + { + "epoch": 10.11, + "learning_rate": 4.494840296666221e-05, + "loss": 2.4296, + "step": 2040000 + }, + { + "epoch": 10.11, + "learning_rate": 4.494716438023613e-05, + "loss": 2.4401, + "step": 2040500 + }, + { + "epoch": 10.11, + "learning_rate": 4.4945925793810047e-05, + "loss": 2.4441, + "step": 2041000 + }, + { + "epoch": 10.11, + "learning_rate": 4.494468720738396e-05, + "loss": 2.4434, + "step": 2041500 + }, + { + "epoch": 10.12, + "learning_rate": 4.4943448620957874e-05, + "loss": 2.4475, + "step": 2042000 + }, + { + "epoch": 10.12, + "learning_rate": 4.494221003453179e-05, + "loss": 2.4398, + "step": 2042500 + }, + { + "epoch": 10.12, + "learning_rate": 4.494097144810571e-05, + "loss": 2.4278, + "step": 2043000 + }, + { + "epoch": 10.12, + "learning_rate": 4.4939732861679624e-05, + "loss": 2.4079, + "step": 2043500 + }, + { + "epoch": 10.13, + "learning_rate": 4.493849675242639e-05, + "loss": 2.4734, + "step": 2044000 + }, + { + "epoch": 10.13, + "learning_rate": 4.49372581660003e-05, + "loss": 2.4516, + "step": 2044500 + }, + { + "epoch": 10.13, + "learning_rate": 4.493601957957422e-05, + "loss": 2.4456, + "step": 2045000 + }, + { + "epoch": 10.13, + "learning_rate": 4.493478099314814e-05, + "loss": 2.4319, + "step": 2045500 + }, + { + "epoch": 10.14, + "learning_rate": 4.4933542406722054e-05, + "loss": 2.4348, + "step": 2046000 + }, + { + "epoch": 10.14, + "learning_rate": 4.493230382029597e-05, + "loss": 2.4237, + "step": 2046500 + }, + { + "epoch": 10.14, + "learning_rate": 4.493106771104275e-05, + "loss": 2.4202, + "step": 2047000 + }, + { + "epoch": 10.14, + "learning_rate": 4.492982912461666e-05, + "loss": 2.421, + "step": 2047500 + }, + { + "epoch": 10.15, + "learning_rate": 4.492859301536343e-05, + "loss": 2.4291, + "step": 2048000 + }, + { + "epoch": 10.15, + "learning_rate": 4.4927356906110194e-05, + "loss": 2.422, + "step": 2048500 + }, + { + "epoch": 10.15, + "learning_rate": 4.492611831968411e-05, + "loss": 2.4306, + "step": 2049000 + }, + { + "epoch": 10.15, + "learning_rate": 4.492487973325803e-05, + "loss": 2.4154, + "step": 2049500 + }, + { + "epoch": 10.16, + "learning_rate": 4.4923641146831945e-05, + "loss": 2.4207, + "step": 2050000 + }, + { + "epoch": 10.16, + "learning_rate": 4.492240256040586e-05, + "loss": 2.4277, + "step": 2050500 + }, + { + "epoch": 10.16, + "learning_rate": 4.492116397397978e-05, + "loss": 2.4082, + "step": 2051000 + }, + { + "epoch": 10.16, + "learning_rate": 4.4919925387553696e-05, + "loss": 2.4284, + "step": 2051500 + }, + { + "epoch": 10.17, + "learning_rate": 4.491868680112761e-05, + "loss": 2.4324, + "step": 2052000 + }, + { + "epoch": 10.17, + "learning_rate": 4.491744821470153e-05, + "loss": 2.418, + "step": 2052500 + }, + { + "epoch": 10.17, + "learning_rate": 4.491620962827545e-05, + "loss": 2.4193, + "step": 2053000 + }, + { + "epoch": 10.17, + "learning_rate": 4.4914971041849364e-05, + "loss": 2.4341, + "step": 2053500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4913732455423274e-05, + "loss": 2.4189, + "step": 2054000 + }, + { + "epoch": 10.18, + "learning_rate": 4.491249634617005e-05, + "loss": 2.4323, + "step": 2054500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4911257759743966e-05, + "loss": 2.4587, + "step": 2055000 + }, + { + "epoch": 10.18, + "learning_rate": 4.491002165049073e-05, + "loss": 2.4278, + "step": 2055500 + }, + { + "epoch": 10.19, + "learning_rate": 4.4908783064064645e-05, + "loss": 2.4202, + "step": 2056000 + }, + { + "epoch": 10.19, + "learning_rate": 4.490754447763856e-05, + "loss": 2.4481, + "step": 2056500 + }, + { + "epoch": 10.19, + "learning_rate": 4.490630589121248e-05, + "loss": 2.4114, + "step": 2057000 + }, + { + "epoch": 10.19, + "learning_rate": 4.490506978195925e-05, + "loss": 2.4111, + "step": 2057500 + }, + { + "epoch": 10.2, + "learning_rate": 4.4903831195533165e-05, + "loss": 2.4391, + "step": 2058000 + }, + { + "epoch": 10.2, + "learning_rate": 4.490259260910708e-05, + "loss": 2.4327, + "step": 2058500 + }, + { + "epoch": 10.2, + "learning_rate": 4.4901354022681e-05, + "loss": 2.4279, + "step": 2059000 + }, + { + "epoch": 10.2, + "learning_rate": 4.4900115436254916e-05, + "loss": 2.4355, + "step": 2059500 + }, + { + "epoch": 10.21, + "learning_rate": 4.4898881804174536e-05, + "loss": 2.4252, + "step": 2060000 + }, + { + "epoch": 10.21, + "learning_rate": 4.489764321774845e-05, + "loss": 2.4196, + "step": 2060500 + }, + { + "epoch": 10.21, + "learning_rate": 4.4896404631322363e-05, + "loss": 2.4315, + "step": 2061000 + }, + { + "epoch": 10.21, + "learning_rate": 4.489516604489628e-05, + "loss": 2.4199, + "step": 2061500 + }, + { + "epoch": 10.22, + "learning_rate": 4.48939274584702e-05, + "loss": 2.4235, + "step": 2062000 + }, + { + "epoch": 10.22, + "learning_rate": 4.4892688872044114e-05, + "loss": 2.3984, + "step": 2062500 + }, + { + "epoch": 10.22, + "learning_rate": 4.489145028561803e-05, + "loss": 2.4339, + "step": 2063000 + }, + { + "epoch": 10.22, + "learning_rate": 4.489021169919195e-05, + "loss": 2.3881, + "step": 2063500 + }, + { + "epoch": 10.23, + "learning_rate": 4.4888973112765865e-05, + "loss": 2.4048, + "step": 2064000 + }, + { + "epoch": 10.23, + "learning_rate": 4.488773452633978e-05, + "loss": 2.426, + "step": 2064500 + }, + { + "epoch": 10.23, + "learning_rate": 4.48864959399137e-05, + "loss": 2.4169, + "step": 2065000 + }, + { + "epoch": 10.23, + "learning_rate": 4.4885257353487616e-05, + "loss": 2.4291, + "step": 2065500 + }, + { + "epoch": 10.24, + "learning_rate": 4.488402124423438e-05, + "loss": 2.422, + "step": 2066000 + }, + { + "epoch": 10.24, + "learning_rate": 4.4882782657808295e-05, + "loss": 2.4173, + "step": 2066500 + }, + { + "epoch": 10.24, + "learning_rate": 4.488154407138221e-05, + "loss": 2.415, + "step": 2067000 + }, + { + "epoch": 10.24, + "learning_rate": 4.488030548495613e-05, + "loss": 2.4058, + "step": 2067500 + }, + { + "epoch": 10.25, + "learning_rate": 4.4879066898530046e-05, + "loss": 2.4282, + "step": 2068000 + }, + { + "epoch": 10.25, + "learning_rate": 4.4877830789276814e-05, + "loss": 2.4527, + "step": 2068500 + }, + { + "epoch": 10.25, + "learning_rate": 4.487659220285073e-05, + "loss": 2.4305, + "step": 2069000 + }, + { + "epoch": 10.25, + "learning_rate": 4.487535361642465e-05, + "loss": 2.4422, + "step": 2069500 + }, + { + "epoch": 10.26, + "learning_rate": 4.4874115029998565e-05, + "loss": 2.4138, + "step": 2070000 + }, + { + "epoch": 10.26, + "learning_rate": 4.4872878920745334e-05, + "loss": 2.4326, + "step": 2070500 + }, + { + "epoch": 10.26, + "learning_rate": 4.487164033431925e-05, + "loss": 2.4561, + "step": 2071000 + }, + { + "epoch": 10.26, + "learning_rate": 4.487040174789317e-05, + "loss": 2.4219, + "step": 2071500 + }, + { + "epoch": 10.27, + "learning_rate": 4.4869163161467085e-05, + "loss": 2.4363, + "step": 2072000 + }, + { + "epoch": 10.27, + "learning_rate": 4.4867924575040995e-05, + "loss": 2.4182, + "step": 2072500 + }, + { + "epoch": 10.27, + "learning_rate": 4.486668598861491e-05, + "loss": 2.4498, + "step": 2073000 + }, + { + "epoch": 10.27, + "learning_rate": 4.486544740218883e-05, + "loss": 2.4262, + "step": 2073500 + }, + { + "epoch": 10.28, + "learning_rate": 4.4864208815762746e-05, + "loss": 2.4582, + "step": 2074000 + }, + { + "epoch": 10.28, + "learning_rate": 4.4862972706509515e-05, + "loss": 2.4353, + "step": 2074500 + }, + { + "epoch": 10.28, + "learning_rate": 4.486173412008343e-05, + "loss": 2.4298, + "step": 2075000 + }, + { + "epoch": 10.28, + "learning_rate": 4.486049553365735e-05, + "loss": 2.4233, + "step": 2075500 + }, + { + "epoch": 10.29, + "learning_rate": 4.4859259424404124e-05, + "loss": 2.4328, + "step": 2076000 + }, + { + "epoch": 10.29, + "learning_rate": 4.4858023315150886e-05, + "loss": 2.4591, + "step": 2076500 + }, + { + "epoch": 10.29, + "learning_rate": 4.48567847287248e-05, + "loss": 2.4568, + "step": 2077000 + }, + { + "epoch": 10.29, + "learning_rate": 4.485554614229872e-05, + "loss": 2.4302, + "step": 2077500 + }, + { + "epoch": 10.3, + "learning_rate": 4.485430755587264e-05, + "loss": 2.4473, + "step": 2078000 + }, + { + "epoch": 10.3, + "learning_rate": 4.4853071446619406e-05, + "loss": 2.4297, + "step": 2078500 + }, + { + "epoch": 10.3, + "learning_rate": 4.4851835337366174e-05, + "loss": 2.4359, + "step": 2079000 + }, + { + "epoch": 10.3, + "learning_rate": 4.485059675094009e-05, + "loss": 2.4481, + "step": 2079500 + }, + { + "epoch": 10.31, + "learning_rate": 4.4849358164514e-05, + "loss": 2.4354, + "step": 2080000 + }, + { + "epoch": 10.31, + "learning_rate": 4.484811957808792e-05, + "loss": 2.435, + "step": 2080500 + }, + { + "epoch": 10.31, + "learning_rate": 4.484688346883469e-05, + "loss": 2.4235, + "step": 2081000 + }, + { + "epoch": 10.31, + "learning_rate": 4.4845644882408604e-05, + "loss": 2.419, + "step": 2081500 + }, + { + "epoch": 10.31, + "learning_rate": 4.484440629598252e-05, + "loss": 2.4439, + "step": 2082000 + }, + { + "epoch": 10.32, + "learning_rate": 4.484316770955644e-05, + "loss": 2.423, + "step": 2082500 + }, + { + "epoch": 10.32, + "learning_rate": 4.4841929123130355e-05, + "loss": 2.4478, + "step": 2083000 + }, + { + "epoch": 10.32, + "learning_rate": 4.484069053670427e-05, + "loss": 2.4356, + "step": 2083500 + }, + { + "epoch": 10.32, + "learning_rate": 4.483945195027819e-05, + "loss": 2.4367, + "step": 2084000 + }, + { + "epoch": 10.33, + "learning_rate": 4.4838213363852106e-05, + "loss": 2.4409, + "step": 2084500 + }, + { + "epoch": 10.33, + "learning_rate": 4.483697477742602e-05, + "loss": 2.4238, + "step": 2085000 + }, + { + "epoch": 10.33, + "learning_rate": 4.483573619099994e-05, + "loss": 2.4366, + "step": 2085500 + }, + { + "epoch": 10.33, + "learning_rate": 4.4834497604573856e-05, + "loss": 2.4146, + "step": 2086000 + }, + { + "epoch": 10.34, + "learning_rate": 4.4833259018147773e-05, + "loss": 2.4158, + "step": 2086500 + }, + { + "epoch": 10.34, + "learning_rate": 4.483202043172169e-05, + "loss": 2.4511, + "step": 2087000 + }, + { + "epoch": 10.34, + "learning_rate": 4.483078184529561e-05, + "loss": 2.42, + "step": 2087500 + }, + { + "epoch": 10.34, + "learning_rate": 4.4829543258869524e-05, + "loss": 2.4163, + "step": 2088000 + }, + { + "epoch": 10.35, + "learning_rate": 4.482830467244344e-05, + "loss": 2.4279, + "step": 2088500 + }, + { + "epoch": 10.35, + "learning_rate": 4.482706608601735e-05, + "loss": 2.451, + "step": 2089000 + }, + { + "epoch": 10.35, + "learning_rate": 4.482582749959127e-05, + "loss": 2.4472, + "step": 2089500 + }, + { + "epoch": 10.35, + "learning_rate": 4.4824588913165185e-05, + "loss": 2.4138, + "step": 2090000 + }, + { + "epoch": 10.36, + "learning_rate": 4.48233503267391e-05, + "loss": 2.4263, + "step": 2090500 + }, + { + "epoch": 10.36, + "learning_rate": 4.482211174031302e-05, + "loss": 2.4066, + "step": 2091000 + }, + { + "epoch": 10.36, + "learning_rate": 4.482087563105979e-05, + "loss": 2.4424, + "step": 2091500 + }, + { + "epoch": 10.36, + "learning_rate": 4.48196370446337e-05, + "loss": 2.4485, + "step": 2092000 + }, + { + "epoch": 10.37, + "learning_rate": 4.4818398458207615e-05, + "loss": 2.4437, + "step": 2092500 + }, + { + "epoch": 10.37, + "learning_rate": 4.481716234895439e-05, + "loss": 2.4505, + "step": 2093000 + }, + { + "epoch": 10.37, + "learning_rate": 4.481592376252831e-05, + "loss": 2.4309, + "step": 2093500 + }, + { + "epoch": 10.37, + "learning_rate": 4.4814685176102224e-05, + "loss": 2.4408, + "step": 2094000 + }, + { + "epoch": 10.38, + "learning_rate": 4.481344658967614e-05, + "loss": 2.4208, + "step": 2094500 + }, + { + "epoch": 10.38, + "learning_rate": 4.481220800325005e-05, + "loss": 2.3977, + "step": 2095000 + }, + { + "epoch": 10.38, + "learning_rate": 4.481097189399682e-05, + "loss": 2.4398, + "step": 2095500 + }, + { + "epoch": 10.38, + "learning_rate": 4.480973330757074e-05, + "loss": 2.4252, + "step": 2096000 + }, + { + "epoch": 10.39, + "learning_rate": 4.4808494721144654e-05, + "loss": 2.4061, + "step": 2096500 + }, + { + "epoch": 10.39, + "learning_rate": 4.480725613471857e-05, + "loss": 2.4416, + "step": 2097000 + }, + { + "epoch": 10.39, + "learning_rate": 4.480601754829249e-05, + "loss": 2.4346, + "step": 2097500 + }, + { + "epoch": 10.39, + "learning_rate": 4.4804778961866405e-05, + "loss": 2.4364, + "step": 2098000 + }, + { + "epoch": 10.4, + "learning_rate": 4.4803540375440315e-05, + "loss": 2.4197, + "step": 2098500 + }, + { + "epoch": 10.4, + "learning_rate": 4.480230178901423e-05, + "loss": 2.423, + "step": 2099000 + }, + { + "epoch": 10.4, + "learning_rate": 4.480106320258815e-05, + "loss": 2.4381, + "step": 2099500 + }, + { + "epoch": 10.4, + "learning_rate": 4.4799824616162066e-05, + "loss": 2.4665, + "step": 2100000 + }, + { + "epoch": 10.41, + "learning_rate": 4.479858602973598e-05, + "loss": 2.4273, + "step": 2100500 + }, + { + "epoch": 10.41, + "learning_rate": 4.479734992048276e-05, + "loss": 2.4372, + "step": 2101000 + }, + { + "epoch": 10.41, + "learning_rate": 4.479611133405667e-05, + "loss": 2.429, + "step": 2101500 + }, + { + "epoch": 10.41, + "learning_rate": 4.4794872747630585e-05, + "loss": 2.4399, + "step": 2102000 + }, + { + "epoch": 10.42, + "learning_rate": 4.47936341612045e-05, + "loss": 2.4392, + "step": 2102500 + }, + { + "epoch": 10.42, + "learning_rate": 4.479239805195127e-05, + "loss": 2.4241, + "step": 2103000 + }, + { + "epoch": 10.42, + "learning_rate": 4.479115946552519e-05, + "loss": 2.4679, + "step": 2103500 + }, + { + "epoch": 10.42, + "learning_rate": 4.4789920879099105e-05, + "loss": 2.4386, + "step": 2104000 + }, + { + "epoch": 10.43, + "learning_rate": 4.4788682292673015e-05, + "loss": 2.4394, + "step": 2104500 + }, + { + "epoch": 10.43, + "learning_rate": 4.478744370624693e-05, + "loss": 2.4168, + "step": 2105000 + }, + { + "epoch": 10.43, + "learning_rate": 4.478620511982085e-05, + "loss": 2.4562, + "step": 2105500 + }, + { + "epoch": 10.43, + "learning_rate": 4.4784969010567625e-05, + "loss": 2.4321, + "step": 2106000 + }, + { + "epoch": 10.44, + "learning_rate": 4.478373042414154e-05, + "loss": 2.4339, + "step": 2106500 + }, + { + "epoch": 10.44, + "learning_rate": 4.478249183771546e-05, + "loss": 2.4364, + "step": 2107000 + }, + { + "epoch": 10.44, + "learning_rate": 4.478125325128937e-05, + "loss": 2.3967, + "step": 2107500 + }, + { + "epoch": 10.44, + "learning_rate": 4.4780014664863286e-05, + "loss": 2.4516, + "step": 2108000 + }, + { + "epoch": 10.45, + "learning_rate": 4.47787760784372e-05, + "loss": 2.4217, + "step": 2108500 + }, + { + "epoch": 10.45, + "learning_rate": 4.477753749201112e-05, + "loss": 2.418, + "step": 2109000 + }, + { + "epoch": 10.45, + "learning_rate": 4.4776298905585036e-05, + "loss": 2.4375, + "step": 2109500 + }, + { + "epoch": 10.45, + "learning_rate": 4.477506031915895e-05, + "loss": 2.4179, + "step": 2110000 + }, + { + "epoch": 10.46, + "learning_rate": 4.477382173273287e-05, + "loss": 2.4415, + "step": 2110500 + }, + { + "epoch": 10.46, + "learning_rate": 4.477258562347963e-05, + "loss": 2.4335, + "step": 2111000 + }, + { + "epoch": 10.46, + "learning_rate": 4.477134703705355e-05, + "loss": 2.4394, + "step": 2111500 + }, + { + "epoch": 10.46, + "learning_rate": 4.4770108450627466e-05, + "loss": 2.441, + "step": 2112000 + }, + { + "epoch": 10.47, + "learning_rate": 4.476886986420138e-05, + "loss": 2.4271, + "step": 2112500 + }, + { + "epoch": 10.47, + "learning_rate": 4.47676312777753e-05, + "loss": 2.4607, + "step": 2113000 + }, + { + "epoch": 10.47, + "learning_rate": 4.476639269134922e-05, + "loss": 2.4462, + "step": 2113500 + }, + { + "epoch": 10.47, + "learning_rate": 4.4765154104923134e-05, + "loss": 2.4493, + "step": 2114000 + }, + { + "epoch": 10.48, + "learning_rate": 4.47639179956699e-05, + "loss": 2.4344, + "step": 2114500 + }, + { + "epoch": 10.48, + "learning_rate": 4.476267940924382e-05, + "loss": 2.4534, + "step": 2115000 + }, + { + "epoch": 10.48, + "learning_rate": 4.4761440822817736e-05, + "loss": 2.4273, + "step": 2115500 + }, + { + "epoch": 10.48, + "learning_rate": 4.476020223639165e-05, + "loss": 2.4293, + "step": 2116000 + }, + { + "epoch": 10.49, + "learning_rate": 4.475896612713842e-05, + "loss": 2.4547, + "step": 2116500 + }, + { + "epoch": 10.49, + "learning_rate": 4.475773001788519e-05, + "loss": 2.4524, + "step": 2117000 + }, + { + "epoch": 10.49, + "learning_rate": 4.475649143145911e-05, + "loss": 2.4532, + "step": 2117500 + }, + { + "epoch": 10.49, + "learning_rate": 4.4755252845033025e-05, + "loss": 2.4419, + "step": 2118000 + }, + { + "epoch": 10.5, + "learning_rate": 4.475401425860694e-05, + "loss": 2.4467, + "step": 2118500 + }, + { + "epoch": 10.5, + "learning_rate": 4.475277567218086e-05, + "loss": 2.4355, + "step": 2119000 + }, + { + "epoch": 10.5, + "learning_rate": 4.475153956292763e-05, + "loss": 2.4465, + "step": 2119500 + }, + { + "epoch": 10.5, + "learning_rate": 4.4750300976501544e-05, + "loss": 2.4484, + "step": 2120000 + }, + { + "epoch": 10.51, + "learning_rate": 4.474906239007546e-05, + "loss": 2.4718, + "step": 2120500 + }, + { + "epoch": 10.51, + "learning_rate": 4.474782380364937e-05, + "loss": 2.4429, + "step": 2121000 + }, + { + "epoch": 10.51, + "learning_rate": 4.474658521722329e-05, + "loss": 2.461, + "step": 2121500 + }, + { + "epoch": 10.51, + "learning_rate": 4.4745346630797205e-05, + "loss": 2.4328, + "step": 2122000 + }, + { + "epoch": 10.52, + "learning_rate": 4.474410804437112e-05, + "loss": 2.457, + "step": 2122500 + }, + { + "epoch": 10.52, + "learning_rate": 4.474286945794504e-05, + "loss": 2.4409, + "step": 2123000 + }, + { + "epoch": 10.52, + "learning_rate": 4.474163087151895e-05, + "loss": 2.4528, + "step": 2123500 + }, + { + "epoch": 10.52, + "learning_rate": 4.4740392285092866e-05, + "loss": 2.4398, + "step": 2124000 + }, + { + "epoch": 10.53, + "learning_rate": 4.473915617583964e-05, + "loss": 2.419, + "step": 2124500 + }, + { + "epoch": 10.53, + "learning_rate": 4.473791758941356e-05, + "loss": 2.4351, + "step": 2125000 + }, + { + "epoch": 10.53, + "learning_rate": 4.4736679002987476e-05, + "loss": 2.4138, + "step": 2125500 + }, + { + "epoch": 10.53, + "learning_rate": 4.473544041656139e-05, + "loss": 2.4318, + "step": 2126000 + }, + { + "epoch": 10.54, + "learning_rate": 4.47342018301353e-05, + "loss": 2.4301, + "step": 2126500 + }, + { + "epoch": 10.54, + "learning_rate": 4.473296324370922e-05, + "loss": 2.4201, + "step": 2127000 + }, + { + "epoch": 10.54, + "learning_rate": 4.473172465728314e-05, + "loss": 2.439, + "step": 2127500 + }, + { + "epoch": 10.54, + "learning_rate": 4.4730488548029905e-05, + "loss": 2.4401, + "step": 2128000 + }, + { + "epoch": 10.55, + "learning_rate": 4.472924996160382e-05, + "loss": 2.4265, + "step": 2128500 + }, + { + "epoch": 10.55, + "learning_rate": 4.472801137517774e-05, + "loss": 2.4333, + "step": 2129000 + }, + { + "epoch": 10.55, + "learning_rate": 4.4726772788751656e-05, + "loss": 2.4448, + "step": 2129500 + }, + { + "epoch": 10.55, + "learning_rate": 4.4725534202325566e-05, + "loss": 2.4431, + "step": 2130000 + }, + { + "epoch": 10.56, + "learning_rate": 4.472429561589948e-05, + "loss": 2.4097, + "step": 2130500 + }, + { + "epoch": 10.56, + "learning_rate": 4.47230570294734e-05, + "loss": 2.4432, + "step": 2131000 + }, + { + "epoch": 10.56, + "learning_rate": 4.472181844304732e-05, + "loss": 2.4459, + "step": 2131500 + }, + { + "epoch": 10.56, + "learning_rate": 4.472058233379409e-05, + "loss": 2.4461, + "step": 2132000 + }, + { + "epoch": 10.57, + "learning_rate": 4.4719343747368e-05, + "loss": 2.4396, + "step": 2132500 + }, + { + "epoch": 10.57, + "learning_rate": 4.471810763811478e-05, + "loss": 2.4402, + "step": 2133000 + }, + { + "epoch": 10.57, + "learning_rate": 4.4716869051688695e-05, + "loss": 2.4444, + "step": 2133500 + }, + { + "epoch": 10.57, + "learning_rate": 4.471563046526261e-05, + "loss": 2.4261, + "step": 2134000 + }, + { + "epoch": 10.58, + "learning_rate": 4.471439187883652e-05, + "loss": 2.4395, + "step": 2134500 + }, + { + "epoch": 10.58, + "learning_rate": 4.471315329241044e-05, + "loss": 2.4222, + "step": 2135000 + }, + { + "epoch": 10.58, + "learning_rate": 4.4711914705984356e-05, + "loss": 2.4348, + "step": 2135500 + }, + { + "epoch": 10.58, + "learning_rate": 4.4710678596731125e-05, + "loss": 2.4261, + "step": 2136000 + }, + { + "epoch": 10.58, + "learning_rate": 4.470944001030504e-05, + "loss": 2.4462, + "step": 2136500 + }, + { + "epoch": 10.59, + "learning_rate": 4.470820142387896e-05, + "loss": 2.4554, + "step": 2137000 + }, + { + "epoch": 10.59, + "learning_rate": 4.4706962837452876e-05, + "loss": 2.4547, + "step": 2137500 + }, + { + "epoch": 10.59, + "learning_rate": 4.470572425102679e-05, + "loss": 2.4258, + "step": 2138000 + }, + { + "epoch": 10.59, + "learning_rate": 4.470448566460071e-05, + "loss": 2.4368, + "step": 2138500 + }, + { + "epoch": 10.6, + "learning_rate": 4.470324707817462e-05, + "loss": 2.4417, + "step": 2139000 + }, + { + "epoch": 10.6, + "learning_rate": 4.470200849174854e-05, + "loss": 2.4279, + "step": 2139500 + }, + { + "epoch": 10.6, + "learning_rate": 4.470077238249531e-05, + "loss": 2.4251, + "step": 2140000 + }, + { + "epoch": 10.6, + "learning_rate": 4.469953379606923e-05, + "loss": 2.4378, + "step": 2140500 + }, + { + "epoch": 10.61, + "learning_rate": 4.469829520964314e-05, + "loss": 2.4518, + "step": 2141000 + }, + { + "epoch": 10.61, + "learning_rate": 4.4697056623217057e-05, + "loss": 2.4638, + "step": 2141500 + }, + { + "epoch": 10.61, + "learning_rate": 4.4695818036790973e-05, + "loss": 2.4405, + "step": 2142000 + }, + { + "epoch": 10.61, + "learning_rate": 4.469457945036489e-05, + "loss": 2.444, + "step": 2142500 + }, + { + "epoch": 10.62, + "learning_rate": 4.469334334111166e-05, + "loss": 2.4427, + "step": 2143000 + }, + { + "epoch": 10.62, + "learning_rate": 4.4692104754685576e-05, + "loss": 2.4335, + "step": 2143500 + }, + { + "epoch": 10.62, + "learning_rate": 4.469086616825949e-05, + "loss": 2.4229, + "step": 2144000 + }, + { + "epoch": 10.62, + "learning_rate": 4.468962758183341e-05, + "loss": 2.4291, + "step": 2144500 + }, + { + "epoch": 10.63, + "learning_rate": 4.468838899540732e-05, + "loss": 2.4256, + "step": 2145000 + }, + { + "epoch": 10.63, + "learning_rate": 4.4687152886154096e-05, + "loss": 2.4089, + "step": 2145500 + }, + { + "epoch": 10.63, + "learning_rate": 4.468591429972801e-05, + "loss": 2.4351, + "step": 2146000 + }, + { + "epoch": 10.63, + "learning_rate": 4.468467571330193e-05, + "loss": 2.4628, + "step": 2146500 + }, + { + "epoch": 10.64, + "learning_rate": 4.468343960404869e-05, + "loss": 2.4254, + "step": 2147000 + }, + { + "epoch": 10.64, + "learning_rate": 4.468220349479546e-05, + "loss": 2.4318, + "step": 2147500 + }, + { + "epoch": 10.64, + "learning_rate": 4.468096490836938e-05, + "loss": 2.4339, + "step": 2148000 + }, + { + "epoch": 10.64, + "learning_rate": 4.4679726321943294e-05, + "loss": 2.4517, + "step": 2148500 + }, + { + "epoch": 10.65, + "learning_rate": 4.467848773551721e-05, + "loss": 2.4626, + "step": 2149000 + }, + { + "epoch": 10.65, + "learning_rate": 4.467725162626398e-05, + "loss": 2.4349, + "step": 2149500 + }, + { + "epoch": 10.65, + "learning_rate": 4.467601551701075e-05, + "loss": 2.4428, + "step": 2150000 + }, + { + "epoch": 10.65, + "learning_rate": 4.4674776930584666e-05, + "loss": 2.4328, + "step": 2150500 + }, + { + "epoch": 10.66, + "learning_rate": 4.467353834415858e-05, + "loss": 2.4354, + "step": 2151000 + }, + { + "epoch": 10.66, + "learning_rate": 4.46722997577325e-05, + "loss": 2.4413, + "step": 2151500 + }, + { + "epoch": 10.66, + "learning_rate": 4.467106117130641e-05, + "loss": 2.4063, + "step": 2152000 + }, + { + "epoch": 10.66, + "learning_rate": 4.466982258488033e-05, + "loss": 2.4428, + "step": 2152500 + }, + { + "epoch": 10.67, + "learning_rate": 4.4668583998454244e-05, + "loss": 2.4312, + "step": 2153000 + }, + { + "epoch": 10.67, + "learning_rate": 4.466734541202816e-05, + "loss": 2.4444, + "step": 2153500 + }, + { + "epoch": 10.67, + "learning_rate": 4.466610682560208e-05, + "loss": 2.4416, + "step": 2154000 + }, + { + "epoch": 10.67, + "learning_rate": 4.4664868239175994e-05, + "loss": 2.4138, + "step": 2154500 + }, + { + "epoch": 10.68, + "learning_rate": 4.466362965274991e-05, + "loss": 2.4313, + "step": 2155000 + }, + { + "epoch": 10.68, + "learning_rate": 4.466239106632383e-05, + "loss": 2.419, + "step": 2155500 + }, + { + "epoch": 10.68, + "learning_rate": 4.4661152479897745e-05, + "loss": 2.4566, + "step": 2156000 + }, + { + "epoch": 10.68, + "learning_rate": 4.4659916370644514e-05, + "loss": 2.4375, + "step": 2156500 + }, + { + "epoch": 10.69, + "learning_rate": 4.465867778421843e-05, + "loss": 2.4304, + "step": 2157000 + }, + { + "epoch": 10.69, + "learning_rate": 4.465743919779235e-05, + "loss": 2.4378, + "step": 2157500 + }, + { + "epoch": 10.69, + "learning_rate": 4.465620061136626e-05, + "loss": 2.4383, + "step": 2158000 + }, + { + "epoch": 10.69, + "learning_rate": 4.4654962024940175e-05, + "loss": 2.4265, + "step": 2158500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4653725915686944e-05, + "loss": 2.4481, + "step": 2159000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465248732926086e-05, + "loss": 2.4098, + "step": 2159500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4651251220007636e-05, + "loss": 2.4331, + "step": 2160000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465001263358155e-05, + "loss": 2.419, + "step": 2160500 + }, + { + "epoch": 10.71, + "learning_rate": 4.464877404715547e-05, + "loss": 2.43, + "step": 2161000 + }, + { + "epoch": 10.71, + "learning_rate": 4.464753546072938e-05, + "loss": 2.4379, + "step": 2161500 + }, + { + "epoch": 10.71, + "learning_rate": 4.46462968743033e-05, + "loss": 2.4677, + "step": 2162000 + }, + { + "epoch": 10.71, + "learning_rate": 4.4645058287877214e-05, + "loss": 2.431, + "step": 2162500 + }, + { + "epoch": 10.72, + "learning_rate": 4.464381970145113e-05, + "loss": 2.4455, + "step": 2163000 + }, + { + "epoch": 10.72, + "learning_rate": 4.464258111502505e-05, + "loss": 2.4502, + "step": 2163500 + }, + { + "epoch": 10.72, + "learning_rate": 4.4641342528598965e-05, + "loss": 2.4302, + "step": 2164000 + }, + { + "epoch": 10.72, + "learning_rate": 4.464010641934573e-05, + "loss": 2.4198, + "step": 2164500 + }, + { + "epoch": 10.73, + "learning_rate": 4.4638867832919644e-05, + "loss": 2.4332, + "step": 2165000 + }, + { + "epoch": 10.73, + "learning_rate": 4.463762924649356e-05, + "loss": 2.4356, + "step": 2165500 + }, + { + "epoch": 10.73, + "learning_rate": 4.463639066006748e-05, + "loss": 2.422, + "step": 2166000 + }, + { + "epoch": 10.73, + "learning_rate": 4.4635152073641395e-05, + "loss": 2.4193, + "step": 2166500 + }, + { + "epoch": 10.74, + "learning_rate": 4.463391348721531e-05, + "loss": 2.4501, + "step": 2167000 + }, + { + "epoch": 10.74, + "learning_rate": 4.463267490078923e-05, + "loss": 2.4621, + "step": 2167500 + }, + { + "epoch": 10.74, + "learning_rate": 4.4631436314363145e-05, + "loss": 2.4338, + "step": 2168000 + }, + { + "epoch": 10.74, + "learning_rate": 4.4630200205109914e-05, + "loss": 2.4167, + "step": 2168500 + }, + { + "epoch": 10.75, + "learning_rate": 4.462896161868383e-05, + "loss": 2.4113, + "step": 2169000 + }, + { + "epoch": 10.75, + "learning_rate": 4.462772303225775e-05, + "loss": 2.4667, + "step": 2169500 + }, + { + "epoch": 10.75, + "learning_rate": 4.462648692300452e-05, + "loss": 2.4525, + "step": 2170000 + }, + { + "epoch": 10.75, + "learning_rate": 4.4625248336578434e-05, + "loss": 2.4503, + "step": 2170500 + }, + { + "epoch": 10.76, + "learning_rate": 4.4624009750152344e-05, + "loss": 2.4539, + "step": 2171000 + }, + { + "epoch": 10.76, + "learning_rate": 4.462277116372626e-05, + "loss": 2.4368, + "step": 2171500 + }, + { + "epoch": 10.76, + "learning_rate": 4.462153257730018e-05, + "loss": 2.4427, + "step": 2172000 + }, + { + "epoch": 10.76, + "learning_rate": 4.4620293990874095e-05, + "loss": 2.4173, + "step": 2172500 + }, + { + "epoch": 10.77, + "learning_rate": 4.461905788162087e-05, + "loss": 2.4316, + "step": 2173000 + }, + { + "epoch": 10.77, + "learning_rate": 4.461781929519479e-05, + "loss": 2.4271, + "step": 2173500 + }, + { + "epoch": 10.77, + "learning_rate": 4.461658318594155e-05, + "loss": 2.4018, + "step": 2174000 + }, + { + "epoch": 10.77, + "learning_rate": 4.4615344599515466e-05, + "loss": 2.427, + "step": 2174500 + }, + { + "epoch": 10.78, + "learning_rate": 4.461410601308938e-05, + "loss": 2.4359, + "step": 2175000 + }, + { + "epoch": 10.78, + "learning_rate": 4.46128674266633e-05, + "loss": 2.4292, + "step": 2175500 + }, + { + "epoch": 10.78, + "learning_rate": 4.461163131741007e-05, + "loss": 2.4656, + "step": 2176000 + }, + { + "epoch": 10.78, + "learning_rate": 4.4610392730983986e-05, + "loss": 2.4482, + "step": 2176500 + }, + { + "epoch": 10.79, + "learning_rate": 4.46091541445579e-05, + "loss": 2.4331, + "step": 2177000 + }, + { + "epoch": 10.79, + "learning_rate": 4.460791555813182e-05, + "loss": 2.4638, + "step": 2177500 + }, + { + "epoch": 10.79, + "learning_rate": 4.4606676971705737e-05, + "loss": 2.4594, + "step": 2178000 + }, + { + "epoch": 10.79, + "learning_rate": 4.4605438385279654e-05, + "loss": 2.4464, + "step": 2178500 + }, + { + "epoch": 10.8, + "learning_rate": 4.460419979885357e-05, + "loss": 2.4558, + "step": 2179000 + }, + { + "epoch": 10.8, + "learning_rate": 4.460296121242749e-05, + "loss": 2.4394, + "step": 2179500 + }, + { + "epoch": 10.8, + "learning_rate": 4.46017226260014e-05, + "loss": 2.4307, + "step": 2180000 + }, + { + "epoch": 10.8, + "learning_rate": 4.4600484039575314e-05, + "loss": 2.4527, + "step": 2180500 + }, + { + "epoch": 10.81, + "learning_rate": 4.459924545314923e-05, + "loss": 2.4379, + "step": 2181000 + }, + { + "epoch": 10.81, + "learning_rate": 4.459800686672315e-05, + "loss": 2.4372, + "step": 2181500 + }, + { + "epoch": 10.81, + "learning_rate": 4.4596768280297065e-05, + "loss": 2.4373, + "step": 2182000 + }, + { + "epoch": 10.81, + "learning_rate": 4.4595532171043834e-05, + "loss": 2.4486, + "step": 2182500 + }, + { + "epoch": 10.82, + "learning_rate": 4.459429358461775e-05, + "loss": 2.4385, + "step": 2183000 + }, + { + "epoch": 10.82, + "learning_rate": 4.459305747536452e-05, + "loss": 2.4328, + "step": 2183500 + }, + { + "epoch": 10.82, + "learning_rate": 4.459181888893844e-05, + "loss": 2.4374, + "step": 2184000 + }, + { + "epoch": 10.82, + "learning_rate": 4.4590580302512354e-05, + "loss": 2.4451, + "step": 2184500 + }, + { + "epoch": 10.83, + "learning_rate": 4.458934171608627e-05, + "loss": 2.4589, + "step": 2185000 + }, + { + "epoch": 10.83, + "learning_rate": 4.458810312966019e-05, + "loss": 2.451, + "step": 2185500 + }, + { + "epoch": 10.83, + "learning_rate": 4.458686702040695e-05, + "loss": 2.418, + "step": 2186000 + }, + { + "epoch": 10.83, + "learning_rate": 4.4585628433980866e-05, + "loss": 2.4478, + "step": 2186500 + }, + { + "epoch": 10.84, + "learning_rate": 4.4584389847554783e-05, + "loss": 2.4271, + "step": 2187000 + }, + { + "epoch": 10.84, + "learning_rate": 4.45831512611287e-05, + "loss": 2.4425, + "step": 2187500 + }, + { + "epoch": 10.84, + "learning_rate": 4.458191267470262e-05, + "loss": 2.452, + "step": 2188000 + }, + { + "epoch": 10.84, + "learning_rate": 4.4580674088276534e-05, + "loss": 2.4415, + "step": 2188500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457943550185045e-05, + "loss": 2.4458, + "step": 2189000 + }, + { + "epoch": 10.85, + "learning_rate": 4.457819691542436e-05, + "loss": 2.4406, + "step": 2189500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457696080617114e-05, + "loss": 2.4222, + "step": 2190000 + }, + { + "epoch": 10.85, + "learning_rate": 4.4575722219745054e-05, + "loss": 2.458, + "step": 2190500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457448363331897e-05, + "loss": 2.4399, + "step": 2191000 + }, + { + "epoch": 10.86, + "learning_rate": 4.457324504689289e-05, + "loss": 2.4346, + "step": 2191500 + }, + { + "epoch": 10.86, + "learning_rate": 4.4572008937639656e-05, + "loss": 2.4664, + "step": 2192000 + }, + { + "epoch": 10.86, + "learning_rate": 4.4570770351213567e-05, + "loss": 2.4396, + "step": 2192500 + }, + { + "epoch": 10.86, + "learning_rate": 4.4569531764787484e-05, + "loss": 2.4306, + "step": 2193000 + }, + { + "epoch": 10.87, + "learning_rate": 4.45682931783614e-05, + "loss": 2.4317, + "step": 2193500 + }, + { + "epoch": 10.87, + "learning_rate": 4.456705459193532e-05, + "loss": 2.4156, + "step": 2194000 + }, + { + "epoch": 10.87, + "learning_rate": 4.4565816005509234e-05, + "loss": 2.444, + "step": 2194500 + }, + { + "epoch": 10.87, + "learning_rate": 4.456457741908315e-05, + "loss": 2.4486, + "step": 2195000 + }, + { + "epoch": 10.88, + "learning_rate": 4.456333883265707e-05, + "loss": 2.4602, + "step": 2195500 + }, + { + "epoch": 10.88, + "learning_rate": 4.456210024623098e-05, + "loss": 2.4304, + "step": 2196000 + }, + { + "epoch": 10.88, + "learning_rate": 4.4560861659804895e-05, + "loss": 2.4429, + "step": 2196500 + }, + { + "epoch": 10.88, + "learning_rate": 4.455962307337881e-05, + "loss": 2.4199, + "step": 2197000 + }, + { + "epoch": 10.89, + "learning_rate": 4.455838448695273e-05, + "loss": 2.4348, + "step": 2197500 + }, + { + "epoch": 10.89, + "learning_rate": 4.4557145900526646e-05, + "loss": 2.4346, + "step": 2198000 + }, + { + "epoch": 10.89, + "learning_rate": 4.455590979127342e-05, + "loss": 2.4507, + "step": 2198500 + }, + { + "epoch": 10.89, + "learning_rate": 4.455467120484733e-05, + "loss": 2.443, + "step": 2199000 + }, + { + "epoch": 10.9, + "learning_rate": 4.455343261842125e-05, + "loss": 2.4219, + "step": 2199500 + }, + { + "epoch": 10.9, + "learning_rate": 4.455219650916802e-05, + "loss": 2.4231, + "step": 2200000 + }, + { + "epoch": 10.9, + "learning_rate": 4.4550957922741934e-05, + "loss": 2.4443, + "step": 2200500 + }, + { + "epoch": 10.9, + "learning_rate": 4.454971933631585e-05, + "loss": 2.4357, + "step": 2201000 + }, + { + "epoch": 10.91, + "learning_rate": 4.454848570423547e-05, + "loss": 2.4494, + "step": 2201500 + }, + { + "epoch": 10.91, + "learning_rate": 4.454724711780939e-05, + "loss": 2.4197, + "step": 2202000 + }, + { + "epoch": 10.91, + "learning_rate": 4.4546008531383306e-05, + "loss": 2.4512, + "step": 2202500 + }, + { + "epoch": 10.91, + "learning_rate": 4.454476994495722e-05, + "loss": 2.445, + "step": 2203000 + }, + { + "epoch": 10.92, + "learning_rate": 4.454353135853114e-05, + "loss": 2.449, + "step": 2203500 + }, + { + "epoch": 10.92, + "learning_rate": 4.45422952492779e-05, + "loss": 2.4383, + "step": 2204000 + }, + { + "epoch": 10.92, + "learning_rate": 4.454105666285182e-05, + "loss": 2.4595, + "step": 2204500 + }, + { + "epoch": 10.92, + "learning_rate": 4.4539818076425736e-05, + "loss": 2.4192, + "step": 2205000 + }, + { + "epoch": 10.93, + "learning_rate": 4.453857948999965e-05, + "loss": 2.4381, + "step": 2205500 + }, + { + "epoch": 10.93, + "learning_rate": 4.453734090357357e-05, + "loss": 2.429, + "step": 2206000 + }, + { + "epoch": 10.93, + "learning_rate": 4.453610479432034e-05, + "loss": 2.4534, + "step": 2206500 + }, + { + "epoch": 10.93, + "learning_rate": 4.4534866207894255e-05, + "loss": 2.4423, + "step": 2207000 + }, + { + "epoch": 10.94, + "learning_rate": 4.453362762146817e-05, + "loss": 2.4272, + "step": 2207500 + }, + { + "epoch": 10.94, + "learning_rate": 4.453239151221494e-05, + "loss": 2.4552, + "step": 2208000 + }, + { + "epoch": 10.94, + "learning_rate": 4.453115292578886e-05, + "loss": 2.4172, + "step": 2208500 + }, + { + "epoch": 10.94, + "learning_rate": 4.4529914339362775e-05, + "loss": 2.4602, + "step": 2209000 + }, + { + "epoch": 10.95, + "learning_rate": 4.4528675752936685e-05, + "loss": 2.4529, + "step": 2209500 + }, + { + "epoch": 10.95, + "learning_rate": 4.45274371665106e-05, + "loss": 2.4541, + "step": 2210000 + }, + { + "epoch": 10.95, + "learning_rate": 4.452619858008452e-05, + "loss": 2.4447, + "step": 2210500 + }, + { + "epoch": 10.95, + "learning_rate": 4.4524959993658436e-05, + "loss": 2.4507, + "step": 2211000 + }, + { + "epoch": 10.96, + "learning_rate": 4.452372140723235e-05, + "loss": 2.4288, + "step": 2211500 + }, + { + "epoch": 10.96, + "learning_rate": 4.452248282080627e-05, + "loss": 2.4157, + "step": 2212000 + }, + { + "epoch": 10.96, + "learning_rate": 4.4521244234380187e-05, + "loss": 2.4446, + "step": 2212500 + }, + { + "epoch": 10.96, + "learning_rate": 4.4520005647954103e-05, + "loss": 2.4222, + "step": 2213000 + }, + { + "epoch": 10.97, + "learning_rate": 4.451876953870087e-05, + "loss": 2.4498, + "step": 2213500 + }, + { + "epoch": 10.97, + "learning_rate": 4.451753095227479e-05, + "loss": 2.4253, + "step": 2214000 + }, + { + "epoch": 10.97, + "learning_rate": 4.4516292365848706e-05, + "loss": 2.4469, + "step": 2214500 + }, + { + "epoch": 10.97, + "learning_rate": 4.451505377942262e-05, + "loss": 2.4336, + "step": 2215000 + }, + { + "epoch": 10.98, + "learning_rate": 4.451381519299654e-05, + "loss": 2.4368, + "step": 2215500 + }, + { + "epoch": 10.98, + "learning_rate": 4.451257908374331e-05, + "loss": 2.4251, + "step": 2216000 + }, + { + "epoch": 10.98, + "learning_rate": 4.451134297449008e-05, + "loss": 2.4311, + "step": 2216500 + }, + { + "epoch": 10.98, + "learning_rate": 4.4510104388063995e-05, + "loss": 2.4489, + "step": 2217000 + }, + { + "epoch": 10.99, + "learning_rate": 4.450886580163791e-05, + "loss": 2.4204, + "step": 2217500 + }, + { + "epoch": 10.99, + "learning_rate": 4.450762721521183e-05, + "loss": 2.4315, + "step": 2218000 + }, + { + "epoch": 10.99, + "learning_rate": 4.450638862878574e-05, + "loss": 2.4432, + "step": 2218500 + }, + { + "epoch": 10.99, + "learning_rate": 4.4505150042359655e-05, + "loss": 2.4581, + "step": 2219000 + }, + { + "epoch": 11.0, + "learning_rate": 4.450391145593357e-05, + "loss": 2.4339, + "step": 2219500 + }, + { + "epoch": 11.0, + "learning_rate": 4.450267286950749e-05, + "loss": 2.4147, + "step": 2220000 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.6435940740781358, + "eval_accuracy_mlm": 0.596974884280312, + "eval_accuracy_nsp": 0.863742797861617, + "eval_loss": 2.406216859817505, + "eval_runtime": 146.12, + "eval_samples_per_second": 1744.86, + "eval_steps_per_second": 72.707, + "step": 2220273 + }, + { + "epoch": 11.0, + "learning_rate": 4.4501434283081406e-05, + "loss": 2.4115, + "step": 2220500 + }, + { + "epoch": 11.0, + "learning_rate": 4.450019569665532e-05, + "loss": 2.3987, + "step": 2221000 + }, + { + "epoch": 11.01, + "learning_rate": 4.449895958740209e-05, + "loss": 2.392, + "step": 2221500 + }, + { + "epoch": 11.01, + "learning_rate": 4.449772100097601e-05, + "loss": 2.3835, + "step": 2222000 + }, + { + "epoch": 11.01, + "learning_rate": 4.4496482414549926e-05, + "loss": 2.4125, + "step": 2222500 + }, + { + "epoch": 11.01, + "learning_rate": 4.4495243828123836e-05, + "loss": 2.4218, + "step": 2223000 + }, + { + "epoch": 11.02, + "learning_rate": 4.449400524169775e-05, + "loss": 2.4222, + "step": 2223500 + }, + { + "epoch": 11.02, + "learning_rate": 4.449276665527167e-05, + "loss": 2.3963, + "step": 2224000 + }, + { + "epoch": 11.02, + "learning_rate": 4.449152806884559e-05, + "loss": 2.4003, + "step": 2224500 + }, + { + "epoch": 11.02, + "learning_rate": 4.4490289482419504e-05, + "loss": 2.4131, + "step": 2225000 + }, + { + "epoch": 11.03, + "learning_rate": 4.448905089599342e-05, + "loss": 2.426, + "step": 2225500 + }, + { + "epoch": 11.03, + "learning_rate": 4.448781230956734e-05, + "loss": 2.4027, + "step": 2226000 + }, + { + "epoch": 11.03, + "learning_rate": 4.4486573723141255e-05, + "loss": 2.4331, + "step": 2226500 + }, + { + "epoch": 11.03, + "learning_rate": 4.448533761388802e-05, + "loss": 2.4009, + "step": 2227000 + }, + { + "epoch": 11.04, + "learning_rate": 4.448409902746194e-05, + "loss": 2.4009, + "step": 2227500 + }, + { + "epoch": 11.04, + "learning_rate": 4.448286044103586e-05, + "loss": 2.4034, + "step": 2228000 + }, + { + "epoch": 11.04, + "learning_rate": 4.4481621854609774e-05, + "loss": 2.4055, + "step": 2228500 + }, + { + "epoch": 11.04, + "learning_rate": 4.448038326818369e-05, + "loss": 2.3944, + "step": 2229000 + }, + { + "epoch": 11.05, + "learning_rate": 4.447914468175761e-05, + "loss": 2.4153, + "step": 2229500 + }, + { + "epoch": 11.05, + "learning_rate": 4.4477906095331525e-05, + "loss": 2.4075, + "step": 2230000 + }, + { + "epoch": 11.05, + "learning_rate": 4.447666750890544e-05, + "loss": 2.4052, + "step": 2230500 + }, + { + "epoch": 11.05, + "learning_rate": 4.447542892247936e-05, + "loss": 2.4021, + "step": 2231000 + }, + { + "epoch": 11.06, + "learning_rate": 4.447419281322612e-05, + "loss": 2.3982, + "step": 2231500 + }, + { + "epoch": 11.06, + "learning_rate": 4.447295422680004e-05, + "loss": 2.4161, + "step": 2232000 + }, + { + "epoch": 11.06, + "learning_rate": 4.4471715640373955e-05, + "loss": 2.4319, + "step": 2232500 + }, + { + "epoch": 11.06, + "learning_rate": 4.447047705394787e-05, + "loss": 2.4021, + "step": 2233000 + }, + { + "epoch": 11.07, + "learning_rate": 4.446923846752179e-05, + "loss": 2.4029, + "step": 2233500 + }, + { + "epoch": 11.07, + "learning_rate": 4.4467999881095705e-05, + "loss": 2.4136, + "step": 2234000 + }, + { + "epoch": 11.07, + "learning_rate": 4.446676129466962e-05, + "loss": 2.4027, + "step": 2234500 + }, + { + "epoch": 11.07, + "learning_rate": 4.446552518541639e-05, + "loss": 2.4079, + "step": 2235000 + }, + { + "epoch": 11.08, + "learning_rate": 4.446428659899031e-05, + "loss": 2.4259, + "step": 2235500 + }, + { + "epoch": 11.08, + "learning_rate": 4.4463048012564225e-05, + "loss": 2.4226, + "step": 2236000 + }, + { + "epoch": 11.08, + "learning_rate": 4.446180942613814e-05, + "loss": 2.4198, + "step": 2236500 + }, + { + "epoch": 11.08, + "learning_rate": 4.446057083971206e-05, + "loss": 2.3826, + "step": 2237000 + }, + { + "epoch": 11.09, + "learning_rate": 4.4459332253285976e-05, + "loss": 2.4163, + "step": 2237500 + }, + { + "epoch": 11.09, + "learning_rate": 4.445809366685989e-05, + "loss": 2.403, + "step": 2238000 + }, + { + "epoch": 11.09, + "learning_rate": 4.445685508043381e-05, + "loss": 2.4073, + "step": 2238500 + }, + { + "epoch": 11.09, + "learning_rate": 4.445561897118057e-05, + "loss": 2.4129, + "step": 2239000 + }, + { + "epoch": 11.1, + "learning_rate": 4.445438038475449e-05, + "loss": 2.4009, + "step": 2239500 + }, + { + "epoch": 11.1, + "learning_rate": 4.4453141798328406e-05, + "loss": 2.4227, + "step": 2240000 + }, + { + "epoch": 11.1, + "learning_rate": 4.445190321190232e-05, + "loss": 2.4006, + "step": 2240500 + }, + { + "epoch": 11.1, + "learning_rate": 4.445066710264909e-05, + "loss": 2.3994, + "step": 2241000 + }, + { + "epoch": 11.11, + "learning_rate": 4.444942851622301e-05, + "loss": 2.4206, + "step": 2241500 + }, + { + "epoch": 11.11, + "learning_rate": 4.4448189929796925e-05, + "loss": 2.4191, + "step": 2242000 + }, + { + "epoch": 11.11, + "learning_rate": 4.4446953820543694e-05, + "loss": 2.3991, + "step": 2242500 + }, + { + "epoch": 11.11, + "learning_rate": 4.444571523411761e-05, + "loss": 2.4203, + "step": 2243000 + }, + { + "epoch": 11.12, + "learning_rate": 4.444447664769152e-05, + "loss": 2.4075, + "step": 2243500 + }, + { + "epoch": 11.12, + "learning_rate": 4.444323806126544e-05, + "loss": 2.4173, + "step": 2244000 + }, + { + "epoch": 11.12, + "learning_rate": 4.4441999474839355e-05, + "loss": 2.4242, + "step": 2244500 + }, + { + "epoch": 11.12, + "learning_rate": 4.444076088841327e-05, + "loss": 2.4012, + "step": 2245000 + }, + { + "epoch": 11.12, + "learning_rate": 4.443952230198719e-05, + "loss": 2.4417, + "step": 2245500 + }, + { + "epoch": 11.13, + "learning_rate": 4.4438283715561106e-05, + "loss": 2.3909, + "step": 2246000 + }, + { + "epoch": 11.13, + "learning_rate": 4.443704512913502e-05, + "loss": 2.4142, + "step": 2246500 + }, + { + "epoch": 11.13, + "learning_rate": 4.443580901988179e-05, + "loss": 2.4145, + "step": 2247000 + }, + { + "epoch": 11.13, + "learning_rate": 4.443457043345571e-05, + "loss": 2.4039, + "step": 2247500 + }, + { + "epoch": 11.14, + "learning_rate": 4.4433331847029625e-05, + "loss": 2.4166, + "step": 2248000 + }, + { + "epoch": 11.14, + "learning_rate": 4.443209326060354e-05, + "loss": 2.4373, + "step": 2248500 + }, + { + "epoch": 11.14, + "learning_rate": 4.443085467417746e-05, + "loss": 2.3937, + "step": 2249000 + }, + { + "epoch": 11.14, + "learning_rate": 4.4429616087751376e-05, + "loss": 2.4121, + "step": 2249500 + }, + { + "epoch": 11.15, + "learning_rate": 4.442837997849814e-05, + "loss": 2.4216, + "step": 2250000 + }, + { + "epoch": 11.15, + "learning_rate": 4.4427141392072055e-05, + "loss": 2.4062, + "step": 2250500 + }, + { + "epoch": 11.15, + "learning_rate": 4.442590280564597e-05, + "loss": 2.3873, + "step": 2251000 + }, + { + "epoch": 11.15, + "learning_rate": 4.442466421921989e-05, + "loss": 2.42, + "step": 2251500 + }, + { + "epoch": 11.16, + "learning_rate": 4.4423425632793806e-05, + "loss": 2.3876, + "step": 2252000 + }, + { + "epoch": 11.16, + "learning_rate": 4.442218704636772e-05, + "loss": 2.4366, + "step": 2252500 + }, + { + "epoch": 11.16, + "learning_rate": 4.442095093711449e-05, + "loss": 2.4273, + "step": 2253000 + }, + { + "epoch": 11.16, + "learning_rate": 4.441971235068841e-05, + "loss": 2.4109, + "step": 2253500 + }, + { + "epoch": 11.17, + "learning_rate": 4.4418473764262325e-05, + "loss": 2.4171, + "step": 2254000 + }, + { + "epoch": 11.17, + "learning_rate": 4.441723517783624e-05, + "loss": 2.4281, + "step": 2254500 + }, + { + "epoch": 11.17, + "learning_rate": 4.441599659141016e-05, + "loss": 2.4044, + "step": 2255000 + }, + { + "epoch": 11.17, + "learning_rate": 4.441476048215693e-05, + "loss": 2.3969, + "step": 2255500 + }, + { + "epoch": 11.18, + "learning_rate": 4.4413521895730845e-05, + "loss": 2.4335, + "step": 2256000 + }, + { + "epoch": 11.18, + "learning_rate": 4.441228330930476e-05, + "loss": 2.4196, + "step": 2256500 + }, + { + "epoch": 11.18, + "learning_rate": 4.441104472287867e-05, + "loss": 2.4018, + "step": 2257000 + }, + { + "epoch": 11.18, + "learning_rate": 4.440980613645259e-05, + "loss": 2.4158, + "step": 2257500 + }, + { + "epoch": 11.19, + "learning_rate": 4.4408567550026506e-05, + "loss": 2.4362, + "step": 2258000 + }, + { + "epoch": 11.19, + "learning_rate": 4.4407331440773275e-05, + "loss": 2.4383, + "step": 2258500 + }, + { + "epoch": 11.19, + "learning_rate": 4.440609285434719e-05, + "loss": 2.4255, + "step": 2259000 + }, + { + "epoch": 11.19, + "learning_rate": 4.440485426792111e-05, + "loss": 2.4434, + "step": 2259500 + }, + { + "epoch": 11.2, + "learning_rate": 4.4403615681495026e-05, + "loss": 2.4004, + "step": 2260000 + }, + { + "epoch": 11.2, + "learning_rate": 4.440237709506894e-05, + "loss": 2.4139, + "step": 2260500 + }, + { + "epoch": 11.2, + "learning_rate": 4.440113850864286e-05, + "loss": 2.4226, + "step": 2261000 + }, + { + "epoch": 11.2, + "learning_rate": 4.439990239938963e-05, + "loss": 2.4301, + "step": 2261500 + }, + { + "epoch": 11.21, + "learning_rate": 4.4398663812963545e-05, + "loss": 2.4222, + "step": 2262000 + }, + { + "epoch": 11.21, + "learning_rate": 4.439742522653746e-05, + "loss": 2.4225, + "step": 2262500 + }, + { + "epoch": 11.21, + "learning_rate": 4.439618664011138e-05, + "loss": 2.4212, + "step": 2263000 + }, + { + "epoch": 11.21, + "learning_rate": 4.439494805368529e-05, + "loss": 2.4104, + "step": 2263500 + }, + { + "epoch": 11.22, + "learning_rate": 4.439371194443206e-05, + "loss": 2.4221, + "step": 2264000 + }, + { + "epoch": 11.22, + "learning_rate": 4.4392473358005975e-05, + "loss": 2.4218, + "step": 2264500 + }, + { + "epoch": 11.22, + "learning_rate": 4.439123477157989e-05, + "loss": 2.4348, + "step": 2265000 + }, + { + "epoch": 11.22, + "learning_rate": 4.438999618515381e-05, + "loss": 2.4062, + "step": 2265500 + }, + { + "epoch": 11.23, + "learning_rate": 4.4388757598727726e-05, + "loss": 2.4061, + "step": 2266000 + }, + { + "epoch": 11.23, + "learning_rate": 4.438751901230164e-05, + "loss": 2.4101, + "step": 2266500 + }, + { + "epoch": 11.23, + "learning_rate": 4.438628290304841e-05, + "loss": 2.4078, + "step": 2267000 + }, + { + "epoch": 11.23, + "learning_rate": 4.438504431662233e-05, + "loss": 2.4107, + "step": 2267500 + }, + { + "epoch": 11.24, + "learning_rate": 4.4383805730196245e-05, + "loss": 2.412, + "step": 2268000 + }, + { + "epoch": 11.24, + "learning_rate": 4.438256714377016e-05, + "loss": 2.4134, + "step": 2268500 + }, + { + "epoch": 11.24, + "learning_rate": 4.4381331034516924e-05, + "loss": 2.4223, + "step": 2269000 + }, + { + "epoch": 11.24, + "learning_rate": 4.438009244809084e-05, + "loss": 2.4328, + "step": 2269500 + }, + { + "epoch": 11.25, + "learning_rate": 4.437885386166476e-05, + "loss": 2.4224, + "step": 2270000 + }, + { + "epoch": 11.25, + "learning_rate": 4.4377615275238675e-05, + "loss": 2.4191, + "step": 2270500 + }, + { + "epoch": 11.25, + "learning_rate": 4.437637916598545e-05, + "loss": 2.4188, + "step": 2271000 + }, + { + "epoch": 11.25, + "learning_rate": 4.437514057955936e-05, + "loss": 2.4173, + "step": 2271500 + }, + { + "epoch": 11.26, + "learning_rate": 4.437390199313328e-05, + "loss": 2.3998, + "step": 2272000 + }, + { + "epoch": 11.26, + "learning_rate": 4.4372663406707195e-05, + "loss": 2.4385, + "step": 2272500 + }, + { + "epoch": 11.26, + "learning_rate": 4.437142482028111e-05, + "loss": 2.3898, + "step": 2273000 + }, + { + "epoch": 11.26, + "learning_rate": 4.437018623385503e-05, + "loss": 2.4457, + "step": 2273500 + }, + { + "epoch": 11.27, + "learning_rate": 4.4368947647428945e-05, + "loss": 2.4537, + "step": 2274000 + }, + { + "epoch": 11.27, + "learning_rate": 4.436770906100286e-05, + "loss": 2.4273, + "step": 2274500 + }, + { + "epoch": 11.27, + "learning_rate": 4.436647047457678e-05, + "loss": 2.4318, + "step": 2275000 + }, + { + "epoch": 11.27, + "learning_rate": 4.4365231888150696e-05, + "loss": 2.4194, + "step": 2275500 + }, + { + "epoch": 11.28, + "learning_rate": 4.436399330172461e-05, + "loss": 2.4104, + "step": 2276000 + }, + { + "epoch": 11.28, + "learning_rate": 4.4362757192471375e-05, + "loss": 2.4375, + "step": 2276500 + }, + { + "epoch": 11.28, + "learning_rate": 4.436152108321815e-05, + "loss": 2.4367, + "step": 2277000 + }, + { + "epoch": 11.28, + "learning_rate": 4.436028249679207e-05, + "loss": 2.4193, + "step": 2277500 + }, + { + "epoch": 11.29, + "learning_rate": 4.435904638753883e-05, + "loss": 2.4272, + "step": 2278000 + }, + { + "epoch": 11.29, + "learning_rate": 4.4357807801112747e-05, + "loss": 2.441, + "step": 2278500 + }, + { + "epoch": 11.29, + "learning_rate": 4.4356569214686664e-05, + "loss": 2.4035, + "step": 2279000 + }, + { + "epoch": 11.29, + "learning_rate": 4.435533062826058e-05, + "loss": 2.4012, + "step": 2279500 + }, + { + "epoch": 11.3, + "learning_rate": 4.43540920418345e-05, + "loss": 2.4267, + "step": 2280000 + }, + { + "epoch": 11.3, + "learning_rate": 4.4352853455408414e-05, + "loss": 2.4075, + "step": 2280500 + }, + { + "epoch": 11.3, + "learning_rate": 4.4351614868982324e-05, + "loss": 2.4112, + "step": 2281000 + }, + { + "epoch": 11.3, + "learning_rate": 4.435037628255624e-05, + "loss": 2.4298, + "step": 2281500 + }, + { + "epoch": 11.31, + "learning_rate": 4.434913769613016e-05, + "loss": 2.4208, + "step": 2282000 + }, + { + "epoch": 11.31, + "learning_rate": 4.4347899109704075e-05, + "loss": 2.4449, + "step": 2282500 + }, + { + "epoch": 11.31, + "learning_rate": 4.434666052327799e-05, + "loss": 2.4218, + "step": 2283000 + }, + { + "epoch": 11.31, + "learning_rate": 4.434542193685191e-05, + "loss": 2.4302, + "step": 2283500 + }, + { + "epoch": 11.32, + "learning_rate": 4.4344183350425826e-05, + "loss": 2.4257, + "step": 2284000 + }, + { + "epoch": 11.32, + "learning_rate": 4.4342947241172595e-05, + "loss": 2.4383, + "step": 2284500 + }, + { + "epoch": 11.32, + "learning_rate": 4.434170865474651e-05, + "loss": 2.3936, + "step": 2285000 + }, + { + "epoch": 11.32, + "learning_rate": 4.434047254549328e-05, + "loss": 2.4184, + "step": 2285500 + }, + { + "epoch": 11.33, + "learning_rate": 4.43392339590672e-05, + "loss": 2.413, + "step": 2286000 + }, + { + "epoch": 11.33, + "learning_rate": 4.4337995372641114e-05, + "loss": 2.4455, + "step": 2286500 + }, + { + "epoch": 11.33, + "learning_rate": 4.4336756786215025e-05, + "loss": 2.4197, + "step": 2287000 + }, + { + "epoch": 11.33, + "learning_rate": 4.433551819978894e-05, + "loss": 2.4045, + "step": 2287500 + }, + { + "epoch": 11.34, + "learning_rate": 4.433427961336286e-05, + "loss": 2.4081, + "step": 2288000 + }, + { + "epoch": 11.34, + "learning_rate": 4.4333041026936775e-05, + "loss": 2.4123, + "step": 2288500 + }, + { + "epoch": 11.34, + "learning_rate": 4.433180244051069e-05, + "loss": 2.4212, + "step": 2289000 + }, + { + "epoch": 11.34, + "learning_rate": 4.433056633125747e-05, + "loss": 2.4364, + "step": 2289500 + }, + { + "epoch": 11.35, + "learning_rate": 4.4329327744831385e-05, + "loss": 2.4244, + "step": 2290000 + }, + { + "epoch": 11.35, + "learning_rate": 4.4328091635578154e-05, + "loss": 2.439, + "step": 2290500 + }, + { + "epoch": 11.35, + "learning_rate": 4.432685304915207e-05, + "loss": 2.4081, + "step": 2291000 + }, + { + "epoch": 11.35, + "learning_rate": 4.432561446272598e-05, + "loss": 2.4084, + "step": 2291500 + }, + { + "epoch": 11.36, + "learning_rate": 4.43243758762999e-05, + "loss": 2.4155, + "step": 2292000 + }, + { + "epoch": 11.36, + "learning_rate": 4.4323137289873815e-05, + "loss": 2.4206, + "step": 2292500 + }, + { + "epoch": 11.36, + "learning_rate": 4.432189870344773e-05, + "loss": 2.4201, + "step": 2293000 + }, + { + "epoch": 11.36, + "learning_rate": 4.43206625941945e-05, + "loss": 2.4092, + "step": 2293500 + }, + { + "epoch": 11.37, + "learning_rate": 4.431942400776842e-05, + "loss": 2.4223, + "step": 2294000 + }, + { + "epoch": 11.37, + "learning_rate": 4.4318185421342334e-05, + "loss": 2.4183, + "step": 2294500 + }, + { + "epoch": 11.37, + "learning_rate": 4.431694683491625e-05, + "loss": 2.424, + "step": 2295000 + }, + { + "epoch": 11.37, + "learning_rate": 4.431570824849017e-05, + "loss": 2.4262, + "step": 2295500 + }, + { + "epoch": 11.38, + "learning_rate": 4.4314469662064085e-05, + "loss": 2.4002, + "step": 2296000 + }, + { + "epoch": 11.38, + "learning_rate": 4.4313231075637995e-05, + "loss": 2.4274, + "step": 2296500 + }, + { + "epoch": 11.38, + "learning_rate": 4.431199496638477e-05, + "loss": 2.4295, + "step": 2297000 + }, + { + "epoch": 11.38, + "learning_rate": 4.431075637995869e-05, + "loss": 2.4161, + "step": 2297500 + }, + { + "epoch": 11.39, + "learning_rate": 4.4309517793532605e-05, + "loss": 2.4199, + "step": 2298000 + }, + { + "epoch": 11.39, + "learning_rate": 4.4308279207106515e-05, + "loss": 2.4241, + "step": 2298500 + }, + { + "epoch": 11.39, + "learning_rate": 4.430704062068043e-05, + "loss": 2.4267, + "step": 2299000 + }, + { + "epoch": 11.39, + "learning_rate": 4.430580203425435e-05, + "loss": 2.4256, + "step": 2299500 + }, + { + "epoch": 11.39, + "learning_rate": 4.4304563447828265e-05, + "loss": 2.4038, + "step": 2300000 + }, + { + "epoch": 11.4, + "learning_rate": 4.4303324861402176e-05, + "loss": 2.419, + "step": 2300500 + }, + { + "epoch": 11.4, + "learning_rate": 4.430208627497609e-05, + "loss": 2.4068, + "step": 2301000 + }, + { + "epoch": 11.4, + "learning_rate": 4.430085016572287e-05, + "loss": 2.4023, + "step": 2301500 + }, + { + "epoch": 11.4, + "learning_rate": 4.4299611579296785e-05, + "loss": 2.4277, + "step": 2302000 + }, + { + "epoch": 11.41, + "learning_rate": 4.42983729928707e-05, + "loss": 2.4221, + "step": 2302500 + }, + { + "epoch": 11.41, + "learning_rate": 4.429713440644461e-05, + "loss": 2.429, + "step": 2303000 + }, + { + "epoch": 11.41, + "learning_rate": 4.429589582001853e-05, + "loss": 2.429, + "step": 2303500 + }, + { + "epoch": 11.41, + "learning_rate": 4.4294657233592446e-05, + "loss": 2.4134, + "step": 2304000 + }, + { + "epoch": 11.42, + "learning_rate": 4.429341864716636e-05, + "loss": 2.4422, + "step": 2304500 + }, + { + "epoch": 11.42, + "learning_rate": 4.429218253791313e-05, + "loss": 2.438, + "step": 2305000 + }, + { + "epoch": 11.42, + "learning_rate": 4.429094395148705e-05, + "loss": 2.4249, + "step": 2305500 + }, + { + "epoch": 11.42, + "learning_rate": 4.428970784223382e-05, + "loss": 2.4145, + "step": 2306000 + }, + { + "epoch": 11.43, + "learning_rate": 4.4288469255807734e-05, + "loss": 2.4202, + "step": 2306500 + }, + { + "epoch": 11.43, + "learning_rate": 4.428723066938165e-05, + "loss": 2.4222, + "step": 2307000 + }, + { + "epoch": 11.43, + "learning_rate": 4.428599208295557e-05, + "loss": 2.4162, + "step": 2307500 + }, + { + "epoch": 11.43, + "learning_rate": 4.428475597370234e-05, + "loss": 2.4277, + "step": 2308000 + }, + { + "epoch": 11.44, + "learning_rate": 4.4283517387276254e-05, + "loss": 2.4503, + "step": 2308500 + }, + { + "epoch": 11.44, + "learning_rate": 4.4282281278023016e-05, + "loss": 2.4174, + "step": 2309000 + }, + { + "epoch": 11.44, + "learning_rate": 4.428104269159693e-05, + "loss": 2.4125, + "step": 2309500 + }, + { + "epoch": 11.44, + "learning_rate": 4.427980410517085e-05, + "loss": 2.4267, + "step": 2310000 + }, + { + "epoch": 11.45, + "learning_rate": 4.427856551874477e-05, + "loss": 2.4294, + "step": 2310500 + }, + { + "epoch": 11.45, + "learning_rate": 4.4277326932318684e-05, + "loss": 2.4163, + "step": 2311000 + }, + { + "epoch": 11.45, + "learning_rate": 4.42760883458926e-05, + "loss": 2.406, + "step": 2311500 + }, + { + "epoch": 11.45, + "learning_rate": 4.427484975946652e-05, + "loss": 2.4035, + "step": 2312000 + }, + { + "epoch": 11.46, + "learning_rate": 4.4273611173040435e-05, + "loss": 2.4194, + "step": 2312500 + }, + { + "epoch": 11.46, + "learning_rate": 4.42723750637872e-05, + "loss": 2.4203, + "step": 2313000 + }, + { + "epoch": 11.46, + "learning_rate": 4.427113647736112e-05, + "loss": 2.4323, + "step": 2313500 + }, + { + "epoch": 11.46, + "learning_rate": 4.426989789093504e-05, + "loss": 2.4078, + "step": 2314000 + }, + { + "epoch": 11.47, + "learning_rate": 4.4268659304508954e-05, + "loss": 2.4321, + "step": 2314500 + }, + { + "epoch": 11.47, + "learning_rate": 4.426742071808287e-05, + "loss": 2.4087, + "step": 2315000 + }, + { + "epoch": 11.47, + "learning_rate": 4.426618213165679e-05, + "loss": 2.4277, + "step": 2315500 + }, + { + "epoch": 11.47, + "learning_rate": 4.4264943545230705e-05, + "loss": 2.41, + "step": 2316000 + }, + { + "epoch": 11.48, + "learning_rate": 4.426370495880462e-05, + "loss": 2.4286, + "step": 2316500 + }, + { + "epoch": 11.48, + "learning_rate": 4.426246637237854e-05, + "loss": 2.4222, + "step": 2317000 + }, + { + "epoch": 11.48, + "learning_rate": 4.4261227785952456e-05, + "loss": 2.4014, + "step": 2317500 + }, + { + "epoch": 11.48, + "learning_rate": 4.425998919952637e-05, + "loss": 2.4204, + "step": 2318000 + }, + { + "epoch": 11.49, + "learning_rate": 4.425875061310028e-05, + "loss": 2.3936, + "step": 2318500 + }, + { + "epoch": 11.49, + "learning_rate": 4.42575120266742e-05, + "loss": 2.4184, + "step": 2319000 + }, + { + "epoch": 11.49, + "learning_rate": 4.425627591742097e-05, + "loss": 2.4393, + "step": 2319500 + }, + { + "epoch": 11.49, + "learning_rate": 4.4255037330994885e-05, + "loss": 2.4238, + "step": 2320000 + }, + { + "epoch": 11.5, + "learning_rate": 4.4253801221741654e-05, + "loss": 2.4157, + "step": 2320500 + }, + { + "epoch": 11.5, + "learning_rate": 4.425256263531557e-05, + "loss": 2.4123, + "step": 2321000 + }, + { + "epoch": 11.5, + "learning_rate": 4.425132404888949e-05, + "loss": 2.4241, + "step": 2321500 + }, + { + "epoch": 11.5, + "learning_rate": 4.425008793963625e-05, + "loss": 2.4004, + "step": 2322000 + }, + { + "epoch": 11.51, + "learning_rate": 4.424884935321017e-05, + "loss": 2.4401, + "step": 2322500 + }, + { + "epoch": 11.51, + "learning_rate": 4.4247610766784084e-05, + "loss": 2.4317, + "step": 2323000 + }, + { + "epoch": 11.51, + "learning_rate": 4.4246372180358e-05, + "loss": 2.4115, + "step": 2323500 + }, + { + "epoch": 11.51, + "learning_rate": 4.424513359393192e-05, + "loss": 2.4012, + "step": 2324000 + }, + { + "epoch": 11.52, + "learning_rate": 4.424389748467869e-05, + "loss": 2.4115, + "step": 2324500 + }, + { + "epoch": 11.52, + "learning_rate": 4.4242658898252604e-05, + "loss": 2.392, + "step": 2325000 + }, + { + "epoch": 11.52, + "learning_rate": 4.424142031182652e-05, + "loss": 2.4307, + "step": 2325500 + }, + { + "epoch": 11.52, + "learning_rate": 4.424018172540044e-05, + "loss": 2.4094, + "step": 2326000 + }, + { + "epoch": 11.53, + "learning_rate": 4.4238943138974354e-05, + "loss": 2.4216, + "step": 2326500 + }, + { + "epoch": 11.53, + "learning_rate": 4.423770455254827e-05, + "loss": 2.4106, + "step": 2327000 + }, + { + "epoch": 11.53, + "learning_rate": 4.423646596612219e-05, + "loss": 2.4049, + "step": 2327500 + }, + { + "epoch": 11.53, + "learning_rate": 4.4235227379696105e-05, + "loss": 2.4113, + "step": 2328000 + }, + { + "epoch": 11.54, + "learning_rate": 4.423398879327002e-05, + "loss": 2.4132, + "step": 2328500 + }, + { + "epoch": 11.54, + "learning_rate": 4.423275020684394e-05, + "loss": 2.4204, + "step": 2329000 + }, + { + "epoch": 11.54, + "learning_rate": 4.42315140975907e-05, + "loss": 2.4299, + "step": 2329500 + }, + { + "epoch": 11.54, + "learning_rate": 4.423027551116462e-05, + "loss": 2.419, + "step": 2330000 + }, + { + "epoch": 11.55, + "learning_rate": 4.4229036924738535e-05, + "loss": 2.4446, + "step": 2330500 + }, + { + "epoch": 11.55, + "learning_rate": 4.422779833831245e-05, + "loss": 2.4427, + "step": 2331000 + }, + { + "epoch": 11.55, + "learning_rate": 4.422655975188637e-05, + "loss": 2.4376, + "step": 2331500 + }, + { + "epoch": 11.55, + "learning_rate": 4.4225321165460286e-05, + "loss": 2.4157, + "step": 2332000 + }, + { + "epoch": 11.56, + "learning_rate": 4.42240825790342e-05, + "loss": 2.4434, + "step": 2332500 + }, + { + "epoch": 11.56, + "learning_rate": 4.422284399260812e-05, + "loss": 2.4163, + "step": 2333000 + }, + { + "epoch": 11.56, + "learning_rate": 4.4221605406182036e-05, + "loss": 2.4218, + "step": 2333500 + }, + { + "epoch": 11.56, + "learning_rate": 4.4220369296928805e-05, + "loss": 2.4099, + "step": 2334000 + }, + { + "epoch": 11.57, + "learning_rate": 4.421913071050272e-05, + "loss": 2.4475, + "step": 2334500 + }, + { + "epoch": 11.57, + "learning_rate": 4.421789460124949e-05, + "loss": 2.4169, + "step": 2335000 + }, + { + "epoch": 11.57, + "learning_rate": 4.42166560148234e-05, + "loss": 2.4357, + "step": 2335500 + }, + { + "epoch": 11.57, + "learning_rate": 4.421541742839732e-05, + "loss": 2.4186, + "step": 2336000 + }, + { + "epoch": 11.58, + "learning_rate": 4.4214178841971235e-05, + "loss": 2.438, + "step": 2336500 + }, + { + "epoch": 11.58, + "learning_rate": 4.421294025554515e-05, + "loss": 2.4189, + "step": 2337000 + }, + { + "epoch": 11.58, + "learning_rate": 4.421170414629192e-05, + "loss": 2.4401, + "step": 2337500 + }, + { + "epoch": 11.58, + "learning_rate": 4.421046555986584e-05, + "loss": 2.4355, + "step": 2338000 + }, + { + "epoch": 11.59, + "learning_rate": 4.4209226973439755e-05, + "loss": 2.4278, + "step": 2338500 + }, + { + "epoch": 11.59, + "learning_rate": 4.420798838701367e-05, + "loss": 2.4345, + "step": 2339000 + }, + { + "epoch": 11.59, + "learning_rate": 4.420674980058759e-05, + "loss": 2.4363, + "step": 2339500 + }, + { + "epoch": 11.59, + "learning_rate": 4.4205511214161505e-05, + "loss": 2.4035, + "step": 2340000 + }, + { + "epoch": 11.6, + "learning_rate": 4.4204275104908274e-05, + "loss": 2.4417, + "step": 2340500 + }, + { + "epoch": 11.6, + "learning_rate": 4.420303651848219e-05, + "loss": 2.4219, + "step": 2341000 + }, + { + "epoch": 11.6, + "learning_rate": 4.420179793205611e-05, + "loss": 2.4465, + "step": 2341500 + }, + { + "epoch": 11.6, + "learning_rate": 4.420056182280287e-05, + "loss": 2.4282, + "step": 2342000 + }, + { + "epoch": 11.61, + "learning_rate": 4.419932323637679e-05, + "loss": 2.4328, + "step": 2342500 + }, + { + "epoch": 11.61, + "learning_rate": 4.4198084649950704e-05, + "loss": 2.4098, + "step": 2343000 + }, + { + "epoch": 11.61, + "learning_rate": 4.419684606352462e-05, + "loss": 2.4214, + "step": 2343500 + }, + { + "epoch": 11.61, + "learning_rate": 4.419560747709854e-05, + "loss": 2.442, + "step": 2344000 + }, + { + "epoch": 11.62, + "learning_rate": 4.4194368890672455e-05, + "loss": 2.4259, + "step": 2344500 + }, + { + "epoch": 11.62, + "learning_rate": 4.419313030424637e-05, + "loss": 2.4322, + "step": 2345000 + }, + { + "epoch": 11.62, + "learning_rate": 4.419189171782029e-05, + "loss": 2.4194, + "step": 2345500 + }, + { + "epoch": 11.62, + "learning_rate": 4.4190653131394206e-05, + "loss": 2.4452, + "step": 2346000 + }, + { + "epoch": 11.63, + "learning_rate": 4.418941454496812e-05, + "loss": 2.4208, + "step": 2346500 + }, + { + "epoch": 11.63, + "learning_rate": 4.418817595854204e-05, + "loss": 2.4285, + "step": 2347000 + }, + { + "epoch": 11.63, + "learning_rate": 4.418693984928881e-05, + "loss": 2.4065, + "step": 2347500 + }, + { + "epoch": 11.63, + "learning_rate": 4.418570374003557e-05, + "loss": 2.42, + "step": 2348000 + }, + { + "epoch": 11.64, + "learning_rate": 4.418446515360949e-05, + "loss": 2.4344, + "step": 2348500 + }, + { + "epoch": 11.64, + "learning_rate": 4.4183226567183404e-05, + "loss": 2.4254, + "step": 2349000 + }, + { + "epoch": 11.64, + "learning_rate": 4.418198798075732e-05, + "loss": 2.4336, + "step": 2349500 + }, + { + "epoch": 11.64, + "learning_rate": 4.418074939433124e-05, + "loss": 2.4124, + "step": 2350000 + }, + { + "epoch": 11.65, + "learning_rate": 4.4179510807905155e-05, + "loss": 2.4268, + "step": 2350500 + }, + { + "epoch": 11.65, + "learning_rate": 4.417827222147907e-05, + "loss": 2.4369, + "step": 2351000 + }, + { + "epoch": 11.65, + "learning_rate": 4.417703363505299e-05, + "loss": 2.4431, + "step": 2351500 + }, + { + "epoch": 11.65, + "learning_rate": 4.4175795048626906e-05, + "loss": 2.4248, + "step": 2352000 + }, + { + "epoch": 11.66, + "learning_rate": 4.417455646220082e-05, + "loss": 2.3927, + "step": 2352500 + }, + { + "epoch": 11.66, + "learning_rate": 4.417331787577474e-05, + "loss": 2.4374, + "step": 2353000 + }, + { + "epoch": 11.66, + "learning_rate": 4.4172079289348656e-05, + "loss": 2.4195, + "step": 2353500 + }, + { + "epoch": 11.66, + "learning_rate": 4.417084070292257e-05, + "loss": 2.4295, + "step": 2354000 + }, + { + "epoch": 11.67, + "learning_rate": 4.416960459366934e-05, + "loss": 2.4114, + "step": 2354500 + }, + { + "epoch": 11.67, + "learning_rate": 4.416836600724326e-05, + "loss": 2.4245, + "step": 2355000 + }, + { + "epoch": 11.67, + "learning_rate": 4.416712742081717e-05, + "loss": 2.4333, + "step": 2355500 + }, + { + "epoch": 11.67, + "learning_rate": 4.416589131156394e-05, + "loss": 2.4191, + "step": 2356000 + }, + { + "epoch": 11.67, + "learning_rate": 4.4164652725137855e-05, + "loss": 2.3961, + "step": 2356500 + }, + { + "epoch": 11.68, + "learning_rate": 4.4163416615884624e-05, + "loss": 2.4087, + "step": 2357000 + }, + { + "epoch": 11.68, + "learning_rate": 4.416217802945854e-05, + "loss": 2.4227, + "step": 2357500 + }, + { + "epoch": 11.68, + "learning_rate": 4.416093944303246e-05, + "loss": 2.4177, + "step": 2358000 + }, + { + "epoch": 11.68, + "learning_rate": 4.4159700856606375e-05, + "loss": 2.4067, + "step": 2358500 + }, + { + "epoch": 11.69, + "learning_rate": 4.415846227018029e-05, + "loss": 2.4215, + "step": 2359000 + }, + { + "epoch": 11.69, + "learning_rate": 4.4157226160927054e-05, + "loss": 2.4338, + "step": 2359500 + }, + { + "epoch": 11.69, + "learning_rate": 4.415598757450097e-05, + "loss": 2.4406, + "step": 2360000 + }, + { + "epoch": 11.69, + "learning_rate": 4.415474898807489e-05, + "loss": 2.4358, + "step": 2360500 + }, + { + "epoch": 11.7, + "learning_rate": 4.4153510401648804e-05, + "loss": 2.4452, + "step": 2361000 + }, + { + "epoch": 11.7, + "learning_rate": 4.415227181522272e-05, + "loss": 2.4173, + "step": 2361500 + }, + { + "epoch": 11.7, + "learning_rate": 4.415103322879664e-05, + "loss": 2.4111, + "step": 2362000 + }, + { + "epoch": 11.7, + "learning_rate": 4.4149794642370555e-05, + "loss": 2.4227, + "step": 2362500 + }, + { + "epoch": 11.71, + "learning_rate": 4.414855605594447e-05, + "loss": 2.4011, + "step": 2363000 + }, + { + "epoch": 11.71, + "learning_rate": 4.414731994669124e-05, + "loss": 2.4455, + "step": 2363500 + }, + { + "epoch": 11.71, + "learning_rate": 4.414608136026516e-05, + "loss": 2.4459, + "step": 2364000 + }, + { + "epoch": 11.71, + "learning_rate": 4.4144842773839075e-05, + "loss": 2.4196, + "step": 2364500 + }, + { + "epoch": 11.72, + "learning_rate": 4.414360418741299e-05, + "loss": 2.4346, + "step": 2365000 + }, + { + "epoch": 11.72, + "learning_rate": 4.414236560098691e-05, + "loss": 2.4307, + "step": 2365500 + }, + { + "epoch": 11.72, + "learning_rate": 4.414113196890653e-05, + "loss": 2.4386, + "step": 2366000 + }, + { + "epoch": 11.72, + "learning_rate": 4.4139893382480446e-05, + "loss": 2.4333, + "step": 2366500 + }, + { + "epoch": 11.73, + "learning_rate": 4.413865479605436e-05, + "loss": 2.4199, + "step": 2367000 + }, + { + "epoch": 11.73, + "learning_rate": 4.413741620962828e-05, + "loss": 2.4048, + "step": 2367500 + }, + { + "epoch": 11.73, + "learning_rate": 4.41361776232022e-05, + "loss": 2.4399, + "step": 2368000 + }, + { + "epoch": 11.73, + "learning_rate": 4.4134939036776114e-05, + "loss": 2.4399, + "step": 2368500 + }, + { + "epoch": 11.74, + "learning_rate": 4.4133700450350024e-05, + "loss": 2.4265, + "step": 2369000 + }, + { + "epoch": 11.74, + "learning_rate": 4.413246186392394e-05, + "loss": 2.4374, + "step": 2369500 + }, + { + "epoch": 11.74, + "learning_rate": 4.413122327749786e-05, + "loss": 2.4099, + "step": 2370000 + }, + { + "epoch": 11.74, + "learning_rate": 4.4129984691071775e-05, + "loss": 2.3965, + "step": 2370500 + }, + { + "epoch": 11.75, + "learning_rate": 4.4128748581818544e-05, + "loss": 2.4322, + "step": 2371000 + }, + { + "epoch": 11.75, + "learning_rate": 4.412750999539246e-05, + "loss": 2.4613, + "step": 2371500 + }, + { + "epoch": 11.75, + "learning_rate": 4.412627140896638e-05, + "loss": 2.4158, + "step": 2372000 + }, + { + "epoch": 11.75, + "learning_rate": 4.412503282254029e-05, + "loss": 2.4332, + "step": 2372500 + }, + { + "epoch": 11.76, + "learning_rate": 4.4123794236114205e-05, + "loss": 2.4335, + "step": 2373000 + }, + { + "epoch": 11.76, + "learning_rate": 4.412255812686098e-05, + "loss": 2.4264, + "step": 2373500 + }, + { + "epoch": 11.76, + "learning_rate": 4.41213195404349e-05, + "loss": 2.3871, + "step": 2374000 + }, + { + "epoch": 11.76, + "learning_rate": 4.4120080954008814e-05, + "loss": 2.4349, + "step": 2374500 + }, + { + "epoch": 11.77, + "learning_rate": 4.411884236758273e-05, + "loss": 2.4322, + "step": 2375000 + }, + { + "epoch": 11.77, + "learning_rate": 4.411760378115664e-05, + "loss": 2.4205, + "step": 2375500 + }, + { + "epoch": 11.77, + "learning_rate": 4.411637014907626e-05, + "loss": 2.427, + "step": 2376000 + }, + { + "epoch": 11.77, + "learning_rate": 4.411513403982303e-05, + "loss": 2.4348, + "step": 2376500 + }, + { + "epoch": 11.78, + "learning_rate": 4.411389545339695e-05, + "loss": 2.4465, + "step": 2377000 + }, + { + "epoch": 11.78, + "learning_rate": 4.4112656866970864e-05, + "loss": 2.4235, + "step": 2377500 + }, + { + "epoch": 11.78, + "learning_rate": 4.411141828054478e-05, + "loss": 2.4328, + "step": 2378000 + }, + { + "epoch": 11.78, + "learning_rate": 4.41101796941187e-05, + "loss": 2.3982, + "step": 2378500 + }, + { + "epoch": 11.79, + "learning_rate": 4.4108941107692615e-05, + "loss": 2.4139, + "step": 2379000 + }, + { + "epoch": 11.79, + "learning_rate": 4.410770252126653e-05, + "loss": 2.4416, + "step": 2379500 + }, + { + "epoch": 11.79, + "learning_rate": 4.410646393484045e-05, + "loss": 2.4149, + "step": 2380000 + }, + { + "epoch": 11.79, + "learning_rate": 4.4105225348414366e-05, + "loss": 2.4093, + "step": 2380500 + }, + { + "epoch": 11.8, + "learning_rate": 4.410398676198828e-05, + "loss": 2.4322, + "step": 2381000 + }, + { + "epoch": 11.8, + "learning_rate": 4.41027481755622e-05, + "loss": 2.4391, + "step": 2381500 + }, + { + "epoch": 11.8, + "learning_rate": 4.410150958913612e-05, + "loss": 2.4264, + "step": 2382000 + }, + { + "epoch": 11.8, + "learning_rate": 4.4100271002710034e-05, + "loss": 2.4302, + "step": 2382500 + }, + { + "epoch": 11.81, + "learning_rate": 4.409903241628395e-05, + "loss": 2.4337, + "step": 2383000 + }, + { + "epoch": 11.81, + "learning_rate": 4.409779630703071e-05, + "loss": 2.404, + "step": 2383500 + }, + { + "epoch": 11.81, + "learning_rate": 4.409655772060463e-05, + "loss": 2.4127, + "step": 2384000 + }, + { + "epoch": 11.81, + "learning_rate": 4.4095319134178547e-05, + "loss": 2.4261, + "step": 2384500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4094080547752463e-05, + "loss": 2.4263, + "step": 2385000 + }, + { + "epoch": 11.82, + "learning_rate": 4.409284196132638e-05, + "loss": 2.4344, + "step": 2385500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4091608329246e-05, + "loss": 2.4243, + "step": 2386000 + }, + { + "epoch": 11.82, + "learning_rate": 4.409036974281992e-05, + "loss": 2.4161, + "step": 2386500 + }, + { + "epoch": 11.83, + "learning_rate": 4.408913115639383e-05, + "loss": 2.4354, + "step": 2387000 + }, + { + "epoch": 11.83, + "learning_rate": 4.4087895047140604e-05, + "loss": 2.398, + "step": 2387500 + }, + { + "epoch": 11.83, + "learning_rate": 4.408665646071452e-05, + "loss": 2.4091, + "step": 2388000 + }, + { + "epoch": 11.83, + "learning_rate": 4.408541787428843e-05, + "loss": 2.4046, + "step": 2388500 + }, + { + "epoch": 11.84, + "learning_rate": 4.408417928786235e-05, + "loss": 2.4116, + "step": 2389000 + }, + { + "epoch": 11.84, + "learning_rate": 4.4082940701436265e-05, + "loss": 2.4345, + "step": 2389500 + }, + { + "epoch": 11.84, + "learning_rate": 4.408170211501018e-05, + "loss": 2.4339, + "step": 2390000 + }, + { + "epoch": 11.84, + "learning_rate": 4.40804635285841e-05, + "loss": 2.4014, + "step": 2390500 + }, + { + "epoch": 11.85, + "learning_rate": 4.4079224942158015e-05, + "loss": 2.4364, + "step": 2391000 + }, + { + "epoch": 11.85, + "learning_rate": 4.407798635573193e-05, + "loss": 2.4408, + "step": 2391500 + }, + { + "epoch": 11.85, + "learning_rate": 4.407674776930585e-05, + "loss": 2.4218, + "step": 2392000 + }, + { + "epoch": 11.85, + "learning_rate": 4.4075509182879766e-05, + "loss": 2.4203, + "step": 2392500 + }, + { + "epoch": 11.86, + "learning_rate": 4.407427059645368e-05, + "loss": 2.4449, + "step": 2393000 + }, + { + "epoch": 11.86, + "learning_rate": 4.40730320100276e-05, + "loss": 2.4359, + "step": 2393500 + }, + { + "epoch": 11.86, + "learning_rate": 4.407179342360152e-05, + "loss": 2.4611, + "step": 2394000 + }, + { + "epoch": 11.86, + "learning_rate": 4.4070554837175434e-05, + "loss": 2.4342, + "step": 2394500 + }, + { + "epoch": 11.87, + "learning_rate": 4.406931625074935e-05, + "loss": 2.4223, + "step": 2395000 + }, + { + "epoch": 11.87, + "learning_rate": 4.406807766432327e-05, + "loss": 2.4426, + "step": 2395500 + }, + { + "epoch": 11.87, + "learning_rate": 4.4066839077897185e-05, + "loss": 2.4305, + "step": 2396000 + }, + { + "epoch": 11.87, + "learning_rate": 4.406560296864395e-05, + "loss": 2.4321, + "step": 2396500 + }, + { + "epoch": 11.88, + "learning_rate": 4.4064366859390716e-05, + "loss": 2.4089, + "step": 2397000 + }, + { + "epoch": 11.88, + "learning_rate": 4.4063133227310336e-05, + "loss": 2.414, + "step": 2397500 + }, + { + "epoch": 11.88, + "learning_rate": 4.406189464088425e-05, + "loss": 2.4312, + "step": 2398000 + }, + { + "epoch": 11.88, + "learning_rate": 4.406065605445817e-05, + "loss": 2.405, + "step": 2398500 + }, + { + "epoch": 11.89, + "learning_rate": 4.405941746803209e-05, + "loss": 2.4159, + "step": 2399000 + }, + { + "epoch": 11.89, + "learning_rate": 4.4058178881606004e-05, + "loss": 2.417, + "step": 2399500 + }, + { + "epoch": 11.89, + "learning_rate": 4.405694277235277e-05, + "loss": 2.3933, + "step": 2400000 + }, + { + "epoch": 11.89, + "learning_rate": 4.405570418592669e-05, + "loss": 2.4296, + "step": 2400500 + }, + { + "epoch": 11.9, + "learning_rate": 4.405446559950061e-05, + "loss": 2.4389, + "step": 2401000 + }, + { + "epoch": 11.9, + "learning_rate": 4.4053227013074524e-05, + "loss": 2.4468, + "step": 2401500 + }, + { + "epoch": 11.9, + "learning_rate": 4.405198842664844e-05, + "loss": 2.4335, + "step": 2402000 + }, + { + "epoch": 11.9, + "learning_rate": 4.405074984022236e-05, + "loss": 2.4179, + "step": 2402500 + }, + { + "epoch": 11.91, + "learning_rate": 4.4049511253796274e-05, + "loss": 2.4189, + "step": 2403000 + }, + { + "epoch": 11.91, + "learning_rate": 4.404827266737019e-05, + "loss": 2.4311, + "step": 2403500 + }, + { + "epoch": 11.91, + "learning_rate": 4.40470340809441e-05, + "loss": 2.4236, + "step": 2404000 + }, + { + "epoch": 11.91, + "learning_rate": 4.404579549451802e-05, + "loss": 2.4249, + "step": 2404500 + }, + { + "epoch": 11.92, + "learning_rate": 4.4044556908091935e-05, + "loss": 2.4524, + "step": 2405000 + }, + { + "epoch": 11.92, + "learning_rate": 4.404331832166585e-05, + "loss": 2.4291, + "step": 2405500 + }, + { + "epoch": 11.92, + "learning_rate": 4.404207973523977e-05, + "loss": 2.4173, + "step": 2406000 + }, + { + "epoch": 11.92, + "learning_rate": 4.4040841148813686e-05, + "loss": 2.448, + "step": 2406500 + }, + { + "epoch": 11.93, + "learning_rate": 4.4039602562387596e-05, + "loss": 2.4346, + "step": 2407000 + }, + { + "epoch": 11.93, + "learning_rate": 4.403836397596151e-05, + "loss": 2.4152, + "step": 2407500 + }, + { + "epoch": 11.93, + "learning_rate": 4.403712538953543e-05, + "loss": 2.4002, + "step": 2408000 + }, + { + "epoch": 11.93, + "learning_rate": 4.40358892802822e-05, + "loss": 2.419, + "step": 2408500 + }, + { + "epoch": 11.94, + "learning_rate": 4.4034653171028975e-05, + "loss": 2.4215, + "step": 2409000 + }, + { + "epoch": 11.94, + "learning_rate": 4.403341458460289e-05, + "loss": 2.4047, + "step": 2409500 + }, + { + "epoch": 11.94, + "learning_rate": 4.4032178475349653e-05, + "loss": 2.4267, + "step": 2410000 + }, + { + "epoch": 11.94, + "learning_rate": 4.403093988892357e-05, + "loss": 2.4454, + "step": 2410500 + }, + { + "epoch": 11.94, + "learning_rate": 4.402970130249749e-05, + "loss": 2.3954, + "step": 2411000 + }, + { + "epoch": 11.95, + "learning_rate": 4.4028462716071404e-05, + "loss": 2.4412, + "step": 2411500 + }, + { + "epoch": 11.95, + "learning_rate": 4.402722412964532e-05, + "loss": 2.4209, + "step": 2412000 + }, + { + "epoch": 11.95, + "learning_rate": 4.402598554321924e-05, + "loss": 2.4415, + "step": 2412500 + }, + { + "epoch": 11.95, + "learning_rate": 4.4024746956793155e-05, + "loss": 2.408, + "step": 2413000 + }, + { + "epoch": 11.96, + "learning_rate": 4.4023508370367065e-05, + "loss": 2.4469, + "step": 2413500 + }, + { + "epoch": 11.96, + "learning_rate": 4.402226978394098e-05, + "loss": 2.4386, + "step": 2414000 + }, + { + "epoch": 11.96, + "learning_rate": 4.402103367468776e-05, + "loss": 2.4394, + "step": 2414500 + }, + { + "epoch": 11.96, + "learning_rate": 4.401979756543452e-05, + "loss": 2.426, + "step": 2415000 + }, + { + "epoch": 11.97, + "learning_rate": 4.401855897900844e-05, + "loss": 2.4471, + "step": 2415500 + }, + { + "epoch": 11.97, + "learning_rate": 4.4017320392582354e-05, + "loss": 2.4293, + "step": 2416000 + }, + { + "epoch": 11.97, + "learning_rate": 4.401608180615627e-05, + "loss": 2.4297, + "step": 2416500 + }, + { + "epoch": 11.97, + "learning_rate": 4.401484321973019e-05, + "loss": 2.4321, + "step": 2417000 + }, + { + "epoch": 11.98, + "learning_rate": 4.4013604633304104e-05, + "loss": 2.4261, + "step": 2417500 + }, + { + "epoch": 11.98, + "learning_rate": 4.401236604687802e-05, + "loss": 2.4338, + "step": 2418000 + }, + { + "epoch": 11.98, + "learning_rate": 4.401112746045194e-05, + "loss": 2.4304, + "step": 2418500 + }, + { + "epoch": 11.98, + "learning_rate": 4.400989135119871e-05, + "loss": 2.3984, + "step": 2419000 + }, + { + "epoch": 11.99, + "learning_rate": 4.4008652764772624e-05, + "loss": 2.437, + "step": 2419500 + }, + { + "epoch": 11.99, + "learning_rate": 4.400741665551939e-05, + "loss": 2.4412, + "step": 2420000 + }, + { + "epoch": 11.99, + "learning_rate": 4.400617806909331e-05, + "loss": 2.4605, + "step": 2420500 + }, + { + "epoch": 11.99, + "learning_rate": 4.400493948266723e-05, + "loss": 2.4154, + "step": 2421000 + }, + { + "epoch": 12.0, + "learning_rate": 4.400370089624114e-05, + "loss": 2.4555, + "step": 2421500 + }, + { + "epoch": 12.0, + "learning_rate": 4.4002462309815054e-05, + "loss": 2.4047, + "step": 2422000 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.6447791117813911, + "eval_accuracy_mlm": 0.5983567225929853, + "eval_accuracy_nsp": 0.8637231868653391, + "eval_loss": 2.398054361343384, + "eval_runtime": 146.0821, + "eval_samples_per_second": 1745.314, + "eval_steps_per_second": 72.726, + "step": 2422116 + }, + { + "epoch": 12.0, + "learning_rate": 4.400122372338897e-05, + "loss": 2.3898, + "step": 2422500 + }, + { + "epoch": 12.0, + "learning_rate": 4.399998513696289e-05, + "loss": 2.4349, + "step": 2423000 + }, + { + "epoch": 12.01, + "learning_rate": 4.3998746550536804e-05, + "loss": 2.3875, + "step": 2423500 + }, + { + "epoch": 12.01, + "learning_rate": 4.399750796411072e-05, + "loss": 2.3812, + "step": 2424000 + }, + { + "epoch": 12.01, + "learning_rate": 4.399626937768464e-05, + "loss": 2.3915, + "step": 2424500 + }, + { + "epoch": 12.01, + "learning_rate": 4.3995030791258555e-05, + "loss": 2.4071, + "step": 2425000 + }, + { + "epoch": 12.02, + "learning_rate": 4.399379220483247e-05, + "loss": 2.3903, + "step": 2425500 + }, + { + "epoch": 12.02, + "learning_rate": 4.399255609557924e-05, + "loss": 2.3863, + "step": 2426000 + }, + { + "epoch": 12.02, + "learning_rate": 4.399131998632601e-05, + "loss": 2.3889, + "step": 2426500 + }, + { + "epoch": 12.02, + "learning_rate": 4.399008139989993e-05, + "loss": 2.3892, + "step": 2427000 + }, + { + "epoch": 12.03, + "learning_rate": 4.3988842813473844e-05, + "loss": 2.4049, + "step": 2427500 + }, + { + "epoch": 12.03, + "learning_rate": 4.398760422704776e-05, + "loss": 2.3953, + "step": 2428000 + }, + { + "epoch": 12.03, + "learning_rate": 4.398636564062167e-05, + "loss": 2.3994, + "step": 2428500 + }, + { + "epoch": 12.03, + "learning_rate": 4.398512705419559e-05, + "loss": 2.3911, + "step": 2429000 + }, + { + "epoch": 12.04, + "learning_rate": 4.3983888467769505e-05, + "loss": 2.4036, + "step": 2429500 + }, + { + "epoch": 12.04, + "learning_rate": 4.3982652358516273e-05, + "loss": 2.3935, + "step": 2430000 + }, + { + "epoch": 12.04, + "learning_rate": 4.398141624926304e-05, + "loss": 2.4119, + "step": 2430500 + }, + { + "epoch": 12.04, + "learning_rate": 4.398018014000981e-05, + "loss": 2.4128, + "step": 2431000 + }, + { + "epoch": 12.05, + "learning_rate": 4.397894155358373e-05, + "loss": 2.3834, + "step": 2431500 + }, + { + "epoch": 12.05, + "learning_rate": 4.3977702967157645e-05, + "loss": 2.402, + "step": 2432000 + }, + { + "epoch": 12.05, + "learning_rate": 4.397646438073156e-05, + "loss": 2.3786, + "step": 2432500 + }, + { + "epoch": 12.05, + "learning_rate": 4.397522579430547e-05, + "loss": 2.3985, + "step": 2433000 + }, + { + "epoch": 12.06, + "learning_rate": 4.397398720787939e-05, + "loss": 2.3975, + "step": 2433500 + }, + { + "epoch": 12.06, + "learning_rate": 4.3972748621453306e-05, + "loss": 2.395, + "step": 2434000 + }, + { + "epoch": 12.06, + "learning_rate": 4.397151251220008e-05, + "loss": 2.3914, + "step": 2434500 + }, + { + "epoch": 12.06, + "learning_rate": 4.397027640294685e-05, + "loss": 2.4154, + "step": 2435000 + }, + { + "epoch": 12.07, + "learning_rate": 4.396903781652077e-05, + "loss": 2.4228, + "step": 2435500 + }, + { + "epoch": 12.07, + "learning_rate": 4.396779923009468e-05, + "loss": 2.4234, + "step": 2436000 + }, + { + "epoch": 12.07, + "learning_rate": 4.3966560643668594e-05, + "loss": 2.4103, + "step": 2436500 + }, + { + "epoch": 12.07, + "learning_rate": 4.396532205724251e-05, + "loss": 2.4163, + "step": 2437000 + }, + { + "epoch": 12.08, + "learning_rate": 4.396408347081643e-05, + "loss": 2.408, + "step": 2437500 + }, + { + "epoch": 12.08, + "learning_rate": 4.3962844884390345e-05, + "loss": 2.3927, + "step": 2438000 + }, + { + "epoch": 12.08, + "learning_rate": 4.396160629796426e-05, + "loss": 2.3937, + "step": 2438500 + }, + { + "epoch": 12.08, + "learning_rate": 4.396036771153817e-05, + "loss": 2.3874, + "step": 2439000 + }, + { + "epoch": 12.09, + "learning_rate": 4.395912912511209e-05, + "loss": 2.4065, + "step": 2439500 + }, + { + "epoch": 12.09, + "learning_rate": 4.3957890538686006e-05, + "loss": 2.3979, + "step": 2440000 + }, + { + "epoch": 12.09, + "learning_rate": 4.395665442943278e-05, + "loss": 2.3901, + "step": 2440500 + }, + { + "epoch": 12.09, + "learning_rate": 4.39554158430067e-05, + "loss": 2.4009, + "step": 2441000 + }, + { + "epoch": 12.1, + "learning_rate": 4.3954177256580615e-05, + "loss": 2.396, + "step": 2441500 + }, + { + "epoch": 12.1, + "learning_rate": 4.3952941147327384e-05, + "loss": 2.4246, + "step": 2442000 + }, + { + "epoch": 12.1, + "learning_rate": 4.39517025609013e-05, + "loss": 2.4031, + "step": 2442500 + }, + { + "epoch": 12.1, + "learning_rate": 4.395046397447521e-05, + "loss": 2.4032, + "step": 2443000 + }, + { + "epoch": 12.11, + "learning_rate": 4.394922538804913e-05, + "loss": 2.3833, + "step": 2443500 + }, + { + "epoch": 12.11, + "learning_rate": 4.3947986801623045e-05, + "loss": 2.3917, + "step": 2444000 + }, + { + "epoch": 12.11, + "learning_rate": 4.394674821519696e-05, + "loss": 2.4239, + "step": 2444500 + }, + { + "epoch": 12.11, + "learning_rate": 4.394550962877088e-05, + "loss": 2.4095, + "step": 2445000 + }, + { + "epoch": 12.12, + "learning_rate": 4.394427104234479e-05, + "loss": 2.4093, + "step": 2445500 + }, + { + "epoch": 12.12, + "learning_rate": 4.3943032455918706e-05, + "loss": 2.4138, + "step": 2446000 + }, + { + "epoch": 12.12, + "learning_rate": 4.394179386949262e-05, + "loss": 2.3883, + "step": 2446500 + }, + { + "epoch": 12.12, + "learning_rate": 4.394055528306654e-05, + "loss": 2.3971, + "step": 2447000 + }, + { + "epoch": 12.13, + "learning_rate": 4.393931669664046e-05, + "loss": 2.4231, + "step": 2447500 + }, + { + "epoch": 12.13, + "learning_rate": 4.3938078110214374e-05, + "loss": 2.4131, + "step": 2448000 + }, + { + "epoch": 12.13, + "learning_rate": 4.393683952378829e-05, + "loss": 2.4189, + "step": 2448500 + }, + { + "epoch": 12.13, + "learning_rate": 4.393560093736221e-05, + "loss": 2.3864, + "step": 2449000 + }, + { + "epoch": 12.14, + "learning_rate": 4.3934362350936125e-05, + "loss": 2.4134, + "step": 2449500 + }, + { + "epoch": 12.14, + "learning_rate": 4.393312376451004e-05, + "loss": 2.3885, + "step": 2450000 + }, + { + "epoch": 12.14, + "learning_rate": 4.393188517808396e-05, + "loss": 2.4138, + "step": 2450500 + }, + { + "epoch": 12.14, + "learning_rate": 4.393064906883073e-05, + "loss": 2.42, + "step": 2451000 + }, + { + "epoch": 12.15, + "learning_rate": 4.3929410482404644e-05, + "loss": 2.4087, + "step": 2451500 + }, + { + "epoch": 12.15, + "learning_rate": 4.392817189597856e-05, + "loss": 2.4067, + "step": 2452000 + }, + { + "epoch": 12.15, + "learning_rate": 4.392693330955248e-05, + "loss": 2.4229, + "step": 2452500 + }, + { + "epoch": 12.15, + "learning_rate": 4.3925694723126395e-05, + "loss": 2.3739, + "step": 2453000 + }, + { + "epoch": 12.16, + "learning_rate": 4.392445613670031e-05, + "loss": 2.3939, + "step": 2453500 + }, + { + "epoch": 12.16, + "learning_rate": 4.3923220027447074e-05, + "loss": 2.4108, + "step": 2454000 + }, + { + "epoch": 12.16, + "learning_rate": 4.392198391819384e-05, + "loss": 2.3995, + "step": 2454500 + }, + { + "epoch": 12.16, + "learning_rate": 4.392074533176776e-05, + "loss": 2.396, + "step": 2455000 + }, + { + "epoch": 12.17, + "learning_rate": 4.3919506745341677e-05, + "loss": 2.4091, + "step": 2455500 + }, + { + "epoch": 12.17, + "learning_rate": 4.3918268158915594e-05, + "loss": 2.4122, + "step": 2456000 + }, + { + "epoch": 12.17, + "learning_rate": 4.391702957248951e-05, + "loss": 2.3943, + "step": 2456500 + }, + { + "epoch": 12.17, + "learning_rate": 4.391579098606343e-05, + "loss": 2.4066, + "step": 2457000 + }, + { + "epoch": 12.18, + "learning_rate": 4.3914552399637344e-05, + "loss": 2.4244, + "step": 2457500 + }, + { + "epoch": 12.18, + "learning_rate": 4.391331381321126e-05, + "loss": 2.386, + "step": 2458000 + }, + { + "epoch": 12.18, + "learning_rate": 4.391207522678518e-05, + "loss": 2.4081, + "step": 2458500 + }, + { + "epoch": 12.18, + "learning_rate": 4.3910836640359095e-05, + "loss": 2.376, + "step": 2459000 + }, + { + "epoch": 12.19, + "learning_rate": 4.390959805393301e-05, + "loss": 2.4178, + "step": 2459500 + }, + { + "epoch": 12.19, + "learning_rate": 4.3908361944679774e-05, + "loss": 2.4064, + "step": 2460000 + }, + { + "epoch": 12.19, + "learning_rate": 4.390712335825369e-05, + "loss": 2.4019, + "step": 2460500 + }, + { + "epoch": 12.19, + "learning_rate": 4.390588477182761e-05, + "loss": 2.3993, + "step": 2461000 + }, + { + "epoch": 12.2, + "learning_rate": 4.3904646185401525e-05, + "loss": 2.3876, + "step": 2461500 + }, + { + "epoch": 12.2, + "learning_rate": 4.390340759897544e-05, + "loss": 2.388, + "step": 2462000 + }, + { + "epoch": 12.2, + "learning_rate": 4.390216901254936e-05, + "loss": 2.4285, + "step": 2462500 + }, + { + "epoch": 12.2, + "learning_rate": 4.390093290329613e-05, + "loss": 2.4094, + "step": 2463000 + }, + { + "epoch": 12.21, + "learning_rate": 4.3899696794042896e-05, + "loss": 2.3865, + "step": 2463500 + }, + { + "epoch": 12.21, + "learning_rate": 4.389845820761681e-05, + "loss": 2.4204, + "step": 2464000 + }, + { + "epoch": 12.21, + "learning_rate": 4.389721962119073e-05, + "loss": 2.4236, + "step": 2464500 + }, + { + "epoch": 12.21, + "learning_rate": 4.389598103476465e-05, + "loss": 2.3961, + "step": 2465000 + }, + { + "epoch": 12.21, + "learning_rate": 4.3894744925511416e-05, + "loss": 2.397, + "step": 2465500 + }, + { + "epoch": 12.22, + "learning_rate": 4.389350633908533e-05, + "loss": 2.3895, + "step": 2466000 + }, + { + "epoch": 12.22, + "learning_rate": 4.389226775265925e-05, + "loss": 2.4102, + "step": 2466500 + }, + { + "epoch": 12.22, + "learning_rate": 4.389102916623316e-05, + "loss": 2.405, + "step": 2467000 + }, + { + "epoch": 12.22, + "learning_rate": 4.388979057980708e-05, + "loss": 2.4291, + "step": 2467500 + }, + { + "epoch": 12.23, + "learning_rate": 4.3888551993380994e-05, + "loss": 2.4213, + "step": 2468000 + }, + { + "epoch": 12.23, + "learning_rate": 4.388731340695491e-05, + "loss": 2.4009, + "step": 2468500 + }, + { + "epoch": 12.23, + "learning_rate": 4.388607482052883e-05, + "loss": 2.4047, + "step": 2469000 + }, + { + "epoch": 12.23, + "learning_rate": 4.3884836234102745e-05, + "loss": 2.385, + "step": 2469500 + }, + { + "epoch": 12.24, + "learning_rate": 4.388360012484951e-05, + "loss": 2.4111, + "step": 2470000 + }, + { + "epoch": 12.24, + "learning_rate": 4.388236153842343e-05, + "loss": 2.4104, + "step": 2470500 + }, + { + "epoch": 12.24, + "learning_rate": 4.388112295199735e-05, + "loss": 2.4294, + "step": 2471000 + }, + { + "epoch": 12.24, + "learning_rate": 4.3879884365571264e-05, + "loss": 2.4056, + "step": 2471500 + }, + { + "epoch": 12.25, + "learning_rate": 4.387864577914518e-05, + "loss": 2.396, + "step": 2472000 + }, + { + "epoch": 12.25, + "learning_rate": 4.387740719271909e-05, + "loss": 2.3991, + "step": 2472500 + }, + { + "epoch": 12.25, + "learning_rate": 4.387617108346587e-05, + "loss": 2.3729, + "step": 2473000 + }, + { + "epoch": 12.25, + "learning_rate": 4.387493249703978e-05, + "loss": 2.4009, + "step": 2473500 + }, + { + "epoch": 12.26, + "learning_rate": 4.3873693910613694e-05, + "loss": 2.4272, + "step": 2474000 + }, + { + "epoch": 12.26, + "learning_rate": 4.387245532418761e-05, + "loss": 2.4015, + "step": 2474500 + }, + { + "epoch": 12.26, + "learning_rate": 4.387121673776153e-05, + "loss": 2.4204, + "step": 2475000 + }, + { + "epoch": 12.26, + "learning_rate": 4.3869978151335445e-05, + "loss": 2.4145, + "step": 2475500 + }, + { + "epoch": 12.27, + "learning_rate": 4.386873956490936e-05, + "loss": 2.4175, + "step": 2476000 + }, + { + "epoch": 12.27, + "learning_rate": 4.386750097848328e-05, + "loss": 2.3936, + "step": 2476500 + }, + { + "epoch": 12.27, + "learning_rate": 4.3866262392057195e-05, + "loss": 2.382, + "step": 2477000 + }, + { + "epoch": 12.27, + "learning_rate": 4.3865026282803964e-05, + "loss": 2.4075, + "step": 2477500 + }, + { + "epoch": 12.28, + "learning_rate": 4.386378769637788e-05, + "loss": 2.3898, + "step": 2478000 + }, + { + "epoch": 12.28, + "learning_rate": 4.38625491099518e-05, + "loss": 2.4162, + "step": 2478500 + }, + { + "epoch": 12.28, + "learning_rate": 4.386131052352571e-05, + "loss": 2.4163, + "step": 2479000 + }, + { + "epoch": 12.28, + "learning_rate": 4.3860071937099625e-05, + "loss": 2.4165, + "step": 2479500 + }, + { + "epoch": 12.29, + "learning_rate": 4.385883335067354e-05, + "loss": 2.4039, + "step": 2480000 + }, + { + "epoch": 12.29, + "learning_rate": 4.385759476424746e-05, + "loss": 2.4346, + "step": 2480500 + }, + { + "epoch": 12.29, + "learning_rate": 4.385635865499423e-05, + "loss": 2.427, + "step": 2481000 + }, + { + "epoch": 12.29, + "learning_rate": 4.3855120068568145e-05, + "loss": 2.3986, + "step": 2481500 + }, + { + "epoch": 12.3, + "learning_rate": 4.385388148214206e-05, + "loss": 2.4292, + "step": 2482000 + }, + { + "epoch": 12.3, + "learning_rate": 4.385264537288884e-05, + "loss": 2.4181, + "step": 2482500 + }, + { + "epoch": 12.3, + "learning_rate": 4.385140678646275e-05, + "loss": 2.4078, + "step": 2483000 + }, + { + "epoch": 12.3, + "learning_rate": 4.3850168200036664e-05, + "loss": 2.426, + "step": 2483500 + }, + { + "epoch": 12.31, + "learning_rate": 4.384892961361058e-05, + "loss": 2.4333, + "step": 2484000 + }, + { + "epoch": 12.31, + "learning_rate": 4.38476910271845e-05, + "loss": 2.4103, + "step": 2484500 + }, + { + "epoch": 12.31, + "learning_rate": 4.3846452440758415e-05, + "loss": 2.418, + "step": 2485000 + }, + { + "epoch": 12.31, + "learning_rate": 4.384521385433233e-05, + "loss": 2.399, + "step": 2485500 + }, + { + "epoch": 12.32, + "learning_rate": 4.384397526790624e-05, + "loss": 2.408, + "step": 2486000 + }, + { + "epoch": 12.32, + "learning_rate": 4.384273668148016e-05, + "loss": 2.3786, + "step": 2486500 + }, + { + "epoch": 12.32, + "learning_rate": 4.3841498095054076e-05, + "loss": 2.434, + "step": 2487000 + }, + { + "epoch": 12.32, + "learning_rate": 4.384025950862799e-05, + "loss": 2.4179, + "step": 2487500 + }, + { + "epoch": 12.33, + "learning_rate": 4.383902339937476e-05, + "loss": 2.4173, + "step": 2488000 + }, + { + "epoch": 12.33, + "learning_rate": 4.383778729012154e-05, + "loss": 2.4215, + "step": 2488500 + }, + { + "epoch": 12.33, + "learning_rate": 4.383655365804115e-05, + "loss": 2.4273, + "step": 2489000 + }, + { + "epoch": 12.33, + "learning_rate": 4.383531507161507e-05, + "loss": 2.4262, + "step": 2489500 + }, + { + "epoch": 12.34, + "learning_rate": 4.3834076485188985e-05, + "loss": 2.4236, + "step": 2490000 + }, + { + "epoch": 12.34, + "learning_rate": 4.38328378987629e-05, + "loss": 2.3886, + "step": 2490500 + }, + { + "epoch": 12.34, + "learning_rate": 4.383159931233682e-05, + "loss": 2.4172, + "step": 2491000 + }, + { + "epoch": 12.34, + "learning_rate": 4.3830360725910736e-05, + "loss": 2.4084, + "step": 2491500 + }, + { + "epoch": 12.35, + "learning_rate": 4.382912213948465e-05, + "loss": 2.4236, + "step": 2492000 + }, + { + "epoch": 12.35, + "learning_rate": 4.382788355305857e-05, + "loss": 2.4233, + "step": 2492500 + }, + { + "epoch": 12.35, + "learning_rate": 4.382664496663249e-05, + "loss": 2.4178, + "step": 2493000 + }, + { + "epoch": 12.35, + "learning_rate": 4.3825406380206404e-05, + "loss": 2.3815, + "step": 2493500 + }, + { + "epoch": 12.36, + "learning_rate": 4.382416779378032e-05, + "loss": 2.4237, + "step": 2494000 + }, + { + "epoch": 12.36, + "learning_rate": 4.382292920735424e-05, + "loss": 2.397, + "step": 2494500 + }, + { + "epoch": 12.36, + "learning_rate": 4.3821690620928154e-05, + "loss": 2.4008, + "step": 2495000 + }, + { + "epoch": 12.36, + "learning_rate": 4.3820452034502065e-05, + "loss": 2.4192, + "step": 2495500 + }, + { + "epoch": 12.37, + "learning_rate": 4.3819215925248833e-05, + "loss": 2.3788, + "step": 2496000 + }, + { + "epoch": 12.37, + "learning_rate": 4.381797733882275e-05, + "loss": 2.3979, + "step": 2496500 + }, + { + "epoch": 12.37, + "learning_rate": 4.381673875239667e-05, + "loss": 2.3989, + "step": 2497000 + }, + { + "epoch": 12.37, + "learning_rate": 4.3815500165970584e-05, + "loss": 2.4017, + "step": 2497500 + }, + { + "epoch": 12.38, + "learning_rate": 4.38142615795445e-05, + "loss": 2.4174, + "step": 2498000 + }, + { + "epoch": 12.38, + "learning_rate": 4.381302299311841e-05, + "loss": 2.4145, + "step": 2498500 + }, + { + "epoch": 12.38, + "learning_rate": 4.381178440669233e-05, + "loss": 2.4302, + "step": 2499000 + }, + { + "epoch": 12.38, + "learning_rate": 4.3810545820266245e-05, + "loss": 2.4266, + "step": 2499500 + }, + { + "epoch": 12.39, + "learning_rate": 4.380930971101302e-05, + "loss": 2.3984, + "step": 2500000 + }, + { + "epoch": 12.39, + "learning_rate": 4.380807360175978e-05, + "loss": 2.4286, + "step": 2500500 + }, + { + "epoch": 12.39, + "learning_rate": 4.38068350153337e-05, + "loss": 2.3976, + "step": 2501000 + }, + { + "epoch": 12.39, + "learning_rate": 4.380559890608047e-05, + "loss": 2.4343, + "step": 2501500 + }, + { + "epoch": 12.4, + "learning_rate": 4.3804360319654385e-05, + "loss": 2.4084, + "step": 2502000 + }, + { + "epoch": 12.4, + "learning_rate": 4.38031217332283e-05, + "loss": 2.4167, + "step": 2502500 + }, + { + "epoch": 12.4, + "learning_rate": 4.380188314680222e-05, + "loss": 2.4184, + "step": 2503000 + }, + { + "epoch": 12.4, + "learning_rate": 4.3800644560376136e-05, + "loss": 2.424, + "step": 2503500 + }, + { + "epoch": 12.41, + "learning_rate": 4.3799408451122905e-05, + "loss": 2.4085, + "step": 2504000 + }, + { + "epoch": 12.41, + "learning_rate": 4.379816986469682e-05, + "loss": 2.4307, + "step": 2504500 + }, + { + "epoch": 12.41, + "learning_rate": 4.379693127827074e-05, + "loss": 2.4117, + "step": 2505000 + }, + { + "epoch": 12.41, + "learning_rate": 4.3795692691844656e-05, + "loss": 2.3954, + "step": 2505500 + }, + { + "epoch": 12.42, + "learning_rate": 4.379445658259142e-05, + "loss": 2.4061, + "step": 2506000 + }, + { + "epoch": 12.42, + "learning_rate": 4.3793217996165335e-05, + "loss": 2.4096, + "step": 2506500 + }, + { + "epoch": 12.42, + "learning_rate": 4.379197940973925e-05, + "loss": 2.3931, + "step": 2507000 + }, + { + "epoch": 12.42, + "learning_rate": 4.379074082331317e-05, + "loss": 2.4206, + "step": 2507500 + }, + { + "epoch": 12.43, + "learning_rate": 4.3789502236887086e-05, + "loss": 2.4262, + "step": 2508000 + }, + { + "epoch": 12.43, + "learning_rate": 4.3788263650461e-05, + "loss": 2.3758, + "step": 2508500 + }, + { + "epoch": 12.43, + "learning_rate": 4.378702506403492e-05, + "loss": 2.3879, + "step": 2509000 + }, + { + "epoch": 12.43, + "learning_rate": 4.3785786477608836e-05, + "loss": 2.4278, + "step": 2509500 + }, + { + "epoch": 12.44, + "learning_rate": 4.3784550368355605e-05, + "loss": 2.4015, + "step": 2510000 + }, + { + "epoch": 12.44, + "learning_rate": 4.378331178192952e-05, + "loss": 2.4261, + "step": 2510500 + }, + { + "epoch": 12.44, + "learning_rate": 4.378207319550344e-05, + "loss": 2.4078, + "step": 2511000 + }, + { + "epoch": 12.44, + "learning_rate": 4.3780834609077356e-05, + "loss": 2.3968, + "step": 2511500 + }, + { + "epoch": 12.45, + "learning_rate": 4.377959602265127e-05, + "loss": 2.3812, + "step": 2512000 + }, + { + "epoch": 12.45, + "learning_rate": 4.377835743622519e-05, + "loss": 2.3986, + "step": 2512500 + }, + { + "epoch": 12.45, + "learning_rate": 4.377711884979911e-05, + "loss": 2.4222, + "step": 2513000 + }, + { + "epoch": 12.45, + "learning_rate": 4.377588026337302e-05, + "loss": 2.4022, + "step": 2513500 + }, + { + "epoch": 12.46, + "learning_rate": 4.3774641676946934e-05, + "loss": 2.4151, + "step": 2514000 + }, + { + "epoch": 12.46, + "learning_rate": 4.377340309052085e-05, + "loss": 2.4226, + "step": 2514500 + }, + { + "epoch": 12.46, + "learning_rate": 4.377216450409477e-05, + "loss": 2.3848, + "step": 2515000 + }, + { + "epoch": 12.46, + "learning_rate": 4.3770925917668685e-05, + "loss": 2.4285, + "step": 2515500 + }, + { + "epoch": 12.47, + "learning_rate": 4.37696873312426e-05, + "loss": 2.4243, + "step": 2516000 + }, + { + "epoch": 12.47, + "learning_rate": 4.376845122198937e-05, + "loss": 2.4008, + "step": 2516500 + }, + { + "epoch": 12.47, + "learning_rate": 4.376721263556329e-05, + "loss": 2.4064, + "step": 2517000 + }, + { + "epoch": 12.47, + "learning_rate": 4.3765974049137204e-05, + "loss": 2.4022, + "step": 2517500 + }, + { + "epoch": 12.48, + "learning_rate": 4.376473793988397e-05, + "loss": 2.3923, + "step": 2518000 + }, + { + "epoch": 12.48, + "learning_rate": 4.376349935345789e-05, + "loss": 2.4132, + "step": 2518500 + }, + { + "epoch": 12.48, + "learning_rate": 4.376226076703181e-05, + "loss": 2.4178, + "step": 2519000 + }, + { + "epoch": 12.48, + "learning_rate": 4.3761022180605724e-05, + "loss": 2.4013, + "step": 2519500 + }, + { + "epoch": 12.48, + "learning_rate": 4.375978359417964e-05, + "loss": 2.4077, + "step": 2520000 + }, + { + "epoch": 12.49, + "learning_rate": 4.375854500775355e-05, + "loss": 2.4075, + "step": 2520500 + }, + { + "epoch": 12.49, + "learning_rate": 4.375730642132747e-05, + "loss": 2.4111, + "step": 2521000 + }, + { + "epoch": 12.49, + "learning_rate": 4.375607031207424e-05, + "loss": 2.3895, + "step": 2521500 + }, + { + "epoch": 12.49, + "learning_rate": 4.3754831725648154e-05, + "loss": 2.3763, + "step": 2522000 + }, + { + "epoch": 12.5, + "learning_rate": 4.375359313922207e-05, + "loss": 2.4228, + "step": 2522500 + }, + { + "epoch": 12.5, + "learning_rate": 4.375235455279599e-05, + "loss": 2.3938, + "step": 2523000 + }, + { + "epoch": 12.5, + "learning_rate": 4.3751115966369904e-05, + "loss": 2.404, + "step": 2523500 + }, + { + "epoch": 12.5, + "learning_rate": 4.374987985711667e-05, + "loss": 2.4042, + "step": 2524000 + }, + { + "epoch": 12.51, + "learning_rate": 4.374864127069059e-05, + "loss": 2.4155, + "step": 2524500 + }, + { + "epoch": 12.51, + "learning_rate": 4.374740268426451e-05, + "loss": 2.4128, + "step": 2525000 + }, + { + "epoch": 12.51, + "learning_rate": 4.3746164097838424e-05, + "loss": 2.3995, + "step": 2525500 + }, + { + "epoch": 12.51, + "learning_rate": 4.374492551141234e-05, + "loss": 2.3965, + "step": 2526000 + }, + { + "epoch": 12.52, + "learning_rate": 4.374368692498626e-05, + "loss": 2.426, + "step": 2526500 + }, + { + "epoch": 12.52, + "learning_rate": 4.3742448338560175e-05, + "loss": 2.4275, + "step": 2527000 + }, + { + "epoch": 12.52, + "learning_rate": 4.3741209752134085e-05, + "loss": 2.407, + "step": 2527500 + }, + { + "epoch": 12.52, + "learning_rate": 4.3739973642880854e-05, + "loss": 2.376, + "step": 2528000 + }, + { + "epoch": 12.53, + "learning_rate": 4.373873505645477e-05, + "loss": 2.4029, + "step": 2528500 + }, + { + "epoch": 12.53, + "learning_rate": 4.373749647002869e-05, + "loss": 2.4195, + "step": 2529000 + }, + { + "epoch": 12.53, + "learning_rate": 4.3736257883602604e-05, + "loss": 2.3921, + "step": 2529500 + }, + { + "epoch": 12.53, + "learning_rate": 4.373501929717652e-05, + "loss": 2.4134, + "step": 2530000 + }, + { + "epoch": 12.54, + "learning_rate": 4.3733785665096135e-05, + "loss": 2.4015, + "step": 2530500 + }, + { + "epoch": 12.54, + "learning_rate": 4.373254707867005e-05, + "loss": 2.4044, + "step": 2531000 + }, + { + "epoch": 12.54, + "learning_rate": 4.373130849224397e-05, + "loss": 2.4226, + "step": 2531500 + }, + { + "epoch": 12.54, + "learning_rate": 4.3730069905817886e-05, + "loss": 2.4238, + "step": 2532000 + }, + { + "epoch": 12.55, + "learning_rate": 4.37288313193918e-05, + "loss": 2.3989, + "step": 2532500 + }, + { + "epoch": 12.55, + "learning_rate": 4.372759273296572e-05, + "loss": 2.4198, + "step": 2533000 + }, + { + "epoch": 12.55, + "learning_rate": 4.372635414653964e-05, + "loss": 2.3914, + "step": 2533500 + }, + { + "epoch": 12.55, + "learning_rate": 4.3725115560113554e-05, + "loss": 2.4127, + "step": 2534000 + }, + { + "epoch": 12.56, + "learning_rate": 4.372387697368747e-05, + "loss": 2.4067, + "step": 2534500 + }, + { + "epoch": 12.56, + "learning_rate": 4.372264086443424e-05, + "loss": 2.4096, + "step": 2535000 + }, + { + "epoch": 12.56, + "learning_rate": 4.3721402278008156e-05, + "loss": 2.392, + "step": 2535500 + }, + { + "epoch": 12.56, + "learning_rate": 4.372016369158207e-05, + "loss": 2.4025, + "step": 2536000 + }, + { + "epoch": 12.57, + "learning_rate": 4.371892510515599e-05, + "loss": 2.3878, + "step": 2536500 + }, + { + "epoch": 12.57, + "learning_rate": 4.371768651872991e-05, + "loss": 2.428, + "step": 2537000 + }, + { + "epoch": 12.57, + "learning_rate": 4.3716447932303824e-05, + "loss": 2.3988, + "step": 2537500 + }, + { + "epoch": 12.57, + "learning_rate": 4.371520934587774e-05, + "loss": 2.4164, + "step": 2538000 + }, + { + "epoch": 12.58, + "learning_rate": 4.37139732366245e-05, + "loss": 2.4146, + "step": 2538500 + }, + { + "epoch": 12.58, + "learning_rate": 4.371273465019842e-05, + "loss": 2.4205, + "step": 2539000 + }, + { + "epoch": 12.58, + "learning_rate": 4.3711498540945196e-05, + "loss": 2.4266, + "step": 2539500 + }, + { + "epoch": 12.58, + "learning_rate": 4.3710259954519106e-05, + "loss": 2.405, + "step": 2540000 + }, + { + "epoch": 12.59, + "learning_rate": 4.370902384526588e-05, + "loss": 2.4024, + "step": 2540500 + }, + { + "epoch": 12.59, + "learning_rate": 4.37077852588398e-05, + "loss": 2.4214, + "step": 2541000 + }, + { + "epoch": 12.59, + "learning_rate": 4.370654667241371e-05, + "loss": 2.4359, + "step": 2541500 + }, + { + "epoch": 12.59, + "learning_rate": 4.3705308085987625e-05, + "loss": 2.4429, + "step": 2542000 + }, + { + "epoch": 12.6, + "learning_rate": 4.370406949956154e-05, + "loss": 2.4162, + "step": 2542500 + }, + { + "epoch": 12.6, + "learning_rate": 4.370283339030831e-05, + "loss": 2.4065, + "step": 2543000 + }, + { + "epoch": 12.6, + "learning_rate": 4.370159480388223e-05, + "loss": 2.4123, + "step": 2543500 + }, + { + "epoch": 12.6, + "learning_rate": 4.3700356217456145e-05, + "loss": 2.3914, + "step": 2544000 + }, + { + "epoch": 12.61, + "learning_rate": 4.369911763103006e-05, + "loss": 2.4199, + "step": 2544500 + }, + { + "epoch": 12.61, + "learning_rate": 4.369787904460398e-05, + "loss": 2.4165, + "step": 2545000 + }, + { + "epoch": 12.61, + "learning_rate": 4.3696640458177896e-05, + "loss": 2.4082, + "step": 2545500 + }, + { + "epoch": 12.61, + "learning_rate": 4.3695401871751806e-05, + "loss": 2.4234, + "step": 2546000 + }, + { + "epoch": 12.62, + "learning_rate": 4.369416576249858e-05, + "loss": 2.4354, + "step": 2546500 + }, + { + "epoch": 12.62, + "learning_rate": 4.36929271760725e-05, + "loss": 2.434, + "step": 2547000 + }, + { + "epoch": 12.62, + "learning_rate": 4.3691688589646415e-05, + "loss": 2.3977, + "step": 2547500 + }, + { + "epoch": 12.62, + "learning_rate": 4.369045000322033e-05, + "loss": 2.3974, + "step": 2548000 + }, + { + "epoch": 12.63, + "learning_rate": 4.368921141679424e-05, + "loss": 2.4089, + "step": 2548500 + }, + { + "epoch": 12.63, + "learning_rate": 4.368797283036816e-05, + "loss": 2.4315, + "step": 2549000 + }, + { + "epoch": 12.63, + "learning_rate": 4.3686734243942076e-05, + "loss": 2.4032, + "step": 2549500 + }, + { + "epoch": 12.63, + "learning_rate": 4.368549565751599e-05, + "loss": 2.4246, + "step": 2550000 + }, + { + "epoch": 12.64, + "learning_rate": 4.368425707108991e-05, + "loss": 2.3949, + "step": 2550500 + }, + { + "epoch": 12.64, + "learning_rate": 4.368302096183668e-05, + "loss": 2.4226, + "step": 2551000 + }, + { + "epoch": 12.64, + "learning_rate": 4.3681782375410596e-05, + "loss": 2.4192, + "step": 2551500 + }, + { + "epoch": 12.64, + "learning_rate": 4.368054378898451e-05, + "loss": 2.4099, + "step": 2552000 + }, + { + "epoch": 12.65, + "learning_rate": 4.367930520255842e-05, + "loss": 2.4061, + "step": 2552500 + }, + { + "epoch": 12.65, + "learning_rate": 4.367806661613234e-05, + "loss": 2.4307, + "step": 2553000 + }, + { + "epoch": 12.65, + "learning_rate": 4.367683298405196e-05, + "loss": 2.3988, + "step": 2553500 + }, + { + "epoch": 12.65, + "learning_rate": 4.367559439762588e-05, + "loss": 2.3905, + "step": 2554000 + }, + { + "epoch": 12.66, + "learning_rate": 4.3674355811199794e-05, + "loss": 2.4154, + "step": 2554500 + }, + { + "epoch": 12.66, + "learning_rate": 4.367311722477371e-05, + "loss": 2.4097, + "step": 2555000 + }, + { + "epoch": 12.66, + "learning_rate": 4.367187863834763e-05, + "loss": 2.4109, + "step": 2555500 + }, + { + "epoch": 12.66, + "learning_rate": 4.3670640051921545e-05, + "loss": 2.3961, + "step": 2556000 + }, + { + "epoch": 12.67, + "learning_rate": 4.366940146549546e-05, + "loss": 2.4039, + "step": 2556500 + }, + { + "epoch": 12.67, + "learning_rate": 4.366816287906938e-05, + "loss": 2.3979, + "step": 2557000 + }, + { + "epoch": 12.67, + "learning_rate": 4.366692676981615e-05, + "loss": 2.4154, + "step": 2557500 + }, + { + "epoch": 12.67, + "learning_rate": 4.3665688183390065e-05, + "loss": 2.4105, + "step": 2558000 + }, + { + "epoch": 12.68, + "learning_rate": 4.366444959696398e-05, + "loss": 2.4075, + "step": 2558500 + }, + { + "epoch": 12.68, + "learning_rate": 4.36632110105379e-05, + "loss": 2.4176, + "step": 2559000 + }, + { + "epoch": 12.68, + "learning_rate": 4.3661972424111816e-05, + "loss": 2.4234, + "step": 2559500 + }, + { + "epoch": 12.68, + "learning_rate": 4.366073631485858e-05, + "loss": 2.414, + "step": 2560000 + }, + { + "epoch": 12.69, + "learning_rate": 4.3659497728432495e-05, + "loss": 2.4034, + "step": 2560500 + }, + { + "epoch": 12.69, + "learning_rate": 4.365825914200641e-05, + "loss": 2.4229, + "step": 2561000 + }, + { + "epoch": 12.69, + "learning_rate": 4.365702055558033e-05, + "loss": 2.3905, + "step": 2561500 + }, + { + "epoch": 12.69, + "learning_rate": 4.3655781969154245e-05, + "loss": 2.4277, + "step": 2562000 + }, + { + "epoch": 12.7, + "learning_rate": 4.365454338272816e-05, + "loss": 2.4012, + "step": 2562500 + }, + { + "epoch": 12.7, + "learning_rate": 4.365330479630208e-05, + "loss": 2.4179, + "step": 2563000 + }, + { + "epoch": 12.7, + "learning_rate": 4.365206868704885e-05, + "loss": 2.4339, + "step": 2563500 + }, + { + "epoch": 12.7, + "learning_rate": 4.3650830100622765e-05, + "loss": 2.4012, + "step": 2564000 + }, + { + "epoch": 12.71, + "learning_rate": 4.364959151419668e-05, + "loss": 2.4154, + "step": 2564500 + }, + { + "epoch": 12.71, + "learning_rate": 4.36483529277706e-05, + "loss": 2.3908, + "step": 2565000 + }, + { + "epoch": 12.71, + "learning_rate": 4.3647114341344516e-05, + "loss": 2.4239, + "step": 2565500 + }, + { + "epoch": 12.71, + "learning_rate": 4.364587575491843e-05, + "loss": 2.403, + "step": 2566000 + }, + { + "epoch": 12.72, + "learning_rate": 4.364463716849235e-05, + "loss": 2.3735, + "step": 2566500 + }, + { + "epoch": 12.72, + "learning_rate": 4.3643398582066267e-05, + "loss": 2.4058, + "step": 2567000 + }, + { + "epoch": 12.72, + "learning_rate": 4.364216247281303e-05, + "loss": 2.4092, + "step": 2567500 + }, + { + "epoch": 12.72, + "learning_rate": 4.36409263635598e-05, + "loss": 2.4055, + "step": 2568000 + }, + { + "epoch": 12.73, + "learning_rate": 4.3639687777133714e-05, + "loss": 2.4084, + "step": 2568500 + }, + { + "epoch": 12.73, + "learning_rate": 4.363844919070763e-05, + "loss": 2.4078, + "step": 2569000 + }, + { + "epoch": 12.73, + "learning_rate": 4.363721060428155e-05, + "loss": 2.3979, + "step": 2569500 + }, + { + "epoch": 12.73, + "learning_rate": 4.363597449502832e-05, + "loss": 2.4249, + "step": 2570000 + }, + { + "epoch": 12.74, + "learning_rate": 4.3634735908602234e-05, + "loss": 2.4067, + "step": 2570500 + }, + { + "epoch": 12.74, + "learning_rate": 4.3633499799349e-05, + "loss": 2.4111, + "step": 2571000 + }, + { + "epoch": 12.74, + "learning_rate": 4.363226121292291e-05, + "loss": 2.3941, + "step": 2571500 + }, + { + "epoch": 12.74, + "learning_rate": 4.363102262649683e-05, + "loss": 2.4572, + "step": 2572000 + }, + { + "epoch": 12.75, + "learning_rate": 4.362978404007075e-05, + "loss": 2.428, + "step": 2572500 + }, + { + "epoch": 12.75, + "learning_rate": 4.3628545453644664e-05, + "loss": 2.4121, + "step": 2573000 + }, + { + "epoch": 12.75, + "learning_rate": 4.362730686721858e-05, + "loss": 2.4407, + "step": 2573500 + }, + { + "epoch": 12.75, + "learning_rate": 4.36260682807925e-05, + "loss": 2.4202, + "step": 2574000 + }, + { + "epoch": 12.75, + "learning_rate": 4.3624829694366414e-05, + "loss": 2.3971, + "step": 2574500 + }, + { + "epoch": 12.76, + "learning_rate": 4.362359358511318e-05, + "loss": 2.4459, + "step": 2575000 + }, + { + "epoch": 12.76, + "learning_rate": 4.36223549986871e-05, + "loss": 2.4038, + "step": 2575500 + }, + { + "epoch": 12.76, + "learning_rate": 4.362111641226102e-05, + "loss": 2.4283, + "step": 2576000 + }, + { + "epoch": 12.76, + "learning_rate": 4.3619877825834934e-05, + "loss": 2.4301, + "step": 2576500 + }, + { + "epoch": 12.77, + "learning_rate": 4.361863923940885e-05, + "loss": 2.4261, + "step": 2577000 + }, + { + "epoch": 12.77, + "learning_rate": 4.361740313015562e-05, + "loss": 2.4179, + "step": 2577500 + }, + { + "epoch": 12.77, + "learning_rate": 4.361616702090239e-05, + "loss": 2.4112, + "step": 2578000 + }, + { + "epoch": 12.77, + "learning_rate": 4.3614928434476305e-05, + "loss": 2.4111, + "step": 2578500 + }, + { + "epoch": 12.78, + "learning_rate": 4.361368984805022e-05, + "loss": 2.4339, + "step": 2579000 + }, + { + "epoch": 12.78, + "learning_rate": 4.361245126162414e-05, + "loss": 2.4154, + "step": 2579500 + }, + { + "epoch": 12.78, + "learning_rate": 4.3611212675198056e-05, + "loss": 2.4092, + "step": 2580000 + }, + { + "epoch": 12.78, + "learning_rate": 4.360997408877197e-05, + "loss": 2.4017, + "step": 2580500 + }, + { + "epoch": 12.79, + "learning_rate": 4.3608737979518735e-05, + "loss": 2.3993, + "step": 2581000 + }, + { + "epoch": 12.79, + "learning_rate": 4.360749939309265e-05, + "loss": 2.4092, + "step": 2581500 + }, + { + "epoch": 12.79, + "learning_rate": 4.360626080666657e-05, + "loss": 2.4148, + "step": 2582000 + }, + { + "epoch": 12.79, + "learning_rate": 4.3605022220240486e-05, + "loss": 2.4223, + "step": 2582500 + }, + { + "epoch": 12.8, + "learning_rate": 4.36037836338144e-05, + "loss": 2.4065, + "step": 2583000 + }, + { + "epoch": 12.8, + "learning_rate": 4.360254504738832e-05, + "loss": 2.4275, + "step": 2583500 + }, + { + "epoch": 12.8, + "learning_rate": 4.360130646096223e-05, + "loss": 2.3997, + "step": 2584000 + }, + { + "epoch": 12.8, + "learning_rate": 4.360006787453615e-05, + "loss": 2.4098, + "step": 2584500 + }, + { + "epoch": 12.81, + "learning_rate": 4.3598829288110064e-05, + "loss": 2.4142, + "step": 2585000 + }, + { + "epoch": 12.81, + "learning_rate": 4.359759070168398e-05, + "loss": 2.4112, + "step": 2585500 + }, + { + "epoch": 12.81, + "learning_rate": 4.3596354592430756e-05, + "loss": 2.4065, + "step": 2586000 + }, + { + "epoch": 12.81, + "learning_rate": 4.359511600600467e-05, + "loss": 2.4327, + "step": 2586500 + }, + { + "epoch": 12.82, + "learning_rate": 4.3593879896751435e-05, + "loss": 2.4185, + "step": 2587000 + }, + { + "epoch": 12.82, + "learning_rate": 4.359264131032535e-05, + "loss": 2.3929, + "step": 2587500 + }, + { + "epoch": 12.82, + "learning_rate": 4.359140272389927e-05, + "loss": 2.4129, + "step": 2588000 + }, + { + "epoch": 12.82, + "learning_rate": 4.3590164137473186e-05, + "loss": 2.4261, + "step": 2588500 + }, + { + "epoch": 12.83, + "learning_rate": 4.35889255510471e-05, + "loss": 2.4036, + "step": 2589000 + }, + { + "epoch": 12.83, + "learning_rate": 4.358768696462102e-05, + "loss": 2.4252, + "step": 2589500 + }, + { + "epoch": 12.83, + "learning_rate": 4.358644837819494e-05, + "loss": 2.4257, + "step": 2590000 + }, + { + "epoch": 12.83, + "learning_rate": 4.358520979176885e-05, + "loss": 2.3993, + "step": 2590500 + }, + { + "epoch": 12.84, + "learning_rate": 4.3583971205342764e-05, + "loss": 2.4082, + "step": 2591000 + }, + { + "epoch": 12.84, + "learning_rate": 4.358273261891668e-05, + "loss": 2.417, + "step": 2591500 + }, + { + "epoch": 12.84, + "learning_rate": 4.35814940324906e-05, + "loss": 2.4277, + "step": 2592000 + }, + { + "epoch": 12.84, + "learning_rate": 4.3580257923237373e-05, + "loss": 2.4523, + "step": 2592500 + }, + { + "epoch": 12.85, + "learning_rate": 4.357901933681129e-05, + "loss": 2.4193, + "step": 2593000 + }, + { + "epoch": 12.85, + "learning_rate": 4.35777807503852e-05, + "loss": 2.403, + "step": 2593500 + }, + { + "epoch": 12.85, + "learning_rate": 4.357654216395912e-05, + "loss": 2.4337, + "step": 2594000 + }, + { + "epoch": 12.85, + "learning_rate": 4.3575303577533034e-05, + "loss": 2.4078, + "step": 2594500 + }, + { + "epoch": 12.86, + "learning_rate": 4.35740674682798e-05, + "loss": 2.4119, + "step": 2595000 + }, + { + "epoch": 12.86, + "learning_rate": 4.357282888185372e-05, + "loss": 2.4088, + "step": 2595500 + }, + { + "epoch": 12.86, + "learning_rate": 4.357159029542764e-05, + "loss": 2.4246, + "step": 2596000 + }, + { + "epoch": 12.86, + "learning_rate": 4.357035170900155e-05, + "loss": 2.4001, + "step": 2596500 + }, + { + "epoch": 12.87, + "learning_rate": 4.3569113122575464e-05, + "loss": 2.4112, + "step": 2597000 + }, + { + "epoch": 12.87, + "learning_rate": 4.356787701332224e-05, + "loss": 2.4181, + "step": 2597500 + }, + { + "epoch": 12.87, + "learning_rate": 4.356664090406901e-05, + "loss": 2.43, + "step": 2598000 + }, + { + "epoch": 12.87, + "learning_rate": 4.356540479481577e-05, + "loss": 2.4409, + "step": 2598500 + }, + { + "epoch": 12.88, + "learning_rate": 4.3564168685562546e-05, + "loss": 2.4328, + "step": 2599000 + }, + { + "epoch": 12.88, + "learning_rate": 4.356293009913646e-05, + "loss": 2.4351, + "step": 2599500 + }, + { + "epoch": 12.88, + "learning_rate": 4.356169151271038e-05, + "loss": 2.4067, + "step": 2600000 + }, + { + "epoch": 12.88, + "learning_rate": 4.356045292628429e-05, + "loss": 2.4088, + "step": 2600500 + }, + { + "epoch": 12.89, + "learning_rate": 4.355921433985821e-05, + "loss": 2.4367, + "step": 2601000 + }, + { + "epoch": 12.89, + "learning_rate": 4.3557975753432124e-05, + "loss": 2.4095, + "step": 2601500 + }, + { + "epoch": 12.89, + "learning_rate": 4.355673716700604e-05, + "loss": 2.4283, + "step": 2602000 + }, + { + "epoch": 12.89, + "learning_rate": 4.355549858057996e-05, + "loss": 2.4218, + "step": 2602500 + }, + { + "epoch": 12.9, + "learning_rate": 4.3554259994153875e-05, + "loss": 2.4259, + "step": 2603000 + }, + { + "epoch": 12.9, + "learning_rate": 4.355302140772779e-05, + "loss": 2.3895, + "step": 2603500 + }, + { + "epoch": 12.9, + "learning_rate": 4.355178282130171e-05, + "loss": 2.416, + "step": 2604000 + }, + { + "epoch": 12.9, + "learning_rate": 4.3550544234875626e-05, + "loss": 2.3923, + "step": 2604500 + }, + { + "epoch": 12.91, + "learning_rate": 4.354930812562239e-05, + "loss": 2.3966, + "step": 2605000 + }, + { + "epoch": 12.91, + "learning_rate": 4.3548069539196305e-05, + "loss": 2.4156, + "step": 2605500 + }, + { + "epoch": 12.91, + "learning_rate": 4.354683095277022e-05, + "loss": 2.4116, + "step": 2606000 + }, + { + "epoch": 12.91, + "learning_rate": 4.354559236634414e-05, + "loss": 2.4211, + "step": 2606500 + }, + { + "epoch": 12.92, + "learning_rate": 4.3544353779918055e-05, + "loss": 2.4306, + "step": 2607000 + }, + { + "epoch": 12.92, + "learning_rate": 4.354311519349197e-05, + "loss": 2.3948, + "step": 2607500 + }, + { + "epoch": 12.92, + "learning_rate": 4.354187660706589e-05, + "loss": 2.4124, + "step": 2608000 + }, + { + "epoch": 12.92, + "learning_rate": 4.3540638020639806e-05, + "loss": 2.4227, + "step": 2608500 + }, + { + "epoch": 12.93, + "learning_rate": 4.353939943421372e-05, + "loss": 2.4166, + "step": 2609000 + }, + { + "epoch": 12.93, + "learning_rate": 4.353816084778764e-05, + "loss": 2.4322, + "step": 2609500 + }, + { + "epoch": 12.93, + "learning_rate": 4.353692226136156e-05, + "loss": 2.4191, + "step": 2610000 + }, + { + "epoch": 12.93, + "learning_rate": 4.3535686152108326e-05, + "loss": 2.424, + "step": 2610500 + }, + { + "epoch": 12.94, + "learning_rate": 4.353444756568224e-05, + "loss": 2.4131, + "step": 2611000 + }, + { + "epoch": 12.94, + "learning_rate": 4.353320897925616e-05, + "loss": 2.422, + "step": 2611500 + }, + { + "epoch": 12.94, + "learning_rate": 4.3531970392830076e-05, + "loss": 2.3948, + "step": 2612000 + }, + { + "epoch": 12.94, + "learning_rate": 4.3530731806403993e-05, + "loss": 2.4332, + "step": 2612500 + }, + { + "epoch": 12.95, + "learning_rate": 4.352949321997791e-05, + "loss": 2.4331, + "step": 2613000 + }, + { + "epoch": 12.95, + "learning_rate": 4.352825463355182e-05, + "loss": 2.4107, + "step": 2613500 + }, + { + "epoch": 12.95, + "learning_rate": 4.352701604712574e-05, + "loss": 2.4368, + "step": 2614000 + }, + { + "epoch": 12.95, + "learning_rate": 4.3525779937872506e-05, + "loss": 2.4095, + "step": 2614500 + }, + { + "epoch": 12.96, + "learning_rate": 4.352454135144642e-05, + "loss": 2.3953, + "step": 2615000 + }, + { + "epoch": 12.96, + "learning_rate": 4.352330276502034e-05, + "loss": 2.411, + "step": 2615500 + }, + { + "epoch": 12.96, + "learning_rate": 4.352206417859426e-05, + "loss": 2.3921, + "step": 2616000 + }, + { + "epoch": 12.96, + "learning_rate": 4.3520828069341026e-05, + "loss": 2.4145, + "step": 2616500 + }, + { + "epoch": 12.97, + "learning_rate": 4.351958948291494e-05, + "loss": 2.4006, + "step": 2617000 + }, + { + "epoch": 12.97, + "learning_rate": 4.351835089648886e-05, + "loss": 2.4215, + "step": 2617500 + }, + { + "epoch": 12.97, + "learning_rate": 4.3517112310062777e-05, + "loss": 2.4077, + "step": 2618000 + }, + { + "epoch": 12.97, + "learning_rate": 4.3515873723636694e-05, + "loss": 2.4019, + "step": 2618500 + }, + { + "epoch": 12.98, + "learning_rate": 4.351463513721061e-05, + "loss": 2.4322, + "step": 2619000 + }, + { + "epoch": 12.98, + "learning_rate": 4.351339655078453e-05, + "loss": 2.4438, + "step": 2619500 + }, + { + "epoch": 12.98, + "learning_rate": 4.3512157964358444e-05, + "loss": 2.3985, + "step": 2620000 + }, + { + "epoch": 12.98, + "learning_rate": 4.3510919377932354e-05, + "loss": 2.4106, + "step": 2620500 + }, + { + "epoch": 12.99, + "learning_rate": 4.350968326867912e-05, + "loss": 2.4127, + "step": 2621000 + }, + { + "epoch": 12.99, + "learning_rate": 4.350844468225304e-05, + "loss": 2.3968, + "step": 2621500 + }, + { + "epoch": 12.99, + "learning_rate": 4.350720609582696e-05, + "loss": 2.4192, + "step": 2622000 + }, + { + "epoch": 12.99, + "learning_rate": 4.3505967509400874e-05, + "loss": 2.4349, + "step": 2622500 + }, + { + "epoch": 13.0, + "learning_rate": 4.350472892297479e-05, + "loss": 2.403, + "step": 2623000 + }, + { + "epoch": 13.0, + "learning_rate": 4.350349281372156e-05, + "loss": 2.4137, + "step": 2623500 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.6461218698848585, + "eval_accuracy_mlm": 0.5996847093159505, + "eval_accuracy_nsp": 0.8652920665675657, + "eval_loss": 2.396927833557129, + "eval_runtime": 146.13, + "eval_samples_per_second": 1744.741, + "eval_steps_per_second": 72.702, + "step": 2623959 + }, + { + "epoch": 13.0, + "learning_rate": 4.350225422729548e-05, + "loss": 2.3977, + "step": 2624000 + }, + { + "epoch": 13.0, + "learning_rate": 4.3501015640869394e-05, + "loss": 2.374, + "step": 2624500 + }, + { + "epoch": 13.01, + "learning_rate": 4.3499779531616156e-05, + "loss": 2.3743, + "step": 2625000 + }, + { + "epoch": 13.01, + "learning_rate": 4.349854094519007e-05, + "loss": 2.3774, + "step": 2625500 + }, + { + "epoch": 13.01, + "learning_rate": 4.349730483593684e-05, + "loss": 2.3954, + "step": 2626000 + }, + { + "epoch": 13.01, + "learning_rate": 4.349606624951076e-05, + "loss": 2.3685, + "step": 2626500 + }, + { + "epoch": 13.02, + "learning_rate": 4.3494827663084675e-05, + "loss": 2.3846, + "step": 2627000 + }, + { + "epoch": 13.02, + "learning_rate": 4.349358907665859e-05, + "loss": 2.4169, + "step": 2627500 + }, + { + "epoch": 13.02, + "learning_rate": 4.349235049023251e-05, + "loss": 2.3881, + "step": 2628000 + }, + { + "epoch": 13.02, + "learning_rate": 4.3491111903806426e-05, + "loss": 2.3866, + "step": 2628500 + }, + { + "epoch": 13.02, + "learning_rate": 4.3489875794553195e-05, + "loss": 2.3958, + "step": 2629000 + }, + { + "epoch": 13.03, + "learning_rate": 4.3488639685299964e-05, + "loss": 2.3686, + "step": 2629500 + }, + { + "epoch": 13.03, + "learning_rate": 4.348740109887388e-05, + "loss": 2.3922, + "step": 2630000 + }, + { + "epoch": 13.03, + "learning_rate": 4.34861625124478e-05, + "loss": 2.3662, + "step": 2630500 + }, + { + "epoch": 13.03, + "learning_rate": 4.3484923926021714e-05, + "loss": 2.3793, + "step": 2631000 + }, + { + "epoch": 13.04, + "learning_rate": 4.3483685339595625e-05, + "loss": 2.3712, + "step": 2631500 + }, + { + "epoch": 13.04, + "learning_rate": 4.348244675316954e-05, + "loss": 2.4067, + "step": 2632000 + }, + { + "epoch": 13.04, + "learning_rate": 4.348120816674346e-05, + "loss": 2.3861, + "step": 2632500 + }, + { + "epoch": 13.04, + "learning_rate": 4.3479972057490234e-05, + "loss": 2.4153, + "step": 2633000 + }, + { + "epoch": 13.05, + "learning_rate": 4.347873347106415e-05, + "loss": 2.3681, + "step": 2633500 + }, + { + "epoch": 13.05, + "learning_rate": 4.347749488463807e-05, + "loss": 2.3843, + "step": 2634000 + }, + { + "epoch": 13.05, + "learning_rate": 4.347625629821198e-05, + "loss": 2.3793, + "step": 2634500 + }, + { + "epoch": 13.05, + "learning_rate": 4.3475017711785895e-05, + "loss": 2.3923, + "step": 2635000 + }, + { + "epoch": 13.06, + "learning_rate": 4.347377912535981e-05, + "loss": 2.3799, + "step": 2635500 + }, + { + "epoch": 13.06, + "learning_rate": 4.347254053893373e-05, + "loss": 2.3595, + "step": 2636000 + }, + { + "epoch": 13.06, + "learning_rate": 4.3471301952507646e-05, + "loss": 2.4064, + "step": 2636500 + }, + { + "epoch": 13.06, + "learning_rate": 4.3470063366081556e-05, + "loss": 2.4083, + "step": 2637000 + }, + { + "epoch": 13.07, + "learning_rate": 4.346882477965547e-05, + "loss": 2.3916, + "step": 2637500 + }, + { + "epoch": 13.07, + "learning_rate": 4.346758619322939e-05, + "loss": 2.372, + "step": 2638000 + }, + { + "epoch": 13.07, + "learning_rate": 4.346634760680331e-05, + "loss": 2.3781, + "step": 2638500 + }, + { + "epoch": 13.07, + "learning_rate": 4.3465109020377224e-05, + "loss": 2.408, + "step": 2639000 + }, + { + "epoch": 13.08, + "learning_rate": 4.346387043395114e-05, + "loss": 2.4136, + "step": 2639500 + }, + { + "epoch": 13.08, + "learning_rate": 4.346263432469791e-05, + "loss": 2.3621, + "step": 2640000 + }, + { + "epoch": 13.08, + "learning_rate": 4.3461398215444685e-05, + "loss": 2.3862, + "step": 2640500 + }, + { + "epoch": 13.08, + "learning_rate": 4.3460159629018595e-05, + "loss": 2.3855, + "step": 2641000 + }, + { + "epoch": 13.09, + "learning_rate": 4.345892104259251e-05, + "loss": 2.4046, + "step": 2641500 + }, + { + "epoch": 13.09, + "learning_rate": 4.345768245616643e-05, + "loss": 2.3887, + "step": 2642000 + }, + { + "epoch": 13.09, + "learning_rate": 4.3456443869740346e-05, + "loss": 2.3702, + "step": 2642500 + }, + { + "epoch": 13.09, + "learning_rate": 4.3455207760487115e-05, + "loss": 2.4123, + "step": 2643000 + }, + { + "epoch": 13.1, + "learning_rate": 4.345396917406103e-05, + "loss": 2.3679, + "step": 2643500 + }, + { + "epoch": 13.1, + "learning_rate": 4.345273058763494e-05, + "loss": 2.3806, + "step": 2644000 + }, + { + "epoch": 13.1, + "learning_rate": 4.345149200120886e-05, + "loss": 2.4071, + "step": 2644500 + }, + { + "epoch": 13.1, + "learning_rate": 4.3450253414782776e-05, + "loss": 2.3682, + "step": 2645000 + }, + { + "epoch": 13.11, + "learning_rate": 4.344901482835669e-05, + "loss": 2.375, + "step": 2645500 + }, + { + "epoch": 13.11, + "learning_rate": 4.344777624193061e-05, + "loss": 2.3903, + "step": 2646000 + }, + { + "epoch": 13.11, + "learning_rate": 4.3446537655504526e-05, + "loss": 2.3944, + "step": 2646500 + }, + { + "epoch": 13.11, + "learning_rate": 4.344529906907844e-05, + "loss": 2.3925, + "step": 2647000 + }, + { + "epoch": 13.12, + "learning_rate": 4.344406048265236e-05, + "loss": 2.3984, + "step": 2647500 + }, + { + "epoch": 13.12, + "learning_rate": 4.344282189622628e-05, + "loss": 2.4028, + "step": 2648000 + }, + { + "epoch": 13.12, + "learning_rate": 4.3441583309800194e-05, + "loss": 2.3867, + "step": 2648500 + }, + { + "epoch": 13.12, + "learning_rate": 4.344034472337411e-05, + "loss": 2.4064, + "step": 2649000 + }, + { + "epoch": 13.13, + "learning_rate": 4.343910613694803e-05, + "loss": 2.3914, + "step": 2649500 + }, + { + "epoch": 13.13, + "learning_rate": 4.34378700276948e-05, + "loss": 2.4027, + "step": 2650000 + }, + { + "epoch": 13.13, + "learning_rate": 4.343663391844156e-05, + "loss": 2.4052, + "step": 2650500 + }, + { + "epoch": 13.13, + "learning_rate": 4.3435395332015476e-05, + "loss": 2.3882, + "step": 2651000 + }, + { + "epoch": 13.14, + "learning_rate": 4.3434161699935096e-05, + "loss": 2.3821, + "step": 2651500 + }, + { + "epoch": 13.14, + "learning_rate": 4.3432923113509013e-05, + "loss": 2.4076, + "step": 2652000 + }, + { + "epoch": 13.14, + "learning_rate": 4.343168452708293e-05, + "loss": 2.3935, + "step": 2652500 + }, + { + "epoch": 13.14, + "learning_rate": 4.34304484178297e-05, + "loss": 2.3977, + "step": 2653000 + }, + { + "epoch": 13.15, + "learning_rate": 4.3429209831403616e-05, + "loss": 2.3899, + "step": 2653500 + }, + { + "epoch": 13.15, + "learning_rate": 4.342797124497753e-05, + "loss": 2.3781, + "step": 2654000 + }, + { + "epoch": 13.15, + "learning_rate": 4.342673265855145e-05, + "loss": 2.3914, + "step": 2654500 + }, + { + "epoch": 13.15, + "learning_rate": 4.342549407212537e-05, + "loss": 2.3926, + "step": 2655000 + }, + { + "epoch": 13.16, + "learning_rate": 4.3424255485699284e-05, + "loss": 2.4267, + "step": 2655500 + }, + { + "epoch": 13.16, + "learning_rate": 4.34230168992732e-05, + "loss": 2.3881, + "step": 2656000 + }, + { + "epoch": 13.16, + "learning_rate": 4.342177831284712e-05, + "loss": 2.3844, + "step": 2656500 + }, + { + "epoch": 13.16, + "learning_rate": 4.3420539726421035e-05, + "loss": 2.4005, + "step": 2657000 + }, + { + "epoch": 13.17, + "learning_rate": 4.341930113999495e-05, + "loss": 2.3819, + "step": 2657500 + }, + { + "epoch": 13.17, + "learning_rate": 4.341806255356887e-05, + "loss": 2.395, + "step": 2658000 + }, + { + "epoch": 13.17, + "learning_rate": 4.341682644431563e-05, + "loss": 2.371, + "step": 2658500 + }, + { + "epoch": 13.17, + "learning_rate": 4.341558785788955e-05, + "loss": 2.3832, + "step": 2659000 + }, + { + "epoch": 13.18, + "learning_rate": 4.3414349271463464e-05, + "loss": 2.391, + "step": 2659500 + }, + { + "epoch": 13.18, + "learning_rate": 4.341311068503738e-05, + "loss": 2.3821, + "step": 2660000 + }, + { + "epoch": 13.18, + "learning_rate": 4.34118720986113e-05, + "loss": 2.3738, + "step": 2660500 + }, + { + "epoch": 13.18, + "learning_rate": 4.3410633512185215e-05, + "loss": 2.398, + "step": 2661000 + }, + { + "epoch": 13.19, + "learning_rate": 4.340939492575913e-05, + "loss": 2.395, + "step": 2661500 + }, + { + "epoch": 13.19, + "learning_rate": 4.340815633933305e-05, + "loss": 2.3963, + "step": 2662000 + }, + { + "epoch": 13.19, + "learning_rate": 4.3406917752906966e-05, + "loss": 2.3994, + "step": 2662500 + }, + { + "epoch": 13.19, + "learning_rate": 4.3405679166480876e-05, + "loss": 2.3915, + "step": 2663000 + }, + { + "epoch": 13.2, + "learning_rate": 4.340444058005479e-05, + "loss": 2.4018, + "step": 2663500 + }, + { + "epoch": 13.2, + "learning_rate": 4.340320199362871e-05, + "loss": 2.3685, + "step": 2664000 + }, + { + "epoch": 13.2, + "learning_rate": 4.340196340720263e-05, + "loss": 2.4209, + "step": 2664500 + }, + { + "epoch": 13.2, + "learning_rate": 4.3400724820776544e-05, + "loss": 2.406, + "step": 2665000 + }, + { + "epoch": 13.21, + "learning_rate": 4.339948623435046e-05, + "loss": 2.3968, + "step": 2665500 + }, + { + "epoch": 13.21, + "learning_rate": 4.339824764792438e-05, + "loss": 2.3927, + "step": 2666000 + }, + { + "epoch": 13.21, + "learning_rate": 4.3397011538671146e-05, + "loss": 2.3701, + "step": 2666500 + }, + { + "epoch": 13.21, + "learning_rate": 4.339577295224506e-05, + "loss": 2.364, + "step": 2667000 + }, + { + "epoch": 13.22, + "learning_rate": 4.339453436581898e-05, + "loss": 2.3787, + "step": 2667500 + }, + { + "epoch": 13.22, + "learning_rate": 4.33932957793929e-05, + "loss": 2.3985, + "step": 2668000 + }, + { + "epoch": 13.22, + "learning_rate": 4.3392057192966814e-05, + "loss": 2.379, + "step": 2668500 + }, + { + "epoch": 13.22, + "learning_rate": 4.339081860654073e-05, + "loss": 2.3979, + "step": 2669000 + }, + { + "epoch": 13.23, + "learning_rate": 4.338958002011465e-05, + "loss": 2.3876, + "step": 2669500 + }, + { + "epoch": 13.23, + "learning_rate": 4.3388341433688565e-05, + "loss": 2.4065, + "step": 2670000 + }, + { + "epoch": 13.23, + "learning_rate": 4.338710284726248e-05, + "loss": 2.3983, + "step": 2670500 + }, + { + "epoch": 13.23, + "learning_rate": 4.338586426083639e-05, + "loss": 2.4001, + "step": 2671000 + }, + { + "epoch": 13.24, + "learning_rate": 4.338462567441031e-05, + "loss": 2.4228, + "step": 2671500 + }, + { + "epoch": 13.24, + "learning_rate": 4.338338956515708e-05, + "loss": 2.3995, + "step": 2672000 + }, + { + "epoch": 13.24, + "learning_rate": 4.3382153455903847e-05, + "loss": 2.3867, + "step": 2672500 + }, + { + "epoch": 13.24, + "learning_rate": 4.3380914869477763e-05, + "loss": 2.3989, + "step": 2673000 + }, + { + "epoch": 13.25, + "learning_rate": 4.337967628305168e-05, + "loss": 2.4051, + "step": 2673500 + }, + { + "epoch": 13.25, + "learning_rate": 4.33784376966256e-05, + "loss": 2.4195, + "step": 2674000 + }, + { + "epoch": 13.25, + "learning_rate": 4.3377199110199514e-05, + "loss": 2.4004, + "step": 2674500 + }, + { + "epoch": 13.25, + "learning_rate": 4.337596052377343e-05, + "loss": 2.3939, + "step": 2675000 + }, + { + "epoch": 13.26, + "learning_rate": 4.337472193734735e-05, + "loss": 2.3849, + "step": 2675500 + }, + { + "epoch": 13.26, + "learning_rate": 4.3373483350921265e-05, + "loss": 2.3963, + "step": 2676000 + }, + { + "epoch": 13.26, + "learning_rate": 4.337224476449518e-05, + "loss": 2.4051, + "step": 2676500 + }, + { + "epoch": 13.26, + "learning_rate": 4.3371008655241944e-05, + "loss": 2.3968, + "step": 2677000 + }, + { + "epoch": 13.27, + "learning_rate": 4.336977006881586e-05, + "loss": 2.4036, + "step": 2677500 + }, + { + "epoch": 13.27, + "learning_rate": 4.336853148238978e-05, + "loss": 2.4113, + "step": 2678000 + }, + { + "epoch": 13.27, + "learning_rate": 4.3367292895963695e-05, + "loss": 2.4127, + "step": 2678500 + }, + { + "epoch": 13.27, + "learning_rate": 4.336605430953761e-05, + "loss": 2.3876, + "step": 2679000 + }, + { + "epoch": 13.28, + "learning_rate": 4.336481572311153e-05, + "loss": 2.4134, + "step": 2679500 + }, + { + "epoch": 13.28, + "learning_rate": 4.33635796138583e-05, + "loss": 2.4021, + "step": 2680000 + }, + { + "epoch": 13.28, + "learning_rate": 4.3362343504605066e-05, + "loss": 2.401, + "step": 2680500 + }, + { + "epoch": 13.28, + "learning_rate": 4.336110491817898e-05, + "loss": 2.3873, + "step": 2681000 + }, + { + "epoch": 13.29, + "learning_rate": 4.335986633175289e-05, + "loss": 2.394, + "step": 2681500 + }, + { + "epoch": 13.29, + "learning_rate": 4.335863022249967e-05, + "loss": 2.4049, + "step": 2682000 + }, + { + "epoch": 13.29, + "learning_rate": 4.3357391636073586e-05, + "loss": 2.3955, + "step": 2682500 + }, + { + "epoch": 13.29, + "learning_rate": 4.33561530496475e-05, + "loss": 2.4062, + "step": 2683000 + }, + { + "epoch": 13.29, + "learning_rate": 4.335491446322142e-05, + "loss": 2.427, + "step": 2683500 + }, + { + "epoch": 13.3, + "learning_rate": 4.335367587679534e-05, + "loss": 2.4073, + "step": 2684000 + }, + { + "epoch": 13.3, + "learning_rate": 4.3352437290369254e-05, + "loss": 2.4097, + "step": 2684500 + }, + { + "epoch": 13.3, + "learning_rate": 4.3351198703943164e-05, + "loss": 2.3962, + "step": 2685000 + }, + { + "epoch": 13.3, + "learning_rate": 4.334996011751708e-05, + "loss": 2.3851, + "step": 2685500 + }, + { + "epoch": 13.31, + "learning_rate": 4.3348721531091e-05, + "loss": 2.4041, + "step": 2686000 + }, + { + "epoch": 13.31, + "learning_rate": 4.3347482944664914e-05, + "loss": 2.3944, + "step": 2686500 + }, + { + "epoch": 13.31, + "learning_rate": 4.334624435823883e-05, + "loss": 2.4004, + "step": 2687000 + }, + { + "epoch": 13.31, + "learning_rate": 4.334501072615845e-05, + "loss": 2.3767, + "step": 2687500 + }, + { + "epoch": 13.32, + "learning_rate": 4.334377213973237e-05, + "loss": 2.3833, + "step": 2688000 + }, + { + "epoch": 13.32, + "learning_rate": 4.3342533553306286e-05, + "loss": 2.3789, + "step": 2688500 + }, + { + "epoch": 13.32, + "learning_rate": 4.3341297444053055e-05, + "loss": 2.4131, + "step": 2689000 + }, + { + "epoch": 13.32, + "learning_rate": 4.334005885762697e-05, + "loss": 2.404, + "step": 2689500 + }, + { + "epoch": 13.33, + "learning_rate": 4.333882027120089e-05, + "loss": 2.4251, + "step": 2690000 + }, + { + "epoch": 13.33, + "learning_rate": 4.3337581684774806e-05, + "loss": 2.4047, + "step": 2690500 + }, + { + "epoch": 13.33, + "learning_rate": 4.333634309834872e-05, + "loss": 2.384, + "step": 2691000 + }, + { + "epoch": 13.33, + "learning_rate": 4.333510451192264e-05, + "loss": 2.4168, + "step": 2691500 + }, + { + "epoch": 13.34, + "learning_rate": 4.333386592549655e-05, + "loss": 2.3786, + "step": 2692000 + }, + { + "epoch": 13.34, + "learning_rate": 4.3332627339070467e-05, + "loss": 2.3896, + "step": 2692500 + }, + { + "epoch": 13.34, + "learning_rate": 4.3331388752644383e-05, + "loss": 2.3878, + "step": 2693000 + }, + { + "epoch": 13.34, + "learning_rate": 4.33301501662183e-05, + "loss": 2.3996, + "step": 2693500 + }, + { + "epoch": 13.35, + "learning_rate": 4.332891157979222e-05, + "loss": 2.4324, + "step": 2694000 + }, + { + "epoch": 13.35, + "learning_rate": 4.3327675470538986e-05, + "loss": 2.3555, + "step": 2694500 + }, + { + "epoch": 13.35, + "learning_rate": 4.33264368841129e-05, + "loss": 2.4025, + "step": 2695000 + }, + { + "epoch": 13.35, + "learning_rate": 4.332520077485967e-05, + "loss": 2.4122, + "step": 2695500 + }, + { + "epoch": 13.36, + "learning_rate": 4.332396218843359e-05, + "loss": 2.3975, + "step": 2696000 + }, + { + "epoch": 13.36, + "learning_rate": 4.3322723602007506e-05, + "loss": 2.3986, + "step": 2696500 + }, + { + "epoch": 13.36, + "learning_rate": 4.332148501558142e-05, + "loss": 2.378, + "step": 2697000 + }, + { + "epoch": 13.36, + "learning_rate": 4.332024642915534e-05, + "loss": 2.4197, + "step": 2697500 + }, + { + "epoch": 13.37, + "learning_rate": 4.3319007842729256e-05, + "loss": 2.4088, + "step": 2698000 + }, + { + "epoch": 13.37, + "learning_rate": 4.3317769256303173e-05, + "loss": 2.427, + "step": 2698500 + }, + { + "epoch": 13.37, + "learning_rate": 4.3316530669877084e-05, + "loss": 2.4063, + "step": 2699000 + }, + { + "epoch": 13.37, + "learning_rate": 4.3315292083451e-05, + "loss": 2.4056, + "step": 2699500 + }, + { + "epoch": 13.38, + "learning_rate": 4.331405349702492e-05, + "loss": 2.3971, + "step": 2700000 + }, + { + "epoch": 13.38, + "learning_rate": 4.3312814910598834e-05, + "loss": 2.4082, + "step": 2700500 + }, + { + "epoch": 13.38, + "learning_rate": 4.331157632417275e-05, + "loss": 2.401, + "step": 2701000 + }, + { + "epoch": 13.38, + "learning_rate": 4.331033773774666e-05, + "loss": 2.3885, + "step": 2701500 + }, + { + "epoch": 13.39, + "learning_rate": 4.330909915132058e-05, + "loss": 2.3773, + "step": 2702000 + }, + { + "epoch": 13.39, + "learning_rate": 4.3307860564894495e-05, + "loss": 2.441, + "step": 2702500 + }, + { + "epoch": 13.39, + "learning_rate": 4.330662197846841e-05, + "loss": 2.4094, + "step": 2703000 + }, + { + "epoch": 13.39, + "learning_rate": 4.330538339204233e-05, + "loss": 2.4056, + "step": 2703500 + }, + { + "epoch": 13.4, + "learning_rate": 4.3304144805616246e-05, + "loss": 2.4129, + "step": 2704000 + }, + { + "epoch": 13.4, + "learning_rate": 4.3302908696363015e-05, + "loss": 2.3796, + "step": 2704500 + }, + { + "epoch": 13.4, + "learning_rate": 4.330167010993693e-05, + "loss": 2.3743, + "step": 2705000 + }, + { + "epoch": 13.4, + "learning_rate": 4.330043152351085e-05, + "loss": 2.4091, + "step": 2705500 + }, + { + "epoch": 13.41, + "learning_rate": 4.329919789143047e-05, + "loss": 2.3977, + "step": 2706000 + }, + { + "epoch": 13.41, + "learning_rate": 4.3297959305004386e-05, + "loss": 2.3935, + "step": 2706500 + }, + { + "epoch": 13.41, + "learning_rate": 4.32967207185783e-05, + "loss": 2.4121, + "step": 2707000 + }, + { + "epoch": 13.41, + "learning_rate": 4.329548213215222e-05, + "loss": 2.4118, + "step": 2707500 + }, + { + "epoch": 13.42, + "learning_rate": 4.329424354572614e-05, + "loss": 2.4029, + "step": 2708000 + }, + { + "epoch": 13.42, + "learning_rate": 4.3293007436472906e-05, + "loss": 2.3758, + "step": 2708500 + }, + { + "epoch": 13.42, + "learning_rate": 4.329176885004682e-05, + "loss": 2.3919, + "step": 2709000 + }, + { + "epoch": 13.42, + "learning_rate": 4.329053026362074e-05, + "loss": 2.408, + "step": 2709500 + }, + { + "epoch": 13.43, + "learning_rate": 4.328929167719466e-05, + "loss": 2.3962, + "step": 2710000 + }, + { + "epoch": 13.43, + "learning_rate": 4.3288053090768574e-05, + "loss": 2.3936, + "step": 2710500 + }, + { + "epoch": 13.43, + "learning_rate": 4.328681450434249e-05, + "loss": 2.402, + "step": 2711000 + }, + { + "epoch": 13.43, + "learning_rate": 4.328557839508925e-05, + "loss": 2.3929, + "step": 2711500 + }, + { + "epoch": 13.44, + "learning_rate": 4.328433980866317e-05, + "loss": 2.3941, + "step": 2712000 + }, + { + "epoch": 13.44, + "learning_rate": 4.3283101222237086e-05, + "loss": 2.396, + "step": 2712500 + }, + { + "epoch": 13.44, + "learning_rate": 4.3281862635811003e-05, + "loss": 2.4242, + "step": 2713000 + }, + { + "epoch": 13.44, + "learning_rate": 4.328062404938492e-05, + "loss": 2.4014, + "step": 2713500 + }, + { + "epoch": 13.45, + "learning_rate": 4.327938546295884e-05, + "loss": 2.3839, + "step": 2714000 + }, + { + "epoch": 13.45, + "learning_rate": 4.3278146876532754e-05, + "loss": 2.4211, + "step": 2714500 + }, + { + "epoch": 13.45, + "learning_rate": 4.327691076727952e-05, + "loss": 2.3848, + "step": 2715000 + }, + { + "epoch": 13.45, + "learning_rate": 4.327567218085344e-05, + "loss": 2.4099, + "step": 2715500 + }, + { + "epoch": 13.46, + "learning_rate": 4.327443359442736e-05, + "loss": 2.4064, + "step": 2716000 + }, + { + "epoch": 13.46, + "learning_rate": 4.3273195008001274e-05, + "loss": 2.4105, + "step": 2716500 + }, + { + "epoch": 13.46, + "learning_rate": 4.327195642157519e-05, + "loss": 2.4022, + "step": 2717000 + }, + { + "epoch": 13.46, + "learning_rate": 4.327071783514911e-05, + "loss": 2.3729, + "step": 2717500 + }, + { + "epoch": 13.47, + "learning_rate": 4.3269479248723025e-05, + "loss": 2.4031, + "step": 2718000 + }, + { + "epoch": 13.47, + "learning_rate": 4.326824066229694e-05, + "loss": 2.3839, + "step": 2718500 + }, + { + "epoch": 13.47, + "learning_rate": 4.3267004553043704e-05, + "loss": 2.4294, + "step": 2719000 + }, + { + "epoch": 13.47, + "learning_rate": 4.326576596661762e-05, + "loss": 2.4108, + "step": 2719500 + }, + { + "epoch": 13.48, + "learning_rate": 4.326452738019154e-05, + "loss": 2.4364, + "step": 2720000 + }, + { + "epoch": 13.48, + "learning_rate": 4.3263288793765454e-05, + "loss": 2.4068, + "step": 2720500 + }, + { + "epoch": 13.48, + "learning_rate": 4.326205020733937e-05, + "loss": 2.4131, + "step": 2721000 + }, + { + "epoch": 13.48, + "learning_rate": 4.326081162091329e-05, + "loss": 2.3866, + "step": 2721500 + }, + { + "epoch": 13.49, + "learning_rate": 4.325957551166006e-05, + "loss": 2.388, + "step": 2722000 + }, + { + "epoch": 13.49, + "learning_rate": 4.3258336925233974e-05, + "loss": 2.3977, + "step": 2722500 + }, + { + "epoch": 13.49, + "learning_rate": 4.3257100815980736e-05, + "loss": 2.4153, + "step": 2723000 + }, + { + "epoch": 13.49, + "learning_rate": 4.325586222955465e-05, + "loss": 2.3835, + "step": 2723500 + }, + { + "epoch": 13.5, + "learning_rate": 4.325462364312857e-05, + "loss": 2.4049, + "step": 2724000 + }, + { + "epoch": 13.5, + "learning_rate": 4.325338505670249e-05, + "loss": 2.4043, + "step": 2724500 + }, + { + "epoch": 13.5, + "learning_rate": 4.3252146470276404e-05, + "loss": 2.4012, + "step": 2725000 + }, + { + "epoch": 13.5, + "learning_rate": 4.325090788385032e-05, + "loss": 2.3883, + "step": 2725500 + }, + { + "epoch": 13.51, + "learning_rate": 4.324966929742424e-05, + "loss": 2.4236, + "step": 2726000 + }, + { + "epoch": 13.51, + "learning_rate": 4.3248430710998154e-05, + "loss": 2.3839, + "step": 2726500 + }, + { + "epoch": 13.51, + "learning_rate": 4.324719212457207e-05, + "loss": 2.4157, + "step": 2727000 + }, + { + "epoch": 13.51, + "learning_rate": 4.324595353814599e-05, + "loss": 2.3927, + "step": 2727500 + }, + { + "epoch": 13.52, + "learning_rate": 4.324471742889276e-05, + "loss": 2.388, + "step": 2728000 + }, + { + "epoch": 13.52, + "learning_rate": 4.3243481319639526e-05, + "loss": 2.3996, + "step": 2728500 + }, + { + "epoch": 13.52, + "learning_rate": 4.3242242733213436e-05, + "loss": 2.4144, + "step": 2729000 + }, + { + "epoch": 13.52, + "learning_rate": 4.324100414678735e-05, + "loss": 2.4069, + "step": 2729500 + }, + { + "epoch": 13.53, + "learning_rate": 4.323976556036127e-05, + "loss": 2.4112, + "step": 2730000 + }, + { + "epoch": 13.53, + "learning_rate": 4.323852697393519e-05, + "loss": 2.4076, + "step": 2730500 + }, + { + "epoch": 13.53, + "learning_rate": 4.3237288387509104e-05, + "loss": 2.4015, + "step": 2731000 + }, + { + "epoch": 13.53, + "learning_rate": 4.323604980108302e-05, + "loss": 2.4073, + "step": 2731500 + }, + { + "epoch": 13.54, + "learning_rate": 4.323481369182979e-05, + "loss": 2.428, + "step": 2732000 + }, + { + "epoch": 13.54, + "learning_rate": 4.3233575105403706e-05, + "loss": 2.3869, + "step": 2732500 + }, + { + "epoch": 13.54, + "learning_rate": 4.323233651897762e-05, + "loss": 2.411, + "step": 2733000 + }, + { + "epoch": 13.54, + "learning_rate": 4.323109793255154e-05, + "loss": 2.3869, + "step": 2733500 + }, + { + "epoch": 13.55, + "learning_rate": 4.322985934612546e-05, + "loss": 2.4143, + "step": 2734000 + }, + { + "epoch": 13.55, + "learning_rate": 4.3228620759699374e-05, + "loss": 2.4027, + "step": 2734500 + }, + { + "epoch": 13.55, + "learning_rate": 4.322738217327329e-05, + "loss": 2.3911, + "step": 2735000 + }, + { + "epoch": 13.55, + "learning_rate": 4.322614606402006e-05, + "loss": 2.4308, + "step": 2735500 + }, + { + "epoch": 13.56, + "learning_rate": 4.322490747759397e-05, + "loss": 2.4117, + "step": 2736000 + }, + { + "epoch": 13.56, + "learning_rate": 4.322366889116789e-05, + "loss": 2.4093, + "step": 2736500 + }, + { + "epoch": 13.56, + "learning_rate": 4.3222430304741804e-05, + "loss": 2.372, + "step": 2737000 + }, + { + "epoch": 13.56, + "learning_rate": 4.322119171831572e-05, + "loss": 2.3907, + "step": 2737500 + }, + { + "epoch": 13.56, + "learning_rate": 4.321995313188964e-05, + "loss": 2.3867, + "step": 2738000 + }, + { + "epoch": 13.57, + "learning_rate": 4.3218714545463555e-05, + "loss": 2.3667, + "step": 2738500 + }, + { + "epoch": 13.57, + "learning_rate": 4.3217478436210323e-05, + "loss": 2.3809, + "step": 2739000 + }, + { + "epoch": 13.57, + "learning_rate": 4.321623984978424e-05, + "loss": 2.4062, + "step": 2739500 + }, + { + "epoch": 13.57, + "learning_rate": 4.321500126335816e-05, + "loss": 2.396, + "step": 2740000 + }, + { + "epoch": 13.58, + "learning_rate": 4.3213762676932074e-05, + "loss": 2.4295, + "step": 2740500 + }, + { + "epoch": 13.58, + "learning_rate": 4.321252409050599e-05, + "loss": 2.4223, + "step": 2741000 + }, + { + "epoch": 13.58, + "learning_rate": 4.321128550407991e-05, + "loss": 2.4312, + "step": 2741500 + }, + { + "epoch": 13.58, + "learning_rate": 4.3210046917653825e-05, + "loss": 2.408, + "step": 2742000 + }, + { + "epoch": 13.59, + "learning_rate": 4.320880833122774e-05, + "loss": 2.391, + "step": 2742500 + }, + { + "epoch": 13.59, + "learning_rate": 4.320756974480166e-05, + "loss": 2.4148, + "step": 2743000 + }, + { + "epoch": 13.59, + "learning_rate": 4.320633363554842e-05, + "loss": 2.3847, + "step": 2743500 + }, + { + "epoch": 13.59, + "learning_rate": 4.320509504912234e-05, + "loss": 2.4125, + "step": 2744000 + }, + { + "epoch": 13.6, + "learning_rate": 4.3203856462696255e-05, + "loss": 2.4068, + "step": 2744500 + }, + { + "epoch": 13.6, + "learning_rate": 4.320261787627017e-05, + "loss": 2.3831, + "step": 2745000 + }, + { + "epoch": 13.6, + "learning_rate": 4.320137928984409e-05, + "loss": 2.4067, + "step": 2745500 + }, + { + "epoch": 13.6, + "learning_rate": 4.3200140703418006e-05, + "loss": 2.3968, + "step": 2746000 + }, + { + "epoch": 13.61, + "learning_rate": 4.319890211699192e-05, + "loss": 2.4172, + "step": 2746500 + }, + { + "epoch": 13.61, + "learning_rate": 4.319766353056584e-05, + "loss": 2.4011, + "step": 2747000 + }, + { + "epoch": 13.61, + "learning_rate": 4.319642494413975e-05, + "loss": 2.4083, + "step": 2747500 + }, + { + "epoch": 13.61, + "learning_rate": 4.3195188834886525e-05, + "loss": 2.4248, + "step": 2748000 + }, + { + "epoch": 13.62, + "learning_rate": 4.319395024846044e-05, + "loss": 2.3911, + "step": 2748500 + }, + { + "epoch": 13.62, + "learning_rate": 4.319271166203436e-05, + "loss": 2.3895, + "step": 2749000 + }, + { + "epoch": 13.62, + "learning_rate": 4.3191473075608276e-05, + "loss": 2.4364, + "step": 2749500 + }, + { + "epoch": 13.62, + "learning_rate": 4.319023448918219e-05, + "loss": 2.4193, + "step": 2750000 + }, + { + "epoch": 13.63, + "learning_rate": 4.3188998379928955e-05, + "loss": 2.4033, + "step": 2750500 + }, + { + "epoch": 13.63, + "learning_rate": 4.318775979350287e-05, + "loss": 2.4104, + "step": 2751000 + }, + { + "epoch": 13.63, + "learning_rate": 4.318652120707679e-05, + "loss": 2.4298, + "step": 2751500 + }, + { + "epoch": 13.63, + "learning_rate": 4.3185282620650706e-05, + "loss": 2.3979, + "step": 2752000 + }, + { + "epoch": 13.64, + "learning_rate": 4.318404403422462e-05, + "loss": 2.39, + "step": 2752500 + }, + { + "epoch": 13.64, + "learning_rate": 4.318280792497139e-05, + "loss": 2.4094, + "step": 2753000 + }, + { + "epoch": 13.64, + "learning_rate": 4.318156933854531e-05, + "loss": 2.4306, + "step": 2753500 + }, + { + "epoch": 13.64, + "learning_rate": 4.3180330752119225e-05, + "loss": 2.4091, + "step": 2754000 + }, + { + "epoch": 13.65, + "learning_rate": 4.317909216569314e-05, + "loss": 2.4295, + "step": 2754500 + }, + { + "epoch": 13.65, + "learning_rate": 4.317785357926706e-05, + "loss": 2.4031, + "step": 2755000 + }, + { + "epoch": 13.65, + "learning_rate": 4.317661747001383e-05, + "loss": 2.4377, + "step": 2755500 + }, + { + "epoch": 13.65, + "learning_rate": 4.3175378883587745e-05, + "loss": 2.3773, + "step": 2756000 + }, + { + "epoch": 13.66, + "learning_rate": 4.3174140297161655e-05, + "loss": 2.4163, + "step": 2756500 + }, + { + "epoch": 13.66, + "learning_rate": 4.317290171073557e-05, + "loss": 2.4139, + "step": 2757000 + }, + { + "epoch": 13.66, + "learning_rate": 4.317166312430949e-05, + "loss": 2.4116, + "step": 2757500 + }, + { + "epoch": 13.66, + "learning_rate": 4.3170424537883406e-05, + "loss": 2.391, + "step": 2758000 + }, + { + "epoch": 13.67, + "learning_rate": 4.316918595145732e-05, + "loss": 2.4302, + "step": 2758500 + }, + { + "epoch": 13.67, + "learning_rate": 4.316794984220409e-05, + "loss": 2.4052, + "step": 2759000 + }, + { + "epoch": 13.67, + "learning_rate": 4.316671373295086e-05, + "loss": 2.4154, + "step": 2759500 + }, + { + "epoch": 13.67, + "learning_rate": 4.316547514652478e-05, + "loss": 2.3856, + "step": 2760000 + }, + { + "epoch": 13.68, + "learning_rate": 4.3164236560098694e-05, + "loss": 2.4001, + "step": 2760500 + }, + { + "epoch": 13.68, + "learning_rate": 4.316299797367261e-05, + "loss": 2.4079, + "step": 2761000 + }, + { + "epoch": 13.68, + "learning_rate": 4.316175938724653e-05, + "loss": 2.3876, + "step": 2761500 + }, + { + "epoch": 13.68, + "learning_rate": 4.3160520800820445e-05, + "loss": 2.4033, + "step": 2762000 + }, + { + "epoch": 13.69, + "learning_rate": 4.315928221439436e-05, + "loss": 2.4024, + "step": 2762500 + }, + { + "epoch": 13.69, + "learning_rate": 4.3158046105141124e-05, + "loss": 2.3802, + "step": 2763000 + }, + { + "epoch": 13.69, + "learning_rate": 4.315680751871504e-05, + "loss": 2.4104, + "step": 2763500 + }, + { + "epoch": 13.69, + "learning_rate": 4.315556893228896e-05, + "loss": 2.3841, + "step": 2764000 + }, + { + "epoch": 13.7, + "learning_rate": 4.315433282303573e-05, + "loss": 2.3901, + "step": 2764500 + }, + { + "epoch": 13.7, + "learning_rate": 4.3153094236609644e-05, + "loss": 2.4044, + "step": 2765000 + }, + { + "epoch": 13.7, + "learning_rate": 4.315185565018356e-05, + "loss": 2.4209, + "step": 2765500 + }, + { + "epoch": 13.7, + "learning_rate": 4.315061706375748e-05, + "loss": 2.3937, + "step": 2766000 + }, + { + "epoch": 13.71, + "learning_rate": 4.3149380954504246e-05, + "loss": 2.41, + "step": 2766500 + }, + { + "epoch": 13.71, + "learning_rate": 4.3148142368078156e-05, + "loss": 2.4149, + "step": 2767000 + }, + { + "epoch": 13.71, + "learning_rate": 4.314690378165207e-05, + "loss": 2.4024, + "step": 2767500 + }, + { + "epoch": 13.71, + "learning_rate": 4.314566519522599e-05, + "loss": 2.4355, + "step": 2768000 + }, + { + "epoch": 13.72, + "learning_rate": 4.314442660879991e-05, + "loss": 2.4085, + "step": 2768500 + }, + { + "epoch": 13.72, + "learning_rate": 4.3143188022373824e-05, + "loss": 2.3709, + "step": 2769000 + }, + { + "epoch": 13.72, + "learning_rate": 4.314194943594774e-05, + "loss": 2.397, + "step": 2769500 + }, + { + "epoch": 13.72, + "learning_rate": 4.314071084952166e-05, + "loss": 2.3901, + "step": 2770000 + }, + { + "epoch": 13.73, + "learning_rate": 4.3139472263095575e-05, + "loss": 2.4026, + "step": 2770500 + }, + { + "epoch": 13.73, + "learning_rate": 4.313823367666949e-05, + "loss": 2.4013, + "step": 2771000 + }, + { + "epoch": 13.73, + "learning_rate": 4.313699756741626e-05, + "loss": 2.3871, + "step": 2771500 + }, + { + "epoch": 13.73, + "learning_rate": 4.313575898099018e-05, + "loss": 2.4078, + "step": 2772000 + }, + { + "epoch": 13.74, + "learning_rate": 4.3134520394564094e-05, + "loss": 2.4151, + "step": 2772500 + }, + { + "epoch": 13.74, + "learning_rate": 4.313328180813801e-05, + "loss": 2.4024, + "step": 2773000 + }, + { + "epoch": 13.74, + "learning_rate": 4.313204322171193e-05, + "loss": 2.4182, + "step": 2773500 + }, + { + "epoch": 13.74, + "learning_rate": 4.3130804635285845e-05, + "loss": 2.3841, + "step": 2774000 + }, + { + "epoch": 13.75, + "learning_rate": 4.312956604885976e-05, + "loss": 2.3987, + "step": 2774500 + }, + { + "epoch": 13.75, + "learning_rate": 4.312832746243368e-05, + "loss": 2.3907, + "step": 2775000 + }, + { + "epoch": 13.75, + "learning_rate": 4.3127088876007596e-05, + "loss": 2.4168, + "step": 2775500 + }, + { + "epoch": 13.75, + "learning_rate": 4.312585028958151e-05, + "loss": 2.4074, + "step": 2776000 + }, + { + "epoch": 13.76, + "learning_rate": 4.312461170315542e-05, + "loss": 2.3894, + "step": 2776500 + }, + { + "epoch": 13.76, + "learning_rate": 4.312337311672934e-05, + "loss": 2.3936, + "step": 2777000 + }, + { + "epoch": 13.76, + "learning_rate": 4.312213453030326e-05, + "loss": 2.3887, + "step": 2777500 + }, + { + "epoch": 13.76, + "learning_rate": 4.3120895943877174e-05, + "loss": 2.4014, + "step": 2778000 + }, + { + "epoch": 13.77, + "learning_rate": 4.311965735745109e-05, + "loss": 2.4004, + "step": 2778500 + }, + { + "epoch": 13.77, + "learning_rate": 4.3118418771025e-05, + "loss": 2.4207, + "step": 2779000 + }, + { + "epoch": 13.77, + "learning_rate": 4.3117182661771777e-05, + "loss": 2.4038, + "step": 2779500 + }, + { + "epoch": 13.77, + "learning_rate": 4.3115944075345694e-05, + "loss": 2.417, + "step": 2780000 + }, + { + "epoch": 13.78, + "learning_rate": 4.311470548891961e-05, + "loss": 2.4107, + "step": 2780500 + }, + { + "epoch": 13.78, + "learning_rate": 4.311346690249353e-05, + "loss": 2.4136, + "step": 2781000 + }, + { + "epoch": 13.78, + "learning_rate": 4.3112228316067444e-05, + "loss": 2.3845, + "step": 2781500 + }, + { + "epoch": 13.78, + "learning_rate": 4.311099220681421e-05, + "loss": 2.43, + "step": 2782000 + }, + { + "epoch": 13.79, + "learning_rate": 4.3109756097560975e-05, + "loss": 2.4391, + "step": 2782500 + }, + { + "epoch": 13.79, + "learning_rate": 4.3108519988307744e-05, + "loss": 2.417, + "step": 2783000 + }, + { + "epoch": 13.79, + "learning_rate": 4.310728387905452e-05, + "loss": 2.4119, + "step": 2783500 + }, + { + "epoch": 13.79, + "learning_rate": 4.310604529262843e-05, + "loss": 2.4281, + "step": 2784000 + }, + { + "epoch": 13.8, + "learning_rate": 4.31048091833752e-05, + "loss": 2.4254, + "step": 2784500 + }, + { + "epoch": 13.8, + "learning_rate": 4.3103570596949115e-05, + "loss": 2.3977, + "step": 2785000 + }, + { + "epoch": 13.8, + "learning_rate": 4.310233201052303e-05, + "loss": 2.4255, + "step": 2785500 + }, + { + "epoch": 13.8, + "learning_rate": 4.310109342409695e-05, + "loss": 2.393, + "step": 2786000 + }, + { + "epoch": 13.81, + "learning_rate": 4.3099854837670866e-05, + "loss": 2.4038, + "step": 2786500 + }, + { + "epoch": 13.81, + "learning_rate": 4.309861625124478e-05, + "loss": 2.4045, + "step": 2787000 + }, + { + "epoch": 13.81, + "learning_rate": 4.30973776648187e-05, + "loss": 2.4029, + "step": 2787500 + }, + { + "epoch": 13.81, + "learning_rate": 4.309613907839262e-05, + "loss": 2.4214, + "step": 2788000 + }, + { + "epoch": 13.82, + "learning_rate": 4.3094902969139386e-05, + "loss": 2.3809, + "step": 2788500 + }, + { + "epoch": 13.82, + "learning_rate": 4.30936643827133e-05, + "loss": 2.402, + "step": 2789000 + }, + { + "epoch": 13.82, + "learning_rate": 4.309242579628722e-05, + "loss": 2.3671, + "step": 2789500 + }, + { + "epoch": 13.82, + "learning_rate": 4.3091187209861137e-05, + "loss": 2.391, + "step": 2790000 + }, + { + "epoch": 13.83, + "learning_rate": 4.30899511006079e-05, + "loss": 2.4029, + "step": 2790500 + }, + { + "epoch": 13.83, + "learning_rate": 4.3088712514181816e-05, + "loss": 2.3973, + "step": 2791000 + }, + { + "epoch": 13.83, + "learning_rate": 4.308747392775573e-05, + "loss": 2.3823, + "step": 2791500 + }, + { + "epoch": 13.83, + "learning_rate": 4.308623534132965e-05, + "loss": 2.4324, + "step": 2792000 + }, + { + "epoch": 13.84, + "learning_rate": 4.3084996754903566e-05, + "loss": 2.4068, + "step": 2792500 + }, + { + "epoch": 13.84, + "learning_rate": 4.308375816847748e-05, + "loss": 2.4183, + "step": 2793000 + }, + { + "epoch": 13.84, + "learning_rate": 4.30825195820514e-05, + "loss": 2.4176, + "step": 2793500 + }, + { + "epoch": 13.84, + "learning_rate": 4.308128099562532e-05, + "loss": 2.41, + "step": 2794000 + }, + { + "epoch": 13.84, + "learning_rate": 4.3080042409199234e-05, + "loss": 2.3977, + "step": 2794500 + }, + { + "epoch": 13.85, + "learning_rate": 4.3078806299946e-05, + "loss": 2.3854, + "step": 2795000 + }, + { + "epoch": 13.85, + "learning_rate": 4.3077570190692765e-05, + "loss": 2.4014, + "step": 2795500 + }, + { + "epoch": 13.85, + "learning_rate": 4.307633160426668e-05, + "loss": 2.432, + "step": 2796000 + }, + { + "epoch": 13.85, + "learning_rate": 4.30750930178406e-05, + "loss": 2.4016, + "step": 2796500 + }, + { + "epoch": 13.86, + "learning_rate": 4.3073854431414516e-05, + "loss": 2.4061, + "step": 2797000 + }, + { + "epoch": 13.86, + "learning_rate": 4.307261584498843e-05, + "loss": 2.3972, + "step": 2797500 + }, + { + "epoch": 13.86, + "learning_rate": 4.307137725856235e-05, + "loss": 2.3841, + "step": 2798000 + }, + { + "epoch": 13.86, + "learning_rate": 4.307014114930912e-05, + "loss": 2.3905, + "step": 2798500 + }, + { + "epoch": 13.87, + "learning_rate": 4.3068902562883035e-05, + "loss": 2.412, + "step": 2799000 + }, + { + "epoch": 13.87, + "learning_rate": 4.306766397645695e-05, + "loss": 2.3943, + "step": 2799500 + }, + { + "epoch": 13.87, + "learning_rate": 4.306642539003087e-05, + "loss": 2.3898, + "step": 2800000 + }, + { + "epoch": 13.87, + "learning_rate": 4.3065186803604786e-05, + "loss": 2.3945, + "step": 2800500 + }, + { + "epoch": 13.88, + "learning_rate": 4.30639482171787e-05, + "loss": 2.4051, + "step": 2801000 + }, + { + "epoch": 13.88, + "learning_rate": 4.306270963075262e-05, + "loss": 2.3813, + "step": 2801500 + }, + { + "epoch": 13.88, + "learning_rate": 4.306147104432654e-05, + "loss": 2.3979, + "step": 2802000 + }, + { + "epoch": 13.88, + "learning_rate": 4.3060232457900454e-05, + "loss": 2.4034, + "step": 2802500 + }, + { + "epoch": 13.89, + "learning_rate": 4.305899387147437e-05, + "loss": 2.399, + "step": 2803000 + }, + { + "epoch": 13.89, + "learning_rate": 4.305775528504829e-05, + "loss": 2.3926, + "step": 2803500 + }, + { + "epoch": 13.89, + "learning_rate": 4.3056516698622205e-05, + "loss": 2.3909, + "step": 2804000 + }, + { + "epoch": 13.89, + "learning_rate": 4.3055278112196115e-05, + "loss": 2.4082, + "step": 2804500 + }, + { + "epoch": 13.9, + "learning_rate": 4.305403952577003e-05, + "loss": 2.3697, + "step": 2805000 + }, + { + "epoch": 13.9, + "learning_rate": 4.30528034165168e-05, + "loss": 2.4134, + "step": 2805500 + }, + { + "epoch": 13.9, + "learning_rate": 4.305156483009072e-05, + "loss": 2.3691, + "step": 2806000 + }, + { + "epoch": 13.9, + "learning_rate": 4.3050326243664634e-05, + "loss": 2.388, + "step": 2806500 + }, + { + "epoch": 13.91, + "learning_rate": 4.304908765723855e-05, + "loss": 2.4274, + "step": 2807000 + }, + { + "epoch": 13.91, + "learning_rate": 4.304784907081246e-05, + "loss": 2.4141, + "step": 2807500 + }, + { + "epoch": 13.91, + "learning_rate": 4.304661048438638e-05, + "loss": 2.3835, + "step": 2808000 + }, + { + "epoch": 13.91, + "learning_rate": 4.3045371897960295e-05, + "loss": 2.4, + "step": 2808500 + }, + { + "epoch": 13.92, + "learning_rate": 4.304413331153421e-05, + "loss": 2.4225, + "step": 2809000 + }, + { + "epoch": 13.92, + "learning_rate": 4.304289472510813e-05, + "loss": 2.3942, + "step": 2809500 + }, + { + "epoch": 13.92, + "learning_rate": 4.3041658615854905e-05, + "loss": 2.3977, + "step": 2810000 + }, + { + "epoch": 13.92, + "learning_rate": 4.304042250660167e-05, + "loss": 2.4204, + "step": 2810500 + }, + { + "epoch": 13.93, + "learning_rate": 4.3039183920175584e-05, + "loss": 2.4132, + "step": 2811000 + }, + { + "epoch": 13.93, + "learning_rate": 4.30379453337495e-05, + "loss": 2.4255, + "step": 2811500 + }, + { + "epoch": 13.93, + "learning_rate": 4.303670674732342e-05, + "loss": 2.4183, + "step": 2812000 + }, + { + "epoch": 13.93, + "learning_rate": 4.3035468160897334e-05, + "loss": 2.3954, + "step": 2812500 + }, + { + "epoch": 13.94, + "learning_rate": 4.30342320516441e-05, + "loss": 2.4053, + "step": 2813000 + }, + { + "epoch": 13.94, + "learning_rate": 4.303299594239087e-05, + "loss": 2.3931, + "step": 2813500 + }, + { + "epoch": 13.94, + "learning_rate": 4.303175735596479e-05, + "loss": 2.4367, + "step": 2814000 + }, + { + "epoch": 13.94, + "learning_rate": 4.30305187695387e-05, + "loss": 2.4039, + "step": 2814500 + }, + { + "epoch": 13.95, + "learning_rate": 4.3029280183112616e-05, + "loss": 2.3991, + "step": 2815000 + }, + { + "epoch": 13.95, + "learning_rate": 4.302804159668653e-05, + "loss": 2.4093, + "step": 2815500 + }, + { + "epoch": 13.95, + "learning_rate": 4.302680301026045e-05, + "loss": 2.3957, + "step": 2816000 + }, + { + "epoch": 13.95, + "learning_rate": 4.302556442383437e-05, + "loss": 2.4145, + "step": 2816500 + }, + { + "epoch": 13.96, + "learning_rate": 4.3024325837408284e-05, + "loss": 2.4256, + "step": 2817000 + }, + { + "epoch": 13.96, + "learning_rate": 4.30230872509822e-05, + "loss": 2.412, + "step": 2817500 + }, + { + "epoch": 13.96, + "learning_rate": 4.302184866455612e-05, + "loss": 2.412, + "step": 2818000 + }, + { + "epoch": 13.96, + "learning_rate": 4.3020612555302886e-05, + "loss": 2.4011, + "step": 2818500 + }, + { + "epoch": 13.97, + "learning_rate": 4.3019376446049655e-05, + "loss": 2.4118, + "step": 2819000 + }, + { + "epoch": 13.97, + "learning_rate": 4.301813785962357e-05, + "loss": 2.4149, + "step": 2819500 + }, + { + "epoch": 13.97, + "learning_rate": 4.301689927319749e-05, + "loss": 2.4283, + "step": 2820000 + }, + { + "epoch": 13.97, + "learning_rate": 4.3015660686771406e-05, + "loss": 2.3776, + "step": 2820500 + }, + { + "epoch": 13.98, + "learning_rate": 4.301442457751817e-05, + "loss": 2.4164, + "step": 2821000 + }, + { + "epoch": 13.98, + "learning_rate": 4.3013185991092085e-05, + "loss": 2.3833, + "step": 2821500 + }, + { + "epoch": 13.98, + "learning_rate": 4.3011947404666e-05, + "loss": 2.384, + "step": 2822000 + }, + { + "epoch": 13.98, + "learning_rate": 4.301070881823992e-05, + "loss": 2.4177, + "step": 2822500 + }, + { + "epoch": 13.99, + "learning_rate": 4.3009470231813836e-05, + "loss": 2.4214, + "step": 2823000 + }, + { + "epoch": 13.99, + "learning_rate": 4.300823164538775e-05, + "loss": 2.4005, + "step": 2823500 + }, + { + "epoch": 13.99, + "learning_rate": 4.300699305896167e-05, + "loss": 2.3818, + "step": 2824000 + }, + { + "epoch": 13.99, + "learning_rate": 4.3005754472535587e-05, + "loss": 2.3987, + "step": 2824500 + }, + { + "epoch": 14.0, + "learning_rate": 4.3004515886109503e-05, + "loss": 2.4102, + "step": 2825000 + }, + { + "epoch": 14.0, + "learning_rate": 4.300327729968342e-05, + "loss": 2.4001, + "step": 2825500 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.6461582624983472, + "eval_accuracy_mlm": 0.5998228536990704, + "eval_accuracy_nsp": 0.8650881122062763, + "eval_loss": 2.389559268951416, + "eval_runtime": 145.7903, + "eval_samples_per_second": 1748.807, + "eval_steps_per_second": 72.872, + "step": 2825802 + }, + { + "epoch": 14.0, + "learning_rate": 4.300204119043019e-05, + "loss": 2.3643, + "step": 2826000 + }, + { + "epoch": 14.0, + "learning_rate": 4.3000802604004106e-05, + "loss": 2.3761, + "step": 2826500 + }, + { + "epoch": 14.01, + "learning_rate": 4.299956401757802e-05, + "loss": 2.3749, + "step": 2827000 + }, + { + "epoch": 14.01, + "learning_rate": 4.299832543115194e-05, + "loss": 2.3892, + "step": 2827500 + }, + { + "epoch": 14.01, + "learning_rate": 4.299708684472585e-05, + "loss": 2.3667, + "step": 2828000 + }, + { + "epoch": 14.01, + "learning_rate": 4.299585073547262e-05, + "loss": 2.364, + "step": 2828500 + }, + { + "epoch": 14.02, + "learning_rate": 4.2994612149046536e-05, + "loss": 2.3782, + "step": 2829000 + }, + { + "epoch": 14.02, + "learning_rate": 4.299337356262045e-05, + "loss": 2.371, + "step": 2829500 + }, + { + "epoch": 14.02, + "learning_rate": 4.299213497619437e-05, + "loss": 2.349, + "step": 2830000 + }, + { + "epoch": 14.02, + "learning_rate": 4.299089638976829e-05, + "loss": 2.3903, + "step": 2830500 + }, + { + "epoch": 14.03, + "learning_rate": 4.2989657803342204e-05, + "loss": 2.374, + "step": 2831000 + }, + { + "epoch": 14.03, + "learning_rate": 4.298842169408897e-05, + "loss": 2.378, + "step": 2831500 + }, + { + "epoch": 14.03, + "learning_rate": 4.298718310766289e-05, + "loss": 2.3867, + "step": 2832000 + }, + { + "epoch": 14.03, + "learning_rate": 4.2985944521236806e-05, + "loss": 2.366, + "step": 2832500 + }, + { + "epoch": 14.04, + "learning_rate": 4.298470593481072e-05, + "loss": 2.3584, + "step": 2833000 + }, + { + "epoch": 14.04, + "learning_rate": 4.298346734838464e-05, + "loss": 2.3585, + "step": 2833500 + }, + { + "epoch": 14.04, + "learning_rate": 4.298222876195856e-05, + "loss": 2.3845, + "step": 2834000 + }, + { + "epoch": 14.04, + "learning_rate": 4.2980990175532474e-05, + "loss": 2.3867, + "step": 2834500 + }, + { + "epoch": 14.05, + "learning_rate": 4.2979751589106384e-05, + "loss": 2.3844, + "step": 2835000 + }, + { + "epoch": 14.05, + "learning_rate": 4.29785130026803e-05, + "loss": 2.3945, + "step": 2835500 + }, + { + "epoch": 14.05, + "learning_rate": 4.297727689342707e-05, + "loss": 2.3819, + "step": 2836000 + }, + { + "epoch": 14.05, + "learning_rate": 4.297603830700099e-05, + "loss": 2.3963, + "step": 2836500 + }, + { + "epoch": 14.06, + "learning_rate": 4.2974799720574904e-05, + "loss": 2.3649, + "step": 2837000 + }, + { + "epoch": 14.06, + "learning_rate": 4.297356113414882e-05, + "loss": 2.3759, + "step": 2837500 + }, + { + "epoch": 14.06, + "learning_rate": 4.297232254772274e-05, + "loss": 2.3773, + "step": 2838000 + }, + { + "epoch": 14.06, + "learning_rate": 4.2971083961296655e-05, + "loss": 2.3649, + "step": 2838500 + }, + { + "epoch": 14.07, + "learning_rate": 4.296984785204342e-05, + "loss": 2.3764, + "step": 2839000 + }, + { + "epoch": 14.07, + "learning_rate": 4.296860926561734e-05, + "loss": 2.3793, + "step": 2839500 + }, + { + "epoch": 14.07, + "learning_rate": 4.29673731563641e-05, + "loss": 2.4066, + "step": 2840000 + }, + { + "epoch": 14.07, + "learning_rate": 4.296613704711088e-05, + "loss": 2.377, + "step": 2840500 + }, + { + "epoch": 14.08, + "learning_rate": 4.2964898460684795e-05, + "loss": 2.3808, + "step": 2841000 + }, + { + "epoch": 14.08, + "learning_rate": 4.296365987425871e-05, + "loss": 2.4138, + "step": 2841500 + }, + { + "epoch": 14.08, + "learning_rate": 4.296242128783263e-05, + "loss": 2.3763, + "step": 2842000 + }, + { + "epoch": 14.08, + "learning_rate": 4.296118270140654e-05, + "loss": 2.3747, + "step": 2842500 + }, + { + "epoch": 14.09, + "learning_rate": 4.2959944114980456e-05, + "loss": 2.3829, + "step": 2843000 + }, + { + "epoch": 14.09, + "learning_rate": 4.295870552855437e-05, + "loss": 2.4047, + "step": 2843500 + }, + { + "epoch": 14.09, + "learning_rate": 4.295746694212829e-05, + "loss": 2.3814, + "step": 2844000 + }, + { + "epoch": 14.09, + "learning_rate": 4.2956228355702207e-05, + "loss": 2.387, + "step": 2844500 + }, + { + "epoch": 14.1, + "learning_rate": 4.2954989769276123e-05, + "loss": 2.3661, + "step": 2845000 + }, + { + "epoch": 14.1, + "learning_rate": 4.295375118285004e-05, + "loss": 2.4036, + "step": 2845500 + }, + { + "epoch": 14.1, + "learning_rate": 4.29525150735968e-05, + "loss": 2.3828, + "step": 2846000 + }, + { + "epoch": 14.1, + "learning_rate": 4.295127648717072e-05, + "loss": 2.3651, + "step": 2846500 + }, + { + "epoch": 14.11, + "learning_rate": 4.2950037900744636e-05, + "loss": 2.3889, + "step": 2847000 + }, + { + "epoch": 14.11, + "learning_rate": 4.294879931431855e-05, + "loss": 2.3733, + "step": 2847500 + }, + { + "epoch": 14.11, + "learning_rate": 4.294756072789247e-05, + "loss": 2.3593, + "step": 2848000 + }, + { + "epoch": 14.11, + "learning_rate": 4.294632214146639e-05, + "loss": 2.3742, + "step": 2848500 + }, + { + "epoch": 14.11, + "learning_rate": 4.2945083555040304e-05, + "loss": 2.3745, + "step": 2849000 + }, + { + "epoch": 14.12, + "learning_rate": 4.294384496861422e-05, + "loss": 2.3674, + "step": 2849500 + }, + { + "epoch": 14.12, + "learning_rate": 4.294260638218814e-05, + "loss": 2.3607, + "step": 2850000 + }, + { + "epoch": 14.12, + "learning_rate": 4.294137027293491e-05, + "loss": 2.383, + "step": 2850500 + }, + { + "epoch": 14.12, + "learning_rate": 4.2940131686508824e-05, + "loss": 2.3761, + "step": 2851000 + }, + { + "epoch": 14.13, + "learning_rate": 4.293889310008274e-05, + "loss": 2.3966, + "step": 2851500 + }, + { + "epoch": 14.13, + "learning_rate": 4.293765451365666e-05, + "loss": 2.3912, + "step": 2852000 + }, + { + "epoch": 14.13, + "learning_rate": 4.293641840440342e-05, + "loss": 2.3719, + "step": 2852500 + }, + { + "epoch": 14.13, + "learning_rate": 4.2935179817977336e-05, + "loss": 2.3608, + "step": 2853000 + }, + { + "epoch": 14.14, + "learning_rate": 4.293394123155125e-05, + "loss": 2.3762, + "step": 2853500 + }, + { + "epoch": 14.14, + "learning_rate": 4.293270264512517e-05, + "loss": 2.3916, + "step": 2854000 + }, + { + "epoch": 14.14, + "learning_rate": 4.293146405869909e-05, + "loss": 2.3773, + "step": 2854500 + }, + { + "epoch": 14.14, + "learning_rate": 4.2930225472273004e-05, + "loss": 2.3872, + "step": 2855000 + }, + { + "epoch": 14.15, + "learning_rate": 4.292898688584692e-05, + "loss": 2.3776, + "step": 2855500 + }, + { + "epoch": 14.15, + "learning_rate": 4.292775077659369e-05, + "loss": 2.4074, + "step": 2856000 + }, + { + "epoch": 14.15, + "learning_rate": 4.292651219016761e-05, + "loss": 2.379, + "step": 2856500 + }, + { + "epoch": 14.15, + "learning_rate": 4.2925273603741524e-05, + "loss": 2.3803, + "step": 2857000 + }, + { + "epoch": 14.16, + "learning_rate": 4.292403501731544e-05, + "loss": 2.3807, + "step": 2857500 + }, + { + "epoch": 14.16, + "learning_rate": 4.292279643088936e-05, + "loss": 2.3591, + "step": 2858000 + }, + { + "epoch": 14.16, + "learning_rate": 4.2921557844463274e-05, + "loss": 2.3725, + "step": 2858500 + }, + { + "epoch": 14.16, + "learning_rate": 4.292031925803719e-05, + "loss": 2.3891, + "step": 2859000 + }, + { + "epoch": 14.17, + "learning_rate": 4.291908067161111e-05, + "loss": 2.3872, + "step": 2859500 + }, + { + "epoch": 14.17, + "learning_rate": 4.291784456235787e-05, + "loss": 2.3672, + "step": 2860000 + }, + { + "epoch": 14.17, + "learning_rate": 4.2916608453104646e-05, + "loss": 2.3655, + "step": 2860500 + }, + { + "epoch": 14.17, + "learning_rate": 4.2915369866678556e-05, + "loss": 2.403, + "step": 2861000 + }, + { + "epoch": 14.18, + "learning_rate": 4.291413375742533e-05, + "loss": 2.4014, + "step": 2861500 + }, + { + "epoch": 14.18, + "learning_rate": 4.291289517099925e-05, + "loss": 2.3719, + "step": 2862000 + }, + { + "epoch": 14.18, + "learning_rate": 4.2911656584573166e-05, + "loss": 2.377, + "step": 2862500 + }, + { + "epoch": 14.18, + "learning_rate": 4.2910417998147076e-05, + "loss": 2.3801, + "step": 2863000 + }, + { + "epoch": 14.19, + "learning_rate": 4.290917941172099e-05, + "loss": 2.3656, + "step": 2863500 + }, + { + "epoch": 14.19, + "learning_rate": 4.290794082529491e-05, + "loss": 2.4083, + "step": 2864000 + }, + { + "epoch": 14.19, + "learning_rate": 4.2906702238868826e-05, + "loss": 2.3794, + "step": 2864500 + }, + { + "epoch": 14.19, + "learning_rate": 4.2905463652442743e-05, + "loss": 2.3929, + "step": 2865000 + }, + { + "epoch": 14.2, + "learning_rate": 4.2904225066016654e-05, + "loss": 2.3909, + "step": 2865500 + }, + { + "epoch": 14.2, + "learning_rate": 4.290298647959057e-05, + "loss": 2.4093, + "step": 2866000 + }, + { + "epoch": 14.2, + "learning_rate": 4.290174789316449e-05, + "loss": 2.3792, + "step": 2866500 + }, + { + "epoch": 14.2, + "learning_rate": 4.2900509306738404e-05, + "loss": 2.3973, + "step": 2867000 + }, + { + "epoch": 14.21, + "learning_rate": 4.289927072031232e-05, + "loss": 2.3682, + "step": 2867500 + }, + { + "epoch": 14.21, + "learning_rate": 4.289803213388624e-05, + "loss": 2.3672, + "step": 2868000 + }, + { + "epoch": 14.21, + "learning_rate": 4.2896793547460155e-05, + "loss": 2.3825, + "step": 2868500 + }, + { + "epoch": 14.21, + "learning_rate": 4.289555496103407e-05, + "loss": 2.3766, + "step": 2869000 + }, + { + "epoch": 14.22, + "learning_rate": 4.289431885178084e-05, + "loss": 2.3758, + "step": 2869500 + }, + { + "epoch": 14.22, + "learning_rate": 4.289308026535476e-05, + "loss": 2.37, + "step": 2870000 + }, + { + "epoch": 14.22, + "learning_rate": 4.2891844156101527e-05, + "loss": 2.3608, + "step": 2870500 + }, + { + "epoch": 14.22, + "learning_rate": 4.2890605569675444e-05, + "loss": 2.3819, + "step": 2871000 + }, + { + "epoch": 14.23, + "learning_rate": 4.288936698324936e-05, + "loss": 2.3681, + "step": 2871500 + }, + { + "epoch": 14.23, + "learning_rate": 4.288813087399613e-05, + "loss": 2.3852, + "step": 2872000 + }, + { + "epoch": 14.23, + "learning_rate": 4.2886892287570046e-05, + "loss": 2.3943, + "step": 2872500 + }, + { + "epoch": 14.23, + "learning_rate": 4.288565370114396e-05, + "loss": 2.3918, + "step": 2873000 + }, + { + "epoch": 14.24, + "learning_rate": 4.288441511471787e-05, + "loss": 2.363, + "step": 2873500 + }, + { + "epoch": 14.24, + "learning_rate": 4.288317652829179e-05, + "loss": 2.3689, + "step": 2874000 + }, + { + "epoch": 14.24, + "learning_rate": 4.2881940419038566e-05, + "loss": 2.3561, + "step": 2874500 + }, + { + "epoch": 14.24, + "learning_rate": 4.288070183261248e-05, + "loss": 2.3556, + "step": 2875000 + }, + { + "epoch": 14.25, + "learning_rate": 4.28794632461864e-05, + "loss": 2.3698, + "step": 2875500 + }, + { + "epoch": 14.25, + "learning_rate": 4.2878224659760317e-05, + "loss": 2.3819, + "step": 2876000 + }, + { + "epoch": 14.25, + "learning_rate": 4.287698607333423e-05, + "loss": 2.3529, + "step": 2876500 + }, + { + "epoch": 14.25, + "learning_rate": 4.2875747486908144e-05, + "loss": 2.3704, + "step": 2877000 + }, + { + "epoch": 14.26, + "learning_rate": 4.287450890048206e-05, + "loss": 2.399, + "step": 2877500 + }, + { + "epoch": 14.26, + "learning_rate": 4.287327031405598e-05, + "loss": 2.3887, + "step": 2878000 + }, + { + "epoch": 14.26, + "learning_rate": 4.2872031727629894e-05, + "loss": 2.3905, + "step": 2878500 + }, + { + "epoch": 14.26, + "learning_rate": 4.2870793141203805e-05, + "loss": 2.3566, + "step": 2879000 + }, + { + "epoch": 14.27, + "learning_rate": 4.286955703195058e-05, + "loss": 2.4, + "step": 2879500 + }, + { + "epoch": 14.27, + "learning_rate": 4.286831844552449e-05, + "loss": 2.3926, + "step": 2880000 + }, + { + "epoch": 14.27, + "learning_rate": 4.286707985909841e-05, + "loss": 2.3815, + "step": 2880500 + }, + { + "epoch": 14.27, + "learning_rate": 4.2865841272672324e-05, + "loss": 2.369, + "step": 2881000 + }, + { + "epoch": 14.28, + "learning_rate": 4.286460268624624e-05, + "loss": 2.3836, + "step": 2881500 + }, + { + "epoch": 14.28, + "learning_rate": 4.286336409982016e-05, + "loss": 2.4035, + "step": 2882000 + }, + { + "epoch": 14.28, + "learning_rate": 4.2862127990566934e-05, + "loss": 2.3799, + "step": 2882500 + }, + { + "epoch": 14.28, + "learning_rate": 4.2860889404140844e-05, + "loss": 2.4322, + "step": 2883000 + }, + { + "epoch": 14.29, + "learning_rate": 4.285965081771476e-05, + "loss": 2.4124, + "step": 2883500 + }, + { + "epoch": 14.29, + "learning_rate": 4.285841223128868e-05, + "loss": 2.3728, + "step": 2884000 + }, + { + "epoch": 14.29, + "learning_rate": 4.2857173644862595e-05, + "loss": 2.3791, + "step": 2884500 + }, + { + "epoch": 14.29, + "learning_rate": 4.285593505843651e-05, + "loss": 2.3921, + "step": 2885000 + }, + { + "epoch": 14.3, + "learning_rate": 4.285469647201042e-05, + "loss": 2.3687, + "step": 2885500 + }, + { + "epoch": 14.3, + "learning_rate": 4.285346036275719e-05, + "loss": 2.3897, + "step": 2886000 + }, + { + "epoch": 14.3, + "learning_rate": 4.285222177633111e-05, + "loss": 2.3735, + "step": 2886500 + }, + { + "epoch": 14.3, + "learning_rate": 4.2850983189905024e-05, + "loss": 2.3695, + "step": 2887000 + }, + { + "epoch": 14.31, + "learning_rate": 4.284974460347894e-05, + "loss": 2.3833, + "step": 2887500 + }, + { + "epoch": 14.31, + "learning_rate": 4.284850601705286e-05, + "loss": 2.3874, + "step": 2888000 + }, + { + "epoch": 14.31, + "learning_rate": 4.2847267430626775e-05, + "loss": 2.4175, + "step": 2888500 + }, + { + "epoch": 14.31, + "learning_rate": 4.284603132137355e-05, + "loss": 2.3706, + "step": 2889000 + }, + { + "epoch": 14.32, + "learning_rate": 4.2844797689293165e-05, + "loss": 2.3967, + "step": 2889500 + }, + { + "epoch": 14.32, + "learning_rate": 4.2843561580039933e-05, + "loss": 2.3934, + "step": 2890000 + }, + { + "epoch": 14.32, + "learning_rate": 4.284232299361385e-05, + "loss": 2.3733, + "step": 2890500 + }, + { + "epoch": 14.32, + "learning_rate": 4.284108440718777e-05, + "loss": 2.3728, + "step": 2891000 + }, + { + "epoch": 14.33, + "learning_rate": 4.2839845820761684e-05, + "loss": 2.3741, + "step": 2891500 + }, + { + "epoch": 14.33, + "learning_rate": 4.28386072343356e-05, + "loss": 2.406, + "step": 2892000 + }, + { + "epoch": 14.33, + "learning_rate": 4.283736864790952e-05, + "loss": 2.3878, + "step": 2892500 + }, + { + "epoch": 14.33, + "learning_rate": 4.283613006148343e-05, + "loss": 2.3857, + "step": 2893000 + }, + { + "epoch": 14.34, + "learning_rate": 4.2834891475057345e-05, + "loss": 2.3929, + "step": 2893500 + }, + { + "epoch": 14.34, + "learning_rate": 4.2833655365804114e-05, + "loss": 2.3873, + "step": 2894000 + }, + { + "epoch": 14.34, + "learning_rate": 4.283241925655089e-05, + "loss": 2.3801, + "step": 2894500 + }, + { + "epoch": 14.34, + "learning_rate": 4.2831180670124806e-05, + "loss": 2.3904, + "step": 2895000 + }, + { + "epoch": 14.35, + "learning_rate": 4.282994208369872e-05, + "loss": 2.4079, + "step": 2895500 + }, + { + "epoch": 14.35, + "learning_rate": 4.2828703497272634e-05, + "loss": 2.418, + "step": 2896000 + }, + { + "epoch": 14.35, + "learning_rate": 4.282746491084655e-05, + "loss": 2.3692, + "step": 2896500 + }, + { + "epoch": 14.35, + "learning_rate": 4.282622632442047e-05, + "loss": 2.377, + "step": 2897000 + }, + { + "epoch": 14.36, + "learning_rate": 4.2824987737994384e-05, + "loss": 2.3925, + "step": 2897500 + }, + { + "epoch": 14.36, + "learning_rate": 4.28237491515683e-05, + "loss": 2.3754, + "step": 2898000 + }, + { + "epoch": 14.36, + "learning_rate": 4.282251056514222e-05, + "loss": 2.3814, + "step": 2898500 + }, + { + "epoch": 14.36, + "learning_rate": 4.2821271978716135e-05, + "loss": 2.3872, + "step": 2899000 + }, + { + "epoch": 14.37, + "learning_rate": 4.282003339229005e-05, + "loss": 2.3753, + "step": 2899500 + }, + { + "epoch": 14.37, + "learning_rate": 4.281879480586396e-05, + "loss": 2.4065, + "step": 2900000 + }, + { + "epoch": 14.37, + "learning_rate": 4.281755621943788e-05, + "loss": 2.3957, + "step": 2900500 + }, + { + "epoch": 14.37, + "learning_rate": 4.2816317633011796e-05, + "loss": 2.4159, + "step": 2901000 + }, + { + "epoch": 14.38, + "learning_rate": 4.281507904658571e-05, + "loss": 2.4126, + "step": 2901500 + }, + { + "epoch": 14.38, + "learning_rate": 4.281384046015963e-05, + "loss": 2.3595, + "step": 2902000 + }, + { + "epoch": 14.38, + "learning_rate": 4.281260187373355e-05, + "loss": 2.3574, + "step": 2902500 + }, + { + "epoch": 14.38, + "learning_rate": 4.281136824165317e-05, + "loss": 2.3769, + "step": 2903000 + }, + { + "epoch": 14.38, + "learning_rate": 4.2810129655227084e-05, + "loss": 2.395, + "step": 2903500 + }, + { + "epoch": 14.39, + "learning_rate": 4.2808891068801e-05, + "loss": 2.4074, + "step": 2904000 + }, + { + "epoch": 14.39, + "learning_rate": 4.280765495954777e-05, + "loss": 2.3847, + "step": 2904500 + }, + { + "epoch": 14.39, + "learning_rate": 4.280641637312169e-05, + "loss": 2.4074, + "step": 2905000 + }, + { + "epoch": 14.39, + "learning_rate": 4.28051777866956e-05, + "loss": 2.3715, + "step": 2905500 + }, + { + "epoch": 14.4, + "learning_rate": 4.2803939200269514e-05, + "loss": 2.3871, + "step": 2906000 + }, + { + "epoch": 14.4, + "learning_rate": 4.280270061384343e-05, + "loss": 2.4013, + "step": 2906500 + }, + { + "epoch": 14.4, + "learning_rate": 4.280146202741735e-05, + "loss": 2.3838, + "step": 2907000 + }, + { + "epoch": 14.4, + "learning_rate": 4.2800223440991265e-05, + "loss": 2.4154, + "step": 2907500 + }, + { + "epoch": 14.41, + "learning_rate": 4.279898485456518e-05, + "loss": 2.3687, + "step": 2908000 + }, + { + "epoch": 14.41, + "learning_rate": 4.27977462681391e-05, + "loss": 2.396, + "step": 2908500 + }, + { + "epoch": 14.41, + "learning_rate": 4.2796507681713016e-05, + "loss": 2.3786, + "step": 2909000 + }, + { + "epoch": 14.41, + "learning_rate": 4.2795271572459785e-05, + "loss": 2.3984, + "step": 2909500 + }, + { + "epoch": 14.42, + "learning_rate": 4.279403546320655e-05, + "loss": 2.3943, + "step": 2910000 + }, + { + "epoch": 14.42, + "learning_rate": 4.279279687678047e-05, + "loss": 2.3614, + "step": 2910500 + }, + { + "epoch": 14.42, + "learning_rate": 4.279155829035439e-05, + "loss": 2.3771, + "step": 2911000 + }, + { + "epoch": 14.42, + "learning_rate": 4.2790319703928304e-05, + "loss": 2.3691, + "step": 2911500 + }, + { + "epoch": 14.43, + "learning_rate": 4.2789081117502214e-05, + "loss": 2.392, + "step": 2912000 + }, + { + "epoch": 14.43, + "learning_rate": 4.278784253107613e-05, + "loss": 2.4161, + "step": 2912500 + }, + { + "epoch": 14.43, + "learning_rate": 4.278660394465005e-05, + "loss": 2.3881, + "step": 2913000 + }, + { + "epoch": 14.43, + "learning_rate": 4.2785365358223965e-05, + "loss": 2.3568, + "step": 2913500 + }, + { + "epoch": 14.44, + "learning_rate": 4.278412677179788e-05, + "loss": 2.4166, + "step": 2914000 + }, + { + "epoch": 14.44, + "learning_rate": 4.278289066254466e-05, + "loss": 2.4008, + "step": 2914500 + }, + { + "epoch": 14.44, + "learning_rate": 4.278165207611857e-05, + "loss": 2.3719, + "step": 2915000 + }, + { + "epoch": 14.44, + "learning_rate": 4.2780413489692485e-05, + "loss": 2.4054, + "step": 2915500 + }, + { + "epoch": 14.45, + "learning_rate": 4.27791749032664e-05, + "loss": 2.3981, + "step": 2916000 + }, + { + "epoch": 14.45, + "learning_rate": 4.277793631684032e-05, + "loss": 2.4088, + "step": 2916500 + }, + { + "epoch": 14.45, + "learning_rate": 4.2776697730414235e-05, + "loss": 2.4193, + "step": 2917000 + }, + { + "epoch": 14.45, + "learning_rate": 4.277545914398815e-05, + "loss": 2.3723, + "step": 2917500 + }, + { + "epoch": 14.46, + "learning_rate": 4.277422055756207e-05, + "loss": 2.3896, + "step": 2918000 + }, + { + "epoch": 14.46, + "learning_rate": 4.2772981971135986e-05, + "loss": 2.3889, + "step": 2918500 + }, + { + "epoch": 14.46, + "learning_rate": 4.27717433847099e-05, + "loss": 2.3773, + "step": 2919000 + }, + { + "epoch": 14.46, + "learning_rate": 4.277050479828382e-05, + "loss": 2.4043, + "step": 2919500 + }, + { + "epoch": 14.47, + "learning_rate": 4.276927116620344e-05, + "loss": 2.3883, + "step": 2920000 + }, + { + "epoch": 14.47, + "learning_rate": 4.276803257977736e-05, + "loss": 2.3812, + "step": 2920500 + }, + { + "epoch": 14.47, + "learning_rate": 4.276679894769697e-05, + "loss": 2.4031, + "step": 2921000 + }, + { + "epoch": 14.47, + "learning_rate": 4.276556036127089e-05, + "loss": 2.3853, + "step": 2921500 + }, + { + "epoch": 14.48, + "learning_rate": 4.2764321774844806e-05, + "loss": 2.4171, + "step": 2922000 + }, + { + "epoch": 14.48, + "learning_rate": 4.276308318841872e-05, + "loss": 2.3994, + "step": 2922500 + }, + { + "epoch": 14.48, + "learning_rate": 4.276184460199264e-05, + "loss": 2.3933, + "step": 2923000 + }, + { + "epoch": 14.48, + "learning_rate": 4.276060849273941e-05, + "loss": 2.3946, + "step": 2923500 + }, + { + "epoch": 14.49, + "learning_rate": 4.2759369906313325e-05, + "loss": 2.4314, + "step": 2924000 + }, + { + "epoch": 14.49, + "learning_rate": 4.275813131988724e-05, + "loss": 2.3782, + "step": 2924500 + }, + { + "epoch": 14.49, + "learning_rate": 4.275689273346116e-05, + "loss": 2.3963, + "step": 2925000 + }, + { + "epoch": 14.49, + "learning_rate": 4.2755654147035076e-05, + "loss": 2.4168, + "step": 2925500 + }, + { + "epoch": 14.5, + "learning_rate": 4.275441556060899e-05, + "loss": 2.3955, + "step": 2926000 + }, + { + "epoch": 14.5, + "learning_rate": 4.275317697418291e-05, + "loss": 2.4109, + "step": 2926500 + }, + { + "epoch": 14.5, + "learning_rate": 4.275193838775683e-05, + "loss": 2.3643, + "step": 2927000 + }, + { + "epoch": 14.5, + "learning_rate": 4.2750699801330744e-05, + "loss": 2.3692, + "step": 2927500 + }, + { + "epoch": 14.51, + "learning_rate": 4.2749461214904654e-05, + "loss": 2.4234, + "step": 2928000 + }, + { + "epoch": 14.51, + "learning_rate": 4.274822262847857e-05, + "loss": 2.4136, + "step": 2928500 + }, + { + "epoch": 14.51, + "learning_rate": 4.274698404205249e-05, + "loss": 2.3882, + "step": 2929000 + }, + { + "epoch": 14.51, + "learning_rate": 4.2745745455626405e-05, + "loss": 2.3713, + "step": 2929500 + }, + { + "epoch": 14.52, + "learning_rate": 4.274450686920032e-05, + "loss": 2.3898, + "step": 2930000 + }, + { + "epoch": 14.52, + "learning_rate": 4.274326828277423e-05, + "loss": 2.4151, + "step": 2930500 + }, + { + "epoch": 14.52, + "learning_rate": 4.274202969634815e-05, + "loss": 2.3902, + "step": 2931000 + }, + { + "epoch": 14.52, + "learning_rate": 4.2740793587094924e-05, + "loss": 2.3814, + "step": 2931500 + }, + { + "epoch": 14.53, + "learning_rate": 4.273955500066884e-05, + "loss": 2.3855, + "step": 2932000 + }, + { + "epoch": 14.53, + "learning_rate": 4.273831641424276e-05, + "loss": 2.4006, + "step": 2932500 + }, + { + "epoch": 14.53, + "learning_rate": 4.2737077827816675e-05, + "loss": 2.3651, + "step": 2933000 + }, + { + "epoch": 14.53, + "learning_rate": 4.2735839241390585e-05, + "loss": 2.3846, + "step": 2933500 + }, + { + "epoch": 14.54, + "learning_rate": 4.273460313213736e-05, + "loss": 2.3763, + "step": 2934000 + }, + { + "epoch": 14.54, + "learning_rate": 4.273336454571127e-05, + "loss": 2.382, + "step": 2934500 + }, + { + "epoch": 14.54, + "learning_rate": 4.273212595928519e-05, + "loss": 2.3847, + "step": 2935000 + }, + { + "epoch": 14.54, + "learning_rate": 4.2730887372859105e-05, + "loss": 2.3934, + "step": 2935500 + }, + { + "epoch": 14.55, + "learning_rate": 4.2729651263605873e-05, + "loss": 2.3807, + "step": 2936000 + }, + { + "epoch": 14.55, + "learning_rate": 4.272841515435264e-05, + "loss": 2.3737, + "step": 2936500 + }, + { + "epoch": 14.55, + "learning_rate": 4.272717656792656e-05, + "loss": 2.396, + "step": 2937000 + }, + { + "epoch": 14.55, + "learning_rate": 4.2725937981500476e-05, + "loss": 2.397, + "step": 2937500 + }, + { + "epoch": 14.56, + "learning_rate": 4.272469939507439e-05, + "loss": 2.3834, + "step": 2938000 + }, + { + "epoch": 14.56, + "learning_rate": 4.272346080864831e-05, + "loss": 2.383, + "step": 2938500 + }, + { + "epoch": 14.56, + "learning_rate": 4.272222222222223e-05, + "loss": 2.3652, + "step": 2939000 + }, + { + "epoch": 14.56, + "learning_rate": 4.2720983635796144e-05, + "loss": 2.3763, + "step": 2939500 + }, + { + "epoch": 14.57, + "learning_rate": 4.271974504937006e-05, + "loss": 2.4087, + "step": 2940000 + }, + { + "epoch": 14.57, + "learning_rate": 4.271850646294398e-05, + "loss": 2.3843, + "step": 2940500 + }, + { + "epoch": 14.57, + "learning_rate": 4.2717267876517895e-05, + "loss": 2.3971, + "step": 2941000 + }, + { + "epoch": 14.57, + "learning_rate": 4.2716029290091805e-05, + "loss": 2.3815, + "step": 2941500 + }, + { + "epoch": 14.58, + "learning_rate": 4.271479070366572e-05, + "loss": 2.4035, + "step": 2942000 + }, + { + "epoch": 14.58, + "learning_rate": 4.271355211723964e-05, + "loss": 2.3709, + "step": 2942500 + }, + { + "epoch": 14.58, + "learning_rate": 4.2712313530813556e-05, + "loss": 2.3951, + "step": 2943000 + }, + { + "epoch": 14.58, + "learning_rate": 4.271107494438747e-05, + "loss": 2.3758, + "step": 2943500 + }, + { + "epoch": 14.59, + "learning_rate": 4.270983635796138e-05, + "loss": 2.4007, + "step": 2944000 + }, + { + "epoch": 14.59, + "learning_rate": 4.270860024870816e-05, + "loss": 2.4097, + "step": 2944500 + }, + { + "epoch": 14.59, + "learning_rate": 4.2707361662282075e-05, + "loss": 2.4025, + "step": 2945000 + }, + { + "epoch": 14.59, + "learning_rate": 4.2706125553028844e-05, + "loss": 2.3816, + "step": 2945500 + }, + { + "epoch": 14.6, + "learning_rate": 4.270488696660276e-05, + "loss": 2.3982, + "step": 2946000 + }, + { + "epoch": 14.6, + "learning_rate": 4.270364838017668e-05, + "loss": 2.3793, + "step": 2946500 + }, + { + "epoch": 14.6, + "learning_rate": 4.2702409793750595e-05, + "loss": 2.389, + "step": 2947000 + }, + { + "epoch": 14.6, + "learning_rate": 4.270117120732451e-05, + "loss": 2.362, + "step": 2947500 + }, + { + "epoch": 14.61, + "learning_rate": 4.2699935098071274e-05, + "loss": 2.3914, + "step": 2948000 + }, + { + "epoch": 14.61, + "learning_rate": 4.269869651164519e-05, + "loss": 2.3902, + "step": 2948500 + }, + { + "epoch": 14.61, + "learning_rate": 4.269745792521911e-05, + "loss": 2.3968, + "step": 2949000 + }, + { + "epoch": 14.61, + "learning_rate": 4.2696219338793024e-05, + "loss": 2.3966, + "step": 2949500 + }, + { + "epoch": 14.62, + "learning_rate": 4.269498075236694e-05, + "loss": 2.395, + "step": 2950000 + }, + { + "epoch": 14.62, + "learning_rate": 4.269374216594086e-05, + "loss": 2.3799, + "step": 2950500 + }, + { + "epoch": 14.62, + "learning_rate": 4.2692503579514775e-05, + "loss": 2.3837, + "step": 2951000 + }, + { + "epoch": 14.62, + "learning_rate": 4.2691267470261544e-05, + "loss": 2.3736, + "step": 2951500 + }, + { + "epoch": 14.63, + "learning_rate": 4.269002888383546e-05, + "loss": 2.3967, + "step": 2952000 + }, + { + "epoch": 14.63, + "learning_rate": 4.268879029740938e-05, + "loss": 2.4221, + "step": 2952500 + }, + { + "epoch": 14.63, + "learning_rate": 4.2687551710983295e-05, + "loss": 2.3803, + "step": 2953000 + }, + { + "epoch": 14.63, + "learning_rate": 4.268631560173006e-05, + "loss": 2.3863, + "step": 2953500 + }, + { + "epoch": 14.64, + "learning_rate": 4.2685077015303974e-05, + "loss": 2.391, + "step": 2954000 + }, + { + "epoch": 14.64, + "learning_rate": 4.268383842887789e-05, + "loss": 2.374, + "step": 2954500 + }, + { + "epoch": 14.64, + "learning_rate": 4.268259984245181e-05, + "loss": 2.3949, + "step": 2955000 + }, + { + "epoch": 14.64, + "learning_rate": 4.2681363733198577e-05, + "loss": 2.3759, + "step": 2955500 + }, + { + "epoch": 14.65, + "learning_rate": 4.2680125146772493e-05, + "loss": 2.387, + "step": 2956000 + }, + { + "epoch": 14.65, + "learning_rate": 4.267888656034641e-05, + "loss": 2.3696, + "step": 2956500 + }, + { + "epoch": 14.65, + "learning_rate": 4.267764797392033e-05, + "loss": 2.4066, + "step": 2957000 + }, + { + "epoch": 14.65, + "learning_rate": 4.2676409387494244e-05, + "loss": 2.4101, + "step": 2957500 + }, + { + "epoch": 14.65, + "learning_rate": 4.267517327824101e-05, + "loss": 2.3882, + "step": 2958000 + }, + { + "epoch": 14.66, + "learning_rate": 4.267393716898778e-05, + "loss": 2.4137, + "step": 2958500 + }, + { + "epoch": 14.66, + "learning_rate": 4.26726985825617e-05, + "loss": 2.3827, + "step": 2959000 + }, + { + "epoch": 14.66, + "learning_rate": 4.267145999613561e-05, + "loss": 2.4004, + "step": 2959500 + }, + { + "epoch": 14.66, + "learning_rate": 4.2670221409709526e-05, + "loss": 2.3833, + "step": 2960000 + }, + { + "epoch": 14.67, + "learning_rate": 4.26689853004563e-05, + "loss": 2.402, + "step": 2960500 + }, + { + "epoch": 14.67, + "learning_rate": 4.266774671403022e-05, + "loss": 2.4208, + "step": 2961000 + }, + { + "epoch": 14.67, + "learning_rate": 4.2666508127604135e-05, + "loss": 2.4217, + "step": 2961500 + }, + { + "epoch": 14.67, + "learning_rate": 4.266526954117805e-05, + "loss": 2.3977, + "step": 2962000 + }, + { + "epoch": 14.68, + "learning_rate": 4.266403095475196e-05, + "loss": 2.398, + "step": 2962500 + }, + { + "epoch": 14.68, + "learning_rate": 4.266279236832588e-05, + "loss": 2.3978, + "step": 2963000 + }, + { + "epoch": 14.68, + "learning_rate": 4.2661553781899796e-05, + "loss": 2.4067, + "step": 2963500 + }, + { + "epoch": 14.68, + "learning_rate": 4.266031519547371e-05, + "loss": 2.4094, + "step": 2964000 + }, + { + "epoch": 14.69, + "learning_rate": 4.265907660904763e-05, + "loss": 2.3881, + "step": 2964500 + }, + { + "epoch": 14.69, + "learning_rate": 4.265783802262154e-05, + "loss": 2.3953, + "step": 2965000 + }, + { + "epoch": 14.69, + "learning_rate": 4.265659943619546e-05, + "loss": 2.3919, + "step": 2965500 + }, + { + "epoch": 14.69, + "learning_rate": 4.2655363326942226e-05, + "loss": 2.3778, + "step": 2966000 + }, + { + "epoch": 14.7, + "learning_rate": 4.265412474051614e-05, + "loss": 2.3833, + "step": 2966500 + }, + { + "epoch": 14.7, + "learning_rate": 4.265288615409006e-05, + "loss": 2.3947, + "step": 2967000 + }, + { + "epoch": 14.7, + "learning_rate": 4.265164756766398e-05, + "loss": 2.3884, + "step": 2967500 + }, + { + "epoch": 14.7, + "learning_rate": 4.2650408981237894e-05, + "loss": 2.3881, + "step": 2968000 + }, + { + "epoch": 14.71, + "learning_rate": 4.264917039481181e-05, + "loss": 2.4038, + "step": 2968500 + }, + { + "epoch": 14.71, + "learning_rate": 4.264793180838573e-05, + "loss": 2.412, + "step": 2969000 + }, + { + "epoch": 14.71, + "learning_rate": 4.2646693221959644e-05, + "loss": 2.3893, + "step": 2969500 + }, + { + "epoch": 14.71, + "learning_rate": 4.264545463553356e-05, + "loss": 2.38, + "step": 2970000 + }, + { + "epoch": 14.72, + "learning_rate": 4.264421604910748e-05, + "loss": 2.4093, + "step": 2970500 + }, + { + "epoch": 14.72, + "learning_rate": 4.264297993985425e-05, + "loss": 2.3932, + "step": 2971000 + }, + { + "epoch": 14.72, + "learning_rate": 4.2641741353428164e-05, + "loss": 2.4133, + "step": 2971500 + }, + { + "epoch": 14.72, + "learning_rate": 4.2640502767002074e-05, + "loss": 2.3936, + "step": 2972000 + }, + { + "epoch": 14.73, + "learning_rate": 4.263926418057599e-05, + "loss": 2.405, + "step": 2972500 + }, + { + "epoch": 14.73, + "learning_rate": 4.263802559414991e-05, + "loss": 2.3882, + "step": 2973000 + }, + { + "epoch": 14.73, + "learning_rate": 4.2636787007723825e-05, + "loss": 2.3862, + "step": 2973500 + }, + { + "epoch": 14.73, + "learning_rate": 4.263554842129774e-05, + "loss": 2.3741, + "step": 2974000 + }, + { + "epoch": 14.74, + "learning_rate": 4.263430983487166e-05, + "loss": 2.4191, + "step": 2974500 + }, + { + "epoch": 14.74, + "learning_rate": 4.2633071248445576e-05, + "loss": 2.4142, + "step": 2975000 + }, + { + "epoch": 14.74, + "learning_rate": 4.263183266201949e-05, + "loss": 2.3749, + "step": 2975500 + }, + { + "epoch": 14.74, + "learning_rate": 4.263059407559341e-05, + "loss": 2.3808, + "step": 2976000 + }, + { + "epoch": 14.75, + "learning_rate": 4.262935796634018e-05, + "loss": 2.3935, + "step": 2976500 + }, + { + "epoch": 14.75, + "learning_rate": 4.2628119379914095e-05, + "loss": 2.3867, + "step": 2977000 + }, + { + "epoch": 14.75, + "learning_rate": 4.2626883270660864e-05, + "loss": 2.3977, + "step": 2977500 + }, + { + "epoch": 14.75, + "learning_rate": 4.262564468423478e-05, + "loss": 2.3814, + "step": 2978000 + }, + { + "epoch": 14.76, + "learning_rate": 4.262440609780869e-05, + "loss": 2.3813, + "step": 2978500 + }, + { + "epoch": 14.76, + "learning_rate": 4.262316751138261e-05, + "loss": 2.3986, + "step": 2979000 + }, + { + "epoch": 14.76, + "learning_rate": 4.262193140212938e-05, + "loss": 2.3936, + "step": 2979500 + }, + { + "epoch": 14.76, + "learning_rate": 4.2620692815703294e-05, + "loss": 2.403, + "step": 2980000 + }, + { + "epoch": 14.77, + "learning_rate": 4.261945422927721e-05, + "loss": 2.391, + "step": 2980500 + }, + { + "epoch": 14.77, + "learning_rate": 4.261821564285113e-05, + "loss": 2.4022, + "step": 2981000 + }, + { + "epoch": 14.77, + "learning_rate": 4.2616977056425045e-05, + "loss": 2.3994, + "step": 2981500 + }, + { + "epoch": 14.77, + "learning_rate": 4.261573846999896e-05, + "loss": 2.4071, + "step": 2982000 + }, + { + "epoch": 14.78, + "learning_rate": 4.261449988357288e-05, + "loss": 2.402, + "step": 2982500 + }, + { + "epoch": 14.78, + "learning_rate": 4.2613261297146795e-05, + "loss": 2.377, + "step": 2983000 + }, + { + "epoch": 14.78, + "learning_rate": 4.261202271072071e-05, + "loss": 2.3869, + "step": 2983500 + }, + { + "epoch": 14.78, + "learning_rate": 4.261078412429463e-05, + "loss": 2.3832, + "step": 2984000 + }, + { + "epoch": 14.79, + "learning_rate": 4.26095480150414e-05, + "loss": 2.4119, + "step": 2984500 + }, + { + "epoch": 14.79, + "learning_rate": 4.2608309428615315e-05, + "loss": 2.4104, + "step": 2985000 + }, + { + "epoch": 14.79, + "learning_rate": 4.2607070842189225e-05, + "loss": 2.3959, + "step": 2985500 + }, + { + "epoch": 14.79, + "learning_rate": 4.260583225576314e-05, + "loss": 2.4221, + "step": 2986000 + }, + { + "epoch": 14.8, + "learning_rate": 4.260459366933706e-05, + "loss": 2.4011, + "step": 2986500 + }, + { + "epoch": 14.8, + "learning_rate": 4.2603355082910976e-05, + "loss": 2.3937, + "step": 2987000 + }, + { + "epoch": 14.8, + "learning_rate": 4.260211649648489e-05, + "loss": 2.3794, + "step": 2987500 + }, + { + "epoch": 14.8, + "learning_rate": 4.260088038723166e-05, + "loss": 2.4179, + "step": 2988000 + }, + { + "epoch": 14.81, + "learning_rate": 4.259964180080558e-05, + "loss": 2.4193, + "step": 2988500 + }, + { + "epoch": 14.81, + "learning_rate": 4.2598403214379496e-05, + "loss": 2.3833, + "step": 2989000 + }, + { + "epoch": 14.81, + "learning_rate": 4.2597167105126264e-05, + "loss": 2.3923, + "step": 2989500 + }, + { + "epoch": 14.81, + "learning_rate": 4.259592851870018e-05, + "loss": 2.394, + "step": 2990000 + }, + { + "epoch": 14.82, + "learning_rate": 4.25946899322741e-05, + "loss": 2.3941, + "step": 2990500 + }, + { + "epoch": 14.82, + "learning_rate": 4.2593451345848015e-05, + "loss": 2.4062, + "step": 2991000 + }, + { + "epoch": 14.82, + "learning_rate": 4.259221275942193e-05, + "loss": 2.3848, + "step": 2991500 + }, + { + "epoch": 14.82, + "learning_rate": 4.259097417299584e-05, + "loss": 2.3924, + "step": 2992000 + }, + { + "epoch": 14.83, + "learning_rate": 4.258973558656976e-05, + "loss": 2.4166, + "step": 2992500 + }, + { + "epoch": 14.83, + "learning_rate": 4.2588497000143676e-05, + "loss": 2.3901, + "step": 2993000 + }, + { + "epoch": 14.83, + "learning_rate": 4.258725841371759e-05, + "loss": 2.4053, + "step": 2993500 + }, + { + "epoch": 14.83, + "learning_rate": 4.258601982729151e-05, + "loss": 2.3861, + "step": 2994000 + }, + { + "epoch": 14.84, + "learning_rate": 4.258478124086543e-05, + "loss": 2.3877, + "step": 2994500 + }, + { + "epoch": 14.84, + "learning_rate": 4.2583542654439344e-05, + "loss": 2.4219, + "step": 2995000 + }, + { + "epoch": 14.84, + "learning_rate": 4.258230654518611e-05, + "loss": 2.4288, + "step": 2995500 + }, + { + "epoch": 14.84, + "learning_rate": 4.258106795876003e-05, + "loss": 2.388, + "step": 2996000 + }, + { + "epoch": 14.85, + "learning_rate": 4.2579829372333947e-05, + "loss": 2.391, + "step": 2996500 + }, + { + "epoch": 14.85, + "learning_rate": 4.2578590785907863e-05, + "loss": 2.4086, + "step": 2997000 + }, + { + "epoch": 14.85, + "learning_rate": 4.257735219948178e-05, + "loss": 2.3844, + "step": 2997500 + }, + { + "epoch": 14.85, + "learning_rate": 4.25761136130557e-05, + "loss": 2.3956, + "step": 2998000 + }, + { + "epoch": 14.86, + "learning_rate": 4.2574875026629614e-05, + "loss": 2.3777, + "step": 2998500 + }, + { + "epoch": 14.86, + "learning_rate": 4.257363644020353e-05, + "loss": 2.3943, + "step": 2999000 + }, + { + "epoch": 14.86, + "learning_rate": 4.2572402808123145e-05, + "loss": 2.3853, + "step": 2999500 + }, + { + "epoch": 14.86, + "learning_rate": 4.257116422169706e-05, + "loss": 2.388, + "step": 3000000 + }, + { + "epoch": 14.87, + "learning_rate": 4.256992563527098e-05, + "loss": 2.4066, + "step": 3000500 + }, + { + "epoch": 14.87, + "learning_rate": 4.2568687048844896e-05, + "loss": 2.3979, + "step": 3001000 + }, + { + "epoch": 14.87, + "learning_rate": 4.256744846241881e-05, + "loss": 2.392, + "step": 3001500 + }, + { + "epoch": 14.87, + "learning_rate": 4.256620987599273e-05, + "loss": 2.4233, + "step": 3002000 + }, + { + "epoch": 14.88, + "learning_rate": 4.256497128956665e-05, + "loss": 2.3834, + "step": 3002500 + }, + { + "epoch": 14.88, + "learning_rate": 4.2563732703140564e-05, + "loss": 2.3635, + "step": 3003000 + }, + { + "epoch": 14.88, + "learning_rate": 4.256249659388733e-05, + "loss": 2.4039, + "step": 3003500 + }, + { + "epoch": 14.88, + "learning_rate": 4.256125800746125e-05, + "loss": 2.3884, + "step": 3004000 + }, + { + "epoch": 14.89, + "learning_rate": 4.2560019421035166e-05, + "loss": 2.4051, + "step": 3004500 + }, + { + "epoch": 14.89, + "learning_rate": 4.255878083460908e-05, + "loss": 2.3943, + "step": 3005000 + }, + { + "epoch": 14.89, + "learning_rate": 4.255754224818299e-05, + "loss": 2.4152, + "step": 3005500 + }, + { + "epoch": 14.89, + "learning_rate": 4.255630613892976e-05, + "loss": 2.3811, + "step": 3006000 + }, + { + "epoch": 14.9, + "learning_rate": 4.255506755250368e-05, + "loss": 2.393, + "step": 3006500 + }, + { + "epoch": 14.9, + "learning_rate": 4.2553828966077596e-05, + "loss": 2.4022, + "step": 3007000 + }, + { + "epoch": 14.9, + "learning_rate": 4.255259037965151e-05, + "loss": 2.3587, + "step": 3007500 + }, + { + "epoch": 14.9, + "learning_rate": 4.255135179322543e-05, + "loss": 2.4104, + "step": 3008000 + }, + { + "epoch": 14.91, + "learning_rate": 4.255011320679935e-05, + "loss": 2.3866, + "step": 3008500 + }, + { + "epoch": 14.91, + "learning_rate": 4.2548874620373264e-05, + "loss": 2.4138, + "step": 3009000 + }, + { + "epoch": 14.91, + "learning_rate": 4.254763603394718e-05, + "loss": 2.3889, + "step": 3009500 + }, + { + "epoch": 14.91, + "learning_rate": 4.254639992469395e-05, + "loss": 2.4053, + "step": 3010000 + }, + { + "epoch": 14.92, + "learning_rate": 4.2545161338267866e-05, + "loss": 2.3929, + "step": 3010500 + }, + { + "epoch": 14.92, + "learning_rate": 4.254392275184178e-05, + "loss": 2.401, + "step": 3011000 + }, + { + "epoch": 14.92, + "learning_rate": 4.25426841654157e-05, + "loss": 2.3921, + "step": 3011500 + }, + { + "epoch": 14.92, + "learning_rate": 4.254144557898962e-05, + "loss": 2.4238, + "step": 3012000 + }, + { + "epoch": 14.92, + "learning_rate": 4.254020699256353e-05, + "loss": 2.3746, + "step": 3012500 + }, + { + "epoch": 14.93, + "learning_rate": 4.2538968406137444e-05, + "loss": 2.3839, + "step": 3013000 + }, + { + "epoch": 14.93, + "learning_rate": 4.253773229688421e-05, + "loss": 2.3947, + "step": 3013500 + }, + { + "epoch": 14.93, + "learning_rate": 4.253649371045813e-05, + "loss": 2.402, + "step": 3014000 + }, + { + "epoch": 14.93, + "learning_rate": 4.253525512403205e-05, + "loss": 2.3758, + "step": 3014500 + }, + { + "epoch": 14.94, + "learning_rate": 4.2534016537605964e-05, + "loss": 2.3933, + "step": 3015000 + }, + { + "epoch": 14.94, + "learning_rate": 4.253278290552558e-05, + "loss": 2.3876, + "step": 3015500 + }, + { + "epoch": 14.94, + "learning_rate": 4.2531544319099495e-05, + "loss": 2.3971, + "step": 3016000 + }, + { + "epoch": 14.94, + "learning_rate": 4.253030573267341e-05, + "loss": 2.4107, + "step": 3016500 + }, + { + "epoch": 14.95, + "learning_rate": 4.252906714624733e-05, + "loss": 2.4113, + "step": 3017000 + }, + { + "epoch": 14.95, + "learning_rate": 4.2527831036994104e-05, + "loss": 2.3868, + "step": 3017500 + }, + { + "epoch": 14.95, + "learning_rate": 4.252659245056802e-05, + "loss": 2.3966, + "step": 3018000 + }, + { + "epoch": 14.95, + "learning_rate": 4.252535386414194e-05, + "loss": 2.3974, + "step": 3018500 + }, + { + "epoch": 14.96, + "learning_rate": 4.252411527771585e-05, + "loss": 2.3664, + "step": 3019000 + }, + { + "epoch": 14.96, + "learning_rate": 4.2522876691289765e-05, + "loss": 2.3885, + "step": 3019500 + }, + { + "epoch": 14.96, + "learning_rate": 4.252163810486368e-05, + "loss": 2.3988, + "step": 3020000 + }, + { + "epoch": 14.96, + "learning_rate": 4.252040199561045e-05, + "loss": 2.3894, + "step": 3020500 + }, + { + "epoch": 14.97, + "learning_rate": 4.251916340918437e-05, + "loss": 2.3897, + "step": 3021000 + }, + { + "epoch": 14.97, + "learning_rate": 4.2517924822758285e-05, + "loss": 2.3855, + "step": 3021500 + }, + { + "epoch": 14.97, + "learning_rate": 4.2516688713505053e-05, + "loss": 2.3951, + "step": 3022000 + }, + { + "epoch": 14.97, + "learning_rate": 4.251545260425182e-05, + "loss": 2.438, + "step": 3022500 + }, + { + "epoch": 14.98, + "learning_rate": 4.251421401782574e-05, + "loss": 2.3995, + "step": 3023000 + }, + { + "epoch": 14.98, + "learning_rate": 4.2512975431399656e-05, + "loss": 2.4106, + "step": 3023500 + }, + { + "epoch": 14.98, + "learning_rate": 4.251173684497357e-05, + "loss": 2.3756, + "step": 3024000 + }, + { + "epoch": 14.98, + "learning_rate": 4.2510500735720335e-05, + "loss": 2.3789, + "step": 3024500 + }, + { + "epoch": 14.99, + "learning_rate": 4.250926214929425e-05, + "loss": 2.4049, + "step": 3025000 + }, + { + "epoch": 14.99, + "learning_rate": 4.250802356286817e-05, + "loss": 2.363, + "step": 3025500 + }, + { + "epoch": 14.99, + "learning_rate": 4.2506784976442086e-05, + "loss": 2.4208, + "step": 3026000 + }, + { + "epoch": 14.99, + "learning_rate": 4.2505546390016e-05, + "loss": 2.3791, + "step": 3026500 + }, + { + "epoch": 15.0, + "learning_rate": 4.250430780358992e-05, + "loss": 2.3832, + "step": 3027000 + }, + { + "epoch": 15.0, + "learning_rate": 4.250306921716384e-05, + "loss": 2.3903, + "step": 3027500 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.6470159402014201, + "eval_accuracy_mlm": 0.6010111516244826, + "eval_accuracy_nsp": 0.8638683082377951, + "eval_loss": 2.399909496307373, + "eval_runtime": 145.758, + "eval_samples_per_second": 1749.194, + "eval_steps_per_second": 72.888, + "step": 3027645 + }, + { + "epoch": 15.0, + "learning_rate": 4.2501830630737754e-05, + "loss": 2.3627, + "step": 3028000 + }, + { + "epoch": 15.0, + "learning_rate": 4.250059204431167e-05, + "loss": 2.3456, + "step": 3028500 + }, + { + "epoch": 15.01, + "learning_rate": 4.249935593505844e-05, + "loss": 2.3451, + "step": 3029000 + }, + { + "epoch": 15.01, + "learning_rate": 4.2498117348632356e-05, + "loss": 2.3651, + "step": 3029500 + }, + { + "epoch": 15.01, + "learning_rate": 4.249687876220627e-05, + "loss": 2.3518, + "step": 3030000 + }, + { + "epoch": 15.01, + "learning_rate": 4.249564017578019e-05, + "loss": 2.3489, + "step": 3030500 + }, + { + "epoch": 15.02, + "learning_rate": 4.249440158935411e-05, + "loss": 2.3564, + "step": 3031000 + }, + { + "epoch": 15.02, + "learning_rate": 4.2493163002928024e-05, + "loss": 2.3755, + "step": 3031500 + }, + { + "epoch": 15.02, + "learning_rate": 4.249192441650194e-05, + "loss": 2.3875, + "step": 3032000 + }, + { + "epoch": 15.02, + "learning_rate": 4.249068583007586e-05, + "loss": 2.3706, + "step": 3032500 + }, + { + "epoch": 15.03, + "learning_rate": 4.2489447243649775e-05, + "loss": 2.3564, + "step": 3033000 + }, + { + "epoch": 15.03, + "learning_rate": 4.2488208657223685e-05, + "loss": 2.3672, + "step": 3033500 + }, + { + "epoch": 15.03, + "learning_rate": 4.24869700707976e-05, + "loss": 2.3532, + "step": 3034000 + }, + { + "epoch": 15.03, + "learning_rate": 4.248573148437152e-05, + "loss": 2.3531, + "step": 3034500 + }, + { + "epoch": 15.04, + "learning_rate": 4.2484492897945436e-05, + "loss": 2.3578, + "step": 3035000 + }, + { + "epoch": 15.04, + "learning_rate": 4.2483256788692204e-05, + "loss": 2.4104, + "step": 3035500 + }, + { + "epoch": 15.04, + "learning_rate": 4.248201820226612e-05, + "loss": 2.3724, + "step": 3036000 + }, + { + "epoch": 15.04, + "learning_rate": 4.248077961584004e-05, + "loss": 2.3303, + "step": 3036500 + }, + { + "epoch": 15.05, + "learning_rate": 4.2479541029413955e-05, + "loss": 2.3409, + "step": 3037000 + }, + { + "epoch": 15.05, + "learning_rate": 4.2478302442987865e-05, + "loss": 2.3438, + "step": 3037500 + }, + { + "epoch": 15.05, + "learning_rate": 4.247706633373464e-05, + "loss": 2.3988, + "step": 3038000 + }, + { + "epoch": 15.05, + "learning_rate": 4.247582774730856e-05, + "loss": 2.3621, + "step": 3038500 + }, + { + "epoch": 15.06, + "learning_rate": 4.247459163805532e-05, + "loss": 2.4002, + "step": 3039000 + }, + { + "epoch": 15.06, + "learning_rate": 4.247335305162924e-05, + "loss": 2.3571, + "step": 3039500 + }, + { + "epoch": 15.06, + "learning_rate": 4.2472114465203154e-05, + "loss": 2.3597, + "step": 3040000 + }, + { + "epoch": 15.06, + "learning_rate": 4.247087587877707e-05, + "loss": 2.3572, + "step": 3040500 + }, + { + "epoch": 15.07, + "learning_rate": 4.246963729235099e-05, + "loss": 2.391, + "step": 3041000 + }, + { + "epoch": 15.07, + "learning_rate": 4.2468398705924905e-05, + "loss": 2.3717, + "step": 3041500 + }, + { + "epoch": 15.07, + "learning_rate": 4.246716011949882e-05, + "loss": 2.3924, + "step": 3042000 + }, + { + "epoch": 15.07, + "learning_rate": 4.246592153307274e-05, + "loss": 2.3618, + "step": 3042500 + }, + { + "epoch": 15.08, + "learning_rate": 4.2464682946646655e-05, + "loss": 2.3624, + "step": 3043000 + }, + { + "epoch": 15.08, + "learning_rate": 4.246344436022057e-05, + "loss": 2.368, + "step": 3043500 + }, + { + "epoch": 15.08, + "learning_rate": 4.246220577379448e-05, + "loss": 2.3958, + "step": 3044000 + }, + { + "epoch": 15.08, + "learning_rate": 4.24609671873684e-05, + "loss": 2.3759, + "step": 3044500 + }, + { + "epoch": 15.09, + "learning_rate": 4.2459728600942316e-05, + "loss": 2.3554, + "step": 3045000 + }, + { + "epoch": 15.09, + "learning_rate": 4.245849496886194e-05, + "loss": 2.3552, + "step": 3045500 + }, + { + "epoch": 15.09, + "learning_rate": 4.2457256382435854e-05, + "loss": 2.3811, + "step": 3046000 + }, + { + "epoch": 15.09, + "learning_rate": 4.245601779600977e-05, + "loss": 2.381, + "step": 3046500 + }, + { + "epoch": 15.1, + "learning_rate": 4.245477920958369e-05, + "loss": 2.3725, + "step": 3047000 + }, + { + "epoch": 15.1, + "learning_rate": 4.245354310033046e-05, + "loss": 2.3783, + "step": 3047500 + }, + { + "epoch": 15.1, + "learning_rate": 4.2452304513904374e-05, + "loss": 2.3869, + "step": 3048000 + }, + { + "epoch": 15.1, + "learning_rate": 4.245106592747829e-05, + "loss": 2.3689, + "step": 3048500 + }, + { + "epoch": 15.11, + "learning_rate": 4.244982734105221e-05, + "loss": 2.3686, + "step": 3049000 + }, + { + "epoch": 15.11, + "learning_rate": 4.2448588754626124e-05, + "loss": 2.3877, + "step": 3049500 + }, + { + "epoch": 15.11, + "learning_rate": 4.244735016820004e-05, + "loss": 2.3803, + "step": 3050000 + }, + { + "epoch": 15.11, + "learning_rate": 4.244611158177396e-05, + "loss": 2.3927, + "step": 3050500 + }, + { + "epoch": 15.12, + "learning_rate": 4.2444872995347875e-05, + "loss": 2.3387, + "step": 3051000 + }, + { + "epoch": 15.12, + "learning_rate": 4.244363688609464e-05, + "loss": 2.3727, + "step": 3051500 + }, + { + "epoch": 15.12, + "learning_rate": 4.2442398299668554e-05, + "loss": 2.3679, + "step": 3052000 + }, + { + "epoch": 15.12, + "learning_rate": 4.244115971324247e-05, + "loss": 2.3728, + "step": 3052500 + }, + { + "epoch": 15.13, + "learning_rate": 4.243992112681639e-05, + "loss": 2.3888, + "step": 3053000 + }, + { + "epoch": 15.13, + "learning_rate": 4.2438682540390305e-05, + "loss": 2.3701, + "step": 3053500 + }, + { + "epoch": 15.13, + "learning_rate": 4.243744395396422e-05, + "loss": 2.363, + "step": 3054000 + }, + { + "epoch": 15.13, + "learning_rate": 4.243620784471099e-05, + "loss": 2.3638, + "step": 3054500 + }, + { + "epoch": 15.14, + "learning_rate": 4.243496925828491e-05, + "loss": 2.3703, + "step": 3055000 + }, + { + "epoch": 15.14, + "learning_rate": 4.2433730671858824e-05, + "loss": 2.3799, + "step": 3055500 + }, + { + "epoch": 15.14, + "learning_rate": 4.243249208543274e-05, + "loss": 2.3817, + "step": 3056000 + }, + { + "epoch": 15.14, + "learning_rate": 4.243125349900666e-05, + "loss": 2.3796, + "step": 3056500 + }, + { + "epoch": 15.15, + "learning_rate": 4.243001738975342e-05, + "loss": 2.3622, + "step": 3057000 + }, + { + "epoch": 15.15, + "learning_rate": 4.242877880332734e-05, + "loss": 2.3801, + "step": 3057500 + }, + { + "epoch": 15.15, + "learning_rate": 4.2427542694074106e-05, + "loss": 2.3878, + "step": 3058000 + }, + { + "epoch": 15.15, + "learning_rate": 4.242630410764802e-05, + "loss": 2.3574, + "step": 3058500 + }, + { + "epoch": 15.16, + "learning_rate": 4.242506552122194e-05, + "loss": 2.3479, + "step": 3059000 + }, + { + "epoch": 15.16, + "learning_rate": 4.242382693479586e-05, + "loss": 2.3529, + "step": 3059500 + }, + { + "epoch": 15.16, + "learning_rate": 4.2422588348369774e-05, + "loss": 2.3862, + "step": 3060000 + }, + { + "epoch": 15.16, + "learning_rate": 4.242134976194369e-05, + "loss": 2.3654, + "step": 3060500 + }, + { + "epoch": 15.17, + "learning_rate": 4.242011117551761e-05, + "loss": 2.3518, + "step": 3061000 + }, + { + "epoch": 15.17, + "learning_rate": 4.2418875066264376e-05, + "loss": 2.374, + "step": 3061500 + }, + { + "epoch": 15.17, + "learning_rate": 4.241763647983829e-05, + "loss": 2.3655, + "step": 3062000 + }, + { + "epoch": 15.17, + "learning_rate": 4.241639789341221e-05, + "loss": 2.3658, + "step": 3062500 + }, + { + "epoch": 15.18, + "learning_rate": 4.241515930698613e-05, + "loss": 2.3602, + "step": 3063000 + }, + { + "epoch": 15.18, + "learning_rate": 4.2413920720560044e-05, + "loss": 2.3609, + "step": 3063500 + }, + { + "epoch": 15.18, + "learning_rate": 4.2412682134133954e-05, + "loss": 2.367, + "step": 3064000 + }, + { + "epoch": 15.18, + "learning_rate": 4.241144354770787e-05, + "loss": 2.3744, + "step": 3064500 + }, + { + "epoch": 15.19, + "learning_rate": 4.241020496128179e-05, + "loss": 2.3696, + "step": 3065000 + }, + { + "epoch": 15.19, + "learning_rate": 4.2408966374855705e-05, + "loss": 2.3808, + "step": 3065500 + }, + { + "epoch": 15.19, + "learning_rate": 4.2407730265602474e-05, + "loss": 2.3886, + "step": 3066000 + }, + { + "epoch": 15.19, + "learning_rate": 4.240649167917639e-05, + "loss": 2.3662, + "step": 3066500 + }, + { + "epoch": 15.19, + "learning_rate": 4.240525309275031e-05, + "loss": 2.3931, + "step": 3067000 + }, + { + "epoch": 15.2, + "learning_rate": 4.2404014506324225e-05, + "loss": 2.3764, + "step": 3067500 + }, + { + "epoch": 15.2, + "learning_rate": 4.240277591989814e-05, + "loss": 2.3745, + "step": 3068000 + }, + { + "epoch": 15.2, + "learning_rate": 4.240153733347206e-05, + "loss": 2.3827, + "step": 3068500 + }, + { + "epoch": 15.2, + "learning_rate": 4.240030122421883e-05, + "loss": 2.3736, + "step": 3069000 + }, + { + "epoch": 15.21, + "learning_rate": 4.2399062637792744e-05, + "loss": 2.3926, + "step": 3069500 + }, + { + "epoch": 15.21, + "learning_rate": 4.239782405136666e-05, + "loss": 2.3844, + "step": 3070000 + }, + { + "epoch": 15.21, + "learning_rate": 4.239658546494057e-05, + "loss": 2.3614, + "step": 3070500 + }, + { + "epoch": 15.21, + "learning_rate": 4.239534687851449e-05, + "loss": 2.3672, + "step": 3071000 + }, + { + "epoch": 15.22, + "learning_rate": 4.2394108292088405e-05, + "loss": 2.3647, + "step": 3071500 + }, + { + "epoch": 15.22, + "learning_rate": 4.239286970566232e-05, + "loss": 2.3832, + "step": 3072000 + }, + { + "epoch": 15.22, + "learning_rate": 4.239163111923624e-05, + "loss": 2.3962, + "step": 3072500 + }, + { + "epoch": 15.22, + "learning_rate": 4.239039500998301e-05, + "loss": 2.361, + "step": 3073000 + }, + { + "epoch": 15.23, + "learning_rate": 4.238915890072978e-05, + "loss": 2.3841, + "step": 3073500 + }, + { + "epoch": 15.23, + "learning_rate": 4.2387920314303694e-05, + "loss": 2.3729, + "step": 3074000 + }, + { + "epoch": 15.23, + "learning_rate": 4.238668172787761e-05, + "loss": 2.3962, + "step": 3074500 + }, + { + "epoch": 15.23, + "learning_rate": 4.238544314145153e-05, + "loss": 2.3965, + "step": 3075000 + }, + { + "epoch": 15.24, + "learning_rate": 4.2384204555025444e-05, + "loss": 2.3928, + "step": 3075500 + }, + { + "epoch": 15.24, + "learning_rate": 4.238296596859936e-05, + "loss": 2.3878, + "step": 3076000 + }, + { + "epoch": 15.24, + "learning_rate": 4.238172985934612e-05, + "loss": 2.3677, + "step": 3076500 + }, + { + "epoch": 15.24, + "learning_rate": 4.238049127292004e-05, + "loss": 2.3718, + "step": 3077000 + }, + { + "epoch": 15.25, + "learning_rate": 4.237925268649396e-05, + "loss": 2.3981, + "step": 3077500 + }, + { + "epoch": 15.25, + "learning_rate": 4.2378014100067874e-05, + "loss": 2.3709, + "step": 3078000 + }, + { + "epoch": 15.25, + "learning_rate": 4.237677551364179e-05, + "loss": 2.4063, + "step": 3078500 + }, + { + "epoch": 15.25, + "learning_rate": 4.237553940438856e-05, + "loss": 2.3766, + "step": 3079000 + }, + { + "epoch": 15.26, + "learning_rate": 4.237430081796248e-05, + "loss": 2.3836, + "step": 3079500 + }, + { + "epoch": 15.26, + "learning_rate": 4.2373064708709246e-05, + "loss": 2.3741, + "step": 3080000 + }, + { + "epoch": 15.26, + "learning_rate": 4.237182612228316e-05, + "loss": 2.3866, + "step": 3080500 + }, + { + "epoch": 15.26, + "learning_rate": 4.237058753585708e-05, + "loss": 2.3761, + "step": 3081000 + }, + { + "epoch": 15.27, + "learning_rate": 4.2369348949430996e-05, + "loss": 2.3606, + "step": 3081500 + }, + { + "epoch": 15.27, + "learning_rate": 4.2368110363004907e-05, + "loss": 2.3544, + "step": 3082000 + }, + { + "epoch": 15.27, + "learning_rate": 4.2366871776578824e-05, + "loss": 2.3828, + "step": 3082500 + }, + { + "epoch": 15.27, + "learning_rate": 4.236563319015274e-05, + "loss": 2.3494, + "step": 3083000 + }, + { + "epoch": 15.28, + "learning_rate": 4.236439460372666e-05, + "loss": 2.3749, + "step": 3083500 + }, + { + "epoch": 15.28, + "learning_rate": 4.2363156017300574e-05, + "loss": 2.384, + "step": 3084000 + }, + { + "epoch": 15.28, + "learning_rate": 4.236191990804735e-05, + "loss": 2.3721, + "step": 3084500 + }, + { + "epoch": 15.28, + "learning_rate": 4.236068132162126e-05, + "loss": 2.367, + "step": 3085000 + }, + { + "epoch": 15.29, + "learning_rate": 4.235944273519518e-05, + "loss": 2.3508, + "step": 3085500 + }, + { + "epoch": 15.29, + "learning_rate": 4.2358204148769094e-05, + "loss": 2.3788, + "step": 3086000 + }, + { + "epoch": 15.29, + "learning_rate": 4.235696556234301e-05, + "loss": 2.3785, + "step": 3086500 + }, + { + "epoch": 15.29, + "learning_rate": 4.235572697591693e-05, + "loss": 2.3908, + "step": 3087000 + }, + { + "epoch": 15.3, + "learning_rate": 4.2354488389490845e-05, + "loss": 2.3881, + "step": 3087500 + }, + { + "epoch": 15.3, + "learning_rate": 4.235324980306476e-05, + "loss": 2.3729, + "step": 3088000 + }, + { + "epoch": 15.3, + "learning_rate": 4.2352013693811524e-05, + "loss": 2.3611, + "step": 3088500 + }, + { + "epoch": 15.3, + "learning_rate": 4.235077510738544e-05, + "loss": 2.3854, + "step": 3089000 + }, + { + "epoch": 15.31, + "learning_rate": 4.2349538998132216e-05, + "loss": 2.3698, + "step": 3089500 + }, + { + "epoch": 15.31, + "learning_rate": 4.234830041170613e-05, + "loss": 2.3766, + "step": 3090000 + }, + { + "epoch": 15.31, + "learning_rate": 4.234706182528005e-05, + "loss": 2.366, + "step": 3090500 + }, + { + "epoch": 15.31, + "learning_rate": 4.234582323885397e-05, + "loss": 2.3723, + "step": 3091000 + }, + { + "epoch": 15.32, + "learning_rate": 4.234458465242788e-05, + "loss": 2.378, + "step": 3091500 + }, + { + "epoch": 15.32, + "learning_rate": 4.2343346066001794e-05, + "loss": 2.3886, + "step": 3092000 + }, + { + "epoch": 15.32, + "learning_rate": 4.234210747957571e-05, + "loss": 2.3767, + "step": 3092500 + }, + { + "epoch": 15.32, + "learning_rate": 4.234087137032248e-05, + "loss": 2.3718, + "step": 3093000 + }, + { + "epoch": 15.33, + "learning_rate": 4.23396327838964e-05, + "loss": 2.3711, + "step": 3093500 + }, + { + "epoch": 15.33, + "learning_rate": 4.2338394197470314e-05, + "loss": 2.3911, + "step": 3094000 + }, + { + "epoch": 15.33, + "learning_rate": 4.2337155611044224e-05, + "loss": 2.3506, + "step": 3094500 + }, + { + "epoch": 15.33, + "learning_rate": 4.2335919501791e-05, + "loss": 2.3763, + "step": 3095000 + }, + { + "epoch": 15.34, + "learning_rate": 4.2334680915364916e-05, + "loss": 2.3951, + "step": 3095500 + }, + { + "epoch": 15.34, + "learning_rate": 4.233344232893883e-05, + "loss": 2.3904, + "step": 3096000 + }, + { + "epoch": 15.34, + "learning_rate": 4.233220374251275e-05, + "loss": 2.3658, + "step": 3096500 + }, + { + "epoch": 15.34, + "learning_rate": 4.233096515608667e-05, + "loss": 2.3671, + "step": 3097000 + }, + { + "epoch": 15.35, + "learning_rate": 4.232972656966058e-05, + "loss": 2.3921, + "step": 3097500 + }, + { + "epoch": 15.35, + "learning_rate": 4.2328487983234494e-05, + "loss": 2.3858, + "step": 3098000 + }, + { + "epoch": 15.35, + "learning_rate": 4.232724939680841e-05, + "loss": 2.3708, + "step": 3098500 + }, + { + "epoch": 15.35, + "learning_rate": 4.232601081038233e-05, + "loss": 2.3545, + "step": 3099000 + }, + { + "epoch": 15.36, + "learning_rate": 4.2324772223956245e-05, + "loss": 2.3699, + "step": 3099500 + }, + { + "epoch": 15.36, + "learning_rate": 4.232353363753016e-05, + "loss": 2.3685, + "step": 3100000 + }, + { + "epoch": 15.36, + "learning_rate": 4.232229505110408e-05, + "loss": 2.3597, + "step": 3100500 + }, + { + "epoch": 15.36, + "learning_rate": 4.2321056464677996e-05, + "loss": 2.4052, + "step": 3101000 + }, + { + "epoch": 15.37, + "learning_rate": 4.231981787825191e-05, + "loss": 2.3693, + "step": 3101500 + }, + { + "epoch": 15.37, + "learning_rate": 4.231857929182583e-05, + "loss": 2.3889, + "step": 3102000 + }, + { + "epoch": 15.37, + "learning_rate": 4.2317340705399746e-05, + "loss": 2.3852, + "step": 3102500 + }, + { + "epoch": 15.37, + "learning_rate": 4.2316102118973663e-05, + "loss": 2.3749, + "step": 3103000 + }, + { + "epoch": 15.38, + "learning_rate": 4.2314866009720425e-05, + "loss": 2.3614, + "step": 3103500 + }, + { + "epoch": 15.38, + "learning_rate": 4.231362742329434e-05, + "loss": 2.3763, + "step": 3104000 + }, + { + "epoch": 15.38, + "learning_rate": 4.231238883686826e-05, + "loss": 2.3818, + "step": 3104500 + }, + { + "epoch": 15.38, + "learning_rate": 4.2311150250442176e-05, + "loss": 2.3593, + "step": 3105000 + }, + { + "epoch": 15.39, + "learning_rate": 4.2309914141188945e-05, + "loss": 2.3944, + "step": 3105500 + }, + { + "epoch": 15.39, + "learning_rate": 4.2308678031935714e-05, + "loss": 2.3794, + "step": 3106000 + }, + { + "epoch": 15.39, + "learning_rate": 4.230743944550963e-05, + "loss": 2.3482, + "step": 3106500 + }, + { + "epoch": 15.39, + "learning_rate": 4.230620085908355e-05, + "loss": 2.3852, + "step": 3107000 + }, + { + "epoch": 15.4, + "learning_rate": 4.2304962272657465e-05, + "loss": 2.3996, + "step": 3107500 + }, + { + "epoch": 15.4, + "learning_rate": 4.2303723686231375e-05, + "loss": 2.3387, + "step": 3108000 + }, + { + "epoch": 15.4, + "learning_rate": 4.230248509980529e-05, + "loss": 2.3473, + "step": 3108500 + }, + { + "epoch": 15.4, + "learning_rate": 4.230124899055207e-05, + "loss": 2.3551, + "step": 3109000 + }, + { + "epoch": 15.41, + "learning_rate": 4.2300012881298836e-05, + "loss": 2.3676, + "step": 3109500 + }, + { + "epoch": 15.41, + "learning_rate": 4.229877429487275e-05, + "loss": 2.3606, + "step": 3110000 + }, + { + "epoch": 15.41, + "learning_rate": 4.229753570844667e-05, + "loss": 2.3807, + "step": 3110500 + }, + { + "epoch": 15.41, + "learning_rate": 4.229629712202059e-05, + "loss": 2.3688, + "step": 3111000 + }, + { + "epoch": 15.42, + "learning_rate": 4.2295058535594504e-05, + "loss": 2.4027, + "step": 3111500 + }, + { + "epoch": 15.42, + "learning_rate": 4.2293819949168414e-05, + "loss": 2.3676, + "step": 3112000 + }, + { + "epoch": 15.42, + "learning_rate": 4.229258136274233e-05, + "loss": 2.3845, + "step": 3112500 + }, + { + "epoch": 15.42, + "learning_rate": 4.229134277631625e-05, + "loss": 2.3844, + "step": 3113000 + }, + { + "epoch": 15.43, + "learning_rate": 4.2290104189890165e-05, + "loss": 2.3842, + "step": 3113500 + }, + { + "epoch": 15.43, + "learning_rate": 4.2288868080636934e-05, + "loss": 2.3777, + "step": 3114000 + }, + { + "epoch": 15.43, + "learning_rate": 4.228762949421085e-05, + "loss": 2.3698, + "step": 3114500 + }, + { + "epoch": 15.43, + "learning_rate": 4.228639090778477e-05, + "loss": 2.3828, + "step": 3115000 + }, + { + "epoch": 15.44, + "learning_rate": 4.2285154798531536e-05, + "loss": 2.3369, + "step": 3115500 + }, + { + "epoch": 15.44, + "learning_rate": 4.228391621210545e-05, + "loss": 2.3914, + "step": 3116000 + }, + { + "epoch": 15.44, + "learning_rate": 4.228267762567937e-05, + "loss": 2.3863, + "step": 3116500 + }, + { + "epoch": 15.44, + "learning_rate": 4.228144151642613e-05, + "loss": 2.3908, + "step": 3117000 + }, + { + "epoch": 15.45, + "learning_rate": 4.228020293000005e-05, + "loss": 2.3652, + "step": 3117500 + }, + { + "epoch": 15.45, + "learning_rate": 4.2278964343573966e-05, + "loss": 2.3764, + "step": 3118000 + }, + { + "epoch": 15.45, + "learning_rate": 4.227772575714788e-05, + "loss": 2.4005, + "step": 3118500 + }, + { + "epoch": 15.45, + "learning_rate": 4.22764871707218e-05, + "loss": 2.3834, + "step": 3119000 + }, + { + "epoch": 15.46, + "learning_rate": 4.227525106146857e-05, + "loss": 2.3881, + "step": 3119500 + }, + { + "epoch": 15.46, + "learning_rate": 4.2274012475042486e-05, + "loss": 2.3676, + "step": 3120000 + }, + { + "epoch": 15.46, + "learning_rate": 4.22727738886164e-05, + "loss": 2.3671, + "step": 3120500 + }, + { + "epoch": 15.46, + "learning_rate": 4.227153530219032e-05, + "loss": 2.3702, + "step": 3121000 + }, + { + "epoch": 15.46, + "learning_rate": 4.2270296715764236e-05, + "loss": 2.3961, + "step": 3121500 + }, + { + "epoch": 15.47, + "learning_rate": 4.226905812933815e-05, + "loss": 2.3922, + "step": 3122000 + }, + { + "epoch": 15.47, + "learning_rate": 4.226781954291207e-05, + "loss": 2.3649, + "step": 3122500 + }, + { + "epoch": 15.47, + "learning_rate": 4.226658095648599e-05, + "loss": 2.3649, + "step": 3123000 + }, + { + "epoch": 15.47, + "learning_rate": 4.2265342370059904e-05, + "loss": 2.393, + "step": 3123500 + }, + { + "epoch": 15.48, + "learning_rate": 4.226410378363382e-05, + "loss": 2.3866, + "step": 3124000 + }, + { + "epoch": 15.48, + "learning_rate": 4.226286519720774e-05, + "loss": 2.3817, + "step": 3124500 + }, + { + "epoch": 15.48, + "learning_rate": 4.2261626610781655e-05, + "loss": 2.3549, + "step": 3125000 + }, + { + "epoch": 15.48, + "learning_rate": 4.2260388024355565e-05, + "loss": 2.3664, + "step": 3125500 + }, + { + "epoch": 15.49, + "learning_rate": 4.225914943792948e-05, + "loss": 2.3663, + "step": 3126000 + }, + { + "epoch": 15.49, + "learning_rate": 4.22579158058491e-05, + "loss": 2.3709, + "step": 3126500 + }, + { + "epoch": 15.49, + "learning_rate": 4.225667721942302e-05, + "loss": 2.368, + "step": 3127000 + }, + { + "epoch": 15.49, + "learning_rate": 4.2255438632996936e-05, + "loss": 2.3716, + "step": 3127500 + }, + { + "epoch": 15.5, + "learning_rate": 4.2254200046570853e-05, + "loss": 2.4181, + "step": 3128000 + }, + { + "epoch": 15.5, + "learning_rate": 4.225296146014477e-05, + "loss": 2.3836, + "step": 3128500 + }, + { + "epoch": 15.5, + "learning_rate": 4.225172287371869e-05, + "loss": 2.3661, + "step": 3129000 + }, + { + "epoch": 15.5, + "learning_rate": 4.225048676446545e-05, + "loss": 2.3753, + "step": 3129500 + }, + { + "epoch": 15.51, + "learning_rate": 4.2249248178039366e-05, + "loss": 2.3983, + "step": 3130000 + }, + { + "epoch": 15.51, + "learning_rate": 4.224800959161328e-05, + "loss": 2.3943, + "step": 3130500 + }, + { + "epoch": 15.51, + "learning_rate": 4.22467710051872e-05, + "loss": 2.3889, + "step": 3131000 + }, + { + "epoch": 15.51, + "learning_rate": 4.224553241876112e-05, + "loss": 2.3742, + "step": 3131500 + }, + { + "epoch": 15.52, + "learning_rate": 4.2244293832335034e-05, + "loss": 2.3783, + "step": 3132000 + }, + { + "epoch": 15.52, + "learning_rate": 4.224305524590895e-05, + "loss": 2.4068, + "step": 3132500 + }, + { + "epoch": 15.52, + "learning_rate": 4.224181665948287e-05, + "loss": 2.3924, + "step": 3133000 + }, + { + "epoch": 15.52, + "learning_rate": 4.2240580550229637e-05, + "loss": 2.386, + "step": 3133500 + }, + { + "epoch": 15.53, + "learning_rate": 4.2239341963803554e-05, + "loss": 2.3976, + "step": 3134000 + }, + { + "epoch": 15.53, + "learning_rate": 4.223810337737747e-05, + "loss": 2.369, + "step": 3134500 + }, + { + "epoch": 15.53, + "learning_rate": 4.223686479095139e-05, + "loss": 2.3854, + "step": 3135000 + }, + { + "epoch": 15.53, + "learning_rate": 4.2235626204525304e-05, + "loss": 2.392, + "step": 3135500 + }, + { + "epoch": 15.54, + "learning_rate": 4.223438761809922e-05, + "loss": 2.3792, + "step": 3136000 + }, + { + "epoch": 15.54, + "learning_rate": 4.223314903167314e-05, + "loss": 2.357, + "step": 3136500 + }, + { + "epoch": 15.54, + "learning_rate": 4.2231910445247055e-05, + "loss": 2.3669, + "step": 3137000 + }, + { + "epoch": 15.54, + "learning_rate": 4.223067185882097e-05, + "loss": 2.3775, + "step": 3137500 + }, + { + "epoch": 15.55, + "learning_rate": 4.2229435749567734e-05, + "loss": 2.3768, + "step": 3138000 + }, + { + "epoch": 15.55, + "learning_rate": 4.222819716314165e-05, + "loss": 2.3868, + "step": 3138500 + }, + { + "epoch": 15.55, + "learning_rate": 4.222695857671557e-05, + "loss": 2.3805, + "step": 3139000 + }, + { + "epoch": 15.55, + "learning_rate": 4.2225719990289485e-05, + "loss": 2.3855, + "step": 3139500 + }, + { + "epoch": 15.56, + "learning_rate": 4.22244814038634e-05, + "loss": 2.364, + "step": 3140000 + }, + { + "epoch": 15.56, + "learning_rate": 4.222324281743732e-05, + "loss": 2.3673, + "step": 3140500 + }, + { + "epoch": 15.56, + "learning_rate": 4.222200670818409e-05, + "loss": 2.3848, + "step": 3141000 + }, + { + "epoch": 15.56, + "learning_rate": 4.2220768121758004e-05, + "loss": 2.3857, + "step": 3141500 + }, + { + "epoch": 15.57, + "learning_rate": 4.221953201250477e-05, + "loss": 2.3872, + "step": 3142000 + }, + { + "epoch": 15.57, + "learning_rate": 4.2218293426078683e-05, + "loss": 2.3764, + "step": 3142500 + }, + { + "epoch": 15.57, + "learning_rate": 4.22170548396526e-05, + "loss": 2.3927, + "step": 3143000 + }, + { + "epoch": 15.57, + "learning_rate": 4.221581625322652e-05, + "loss": 2.3675, + "step": 3143500 + }, + { + "epoch": 15.58, + "learning_rate": 4.2214577666800434e-05, + "loss": 2.3898, + "step": 3144000 + }, + { + "epoch": 15.58, + "learning_rate": 4.221333908037435e-05, + "loss": 2.3677, + "step": 3144500 + }, + { + "epoch": 15.58, + "learning_rate": 4.221210049394827e-05, + "loss": 2.4042, + "step": 3145000 + }, + { + "epoch": 15.58, + "learning_rate": 4.2210861907522185e-05, + "loss": 2.3829, + "step": 3145500 + }, + { + "epoch": 15.59, + "learning_rate": 4.2209625798268954e-05, + "loss": 2.3929, + "step": 3146000 + }, + { + "epoch": 15.59, + "learning_rate": 4.220838721184287e-05, + "loss": 2.3803, + "step": 3146500 + }, + { + "epoch": 15.59, + "learning_rate": 4.220714862541679e-05, + "loss": 2.3651, + "step": 3147000 + }, + { + "epoch": 15.59, + "learning_rate": 4.2205910038990705e-05, + "loss": 2.4064, + "step": 3147500 + }, + { + "epoch": 15.6, + "learning_rate": 4.220467392973747e-05, + "loss": 2.3791, + "step": 3148000 + }, + { + "epoch": 15.6, + "learning_rate": 4.220343534331139e-05, + "loss": 2.402, + "step": 3148500 + }, + { + "epoch": 15.6, + "learning_rate": 4.22021967568853e-05, + "loss": 2.3714, + "step": 3149000 + }, + { + "epoch": 15.6, + "learning_rate": 4.220095817045922e-05, + "loss": 2.3842, + "step": 3149500 + }, + { + "epoch": 15.61, + "learning_rate": 4.2199719584033134e-05, + "loss": 2.3855, + "step": 3150000 + }, + { + "epoch": 15.61, + "learning_rate": 4.21984834747799e-05, + "loss": 2.4002, + "step": 3150500 + }, + { + "epoch": 15.61, + "learning_rate": 4.219724488835382e-05, + "loss": 2.3798, + "step": 3151000 + }, + { + "epoch": 15.61, + "learning_rate": 4.219600630192774e-05, + "loss": 2.3679, + "step": 3151500 + }, + { + "epoch": 15.62, + "learning_rate": 4.2194770192674506e-05, + "loss": 2.3694, + "step": 3152000 + }, + { + "epoch": 15.62, + "learning_rate": 4.219353160624842e-05, + "loss": 2.3997, + "step": 3152500 + }, + { + "epoch": 15.62, + "learning_rate": 4.219229301982234e-05, + "loss": 2.3808, + "step": 3153000 + }, + { + "epoch": 15.62, + "learning_rate": 4.2191054433396257e-05, + "loss": 2.3822, + "step": 3153500 + }, + { + "epoch": 15.63, + "learning_rate": 4.2189815846970173e-05, + "loss": 2.3799, + "step": 3154000 + }, + { + "epoch": 15.63, + "learning_rate": 4.218857726054409e-05, + "loss": 2.3874, + "step": 3154500 + }, + { + "epoch": 15.63, + "learning_rate": 4.218733867411801e-05, + "loss": 2.387, + "step": 3155000 + }, + { + "epoch": 15.63, + "learning_rate": 4.2186100087691924e-05, + "loss": 2.3953, + "step": 3155500 + }, + { + "epoch": 15.64, + "learning_rate": 4.2184861501265834e-05, + "loss": 2.3654, + "step": 3156000 + }, + { + "epoch": 15.64, + "learning_rate": 4.218362291483975e-05, + "loss": 2.3823, + "step": 3156500 + }, + { + "epoch": 15.64, + "learning_rate": 4.218238432841367e-05, + "loss": 2.3889, + "step": 3157000 + }, + { + "epoch": 15.64, + "learning_rate": 4.218114821916044e-05, + "loss": 2.3922, + "step": 3157500 + }, + { + "epoch": 15.65, + "learning_rate": 4.2179909632734354e-05, + "loss": 2.395, + "step": 3158000 + }, + { + "epoch": 15.65, + "learning_rate": 4.217867352348112e-05, + "loss": 2.3761, + "step": 3158500 + }, + { + "epoch": 15.65, + "learning_rate": 4.217743493705504e-05, + "loss": 2.4031, + "step": 3159000 + }, + { + "epoch": 15.65, + "learning_rate": 4.217619635062896e-05, + "loss": 2.3753, + "step": 3159500 + }, + { + "epoch": 15.66, + "learning_rate": 4.2174957764202874e-05, + "loss": 2.3735, + "step": 3160000 + }, + { + "epoch": 15.66, + "learning_rate": 4.217371917777679e-05, + "loss": 2.3785, + "step": 3160500 + }, + { + "epoch": 15.66, + "learning_rate": 4.217248059135071e-05, + "loss": 2.3862, + "step": 3161000 + }, + { + "epoch": 15.66, + "learning_rate": 4.217124448209747e-05, + "loss": 2.3566, + "step": 3161500 + }, + { + "epoch": 15.67, + "learning_rate": 4.2170008372844245e-05, + "loss": 2.3831, + "step": 3162000 + }, + { + "epoch": 15.67, + "learning_rate": 4.216876978641816e-05, + "loss": 2.3836, + "step": 3162500 + }, + { + "epoch": 15.67, + "learning_rate": 4.216753119999208e-05, + "loss": 2.3862, + "step": 3163000 + }, + { + "epoch": 15.67, + "learning_rate": 4.2166292613565996e-05, + "loss": 2.3876, + "step": 3163500 + }, + { + "epoch": 15.68, + "learning_rate": 4.2165054027139906e-05, + "loss": 2.3782, + "step": 3164000 + }, + { + "epoch": 15.68, + "learning_rate": 4.216381544071382e-05, + "loss": 2.3743, + "step": 3164500 + }, + { + "epoch": 15.68, + "learning_rate": 4.216257685428774e-05, + "loss": 2.3789, + "step": 3165000 + }, + { + "epoch": 15.68, + "learning_rate": 4.216133826786166e-05, + "loss": 2.3919, + "step": 3165500 + }, + { + "epoch": 15.69, + "learning_rate": 4.2160099681435574e-05, + "loss": 2.3701, + "step": 3166000 + }, + { + "epoch": 15.69, + "learning_rate": 4.215886109500949e-05, + "loss": 2.3719, + "step": 3166500 + }, + { + "epoch": 15.69, + "learning_rate": 4.215762250858341e-05, + "loss": 2.3928, + "step": 3167000 + }, + { + "epoch": 15.69, + "learning_rate": 4.2156383922157325e-05, + "loss": 2.3984, + "step": 3167500 + }, + { + "epoch": 15.7, + "learning_rate": 4.215514533573124e-05, + "loss": 2.3644, + "step": 3168000 + }, + { + "epoch": 15.7, + "learning_rate": 4.215390674930516e-05, + "loss": 2.3855, + "step": 3168500 + }, + { + "epoch": 15.7, + "learning_rate": 4.2152668162879075e-05, + "loss": 2.3751, + "step": 3169000 + }, + { + "epoch": 15.7, + "learning_rate": 4.2151429576452985e-05, + "loss": 2.3593, + "step": 3169500 + }, + { + "epoch": 15.71, + "learning_rate": 4.2150193467199754e-05, + "loss": 2.3574, + "step": 3170000 + }, + { + "epoch": 15.71, + "learning_rate": 4.214895488077367e-05, + "loss": 2.3706, + "step": 3170500 + }, + { + "epoch": 15.71, + "learning_rate": 4.214771629434759e-05, + "loss": 2.3809, + "step": 3171000 + }, + { + "epoch": 15.71, + "learning_rate": 4.214648018509436e-05, + "loss": 2.3843, + "step": 3171500 + }, + { + "epoch": 15.72, + "learning_rate": 4.2145241598668274e-05, + "loss": 2.3904, + "step": 3172000 + }, + { + "epoch": 15.72, + "learning_rate": 4.214400301224219e-05, + "loss": 2.38, + "step": 3172500 + }, + { + "epoch": 15.72, + "learning_rate": 4.214276442581611e-05, + "loss": 2.3769, + "step": 3173000 + }, + { + "epoch": 15.72, + "learning_rate": 4.2141525839390025e-05, + "loss": 2.3672, + "step": 3173500 + }, + { + "epoch": 15.73, + "learning_rate": 4.214028725296394e-05, + "loss": 2.4197, + "step": 3174000 + }, + { + "epoch": 15.73, + "learning_rate": 4.213904866653786e-05, + "loss": 2.3741, + "step": 3174500 + }, + { + "epoch": 15.73, + "learning_rate": 4.2137810080111775e-05, + "loss": 2.3853, + "step": 3175000 + }, + { + "epoch": 15.73, + "learning_rate": 4.213657149368569e-05, + "loss": 2.3921, + "step": 3175500 + }, + { + "epoch": 15.74, + "learning_rate": 4.213533290725961e-05, + "loss": 2.3486, + "step": 3176000 + }, + { + "epoch": 15.74, + "learning_rate": 4.213409679800637e-05, + "loss": 2.3594, + "step": 3176500 + }, + { + "epoch": 15.74, + "learning_rate": 4.213285821158029e-05, + "loss": 2.3819, + "step": 3177000 + }, + { + "epoch": 15.74, + "learning_rate": 4.2131619625154205e-05, + "loss": 2.3789, + "step": 3177500 + }, + { + "epoch": 15.74, + "learning_rate": 4.213038103872812e-05, + "loss": 2.3747, + "step": 3178000 + }, + { + "epoch": 15.75, + "learning_rate": 4.212914245230204e-05, + "loss": 2.3711, + "step": 3178500 + }, + { + "epoch": 15.75, + "learning_rate": 4.2127903865875956e-05, + "loss": 2.3928, + "step": 3179000 + }, + { + "epoch": 15.75, + "learning_rate": 4.2126667756622725e-05, + "loss": 2.3843, + "step": 3179500 + }, + { + "epoch": 15.75, + "learning_rate": 4.212543164736949e-05, + "loss": 2.4232, + "step": 3180000 + }, + { + "epoch": 15.76, + "learning_rate": 4.2124193060943404e-05, + "loss": 2.3803, + "step": 3180500 + }, + { + "epoch": 15.76, + "learning_rate": 4.212295447451732e-05, + "loss": 2.3913, + "step": 3181000 + }, + { + "epoch": 15.76, + "learning_rate": 4.212171588809124e-05, + "loss": 2.3512, + "step": 3181500 + }, + { + "epoch": 15.76, + "learning_rate": 4.2120477301665155e-05, + "loss": 2.3715, + "step": 3182000 + }, + { + "epoch": 15.77, + "learning_rate": 4.211923871523907e-05, + "loss": 2.3803, + "step": 3182500 + }, + { + "epoch": 15.77, + "learning_rate": 4.211800012881299e-05, + "loss": 2.3765, + "step": 3183000 + }, + { + "epoch": 15.77, + "learning_rate": 4.211676401955976e-05, + "loss": 2.3654, + "step": 3183500 + }, + { + "epoch": 15.77, + "learning_rate": 4.2115525433133674e-05, + "loss": 2.3893, + "step": 3184000 + }, + { + "epoch": 15.78, + "learning_rate": 4.211428684670759e-05, + "loss": 2.3841, + "step": 3184500 + }, + { + "epoch": 15.78, + "learning_rate": 4.211304826028151e-05, + "loss": 2.3901, + "step": 3185000 + }, + { + "epoch": 15.78, + "learning_rate": 4.2111809673855425e-05, + "loss": 2.3614, + "step": 3185500 + }, + { + "epoch": 15.78, + "learning_rate": 4.211057108742934e-05, + "loss": 2.3858, + "step": 3186000 + }, + { + "epoch": 15.79, + "learning_rate": 4.210933250100326e-05, + "loss": 2.3817, + "step": 3186500 + }, + { + "epoch": 15.79, + "learning_rate": 4.210809886892288e-05, + "loss": 2.3566, + "step": 3187000 + }, + { + "epoch": 15.79, + "learning_rate": 4.2106860282496796e-05, + "loss": 2.3557, + "step": 3187500 + }, + { + "epoch": 15.79, + "learning_rate": 4.210562169607071e-05, + "loss": 2.3717, + "step": 3188000 + }, + { + "epoch": 15.8, + "learning_rate": 4.210438310964463e-05, + "loss": 2.3864, + "step": 3188500 + }, + { + "epoch": 15.8, + "learning_rate": 4.210314452321854e-05, + "loss": 2.3699, + "step": 3189000 + }, + { + "epoch": 15.8, + "learning_rate": 4.210190593679246e-05, + "loss": 2.4, + "step": 3189500 + }, + { + "epoch": 15.8, + "learning_rate": 4.2100667350366374e-05, + "loss": 2.3853, + "step": 3190000 + }, + { + "epoch": 15.81, + "learning_rate": 4.209942876394029e-05, + "loss": 2.3952, + "step": 3190500 + }, + { + "epoch": 15.81, + "learning_rate": 4.209819017751421e-05, + "loss": 2.3948, + "step": 3191000 + }, + { + "epoch": 15.81, + "learning_rate": 4.2096951591088125e-05, + "loss": 2.3522, + "step": 3191500 + }, + { + "epoch": 15.81, + "learning_rate": 4.209571300466204e-05, + "loss": 2.3915, + "step": 3192000 + }, + { + "epoch": 15.82, + "learning_rate": 4.209447441823596e-05, + "loss": 2.3876, + "step": 3192500 + }, + { + "epoch": 15.82, + "learning_rate": 4.2093235831809876e-05, + "loss": 2.4019, + "step": 3193000 + }, + { + "epoch": 15.82, + "learning_rate": 4.209199724538379e-05, + "loss": 2.3881, + "step": 3193500 + }, + { + "epoch": 15.82, + "learning_rate": 4.209075865895771e-05, + "loss": 2.4025, + "step": 3194000 + }, + { + "epoch": 15.83, + "learning_rate": 4.208952254970447e-05, + "loss": 2.3753, + "step": 3194500 + }, + { + "epoch": 15.83, + "learning_rate": 4.208828644045124e-05, + "loss": 2.4118, + "step": 3195000 + }, + { + "epoch": 15.83, + "learning_rate": 4.208704785402516e-05, + "loss": 2.3851, + "step": 3195500 + }, + { + "epoch": 15.83, + "learning_rate": 4.2085809267599074e-05, + "loss": 2.3801, + "step": 3196000 + }, + { + "epoch": 15.84, + "learning_rate": 4.208457315834585e-05, + "loss": 2.394, + "step": 3196500 + }, + { + "epoch": 15.84, + "learning_rate": 4.208333457191977e-05, + "loss": 2.4099, + "step": 3197000 + }, + { + "epoch": 15.84, + "learning_rate": 4.208209598549368e-05, + "loss": 2.3875, + "step": 3197500 + }, + { + "epoch": 15.84, + "learning_rate": 4.2080857399067594e-05, + "loss": 2.3896, + "step": 3198000 + }, + { + "epoch": 15.85, + "learning_rate": 4.207961881264151e-05, + "loss": 2.3944, + "step": 3198500 + }, + { + "epoch": 15.85, + "learning_rate": 4.207838022621543e-05, + "loss": 2.3874, + "step": 3199000 + }, + { + "epoch": 15.85, + "learning_rate": 4.2077141639789345e-05, + "loss": 2.3929, + "step": 3199500 + }, + { + "epoch": 15.85, + "learning_rate": 4.2075903053363255e-05, + "loss": 2.3987, + "step": 3200000 + }, + { + "epoch": 15.86, + "learning_rate": 4.207466694411003e-05, + "loss": 2.3844, + "step": 3200500 + }, + { + "epoch": 15.86, + "learning_rate": 4.207342835768395e-05, + "loss": 2.3909, + "step": 3201000 + }, + { + "epoch": 15.86, + "learning_rate": 4.207218977125786e-05, + "loss": 2.3669, + "step": 3201500 + }, + { + "epoch": 15.86, + "learning_rate": 4.2070951184831774e-05, + "loss": 2.3722, + "step": 3202000 + }, + { + "epoch": 15.87, + "learning_rate": 4.206971259840569e-05, + "loss": 2.3922, + "step": 3202500 + }, + { + "epoch": 15.87, + "learning_rate": 4.206847401197961e-05, + "loss": 2.3647, + "step": 3203000 + }, + { + "epoch": 15.87, + "learning_rate": 4.2067235425553525e-05, + "loss": 2.3798, + "step": 3203500 + }, + { + "epoch": 15.87, + "learning_rate": 4.206599683912744e-05, + "loss": 2.3789, + "step": 3204000 + }, + { + "epoch": 15.88, + "learning_rate": 4.206475825270136e-05, + "loss": 2.3835, + "step": 3204500 + }, + { + "epoch": 15.88, + "learning_rate": 4.2063519666275276e-05, + "loss": 2.4027, + "step": 3205000 + }, + { + "epoch": 15.88, + "learning_rate": 4.206228107984919e-05, + "loss": 2.3981, + "step": 3205500 + }, + { + "epoch": 15.88, + "learning_rate": 4.206104249342311e-05, + "loss": 2.3786, + "step": 3206000 + }, + { + "epoch": 15.89, + "learning_rate": 4.205980390699703e-05, + "loss": 2.3917, + "step": 3206500 + }, + { + "epoch": 15.89, + "learning_rate": 4.2058565320570944e-05, + "loss": 2.381, + "step": 3207000 + }, + { + "epoch": 15.89, + "learning_rate": 4.2057329211317706e-05, + "loss": 2.4097, + "step": 3207500 + }, + { + "epoch": 15.89, + "learning_rate": 4.205609062489162e-05, + "loss": 2.37, + "step": 3208000 + }, + { + "epoch": 15.9, + "learning_rate": 4.205485203846554e-05, + "loss": 2.3886, + "step": 3208500 + }, + { + "epoch": 15.9, + "learning_rate": 4.2053613452039457e-05, + "loss": 2.3526, + "step": 3209000 + }, + { + "epoch": 15.9, + "learning_rate": 4.2052374865613374e-05, + "loss": 2.3746, + "step": 3209500 + }, + { + "epoch": 15.9, + "learning_rate": 4.205113875636014e-05, + "loss": 2.397, + "step": 3210000 + }, + { + "epoch": 15.91, + "learning_rate": 4.204990016993406e-05, + "loss": 2.3853, + "step": 3210500 + }, + { + "epoch": 15.91, + "learning_rate": 4.2048661583507976e-05, + "loss": 2.4041, + "step": 3211000 + }, + { + "epoch": 15.91, + "learning_rate": 4.204742299708189e-05, + "loss": 2.3686, + "step": 3211500 + }, + { + "epoch": 15.91, + "learning_rate": 4.204618441065581e-05, + "loss": 2.3785, + "step": 3212000 + }, + { + "epoch": 15.92, + "learning_rate": 4.204494582422973e-05, + "loss": 2.4122, + "step": 3212500 + }, + { + "epoch": 15.92, + "learning_rate": 4.2043707237803644e-05, + "loss": 2.3736, + "step": 3213000 + }, + { + "epoch": 15.92, + "learning_rate": 4.204246865137756e-05, + "loss": 2.3955, + "step": 3213500 + }, + { + "epoch": 15.92, + "learning_rate": 4.204123006495148e-05, + "loss": 2.4139, + "step": 3214000 + }, + { + "epoch": 15.93, + "learning_rate": 4.2039991478525395e-05, + "loss": 2.385, + "step": 3214500 + }, + { + "epoch": 15.93, + "learning_rate": 4.203875536927216e-05, + "loss": 2.3506, + "step": 3215000 + }, + { + "epoch": 15.93, + "learning_rate": 4.2037516782846074e-05, + "loss": 2.3841, + "step": 3215500 + }, + { + "epoch": 15.93, + "learning_rate": 4.203627819641999e-05, + "loss": 2.3781, + "step": 3216000 + }, + { + "epoch": 15.94, + "learning_rate": 4.203503960999391e-05, + "loss": 2.3828, + "step": 3216500 + }, + { + "epoch": 15.94, + "learning_rate": 4.2033801023567824e-05, + "loss": 2.402, + "step": 3217000 + }, + { + "epoch": 15.94, + "learning_rate": 4.203256243714174e-05, + "loss": 2.3728, + "step": 3217500 + }, + { + "epoch": 15.94, + "learning_rate": 4.203132632788851e-05, + "loss": 2.3832, + "step": 3218000 + }, + { + "epoch": 15.95, + "learning_rate": 4.203008774146243e-05, + "loss": 2.3773, + "step": 3218500 + }, + { + "epoch": 15.95, + "learning_rate": 4.2028849155036344e-05, + "loss": 2.3783, + "step": 3219000 + }, + { + "epoch": 15.95, + "learning_rate": 4.202761056861026e-05, + "loss": 2.401, + "step": 3219500 + }, + { + "epoch": 15.95, + "learning_rate": 4.202637445935702e-05, + "loss": 2.4113, + "step": 3220000 + }, + { + "epoch": 15.96, + "learning_rate": 4.202513587293094e-05, + "loss": 2.3905, + "step": 3220500 + }, + { + "epoch": 15.96, + "learning_rate": 4.202389728650486e-05, + "loss": 2.3798, + "step": 3221000 + }, + { + "epoch": 15.96, + "learning_rate": 4.2022658700078774e-05, + "loss": 2.3802, + "step": 3221500 + }, + { + "epoch": 15.96, + "learning_rate": 4.202142011365269e-05, + "loss": 2.3822, + "step": 3222000 + }, + { + "epoch": 15.97, + "learning_rate": 4.202018152722661e-05, + "loss": 2.3768, + "step": 3222500 + }, + { + "epoch": 15.97, + "learning_rate": 4.2018942940800525e-05, + "loss": 2.373, + "step": 3223000 + }, + { + "epoch": 15.97, + "learning_rate": 4.201770683154729e-05, + "loss": 2.3979, + "step": 3223500 + }, + { + "epoch": 15.97, + "learning_rate": 4.201646824512121e-05, + "loss": 2.4065, + "step": 3224000 + }, + { + "epoch": 15.98, + "learning_rate": 4.201522965869513e-05, + "loss": 2.3761, + "step": 3224500 + }, + { + "epoch": 15.98, + "learning_rate": 4.2013991072269044e-05, + "loss": 2.3741, + "step": 3225000 + }, + { + "epoch": 15.98, + "learning_rate": 4.201275248584296e-05, + "loss": 2.354, + "step": 3225500 + }, + { + "epoch": 15.98, + "learning_rate": 4.201151637658973e-05, + "loss": 2.3871, + "step": 3226000 + }, + { + "epoch": 15.99, + "learning_rate": 4.201027779016365e-05, + "loss": 2.3685, + "step": 3226500 + }, + { + "epoch": 15.99, + "learning_rate": 4.200903920373756e-05, + "loss": 2.3788, + "step": 3227000 + }, + { + "epoch": 15.99, + "learning_rate": 4.2007805571657184e-05, + "loss": 2.365, + "step": 3227500 + }, + { + "epoch": 15.99, + "learning_rate": 4.20065669852311e-05, + "loss": 2.3865, + "step": 3228000 + }, + { + "epoch": 16.0, + "learning_rate": 4.200532839880502e-05, + "loss": 2.3693, + "step": 3228500 + }, + { + "epoch": 16.0, + "learning_rate": 4.200409228955178e-05, + "loss": 2.3664, + "step": 3229000 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.6492868439902124, + "eval_accuracy_mlm": 0.6034055016175841, + "eval_accuracy_nsp": 0.8654450323385329, + "eval_loss": 2.379127025604248, + "eval_runtime": 145.9487, + "eval_samples_per_second": 1746.908, + "eval_steps_per_second": 72.793, + "step": 3229488 + }, + { + "epoch": 16.0, + "learning_rate": 4.20028537031257e-05, + "loss": 2.3991, + "step": 3229500 + }, + { + "epoch": 16.0, + "learning_rate": 4.2001615116699614e-05, + "loss": 2.3311, + "step": 3230000 + }, + { + "epoch": 16.01, + "learning_rate": 4.200037653027353e-05, + "loss": 2.379, + "step": 3230500 + }, + { + "epoch": 16.01, + "learning_rate": 4.199913794384745e-05, + "loss": 2.3483, + "step": 3231000 + }, + { + "epoch": 16.01, + "learning_rate": 4.1997899357421365e-05, + "loss": 2.3625, + "step": 3231500 + }, + { + "epoch": 16.01, + "learning_rate": 4.199666077099528e-05, + "loss": 2.3739, + "step": 3232000 + }, + { + "epoch": 16.01, + "learning_rate": 4.199542218456919e-05, + "loss": 2.3595, + "step": 3232500 + }, + { + "epoch": 16.02, + "learning_rate": 4.199418359814311e-05, + "loss": 2.3672, + "step": 3233000 + }, + { + "epoch": 16.02, + "learning_rate": 4.1992947488889885e-05, + "loss": 2.3369, + "step": 3233500 + }, + { + "epoch": 16.02, + "learning_rate": 4.19917089024638e-05, + "loss": 2.3675, + "step": 3234000 + }, + { + "epoch": 16.02, + "learning_rate": 4.199047031603772e-05, + "loss": 2.3456, + "step": 3234500 + }, + { + "epoch": 16.03, + "learning_rate": 4.1989231729611635e-05, + "loss": 2.345, + "step": 3235000 + }, + { + "epoch": 16.03, + "learning_rate": 4.1987993143185545e-05, + "loss": 2.3621, + "step": 3235500 + }, + { + "epoch": 16.03, + "learning_rate": 4.198675455675946e-05, + "loss": 2.3692, + "step": 3236000 + }, + { + "epoch": 16.03, + "learning_rate": 4.198551597033338e-05, + "loss": 2.3583, + "step": 3236500 + }, + { + "epoch": 16.04, + "learning_rate": 4.1984277383907296e-05, + "loss": 2.3604, + "step": 3237000 + }, + { + "epoch": 16.04, + "learning_rate": 4.198303879748121e-05, + "loss": 2.3367, + "step": 3237500 + }, + { + "epoch": 16.04, + "learning_rate": 4.198180268822798e-05, + "loss": 2.3517, + "step": 3238000 + }, + { + "epoch": 16.04, + "learning_rate": 4.19805641018019e-05, + "loss": 2.3562, + "step": 3238500 + }, + { + "epoch": 16.05, + "learning_rate": 4.197932551537581e-05, + "loss": 2.3516, + "step": 3239000 + }, + { + "epoch": 16.05, + "learning_rate": 4.1978086928949726e-05, + "loss": 2.3629, + "step": 3239500 + }, + { + "epoch": 16.05, + "learning_rate": 4.197684834252364e-05, + "loss": 2.3508, + "step": 3240000 + }, + { + "epoch": 16.05, + "learning_rate": 4.197560975609756e-05, + "loss": 2.3546, + "step": 3240500 + }, + { + "epoch": 16.06, + "learning_rate": 4.1974373646844335e-05, + "loss": 2.3317, + "step": 3241000 + }, + { + "epoch": 16.06, + "learning_rate": 4.19731375375911e-05, + "loss": 2.3723, + "step": 3241500 + }, + { + "epoch": 16.06, + "learning_rate": 4.1971898951165014e-05, + "loss": 2.3662, + "step": 3242000 + }, + { + "epoch": 16.06, + "learning_rate": 4.197066036473893e-05, + "loss": 2.3793, + "step": 3242500 + }, + { + "epoch": 16.07, + "learning_rate": 4.196942177831285e-05, + "loss": 2.3432, + "step": 3243000 + }, + { + "epoch": 16.07, + "learning_rate": 4.196818566905962e-05, + "loss": 2.357, + "step": 3243500 + }, + { + "epoch": 16.07, + "learning_rate": 4.1966947082633534e-05, + "loss": 2.3268, + "step": 3244000 + }, + { + "epoch": 16.07, + "learning_rate": 4.196570849620745e-05, + "loss": 2.3552, + "step": 3244500 + }, + { + "epoch": 16.08, + "learning_rate": 4.196446990978137e-05, + "loss": 2.3761, + "step": 3245000 + }, + { + "epoch": 16.08, + "learning_rate": 4.1963231323355285e-05, + "loss": 2.3717, + "step": 3245500 + }, + { + "epoch": 16.08, + "learning_rate": 4.19619927369292e-05, + "loss": 2.3475, + "step": 3246000 + }, + { + "epoch": 16.08, + "learning_rate": 4.196075415050312e-05, + "loss": 2.3493, + "step": 3246500 + }, + { + "epoch": 16.09, + "learning_rate": 4.1959515564077036e-05, + "loss": 2.3632, + "step": 3247000 + }, + { + "epoch": 16.09, + "learning_rate": 4.1958279454823804e-05, + "loss": 2.3624, + "step": 3247500 + }, + { + "epoch": 16.09, + "learning_rate": 4.1957040868397715e-05, + "loss": 2.3572, + "step": 3248000 + }, + { + "epoch": 16.09, + "learning_rate": 4.195580228197163e-05, + "loss": 2.3751, + "step": 3248500 + }, + { + "epoch": 16.1, + "learning_rate": 4.195456369554555e-05, + "loss": 2.3589, + "step": 3249000 + }, + { + "epoch": 16.1, + "learning_rate": 4.195332758629232e-05, + "loss": 2.3459, + "step": 3249500 + }, + { + "epoch": 16.1, + "learning_rate": 4.1952088999866234e-05, + "loss": 2.3791, + "step": 3250000 + }, + { + "epoch": 16.1, + "learning_rate": 4.195085041344015e-05, + "loss": 2.3528, + "step": 3250500 + }, + { + "epoch": 16.11, + "learning_rate": 4.194961182701407e-05, + "loss": 2.3381, + "step": 3251000 + }, + { + "epoch": 16.11, + "learning_rate": 4.1948373240587985e-05, + "loss": 2.3431, + "step": 3251500 + }, + { + "epoch": 16.11, + "learning_rate": 4.19471346541619e-05, + "loss": 2.3478, + "step": 3252000 + }, + { + "epoch": 16.11, + "learning_rate": 4.194589606773582e-05, + "loss": 2.3681, + "step": 3252500 + }, + { + "epoch": 16.12, + "learning_rate": 4.1944657481309736e-05, + "loss": 2.3455, + "step": 3253000 + }, + { + "epoch": 16.12, + "learning_rate": 4.194341889488365e-05, + "loss": 2.3379, + "step": 3253500 + }, + { + "epoch": 16.12, + "learning_rate": 4.194218030845757e-05, + "loss": 2.3439, + "step": 3254000 + }, + { + "epoch": 16.12, + "learning_rate": 4.194094172203148e-05, + "loss": 2.3919, + "step": 3254500 + }, + { + "epoch": 16.13, + "learning_rate": 4.193970561277825e-05, + "loss": 2.3615, + "step": 3255000 + }, + { + "epoch": 16.13, + "learning_rate": 4.1938467026352165e-05, + "loss": 2.3455, + "step": 3255500 + }, + { + "epoch": 16.13, + "learning_rate": 4.193722843992608e-05, + "loss": 2.3646, + "step": 3256000 + }, + { + "epoch": 16.13, + "learning_rate": 4.19359898535e-05, + "loss": 2.3457, + "step": 3256500 + }, + { + "epoch": 16.14, + "learning_rate": 4.1934751267073916e-05, + "loss": 2.364, + "step": 3257000 + }, + { + "epoch": 16.14, + "learning_rate": 4.1933512680647826e-05, + "loss": 2.3615, + "step": 3257500 + }, + { + "epoch": 16.14, + "learning_rate": 4.193227409422174e-05, + "loss": 2.3757, + "step": 3258000 + }, + { + "epoch": 16.14, + "learning_rate": 4.193104046214137e-05, + "loss": 2.3399, + "step": 3258500 + }, + { + "epoch": 16.15, + "learning_rate": 4.192980187571529e-05, + "loss": 2.3593, + "step": 3259000 + }, + { + "epoch": 16.15, + "learning_rate": 4.192856576646205e-05, + "loss": 2.378, + "step": 3259500 + }, + { + "epoch": 16.15, + "learning_rate": 4.192732718003597e-05, + "loss": 2.362, + "step": 3260000 + }, + { + "epoch": 16.15, + "learning_rate": 4.1926088593609884e-05, + "loss": 2.381, + "step": 3260500 + }, + { + "epoch": 16.16, + "learning_rate": 4.19248500071838e-05, + "loss": 2.3708, + "step": 3261000 + }, + { + "epoch": 16.16, + "learning_rate": 4.192361142075772e-05, + "loss": 2.3763, + "step": 3261500 + }, + { + "epoch": 16.16, + "learning_rate": 4.1922372834331634e-05, + "loss": 2.3587, + "step": 3262000 + }, + { + "epoch": 16.16, + "learning_rate": 4.192113424790555e-05, + "loss": 2.3468, + "step": 3262500 + }, + { + "epoch": 16.17, + "learning_rate": 4.191989566147947e-05, + "loss": 2.3629, + "step": 3263000 + }, + { + "epoch": 16.17, + "learning_rate": 4.1918657075053385e-05, + "loss": 2.3905, + "step": 3263500 + }, + { + "epoch": 16.17, + "learning_rate": 4.1917423442973006e-05, + "loss": 2.3469, + "step": 3264000 + }, + { + "epoch": 16.17, + "learning_rate": 4.191618485654692e-05, + "loss": 2.3685, + "step": 3264500 + }, + { + "epoch": 16.18, + "learning_rate": 4.191494627012083e-05, + "loss": 2.3798, + "step": 3265000 + }, + { + "epoch": 16.18, + "learning_rate": 4.191370768369475e-05, + "loss": 2.368, + "step": 3265500 + }, + { + "epoch": 16.18, + "learning_rate": 4.191246909726867e-05, + "loss": 2.3569, + "step": 3266000 + }, + { + "epoch": 16.18, + "learning_rate": 4.1911230510842584e-05, + "loss": 2.3642, + "step": 3266500 + }, + { + "epoch": 16.19, + "learning_rate": 4.19099919244165e-05, + "loss": 2.339, + "step": 3267000 + }, + { + "epoch": 16.19, + "learning_rate": 4.190875333799042e-05, + "loss": 2.392, + "step": 3267500 + }, + { + "epoch": 16.19, + "learning_rate": 4.1907514751564335e-05, + "loss": 2.3535, + "step": 3268000 + }, + { + "epoch": 16.19, + "learning_rate": 4.190627616513825e-05, + "loss": 2.3608, + "step": 3268500 + }, + { + "epoch": 16.2, + "learning_rate": 4.190504005588502e-05, + "loss": 2.3881, + "step": 3269000 + }, + { + "epoch": 16.2, + "learning_rate": 4.190380146945894e-05, + "loss": 2.354, + "step": 3269500 + }, + { + "epoch": 16.2, + "learning_rate": 4.1902562883032854e-05, + "loss": 2.3587, + "step": 3270000 + }, + { + "epoch": 16.2, + "learning_rate": 4.190132429660677e-05, + "loss": 2.375, + "step": 3270500 + }, + { + "epoch": 16.21, + "learning_rate": 4.190008571018069e-05, + "loss": 2.3826, + "step": 3271000 + }, + { + "epoch": 16.21, + "learning_rate": 4.1898847123754605e-05, + "loss": 2.3526, + "step": 3271500 + }, + { + "epoch": 16.21, + "learning_rate": 4.189760853732852e-05, + "loss": 2.3849, + "step": 3272000 + }, + { + "epoch": 16.21, + "learning_rate": 4.189636995090244e-05, + "loss": 2.3827, + "step": 3272500 + }, + { + "epoch": 16.22, + "learning_rate": 4.1895131364476356e-05, + "loss": 2.3505, + "step": 3273000 + }, + { + "epoch": 16.22, + "learning_rate": 4.189389277805027e-05, + "loss": 2.3436, + "step": 3273500 + }, + { + "epoch": 16.22, + "learning_rate": 4.189265419162419e-05, + "loss": 2.3404, + "step": 3274000 + }, + { + "epoch": 16.22, + "learning_rate": 4.1891415605198106e-05, + "loss": 2.3904, + "step": 3274500 + }, + { + "epoch": 16.23, + "learning_rate": 4.189017701877202e-05, + "loss": 2.3878, + "step": 3275000 + }, + { + "epoch": 16.23, + "learning_rate": 4.188894338669164e-05, + "loss": 2.372, + "step": 3275500 + }, + { + "epoch": 16.23, + "learning_rate": 4.1887704800265554e-05, + "loss": 2.3541, + "step": 3276000 + }, + { + "epoch": 16.23, + "learning_rate": 4.188646621383947e-05, + "loss": 2.3633, + "step": 3276500 + }, + { + "epoch": 16.24, + "learning_rate": 4.188522762741339e-05, + "loss": 2.3915, + "step": 3277000 + }, + { + "epoch": 16.24, + "learning_rate": 4.1883989040987305e-05, + "loss": 2.3363, + "step": 3277500 + }, + { + "epoch": 16.24, + "learning_rate": 4.188275045456122e-05, + "loss": 2.3568, + "step": 3278000 + }, + { + "epoch": 16.24, + "learning_rate": 4.188151186813514e-05, + "loss": 2.3588, + "step": 3278500 + }, + { + "epoch": 16.25, + "learning_rate": 4.18802757588819e-05, + "loss": 2.3645, + "step": 3279000 + }, + { + "epoch": 16.25, + "learning_rate": 4.187903717245582e-05, + "loss": 2.3591, + "step": 3279500 + }, + { + "epoch": 16.25, + "learning_rate": 4.1877798586029735e-05, + "loss": 2.3448, + "step": 3280000 + }, + { + "epoch": 16.25, + "learning_rate": 4.187655999960365e-05, + "loss": 2.3665, + "step": 3280500 + }, + { + "epoch": 16.26, + "learning_rate": 4.187532389035042e-05, + "loss": 2.3366, + "step": 3281000 + }, + { + "epoch": 16.26, + "learning_rate": 4.187408530392434e-05, + "loss": 2.3512, + "step": 3281500 + }, + { + "epoch": 16.26, + "learning_rate": 4.1872846717498254e-05, + "loss": 2.3426, + "step": 3282000 + }, + { + "epoch": 16.26, + "learning_rate": 4.187160813107217e-05, + "loss": 2.3714, + "step": 3282500 + }, + { + "epoch": 16.27, + "learning_rate": 4.187036954464609e-05, + "loss": 2.3823, + "step": 3283000 + }, + { + "epoch": 16.27, + "learning_rate": 4.186913343539286e-05, + "loss": 2.3549, + "step": 3283500 + }, + { + "epoch": 16.27, + "learning_rate": 4.1867894848966774e-05, + "loss": 2.3734, + "step": 3284000 + }, + { + "epoch": 16.27, + "learning_rate": 4.186665626254069e-05, + "loss": 2.372, + "step": 3284500 + }, + { + "epoch": 16.28, + "learning_rate": 4.18654176761146e-05, + "loss": 2.3525, + "step": 3285000 + }, + { + "epoch": 16.28, + "learning_rate": 4.186417908968852e-05, + "loss": 2.3635, + "step": 3285500 + }, + { + "epoch": 16.28, + "learning_rate": 4.1862940503262435e-05, + "loss": 2.3709, + "step": 3286000 + }, + { + "epoch": 16.28, + "learning_rate": 4.186170191683635e-05, + "loss": 2.364, + "step": 3286500 + }, + { + "epoch": 16.28, + "learning_rate": 4.186046333041027e-05, + "loss": 2.3777, + "step": 3287000 + }, + { + "epoch": 16.29, + "learning_rate": 4.185922722115704e-05, + "loss": 2.3548, + "step": 3287500 + }, + { + "epoch": 16.29, + "learning_rate": 4.1857988634730954e-05, + "loss": 2.3503, + "step": 3288000 + }, + { + "epoch": 16.29, + "learning_rate": 4.185675004830487e-05, + "loss": 2.3702, + "step": 3288500 + }, + { + "epoch": 16.29, + "learning_rate": 4.185551146187879e-05, + "loss": 2.3839, + "step": 3289000 + }, + { + "epoch": 16.3, + "learning_rate": 4.185427535262556e-05, + "loss": 2.3612, + "step": 3289500 + }, + { + "epoch": 16.3, + "learning_rate": 4.1853039243372326e-05, + "loss": 2.3803, + "step": 3290000 + }, + { + "epoch": 16.3, + "learning_rate": 4.185180065694624e-05, + "loss": 2.3574, + "step": 3290500 + }, + { + "epoch": 16.3, + "learning_rate": 4.185056207052016e-05, + "loss": 2.3609, + "step": 3291000 + }, + { + "epoch": 16.31, + "learning_rate": 4.184932348409408e-05, + "loss": 2.3628, + "step": 3291500 + }, + { + "epoch": 16.31, + "learning_rate": 4.1848087374840846e-05, + "loss": 2.3595, + "step": 3292000 + }, + { + "epoch": 16.31, + "learning_rate": 4.1846851265587614e-05, + "loss": 2.3629, + "step": 3292500 + }, + { + "epoch": 16.31, + "learning_rate": 4.1845612679161525e-05, + "loss": 2.3337, + "step": 3293000 + }, + { + "epoch": 16.32, + "learning_rate": 4.184437409273544e-05, + "loss": 2.3881, + "step": 3293500 + }, + { + "epoch": 16.32, + "learning_rate": 4.184313550630936e-05, + "loss": 2.3534, + "step": 3294000 + }, + { + "epoch": 16.32, + "learning_rate": 4.184189939705613e-05, + "loss": 2.4006, + "step": 3294500 + }, + { + "epoch": 16.32, + "learning_rate": 4.1840660810630044e-05, + "loss": 2.351, + "step": 3295000 + }, + { + "epoch": 16.33, + "learning_rate": 4.183942222420396e-05, + "loss": 2.3661, + "step": 3295500 + }, + { + "epoch": 16.33, + "learning_rate": 4.183818363777788e-05, + "loss": 2.3705, + "step": 3296000 + }, + { + "epoch": 16.33, + "learning_rate": 4.1836945051351795e-05, + "loss": 2.3444, + "step": 3296500 + }, + { + "epoch": 16.33, + "learning_rate": 4.183570646492571e-05, + "loss": 2.3706, + "step": 3297000 + }, + { + "epoch": 16.34, + "learning_rate": 4.183446787849963e-05, + "loss": 2.3554, + "step": 3297500 + }, + { + "epoch": 16.34, + "learning_rate": 4.1833229292073546e-05, + "loss": 2.372, + "step": 3298000 + }, + { + "epoch": 16.34, + "learning_rate": 4.183199070564746e-05, + "loss": 2.3748, + "step": 3298500 + }, + { + "epoch": 16.34, + "learning_rate": 4.183075211922138e-05, + "loss": 2.3779, + "step": 3299000 + }, + { + "epoch": 16.35, + "learning_rate": 4.1829513532795296e-05, + "loss": 2.3628, + "step": 3299500 + }, + { + "epoch": 16.35, + "learning_rate": 4.182827494636921e-05, + "loss": 2.3664, + "step": 3300000 + }, + { + "epoch": 16.35, + "learning_rate": 4.182703635994313e-05, + "loss": 2.3823, + "step": 3300500 + }, + { + "epoch": 16.35, + "learning_rate": 4.182579777351705e-05, + "loss": 2.3607, + "step": 3301000 + }, + { + "epoch": 16.36, + "learning_rate": 4.1824559187090964e-05, + "loss": 2.3748, + "step": 3301500 + }, + { + "epoch": 16.36, + "learning_rate": 4.1823320600664874e-05, + "loss": 2.3491, + "step": 3302000 + }, + { + "epoch": 16.36, + "learning_rate": 4.182208201423879e-05, + "loss": 2.3577, + "step": 3302500 + }, + { + "epoch": 16.36, + "learning_rate": 4.182084342781271e-05, + "loss": 2.3337, + "step": 3303000 + }, + { + "epoch": 16.37, + "learning_rate": 4.1819604841386625e-05, + "loss": 2.3717, + "step": 3303500 + }, + { + "epoch": 16.37, + "learning_rate": 4.181836625496054e-05, + "loss": 2.3637, + "step": 3304000 + }, + { + "epoch": 16.37, + "learning_rate": 4.181712766853446e-05, + "loss": 2.3689, + "step": 3304500 + }, + { + "epoch": 16.37, + "learning_rate": 4.181589155928122e-05, + "loss": 2.3704, + "step": 3305000 + }, + { + "epoch": 16.38, + "learning_rate": 4.181465297285514e-05, + "loss": 2.3826, + "step": 3305500 + }, + { + "epoch": 16.38, + "learning_rate": 4.1813414386429055e-05, + "loss": 2.3344, + "step": 3306000 + }, + { + "epoch": 16.38, + "learning_rate": 4.181217580000297e-05, + "loss": 2.3534, + "step": 3306500 + }, + { + "epoch": 16.38, + "learning_rate": 4.181093721357689e-05, + "loss": 2.3628, + "step": 3307000 + }, + { + "epoch": 16.39, + "learning_rate": 4.1809698627150806e-05, + "loss": 2.3458, + "step": 3307500 + }, + { + "epoch": 16.39, + "learning_rate": 4.180846251789758e-05, + "loss": 2.3597, + "step": 3308000 + }, + { + "epoch": 16.39, + "learning_rate": 4.180722640864434e-05, + "loss": 2.3612, + "step": 3308500 + }, + { + "epoch": 16.39, + "learning_rate": 4.180598782221826e-05, + "loss": 2.348, + "step": 3309000 + }, + { + "epoch": 16.4, + "learning_rate": 4.180474923579218e-05, + "loss": 2.3849, + "step": 3309500 + }, + { + "epoch": 16.4, + "learning_rate": 4.1803510649366094e-05, + "loss": 2.3843, + "step": 3310000 + }, + { + "epoch": 16.4, + "learning_rate": 4.180227206294001e-05, + "loss": 2.3771, + "step": 3310500 + }, + { + "epoch": 16.4, + "learning_rate": 4.180103347651393e-05, + "loss": 2.3771, + "step": 3311000 + }, + { + "epoch": 16.41, + "learning_rate": 4.179979489008784e-05, + "loss": 2.3497, + "step": 3311500 + }, + { + "epoch": 16.41, + "learning_rate": 4.1798556303661755e-05, + "loss": 2.348, + "step": 3312000 + }, + { + "epoch": 16.41, + "learning_rate": 4.179732019440853e-05, + "loss": 2.3576, + "step": 3312500 + }, + { + "epoch": 16.41, + "learning_rate": 4.179608160798245e-05, + "loss": 2.3733, + "step": 3313000 + }, + { + "epoch": 16.42, + "learning_rate": 4.1794843021556364e-05, + "loss": 2.3712, + "step": 3313500 + }, + { + "epoch": 16.42, + "learning_rate": 4.179360443513028e-05, + "loss": 2.3685, + "step": 3314000 + }, + { + "epoch": 16.42, + "learning_rate": 4.179236832587704e-05, + "loss": 2.3851, + "step": 3314500 + }, + { + "epoch": 16.42, + "learning_rate": 4.179112973945096e-05, + "loss": 2.3961, + "step": 3315000 + }, + { + "epoch": 16.43, + "learning_rate": 4.178989115302488e-05, + "loss": 2.4049, + "step": 3315500 + }, + { + "epoch": 16.43, + "learning_rate": 4.1788652566598794e-05, + "loss": 2.3543, + "step": 3316000 + }, + { + "epoch": 16.43, + "learning_rate": 4.178741398017271e-05, + "loss": 2.3707, + "step": 3316500 + }, + { + "epoch": 16.43, + "learning_rate": 4.178617539374663e-05, + "loss": 2.3482, + "step": 3317000 + }, + { + "epoch": 16.44, + "learning_rate": 4.178493680732054e-05, + "loss": 2.3832, + "step": 3317500 + }, + { + "epoch": 16.44, + "learning_rate": 4.1783700698067314e-05, + "loss": 2.4093, + "step": 3318000 + }, + { + "epoch": 16.44, + "learning_rate": 4.178246458881408e-05, + "loss": 2.3686, + "step": 3318500 + }, + { + "epoch": 16.44, + "learning_rate": 4.1781226002388e-05, + "loss": 2.3364, + "step": 3319000 + }, + { + "epoch": 16.45, + "learning_rate": 4.177998989313476e-05, + "loss": 2.3651, + "step": 3319500 + }, + { + "epoch": 16.45, + "learning_rate": 4.177875130670868e-05, + "loss": 2.3806, + "step": 3320000 + }, + { + "epoch": 16.45, + "learning_rate": 4.1777512720282595e-05, + "loss": 2.3776, + "step": 3320500 + }, + { + "epoch": 16.45, + "learning_rate": 4.177627413385651e-05, + "loss": 2.3673, + "step": 3321000 + }, + { + "epoch": 16.46, + "learning_rate": 4.177503554743043e-05, + "loss": 2.3572, + "step": 3321500 + }, + { + "epoch": 16.46, + "learning_rate": 4.1773796961004346e-05, + "loss": 2.3464, + "step": 3322000 + }, + { + "epoch": 16.46, + "learning_rate": 4.177255837457826e-05, + "loss": 2.3861, + "step": 3322500 + }, + { + "epoch": 16.46, + "learning_rate": 4.177131978815218e-05, + "loss": 2.3693, + "step": 3323000 + }, + { + "epoch": 16.47, + "learning_rate": 4.177008367889895e-05, + "loss": 2.362, + "step": 3323500 + }, + { + "epoch": 16.47, + "learning_rate": 4.1768845092472866e-05, + "loss": 2.3583, + "step": 3324000 + }, + { + "epoch": 16.47, + "learning_rate": 4.176760650604678e-05, + "loss": 2.4084, + "step": 3324500 + }, + { + "epoch": 16.47, + "learning_rate": 4.17663679196207e-05, + "loss": 2.3874, + "step": 3325000 + }, + { + "epoch": 16.48, + "learning_rate": 4.1765129333194617e-05, + "loss": 2.4014, + "step": 3325500 + }, + { + "epoch": 16.48, + "learning_rate": 4.1763890746768533e-05, + "loss": 2.3739, + "step": 3326000 + }, + { + "epoch": 16.48, + "learning_rate": 4.1762652160342444e-05, + "loss": 2.362, + "step": 3326500 + }, + { + "epoch": 16.48, + "learning_rate": 4.176141357391636e-05, + "loss": 2.3732, + "step": 3327000 + }, + { + "epoch": 16.49, + "learning_rate": 4.176017498749028e-05, + "loss": 2.3626, + "step": 3327500 + }, + { + "epoch": 16.49, + "learning_rate": 4.1758936401064194e-05, + "loss": 2.3443, + "step": 3328000 + }, + { + "epoch": 16.49, + "learning_rate": 4.175770029181096e-05, + "loss": 2.3804, + "step": 3328500 + }, + { + "epoch": 16.49, + "learning_rate": 4.175646170538488e-05, + "loss": 2.3758, + "step": 3329000 + }, + { + "epoch": 16.5, + "learning_rate": 4.17552231189588e-05, + "loss": 2.3673, + "step": 3329500 + }, + { + "epoch": 16.5, + "learning_rate": 4.1753984532532714e-05, + "loss": 2.3549, + "step": 3330000 + }, + { + "epoch": 16.5, + "learning_rate": 4.175274594610663e-05, + "loss": 2.362, + "step": 3330500 + }, + { + "epoch": 16.5, + "learning_rate": 4.175150735968055e-05, + "loss": 2.3912, + "step": 3331000 + }, + { + "epoch": 16.51, + "learning_rate": 4.1750268773254465e-05, + "loss": 2.3859, + "step": 3331500 + }, + { + "epoch": 16.51, + "learning_rate": 4.174903018682838e-05, + "loss": 2.3581, + "step": 3332000 + }, + { + "epoch": 16.51, + "learning_rate": 4.17477916004023e-05, + "loss": 2.3842, + "step": 3332500 + }, + { + "epoch": 16.51, + "learning_rate": 4.1746553013976216e-05, + "loss": 2.3812, + "step": 3333000 + }, + { + "epoch": 16.52, + "learning_rate": 4.1745314427550126e-05, + "loss": 2.3868, + "step": 3333500 + }, + { + "epoch": 16.52, + "learning_rate": 4.174407584112404e-05, + "loss": 2.3852, + "step": 3334000 + }, + { + "epoch": 16.52, + "learning_rate": 4.174283725469796e-05, + "loss": 2.3561, + "step": 3334500 + }, + { + "epoch": 16.52, + "learning_rate": 4.1741598668271877e-05, + "loss": 2.3751, + "step": 3335000 + }, + { + "epoch": 16.53, + "learning_rate": 4.1740362559018645e-05, + "loss": 2.3717, + "step": 3335500 + }, + { + "epoch": 16.53, + "learning_rate": 4.173912397259256e-05, + "loss": 2.3852, + "step": 3336000 + }, + { + "epoch": 16.53, + "learning_rate": 4.173788786333933e-05, + "loss": 2.4165, + "step": 3336500 + }, + { + "epoch": 16.53, + "learning_rate": 4.173664927691325e-05, + "loss": 2.3534, + "step": 3337000 + }, + { + "epoch": 16.54, + "learning_rate": 4.1735410690487165e-05, + "loss": 2.3673, + "step": 3337500 + }, + { + "epoch": 16.54, + "learning_rate": 4.173417210406108e-05, + "loss": 2.3468, + "step": 3338000 + }, + { + "epoch": 16.54, + "learning_rate": 4.1732933517635e-05, + "loss": 2.3575, + "step": 3338500 + }, + { + "epoch": 16.54, + "learning_rate": 4.1731694931208916e-05, + "loss": 2.4043, + "step": 3339000 + }, + { + "epoch": 16.55, + "learning_rate": 4.1730456344782826e-05, + "loss": 2.3744, + "step": 3339500 + }, + { + "epoch": 16.55, + "learning_rate": 4.172921775835674e-05, + "loss": 2.3968, + "step": 3340000 + }, + { + "epoch": 16.55, + "learning_rate": 4.172797917193066e-05, + "loss": 2.389, + "step": 3340500 + }, + { + "epoch": 16.55, + "learning_rate": 4.172674306267743e-05, + "loss": 2.3549, + "step": 3341000 + }, + { + "epoch": 16.55, + "learning_rate": 4.17255069534242e-05, + "loss": 2.3684, + "step": 3341500 + }, + { + "epoch": 16.56, + "learning_rate": 4.1724268366998114e-05, + "loss": 2.379, + "step": 3342000 + }, + { + "epoch": 16.56, + "learning_rate": 4.172302978057203e-05, + "loss": 2.3729, + "step": 3342500 + }, + { + "epoch": 16.56, + "learning_rate": 4.172179119414595e-05, + "loss": 2.3568, + "step": 3343000 + }, + { + "epoch": 16.56, + "learning_rate": 4.172055508489272e-05, + "loss": 2.3773, + "step": 3343500 + }, + { + "epoch": 16.57, + "learning_rate": 4.1719316498466634e-05, + "loss": 2.3581, + "step": 3344000 + }, + { + "epoch": 16.57, + "learning_rate": 4.171807791204055e-05, + "loss": 2.368, + "step": 3344500 + }, + { + "epoch": 16.57, + "learning_rate": 4.171683932561447e-05, + "loss": 2.3668, + "step": 3345000 + }, + { + "epoch": 16.57, + "learning_rate": 4.1715600739188385e-05, + "loss": 2.3714, + "step": 3345500 + }, + { + "epoch": 16.58, + "learning_rate": 4.17143621527623e-05, + "loss": 2.3788, + "step": 3346000 + }, + { + "epoch": 16.58, + "learning_rate": 4.171312356633622e-05, + "loss": 2.3737, + "step": 3346500 + }, + { + "epoch": 16.58, + "learning_rate": 4.171188497991013e-05, + "loss": 2.3877, + "step": 3347000 + }, + { + "epoch": 16.58, + "learning_rate": 4.1710646393484046e-05, + "loss": 2.359, + "step": 3347500 + }, + { + "epoch": 16.59, + "learning_rate": 4.170940780705796e-05, + "loss": 2.3579, + "step": 3348000 + }, + { + "epoch": 16.59, + "learning_rate": 4.170817169780473e-05, + "loss": 2.3688, + "step": 3348500 + }, + { + "epoch": 16.59, + "learning_rate": 4.170693311137865e-05, + "loss": 2.3799, + "step": 3349000 + }, + { + "epoch": 16.59, + "learning_rate": 4.170569700212542e-05, + "loss": 2.3597, + "step": 3349500 + }, + { + "epoch": 16.6, + "learning_rate": 4.1704458415699334e-05, + "loss": 2.3678, + "step": 3350000 + }, + { + "epoch": 16.6, + "learning_rate": 4.170321982927325e-05, + "loss": 2.3693, + "step": 3350500 + }, + { + "epoch": 16.6, + "learning_rate": 4.170198124284717e-05, + "loss": 2.3535, + "step": 3351000 + }, + { + "epoch": 16.6, + "learning_rate": 4.1700742656421085e-05, + "loss": 2.3655, + "step": 3351500 + }, + { + "epoch": 16.61, + "learning_rate": 4.169950654716785e-05, + "loss": 2.3376, + "step": 3352000 + }, + { + "epoch": 16.61, + "learning_rate": 4.1698267960741764e-05, + "loss": 2.3532, + "step": 3352500 + }, + { + "epoch": 16.61, + "learning_rate": 4.169702937431568e-05, + "loss": 2.3747, + "step": 3353000 + }, + { + "epoch": 16.61, + "learning_rate": 4.16957907878896e-05, + "loss": 2.3603, + "step": 3353500 + }, + { + "epoch": 16.62, + "learning_rate": 4.1694552201463515e-05, + "loss": 2.377, + "step": 3354000 + }, + { + "epoch": 16.62, + "learning_rate": 4.169331361503743e-05, + "loss": 2.3917, + "step": 3354500 + }, + { + "epoch": 16.62, + "learning_rate": 4.16920775057842e-05, + "loss": 2.3773, + "step": 3355000 + }, + { + "epoch": 16.62, + "learning_rate": 4.169083891935812e-05, + "loss": 2.3885, + "step": 3355500 + }, + { + "epoch": 16.63, + "learning_rate": 4.1689600332932034e-05, + "loss": 2.3602, + "step": 3356000 + }, + { + "epoch": 16.63, + "learning_rate": 4.16883642236788e-05, + "loss": 2.3732, + "step": 3356500 + }, + { + "epoch": 16.63, + "learning_rate": 4.168712563725271e-05, + "loss": 2.3674, + "step": 3357000 + }, + { + "epoch": 16.63, + "learning_rate": 4.168588705082663e-05, + "loss": 2.3854, + "step": 3357500 + }, + { + "epoch": 16.64, + "learning_rate": 4.168464846440055e-05, + "loss": 2.3712, + "step": 3358000 + }, + { + "epoch": 16.64, + "learning_rate": 4.1683409877974464e-05, + "loss": 2.3552, + "step": 3358500 + }, + { + "epoch": 16.64, + "learning_rate": 4.168217129154838e-05, + "loss": 2.3662, + "step": 3359000 + }, + { + "epoch": 16.64, + "learning_rate": 4.168093518229515e-05, + "loss": 2.3816, + "step": 3359500 + }, + { + "epoch": 16.65, + "learning_rate": 4.1679696595869067e-05, + "loss": 2.3567, + "step": 3360000 + }, + { + "epoch": 16.65, + "learning_rate": 4.1678458009442983e-05, + "loss": 2.3709, + "step": 3360500 + }, + { + "epoch": 16.65, + "learning_rate": 4.16772194230169e-05, + "loss": 2.3764, + "step": 3361000 + }, + { + "epoch": 16.65, + "learning_rate": 4.167598083659082e-05, + "loss": 2.375, + "step": 3361500 + }, + { + "epoch": 16.66, + "learning_rate": 4.1674742250164734e-05, + "loss": 2.3625, + "step": 3362000 + }, + { + "epoch": 16.66, + "learning_rate": 4.167350366373865e-05, + "loss": 2.3537, + "step": 3362500 + }, + { + "epoch": 16.66, + "learning_rate": 4.167226507731257e-05, + "loss": 2.3476, + "step": 3363000 + }, + { + "epoch": 16.66, + "learning_rate": 4.1671026490886485e-05, + "loss": 2.4053, + "step": 3363500 + }, + { + "epoch": 16.67, + "learning_rate": 4.16697879044604e-05, + "loss": 2.3621, + "step": 3364000 + }, + { + "epoch": 16.67, + "learning_rate": 4.166854931803432e-05, + "loss": 2.3565, + "step": 3364500 + }, + { + "epoch": 16.67, + "learning_rate": 4.1667310731608236e-05, + "loss": 2.4182, + "step": 3365000 + }, + { + "epoch": 16.67, + "learning_rate": 4.1666074622355e-05, + "loss": 2.378, + "step": 3365500 + }, + { + "epoch": 16.68, + "learning_rate": 4.1664836035928915e-05, + "loss": 2.3553, + "step": 3366000 + }, + { + "epoch": 16.68, + "learning_rate": 4.166359744950283e-05, + "loss": 2.3641, + "step": 3366500 + }, + { + "epoch": 16.68, + "learning_rate": 4.166235886307675e-05, + "loss": 2.3704, + "step": 3367000 + }, + { + "epoch": 16.68, + "learning_rate": 4.1661120276650666e-05, + "loss": 2.3547, + "step": 3367500 + }, + { + "epoch": 16.69, + "learning_rate": 4.165988169022458e-05, + "loss": 2.3821, + "step": 3368000 + }, + { + "epoch": 16.69, + "learning_rate": 4.16586431037985e-05, + "loss": 2.3792, + "step": 3368500 + }, + { + "epoch": 16.69, + "learning_rate": 4.165740699454527e-05, + "loss": 2.3716, + "step": 3369000 + }, + { + "epoch": 16.69, + "learning_rate": 4.1656168408119185e-05, + "loss": 2.3796, + "step": 3369500 + }, + { + "epoch": 16.7, + "learning_rate": 4.16549298216931e-05, + "loss": 2.3907, + "step": 3370000 + }, + { + "epoch": 16.7, + "learning_rate": 4.165369123526702e-05, + "loss": 2.3926, + "step": 3370500 + }, + { + "epoch": 16.7, + "learning_rate": 4.165245512601378e-05, + "loss": 2.3875, + "step": 3371000 + }, + { + "epoch": 16.7, + "learning_rate": 4.16512165395877e-05, + "loss": 2.3632, + "step": 3371500 + }, + { + "epoch": 16.71, + "learning_rate": 4.1649977953161615e-05, + "loss": 2.3904, + "step": 3372000 + }, + { + "epoch": 16.71, + "learning_rate": 4.1648741843908384e-05, + "loss": 2.3739, + "step": 3372500 + }, + { + "epoch": 16.71, + "learning_rate": 4.16475032574823e-05, + "loss": 2.3697, + "step": 3373000 + }, + { + "epoch": 16.71, + "learning_rate": 4.164626467105622e-05, + "loss": 2.3728, + "step": 3373500 + }, + { + "epoch": 16.72, + "learning_rate": 4.164502856180299e-05, + "loss": 2.3865, + "step": 3374000 + }, + { + "epoch": 16.72, + "learning_rate": 4.16437899753769e-05, + "loss": 2.3485, + "step": 3374500 + }, + { + "epoch": 16.72, + "learning_rate": 4.164255138895082e-05, + "loss": 2.3721, + "step": 3375000 + }, + { + "epoch": 16.72, + "learning_rate": 4.164131280252474e-05, + "loss": 2.3634, + "step": 3375500 + }, + { + "epoch": 16.73, + "learning_rate": 4.1640076693271506e-05, + "loss": 2.3643, + "step": 3376000 + }, + { + "epoch": 16.73, + "learning_rate": 4.163883810684542e-05, + "loss": 2.3706, + "step": 3376500 + }, + { + "epoch": 16.73, + "learning_rate": 4.163759952041934e-05, + "loss": 2.3708, + "step": 3377000 + }, + { + "epoch": 16.73, + "learning_rate": 4.163636093399325e-05, + "loss": 2.3637, + "step": 3377500 + }, + { + "epoch": 16.74, + "learning_rate": 4.163512234756717e-05, + "loss": 2.378, + "step": 3378000 + }, + { + "epoch": 16.74, + "learning_rate": 4.1633883761141084e-05, + "loss": 2.3886, + "step": 3378500 + }, + { + "epoch": 16.74, + "learning_rate": 4.1632645174715e-05, + "loss": 2.3935, + "step": 3379000 + }, + { + "epoch": 16.74, + "learning_rate": 4.163140658828892e-05, + "loss": 2.3653, + "step": 3379500 + }, + { + "epoch": 16.75, + "learning_rate": 4.1630168001862835e-05, + "loss": 2.3607, + "step": 3380000 + }, + { + "epoch": 16.75, + "learning_rate": 4.162892941543675e-05, + "loss": 2.3704, + "step": 3380500 + }, + { + "epoch": 16.75, + "learning_rate": 4.162769082901067e-05, + "loss": 2.3722, + "step": 3381000 + }, + { + "epoch": 16.75, + "learning_rate": 4.1626452242584585e-05, + "loss": 2.3535, + "step": 3381500 + }, + { + "epoch": 16.76, + "learning_rate": 4.16252136561585e-05, + "loss": 2.3634, + "step": 3382000 + }, + { + "epoch": 16.76, + "learning_rate": 4.162397754690527e-05, + "loss": 2.3759, + "step": 3382500 + }, + { + "epoch": 16.76, + "learning_rate": 4.162273896047919e-05, + "loss": 2.3764, + "step": 3383000 + }, + { + "epoch": 16.76, + "learning_rate": 4.1621500374053105e-05, + "loss": 2.3739, + "step": 3383500 + }, + { + "epoch": 16.77, + "learning_rate": 4.1620261787627015e-05, + "loss": 2.3762, + "step": 3384000 + }, + { + "epoch": 16.77, + "learning_rate": 4.161902320120093e-05, + "loss": 2.3656, + "step": 3384500 + }, + { + "epoch": 16.77, + "learning_rate": 4.16177870919477e-05, + "loss": 2.3756, + "step": 3385000 + }, + { + "epoch": 16.77, + "learning_rate": 4.161654850552162e-05, + "loss": 2.3657, + "step": 3385500 + }, + { + "epoch": 16.78, + "learning_rate": 4.1615309919095535e-05, + "loss": 2.3836, + "step": 3386000 + }, + { + "epoch": 16.78, + "learning_rate": 4.161407133266945e-05, + "loss": 2.3893, + "step": 3386500 + }, + { + "epoch": 16.78, + "learning_rate": 4.161283274624337e-05, + "loss": 2.3778, + "step": 3387000 + }, + { + "epoch": 16.78, + "learning_rate": 4.1611594159817286e-05, + "loss": 2.3606, + "step": 3387500 + }, + { + "epoch": 16.79, + "learning_rate": 4.16103555733912e-05, + "loss": 2.3618, + "step": 3388000 + }, + { + "epoch": 16.79, + "learning_rate": 4.160911698696512e-05, + "loss": 2.3565, + "step": 3388500 + }, + { + "epoch": 16.79, + "learning_rate": 4.160788087771189e-05, + "loss": 2.3636, + "step": 3389000 + }, + { + "epoch": 16.79, + "learning_rate": 4.1606642291285805e-05, + "loss": 2.3672, + "step": 3389500 + }, + { + "epoch": 16.8, + "learning_rate": 4.160540370485972e-05, + "loss": 2.3587, + "step": 3390000 + }, + { + "epoch": 16.8, + "learning_rate": 4.160416511843364e-05, + "loss": 2.3782, + "step": 3390500 + }, + { + "epoch": 16.8, + "learning_rate": 4.160292653200755e-05, + "loss": 2.3666, + "step": 3391000 + }, + { + "epoch": 16.8, + "learning_rate": 4.160169042275432e-05, + "loss": 2.3514, + "step": 3391500 + }, + { + "epoch": 16.81, + "learning_rate": 4.1600451836328235e-05, + "loss": 2.3646, + "step": 3392000 + }, + { + "epoch": 16.81, + "learning_rate": 4.159921324990215e-05, + "loss": 2.3601, + "step": 3392500 + }, + { + "epoch": 16.81, + "learning_rate": 4.159797466347607e-05, + "loss": 2.3792, + "step": 3393000 + }, + { + "epoch": 16.81, + "learning_rate": 4.1596736077049986e-05, + "loss": 2.355, + "step": 3393500 + }, + { + "epoch": 16.82, + "learning_rate": 4.1595499967796754e-05, + "loss": 2.3727, + "step": 3394000 + }, + { + "epoch": 16.82, + "learning_rate": 4.159426138137067e-05, + "loss": 2.3774, + "step": 3394500 + }, + { + "epoch": 16.82, + "learning_rate": 4.159302279494459e-05, + "loss": 2.3607, + "step": 3395000 + }, + { + "epoch": 16.82, + "learning_rate": 4.1591784208518505e-05, + "loss": 2.3917, + "step": 3395500 + }, + { + "epoch": 16.82, + "learning_rate": 4.159054562209242e-05, + "loss": 2.3518, + "step": 3396000 + }, + { + "epoch": 16.83, + "learning_rate": 4.158930703566634e-05, + "loss": 2.3856, + "step": 3396500 + }, + { + "epoch": 16.83, + "learning_rate": 4.1588068449240256e-05, + "loss": 2.387, + "step": 3397000 + }, + { + "epoch": 16.83, + "learning_rate": 4.1586829862814166e-05, + "loss": 2.3707, + "step": 3397500 + }, + { + "epoch": 16.83, + "learning_rate": 4.158559127638808e-05, + "loss": 2.3603, + "step": 3398000 + }, + { + "epoch": 16.84, + "learning_rate": 4.158435764430771e-05, + "loss": 2.3707, + "step": 3398500 + }, + { + "epoch": 16.84, + "learning_rate": 4.158311905788163e-05, + "loss": 2.3861, + "step": 3399000 + }, + { + "epoch": 16.84, + "learning_rate": 4.158188047145554e-05, + "loss": 2.3695, + "step": 3399500 + }, + { + "epoch": 16.84, + "learning_rate": 4.1580641885029455e-05, + "loss": 2.3408, + "step": 3400000 + }, + { + "epoch": 16.85, + "learning_rate": 4.157940329860337e-05, + "loss": 2.3676, + "step": 3400500 + }, + { + "epoch": 16.85, + "learning_rate": 4.157816718935014e-05, + "loss": 2.3638, + "step": 3401000 + }, + { + "epoch": 16.85, + "learning_rate": 4.157692860292406e-05, + "loss": 2.3828, + "step": 3401500 + }, + { + "epoch": 16.85, + "learning_rate": 4.1575690016497974e-05, + "loss": 2.3853, + "step": 3402000 + }, + { + "epoch": 16.86, + "learning_rate": 4.1574451430071884e-05, + "loss": 2.3722, + "step": 3402500 + }, + { + "epoch": 16.86, + "learning_rate": 4.15732128436458e-05, + "loss": 2.3818, + "step": 3403000 + }, + { + "epoch": 16.86, + "learning_rate": 4.157197425721972e-05, + "loss": 2.3493, + "step": 3403500 + }, + { + "epoch": 16.86, + "learning_rate": 4.1570735670793635e-05, + "loss": 2.3795, + "step": 3404000 + }, + { + "epoch": 16.87, + "learning_rate": 4.156949708436755e-05, + "loss": 2.3624, + "step": 3404500 + }, + { + "epoch": 16.87, + "learning_rate": 4.156826097511433e-05, + "loss": 2.3783, + "step": 3405000 + }, + { + "epoch": 16.87, + "learning_rate": 4.1567022388688245e-05, + "loss": 2.379, + "step": 3405500 + }, + { + "epoch": 16.87, + "learning_rate": 4.1565783802262155e-05, + "loss": 2.3758, + "step": 3406000 + }, + { + "epoch": 16.88, + "learning_rate": 4.156454521583607e-05, + "loss": 2.3549, + "step": 3406500 + }, + { + "epoch": 16.88, + "learning_rate": 4.156330662940999e-05, + "loss": 2.3986, + "step": 3407000 + }, + { + "epoch": 16.88, + "learning_rate": 4.1562068042983905e-05, + "loss": 2.3615, + "step": 3407500 + }, + { + "epoch": 16.88, + "learning_rate": 4.156082945655782e-05, + "loss": 2.3583, + "step": 3408000 + }, + { + "epoch": 16.89, + "learning_rate": 4.155959334730459e-05, + "loss": 2.3854, + "step": 3408500 + }, + { + "epoch": 16.89, + "learning_rate": 4.15583547608785e-05, + "loss": 2.3673, + "step": 3409000 + }, + { + "epoch": 16.89, + "learning_rate": 4.155711617445242e-05, + "loss": 2.3642, + "step": 3409500 + }, + { + "epoch": 16.89, + "learning_rate": 4.1555877588026335e-05, + "loss": 2.3299, + "step": 3410000 + }, + { + "epoch": 16.9, + "learning_rate": 4.155463900160025e-05, + "loss": 2.3807, + "step": 3410500 + }, + { + "epoch": 16.9, + "learning_rate": 4.155340041517417e-05, + "loss": 2.3767, + "step": 3411000 + }, + { + "epoch": 16.9, + "learning_rate": 4.1552161828748086e-05, + "loss": 2.3789, + "step": 3411500 + }, + { + "epoch": 16.9, + "learning_rate": 4.1550923242322e-05, + "loss": 2.3876, + "step": 3412000 + }, + { + "epoch": 16.91, + "learning_rate": 4.154968713306877e-05, + "loss": 2.3517, + "step": 3412500 + }, + { + "epoch": 16.91, + "learning_rate": 4.154844854664269e-05, + "loss": 2.4007, + "step": 3413000 + }, + { + "epoch": 16.91, + "learning_rate": 4.1547209960216606e-05, + "loss": 2.3944, + "step": 3413500 + }, + { + "epoch": 16.91, + "learning_rate": 4.154597137379052e-05, + "loss": 2.3647, + "step": 3414000 + }, + { + "epoch": 16.92, + "learning_rate": 4.154473278736444e-05, + "loss": 2.3501, + "step": 3414500 + }, + { + "epoch": 16.92, + "learning_rate": 4.1543494200938356e-05, + "loss": 2.3941, + "step": 3415000 + }, + { + "epoch": 16.92, + "learning_rate": 4.154225561451227e-05, + "loss": 2.3809, + "step": 3415500 + }, + { + "epoch": 16.92, + "learning_rate": 4.154101702808619e-05, + "loss": 2.3663, + "step": 3416000 + }, + { + "epoch": 16.93, + "learning_rate": 4.153978091883295e-05, + "loss": 2.3529, + "step": 3416500 + }, + { + "epoch": 16.93, + "learning_rate": 4.153854233240687e-05, + "loss": 2.3712, + "step": 3417000 + }, + { + "epoch": 16.93, + "learning_rate": 4.1537306223153645e-05, + "loss": 2.3917, + "step": 3417500 + }, + { + "epoch": 16.93, + "learning_rate": 4.153606763672756e-05, + "loss": 2.3671, + "step": 3418000 + }, + { + "epoch": 16.94, + "learning_rate": 4.153482905030147e-05, + "loss": 2.3691, + "step": 3418500 + }, + { + "epoch": 16.94, + "learning_rate": 4.153359046387539e-05, + "loss": 2.3647, + "step": 3419000 + }, + { + "epoch": 16.94, + "learning_rate": 4.1532351877449306e-05, + "loss": 2.3684, + "step": 3419500 + }, + { + "epoch": 16.94, + "learning_rate": 4.153111329102322e-05, + "loss": 2.3566, + "step": 3420000 + }, + { + "epoch": 16.95, + "learning_rate": 4.152987718176999e-05, + "loss": 2.3672, + "step": 3420500 + }, + { + "epoch": 16.95, + "learning_rate": 4.152863859534391e-05, + "loss": 2.383, + "step": 3421000 + }, + { + "epoch": 16.95, + "learning_rate": 4.152740248609068e-05, + "loss": 2.3671, + "step": 3421500 + }, + { + "epoch": 16.95, + "learning_rate": 4.1526163899664594e-05, + "loss": 2.3578, + "step": 3422000 + }, + { + "epoch": 16.96, + "learning_rate": 4.152492531323851e-05, + "loss": 2.3837, + "step": 3422500 + }, + { + "epoch": 16.96, + "learning_rate": 4.152368672681243e-05, + "loss": 2.3824, + "step": 3423000 + }, + { + "epoch": 16.96, + "learning_rate": 4.1522448140386345e-05, + "loss": 2.3624, + "step": 3423500 + }, + { + "epoch": 16.96, + "learning_rate": 4.152120955396026e-05, + "loss": 2.3848, + "step": 3424000 + }, + { + "epoch": 16.97, + "learning_rate": 4.151997096753417e-05, + "loss": 2.379, + "step": 3424500 + }, + { + "epoch": 16.97, + "learning_rate": 4.151873238110809e-05, + "loss": 2.3548, + "step": 3425000 + }, + { + "epoch": 16.97, + "learning_rate": 4.1517493794682006e-05, + "loss": 2.363, + "step": 3425500 + }, + { + "epoch": 16.97, + "learning_rate": 4.151625520825592e-05, + "loss": 2.3791, + "step": 3426000 + }, + { + "epoch": 16.98, + "learning_rate": 4.151501662182984e-05, + "loss": 2.3776, + "step": 3426500 + }, + { + "epoch": 16.98, + "learning_rate": 4.151378051257661e-05, + "loss": 2.3708, + "step": 3427000 + }, + { + "epoch": 16.98, + "learning_rate": 4.1512541926150525e-05, + "loss": 2.3777, + "step": 3427500 + }, + { + "epoch": 16.98, + "learning_rate": 4.1511303339724436e-05, + "loss": 2.3577, + "step": 3428000 + }, + { + "epoch": 16.99, + "learning_rate": 4.151006475329835e-05, + "loss": 2.3726, + "step": 3428500 + }, + { + "epoch": 16.99, + "learning_rate": 4.150882616687227e-05, + "loss": 2.393, + "step": 3429000 + }, + { + "epoch": 16.99, + "learning_rate": 4.1507590057619045e-05, + "loss": 2.356, + "step": 3429500 + }, + { + "epoch": 16.99, + "learning_rate": 4.150635147119296e-05, + "loss": 2.3563, + "step": 3430000 + }, + { + "epoch": 17.0, + "learning_rate": 4.150511288476688e-05, + "loss": 2.3842, + "step": 3430500 + }, + { + "epoch": 17.0, + "learning_rate": 4.150387429834079e-05, + "loss": 2.3678, + "step": 3431000 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.6498075948776978, + "eval_accuracy_mlm": 0.6043656048690601, + "eval_accuracy_nsp": 0.8643428943477186, + "eval_loss": 2.376680374145508, + "eval_runtime": 145.8417, + "eval_samples_per_second": 1748.189, + "eval_steps_per_second": 72.846, + "step": 3431331 + }, + { + "epoch": 17.0, + "learning_rate": 4.1502638189087565e-05, + "loss": 2.3765, + "step": 3431500 + }, + { + "epoch": 17.0, + "learning_rate": 4.150139960266148e-05, + "loss": 2.3414, + "step": 3432000 + }, + { + "epoch": 17.01, + "learning_rate": 4.150016101623539e-05, + "loss": 2.3531, + "step": 3432500 + }, + { + "epoch": 17.01, + "learning_rate": 4.149892242980931e-05, + "loss": 2.3628, + "step": 3433000 + }, + { + "epoch": 17.01, + "learning_rate": 4.1497683843383226e-05, + "loss": 2.3441, + "step": 3433500 + }, + { + "epoch": 17.01, + "learning_rate": 4.149644525695714e-05, + "loss": 2.3688, + "step": 3434000 + }, + { + "epoch": 17.02, + "learning_rate": 4.149520667053106e-05, + "loss": 2.3296, + "step": 3434500 + }, + { + "epoch": 17.02, + "learning_rate": 4.149396808410497e-05, + "loss": 2.3295, + "step": 3435000 + }, + { + "epoch": 17.02, + "learning_rate": 4.1492729497678886e-05, + "loss": 2.3542, + "step": 3435500 + }, + { + "epoch": 17.02, + "learning_rate": 4.1491490911252803e-05, + "loss": 2.3233, + "step": 3436000 + }, + { + "epoch": 17.03, + "learning_rate": 4.149025232482672e-05, + "loss": 2.3492, + "step": 3436500 + }, + { + "epoch": 17.03, + "learning_rate": 4.148901373840064e-05, + "loss": 2.341, + "step": 3437000 + }, + { + "epoch": 17.03, + "learning_rate": 4.1487775151974554e-05, + "loss": 2.356, + "step": 3437500 + }, + { + "epoch": 17.03, + "learning_rate": 4.148653656554847e-05, + "loss": 2.3461, + "step": 3438000 + }, + { + "epoch": 17.04, + "learning_rate": 4.148529797912239e-05, + "loss": 2.3431, + "step": 3438500 + }, + { + "epoch": 17.04, + "learning_rate": 4.148406186986916e-05, + "loss": 2.3379, + "step": 3439000 + }, + { + "epoch": 17.04, + "learning_rate": 4.1482823283443074e-05, + "loss": 2.3413, + "step": 3439500 + }, + { + "epoch": 17.04, + "learning_rate": 4.148158469701699e-05, + "loss": 2.3506, + "step": 3440000 + }, + { + "epoch": 17.05, + "learning_rate": 4.148034858776376e-05, + "loss": 2.3299, + "step": 3440500 + }, + { + "epoch": 17.05, + "learning_rate": 4.1479110001337676e-05, + "loss": 2.3431, + "step": 3441000 + }, + { + "epoch": 17.05, + "learning_rate": 4.147787141491159e-05, + "loss": 2.3623, + "step": 3441500 + }, + { + "epoch": 17.05, + "learning_rate": 4.1476632828485504e-05, + "loss": 2.3722, + "step": 3442000 + }, + { + "epoch": 17.06, + "learning_rate": 4.147539424205942e-05, + "loss": 2.3552, + "step": 3442500 + }, + { + "epoch": 17.06, + "learning_rate": 4.147415565563334e-05, + "loss": 2.3468, + "step": 3443000 + }, + { + "epoch": 17.06, + "learning_rate": 4.1472917069207254e-05, + "loss": 2.3322, + "step": 3443500 + }, + { + "epoch": 17.06, + "learning_rate": 4.147167848278117e-05, + "loss": 2.3492, + "step": 3444000 + }, + { + "epoch": 17.07, + "learning_rate": 4.147044237352794e-05, + "loss": 2.3549, + "step": 3444500 + }, + { + "epoch": 17.07, + "learning_rate": 4.146920378710186e-05, + "loss": 2.373, + "step": 3445000 + }, + { + "epoch": 17.07, + "learning_rate": 4.1467965200675774e-05, + "loss": 2.3629, + "step": 3445500 + }, + { + "epoch": 17.07, + "learning_rate": 4.146672909142254e-05, + "loss": 2.3718, + "step": 3446000 + }, + { + "epoch": 17.08, + "learning_rate": 4.146549050499646e-05, + "loss": 2.3301, + "step": 3446500 + }, + { + "epoch": 17.08, + "learning_rate": 4.1464251918570377e-05, + "loss": 2.3385, + "step": 3447000 + }, + { + "epoch": 17.08, + "learning_rate": 4.1463013332144294e-05, + "loss": 2.3374, + "step": 3447500 + }, + { + "epoch": 17.08, + "learning_rate": 4.146177474571821e-05, + "loss": 2.3562, + "step": 3448000 + }, + { + "epoch": 17.09, + "learning_rate": 4.146053615929212e-05, + "loss": 2.3514, + "step": 3448500 + }, + { + "epoch": 17.09, + "learning_rate": 4.145929757286604e-05, + "loss": 2.3568, + "step": 3449000 + }, + { + "epoch": 17.09, + "learning_rate": 4.1458058986439954e-05, + "loss": 2.3525, + "step": 3449500 + }, + { + "epoch": 17.09, + "learning_rate": 4.145682287718672e-05, + "loss": 2.3269, + "step": 3450000 + }, + { + "epoch": 17.09, + "learning_rate": 4.145558429076064e-05, + "loss": 2.3618, + "step": 3450500 + }, + { + "epoch": 17.1, + "learning_rate": 4.145434570433456e-05, + "loss": 2.3438, + "step": 3451000 + }, + { + "epoch": 17.1, + "learning_rate": 4.1453107117908474e-05, + "loss": 2.341, + "step": 3451500 + }, + { + "epoch": 17.1, + "learning_rate": 4.145186853148239e-05, + "loss": 2.3567, + "step": 3452000 + }, + { + "epoch": 17.1, + "learning_rate": 4.145063489940201e-05, + "loss": 2.3433, + "step": 3452500 + }, + { + "epoch": 17.11, + "learning_rate": 4.144939631297593e-05, + "loss": 2.3524, + "step": 3453000 + }, + { + "epoch": 17.11, + "learning_rate": 4.1448157726549846e-05, + "loss": 2.3444, + "step": 3453500 + }, + { + "epoch": 17.11, + "learning_rate": 4.144691914012376e-05, + "loss": 2.3531, + "step": 3454000 + }, + { + "epoch": 17.11, + "learning_rate": 4.144568055369768e-05, + "loss": 2.3459, + "step": 3454500 + }, + { + "epoch": 17.12, + "learning_rate": 4.1444441967271596e-05, + "loss": 2.35, + "step": 3455000 + }, + { + "epoch": 17.12, + "learning_rate": 4.144320338084551e-05, + "loss": 2.338, + "step": 3455500 + }, + { + "epoch": 17.12, + "learning_rate": 4.1441964794419423e-05, + "loss": 2.3489, + "step": 3456000 + }, + { + "epoch": 17.12, + "learning_rate": 4.14407286851662e-05, + "loss": 2.3561, + "step": 3456500 + }, + { + "epoch": 17.13, + "learning_rate": 4.1439490098740116e-05, + "loss": 2.3618, + "step": 3457000 + }, + { + "epoch": 17.13, + "learning_rate": 4.143825151231403e-05, + "loss": 2.3614, + "step": 3457500 + }, + { + "epoch": 17.13, + "learning_rate": 4.143701292588795e-05, + "loss": 2.3375, + "step": 3458000 + }, + { + "epoch": 17.13, + "learning_rate": 4.143577433946187e-05, + "loss": 2.3632, + "step": 3458500 + }, + { + "epoch": 17.14, + "learning_rate": 4.143453823020863e-05, + "loss": 2.341, + "step": 3459000 + }, + { + "epoch": 17.14, + "learning_rate": 4.1433299643782546e-05, + "loss": 2.3918, + "step": 3459500 + }, + { + "epoch": 17.14, + "learning_rate": 4.143206105735646e-05, + "loss": 2.3394, + "step": 3460000 + }, + { + "epoch": 17.14, + "learning_rate": 4.143082247093038e-05, + "loss": 2.3564, + "step": 3460500 + }, + { + "epoch": 17.15, + "learning_rate": 4.1429583884504296e-05, + "loss": 2.351, + "step": 3461000 + }, + { + "epoch": 17.15, + "learning_rate": 4.142834529807821e-05, + "loss": 2.3515, + "step": 3461500 + }, + { + "epoch": 17.15, + "learning_rate": 4.142710918882498e-05, + "loss": 2.343, + "step": 3462000 + }, + { + "epoch": 17.15, + "learning_rate": 4.14258706023989e-05, + "loss": 2.3226, + "step": 3462500 + }, + { + "epoch": 17.16, + "learning_rate": 4.1424632015972816e-05, + "loss": 2.35, + "step": 3463000 + }, + { + "epoch": 17.16, + "learning_rate": 4.142339342954673e-05, + "loss": 2.3612, + "step": 3463500 + }, + { + "epoch": 17.16, + "learning_rate": 4.142215484312065e-05, + "loss": 2.3746, + "step": 3464000 + }, + { + "epoch": 17.16, + "learning_rate": 4.142091625669457e-05, + "loss": 2.3619, + "step": 3464500 + }, + { + "epoch": 17.17, + "learning_rate": 4.141967767026848e-05, + "loss": 2.3721, + "step": 3465000 + }, + { + "epoch": 17.17, + "learning_rate": 4.1418439083842394e-05, + "loss": 2.3639, + "step": 3465500 + }, + { + "epoch": 17.17, + "learning_rate": 4.1417205451762015e-05, + "loss": 2.343, + "step": 3466000 + }, + { + "epoch": 17.17, + "learning_rate": 4.141596686533593e-05, + "loss": 2.3616, + "step": 3466500 + }, + { + "epoch": 17.18, + "learning_rate": 4.141472827890985e-05, + "loss": 2.3572, + "step": 3467000 + }, + { + "epoch": 17.18, + "learning_rate": 4.1413489692483765e-05, + "loss": 2.374, + "step": 3467500 + }, + { + "epoch": 17.18, + "learning_rate": 4.1412253583230534e-05, + "loss": 2.3437, + "step": 3468000 + }, + { + "epoch": 17.18, + "learning_rate": 4.141101499680445e-05, + "loss": 2.3703, + "step": 3468500 + }, + { + "epoch": 17.19, + "learning_rate": 4.140977641037837e-05, + "loss": 2.3546, + "step": 3469000 + }, + { + "epoch": 17.19, + "learning_rate": 4.140853782395228e-05, + "loss": 2.3631, + "step": 3469500 + }, + { + "epoch": 17.19, + "learning_rate": 4.140730171469905e-05, + "loss": 2.3685, + "step": 3470000 + }, + { + "epoch": 17.19, + "learning_rate": 4.1406063128272964e-05, + "loss": 2.3637, + "step": 3470500 + }, + { + "epoch": 17.2, + "learning_rate": 4.140482454184688e-05, + "loss": 2.3558, + "step": 3471000 + }, + { + "epoch": 17.2, + "learning_rate": 4.14035859554208e-05, + "loss": 2.3688, + "step": 3471500 + }, + { + "epoch": 17.2, + "learning_rate": 4.1402347368994715e-05, + "loss": 2.343, + "step": 3472000 + }, + { + "epoch": 17.2, + "learning_rate": 4.140110878256863e-05, + "loss": 2.364, + "step": 3472500 + }, + { + "epoch": 17.21, + "learning_rate": 4.139987019614255e-05, + "loss": 2.3704, + "step": 3473000 + }, + { + "epoch": 17.21, + "learning_rate": 4.1398631609716465e-05, + "loss": 2.3474, + "step": 3473500 + }, + { + "epoch": 17.21, + "learning_rate": 4.1397395500463234e-05, + "loss": 2.3439, + "step": 3474000 + }, + { + "epoch": 17.21, + "learning_rate": 4.139615691403715e-05, + "loss": 2.3453, + "step": 3474500 + }, + { + "epoch": 17.22, + "learning_rate": 4.139491832761107e-05, + "loss": 2.3463, + "step": 3475000 + }, + { + "epoch": 17.22, + "learning_rate": 4.1393679741184985e-05, + "loss": 2.3682, + "step": 3475500 + }, + { + "epoch": 17.22, + "learning_rate": 4.1392441154758895e-05, + "loss": 2.3424, + "step": 3476000 + }, + { + "epoch": 17.22, + "learning_rate": 4.139120256833281e-05, + "loss": 2.3605, + "step": 3476500 + }, + { + "epoch": 17.23, + "learning_rate": 4.138996398190673e-05, + "loss": 2.3513, + "step": 3477000 + }, + { + "epoch": 17.23, + "learning_rate": 4.13887278726535e-05, + "loss": 2.3756, + "step": 3477500 + }, + { + "epoch": 17.23, + "learning_rate": 4.1387489286227415e-05, + "loss": 2.3382, + "step": 3478000 + }, + { + "epoch": 17.23, + "learning_rate": 4.138625069980133e-05, + "loss": 2.3326, + "step": 3478500 + }, + { + "epoch": 17.24, + "learning_rate": 4.138501211337525e-05, + "loss": 2.3489, + "step": 3479000 + }, + { + "epoch": 17.24, + "learning_rate": 4.1383773526949166e-05, + "loss": 2.3513, + "step": 3479500 + }, + { + "epoch": 17.24, + "learning_rate": 4.1382537417695934e-05, + "loss": 2.3649, + "step": 3480000 + }, + { + "epoch": 17.24, + "learning_rate": 4.13813013084427e-05, + "loss": 2.3805, + "step": 3480500 + }, + { + "epoch": 17.25, + "learning_rate": 4.138006272201662e-05, + "loss": 2.3487, + "step": 3481000 + }, + { + "epoch": 17.25, + "learning_rate": 4.137882413559053e-05, + "loss": 2.3675, + "step": 3481500 + }, + { + "epoch": 17.25, + "learning_rate": 4.137758554916445e-05, + "loss": 2.3361, + "step": 3482000 + }, + { + "epoch": 17.25, + "learning_rate": 4.1376346962738364e-05, + "loss": 2.3631, + "step": 3482500 + }, + { + "epoch": 17.26, + "learning_rate": 4.137510837631228e-05, + "loss": 2.3593, + "step": 3483000 + }, + { + "epoch": 17.26, + "learning_rate": 4.13738697898862e-05, + "loss": 2.3565, + "step": 3483500 + }, + { + "epoch": 17.26, + "learning_rate": 4.1372631203460115e-05, + "loss": 2.353, + "step": 3484000 + }, + { + "epoch": 17.26, + "learning_rate": 4.137139261703403e-05, + "loss": 2.3434, + "step": 3484500 + }, + { + "epoch": 17.27, + "learning_rate": 4.137015403060795e-05, + "loss": 2.3584, + "step": 3485000 + }, + { + "epoch": 17.27, + "learning_rate": 4.1368915444181866e-05, + "loss": 2.3592, + "step": 3485500 + }, + { + "epoch": 17.27, + "learning_rate": 4.136767685775578e-05, + "loss": 2.3616, + "step": 3486000 + }, + { + "epoch": 17.27, + "learning_rate": 4.13664382713297e-05, + "loss": 2.361, + "step": 3486500 + }, + { + "epoch": 17.28, + "learning_rate": 4.1365199684903617e-05, + "loss": 2.3554, + "step": 3487000 + }, + { + "epoch": 17.28, + "learning_rate": 4.1363961098477533e-05, + "loss": 2.3607, + "step": 3487500 + }, + { + "epoch": 17.28, + "learning_rate": 4.136272251205145e-05, + "loss": 2.3504, + "step": 3488000 + }, + { + "epoch": 17.28, + "learning_rate": 4.136148640279822e-05, + "loss": 2.3489, + "step": 3488500 + }, + { + "epoch": 17.29, + "learning_rate": 4.1360247816372136e-05, + "loss": 2.3515, + "step": 3489000 + }, + { + "epoch": 17.29, + "learning_rate": 4.1359009229946046e-05, + "loss": 2.3622, + "step": 3489500 + }, + { + "epoch": 17.29, + "learning_rate": 4.1357773120692815e-05, + "loss": 2.366, + "step": 3490000 + }, + { + "epoch": 17.29, + "learning_rate": 4.135653453426673e-05, + "loss": 2.3397, + "step": 3490500 + }, + { + "epoch": 17.3, + "learning_rate": 4.135529594784065e-05, + "loss": 2.3427, + "step": 3491000 + }, + { + "epoch": 17.3, + "learning_rate": 4.1354057361414566e-05, + "loss": 2.3803, + "step": 3491500 + }, + { + "epoch": 17.3, + "learning_rate": 4.135281877498848e-05, + "loss": 2.3359, + "step": 3492000 + }, + { + "epoch": 17.3, + "learning_rate": 4.13515801885624e-05, + "loss": 2.3294, + "step": 3492500 + }, + { + "epoch": 17.31, + "learning_rate": 4.135034160213632e-05, + "loss": 2.3517, + "step": 3493000 + }, + { + "epoch": 17.31, + "learning_rate": 4.1349103015710234e-05, + "loss": 2.3769, + "step": 3493500 + }, + { + "epoch": 17.31, + "learning_rate": 4.134786442928415e-05, + "loss": 2.388, + "step": 3494000 + }, + { + "epoch": 17.31, + "learning_rate": 4.134662584285807e-05, + "loss": 2.3651, + "step": 3494500 + }, + { + "epoch": 17.32, + "learning_rate": 4.1345387256431984e-05, + "loss": 2.3778, + "step": 3495000 + }, + { + "epoch": 17.32, + "learning_rate": 4.13441486700059e-05, + "loss": 2.3529, + "step": 3495500 + }, + { + "epoch": 17.32, + "learning_rate": 4.134291008357982e-05, + "loss": 2.3574, + "step": 3496000 + }, + { + "epoch": 17.32, + "learning_rate": 4.134167149715373e-05, + "loss": 2.3382, + "step": 3496500 + }, + { + "epoch": 17.33, + "learning_rate": 4.13404353879005e-05, + "loss": 2.3456, + "step": 3497000 + }, + { + "epoch": 17.33, + "learning_rate": 4.1339196801474414e-05, + "loss": 2.3635, + "step": 3497500 + }, + { + "epoch": 17.33, + "learning_rate": 4.133795821504833e-05, + "loss": 2.3514, + "step": 3498000 + }, + { + "epoch": 17.33, + "learning_rate": 4.133671962862225e-05, + "loss": 2.352, + "step": 3498500 + }, + { + "epoch": 17.34, + "learning_rate": 4.1335481042196165e-05, + "loss": 2.3373, + "step": 3499000 + }, + { + "epoch": 17.34, + "learning_rate": 4.1334242455770075e-05, + "loss": 2.3498, + "step": 3499500 + }, + { + "epoch": 17.34, + "learning_rate": 4.133300386934399e-05, + "loss": 2.3483, + "step": 3500000 + }, + { + "epoch": 17.34, + "learning_rate": 4.133176528291791e-05, + "loss": 2.3468, + "step": 3500500 + }, + { + "epoch": 17.35, + "learning_rate": 4.1330526696491826e-05, + "loss": 2.3388, + "step": 3501000 + }, + { + "epoch": 17.35, + "learning_rate": 4.132928811006574e-05, + "loss": 2.3492, + "step": 3501500 + }, + { + "epoch": 17.35, + "learning_rate": 4.132805200081252e-05, + "loss": 2.3273, + "step": 3502000 + }, + { + "epoch": 17.35, + "learning_rate": 4.132681341438643e-05, + "loss": 2.3493, + "step": 3502500 + }, + { + "epoch": 17.36, + "learning_rate": 4.132557978230605e-05, + "loss": 2.3476, + "step": 3503000 + }, + { + "epoch": 17.36, + "learning_rate": 4.1324341195879966e-05, + "loss": 2.3567, + "step": 3503500 + }, + { + "epoch": 17.36, + "learning_rate": 4.1323105086626735e-05, + "loss": 2.3562, + "step": 3504000 + }, + { + "epoch": 17.36, + "learning_rate": 4.132186650020065e-05, + "loss": 2.3567, + "step": 3504500 + }, + { + "epoch": 17.36, + "learning_rate": 4.132062791377457e-05, + "loss": 2.3373, + "step": 3505000 + }, + { + "epoch": 17.37, + "learning_rate": 4.1319389327348486e-05, + "loss": 2.3705, + "step": 3505500 + }, + { + "epoch": 17.37, + "learning_rate": 4.13181507409224e-05, + "loss": 2.36, + "step": 3506000 + }, + { + "epoch": 17.37, + "learning_rate": 4.131691215449632e-05, + "loss": 2.3511, + "step": 3506500 + }, + { + "epoch": 17.37, + "learning_rate": 4.1315673568070236e-05, + "loss": 2.3651, + "step": 3507000 + }, + { + "epoch": 17.38, + "learning_rate": 4.1314434981644153e-05, + "loss": 2.358, + "step": 3507500 + }, + { + "epoch": 17.38, + "learning_rate": 4.1313198872390915e-05, + "loss": 2.3668, + "step": 3508000 + }, + { + "epoch": 17.38, + "learning_rate": 4.131196028596483e-05, + "loss": 2.3245, + "step": 3508500 + }, + { + "epoch": 17.38, + "learning_rate": 4.131072169953875e-05, + "loss": 2.3623, + "step": 3509000 + }, + { + "epoch": 17.39, + "learning_rate": 4.1309483113112666e-05, + "loss": 2.3565, + "step": 3509500 + }, + { + "epoch": 17.39, + "learning_rate": 4.130824452668658e-05, + "loss": 2.3437, + "step": 3510000 + }, + { + "epoch": 17.39, + "learning_rate": 4.13070059402605e-05, + "loss": 2.3466, + "step": 3510500 + }, + { + "epoch": 17.39, + "learning_rate": 4.130576735383442e-05, + "loss": 2.383, + "step": 3511000 + }, + { + "epoch": 17.4, + "learning_rate": 4.1304528767408334e-05, + "loss": 2.3689, + "step": 3511500 + }, + { + "epoch": 17.4, + "learning_rate": 4.130329018098225e-05, + "loss": 2.3632, + "step": 3512000 + }, + { + "epoch": 17.4, + "learning_rate": 4.130205159455617e-05, + "loss": 2.3213, + "step": 3512500 + }, + { + "epoch": 17.4, + "learning_rate": 4.1300813008130085e-05, + "loss": 2.3577, + "step": 3513000 + }, + { + "epoch": 17.41, + "learning_rate": 4.1299576898876854e-05, + "loss": 2.3611, + "step": 3513500 + }, + { + "epoch": 17.41, + "learning_rate": 4.129833831245077e-05, + "loss": 2.3616, + "step": 3514000 + }, + { + "epoch": 17.41, + "learning_rate": 4.129709972602469e-05, + "loss": 2.3532, + "step": 3514500 + }, + { + "epoch": 17.41, + "learning_rate": 4.1295861139598604e-05, + "loss": 2.3241, + "step": 3515000 + }, + { + "epoch": 17.42, + "learning_rate": 4.129462255317252e-05, + "loss": 2.3725, + "step": 3515500 + }, + { + "epoch": 17.42, + "learning_rate": 4.129338396674644e-05, + "loss": 2.3333, + "step": 3516000 + }, + { + "epoch": 17.42, + "learning_rate": 4.1292145380320355e-05, + "loss": 2.3379, + "step": 3516500 + }, + { + "epoch": 17.42, + "learning_rate": 4.129090927106712e-05, + "loss": 2.3909, + "step": 3517000 + }, + { + "epoch": 17.43, + "learning_rate": 4.1289670684641034e-05, + "loss": 2.3664, + "step": 3517500 + }, + { + "epoch": 17.43, + "learning_rate": 4.128843209821495e-05, + "loss": 2.3661, + "step": 3518000 + }, + { + "epoch": 17.43, + "learning_rate": 4.128719351178887e-05, + "loss": 2.3586, + "step": 3518500 + }, + { + "epoch": 17.43, + "learning_rate": 4.1285954925362785e-05, + "loss": 2.3723, + "step": 3519000 + }, + { + "epoch": 17.44, + "learning_rate": 4.12847163389367e-05, + "loss": 2.3486, + "step": 3519500 + }, + { + "epoch": 17.44, + "learning_rate": 4.128347775251062e-05, + "loss": 2.3571, + "step": 3520000 + }, + { + "epoch": 17.44, + "learning_rate": 4.128224164325739e-05, + "loss": 2.3583, + "step": 3520500 + }, + { + "epoch": 17.44, + "learning_rate": 4.1281003056831304e-05, + "loss": 2.4005, + "step": 3521000 + }, + { + "epoch": 17.45, + "learning_rate": 4.127976447040522e-05, + "loss": 2.3775, + "step": 3521500 + }, + { + "epoch": 17.45, + "learning_rate": 4.127852588397914e-05, + "loss": 2.3687, + "step": 3522000 + }, + { + "epoch": 17.45, + "learning_rate": 4.1277287297553055e-05, + "loss": 2.3569, + "step": 3522500 + }, + { + "epoch": 17.45, + "learning_rate": 4.127605118829982e-05, + "loss": 2.3676, + "step": 3523000 + }, + { + "epoch": 17.46, + "learning_rate": 4.1274815079046586e-05, + "loss": 2.3535, + "step": 3523500 + }, + { + "epoch": 17.46, + "learning_rate": 4.12735764926205e-05, + "loss": 2.368, + "step": 3524000 + }, + { + "epoch": 17.46, + "learning_rate": 4.127233790619442e-05, + "loss": 2.3668, + "step": 3524500 + }, + { + "epoch": 17.46, + "learning_rate": 4.127109931976834e-05, + "loss": 2.3525, + "step": 3525000 + }, + { + "epoch": 17.47, + "learning_rate": 4.1269860733342254e-05, + "loss": 2.3639, + "step": 3525500 + }, + { + "epoch": 17.47, + "learning_rate": 4.126862214691617e-05, + "loss": 2.3634, + "step": 3526000 + }, + { + "epoch": 17.47, + "learning_rate": 4.126738603766294e-05, + "loss": 2.3497, + "step": 3526500 + }, + { + "epoch": 17.47, + "learning_rate": 4.126614992840971e-05, + "loss": 2.3586, + "step": 3527000 + }, + { + "epoch": 17.48, + "learning_rate": 4.1264911341983625e-05, + "loss": 2.3447, + "step": 3527500 + }, + { + "epoch": 17.48, + "learning_rate": 4.126367275555754e-05, + "loss": 2.3595, + "step": 3528000 + }, + { + "epoch": 17.48, + "learning_rate": 4.126243416913145e-05, + "loss": 2.3406, + "step": 3528500 + }, + { + "epoch": 17.48, + "learning_rate": 4.126119805987823e-05, + "loss": 2.3242, + "step": 3529000 + }, + { + "epoch": 17.49, + "learning_rate": 4.1259959473452145e-05, + "loss": 2.3484, + "step": 3529500 + }, + { + "epoch": 17.49, + "learning_rate": 4.125872336419891e-05, + "loss": 2.3547, + "step": 3530000 + }, + { + "epoch": 17.49, + "learning_rate": 4.1257484777772824e-05, + "loss": 2.3682, + "step": 3530500 + }, + { + "epoch": 17.49, + "learning_rate": 4.125624619134674e-05, + "loss": 2.352, + "step": 3531000 + }, + { + "epoch": 17.5, + "learning_rate": 4.125500760492066e-05, + "loss": 2.3719, + "step": 3531500 + }, + { + "epoch": 17.5, + "learning_rate": 4.1253769018494575e-05, + "loss": 2.3395, + "step": 3532000 + }, + { + "epoch": 17.5, + "learning_rate": 4.125253043206849e-05, + "loss": 2.3595, + "step": 3532500 + }, + { + "epoch": 17.5, + "learning_rate": 4.125129184564241e-05, + "loss": 2.3707, + "step": 3533000 + }, + { + "epoch": 17.51, + "learning_rate": 4.1250053259216325e-05, + "loss": 2.3691, + "step": 3533500 + }, + { + "epoch": 17.51, + "learning_rate": 4.124881467279024e-05, + "loss": 2.3668, + "step": 3534000 + }, + { + "epoch": 17.51, + "learning_rate": 4.124757608636415e-05, + "loss": 2.3482, + "step": 3534500 + }, + { + "epoch": 17.51, + "learning_rate": 4.124633749993807e-05, + "loss": 2.3422, + "step": 3535000 + }, + { + "epoch": 17.52, + "learning_rate": 4.1245098913511986e-05, + "loss": 2.3515, + "step": 3535500 + }, + { + "epoch": 17.52, + "learning_rate": 4.12438603270859e-05, + "loss": 2.3495, + "step": 3536000 + }, + { + "epoch": 17.52, + "learning_rate": 4.124262174065982e-05, + "loss": 2.3579, + "step": 3536500 + }, + { + "epoch": 17.52, + "learning_rate": 4.124138315423374e-05, + "loss": 2.3655, + "step": 3537000 + }, + { + "epoch": 17.53, + "learning_rate": 4.1240144567807654e-05, + "loss": 2.3543, + "step": 3537500 + }, + { + "epoch": 17.53, + "learning_rate": 4.123890598138157e-05, + "loss": 2.3707, + "step": 3538000 + }, + { + "epoch": 17.53, + "learning_rate": 4.123766987212834e-05, + "loss": 2.3427, + "step": 3538500 + }, + { + "epoch": 17.53, + "learning_rate": 4.123643128570226e-05, + "loss": 2.3651, + "step": 3539000 + }, + { + "epoch": 17.54, + "learning_rate": 4.1235192699276174e-05, + "loss": 2.3747, + "step": 3539500 + }, + { + "epoch": 17.54, + "learning_rate": 4.123395411285009e-05, + "loss": 2.3532, + "step": 3540000 + }, + { + "epoch": 17.54, + "learning_rate": 4.1232715526424e-05, + "loss": 2.3638, + "step": 3540500 + }, + { + "epoch": 17.54, + "learning_rate": 4.123147941717077e-05, + "loss": 2.3649, + "step": 3541000 + }, + { + "epoch": 17.55, + "learning_rate": 4.1230240830744686e-05, + "loss": 2.3551, + "step": 3541500 + }, + { + "epoch": 17.55, + "learning_rate": 4.12290022443186e-05, + "loss": 2.3676, + "step": 3542000 + }, + { + "epoch": 17.55, + "learning_rate": 4.122776365789252e-05, + "loss": 2.3658, + "step": 3542500 + }, + { + "epoch": 17.55, + "learning_rate": 4.122652507146644e-05, + "loss": 2.3553, + "step": 3543000 + }, + { + "epoch": 17.56, + "learning_rate": 4.1225286485040354e-05, + "loss": 2.3745, + "step": 3543500 + }, + { + "epoch": 17.56, + "learning_rate": 4.122405037578712e-05, + "loss": 2.3211, + "step": 3544000 + }, + { + "epoch": 17.56, + "learning_rate": 4.122281178936104e-05, + "loss": 2.3367, + "step": 3544500 + }, + { + "epoch": 17.56, + "learning_rate": 4.122157320293496e-05, + "loss": 2.3815, + "step": 3545000 + }, + { + "epoch": 17.57, + "learning_rate": 4.1220334616508874e-05, + "loss": 2.3569, + "step": 3545500 + }, + { + "epoch": 17.57, + "learning_rate": 4.121909603008279e-05, + "loss": 2.3516, + "step": 3546000 + }, + { + "epoch": 17.57, + "learning_rate": 4.121785992082956e-05, + "loss": 2.3911, + "step": 3546500 + }, + { + "epoch": 17.57, + "learning_rate": 4.121662133440347e-05, + "loss": 2.3477, + "step": 3547000 + }, + { + "epoch": 17.58, + "learning_rate": 4.1215382747977387e-05, + "loss": 2.3665, + "step": 3547500 + }, + { + "epoch": 17.58, + "learning_rate": 4.1214144161551304e-05, + "loss": 2.3727, + "step": 3548000 + }, + { + "epoch": 17.58, + "learning_rate": 4.121290557512522e-05, + "loss": 2.3339, + "step": 3548500 + }, + { + "epoch": 17.58, + "learning_rate": 4.121166698869914e-05, + "loss": 2.391, + "step": 3549000 + }, + { + "epoch": 17.59, + "learning_rate": 4.1210428402273054e-05, + "loss": 2.3653, + "step": 3549500 + }, + { + "epoch": 17.59, + "learning_rate": 4.120918981584697e-05, + "loss": 2.3585, + "step": 3550000 + }, + { + "epoch": 17.59, + "learning_rate": 4.120795122942089e-05, + "loss": 2.3839, + "step": 3550500 + }, + { + "epoch": 17.59, + "learning_rate": 4.1206712642994805e-05, + "loss": 2.3457, + "step": 3551000 + }, + { + "epoch": 17.6, + "learning_rate": 4.120547405656872e-05, + "loss": 2.3383, + "step": 3551500 + }, + { + "epoch": 17.6, + "learning_rate": 4.120423547014264e-05, + "loss": 2.3665, + "step": 3552000 + }, + { + "epoch": 17.6, + "learning_rate": 4.1202996883716556e-05, + "loss": 2.3762, + "step": 3552500 + }, + { + "epoch": 17.6, + "learning_rate": 4.120175829729047e-05, + "loss": 2.364, + "step": 3553000 + }, + { + "epoch": 17.61, + "learning_rate": 4.120051971086439e-05, + "loss": 2.3739, + "step": 3553500 + }, + { + "epoch": 17.61, + "learning_rate": 4.119928360161115e-05, + "loss": 2.3595, + "step": 3554000 + }, + { + "epoch": 17.61, + "learning_rate": 4.119804501518507e-05, + "loss": 2.3607, + "step": 3554500 + }, + { + "epoch": 17.61, + "learning_rate": 4.1196806428758986e-05, + "loss": 2.3675, + "step": 3555000 + }, + { + "epoch": 17.62, + "learning_rate": 4.11955678423329e-05, + "loss": 2.3651, + "step": 3555500 + }, + { + "epoch": 17.62, + "learning_rate": 4.119432925590682e-05, + "loss": 2.3478, + "step": 3556000 + }, + { + "epoch": 17.62, + "learning_rate": 4.1193090669480736e-05, + "loss": 2.3354, + "step": 3556500 + }, + { + "epoch": 17.62, + "learning_rate": 4.119185208305465e-05, + "loss": 2.3537, + "step": 3557000 + }, + { + "epoch": 17.63, + "learning_rate": 4.1190618450974274e-05, + "loss": 2.3613, + "step": 3557500 + }, + { + "epoch": 17.63, + "learning_rate": 4.118937986454819e-05, + "loss": 2.3725, + "step": 3558000 + }, + { + "epoch": 17.63, + "learning_rate": 4.118814127812211e-05, + "loss": 2.3538, + "step": 3558500 + }, + { + "epoch": 17.63, + "learning_rate": 4.1186902691696025e-05, + "loss": 2.3833, + "step": 3559000 + }, + { + "epoch": 17.63, + "learning_rate": 4.118566410526994e-05, + "loss": 2.3447, + "step": 3559500 + }, + { + "epoch": 17.64, + "learning_rate": 4.118442551884386e-05, + "loss": 2.3642, + "step": 3560000 + }, + { + "epoch": 17.64, + "learning_rate": 4.118318693241777e-05, + "loss": 2.3562, + "step": 3560500 + }, + { + "epoch": 17.64, + "learning_rate": 4.118195082316454e-05, + "loss": 2.3684, + "step": 3561000 + }, + { + "epoch": 17.64, + "learning_rate": 4.118071471391131e-05, + "loss": 2.376, + "step": 3561500 + }, + { + "epoch": 17.65, + "learning_rate": 4.117947612748523e-05, + "loss": 2.3549, + "step": 3562000 + }, + { + "epoch": 17.65, + "learning_rate": 4.117823754105914e-05, + "loss": 2.3716, + "step": 3562500 + }, + { + "epoch": 17.65, + "learning_rate": 4.117699895463306e-05, + "loss": 2.3731, + "step": 3563000 + }, + { + "epoch": 17.65, + "learning_rate": 4.1175760368206974e-05, + "loss": 2.3784, + "step": 3563500 + }, + { + "epoch": 17.66, + "learning_rate": 4.117452425895374e-05, + "loss": 2.375, + "step": 3564000 + }, + { + "epoch": 17.66, + "learning_rate": 4.117328567252766e-05, + "loss": 2.3604, + "step": 3564500 + }, + { + "epoch": 17.66, + "learning_rate": 4.117204708610158e-05, + "loss": 2.3685, + "step": 3565000 + }, + { + "epoch": 17.66, + "learning_rate": 4.1170808499675494e-05, + "loss": 2.365, + "step": 3565500 + }, + { + "epoch": 17.67, + "learning_rate": 4.1169569913249404e-05, + "loss": 2.3495, + "step": 3566000 + }, + { + "epoch": 17.67, + "learning_rate": 4.116833132682332e-05, + "loss": 2.3564, + "step": 3566500 + }, + { + "epoch": 17.67, + "learning_rate": 4.116709274039724e-05, + "loss": 2.3713, + "step": 3567000 + }, + { + "epoch": 17.67, + "learning_rate": 4.1165854153971155e-05, + "loss": 2.3543, + "step": 3567500 + }, + { + "epoch": 17.68, + "learning_rate": 4.116461556754507e-05, + "loss": 2.3739, + "step": 3568000 + }, + { + "epoch": 17.68, + "learning_rate": 4.116337945829185e-05, + "loss": 2.3615, + "step": 3568500 + }, + { + "epoch": 17.68, + "learning_rate": 4.116214087186576e-05, + "loss": 2.3605, + "step": 3569000 + }, + { + "epoch": 17.68, + "learning_rate": 4.1160902285439674e-05, + "loss": 2.3517, + "step": 3569500 + }, + { + "epoch": 17.69, + "learning_rate": 4.115966369901359e-05, + "loss": 2.3571, + "step": 3570000 + }, + { + "epoch": 17.69, + "learning_rate": 4.115842511258751e-05, + "loss": 2.3669, + "step": 3570500 + }, + { + "epoch": 17.69, + "learning_rate": 4.115718900333428e-05, + "loss": 2.377, + "step": 3571000 + }, + { + "epoch": 17.69, + "learning_rate": 4.1155950416908194e-05, + "loss": 2.3674, + "step": 3571500 + }, + { + "epoch": 17.7, + "learning_rate": 4.1154711830482104e-05, + "loss": 2.3401, + "step": 3572000 + }, + { + "epoch": 17.7, + "learning_rate": 4.115347572122888e-05, + "loss": 2.3685, + "step": 3572500 + }, + { + "epoch": 17.7, + "learning_rate": 4.1152237134802797e-05, + "loss": 2.3623, + "step": 3573000 + }, + { + "epoch": 17.7, + "learning_rate": 4.1150998548376713e-05, + "loss": 2.3706, + "step": 3573500 + }, + { + "epoch": 17.71, + "learning_rate": 4.114975996195063e-05, + "loss": 2.3436, + "step": 3574000 + }, + { + "epoch": 17.71, + "learning_rate": 4.114852137552455e-05, + "loss": 2.386, + "step": 3574500 + }, + { + "epoch": 17.71, + "learning_rate": 4.1147282789098464e-05, + "loss": 2.3572, + "step": 3575000 + }, + { + "epoch": 17.71, + "learning_rate": 4.1146044202672374e-05, + "loss": 2.3548, + "step": 3575500 + }, + { + "epoch": 17.72, + "learning_rate": 4.114480561624629e-05, + "loss": 2.3364, + "step": 3576000 + }, + { + "epoch": 17.72, + "learning_rate": 4.114356702982021e-05, + "loss": 2.3401, + "step": 3576500 + }, + { + "epoch": 17.72, + "learning_rate": 4.1142328443394125e-05, + "loss": 2.3559, + "step": 3577000 + }, + { + "epoch": 17.72, + "learning_rate": 4.1141094811313746e-05, + "loss": 2.3307, + "step": 3577500 + }, + { + "epoch": 17.73, + "learning_rate": 4.113985622488766e-05, + "loss": 2.3549, + "step": 3578000 + }, + { + "epoch": 17.73, + "learning_rate": 4.113861763846158e-05, + "loss": 2.3539, + "step": 3578500 + }, + { + "epoch": 17.73, + "learning_rate": 4.11373790520355e-05, + "loss": 2.3734, + "step": 3579000 + }, + { + "epoch": 17.73, + "learning_rate": 4.1136140465609414e-05, + "loss": 2.3802, + "step": 3579500 + }, + { + "epoch": 17.74, + "learning_rate": 4.113490187918333e-05, + "loss": 2.3796, + "step": 3580000 + }, + { + "epoch": 17.74, + "learning_rate": 4.113366329275725e-05, + "loss": 2.3417, + "step": 3580500 + }, + { + "epoch": 17.74, + "learning_rate": 4.1132424706331164e-05, + "loss": 2.3862, + "step": 3581000 + }, + { + "epoch": 17.74, + "learning_rate": 4.1131186119905075e-05, + "loss": 2.3848, + "step": 3581500 + }, + { + "epoch": 17.75, + "learning_rate": 4.112994753347899e-05, + "loss": 2.3474, + "step": 3582000 + }, + { + "epoch": 17.75, + "learning_rate": 4.112870894705291e-05, + "loss": 2.3563, + "step": 3582500 + }, + { + "epoch": 17.75, + "learning_rate": 4.1127470360626825e-05, + "loss": 2.3501, + "step": 3583000 + }, + { + "epoch": 17.75, + "learning_rate": 4.112623177420074e-05, + "loss": 2.3553, + "step": 3583500 + }, + { + "epoch": 17.76, + "learning_rate": 4.112499566494751e-05, + "loss": 2.3721, + "step": 3584000 + }, + { + "epoch": 17.76, + "learning_rate": 4.112375707852142e-05, + "loss": 2.3584, + "step": 3584500 + }, + { + "epoch": 17.76, + "learning_rate": 4.112251849209534e-05, + "loss": 2.3551, + "step": 3585000 + }, + { + "epoch": 17.76, + "learning_rate": 4.1121279905669255e-05, + "loss": 2.3418, + "step": 3585500 + }, + { + "epoch": 17.77, + "learning_rate": 4.112004379641603e-05, + "loss": 2.3532, + "step": 3586000 + }, + { + "epoch": 17.77, + "learning_rate": 4.111880520998995e-05, + "loss": 2.3619, + "step": 3586500 + }, + { + "epoch": 17.77, + "learning_rate": 4.1117566623563864e-05, + "loss": 2.3691, + "step": 3587000 + }, + { + "epoch": 17.77, + "learning_rate": 4.111632803713778e-05, + "loss": 2.3613, + "step": 3587500 + }, + { + "epoch": 17.78, + "learning_rate": 4.111508945071169e-05, + "loss": 2.3628, + "step": 3588000 + }, + { + "epoch": 17.78, + "learning_rate": 4.111385581863131e-05, + "loss": 2.3733, + "step": 3588500 + }, + { + "epoch": 17.78, + "learning_rate": 4.111261723220523e-05, + "loss": 2.35, + "step": 3589000 + }, + { + "epoch": 17.78, + "learning_rate": 4.1111378645779146e-05, + "loss": 2.3595, + "step": 3589500 + }, + { + "epoch": 17.79, + "learning_rate": 4.111014005935306e-05, + "loss": 2.3707, + "step": 3590000 + }, + { + "epoch": 17.79, + "learning_rate": 4.110890147292698e-05, + "loss": 2.3446, + "step": 3590500 + }, + { + "epoch": 17.79, + "learning_rate": 4.11076628865009e-05, + "loss": 2.3831, + "step": 3591000 + }, + { + "epoch": 17.79, + "learning_rate": 4.1106424300074814e-05, + "loss": 2.3779, + "step": 3591500 + }, + { + "epoch": 17.8, + "learning_rate": 4.110518571364873e-05, + "loss": 2.3586, + "step": 3592000 + }, + { + "epoch": 17.8, + "learning_rate": 4.11039496043955e-05, + "loss": 2.3604, + "step": 3592500 + }, + { + "epoch": 17.8, + "learning_rate": 4.1102711017969416e-05, + "loss": 2.3887, + "step": 3593000 + }, + { + "epoch": 17.8, + "learning_rate": 4.1101472431543333e-05, + "loss": 2.3616, + "step": 3593500 + }, + { + "epoch": 17.81, + "learning_rate": 4.110023384511725e-05, + "loss": 2.3608, + "step": 3594000 + }, + { + "epoch": 17.81, + "learning_rate": 4.109899773586401e-05, + "loss": 2.3625, + "step": 3594500 + }, + { + "epoch": 17.81, + "learning_rate": 4.109775914943793e-05, + "loss": 2.3792, + "step": 3595000 + }, + { + "epoch": 17.81, + "learning_rate": 4.1096520563011846e-05, + "loss": 2.3811, + "step": 3595500 + }, + { + "epoch": 17.82, + "learning_rate": 4.109528197658576e-05, + "loss": 2.3449, + "step": 3596000 + }, + { + "epoch": 17.82, + "learning_rate": 4.109404339015968e-05, + "loss": 2.3525, + "step": 3596500 + }, + { + "epoch": 17.82, + "learning_rate": 4.10928048037336e-05, + "loss": 2.365, + "step": 3597000 + }, + { + "epoch": 17.82, + "learning_rate": 4.1091568694480366e-05, + "loss": 2.3526, + "step": 3597500 + }, + { + "epoch": 17.83, + "learning_rate": 4.109033010805428e-05, + "loss": 2.3738, + "step": 3598000 + }, + { + "epoch": 17.83, + "learning_rate": 4.10890915216282e-05, + "loss": 2.3432, + "step": 3598500 + }, + { + "epoch": 17.83, + "learning_rate": 4.1087852935202117e-05, + "loss": 2.3489, + "step": 3599000 + }, + { + "epoch": 17.83, + "learning_rate": 4.1086614348776034e-05, + "loss": 2.3774, + "step": 3599500 + }, + { + "epoch": 17.84, + "learning_rate": 4.108537576234995e-05, + "loss": 2.3676, + "step": 3600000 + }, + { + "epoch": 17.84, + "learning_rate": 4.108413717592387e-05, + "loss": 2.3776, + "step": 3600500 + }, + { + "epoch": 17.84, + "learning_rate": 4.1082898589497784e-05, + "loss": 2.3718, + "step": 3601000 + }, + { + "epoch": 17.84, + "learning_rate": 4.10816649574174e-05, + "loss": 2.3682, + "step": 3601500 + }, + { + "epoch": 17.85, + "learning_rate": 4.1080426370991315e-05, + "loss": 2.376, + "step": 3602000 + }, + { + "epoch": 17.85, + "learning_rate": 4.107919026173809e-05, + "loss": 2.3763, + "step": 3602500 + }, + { + "epoch": 17.85, + "learning_rate": 4.1077951675312e-05, + "loss": 2.3482, + "step": 3603000 + }, + { + "epoch": 17.85, + "learning_rate": 4.107671308888592e-05, + "loss": 2.3469, + "step": 3603500 + }, + { + "epoch": 17.86, + "learning_rate": 4.1075474502459835e-05, + "loss": 2.3561, + "step": 3604000 + }, + { + "epoch": 17.86, + "learning_rate": 4.107423591603375e-05, + "loss": 2.3922, + "step": 3604500 + }, + { + "epoch": 17.86, + "learning_rate": 4.107299732960767e-05, + "loss": 2.3394, + "step": 3605000 + }, + { + "epoch": 17.86, + "learning_rate": 4.107175874318158e-05, + "loss": 2.3683, + "step": 3605500 + }, + { + "epoch": 17.87, + "learning_rate": 4.1070520156755496e-05, + "loss": 2.3565, + "step": 3606000 + }, + { + "epoch": 17.87, + "learning_rate": 4.106928157032941e-05, + "loss": 2.3693, + "step": 3606500 + }, + { + "epoch": 17.87, + "learning_rate": 4.106804298390333e-05, + "loss": 2.3589, + "step": 3607000 + }, + { + "epoch": 17.87, + "learning_rate": 4.1066804397477246e-05, + "loss": 2.3629, + "step": 3607500 + }, + { + "epoch": 17.88, + "learning_rate": 4.1065565811051163e-05, + "loss": 2.3639, + "step": 3608000 + }, + { + "epoch": 17.88, + "learning_rate": 4.106432970179793e-05, + "loss": 2.3378, + "step": 3608500 + }, + { + "epoch": 17.88, + "learning_rate": 4.106309111537185e-05, + "loss": 2.3463, + "step": 3609000 + }, + { + "epoch": 17.88, + "learning_rate": 4.1061855006118625e-05, + "loss": 2.3587, + "step": 3609500 + }, + { + "epoch": 17.89, + "learning_rate": 4.1060616419692535e-05, + "loss": 2.3465, + "step": 3610000 + }, + { + "epoch": 17.89, + "learning_rate": 4.105937783326645e-05, + "loss": 2.3632, + "step": 3610500 + }, + { + "epoch": 17.89, + "learning_rate": 4.105813924684037e-05, + "loss": 2.3575, + "step": 3611000 + }, + { + "epoch": 17.89, + "learning_rate": 4.1056900660414286e-05, + "loss": 2.3853, + "step": 3611500 + }, + { + "epoch": 17.9, + "learning_rate": 4.1055662073988196e-05, + "loss": 2.3734, + "step": 3612000 + }, + { + "epoch": 17.9, + "learning_rate": 4.105442348756211e-05, + "loss": 2.3292, + "step": 3612500 + }, + { + "epoch": 17.9, + "learning_rate": 4.105318490113603e-05, + "loss": 2.3488, + "step": 3613000 + }, + { + "epoch": 17.9, + "learning_rate": 4.10519487918828e-05, + "loss": 2.3648, + "step": 3613500 + }, + { + "epoch": 17.91, + "learning_rate": 4.1050710205456715e-05, + "loss": 2.3435, + "step": 3614000 + }, + { + "epoch": 17.91, + "learning_rate": 4.104947161903063e-05, + "loss": 2.3709, + "step": 3614500 + }, + { + "epoch": 17.91, + "learning_rate": 4.104823303260455e-05, + "loss": 2.375, + "step": 3615000 + }, + { + "epoch": 17.91, + "learning_rate": 4.1046994446178466e-05, + "loss": 2.3707, + "step": 3615500 + }, + { + "epoch": 17.91, + "learning_rate": 4.104575585975238e-05, + "loss": 2.3493, + "step": 3616000 + }, + { + "epoch": 17.92, + "learning_rate": 4.10445172733263e-05, + "loss": 2.3454, + "step": 3616500 + }, + { + "epoch": 17.92, + "learning_rate": 4.104327868690022e-05, + "loss": 2.34, + "step": 3617000 + }, + { + "epoch": 17.92, + "learning_rate": 4.1042042577646986e-05, + "loss": 2.3497, + "step": 3617500 + }, + { + "epoch": 17.92, + "learning_rate": 4.1040806468393755e-05, + "loss": 2.3711, + "step": 3618000 + }, + { + "epoch": 17.93, + "learning_rate": 4.103956788196767e-05, + "loss": 2.3608, + "step": 3618500 + }, + { + "epoch": 17.93, + "learning_rate": 4.103832929554159e-05, + "loss": 2.3884, + "step": 3619000 + }, + { + "epoch": 17.93, + "learning_rate": 4.10370907091155e-05, + "loss": 2.3432, + "step": 3619500 + }, + { + "epoch": 17.93, + "learning_rate": 4.1035852122689416e-05, + "loss": 2.3545, + "step": 3620000 + }, + { + "epoch": 17.94, + "learning_rate": 4.103462096778189e-05, + "loss": 2.3426, + "step": 3620500 + }, + { + "epoch": 17.94, + "learning_rate": 4.1033382381355805e-05, + "loss": 2.3625, + "step": 3621000 + }, + { + "epoch": 17.94, + "learning_rate": 4.103214379492972e-05, + "loss": 2.3656, + "step": 3621500 + }, + { + "epoch": 17.94, + "learning_rate": 4.103090520850364e-05, + "loss": 2.3802, + "step": 3622000 + }, + { + "epoch": 17.95, + "learning_rate": 4.1029666622077556e-05, + "loss": 2.3537, + "step": 3622500 + }, + { + "epoch": 17.95, + "learning_rate": 4.102842803565147e-05, + "loss": 2.3691, + "step": 3623000 + }, + { + "epoch": 17.95, + "learning_rate": 4.102718944922539e-05, + "loss": 2.3754, + "step": 3623500 + }, + { + "epoch": 17.95, + "learning_rate": 4.1025950862799307e-05, + "loss": 2.3664, + "step": 3624000 + }, + { + "epoch": 17.96, + "learning_rate": 4.1024712276373224e-05, + "loss": 2.3663, + "step": 3624500 + }, + { + "epoch": 17.96, + "learning_rate": 4.102347368994714e-05, + "loss": 2.3661, + "step": 3625000 + }, + { + "epoch": 17.96, + "learning_rate": 4.102223510352106e-05, + "loss": 2.3567, + "step": 3625500 + }, + { + "epoch": 17.96, + "learning_rate": 4.1020998994267826e-05, + "loss": 2.3632, + "step": 3626000 + }, + { + "epoch": 17.97, + "learning_rate": 4.1019760407841736e-05, + "loss": 2.3703, + "step": 3626500 + }, + { + "epoch": 17.97, + "learning_rate": 4.101852182141565e-05, + "loss": 2.3692, + "step": 3627000 + }, + { + "epoch": 17.97, + "learning_rate": 4.101728323498957e-05, + "loss": 2.3708, + "step": 3627500 + }, + { + "epoch": 17.97, + "learning_rate": 4.101604464856349e-05, + "loss": 2.3665, + "step": 3628000 + }, + { + "epoch": 17.98, + "learning_rate": 4.1014806062137404e-05, + "loss": 2.3794, + "step": 3628500 + }, + { + "epoch": 17.98, + "learning_rate": 4.101356747571132e-05, + "loss": 2.3282, + "step": 3629000 + }, + { + "epoch": 17.98, + "learning_rate": 4.101232888928524e-05, + "loss": 2.3399, + "step": 3629500 + }, + { + "epoch": 17.98, + "learning_rate": 4.1011090302859155e-05, + "loss": 2.3481, + "step": 3630000 + }, + { + "epoch": 17.99, + "learning_rate": 4.1009854193605924e-05, + "loss": 2.3503, + "step": 3630500 + }, + { + "epoch": 17.99, + "learning_rate": 4.100861560717984e-05, + "loss": 2.3472, + "step": 3631000 + }, + { + "epoch": 17.99, + "learning_rate": 4.100737702075376e-05, + "loss": 2.3613, + "step": 3631500 + }, + { + "epoch": 17.99, + "learning_rate": 4.1006140911500526e-05, + "loss": 2.3675, + "step": 3632000 + }, + { + "epoch": 18.0, + "learning_rate": 4.100490232507444e-05, + "loss": 2.3705, + "step": 3632500 + }, + { + "epoch": 18.0, + "learning_rate": 4.100366373864836e-05, + "loss": 2.3473, + "step": 3633000 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.650584751161539, + "eval_accuracy_mlm": 0.6049741663885951, + "eval_accuracy_nsp": 0.8655666205154554, + "eval_loss": 2.37389874458313, + "eval_runtime": 145.8488, + "eval_samples_per_second": 1748.105, + "eval_steps_per_second": 72.843, + "step": 3633174 + }, + { + "epoch": 18.0, + "learning_rate": 4.100242515222227e-05, + "loss": 2.3426, + "step": 3633500 + }, + { + "epoch": 18.0, + "learning_rate": 4.100118904296904e-05, + "loss": 2.3235, + "step": 3634000 + }, + { + "epoch": 18.01, + "learning_rate": 4.0999950456542956e-05, + "loss": 2.3318, + "step": 3634500 + }, + { + "epoch": 18.01, + "learning_rate": 4.099871187011687e-05, + "loss": 2.3237, + "step": 3635000 + }, + { + "epoch": 18.01, + "learning_rate": 4.099747328369079e-05, + "loss": 2.3424, + "step": 3635500 + }, + { + "epoch": 18.01, + "learning_rate": 4.099623469726471e-05, + "loss": 2.3453, + "step": 3636000 + }, + { + "epoch": 18.02, + "learning_rate": 4.0994996110838624e-05, + "loss": 2.3344, + "step": 3636500 + }, + { + "epoch": 18.02, + "learning_rate": 4.099375752441254e-05, + "loss": 2.3098, + "step": 3637000 + }, + { + "epoch": 18.02, + "learning_rate": 4.099251893798646e-05, + "loss": 2.3288, + "step": 3637500 + }, + { + "epoch": 18.02, + "learning_rate": 4.0991282828733226e-05, + "loss": 2.3565, + "step": 3638000 + }, + { + "epoch": 18.03, + "learning_rate": 4.099004424230714e-05, + "loss": 2.3382, + "step": 3638500 + }, + { + "epoch": 18.03, + "learning_rate": 4.098880565588106e-05, + "loss": 2.3385, + "step": 3639000 + }, + { + "epoch": 18.03, + "learning_rate": 4.098756954662782e-05, + "loss": 2.3369, + "step": 3639500 + }, + { + "epoch": 18.03, + "learning_rate": 4.098633096020174e-05, + "loss": 2.3324, + "step": 3640000 + }, + { + "epoch": 18.04, + "learning_rate": 4.0985092373775656e-05, + "loss": 2.3385, + "step": 3640500 + }, + { + "epoch": 18.04, + "learning_rate": 4.098385378734957e-05, + "loss": 2.341, + "step": 3641000 + }, + { + "epoch": 18.04, + "learning_rate": 4.098261520092349e-05, + "loss": 2.3253, + "step": 3641500 + }, + { + "epoch": 18.04, + "learning_rate": 4.098137661449741e-05, + "loss": 2.3427, + "step": 3642000 + }, + { + "epoch": 18.05, + "learning_rate": 4.0980138028071324e-05, + "loss": 2.3467, + "step": 3642500 + }, + { + "epoch": 18.05, + "learning_rate": 4.097889944164524e-05, + "loss": 2.3307, + "step": 3643000 + }, + { + "epoch": 18.05, + "learning_rate": 4.097766333239201e-05, + "loss": 2.3384, + "step": 3643500 + }, + { + "epoch": 18.05, + "learning_rate": 4.0976424745965927e-05, + "loss": 2.3376, + "step": 3644000 + }, + { + "epoch": 18.06, + "learning_rate": 4.0975186159539843e-05, + "loss": 2.3253, + "step": 3644500 + }, + { + "epoch": 18.06, + "learning_rate": 4.097394757311376e-05, + "loss": 2.347, + "step": 3645000 + }, + { + "epoch": 18.06, + "learning_rate": 4.097270898668768e-05, + "loss": 2.3095, + "step": 3645500 + }, + { + "epoch": 18.06, + "learning_rate": 4.0971470400261594e-05, + "loss": 2.3029, + "step": 3646000 + }, + { + "epoch": 18.07, + "learning_rate": 4.097023181383551e-05, + "loss": 2.3188, + "step": 3646500 + }, + { + "epoch": 18.07, + "learning_rate": 4.096899322740942e-05, + "loss": 2.3294, + "step": 3647000 + }, + { + "epoch": 18.07, + "learning_rate": 4.096775464098334e-05, + "loss": 2.3405, + "step": 3647500 + }, + { + "epoch": 18.07, + "learning_rate": 4.0966516054557255e-05, + "loss": 2.3273, + "step": 3648000 + }, + { + "epoch": 18.08, + "learning_rate": 4.096527746813117e-05, + "loss": 2.3155, + "step": 3648500 + }, + { + "epoch": 18.08, + "learning_rate": 4.096403888170509e-05, + "loss": 2.3269, + "step": 3649000 + }, + { + "epoch": 18.08, + "learning_rate": 4.0962800295279006e-05, + "loss": 2.3336, + "step": 3649500 + }, + { + "epoch": 18.08, + "learning_rate": 4.0961564186025775e-05, + "loss": 2.3234, + "step": 3650000 + }, + { + "epoch": 18.09, + "learning_rate": 4.096032559959969e-05, + "loss": 2.3282, + "step": 3650500 + }, + { + "epoch": 18.09, + "learning_rate": 4.095908701317361e-05, + "loss": 2.3405, + "step": 3651000 + }, + { + "epoch": 18.09, + "learning_rate": 4.0957848426747526e-05, + "loss": 2.317, + "step": 3651500 + }, + { + "epoch": 18.09, + "learning_rate": 4.0956612317494294e-05, + "loss": 2.3369, + "step": 3652000 + }, + { + "epoch": 18.1, + "learning_rate": 4.095537373106821e-05, + "loss": 2.3423, + "step": 3652500 + }, + { + "epoch": 18.1, + "learning_rate": 4.095413762181497e-05, + "loss": 2.3363, + "step": 3653000 + }, + { + "epoch": 18.1, + "learning_rate": 4.095289903538889e-05, + "loss": 2.3515, + "step": 3653500 + }, + { + "epoch": 18.1, + "learning_rate": 4.095166044896281e-05, + "loss": 2.3327, + "step": 3654000 + }, + { + "epoch": 18.11, + "learning_rate": 4.0950421862536724e-05, + "loss": 2.3208, + "step": 3654500 + }, + { + "epoch": 18.11, + "learning_rate": 4.094918327611064e-05, + "loss": 2.3527, + "step": 3655000 + }, + { + "epoch": 18.11, + "learning_rate": 4.094794468968456e-05, + "loss": 2.352, + "step": 3655500 + }, + { + "epoch": 18.11, + "learning_rate": 4.0946706103258475e-05, + "loss": 2.3502, + "step": 3656000 + }, + { + "epoch": 18.12, + "learning_rate": 4.094546751683239e-05, + "loss": 2.3427, + "step": 3656500 + }, + { + "epoch": 18.12, + "learning_rate": 4.094423140757916e-05, + "loss": 2.3514, + "step": 3657000 + }, + { + "epoch": 18.12, + "learning_rate": 4.094299282115308e-05, + "loss": 2.3399, + "step": 3657500 + }, + { + "epoch": 18.12, + "learning_rate": 4.0941754234726995e-05, + "loss": 2.3461, + "step": 3658000 + }, + { + "epoch": 18.13, + "learning_rate": 4.094051564830091e-05, + "loss": 2.3321, + "step": 3658500 + }, + { + "epoch": 18.13, + "learning_rate": 4.093927706187483e-05, + "loss": 2.3296, + "step": 3659000 + }, + { + "epoch": 18.13, + "learning_rate": 4.093804095262159e-05, + "loss": 2.338, + "step": 3659500 + }, + { + "epoch": 18.13, + "learning_rate": 4.093680236619551e-05, + "loss": 2.3325, + "step": 3660000 + }, + { + "epoch": 18.14, + "learning_rate": 4.093556625694228e-05, + "loss": 2.3297, + "step": 3660500 + }, + { + "epoch": 18.14, + "learning_rate": 4.093432767051619e-05, + "loss": 2.3328, + "step": 3661000 + }, + { + "epoch": 18.14, + "learning_rate": 4.093308908409011e-05, + "loss": 2.354, + "step": 3661500 + }, + { + "epoch": 18.14, + "learning_rate": 4.093185049766403e-05, + "loss": 2.3395, + "step": 3662000 + }, + { + "epoch": 18.15, + "learning_rate": 4.0930611911237944e-05, + "loss": 2.3472, + "step": 3662500 + }, + { + "epoch": 18.15, + "learning_rate": 4.092937332481186e-05, + "loss": 2.3441, + "step": 3663000 + }, + { + "epoch": 18.15, + "learning_rate": 4.092813473838578e-05, + "loss": 2.3015, + "step": 3663500 + }, + { + "epoch": 18.15, + "learning_rate": 4.092689862913254e-05, + "loss": 2.3355, + "step": 3664000 + }, + { + "epoch": 18.16, + "learning_rate": 4.092566004270646e-05, + "loss": 2.3456, + "step": 3664500 + }, + { + "epoch": 18.16, + "learning_rate": 4.0924421456280374e-05, + "loss": 2.3623, + "step": 3665000 + }, + { + "epoch": 18.16, + "learning_rate": 4.092318286985429e-05, + "loss": 2.3409, + "step": 3665500 + }, + { + "epoch": 18.16, + "learning_rate": 4.092194428342821e-05, + "loss": 2.3148, + "step": 3666000 + }, + { + "epoch": 18.17, + "learning_rate": 4.0920705697002124e-05, + "loss": 2.3218, + "step": 3666500 + }, + { + "epoch": 18.17, + "learning_rate": 4.091946711057604e-05, + "loss": 2.3479, + "step": 3667000 + }, + { + "epoch": 18.17, + "learning_rate": 4.091822852414996e-05, + "loss": 2.3304, + "step": 3667500 + }, + { + "epoch": 18.17, + "learning_rate": 4.0916989937723875e-05, + "loss": 2.3497, + "step": 3668000 + }, + { + "epoch": 18.18, + "learning_rate": 4.0915753828470644e-05, + "loss": 2.3362, + "step": 3668500 + }, + { + "epoch": 18.18, + "learning_rate": 4.091451771921741e-05, + "loss": 2.3458, + "step": 3669000 + }, + { + "epoch": 18.18, + "learning_rate": 4.091327913279133e-05, + "loss": 2.3631, + "step": 3669500 + }, + { + "epoch": 18.18, + "learning_rate": 4.091204054636525e-05, + "loss": 2.3432, + "step": 3670000 + }, + { + "epoch": 18.18, + "learning_rate": 4.091080195993916e-05, + "loss": 2.3421, + "step": 3670500 + }, + { + "epoch": 18.19, + "learning_rate": 4.0909563373513074e-05, + "loss": 2.3361, + "step": 3671000 + }, + { + "epoch": 18.19, + "learning_rate": 4.090832478708699e-05, + "loss": 2.3355, + "step": 3671500 + }, + { + "epoch": 18.19, + "learning_rate": 4.090708620066091e-05, + "loss": 2.3339, + "step": 3672000 + }, + { + "epoch": 18.19, + "learning_rate": 4.0905847614234825e-05, + "loss": 2.3367, + "step": 3672500 + }, + { + "epoch": 18.2, + "learning_rate": 4.090460902780874e-05, + "loss": 2.3447, + "step": 3673000 + }, + { + "epoch": 18.2, + "learning_rate": 4.090337291855551e-05, + "loss": 2.3549, + "step": 3673500 + }, + { + "epoch": 18.2, + "learning_rate": 4.090213433212943e-05, + "loss": 2.3378, + "step": 3674000 + }, + { + "epoch": 18.2, + "learning_rate": 4.0900895745703344e-05, + "loss": 2.3436, + "step": 3674500 + }, + { + "epoch": 18.21, + "learning_rate": 4.089965715927726e-05, + "loss": 2.3406, + "step": 3675000 + }, + { + "epoch": 18.21, + "learning_rate": 4.089841857285118e-05, + "loss": 2.3393, + "step": 3675500 + }, + { + "epoch": 18.21, + "learning_rate": 4.089718246359795e-05, + "loss": 2.3336, + "step": 3676000 + }, + { + "epoch": 18.21, + "learning_rate": 4.0895943877171864e-05, + "loss": 2.3367, + "step": 3676500 + }, + { + "epoch": 18.22, + "learning_rate": 4.089470529074578e-05, + "loss": 2.3672, + "step": 3677000 + }, + { + "epoch": 18.22, + "learning_rate": 4.089346670431969e-05, + "loss": 2.3312, + "step": 3677500 + }, + { + "epoch": 18.22, + "learning_rate": 4.089222811789361e-05, + "loss": 2.35, + "step": 3678000 + }, + { + "epoch": 18.22, + "learning_rate": 4.0890989531467525e-05, + "loss": 2.3514, + "step": 3678500 + }, + { + "epoch": 18.23, + "learning_rate": 4.08897534222143e-05, + "loss": 2.369, + "step": 3679000 + }, + { + "epoch": 18.23, + "learning_rate": 4.088851483578821e-05, + "loss": 2.3395, + "step": 3679500 + }, + { + "epoch": 18.23, + "learning_rate": 4.088727624936213e-05, + "loss": 2.3569, + "step": 3680000 + }, + { + "epoch": 18.23, + "learning_rate": 4.0886037662936044e-05, + "loss": 2.3456, + "step": 3680500 + }, + { + "epoch": 18.24, + "learning_rate": 4.088480155368282e-05, + "loss": 2.3367, + "step": 3681000 + }, + { + "epoch": 18.24, + "learning_rate": 4.088356544442958e-05, + "loss": 2.3175, + "step": 3681500 + }, + { + "epoch": 18.24, + "learning_rate": 4.08823268580035e-05, + "loss": 2.3266, + "step": 3682000 + }, + { + "epoch": 18.24, + "learning_rate": 4.0881088271577416e-05, + "loss": 2.3512, + "step": 3682500 + }, + { + "epoch": 18.25, + "learning_rate": 4.087984968515133e-05, + "loss": 2.3464, + "step": 3683000 + }, + { + "epoch": 18.25, + "learning_rate": 4.08786135758981e-05, + "loss": 2.3319, + "step": 3683500 + }, + { + "epoch": 18.25, + "learning_rate": 4.087737498947202e-05, + "loss": 2.3545, + "step": 3684000 + }, + { + "epoch": 18.25, + "learning_rate": 4.0876136403045935e-05, + "loss": 2.3483, + "step": 3684500 + }, + { + "epoch": 18.26, + "learning_rate": 4.087489781661985e-05, + "loss": 2.3413, + "step": 3685000 + }, + { + "epoch": 18.26, + "learning_rate": 4.087365923019377e-05, + "loss": 2.3222, + "step": 3685500 + }, + { + "epoch": 18.26, + "learning_rate": 4.0872420643767686e-05, + "loss": 2.3522, + "step": 3686000 + }, + { + "epoch": 18.26, + "learning_rate": 4.08711820573416e-05, + "loss": 2.3184, + "step": 3686500 + }, + { + "epoch": 18.27, + "learning_rate": 4.086994347091552e-05, + "loss": 2.3553, + "step": 3687000 + }, + { + "epoch": 18.27, + "learning_rate": 4.086870736166228e-05, + "loss": 2.3474, + "step": 3687500 + }, + { + "epoch": 18.27, + "learning_rate": 4.08674687752362e-05, + "loss": 2.3626, + "step": 3688000 + }, + { + "epoch": 18.27, + "learning_rate": 4.086623266598297e-05, + "loss": 2.3262, + "step": 3688500 + }, + { + "epoch": 18.28, + "learning_rate": 4.0864994079556885e-05, + "loss": 2.3548, + "step": 3689000 + }, + { + "epoch": 18.28, + "learning_rate": 4.08637554931308e-05, + "loss": 2.3467, + "step": 3689500 + }, + { + "epoch": 18.28, + "learning_rate": 4.086251690670472e-05, + "loss": 2.3482, + "step": 3690000 + }, + { + "epoch": 18.28, + "learning_rate": 4.086128079745149e-05, + "loss": 2.3704, + "step": 3690500 + }, + { + "epoch": 18.29, + "learning_rate": 4.0860042211025404e-05, + "loss": 2.3454, + "step": 3691000 + }, + { + "epoch": 18.29, + "learning_rate": 4.085880610177217e-05, + "loss": 2.3512, + "step": 3691500 + }, + { + "epoch": 18.29, + "learning_rate": 4.085756751534609e-05, + "loss": 2.3291, + "step": 3692000 + }, + { + "epoch": 18.29, + "learning_rate": 4.085632892892001e-05, + "loss": 2.3378, + "step": 3692500 + }, + { + "epoch": 18.3, + "learning_rate": 4.085509034249392e-05, + "loss": 2.3274, + "step": 3693000 + }, + { + "epoch": 18.3, + "learning_rate": 4.0853851756067834e-05, + "loss": 2.3324, + "step": 3693500 + }, + { + "epoch": 18.3, + "learning_rate": 4.085261316964175e-05, + "loss": 2.3558, + "step": 3694000 + }, + { + "epoch": 18.3, + "learning_rate": 4.085137458321567e-05, + "loss": 2.3529, + "step": 3694500 + }, + { + "epoch": 18.31, + "learning_rate": 4.0850135996789585e-05, + "loss": 2.3572, + "step": 3695000 + }, + { + "epoch": 18.31, + "learning_rate": 4.08488974103635e-05, + "loss": 2.3294, + "step": 3695500 + }, + { + "epoch": 18.31, + "learning_rate": 4.084765882393742e-05, + "loss": 2.3592, + "step": 3696000 + }, + { + "epoch": 18.31, + "learning_rate": 4.084642271468419e-05, + "loss": 2.3392, + "step": 3696500 + }, + { + "epoch": 18.32, + "learning_rate": 4.0845184128258104e-05, + "loss": 2.3154, + "step": 3697000 + }, + { + "epoch": 18.32, + "learning_rate": 4.084394801900487e-05, + "loss": 2.3509, + "step": 3697500 + }, + { + "epoch": 18.32, + "learning_rate": 4.084270943257879e-05, + "loss": 2.3477, + "step": 3698000 + }, + { + "epoch": 18.32, + "learning_rate": 4.084147084615271e-05, + "loss": 2.3218, + "step": 3698500 + }, + { + "epoch": 18.33, + "learning_rate": 4.084023225972662e-05, + "loss": 2.3387, + "step": 3699000 + }, + { + "epoch": 18.33, + "learning_rate": 4.0838993673300534e-05, + "loss": 2.3438, + "step": 3699500 + }, + { + "epoch": 18.33, + "learning_rate": 4.083775508687445e-05, + "loss": 2.341, + "step": 3700000 + }, + { + "epoch": 18.33, + "learning_rate": 4.083651650044837e-05, + "loss": 2.3482, + "step": 3700500 + }, + { + "epoch": 18.34, + "learning_rate": 4.0835277914022285e-05, + "loss": 2.3661, + "step": 3701000 + }, + { + "epoch": 18.34, + "learning_rate": 4.08340393275962e-05, + "loss": 2.3362, + "step": 3701500 + }, + { + "epoch": 18.34, + "learning_rate": 4.083280074117012e-05, + "loss": 2.3077, + "step": 3702000 + }, + { + "epoch": 18.34, + "learning_rate": 4.083156463191689e-05, + "loss": 2.362, + "step": 3702500 + }, + { + "epoch": 18.35, + "learning_rate": 4.0830326045490804e-05, + "loss": 2.3652, + "step": 3703000 + }, + { + "epoch": 18.35, + "learning_rate": 4.082908745906472e-05, + "loss": 2.3664, + "step": 3703500 + }, + { + "epoch": 18.35, + "learning_rate": 4.082784887263864e-05, + "loss": 2.3331, + "step": 3704000 + }, + { + "epoch": 18.35, + "learning_rate": 4.0826610286212555e-05, + "loss": 2.3267, + "step": 3704500 + }, + { + "epoch": 18.36, + "learning_rate": 4.0825371699786465e-05, + "loss": 2.3312, + "step": 3705000 + }, + { + "epoch": 18.36, + "learning_rate": 4.082413311336038e-05, + "loss": 2.362, + "step": 3705500 + }, + { + "epoch": 18.36, + "learning_rate": 4.082289700410715e-05, + "loss": 2.3326, + "step": 3706000 + }, + { + "epoch": 18.36, + "learning_rate": 4.082165841768107e-05, + "loss": 2.3602, + "step": 3706500 + }, + { + "epoch": 18.37, + "learning_rate": 4.0820419831254985e-05, + "loss": 2.3623, + "step": 3707000 + }, + { + "epoch": 18.37, + "learning_rate": 4.08191812448289e-05, + "loss": 2.3526, + "step": 3707500 + }, + { + "epoch": 18.37, + "learning_rate": 4.081794265840282e-05, + "loss": 2.3439, + "step": 3708000 + }, + { + "epoch": 18.37, + "learning_rate": 4.081670654914959e-05, + "loss": 2.3465, + "step": 3708500 + }, + { + "epoch": 18.38, + "learning_rate": 4.0815467962723505e-05, + "loss": 2.3447, + "step": 3709000 + }, + { + "epoch": 18.38, + "learning_rate": 4.081422937629742e-05, + "loss": 2.3459, + "step": 3709500 + }, + { + "epoch": 18.38, + "learning_rate": 4.081299078987134e-05, + "loss": 2.3666, + "step": 3710000 + }, + { + "epoch": 18.38, + "learning_rate": 4.0811752203445255e-05, + "loss": 2.3043, + "step": 3710500 + }, + { + "epoch": 18.39, + "learning_rate": 4.0810516094192024e-05, + "loss": 2.3246, + "step": 3711000 + }, + { + "epoch": 18.39, + "learning_rate": 4.0809277507765934e-05, + "loss": 2.3471, + "step": 3711500 + }, + { + "epoch": 18.39, + "learning_rate": 4.080803892133985e-05, + "loss": 2.3432, + "step": 3712000 + }, + { + "epoch": 18.39, + "learning_rate": 4.080680033491377e-05, + "loss": 2.3355, + "step": 3712500 + }, + { + "epoch": 18.4, + "learning_rate": 4.0805564225660544e-05, + "loss": 2.3684, + "step": 3713000 + }, + { + "epoch": 18.4, + "learning_rate": 4.080432563923446e-05, + "loss": 2.366, + "step": 3713500 + }, + { + "epoch": 18.4, + "learning_rate": 4.080308705280838e-05, + "loss": 2.3494, + "step": 3714000 + }, + { + "epoch": 18.4, + "learning_rate": 4.080184846638229e-05, + "loss": 2.3516, + "step": 3714500 + }, + { + "epoch": 18.41, + "learning_rate": 4.0800609879956205e-05, + "loss": 2.3581, + "step": 3715000 + }, + { + "epoch": 18.41, + "learning_rate": 4.079937129353012e-05, + "loss": 2.3587, + "step": 3715500 + }, + { + "epoch": 18.41, + "learning_rate": 4.079813270710404e-05, + "loss": 2.3318, + "step": 3716000 + }, + { + "epoch": 18.41, + "learning_rate": 4.0796894120677956e-05, + "loss": 2.3423, + "step": 3716500 + }, + { + "epoch": 18.42, + "learning_rate": 4.079565553425187e-05, + "loss": 2.338, + "step": 3717000 + }, + { + "epoch": 18.42, + "learning_rate": 4.079441942499864e-05, + "loss": 2.3467, + "step": 3717500 + }, + { + "epoch": 18.42, + "learning_rate": 4.079318331574541e-05, + "loss": 2.3822, + "step": 3718000 + }, + { + "epoch": 18.42, + "learning_rate": 4.079194472931933e-05, + "loss": 2.3626, + "step": 3718500 + }, + { + "epoch": 18.43, + "learning_rate": 4.0790706142893244e-05, + "loss": 2.3683, + "step": 3719000 + }, + { + "epoch": 18.43, + "learning_rate": 4.078946755646716e-05, + "loss": 2.3831, + "step": 3719500 + }, + { + "epoch": 18.43, + "learning_rate": 4.078822897004108e-05, + "loss": 2.3206, + "step": 3720000 + }, + { + "epoch": 18.43, + "learning_rate": 4.0786990383614995e-05, + "loss": 2.3446, + "step": 3720500 + }, + { + "epoch": 18.44, + "learning_rate": 4.0785751797188905e-05, + "loss": 2.3532, + "step": 3721000 + }, + { + "epoch": 18.44, + "learning_rate": 4.078451321076282e-05, + "loss": 2.3127, + "step": 3721500 + }, + { + "epoch": 18.44, + "learning_rate": 4.078327462433674e-05, + "loss": 2.3387, + "step": 3722000 + }, + { + "epoch": 18.44, + "learning_rate": 4.0782036037910656e-05, + "loss": 2.3392, + "step": 3722500 + }, + { + "epoch": 18.45, + "learning_rate": 4.0780799928657424e-05, + "loss": 2.3358, + "step": 3723000 + }, + { + "epoch": 18.45, + "learning_rate": 4.077956134223134e-05, + "loss": 2.3367, + "step": 3723500 + }, + { + "epoch": 18.45, + "learning_rate": 4.077832275580525e-05, + "loss": 2.375, + "step": 3724000 + }, + { + "epoch": 18.45, + "learning_rate": 4.077708416937917e-05, + "loss": 2.3369, + "step": 3724500 + }, + { + "epoch": 18.45, + "learning_rate": 4.0775845582953085e-05, + "loss": 2.3509, + "step": 3725000 + }, + { + "epoch": 18.46, + "learning_rate": 4.0774606996527e-05, + "loss": 2.3558, + "step": 3725500 + }, + { + "epoch": 18.46, + "learning_rate": 4.077336841010092e-05, + "loss": 2.338, + "step": 3726000 + }, + { + "epoch": 18.46, + "learning_rate": 4.0772129823674836e-05, + "loss": 2.351, + "step": 3726500 + }, + { + "epoch": 18.46, + "learning_rate": 4.0770893714421605e-05, + "loss": 2.3565, + "step": 3727000 + }, + { + "epoch": 18.47, + "learning_rate": 4.076965512799552e-05, + "loss": 2.3638, + "step": 3727500 + }, + { + "epoch": 18.47, + "learning_rate": 4.076841654156944e-05, + "loss": 2.3696, + "step": 3728000 + }, + { + "epoch": 18.47, + "learning_rate": 4.0767177955143356e-05, + "loss": 2.3566, + "step": 3728500 + }, + { + "epoch": 18.47, + "learning_rate": 4.076593936871727e-05, + "loss": 2.3472, + "step": 3729000 + }, + { + "epoch": 18.48, + "learning_rate": 4.076470325946404e-05, + "loss": 2.3396, + "step": 3729500 + }, + { + "epoch": 18.48, + "learning_rate": 4.076346467303796e-05, + "loss": 2.3794, + "step": 3730000 + }, + { + "epoch": 18.48, + "learning_rate": 4.076222608661187e-05, + "loss": 2.3788, + "step": 3730500 + }, + { + "epoch": 18.48, + "learning_rate": 4.0760987500185786e-05, + "loss": 2.3517, + "step": 3731000 + }, + { + "epoch": 18.49, + "learning_rate": 4.075975139093256e-05, + "loss": 2.3382, + "step": 3731500 + }, + { + "epoch": 18.49, + "learning_rate": 4.075851280450648e-05, + "loss": 2.3403, + "step": 3732000 + }, + { + "epoch": 18.49, + "learning_rate": 4.0757274218080395e-05, + "loss": 2.3588, + "step": 3732500 + }, + { + "epoch": 18.49, + "learning_rate": 4.075603563165431e-05, + "loss": 2.3352, + "step": 3733000 + }, + { + "epoch": 18.5, + "learning_rate": 4.075479704522822e-05, + "loss": 2.3494, + "step": 3733500 + }, + { + "epoch": 18.5, + "learning_rate": 4.075355845880214e-05, + "loss": 2.3463, + "step": 3734000 + }, + { + "epoch": 18.5, + "learning_rate": 4.0752319872376056e-05, + "loss": 2.3804, + "step": 3734500 + }, + { + "epoch": 18.5, + "learning_rate": 4.075108128594997e-05, + "loss": 2.3203, + "step": 3735000 + }, + { + "epoch": 18.51, + "learning_rate": 4.074984269952389e-05, + "loss": 2.3402, + "step": 3735500 + }, + { + "epoch": 18.51, + "learning_rate": 4.074860659027066e-05, + "loss": 2.3319, + "step": 3736000 + }, + { + "epoch": 18.51, + "learning_rate": 4.074737048101743e-05, + "loss": 2.3261, + "step": 3736500 + }, + { + "epoch": 18.51, + "learning_rate": 4.0746131894591344e-05, + "loss": 2.3539, + "step": 3737000 + }, + { + "epoch": 18.52, + "learning_rate": 4.074489330816526e-05, + "loss": 2.3372, + "step": 3737500 + }, + { + "epoch": 18.52, + "learning_rate": 4.074365472173918e-05, + "loss": 2.3349, + "step": 3738000 + }, + { + "epoch": 18.52, + "learning_rate": 4.0742416135313095e-05, + "loss": 2.3208, + "step": 3738500 + }, + { + "epoch": 18.52, + "learning_rate": 4.0741180026059864e-05, + "loss": 2.3495, + "step": 3739000 + }, + { + "epoch": 18.53, + "learning_rate": 4.073994143963378e-05, + "loss": 2.3535, + "step": 3739500 + }, + { + "epoch": 18.53, + "learning_rate": 4.073870285320769e-05, + "loss": 2.3686, + "step": 3740000 + }, + { + "epoch": 18.53, + "learning_rate": 4.073746426678161e-05, + "loss": 2.3508, + "step": 3740500 + }, + { + "epoch": 18.53, + "learning_rate": 4.0736225680355525e-05, + "loss": 2.3398, + "step": 3741000 + }, + { + "epoch": 18.54, + "learning_rate": 4.073498709392944e-05, + "loss": 2.3602, + "step": 3741500 + }, + { + "epoch": 18.54, + "learning_rate": 4.073375098467621e-05, + "loss": 2.344, + "step": 3742000 + }, + { + "epoch": 18.54, + "learning_rate": 4.073251239825013e-05, + "loss": 2.3464, + "step": 3742500 + }, + { + "epoch": 18.54, + "learning_rate": 4.0731273811824044e-05, + "loss": 2.3433, + "step": 3743000 + }, + { + "epoch": 18.55, + "learning_rate": 4.073003522539796e-05, + "loss": 2.3592, + "step": 3743500 + }, + { + "epoch": 18.55, + "learning_rate": 4.072879663897188e-05, + "loss": 2.318, + "step": 3744000 + }, + { + "epoch": 18.55, + "learning_rate": 4.072756052971865e-05, + "loss": 2.3456, + "step": 3744500 + }, + { + "epoch": 18.55, + "learning_rate": 4.0726321943292564e-05, + "loss": 2.3568, + "step": 3745000 + }, + { + "epoch": 18.56, + "learning_rate": 4.072508335686648e-05, + "loss": 2.3698, + "step": 3745500 + }, + { + "epoch": 18.56, + "learning_rate": 4.07238447704404e-05, + "loss": 2.3393, + "step": 3746000 + }, + { + "epoch": 18.56, + "learning_rate": 4.072260618401431e-05, + "loss": 2.3585, + "step": 3746500 + }, + { + "epoch": 18.56, + "learning_rate": 4.0721367597588225e-05, + "loss": 2.3521, + "step": 3747000 + }, + { + "epoch": 18.57, + "learning_rate": 4.072012901116214e-05, + "loss": 2.3465, + "step": 3747500 + }, + { + "epoch": 18.57, + "learning_rate": 4.071889042473606e-05, + "loss": 2.3389, + "step": 3748000 + }, + { + "epoch": 18.57, + "learning_rate": 4.071765431548283e-05, + "loss": 2.3552, + "step": 3748500 + }, + { + "epoch": 18.57, + "learning_rate": 4.0716415729056745e-05, + "loss": 2.3512, + "step": 3749000 + }, + { + "epoch": 18.58, + "learning_rate": 4.071517714263066e-05, + "loss": 2.3437, + "step": 3749500 + }, + { + "epoch": 18.58, + "learning_rate": 4.071393855620458e-05, + "loss": 2.3615, + "step": 3750000 + }, + { + "epoch": 18.58, + "learning_rate": 4.0712699969778495e-05, + "loss": 2.3265, + "step": 3750500 + }, + { + "epoch": 18.58, + "learning_rate": 4.071146138335241e-05, + "loss": 2.3665, + "step": 3751000 + }, + { + "epoch": 18.59, + "learning_rate": 4.071022527409918e-05, + "loss": 2.345, + "step": 3751500 + }, + { + "epoch": 18.59, + "learning_rate": 4.07089866876731e-05, + "loss": 2.3294, + "step": 3752000 + }, + { + "epoch": 18.59, + "learning_rate": 4.0707748101247015e-05, + "loss": 2.3589, + "step": 3752500 + }, + { + "epoch": 18.59, + "learning_rate": 4.070650951482093e-05, + "loss": 2.3329, + "step": 3753000 + }, + { + "epoch": 18.6, + "learning_rate": 4.070527092839484e-05, + "loss": 2.3413, + "step": 3753500 + }, + { + "epoch": 18.6, + "learning_rate": 4.070403729631446e-05, + "loss": 2.3506, + "step": 3754000 + }, + { + "epoch": 18.6, + "learning_rate": 4.070279870988838e-05, + "loss": 2.3563, + "step": 3754500 + }, + { + "epoch": 18.6, + "learning_rate": 4.0701560123462297e-05, + "loss": 2.3641, + "step": 3755000 + }, + { + "epoch": 18.61, + "learning_rate": 4.0700321537036213e-05, + "loss": 2.3566, + "step": 3755500 + }, + { + "epoch": 18.61, + "learning_rate": 4.069908295061013e-05, + "loss": 2.3409, + "step": 3756000 + }, + { + "epoch": 18.61, + "learning_rate": 4.06978468413569e-05, + "loss": 2.3472, + "step": 3756500 + }, + { + "epoch": 18.61, + "learning_rate": 4.069660825493081e-05, + "loss": 2.3627, + "step": 3757000 + }, + { + "epoch": 18.62, + "learning_rate": 4.0695369668504726e-05, + "loss": 2.3429, + "step": 3757500 + }, + { + "epoch": 18.62, + "learning_rate": 4.069413108207864e-05, + "loss": 2.3491, + "step": 3758000 + }, + { + "epoch": 18.62, + "learning_rate": 4.069289497282542e-05, + "loss": 2.3293, + "step": 3758500 + }, + { + "epoch": 18.62, + "learning_rate": 4.069165886357219e-05, + "loss": 2.365, + "step": 3759000 + }, + { + "epoch": 18.63, + "learning_rate": 4.0690420277146105e-05, + "loss": 2.3383, + "step": 3759500 + }, + { + "epoch": 18.63, + "learning_rate": 4.068918169072002e-05, + "loss": 2.3502, + "step": 3760000 + }, + { + "epoch": 18.63, + "learning_rate": 4.068794310429394e-05, + "loss": 2.3331, + "step": 3760500 + }, + { + "epoch": 18.63, + "learning_rate": 4.068670451786785e-05, + "loss": 2.3463, + "step": 3761000 + }, + { + "epoch": 18.64, + "learning_rate": 4.0685465931441765e-05, + "loss": 2.3493, + "step": 3761500 + }, + { + "epoch": 18.64, + "learning_rate": 4.068422734501568e-05, + "loss": 2.3455, + "step": 3762000 + }, + { + "epoch": 18.64, + "learning_rate": 4.06829887585896e-05, + "loss": 2.3398, + "step": 3762500 + }, + { + "epoch": 18.64, + "learning_rate": 4.0681750172163516e-05, + "loss": 2.3684, + "step": 3763000 + }, + { + "epoch": 18.65, + "learning_rate": 4.0680514062910285e-05, + "loss": 2.3276, + "step": 3763500 + }, + { + "epoch": 18.65, + "learning_rate": 4.06792754764842e-05, + "loss": 2.3632, + "step": 3764000 + }, + { + "epoch": 18.65, + "learning_rate": 4.067803689005812e-05, + "loss": 2.3577, + "step": 3764500 + }, + { + "epoch": 18.65, + "learning_rate": 4.0676798303632036e-05, + "loss": 2.3654, + "step": 3765000 + }, + { + "epoch": 18.66, + "learning_rate": 4.0675562194378805e-05, + "loss": 2.3418, + "step": 3765500 + }, + { + "epoch": 18.66, + "learning_rate": 4.067432360795272e-05, + "loss": 2.3504, + "step": 3766000 + }, + { + "epoch": 18.66, + "learning_rate": 4.067308502152664e-05, + "loss": 2.3258, + "step": 3766500 + }, + { + "epoch": 18.66, + "learning_rate": 4.0671846435100555e-05, + "loss": 2.3525, + "step": 3767000 + }, + { + "epoch": 18.67, + "learning_rate": 4.067060784867447e-05, + "loss": 2.3618, + "step": 3767500 + }, + { + "epoch": 18.67, + "learning_rate": 4.066936926224838e-05, + "loss": 2.3604, + "step": 3768000 + }, + { + "epoch": 18.67, + "learning_rate": 4.06681306758223e-05, + "loss": 2.365, + "step": 3768500 + }, + { + "epoch": 18.67, + "learning_rate": 4.0666892089396216e-05, + "loss": 2.3799, + "step": 3769000 + }, + { + "epoch": 18.68, + "learning_rate": 4.066565350297013e-05, + "loss": 2.3418, + "step": 3769500 + }, + { + "epoch": 18.68, + "learning_rate": 4.066441491654405e-05, + "loss": 2.3537, + "step": 3770000 + }, + { + "epoch": 18.68, + "learning_rate": 4.066317633011796e-05, + "loss": 2.363, + "step": 3770500 + }, + { + "epoch": 18.68, + "learning_rate": 4.066193774369188e-05, + "loss": 2.3617, + "step": 3771000 + }, + { + "epoch": 18.69, + "learning_rate": 4.0660699157265794e-05, + "loss": 2.3339, + "step": 3771500 + }, + { + "epoch": 18.69, + "learning_rate": 4.065946057083971e-05, + "loss": 2.3676, + "step": 3772000 + }, + { + "epoch": 18.69, + "learning_rate": 4.065822446158648e-05, + "loss": 2.3482, + "step": 3772500 + }, + { + "epoch": 18.69, + "learning_rate": 4.06569858751604e-05, + "loss": 2.333, + "step": 3773000 + }, + { + "epoch": 18.7, + "learning_rate": 4.0655747288734314e-05, + "loss": 2.3455, + "step": 3773500 + }, + { + "epoch": 18.7, + "learning_rate": 4.065450870230823e-05, + "loss": 2.3702, + "step": 3774000 + }, + { + "epoch": 18.7, + "learning_rate": 4.065327011588215e-05, + "loss": 2.3753, + "step": 3774500 + }, + { + "epoch": 18.7, + "learning_rate": 4.0652034006628917e-05, + "loss": 2.3434, + "step": 3775000 + }, + { + "epoch": 18.71, + "learning_rate": 4.0650795420202833e-05, + "loss": 2.3595, + "step": 3775500 + }, + { + "epoch": 18.71, + "learning_rate": 4.064955683377675e-05, + "loss": 2.3349, + "step": 3776000 + }, + { + "epoch": 18.71, + "learning_rate": 4.064831824735067e-05, + "loss": 2.3468, + "step": 3776500 + }, + { + "epoch": 18.71, + "learning_rate": 4.064707966092458e-05, + "loss": 2.35, + "step": 3777000 + }, + { + "epoch": 18.72, + "learning_rate": 4.0645841074498494e-05, + "loss": 2.3771, + "step": 3777500 + }, + { + "epoch": 18.72, + "learning_rate": 4.064460496524526e-05, + "loss": 2.3921, + "step": 3778000 + }, + { + "epoch": 18.72, + "learning_rate": 4.064336637881918e-05, + "loss": 2.3522, + "step": 3778500 + }, + { + "epoch": 18.72, + "learning_rate": 4.06421277923931e-05, + "loss": 2.3284, + "step": 3779000 + }, + { + "epoch": 18.72, + "learning_rate": 4.0640889205967014e-05, + "loss": 2.3193, + "step": 3779500 + }, + { + "epoch": 18.73, + "learning_rate": 4.063965061954093e-05, + "loss": 2.3292, + "step": 3780000 + }, + { + "epoch": 18.73, + "learning_rate": 4.063841203311485e-05, + "loss": 2.3412, + "step": 3780500 + }, + { + "epoch": 18.73, + "learning_rate": 4.0637173446688765e-05, + "loss": 2.3477, + "step": 3781000 + }, + { + "epoch": 18.73, + "learning_rate": 4.063593486026268e-05, + "loss": 2.3427, + "step": 3781500 + }, + { + "epoch": 18.74, + "learning_rate": 4.063469875100945e-05, + "loss": 2.359, + "step": 3782000 + }, + { + "epoch": 18.74, + "learning_rate": 4.063346264175622e-05, + "loss": 2.3708, + "step": 3782500 + }, + { + "epoch": 18.74, + "learning_rate": 4.0632224055330136e-05, + "loss": 2.3456, + "step": 3783000 + }, + { + "epoch": 18.74, + "learning_rate": 4.063098546890405e-05, + "loss": 2.3478, + "step": 3783500 + }, + { + "epoch": 18.75, + "learning_rate": 4.062974935965082e-05, + "loss": 2.3417, + "step": 3784000 + }, + { + "epoch": 18.75, + "learning_rate": 4.062851077322474e-05, + "loss": 2.3459, + "step": 3784500 + }, + { + "epoch": 18.75, + "learning_rate": 4.0627272186798656e-05, + "loss": 2.3443, + "step": 3785000 + }, + { + "epoch": 18.75, + "learning_rate": 4.062603360037257e-05, + "loss": 2.3511, + "step": 3785500 + }, + { + "epoch": 18.76, + "learning_rate": 4.062479501394649e-05, + "loss": 2.3571, + "step": 3786000 + }, + { + "epoch": 18.76, + "learning_rate": 4.062355890469325e-05, + "loss": 2.3711, + "step": 3786500 + }, + { + "epoch": 18.76, + "learning_rate": 4.062232031826717e-05, + "loss": 2.3704, + "step": 3787000 + }, + { + "epoch": 18.76, + "learning_rate": 4.0621081731841086e-05, + "loss": 2.3424, + "step": 3787500 + }, + { + "epoch": 18.77, + "learning_rate": 4.0619843145415e-05, + "loss": 2.3654, + "step": 3788000 + }, + { + "epoch": 18.77, + "learning_rate": 4.061860455898892e-05, + "loss": 2.3397, + "step": 3788500 + }, + { + "epoch": 18.77, + "learning_rate": 4.0617365972562836e-05, + "loss": 2.3416, + "step": 3789000 + }, + { + "epoch": 18.77, + "learning_rate": 4.061612738613675e-05, + "loss": 2.3722, + "step": 3789500 + }, + { + "epoch": 18.78, + "learning_rate": 4.061488879971067e-05, + "loss": 2.3566, + "step": 3790000 + }, + { + "epoch": 18.78, + "learning_rate": 4.061365269045744e-05, + "loss": 2.3691, + "step": 3790500 + }, + { + "epoch": 18.78, + "learning_rate": 4.0612414104031356e-05, + "loss": 2.3407, + "step": 3791000 + }, + { + "epoch": 18.78, + "learning_rate": 4.061117551760527e-05, + "loss": 2.3298, + "step": 3791500 + }, + { + "epoch": 18.79, + "learning_rate": 4.060993693117919e-05, + "loss": 2.3593, + "step": 3792000 + }, + { + "epoch": 18.79, + "learning_rate": 4.060869834475311e-05, + "loss": 2.3477, + "step": 3792500 + }, + { + "epoch": 18.79, + "learning_rate": 4.0607459758327024e-05, + "loss": 2.3582, + "step": 3793000 + }, + { + "epoch": 18.79, + "learning_rate": 4.0606223649073786e-05, + "loss": 2.3396, + "step": 3793500 + }, + { + "epoch": 18.8, + "learning_rate": 4.06049850626477e-05, + "loss": 2.3546, + "step": 3794000 + }, + { + "epoch": 18.8, + "learning_rate": 4.060374647622162e-05, + "loss": 2.3364, + "step": 3794500 + }, + { + "epoch": 18.8, + "learning_rate": 4.0602507889795536e-05, + "loss": 2.3689, + "step": 3795000 + }, + { + "epoch": 18.8, + "learning_rate": 4.0601269303369453e-05, + "loss": 2.3572, + "step": 3795500 + }, + { + "epoch": 18.81, + "learning_rate": 4.060003071694337e-05, + "loss": 2.3567, + "step": 3796000 + }, + { + "epoch": 18.81, + "learning_rate": 4.059879213051728e-05, + "loss": 2.3234, + "step": 3796500 + }, + { + "epoch": 18.81, + "learning_rate": 4.05975535440912e-05, + "loss": 2.3548, + "step": 3797000 + }, + { + "epoch": 18.81, + "learning_rate": 4.0596314957665114e-05, + "loss": 2.3628, + "step": 3797500 + }, + { + "epoch": 18.82, + "learning_rate": 4.059507884841189e-05, + "loss": 2.3432, + "step": 3798000 + }, + { + "epoch": 18.82, + "learning_rate": 4.059384026198581e-05, + "loss": 2.3482, + "step": 3798500 + }, + { + "epoch": 18.82, + "learning_rate": 4.0592601675559724e-05, + "loss": 2.3262, + "step": 3799000 + }, + { + "epoch": 18.82, + "learning_rate": 4.059136308913364e-05, + "loss": 2.354, + "step": 3799500 + }, + { + "epoch": 18.83, + "learning_rate": 4.05901269798804e-05, + "loss": 2.3366, + "step": 3800000 + }, + { + "epoch": 18.83, + "learning_rate": 4.058889087062717e-05, + "loss": 2.3351, + "step": 3800500 + }, + { + "epoch": 18.83, + "learning_rate": 4.058765228420109e-05, + "loss": 2.3698, + "step": 3801000 + }, + { + "epoch": 18.83, + "learning_rate": 4.0586413697775005e-05, + "loss": 2.3652, + "step": 3801500 + }, + { + "epoch": 18.84, + "learning_rate": 4.058517511134892e-05, + "loss": 2.3572, + "step": 3802000 + }, + { + "epoch": 18.84, + "learning_rate": 4.058393900209569e-05, + "loss": 2.3613, + "step": 3802500 + }, + { + "epoch": 18.84, + "learning_rate": 4.058270041566961e-05, + "loss": 2.3577, + "step": 3803000 + }, + { + "epoch": 18.84, + "learning_rate": 4.0581461829243525e-05, + "loss": 2.3717, + "step": 3803500 + }, + { + "epoch": 18.85, + "learning_rate": 4.058022324281744e-05, + "loss": 2.3315, + "step": 3804000 + }, + { + "epoch": 18.85, + "learning_rate": 4.057898465639136e-05, + "loss": 2.375, + "step": 3804500 + }, + { + "epoch": 18.85, + "learning_rate": 4.057774606996527e-05, + "loss": 2.3772, + "step": 3805000 + }, + { + "epoch": 18.85, + "learning_rate": 4.0576507483539186e-05, + "loss": 2.3574, + "step": 3805500 + }, + { + "epoch": 18.86, + "learning_rate": 4.05752688971131e-05, + "loss": 2.3731, + "step": 3806000 + }, + { + "epoch": 18.86, + "learning_rate": 4.057403031068702e-05, + "loss": 2.3567, + "step": 3806500 + }, + { + "epoch": 18.86, + "learning_rate": 4.057279172426094e-05, + "loss": 2.3155, + "step": 3807000 + }, + { + "epoch": 18.86, + "learning_rate": 4.0571553137834854e-05, + "loss": 2.362, + "step": 3807500 + }, + { + "epoch": 18.87, + "learning_rate": 4.057031455140877e-05, + "loss": 2.3639, + "step": 3808000 + }, + { + "epoch": 18.87, + "learning_rate": 4.056907596498269e-05, + "loss": 2.3458, + "step": 3808500 + }, + { + "epoch": 18.87, + "learning_rate": 4.05678373785566e-05, + "loss": 2.3485, + "step": 3809000 + }, + { + "epoch": 18.87, + "learning_rate": 4.0566598792130515e-05, + "loss": 2.3583, + "step": 3809500 + }, + { + "epoch": 18.88, + "learning_rate": 4.056536516005014e-05, + "loss": 2.3601, + "step": 3810000 + }, + { + "epoch": 18.88, + "learning_rate": 4.056412657362406e-05, + "loss": 2.3376, + "step": 3810500 + }, + { + "epoch": 18.88, + "learning_rate": 4.0562887987197976e-05, + "loss": 2.3372, + "step": 3811000 + }, + { + "epoch": 18.88, + "learning_rate": 4.056165187794474e-05, + "loss": 2.3443, + "step": 3811500 + }, + { + "epoch": 18.89, + "learning_rate": 4.0560413291518655e-05, + "loss": 2.349, + "step": 3812000 + }, + { + "epoch": 18.89, + "learning_rate": 4.055917470509257e-05, + "loss": 2.3483, + "step": 3812500 + }, + { + "epoch": 18.89, + "learning_rate": 4.055793611866649e-05, + "loss": 2.3419, + "step": 3813000 + }, + { + "epoch": 18.89, + "learning_rate": 4.0556697532240406e-05, + "loss": 2.3571, + "step": 3813500 + }, + { + "epoch": 18.9, + "learning_rate": 4.055545894581432e-05, + "loss": 2.3455, + "step": 3814000 + }, + { + "epoch": 18.9, + "learning_rate": 4.055422035938824e-05, + "loss": 2.3596, + "step": 3814500 + }, + { + "epoch": 18.9, + "learning_rate": 4.0552981772962156e-05, + "loss": 2.3755, + "step": 3815000 + }, + { + "epoch": 18.9, + "learning_rate": 4.055174318653607e-05, + "loss": 2.3312, + "step": 3815500 + }, + { + "epoch": 18.91, + "learning_rate": 4.055050460010999e-05, + "loss": 2.356, + "step": 3816000 + }, + { + "epoch": 18.91, + "learning_rate": 4.054926601368391e-05, + "loss": 2.3591, + "step": 3816500 + }, + { + "epoch": 18.91, + "learning_rate": 4.0548027427257824e-05, + "loss": 2.3418, + "step": 3817000 + }, + { + "epoch": 18.91, + "learning_rate": 4.054678884083174e-05, + "loss": 2.3178, + "step": 3817500 + }, + { + "epoch": 18.92, + "learning_rate": 4.054555025440566e-05, + "loss": 2.3461, + "step": 3818000 + }, + { + "epoch": 18.92, + "learning_rate": 4.054431166797957e-05, + "loss": 2.3436, + "step": 3818500 + }, + { + "epoch": 18.92, + "learning_rate": 4.0543073081553485e-05, + "loss": 2.3313, + "step": 3819000 + }, + { + "epoch": 18.92, + "learning_rate": 4.05418344951274e-05, + "loss": 2.3429, + "step": 3819500 + }, + { + "epoch": 18.93, + "learning_rate": 4.054059838587417e-05, + "loss": 2.3593, + "step": 3820000 + }, + { + "epoch": 18.93, + "learning_rate": 4.053936227662094e-05, + "loss": 2.3424, + "step": 3820500 + }, + { + "epoch": 18.93, + "learning_rate": 4.0538123690194857e-05, + "loss": 2.3253, + "step": 3821000 + }, + { + "epoch": 18.93, + "learning_rate": 4.0536885103768774e-05, + "loss": 2.3515, + "step": 3821500 + }, + { + "epoch": 18.94, + "learning_rate": 4.053564651734269e-05, + "loss": 2.3587, + "step": 3822000 + }, + { + "epoch": 18.94, + "learning_rate": 4.053440793091661e-05, + "loss": 2.3128, + "step": 3822500 + }, + { + "epoch": 18.94, + "learning_rate": 4.0533169344490524e-05, + "loss": 2.3619, + "step": 3823000 + }, + { + "epoch": 18.94, + "learning_rate": 4.053193075806444e-05, + "loss": 2.3424, + "step": 3823500 + }, + { + "epoch": 18.95, + "learning_rate": 4.053069217163836e-05, + "loss": 2.3527, + "step": 3824000 + }, + { + "epoch": 18.95, + "learning_rate": 4.0529453585212275e-05, + "loss": 2.3656, + "step": 3824500 + }, + { + "epoch": 18.95, + "learning_rate": 4.0528217475959044e-05, + "loss": 2.3551, + "step": 3825000 + }, + { + "epoch": 18.95, + "learning_rate": 4.0526978889532954e-05, + "loss": 2.3346, + "step": 3825500 + }, + { + "epoch": 18.96, + "learning_rate": 4.052574278027972e-05, + "loss": 2.3258, + "step": 3826000 + }, + { + "epoch": 18.96, + "learning_rate": 4.052450419385364e-05, + "loss": 2.3451, + "step": 3826500 + }, + { + "epoch": 18.96, + "learning_rate": 4.052326560742756e-05, + "loss": 2.3521, + "step": 3827000 + }, + { + "epoch": 18.96, + "learning_rate": 4.0522027021001474e-05, + "loss": 2.3527, + "step": 3827500 + }, + { + "epoch": 18.97, + "learning_rate": 4.052078843457539e-05, + "loss": 2.3459, + "step": 3828000 + }, + { + "epoch": 18.97, + "learning_rate": 4.051954984814931e-05, + "loss": 2.3684, + "step": 3828500 + }, + { + "epoch": 18.97, + "learning_rate": 4.0518311261723224e-05, + "loss": 2.3569, + "step": 3829000 + }, + { + "epoch": 18.97, + "learning_rate": 4.051707267529714e-05, + "loss": 2.368, + "step": 3829500 + }, + { + "epoch": 18.98, + "learning_rate": 4.051583408887106e-05, + "loss": 2.3412, + "step": 3830000 + }, + { + "epoch": 18.98, + "learning_rate": 4.0514595502444975e-05, + "loss": 2.3685, + "step": 3830500 + }, + { + "epoch": 18.98, + "learning_rate": 4.0513356916018885e-05, + "loss": 2.3667, + "step": 3831000 + }, + { + "epoch": 18.98, + "learning_rate": 4.05121183295928e-05, + "loss": 2.333, + "step": 3831500 + }, + { + "epoch": 18.99, + "learning_rate": 4.051088469751242e-05, + "loss": 2.3416, + "step": 3832000 + }, + { + "epoch": 18.99, + "learning_rate": 4.050964611108634e-05, + "loss": 2.344, + "step": 3832500 + }, + { + "epoch": 18.99, + "learning_rate": 4.050840752466026e-05, + "loss": 2.3712, + "step": 3833000 + }, + { + "epoch": 18.99, + "learning_rate": 4.0507168938234174e-05, + "loss": 2.3576, + "step": 3833500 + }, + { + "epoch": 18.99, + "learning_rate": 4.050593282898094e-05, + "loss": 2.3196, + "step": 3834000 + }, + { + "epoch": 19.0, + "learning_rate": 4.050469671972771e-05, + "loss": 2.3713, + "step": 3834500 + }, + { + "epoch": 19.0, + "learning_rate": 4.050345813330163e-05, + "loss": 2.3427, + "step": 3835000 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.6510998864965143, + "eval_accuracy_mlm": 0.6054676656033432, + "eval_accuracy_nsp": 0.8664373487501912, + "eval_loss": 2.3667972087860107, + "eval_runtime": 145.68, + "eval_samples_per_second": 1750.131, + "eval_steps_per_second": 72.927, + "step": 3835017 + }, + { + "epoch": 19.0, + "learning_rate": 4.050221954687554e-05, + "loss": 2.3276, + "step": 3835500 + }, + { + "epoch": 19.0, + "learning_rate": 4.0500983437622314e-05, + "loss": 2.3316, + "step": 3836000 + }, + { + "epoch": 19.01, + "learning_rate": 4.049974485119623e-05, + "loss": 2.3063, + "step": 3836500 + }, + { + "epoch": 19.01, + "learning_rate": 4.049850626477015e-05, + "loss": 2.3161, + "step": 3837000 + }, + { + "epoch": 19.01, + "learning_rate": 4.0497267678344065e-05, + "loss": 2.3319, + "step": 3837500 + }, + { + "epoch": 19.01, + "learning_rate": 4.0496031569090834e-05, + "loss": 2.3158, + "step": 3838000 + }, + { + "epoch": 19.02, + "learning_rate": 4.049479298266475e-05, + "loss": 2.3323, + "step": 3838500 + }, + { + "epoch": 19.02, + "learning_rate": 4.049355439623867e-05, + "loss": 2.3132, + "step": 3839000 + }, + { + "epoch": 19.02, + "learning_rate": 4.049231580981258e-05, + "loss": 2.3162, + "step": 3839500 + }, + { + "epoch": 19.02, + "learning_rate": 4.0491077223386495e-05, + "loss": 2.3272, + "step": 3840000 + }, + { + "epoch": 19.03, + "learning_rate": 4.048983863696041e-05, + "loss": 2.3237, + "step": 3840500 + }, + { + "epoch": 19.03, + "learning_rate": 4.048860005053433e-05, + "loss": 2.3282, + "step": 3841000 + }, + { + "epoch": 19.03, + "learning_rate": 4.0487361464108245e-05, + "loss": 2.3251, + "step": 3841500 + }, + { + "epoch": 19.03, + "learning_rate": 4.0486122877682156e-05, + "loss": 2.3182, + "step": 3842000 + }, + { + "epoch": 19.04, + "learning_rate": 4.048488429125607e-05, + "loss": 2.3366, + "step": 3842500 + }, + { + "epoch": 19.04, + "learning_rate": 4.048364570482999e-05, + "loss": 2.312, + "step": 3843000 + }, + { + "epoch": 19.04, + "learning_rate": 4.0482407118403906e-05, + "loss": 2.3254, + "step": 3843500 + }, + { + "epoch": 19.04, + "learning_rate": 4.048116853197782e-05, + "loss": 2.3187, + "step": 3844000 + }, + { + "epoch": 19.05, + "learning_rate": 4.047992994555174e-05, + "loss": 2.3117, + "step": 3844500 + }, + { + "epoch": 19.05, + "learning_rate": 4.047869135912566e-05, + "loss": 2.3228, + "step": 3845000 + }, + { + "epoch": 19.05, + "learning_rate": 4.0477452772699574e-05, + "loss": 2.301, + "step": 3845500 + }, + { + "epoch": 19.05, + "learning_rate": 4.047621666344634e-05, + "loss": 2.3024, + "step": 3846000 + }, + { + "epoch": 19.06, + "learning_rate": 4.047497807702026e-05, + "loss": 2.3125, + "step": 3846500 + }, + { + "epoch": 19.06, + "learning_rate": 4.047373949059418e-05, + "loss": 2.3355, + "step": 3847000 + }, + { + "epoch": 19.06, + "learning_rate": 4.0472500904168094e-05, + "loss": 2.3184, + "step": 3847500 + }, + { + "epoch": 19.06, + "learning_rate": 4.047126231774201e-05, + "loss": 2.2946, + "step": 3848000 + }, + { + "epoch": 19.07, + "learning_rate": 4.047002620848878e-05, + "loss": 2.3503, + "step": 3848500 + }, + { + "epoch": 19.07, + "learning_rate": 4.046878762206269e-05, + "loss": 2.3138, + "step": 3849000 + }, + { + "epoch": 19.07, + "learning_rate": 4.0467549035636606e-05, + "loss": 2.3092, + "step": 3849500 + }, + { + "epoch": 19.07, + "learning_rate": 4.046631044921052e-05, + "loss": 2.3133, + "step": 3850000 + }, + { + "epoch": 19.08, + "learning_rate": 4.046507186278444e-05, + "loss": 2.337, + "step": 3850500 + }, + { + "epoch": 19.08, + "learning_rate": 4.046383327635836e-05, + "loss": 2.3352, + "step": 3851000 + }, + { + "epoch": 19.08, + "learning_rate": 4.0462594689932274e-05, + "loss": 2.3084, + "step": 3851500 + }, + { + "epoch": 19.08, + "learning_rate": 4.046135858067904e-05, + "loss": 2.3219, + "step": 3852000 + }, + { + "epoch": 19.09, + "learning_rate": 4.046011999425296e-05, + "loss": 2.3261, + "step": 3852500 + }, + { + "epoch": 19.09, + "learning_rate": 4.045888140782688e-05, + "loss": 2.3286, + "step": 3853000 + }, + { + "epoch": 19.09, + "learning_rate": 4.0457642821400794e-05, + "loss": 2.2986, + "step": 3853500 + }, + { + "epoch": 19.09, + "learning_rate": 4.045640423497471e-05, + "loss": 2.3174, + "step": 3854000 + }, + { + "epoch": 19.1, + "learning_rate": 4.045516564854863e-05, + "loss": 2.3252, + "step": 3854500 + }, + { + "epoch": 19.1, + "learning_rate": 4.0453927062122545e-05, + "loss": 2.3386, + "step": 3855000 + }, + { + "epoch": 19.1, + "learning_rate": 4.0452690952869307e-05, + "loss": 2.3323, + "step": 3855500 + }, + { + "epoch": 19.1, + "learning_rate": 4.0451452366443223e-05, + "loss": 2.3168, + "step": 3856000 + }, + { + "epoch": 19.11, + "learning_rate": 4.045021378001714e-05, + "loss": 2.3312, + "step": 3856500 + }, + { + "epoch": 19.11, + "learning_rate": 4.044897519359106e-05, + "loss": 2.3278, + "step": 3857000 + }, + { + "epoch": 19.11, + "learning_rate": 4.0447736607164974e-05, + "loss": 2.3539, + "step": 3857500 + }, + { + "epoch": 19.11, + "learning_rate": 4.044649802073889e-05, + "loss": 2.3532, + "step": 3858000 + }, + { + "epoch": 19.12, + "learning_rate": 4.044525943431281e-05, + "loss": 2.3201, + "step": 3858500 + }, + { + "epoch": 19.12, + "learning_rate": 4.0444020847886725e-05, + "loss": 2.2934, + "step": 3859000 + }, + { + "epoch": 19.12, + "learning_rate": 4.044278226146064e-05, + "loss": 2.3019, + "step": 3859500 + }, + { + "epoch": 19.12, + "learning_rate": 4.044154367503456e-05, + "loss": 2.3039, + "step": 3860000 + }, + { + "epoch": 19.13, + "learning_rate": 4.0440305088608476e-05, + "loss": 2.3172, + "step": 3860500 + }, + { + "epoch": 19.13, + "learning_rate": 4.0439071456528097e-05, + "loss": 2.3581, + "step": 3861000 + }, + { + "epoch": 19.13, + "learning_rate": 4.0437832870102013e-05, + "loss": 2.3188, + "step": 3861500 + }, + { + "epoch": 19.13, + "learning_rate": 4.043659428367593e-05, + "loss": 2.3124, + "step": 3862000 + }, + { + "epoch": 19.14, + "learning_rate": 4.043535569724984e-05, + "loss": 2.3261, + "step": 3862500 + }, + { + "epoch": 19.14, + "learning_rate": 4.043411958799661e-05, + "loss": 2.3299, + "step": 3863000 + }, + { + "epoch": 19.14, + "learning_rate": 4.0432881001570526e-05, + "loss": 2.3548, + "step": 3863500 + }, + { + "epoch": 19.14, + "learning_rate": 4.043164241514444e-05, + "loss": 2.3506, + "step": 3864000 + }, + { + "epoch": 19.15, + "learning_rate": 4.043040382871836e-05, + "loss": 2.3457, + "step": 3864500 + }, + { + "epoch": 19.15, + "learning_rate": 4.0429167719465136e-05, + "loss": 2.3462, + "step": 3865000 + }, + { + "epoch": 19.15, + "learning_rate": 4.042792913303905e-05, + "loss": 2.3082, + "step": 3865500 + }, + { + "epoch": 19.15, + "learning_rate": 4.042669054661296e-05, + "loss": 2.3479, + "step": 3866000 + }, + { + "epoch": 19.16, + "learning_rate": 4.042545196018688e-05, + "loss": 2.3207, + "step": 3866500 + }, + { + "epoch": 19.16, + "learning_rate": 4.042421585093365e-05, + "loss": 2.332, + "step": 3867000 + }, + { + "epoch": 19.16, + "learning_rate": 4.0422977264507565e-05, + "loss": 2.3244, + "step": 3867500 + }, + { + "epoch": 19.16, + "learning_rate": 4.042173867808148e-05, + "loss": 2.3418, + "step": 3868000 + }, + { + "epoch": 19.17, + "learning_rate": 4.04205000916554e-05, + "loss": 2.3337, + "step": 3868500 + }, + { + "epoch": 19.17, + "learning_rate": 4.041926150522931e-05, + "loss": 2.3363, + "step": 3869000 + }, + { + "epoch": 19.17, + "learning_rate": 4.0418022918803226e-05, + "loss": 2.3161, + "step": 3869500 + }, + { + "epoch": 19.17, + "learning_rate": 4.041678433237714e-05, + "loss": 2.3293, + "step": 3870000 + }, + { + "epoch": 19.18, + "learning_rate": 4.041554574595106e-05, + "loss": 2.3562, + "step": 3870500 + }, + { + "epoch": 19.18, + "learning_rate": 4.041430715952498e-05, + "loss": 2.3249, + "step": 3871000 + }, + { + "epoch": 19.18, + "learning_rate": 4.0413068573098894e-05, + "loss": 2.3386, + "step": 3871500 + }, + { + "epoch": 19.18, + "learning_rate": 4.041182998667281e-05, + "loss": 2.3363, + "step": 3872000 + }, + { + "epoch": 19.19, + "learning_rate": 4.041059140024673e-05, + "loss": 2.3261, + "step": 3872500 + }, + { + "epoch": 19.19, + "learning_rate": 4.04093552909935e-05, + "loss": 2.3164, + "step": 3873000 + }, + { + "epoch": 19.19, + "learning_rate": 4.0408119181740266e-05, + "loss": 2.3363, + "step": 3873500 + }, + { + "epoch": 19.19, + "learning_rate": 4.040688059531418e-05, + "loss": 2.3191, + "step": 3874000 + }, + { + "epoch": 19.2, + "learning_rate": 4.04056420088881e-05, + "loss": 2.3325, + "step": 3874500 + }, + { + "epoch": 19.2, + "learning_rate": 4.0404403422462016e-05, + "loss": 2.3294, + "step": 3875000 + }, + { + "epoch": 19.2, + "learning_rate": 4.0403164836035927e-05, + "loss": 2.348, + "step": 3875500 + }, + { + "epoch": 19.2, + "learning_rate": 4.0401926249609843e-05, + "loss": 2.3279, + "step": 3876000 + }, + { + "epoch": 19.21, + "learning_rate": 4.040068766318376e-05, + "loss": 2.3494, + "step": 3876500 + }, + { + "epoch": 19.21, + "learning_rate": 4.0399451553930536e-05, + "loss": 2.3361, + "step": 3877000 + }, + { + "epoch": 19.21, + "learning_rate": 4.039821296750445e-05, + "loss": 2.329, + "step": 3877500 + }, + { + "epoch": 19.21, + "learning_rate": 4.039697438107837e-05, + "loss": 2.3137, + "step": 3878000 + }, + { + "epoch": 19.22, + "learning_rate": 4.039573579465228e-05, + "loss": 2.3535, + "step": 3878500 + }, + { + "epoch": 19.22, + "learning_rate": 4.03944972082262e-05, + "loss": 2.3296, + "step": 3879000 + }, + { + "epoch": 19.22, + "learning_rate": 4.0393258621800114e-05, + "loss": 2.3558, + "step": 3879500 + }, + { + "epoch": 19.22, + "learning_rate": 4.039202251254688e-05, + "loss": 2.3215, + "step": 3880000 + }, + { + "epoch": 19.23, + "learning_rate": 4.03907839261208e-05, + "loss": 2.3023, + "step": 3880500 + }, + { + "epoch": 19.23, + "learning_rate": 4.038954781686757e-05, + "loss": 2.3208, + "step": 3881000 + }, + { + "epoch": 19.23, + "learning_rate": 4.0388309230441485e-05, + "loss": 2.3491, + "step": 3881500 + }, + { + "epoch": 19.23, + "learning_rate": 4.0387073121188254e-05, + "loss": 2.3284, + "step": 3882000 + }, + { + "epoch": 19.24, + "learning_rate": 4.038583453476217e-05, + "loss": 2.3285, + "step": 3882500 + }, + { + "epoch": 19.24, + "learning_rate": 4.038459594833609e-05, + "loss": 2.3254, + "step": 3883000 + }, + { + "epoch": 19.24, + "learning_rate": 4.038335736191e-05, + "loss": 2.3343, + "step": 3883500 + }, + { + "epoch": 19.24, + "learning_rate": 4.0382118775483915e-05, + "loss": 2.3346, + "step": 3884000 + }, + { + "epoch": 19.25, + "learning_rate": 4.038088018905783e-05, + "loss": 2.3331, + "step": 3884500 + }, + { + "epoch": 19.25, + "learning_rate": 4.037964160263175e-05, + "loss": 2.3303, + "step": 3885000 + }, + { + "epoch": 19.25, + "learning_rate": 4.0378403016205666e-05, + "loss": 2.3329, + "step": 3885500 + }, + { + "epoch": 19.25, + "learning_rate": 4.037716442977958e-05, + "loss": 2.3502, + "step": 3886000 + }, + { + "epoch": 19.26, + "learning_rate": 4.03759258433535e-05, + "loss": 2.3128, + "step": 3886500 + }, + { + "epoch": 19.26, + "learning_rate": 4.0374687256927417e-05, + "loss": 2.3384, + "step": 3887000 + }, + { + "epoch": 19.26, + "learning_rate": 4.0373448670501334e-05, + "loss": 2.3581, + "step": 3887500 + }, + { + "epoch": 19.26, + "learning_rate": 4.0372210084075244e-05, + "loss": 2.3329, + "step": 3888000 + }, + { + "epoch": 19.26, + "learning_rate": 4.037097397482202e-05, + "loss": 2.3289, + "step": 3888500 + }, + { + "epoch": 19.27, + "learning_rate": 4.0369735388395936e-05, + "loss": 2.3199, + "step": 3889000 + }, + { + "epoch": 19.27, + "learning_rate": 4.036849680196985e-05, + "loss": 2.3655, + "step": 3889500 + }, + { + "epoch": 19.27, + "learning_rate": 4.036725821554377e-05, + "loss": 2.3308, + "step": 3890000 + }, + { + "epoch": 19.27, + "learning_rate": 4.036601962911769e-05, + "loss": 2.3335, + "step": 3890500 + }, + { + "epoch": 19.28, + "learning_rate": 4.03647810426916e-05, + "loss": 2.3548, + "step": 3891000 + }, + { + "epoch": 19.28, + "learning_rate": 4.0363542456265514e-05, + "loss": 2.3258, + "step": 3891500 + }, + { + "epoch": 19.28, + "learning_rate": 4.036230386983943e-05, + "loss": 2.3499, + "step": 3892000 + }, + { + "epoch": 19.28, + "learning_rate": 4.036106528341335e-05, + "loss": 2.3421, + "step": 3892500 + }, + { + "epoch": 19.29, + "learning_rate": 4.035982917416012e-05, + "loss": 2.3172, + "step": 3893000 + }, + { + "epoch": 19.29, + "learning_rate": 4.0358593064906886e-05, + "loss": 2.3416, + "step": 3893500 + }, + { + "epoch": 19.29, + "learning_rate": 4.03573544784808e-05, + "loss": 2.3245, + "step": 3894000 + }, + { + "epoch": 19.29, + "learning_rate": 4.035611589205472e-05, + "loss": 2.3291, + "step": 3894500 + }, + { + "epoch": 19.3, + "learning_rate": 4.035487978280149e-05, + "loss": 2.3485, + "step": 3895000 + }, + { + "epoch": 19.3, + "learning_rate": 4.0353641196375405e-05, + "loss": 2.3216, + "step": 3895500 + }, + { + "epoch": 19.3, + "learning_rate": 4.035240260994932e-05, + "loss": 2.352, + "step": 3896000 + }, + { + "epoch": 19.3, + "learning_rate": 4.035116402352324e-05, + "loss": 2.3244, + "step": 3896500 + }, + { + "epoch": 19.31, + "learning_rate": 4.034992543709715e-05, + "loss": 2.347, + "step": 3897000 + }, + { + "epoch": 19.31, + "learning_rate": 4.0348686850671066e-05, + "loss": 2.3369, + "step": 3897500 + }, + { + "epoch": 19.31, + "learning_rate": 4.034744826424498e-05, + "loss": 2.31, + "step": 3898000 + }, + { + "epoch": 19.31, + "learning_rate": 4.03462096778189e-05, + "loss": 2.3318, + "step": 3898500 + }, + { + "epoch": 19.32, + "learning_rate": 4.034497109139282e-05, + "loss": 2.3256, + "step": 3899000 + }, + { + "epoch": 19.32, + "learning_rate": 4.0343732504966734e-05, + "loss": 2.3221, + "step": 3899500 + }, + { + "epoch": 19.32, + "learning_rate": 4.03424963957135e-05, + "loss": 2.3195, + "step": 3900000 + }, + { + "epoch": 19.32, + "learning_rate": 4.034125780928742e-05, + "loss": 2.3262, + "step": 3900500 + }, + { + "epoch": 19.33, + "learning_rate": 4.0340019222861336e-05, + "loss": 2.3251, + "step": 3901000 + }, + { + "epoch": 19.33, + "learning_rate": 4.033878063643525e-05, + "loss": 2.3431, + "step": 3901500 + }, + { + "epoch": 19.33, + "learning_rate": 4.033754452718202e-05, + "loss": 2.3286, + "step": 3902000 + }, + { + "epoch": 19.33, + "learning_rate": 4.033630594075594e-05, + "loss": 2.3164, + "step": 3902500 + }, + { + "epoch": 19.34, + "learning_rate": 4.03350698315027e-05, + "loss": 2.3303, + "step": 3903000 + }, + { + "epoch": 19.34, + "learning_rate": 4.033383124507662e-05, + "loss": 2.3216, + "step": 3903500 + }, + { + "epoch": 19.34, + "learning_rate": 4.033259513582339e-05, + "loss": 2.3386, + "step": 3904000 + }, + { + "epoch": 19.34, + "learning_rate": 4.0331356549397304e-05, + "loss": 2.3393, + "step": 3904500 + }, + { + "epoch": 19.35, + "learning_rate": 4.033011796297122e-05, + "loss": 2.342, + "step": 3905000 + }, + { + "epoch": 19.35, + "learning_rate": 4.032887937654514e-05, + "loss": 2.3289, + "step": 3905500 + }, + { + "epoch": 19.35, + "learning_rate": 4.0327640790119055e-05, + "loss": 2.3325, + "step": 3906000 + }, + { + "epoch": 19.35, + "learning_rate": 4.032640220369297e-05, + "loss": 2.3331, + "step": 3906500 + }, + { + "epoch": 19.36, + "learning_rate": 4.032516361726689e-05, + "loss": 2.3308, + "step": 3907000 + }, + { + "epoch": 19.36, + "learning_rate": 4.0323925030840805e-05, + "loss": 2.3238, + "step": 3907500 + }, + { + "epoch": 19.36, + "learning_rate": 4.032268644441472e-05, + "loss": 2.3196, + "step": 3908000 + }, + { + "epoch": 19.36, + "learning_rate": 4.032144785798864e-05, + "loss": 2.3406, + "step": 3908500 + }, + { + "epoch": 19.37, + "learning_rate": 4.0320209271562556e-05, + "loss": 2.3241, + "step": 3909000 + }, + { + "epoch": 19.37, + "learning_rate": 4.031897068513647e-05, + "loss": 2.3429, + "step": 3909500 + }, + { + "epoch": 19.37, + "learning_rate": 4.031773209871039e-05, + "loss": 2.3428, + "step": 3910000 + }, + { + "epoch": 19.37, + "learning_rate": 4.03164935122843e-05, + "loss": 2.3308, + "step": 3910500 + }, + { + "epoch": 19.38, + "learning_rate": 4.031525492585822e-05, + "loss": 2.3214, + "step": 3911000 + }, + { + "epoch": 19.38, + "learning_rate": 4.0314016339432134e-05, + "loss": 2.3418, + "step": 3911500 + }, + { + "epoch": 19.38, + "learning_rate": 4.031277775300605e-05, + "loss": 2.3363, + "step": 3912000 + }, + { + "epoch": 19.38, + "learning_rate": 4.031153916657997e-05, + "loss": 2.3595, + "step": 3912500 + }, + { + "epoch": 19.39, + "learning_rate": 4.031030058015388e-05, + "loss": 2.3229, + "step": 3913000 + }, + { + "epoch": 19.39, + "learning_rate": 4.0309064470900654e-05, + "loss": 2.323, + "step": 3913500 + }, + { + "epoch": 19.39, + "learning_rate": 4.030782836164742e-05, + "loss": 2.3263, + "step": 3914000 + }, + { + "epoch": 19.39, + "learning_rate": 4.030658977522134e-05, + "loss": 2.3366, + "step": 3914500 + }, + { + "epoch": 19.4, + "learning_rate": 4.0305351188795256e-05, + "loss": 2.3512, + "step": 3915000 + }, + { + "epoch": 19.4, + "learning_rate": 4.030411260236917e-05, + "loss": 2.3201, + "step": 3915500 + }, + { + "epoch": 19.4, + "learning_rate": 4.030287401594309e-05, + "loss": 2.338, + "step": 3916000 + }, + { + "epoch": 19.4, + "learning_rate": 4.030163542951701e-05, + "loss": 2.3361, + "step": 3916500 + }, + { + "epoch": 19.41, + "learning_rate": 4.0300396843090924e-05, + "loss": 2.3406, + "step": 3917000 + }, + { + "epoch": 19.41, + "learning_rate": 4.0299158256664834e-05, + "loss": 2.3373, + "step": 3917500 + }, + { + "epoch": 19.41, + "learning_rate": 4.029791967023875e-05, + "loss": 2.3003, + "step": 3918000 + }, + { + "epoch": 19.41, + "learning_rate": 4.029668108381267e-05, + "loss": 2.3411, + "step": 3918500 + }, + { + "epoch": 19.42, + "learning_rate": 4.029544497455944e-05, + "loss": 2.3372, + "step": 3919000 + }, + { + "epoch": 19.42, + "learning_rate": 4.0294206388133354e-05, + "loss": 2.3438, + "step": 3919500 + }, + { + "epoch": 19.42, + "learning_rate": 4.029296780170727e-05, + "loss": 2.3411, + "step": 3920000 + }, + { + "epoch": 19.42, + "learning_rate": 4.029172921528119e-05, + "loss": 2.3428, + "step": 3920500 + }, + { + "epoch": 19.43, + "learning_rate": 4.0290493106027956e-05, + "loss": 2.3244, + "step": 3921000 + }, + { + "epoch": 19.43, + "learning_rate": 4.028925451960187e-05, + "loss": 2.339, + "step": 3921500 + }, + { + "epoch": 19.43, + "learning_rate": 4.028801593317579e-05, + "loss": 2.3394, + "step": 3922000 + }, + { + "epoch": 19.43, + "learning_rate": 4.028677734674971e-05, + "loss": 2.3725, + "step": 3922500 + }, + { + "epoch": 19.44, + "learning_rate": 4.028554123749647e-05, + "loss": 2.3452, + "step": 3923000 + }, + { + "epoch": 19.44, + "learning_rate": 4.0284302651070386e-05, + "loss": 2.307, + "step": 3923500 + }, + { + "epoch": 19.44, + "learning_rate": 4.0283066541817155e-05, + "loss": 2.3612, + "step": 3924000 + }, + { + "epoch": 19.44, + "learning_rate": 4.028182795539107e-05, + "loss": 2.3514, + "step": 3924500 + }, + { + "epoch": 19.45, + "learning_rate": 4.028058936896499e-05, + "loss": 2.3375, + "step": 3925000 + }, + { + "epoch": 19.45, + "learning_rate": 4.0279350782538906e-05, + "loss": 2.342, + "step": 3925500 + }, + { + "epoch": 19.45, + "learning_rate": 4.027811219611282e-05, + "loss": 2.3437, + "step": 3926000 + }, + { + "epoch": 19.45, + "learning_rate": 4.027687608685959e-05, + "loss": 2.3342, + "step": 3926500 + }, + { + "epoch": 19.46, + "learning_rate": 4.027563750043351e-05, + "loss": 2.3393, + "step": 3927000 + }, + { + "epoch": 19.46, + "learning_rate": 4.027439891400742e-05, + "loss": 2.3505, + "step": 3927500 + }, + { + "epoch": 19.46, + "learning_rate": 4.0273160327581335e-05, + "loss": 2.3358, + "step": 3928000 + }, + { + "epoch": 19.46, + "learning_rate": 4.027192174115525e-05, + "loss": 2.3349, + "step": 3928500 + }, + { + "epoch": 19.47, + "learning_rate": 4.027068315472917e-05, + "loss": 2.3537, + "step": 3929000 + }, + { + "epoch": 19.47, + "learning_rate": 4.026944704547594e-05, + "loss": 2.323, + "step": 3929500 + }, + { + "epoch": 19.47, + "learning_rate": 4.0268208459049855e-05, + "loss": 2.3303, + "step": 3930000 + }, + { + "epoch": 19.47, + "learning_rate": 4.026696987262377e-05, + "loss": 2.3301, + "step": 3930500 + }, + { + "epoch": 19.48, + "learning_rate": 4.026573128619769e-05, + "loss": 2.3492, + "step": 3931000 + }, + { + "epoch": 19.48, + "learning_rate": 4.0264492699771606e-05, + "loss": 2.3522, + "step": 3931500 + }, + { + "epoch": 19.48, + "learning_rate": 4.026325411334552e-05, + "loss": 2.3272, + "step": 3932000 + }, + { + "epoch": 19.48, + "learning_rate": 4.026201552691944e-05, + "loss": 2.3068, + "step": 3932500 + }, + { + "epoch": 19.49, + "learning_rate": 4.026077694049336e-05, + "loss": 2.3614, + "step": 3933000 + }, + { + "epoch": 19.49, + "learning_rate": 4.0259540831240125e-05, + "loss": 2.3525, + "step": 3933500 + }, + { + "epoch": 19.49, + "learning_rate": 4.0258304721986894e-05, + "loss": 2.3418, + "step": 3934000 + }, + { + "epoch": 19.49, + "learning_rate": 4.025706613556081e-05, + "loss": 2.3367, + "step": 3934500 + }, + { + "epoch": 19.5, + "learning_rate": 4.025582754913473e-05, + "loss": 2.3377, + "step": 3935000 + }, + { + "epoch": 19.5, + "learning_rate": 4.025458896270864e-05, + "loss": 2.3424, + "step": 3935500 + }, + { + "epoch": 19.5, + "learning_rate": 4.0253350376282555e-05, + "loss": 2.3364, + "step": 3936000 + }, + { + "epoch": 19.5, + "learning_rate": 4.025211178985647e-05, + "loss": 2.3356, + "step": 3936500 + }, + { + "epoch": 19.51, + "learning_rate": 4.025087320343039e-05, + "loss": 2.3515, + "step": 3937000 + }, + { + "epoch": 19.51, + "learning_rate": 4.0249637094177165e-05, + "loss": 2.3304, + "step": 3937500 + }, + { + "epoch": 19.51, + "learning_rate": 4.024839850775108e-05, + "loss": 2.3372, + "step": 3938000 + }, + { + "epoch": 19.51, + "learning_rate": 4.024715992132499e-05, + "loss": 2.3153, + "step": 3938500 + }, + { + "epoch": 19.52, + "learning_rate": 4.024592133489891e-05, + "loss": 2.3227, + "step": 3939000 + }, + { + "epoch": 19.52, + "learning_rate": 4.024468522564568e-05, + "loss": 2.3313, + "step": 3939500 + }, + { + "epoch": 19.52, + "learning_rate": 4.0243446639219594e-05, + "loss": 2.3482, + "step": 3940000 + }, + { + "epoch": 19.52, + "learning_rate": 4.024221052996636e-05, + "loss": 2.305, + "step": 3940500 + }, + { + "epoch": 19.53, + "learning_rate": 4.024097194354028e-05, + "loss": 2.3352, + "step": 3941000 + }, + { + "epoch": 19.53, + "learning_rate": 4.02397333571142e-05, + "loss": 2.3291, + "step": 3941500 + }, + { + "epoch": 19.53, + "learning_rate": 4.0238494770688114e-05, + "loss": 2.333, + "step": 3942000 + }, + { + "epoch": 19.53, + "learning_rate": 4.0237258661434876e-05, + "loss": 2.3466, + "step": 3942500 + }, + { + "epoch": 19.53, + "learning_rate": 4.023602007500879e-05, + "loss": 2.3585, + "step": 3943000 + }, + { + "epoch": 19.54, + "learning_rate": 4.023478148858271e-05, + "loss": 2.3453, + "step": 3943500 + }, + { + "epoch": 19.54, + "learning_rate": 4.023354290215663e-05, + "loss": 2.3602, + "step": 3944000 + }, + { + "epoch": 19.54, + "learning_rate": 4.0232304315730544e-05, + "loss": 2.3127, + "step": 3944500 + }, + { + "epoch": 19.54, + "learning_rate": 4.023106572930446e-05, + "loss": 2.331, + "step": 3945000 + }, + { + "epoch": 19.55, + "learning_rate": 4.022982714287838e-05, + "loss": 2.3243, + "step": 3945500 + }, + { + "epoch": 19.55, + "learning_rate": 4.0228588556452295e-05, + "loss": 2.3414, + "step": 3946000 + }, + { + "epoch": 19.55, + "learning_rate": 4.022734997002621e-05, + "loss": 2.3321, + "step": 3946500 + }, + { + "epoch": 19.55, + "learning_rate": 4.022611138360013e-05, + "loss": 2.3295, + "step": 3947000 + }, + { + "epoch": 19.56, + "learning_rate": 4.0224872797174045e-05, + "loss": 2.3272, + "step": 3947500 + }, + { + "epoch": 19.56, + "learning_rate": 4.0223634210747955e-05, + "loss": 2.3229, + "step": 3948000 + }, + { + "epoch": 19.56, + "learning_rate": 4.022239810149473e-05, + "loss": 2.3459, + "step": 3948500 + }, + { + "epoch": 19.56, + "learning_rate": 4.022115951506865e-05, + "loss": 2.3256, + "step": 3949000 + }, + { + "epoch": 19.57, + "learning_rate": 4.0219920928642565e-05, + "loss": 2.3478, + "step": 3949500 + }, + { + "epoch": 19.57, + "learning_rate": 4.021868234221648e-05, + "loss": 2.3261, + "step": 3950000 + }, + { + "epoch": 19.57, + "learning_rate": 4.02174437557904e-05, + "loss": 2.3297, + "step": 3950500 + }, + { + "epoch": 19.57, + "learning_rate": 4.021620764653716e-05, + "loss": 2.326, + "step": 3951000 + }, + { + "epoch": 19.58, + "learning_rate": 4.021496906011108e-05, + "loss": 2.33, + "step": 3951500 + }, + { + "epoch": 19.58, + "learning_rate": 4.0213730473684995e-05, + "loss": 2.3451, + "step": 3952000 + }, + { + "epoch": 19.58, + "learning_rate": 4.021249188725891e-05, + "loss": 2.3306, + "step": 3952500 + }, + { + "epoch": 19.58, + "learning_rate": 4.021125330083283e-05, + "loss": 2.3403, + "step": 3953000 + }, + { + "epoch": 19.59, + "learning_rate": 4.0210014714406745e-05, + "loss": 2.3302, + "step": 3953500 + }, + { + "epoch": 19.59, + "learning_rate": 4.0208776127980656e-05, + "loss": 2.3446, + "step": 3954000 + }, + { + "epoch": 19.59, + "learning_rate": 4.020754001872743e-05, + "loss": 2.3467, + "step": 3954500 + }, + { + "epoch": 19.59, + "learning_rate": 4.020630143230135e-05, + "loss": 2.3475, + "step": 3955000 + }, + { + "epoch": 19.6, + "learning_rate": 4.0205062845875265e-05, + "loss": 2.3296, + "step": 3955500 + }, + { + "epoch": 19.6, + "learning_rate": 4.020382425944918e-05, + "loss": 2.3456, + "step": 3956000 + }, + { + "epoch": 19.6, + "learning_rate": 4.02025856730231e-05, + "loss": 2.3194, + "step": 3956500 + }, + { + "epoch": 19.6, + "learning_rate": 4.020134956376986e-05, + "loss": 2.3377, + "step": 3957000 + }, + { + "epoch": 19.61, + "learning_rate": 4.020011097734378e-05, + "loss": 2.3476, + "step": 3957500 + }, + { + "epoch": 19.61, + "learning_rate": 4.0198872390917695e-05, + "loss": 2.3331, + "step": 3958000 + }, + { + "epoch": 19.61, + "learning_rate": 4.0197636281664464e-05, + "loss": 2.3387, + "step": 3958500 + }, + { + "epoch": 19.61, + "learning_rate": 4.019639769523838e-05, + "loss": 2.3505, + "step": 3959000 + }, + { + "epoch": 19.62, + "learning_rate": 4.01951591088123e-05, + "loss": 2.3324, + "step": 3959500 + }, + { + "epoch": 19.62, + "learning_rate": 4.0193920522386214e-05, + "loss": 2.32, + "step": 3960000 + }, + { + "epoch": 19.62, + "learning_rate": 4.019268193596013e-05, + "loss": 2.3419, + "step": 3960500 + }, + { + "epoch": 19.62, + "learning_rate": 4.019144334953405e-05, + "loss": 2.3614, + "step": 3961000 + }, + { + "epoch": 19.63, + "learning_rate": 4.0190204763107965e-05, + "loss": 2.338, + "step": 3961500 + }, + { + "epoch": 19.63, + "learning_rate": 4.018896865385473e-05, + "loss": 2.3367, + "step": 3962000 + }, + { + "epoch": 19.63, + "learning_rate": 4.0187730067428644e-05, + "loss": 2.3342, + "step": 3962500 + }, + { + "epoch": 19.63, + "learning_rate": 4.018649148100256e-05, + "loss": 2.3435, + "step": 3963000 + }, + { + "epoch": 19.64, + "learning_rate": 4.018525537174933e-05, + "loss": 2.3708, + "step": 3963500 + }, + { + "epoch": 19.64, + "learning_rate": 4.018401678532325e-05, + "loss": 2.3477, + "step": 3964000 + }, + { + "epoch": 19.64, + "learning_rate": 4.0182780676070016e-05, + "loss": 2.3401, + "step": 3964500 + }, + { + "epoch": 19.64, + "learning_rate": 4.018154208964393e-05, + "loss": 2.2965, + "step": 3965000 + }, + { + "epoch": 19.65, + "learning_rate": 4.018030350321785e-05, + "loss": 2.3713, + "step": 3965500 + }, + { + "epoch": 19.65, + "learning_rate": 4.0179064916791766e-05, + "loss": 2.3456, + "step": 3966000 + }, + { + "epoch": 19.65, + "learning_rate": 4.017782633036568e-05, + "loss": 2.3476, + "step": 3966500 + }, + { + "epoch": 19.65, + "learning_rate": 4.01765877439396e-05, + "loss": 2.3343, + "step": 3967000 + }, + { + "epoch": 19.66, + "learning_rate": 4.017534915751352e-05, + "loss": 2.3137, + "step": 3967500 + }, + { + "epoch": 19.66, + "learning_rate": 4.0174110571087434e-05, + "loss": 2.3458, + "step": 3968000 + }, + { + "epoch": 19.66, + "learning_rate": 4.017287198466135e-05, + "loss": 2.3405, + "step": 3968500 + }, + { + "epoch": 19.66, + "learning_rate": 4.017163339823526e-05, + "loss": 2.3362, + "step": 3969000 + }, + { + "epoch": 19.67, + "learning_rate": 4.017039481180918e-05, + "loss": 2.3175, + "step": 3969500 + }, + { + "epoch": 19.67, + "learning_rate": 4.0169156225383095e-05, + "loss": 2.3659, + "step": 3970000 + }, + { + "epoch": 19.67, + "learning_rate": 4.016791763895701e-05, + "loss": 2.3572, + "step": 3970500 + }, + { + "epoch": 19.67, + "learning_rate": 4.016668400687663e-05, + "loss": 2.3597, + "step": 3971000 + }, + { + "epoch": 19.68, + "learning_rate": 4.016544542045055e-05, + "loss": 2.3242, + "step": 3971500 + }, + { + "epoch": 19.68, + "learning_rate": 4.0164206834024466e-05, + "loss": 2.3366, + "step": 3972000 + }, + { + "epoch": 19.68, + "learning_rate": 4.0162968247598383e-05, + "loss": 2.3619, + "step": 3972500 + }, + { + "epoch": 19.68, + "learning_rate": 4.01617296611723e-05, + "loss": 2.3342, + "step": 3973000 + }, + { + "epoch": 19.69, + "learning_rate": 4.016049107474622e-05, + "loss": 2.3261, + "step": 3973500 + }, + { + "epoch": 19.69, + "learning_rate": 4.0159252488320134e-05, + "loss": 2.3447, + "step": 3974000 + }, + { + "epoch": 19.69, + "learning_rate": 4.015801390189405e-05, + "loss": 2.3384, + "step": 3974500 + }, + { + "epoch": 19.69, + "learning_rate": 4.015677531546797e-05, + "loss": 2.3568, + "step": 3975000 + }, + { + "epoch": 19.7, + "learning_rate": 4.015553672904188e-05, + "loss": 2.3522, + "step": 3975500 + }, + { + "epoch": 19.7, + "learning_rate": 4.0154298142615795e-05, + "loss": 2.3391, + "step": 3976000 + }, + { + "epoch": 19.7, + "learning_rate": 4.015305955618971e-05, + "loss": 2.3405, + "step": 3976500 + }, + { + "epoch": 19.7, + "learning_rate": 4.015182096976363e-05, + "loss": 2.3432, + "step": 3977000 + }, + { + "epoch": 19.71, + "learning_rate": 4.0150582383337546e-05, + "loss": 2.3234, + "step": 3977500 + }, + { + "epoch": 19.71, + "learning_rate": 4.0149346274084315e-05, + "loss": 2.3282, + "step": 3978000 + }, + { + "epoch": 19.71, + "learning_rate": 4.014810768765823e-05, + "loss": 2.3459, + "step": 3978500 + }, + { + "epoch": 19.71, + "learning_rate": 4.014686910123215e-05, + "loss": 2.3324, + "step": 3979000 + }, + { + "epoch": 19.72, + "learning_rate": 4.0145630514806066e-05, + "loss": 2.3331, + "step": 3979500 + }, + { + "epoch": 19.72, + "learning_rate": 4.014439192837998e-05, + "loss": 2.3306, + "step": 3980000 + }, + { + "epoch": 19.72, + "learning_rate": 4.014315581912675e-05, + "loss": 2.3346, + "step": 3980500 + }, + { + "epoch": 19.72, + "learning_rate": 4.014191723270067e-05, + "loss": 2.3382, + "step": 3981000 + }, + { + "epoch": 19.73, + "learning_rate": 4.0140678646274585e-05, + "loss": 2.3328, + "step": 3981500 + }, + { + "epoch": 19.73, + "learning_rate": 4.01394400598485e-05, + "loss": 2.3343, + "step": 3982000 + }, + { + "epoch": 19.73, + "learning_rate": 4.013820147342241e-05, + "loss": 2.3313, + "step": 3982500 + }, + { + "epoch": 19.73, + "learning_rate": 4.013696288699633e-05, + "loss": 2.3678, + "step": 3983000 + }, + { + "epoch": 19.74, + "learning_rate": 4.0135724300570246e-05, + "loss": 2.3607, + "step": 3983500 + }, + { + "epoch": 19.74, + "learning_rate": 4.013448571414416e-05, + "loss": 2.3278, + "step": 3984000 + }, + { + "epoch": 19.74, + "learning_rate": 4.013324712771808e-05, + "loss": 2.3487, + "step": 3984500 + }, + { + "epoch": 19.74, + "learning_rate": 4.0132008541292e-05, + "loss": 2.3366, + "step": 3985000 + }, + { + "epoch": 19.75, + "learning_rate": 4.013076995486591e-05, + "loss": 2.364, + "step": 3985500 + }, + { + "epoch": 19.75, + "learning_rate": 4.012953384561268e-05, + "loss": 2.3186, + "step": 3986000 + }, + { + "epoch": 19.75, + "learning_rate": 4.012829773635945e-05, + "loss": 2.3758, + "step": 3986500 + }, + { + "epoch": 19.75, + "learning_rate": 4.012705914993337e-05, + "loss": 2.3873, + "step": 3987000 + }, + { + "epoch": 19.76, + "learning_rate": 4.0125820563507285e-05, + "loss": 2.3456, + "step": 3987500 + }, + { + "epoch": 19.76, + "learning_rate": 4.01245819770812e-05, + "loss": 2.342, + "step": 3988000 + }, + { + "epoch": 19.76, + "learning_rate": 4.012334339065512e-05, + "loss": 2.3393, + "step": 3988500 + }, + { + "epoch": 19.76, + "learning_rate": 4.012210480422903e-05, + "loss": 2.3263, + "step": 3989000 + }, + { + "epoch": 19.77, + "learning_rate": 4.0120866217802946e-05, + "loss": 2.3438, + "step": 3989500 + }, + { + "epoch": 19.77, + "learning_rate": 4.011962763137686e-05, + "loss": 2.348, + "step": 3990000 + }, + { + "epoch": 19.77, + "learning_rate": 4.011838904495078e-05, + "loss": 2.3395, + "step": 3990500 + }, + { + "epoch": 19.77, + "learning_rate": 4.011715293569755e-05, + "loss": 2.3636, + "step": 3991000 + }, + { + "epoch": 19.78, + "learning_rate": 4.0115914349271466e-05, + "loss": 2.3862, + "step": 3991500 + }, + { + "epoch": 19.78, + "learning_rate": 4.0114678240018235e-05, + "loss": 2.3051, + "step": 3992000 + }, + { + "epoch": 19.78, + "learning_rate": 4.011343965359215e-05, + "loss": 2.3611, + "step": 3992500 + }, + { + "epoch": 19.78, + "learning_rate": 4.011220106716607e-05, + "loss": 2.3512, + "step": 3993000 + }, + { + "epoch": 19.79, + "learning_rate": 4.0110962480739985e-05, + "loss": 2.3359, + "step": 3993500 + }, + { + "epoch": 19.79, + "learning_rate": 4.01097238943139e-05, + "loss": 2.3261, + "step": 3994000 + }, + { + "epoch": 19.79, + "learning_rate": 4.010848530788782e-05, + "loss": 2.369, + "step": 3994500 + }, + { + "epoch": 19.79, + "learning_rate": 4.010724919863458e-05, + "loss": 2.3553, + "step": 3995000 + }, + { + "epoch": 19.8, + "learning_rate": 4.01060106122085e-05, + "loss": 2.3294, + "step": 3995500 + }, + { + "epoch": 19.8, + "learning_rate": 4.0104772025782415e-05, + "loss": 2.3247, + "step": 3996000 + }, + { + "epoch": 19.8, + "learning_rate": 4.010353343935633e-05, + "loss": 2.3215, + "step": 3996500 + }, + { + "epoch": 19.8, + "learning_rate": 4.010229485293025e-05, + "loss": 2.3778, + "step": 3997000 + }, + { + "epoch": 19.8, + "learning_rate": 4.0101056266504166e-05, + "loss": 2.3558, + "step": 3997500 + }, + { + "epoch": 19.81, + "learning_rate": 4.009981768007808e-05, + "loss": 2.3269, + "step": 3998000 + }, + { + "epoch": 19.81, + "learning_rate": 4.0098579093652e-05, + "loss": 2.3752, + "step": 3998500 + }, + { + "epoch": 19.81, + "learning_rate": 4.009734050722592e-05, + "loss": 2.3359, + "step": 3999000 + }, + { + "epoch": 19.81, + "learning_rate": 4.0096101920799834e-05, + "loss": 2.3454, + "step": 3999500 + }, + { + "epoch": 19.82, + "learning_rate": 4.009486333437375e-05, + "loss": 2.3231, + "step": 4000000 + }, + { + "epoch": 19.82, + "learning_rate": 4.009362474794767e-05, + "loss": 2.3238, + "step": 4000500 + }, + { + "epoch": 19.82, + "learning_rate": 4.009238616152158e-05, + "loss": 2.3168, + "step": 4001000 + }, + { + "epoch": 19.82, + "learning_rate": 4.00911525294412e-05, + "loss": 2.3225, + "step": 4001500 + }, + { + "epoch": 19.83, + "learning_rate": 4.0089913943015115e-05, + "loss": 2.3095, + "step": 4002000 + }, + { + "epoch": 19.83, + "learning_rate": 4.008867535658903e-05, + "loss": 2.3578, + "step": 4002500 + }, + { + "epoch": 19.83, + "learning_rate": 4.008743677016295e-05, + "loss": 2.3386, + "step": 4003000 + }, + { + "epoch": 19.83, + "learning_rate": 4.0086198183736866e-05, + "loss": 2.3406, + "step": 4003500 + }, + { + "epoch": 19.84, + "learning_rate": 4.008496455165649e-05, + "loss": 2.3178, + "step": 4004000 + }, + { + "epoch": 19.84, + "learning_rate": 4.0083725965230404e-05, + "loss": 2.3481, + "step": 4004500 + }, + { + "epoch": 19.84, + "learning_rate": 4.008248737880432e-05, + "loss": 2.3606, + "step": 4005000 + }, + { + "epoch": 19.84, + "learning_rate": 4.008124879237824e-05, + "loss": 2.3519, + "step": 4005500 + }, + { + "epoch": 19.85, + "learning_rate": 4.008001020595215e-05, + "loss": 2.3332, + "step": 4006000 + }, + { + "epoch": 19.85, + "learning_rate": 4.0078771619526065e-05, + "loss": 2.3432, + "step": 4006500 + }, + { + "epoch": 19.85, + "learning_rate": 4.007753551027284e-05, + "loss": 2.3641, + "step": 4007000 + }, + { + "epoch": 19.85, + "learning_rate": 4.007629692384676e-05, + "loss": 2.3344, + "step": 4007500 + }, + { + "epoch": 19.86, + "learning_rate": 4.007505833742067e-05, + "loss": 2.3358, + "step": 4008000 + }, + { + "epoch": 19.86, + "learning_rate": 4.0073819750994584e-05, + "loss": 2.3221, + "step": 4008500 + }, + { + "epoch": 19.86, + "learning_rate": 4.00725811645685e-05, + "loss": 2.3158, + "step": 4009000 + }, + { + "epoch": 19.86, + "learning_rate": 4.007134505531528e-05, + "loss": 2.3348, + "step": 4009500 + }, + { + "epoch": 19.87, + "learning_rate": 4.0070106468889194e-05, + "loss": 2.3408, + "step": 4010000 + }, + { + "epoch": 19.87, + "learning_rate": 4.0068870359635956e-05, + "loss": 2.339, + "step": 4010500 + }, + { + "epoch": 19.87, + "learning_rate": 4.0067634250382724e-05, + "loss": 2.3199, + "step": 4011000 + }, + { + "epoch": 19.87, + "learning_rate": 4.006639566395664e-05, + "loss": 2.3297, + "step": 4011500 + }, + { + "epoch": 19.88, + "learning_rate": 4.006515707753056e-05, + "loss": 2.3532, + "step": 4012000 + }, + { + "epoch": 19.88, + "learning_rate": 4.0063918491104475e-05, + "loss": 2.3398, + "step": 4012500 + }, + { + "epoch": 19.88, + "learning_rate": 4.006267990467839e-05, + "loss": 2.328, + "step": 4013000 + }, + { + "epoch": 19.88, + "learning_rate": 4.006144131825231e-05, + "loss": 2.3538, + "step": 4013500 + }, + { + "epoch": 19.89, + "learning_rate": 4.0060202731826226e-05, + "loss": 2.3285, + "step": 4014000 + }, + { + "epoch": 19.89, + "learning_rate": 4.005896414540014e-05, + "loss": 2.3619, + "step": 4014500 + }, + { + "epoch": 19.89, + "learning_rate": 4.005772555897406e-05, + "loss": 2.3521, + "step": 4015000 + }, + { + "epoch": 19.89, + "learning_rate": 4.005648697254798e-05, + "loss": 2.3502, + "step": 4015500 + }, + { + "epoch": 19.9, + "learning_rate": 4.0055248386121894e-05, + "loss": 2.3306, + "step": 4016000 + }, + { + "epoch": 19.9, + "learning_rate": 4.0054012276868656e-05, + "loss": 2.3506, + "step": 4016500 + }, + { + "epoch": 19.9, + "learning_rate": 4.005277369044257e-05, + "loss": 2.3259, + "step": 4017000 + }, + { + "epoch": 19.9, + "learning_rate": 4.005153758118934e-05, + "loss": 2.3518, + "step": 4017500 + }, + { + "epoch": 19.91, + "learning_rate": 4.005029899476326e-05, + "loss": 2.3261, + "step": 4018000 + }, + { + "epoch": 19.91, + "learning_rate": 4.0049060408337175e-05, + "loss": 2.3528, + "step": 4018500 + }, + { + "epoch": 19.91, + "learning_rate": 4.004782182191109e-05, + "loss": 2.3506, + "step": 4019000 + }, + { + "epoch": 19.91, + "learning_rate": 4.004658323548501e-05, + "loss": 2.3426, + "step": 4019500 + }, + { + "epoch": 19.92, + "learning_rate": 4.0045344649058926e-05, + "loss": 2.3404, + "step": 4020000 + }, + { + "epoch": 19.92, + "learning_rate": 4.004410606263284e-05, + "loss": 2.373, + "step": 4020500 + }, + { + "epoch": 19.92, + "learning_rate": 4.004286747620676e-05, + "loss": 2.3435, + "step": 4021000 + }, + { + "epoch": 19.92, + "learning_rate": 4.004163136695352e-05, + "loss": 2.329, + "step": 4021500 + }, + { + "epoch": 19.93, + "learning_rate": 4.004039278052744e-05, + "loss": 2.336, + "step": 4022000 + }, + { + "epoch": 19.93, + "learning_rate": 4.0039154194101356e-05, + "loss": 2.3432, + "step": 4022500 + }, + { + "epoch": 19.93, + "learning_rate": 4.003791560767527e-05, + "loss": 2.3376, + "step": 4023000 + }, + { + "epoch": 19.93, + "learning_rate": 4.003667702124919e-05, + "loss": 2.3447, + "step": 4023500 + }, + { + "epoch": 19.94, + "learning_rate": 4.003543843482311e-05, + "loss": 2.3564, + "step": 4024000 + }, + { + "epoch": 19.94, + "learning_rate": 4.0034202325569875e-05, + "loss": 2.3494, + "step": 4024500 + }, + { + "epoch": 19.94, + "learning_rate": 4.003296373914379e-05, + "loss": 2.3439, + "step": 4025000 + }, + { + "epoch": 19.94, + "learning_rate": 4.003172515271771e-05, + "loss": 2.3499, + "step": 4025500 + }, + { + "epoch": 19.95, + "learning_rate": 4.003048904346448e-05, + "loss": 2.311, + "step": 4026000 + }, + { + "epoch": 19.95, + "learning_rate": 4.0029250457038395e-05, + "loss": 2.3349, + "step": 4026500 + }, + { + "epoch": 19.95, + "learning_rate": 4.0028011870612305e-05, + "loss": 2.3582, + "step": 4027000 + }, + { + "epoch": 19.95, + "learning_rate": 4.002677328418622e-05, + "loss": 2.3338, + "step": 4027500 + }, + { + "epoch": 19.96, + "learning_rate": 4.002553469776014e-05, + "loss": 2.3483, + "step": 4028000 + }, + { + "epoch": 19.96, + "learning_rate": 4.0024296111334056e-05, + "loss": 2.3628, + "step": 4028500 + }, + { + "epoch": 19.96, + "learning_rate": 4.002305752490797e-05, + "loss": 2.3416, + "step": 4029000 + }, + { + "epoch": 19.96, + "learning_rate": 4.002182141565474e-05, + "loss": 2.3288, + "step": 4029500 + }, + { + "epoch": 19.97, + "learning_rate": 4.002058282922866e-05, + "loss": 2.3303, + "step": 4030000 + }, + { + "epoch": 19.97, + "learning_rate": 4.0019344242802576e-05, + "loss": 2.3498, + "step": 4030500 + }, + { + "epoch": 19.97, + "learning_rate": 4.001810565637649e-05, + "loss": 2.3482, + "step": 4031000 + }, + { + "epoch": 19.97, + "learning_rate": 4.001686706995041e-05, + "loss": 2.3286, + "step": 4031500 + }, + { + "epoch": 19.98, + "learning_rate": 4.0015628483524326e-05, + "loss": 2.3441, + "step": 4032000 + }, + { + "epoch": 19.98, + "learning_rate": 4.001438989709824e-05, + "loss": 2.3522, + "step": 4032500 + }, + { + "epoch": 19.98, + "learning_rate": 4.001315131067216e-05, + "loss": 2.3592, + "step": 4033000 + }, + { + "epoch": 19.98, + "learning_rate": 4.001191272424608e-05, + "loss": 2.3551, + "step": 4033500 + }, + { + "epoch": 19.99, + "learning_rate": 4.0010674137819994e-05, + "loss": 2.3237, + "step": 4034000 + }, + { + "epoch": 19.99, + "learning_rate": 4.000943555139391e-05, + "loss": 2.3385, + "step": 4034500 + }, + { + "epoch": 19.99, + "learning_rate": 4.000819696496783e-05, + "loss": 2.3485, + "step": 4035000 + }, + { + "epoch": 19.99, + "learning_rate": 4.0006958378541745e-05, + "loss": 2.3399, + "step": 4035500 + }, + { + "epoch": 20.0, + "learning_rate": 4.000572226928851e-05, + "loss": 2.3312, + "step": 4036000 + }, + { + "epoch": 20.0, + "learning_rate": 4.0004483682862424e-05, + "loss": 2.3215, + "step": 4036500 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.6514623229212978, + "eval_accuracy_mlm": 0.6059239055867153, + "eval_accuracy_nsp": 0.8662569275844352, + "eval_loss": 2.360621213912964, + "eval_runtime": 145.9157, + "eval_samples_per_second": 1747.304, + "eval_steps_per_second": 72.809, + "step": 4036860 + }, + { + "epoch": 20.0, + "learning_rate": 4.000324509643634e-05, + "loss": 2.3361, + "step": 4037000 + }, + { + "epoch": 20.0, + "learning_rate": 4.000200651001026e-05, + "loss": 2.331, + "step": 4037500 + }, + { + "epoch": 20.01, + "learning_rate": 4.0000767923584175e-05, + "loss": 2.3296, + "step": 4038000 + }, + { + "epoch": 20.01, + "learning_rate": 3.999952933715809e-05, + "loss": 2.324, + "step": 4038500 + }, + { + "epoch": 20.01, + "learning_rate": 3.999829075073201e-05, + "loss": 2.274, + "step": 4039000 + }, + { + "epoch": 20.01, + "learning_rate": 3.999705216430592e-05, + "loss": 2.3324, + "step": 4039500 + }, + { + "epoch": 20.02, + "learning_rate": 3.9995813577879836e-05, + "loss": 2.3256, + "step": 4040000 + }, + { + "epoch": 20.02, + "learning_rate": 3.999457499145375e-05, + "loss": 2.2992, + "step": 4040500 + }, + { + "epoch": 20.02, + "learning_rate": 3.999333640502767e-05, + "loss": 2.2902, + "step": 4041000 + }, + { + "epoch": 20.02, + "learning_rate": 3.9992097818601586e-05, + "loss": 2.2879, + "step": 4041500 + }, + { + "epoch": 20.03, + "learning_rate": 3.999086170934836e-05, + "loss": 2.3138, + "step": 4042000 + }, + { + "epoch": 20.03, + "learning_rate": 3.998962312292227e-05, + "loss": 2.3257, + "step": 4042500 + }, + { + "epoch": 20.03, + "learning_rate": 3.998838453649619e-05, + "loss": 2.3098, + "step": 4043000 + }, + { + "epoch": 20.03, + "learning_rate": 3.9987145950070106e-05, + "loss": 2.2984, + "step": 4043500 + }, + { + "epoch": 20.04, + "learning_rate": 3.998590736364402e-05, + "loss": 2.2918, + "step": 4044000 + }, + { + "epoch": 20.04, + "learning_rate": 3.998466877721794e-05, + "loss": 2.2826, + "step": 4044500 + }, + { + "epoch": 20.04, + "learning_rate": 3.998343019079186e-05, + "loss": 2.3165, + "step": 4045000 + }, + { + "epoch": 20.04, + "learning_rate": 3.998219408153862e-05, + "loss": 2.3079, + "step": 4045500 + }, + { + "epoch": 20.05, + "learning_rate": 3.9980955495112536e-05, + "loss": 2.3265, + "step": 4046000 + }, + { + "epoch": 20.05, + "learning_rate": 3.997971690868645e-05, + "loss": 2.3006, + "step": 4046500 + }, + { + "epoch": 20.05, + "learning_rate": 3.997848079943323e-05, + "loss": 2.3074, + "step": 4047000 + }, + { + "epoch": 20.05, + "learning_rate": 3.9977242213007145e-05, + "loss": 2.3117, + "step": 4047500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997600362658106e-05, + "loss": 2.3119, + "step": 4048000 + }, + { + "epoch": 20.06, + "learning_rate": 3.997476504015497e-05, + "loss": 2.3299, + "step": 4048500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997352645372889e-05, + "loss": 2.3168, + "step": 4049000 + }, + { + "epoch": 20.06, + "learning_rate": 3.9972287867302806e-05, + "loss": 2.3114, + "step": 4049500 + }, + { + "epoch": 20.07, + "learning_rate": 3.997104928087672e-05, + "loss": 2.3004, + "step": 4050000 + }, + { + "epoch": 20.07, + "learning_rate": 3.996981069445064e-05, + "loss": 2.3204, + "step": 4050500 + }, + { + "epoch": 20.07, + "learning_rate": 3.996857458519741e-05, + "loss": 2.3282, + "step": 4051000 + }, + { + "epoch": 20.07, + "learning_rate": 3.9967335998771326e-05, + "loss": 2.3293, + "step": 4051500 + }, + { + "epoch": 20.08, + "learning_rate": 3.9966097412345236e-05, + "loss": 2.3379, + "step": 4052000 + }, + { + "epoch": 20.08, + "learning_rate": 3.996485882591915e-05, + "loss": 2.3334, + "step": 4052500 + }, + { + "epoch": 20.08, + "learning_rate": 3.996362271666593e-05, + "loss": 2.3042, + "step": 4053000 + }, + { + "epoch": 20.08, + "learning_rate": 3.9962384130239845e-05, + "loss": 2.3357, + "step": 4053500 + }, + { + "epoch": 20.08, + "learning_rate": 3.9961148020986614e-05, + "loss": 2.3007, + "step": 4054000 + }, + { + "epoch": 20.09, + "learning_rate": 3.9959909434560524e-05, + "loss": 2.3134, + "step": 4054500 + }, + { + "epoch": 20.09, + "learning_rate": 3.995867084813444e-05, + "loss": 2.3141, + "step": 4055000 + }, + { + "epoch": 20.09, + "learning_rate": 3.995743226170836e-05, + "loss": 2.3416, + "step": 4055500 + }, + { + "epoch": 20.09, + "learning_rate": 3.9956193675282275e-05, + "loss": 2.3416, + "step": 4056000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995495508885619e-05, + "loss": 2.3061, + "step": 4056500 + }, + { + "epoch": 20.1, + "learning_rate": 3.995371650243011e-05, + "loss": 2.3172, + "step": 4057000 + }, + { + "epoch": 20.1, + "learning_rate": 3.9952477916004026e-05, + "loss": 2.3176, + "step": 4057500 + }, + { + "epoch": 20.1, + "learning_rate": 3.9951241806750795e-05, + "loss": 2.3158, + "step": 4058000 + }, + { + "epoch": 20.11, + "learning_rate": 3.995000322032471e-05, + "loss": 2.3287, + "step": 4058500 + }, + { + "epoch": 20.11, + "learning_rate": 3.994876711107148e-05, + "loss": 2.3434, + "step": 4059000 + }, + { + "epoch": 20.11, + "learning_rate": 3.99475285246454e-05, + "loss": 2.3069, + "step": 4059500 + }, + { + "epoch": 20.11, + "learning_rate": 3.9946289938219314e-05, + "loss": 2.3125, + "step": 4060000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994505135179323e-05, + "loss": 2.3109, + "step": 4060500 + }, + { + "epoch": 20.12, + "learning_rate": 3.994381276536714e-05, + "loss": 2.3453, + "step": 4061000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994257417894106e-05, + "loss": 2.339, + "step": 4061500 + }, + { + "epoch": 20.12, + "learning_rate": 3.9941335592514975e-05, + "loss": 2.3199, + "step": 4062000 + }, + { + "epoch": 20.13, + "learning_rate": 3.994009700608889e-05, + "loss": 2.3294, + "step": 4062500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993885841966281e-05, + "loss": 2.3076, + "step": 4063000 + }, + { + "epoch": 20.13, + "learning_rate": 3.9937619833236726e-05, + "loss": 2.3158, + "step": 4063500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993638124681064e-05, + "loss": 2.3061, + "step": 4064000 + }, + { + "epoch": 20.14, + "learning_rate": 3.993514266038455e-05, + "loss": 2.343, + "step": 4064500 + }, + { + "epoch": 20.14, + "learning_rate": 3.993390655113133e-05, + "loss": 2.3145, + "step": 4065000 + }, + { + "epoch": 20.14, + "learning_rate": 3.9932667964705246e-05, + "loss": 2.2968, + "step": 4065500 + }, + { + "epoch": 20.14, + "learning_rate": 3.993142937827916e-05, + "loss": 2.3266, + "step": 4066000 + }, + { + "epoch": 20.15, + "learning_rate": 3.993019079185308e-05, + "loss": 2.3402, + "step": 4066500 + }, + { + "epoch": 20.15, + "learning_rate": 3.9928952205426996e-05, + "loss": 2.3157, + "step": 4067000 + }, + { + "epoch": 20.15, + "learning_rate": 3.992771857334661e-05, + "loss": 2.3301, + "step": 4067500 + }, + { + "epoch": 20.15, + "learning_rate": 3.992647998692053e-05, + "loss": 2.3513, + "step": 4068000 + }, + { + "epoch": 20.16, + "learning_rate": 3.9925241400494444e-05, + "loss": 2.3077, + "step": 4068500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992400281406836e-05, + "loss": 2.3166, + "step": 4069000 + }, + { + "epoch": 20.16, + "learning_rate": 3.992276422764228e-05, + "loss": 2.3251, + "step": 4069500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992152811838905e-05, + "loss": 2.3054, + "step": 4070000 + }, + { + "epoch": 20.17, + "learning_rate": 3.9920289531962964e-05, + "loss": 2.322, + "step": 4070500 + }, + { + "epoch": 20.17, + "learning_rate": 3.9919053422709726e-05, + "loss": 2.3153, + "step": 4071000 + }, + { + "epoch": 20.17, + "learning_rate": 3.991781483628364e-05, + "loss": 2.2919, + "step": 4071500 + }, + { + "epoch": 20.17, + "learning_rate": 3.991657624985756e-05, + "loss": 2.3338, + "step": 4072000 + }, + { + "epoch": 20.18, + "learning_rate": 3.9915337663431476e-05, + "loss": 2.3325, + "step": 4072500 + }, + { + "epoch": 20.18, + "learning_rate": 3.9914099077005393e-05, + "loss": 2.3226, + "step": 4073000 + }, + { + "epoch": 20.18, + "learning_rate": 3.991286049057931e-05, + "loss": 2.3341, + "step": 4073500 + }, + { + "epoch": 20.18, + "learning_rate": 3.991162190415323e-05, + "loss": 2.2985, + "step": 4074000 + }, + { + "epoch": 20.19, + "learning_rate": 3.9910383317727144e-05, + "loss": 2.3138, + "step": 4074500 + }, + { + "epoch": 20.19, + "learning_rate": 3.990914473130106e-05, + "loss": 2.3167, + "step": 4075000 + }, + { + "epoch": 20.19, + "learning_rate": 3.990790614487498e-05, + "loss": 2.3082, + "step": 4075500 + }, + { + "epoch": 20.19, + "learning_rate": 3.9906667558448895e-05, + "loss": 2.3136, + "step": 4076000 + }, + { + "epoch": 20.2, + "learning_rate": 3.990542897202281e-05, + "loss": 2.3414, + "step": 4076500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990419038559673e-05, + "loss": 2.298, + "step": 4077000 + }, + { + "epoch": 20.2, + "learning_rate": 3.99029542763435e-05, + "loss": 2.317, + "step": 4077500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990171816709026e-05, + "loss": 2.3172, + "step": 4078000 + }, + { + "epoch": 20.21, + "learning_rate": 3.9900479580664177e-05, + "loss": 2.3035, + "step": 4078500 + }, + { + "epoch": 20.21, + "learning_rate": 3.9899240994238094e-05, + "loss": 2.3391, + "step": 4079000 + }, + { + "epoch": 20.21, + "learning_rate": 3.989800240781201e-05, + "loss": 2.3139, + "step": 4079500 + }, + { + "epoch": 20.21, + "learning_rate": 3.989676382138593e-05, + "loss": 2.297, + "step": 4080000 + }, + { + "epoch": 20.22, + "learning_rate": 3.9895527712132696e-05, + "loss": 2.326, + "step": 4080500 + }, + { + "epoch": 20.22, + "learning_rate": 3.989428912570661e-05, + "loss": 2.3263, + "step": 4081000 + }, + { + "epoch": 20.22, + "learning_rate": 3.989305053928053e-05, + "loss": 2.3248, + "step": 4081500 + }, + { + "epoch": 20.22, + "learning_rate": 3.98918144300273e-05, + "loss": 2.3094, + "step": 4082000 + }, + { + "epoch": 20.23, + "learning_rate": 3.9890575843601216e-05, + "loss": 2.3216, + "step": 4082500 + }, + { + "epoch": 20.23, + "learning_rate": 3.988933725717513e-05, + "loss": 2.3138, + "step": 4083000 + }, + { + "epoch": 20.23, + "learning_rate": 3.988809867074905e-05, + "loss": 2.3215, + "step": 4083500 + }, + { + "epoch": 20.23, + "learning_rate": 3.9886860084322967e-05, + "loss": 2.3122, + "step": 4084000 + }, + { + "epoch": 20.24, + "learning_rate": 3.988562149789688e-05, + "loss": 2.3316, + "step": 4084500 + }, + { + "epoch": 20.24, + "learning_rate": 3.988438538864365e-05, + "loss": 2.324, + "step": 4085000 + }, + { + "epoch": 20.24, + "learning_rate": 3.988314680221757e-05, + "loss": 2.3268, + "step": 4085500 + }, + { + "epoch": 20.24, + "learning_rate": 3.9881908215791486e-05, + "loss": 2.2985, + "step": 4086000 + }, + { + "epoch": 20.25, + "learning_rate": 3.98806696293654e-05, + "loss": 2.3345, + "step": 4086500 + }, + { + "epoch": 20.25, + "learning_rate": 3.987943104293931e-05, + "loss": 2.3098, + "step": 4087000 + }, + { + "epoch": 20.25, + "learning_rate": 3.987819245651323e-05, + "loss": 2.3195, + "step": 4087500 + }, + { + "epoch": 20.25, + "learning_rate": 3.987695387008715e-05, + "loss": 2.373, + "step": 4088000 + }, + { + "epoch": 20.26, + "learning_rate": 3.9875715283661064e-05, + "loss": 2.3466, + "step": 4088500 + }, + { + "epoch": 20.26, + "learning_rate": 3.987447669723498e-05, + "loss": 2.3485, + "step": 4089000 + }, + { + "epoch": 20.26, + "learning_rate": 3.98732381108089e-05, + "loss": 2.3326, + "step": 4089500 + }, + { + "epoch": 20.26, + "learning_rate": 3.9871999524382815e-05, + "loss": 2.2901, + "step": 4090000 + }, + { + "epoch": 20.27, + "learning_rate": 3.987076093795673e-05, + "loss": 2.3226, + "step": 4090500 + }, + { + "epoch": 20.27, + "learning_rate": 3.98695248287035e-05, + "loss": 2.3075, + "step": 4091000 + }, + { + "epoch": 20.27, + "learning_rate": 3.986828624227741e-05, + "loss": 2.2915, + "step": 4091500 + }, + { + "epoch": 20.27, + "learning_rate": 3.986704765585133e-05, + "loss": 2.3465, + "step": 4092000 + }, + { + "epoch": 20.28, + "learning_rate": 3.9865809069425245e-05, + "loss": 2.3153, + "step": 4092500 + }, + { + "epoch": 20.28, + "learning_rate": 3.986457048299916e-05, + "loss": 2.3102, + "step": 4093000 + }, + { + "epoch": 20.28, + "learning_rate": 3.986333189657308e-05, + "loss": 2.3314, + "step": 4093500 + }, + { + "epoch": 20.28, + "learning_rate": 3.9862093310146995e-05, + "loss": 2.3403, + "step": 4094000 + }, + { + "epoch": 20.29, + "learning_rate": 3.9860857200893764e-05, + "loss": 2.3266, + "step": 4094500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985962109164054e-05, + "loss": 2.3155, + "step": 4095000 + }, + { + "epoch": 20.29, + "learning_rate": 3.985838250521445e-05, + "loss": 2.3508, + "step": 4095500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985714391878837e-05, + "loss": 2.32, + "step": 4096000 + }, + { + "epoch": 20.3, + "learning_rate": 3.9855905332362284e-05, + "loss": 2.328, + "step": 4096500 + }, + { + "epoch": 20.3, + "learning_rate": 3.98546667459362e-05, + "loss": 2.3088, + "step": 4097000 + }, + { + "epoch": 20.3, + "learning_rate": 3.985343063668297e-05, + "loss": 2.3212, + "step": 4097500 + }, + { + "epoch": 20.3, + "learning_rate": 3.9852192050256886e-05, + "loss": 2.3098, + "step": 4098000 + }, + { + "epoch": 20.31, + "learning_rate": 3.98509534638308e-05, + "loss": 2.32, + "step": 4098500 + }, + { + "epoch": 20.31, + "learning_rate": 3.984971487740472e-05, + "loss": 2.3258, + "step": 4099000 + }, + { + "epoch": 20.31, + "learning_rate": 3.984847629097863e-05, + "loss": 2.3186, + "step": 4099500 + }, + { + "epoch": 20.31, + "learning_rate": 3.984723770455255e-05, + "loss": 2.3401, + "step": 4100000 + }, + { + "epoch": 20.32, + "learning_rate": 3.984600159529932e-05, + "loss": 2.3358, + "step": 4100500 + }, + { + "epoch": 20.32, + "learning_rate": 3.984476300887324e-05, + "loss": 2.323, + "step": 4101000 + }, + { + "epoch": 20.32, + "learning_rate": 3.984352442244716e-05, + "loss": 2.3239, + "step": 4101500 + }, + { + "epoch": 20.32, + "learning_rate": 3.9842285836021074e-05, + "loss": 2.2897, + "step": 4102000 + }, + { + "epoch": 20.33, + "learning_rate": 3.9841049726767836e-05, + "loss": 2.3372, + "step": 4102500 + }, + { + "epoch": 20.33, + "learning_rate": 3.983981114034175e-05, + "loss": 2.3183, + "step": 4103000 + }, + { + "epoch": 20.33, + "learning_rate": 3.983857255391567e-05, + "loss": 2.3244, + "step": 4103500 + }, + { + "epoch": 20.33, + "learning_rate": 3.9837333967489587e-05, + "loss": 2.341, + "step": 4104000 + }, + { + "epoch": 20.34, + "learning_rate": 3.9836095381063503e-05, + "loss": 2.3251, + "step": 4104500 + }, + { + "epoch": 20.34, + "learning_rate": 3.983485679463742e-05, + "loss": 2.3282, + "step": 4105000 + }, + { + "epoch": 20.34, + "learning_rate": 3.983361820821133e-05, + "loss": 2.3602, + "step": 4105500 + }, + { + "epoch": 20.34, + "learning_rate": 3.983237962178525e-05, + "loss": 2.3238, + "step": 4106000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9831141035359164e-05, + "loss": 2.3484, + "step": 4106500 + }, + { + "epoch": 20.35, + "learning_rate": 3.982990244893308e-05, + "loss": 2.3466, + "step": 4107000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9828663862507e-05, + "loss": 2.3151, + "step": 4107500 + }, + { + "epoch": 20.35, + "learning_rate": 3.9827427753253774e-05, + "loss": 2.3078, + "step": 4108000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9826189166827684e-05, + "loss": 2.3361, + "step": 4108500 + }, + { + "epoch": 20.36, + "learning_rate": 3.98249505804016e-05, + "loss": 2.3302, + "step": 4109000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982371199397552e-05, + "loss": 2.3368, + "step": 4109500 + }, + { + "epoch": 20.36, + "learning_rate": 3.9822473407549435e-05, + "loss": 2.3268, + "step": 4110000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982123482112335e-05, + "loss": 2.337, + "step": 4110500 + }, + { + "epoch": 20.37, + "learning_rate": 3.981999623469727e-05, + "loss": 2.3263, + "step": 4111000 + }, + { + "epoch": 20.37, + "learning_rate": 3.981875764827118e-05, + "loss": 2.3309, + "step": 4111500 + }, + { + "epoch": 20.37, + "learning_rate": 3.9817519061845096e-05, + "loss": 2.3406, + "step": 4112000 + }, + { + "epoch": 20.37, + "learning_rate": 3.981628047541901e-05, + "loss": 2.304, + "step": 4112500 + }, + { + "epoch": 20.38, + "learning_rate": 3.981504188899293e-05, + "loss": 2.3435, + "step": 4113000 + }, + { + "epoch": 20.38, + "learning_rate": 3.9813803302566847e-05, + "loss": 2.3374, + "step": 4113500 + }, + { + "epoch": 20.38, + "learning_rate": 3.9812567193313615e-05, + "loss": 2.334, + "step": 4114000 + }, + { + "epoch": 20.38, + "learning_rate": 3.981132860688753e-05, + "loss": 2.3147, + "step": 4114500 + }, + { + "epoch": 20.39, + "learning_rate": 3.981009002046145e-05, + "loss": 2.33, + "step": 4115000 + }, + { + "epoch": 20.39, + "learning_rate": 3.9808851434035366e-05, + "loss": 2.3088, + "step": 4115500 + }, + { + "epoch": 20.39, + "learning_rate": 3.980761284760928e-05, + "loss": 2.3281, + "step": 4116000 + }, + { + "epoch": 20.39, + "learning_rate": 3.98063742611832e-05, + "loss": 2.3257, + "step": 4116500 + }, + { + "epoch": 20.4, + "learning_rate": 3.980513815192997e-05, + "loss": 2.2962, + "step": 4117000 + }, + { + "epoch": 20.4, + "learning_rate": 3.9803899565503886e-05, + "loss": 2.3362, + "step": 4117500 + }, + { + "epoch": 20.4, + "learning_rate": 3.98026609790778e-05, + "loss": 2.3423, + "step": 4118000 + }, + { + "epoch": 20.4, + "learning_rate": 3.980142239265171e-05, + "loss": 2.2976, + "step": 4118500 + }, + { + "epoch": 20.41, + "learning_rate": 3.980018380622563e-05, + "loss": 2.3139, + "step": 4119000 + }, + { + "epoch": 20.41, + "learning_rate": 3.979894521979955e-05, + "loss": 2.3334, + "step": 4119500 + }, + { + "epoch": 20.41, + "learning_rate": 3.9797709110546315e-05, + "loss": 2.3412, + "step": 4120000 + }, + { + "epoch": 20.41, + "learning_rate": 3.979647052412023e-05, + "loss": 2.3465, + "step": 4120500 + }, + { + "epoch": 20.42, + "learning_rate": 3.979523193769415e-05, + "loss": 2.3413, + "step": 4121000 + }, + { + "epoch": 20.42, + "learning_rate": 3.9793993351268066e-05, + "loss": 2.3351, + "step": 4121500 + }, + { + "epoch": 20.42, + "learning_rate": 3.979275476484198e-05, + "loss": 2.3251, + "step": 4122000 + }, + { + "epoch": 20.42, + "learning_rate": 3.979151865558875e-05, + "loss": 2.3504, + "step": 4122500 + }, + { + "epoch": 20.43, + "learning_rate": 3.979028006916267e-05, + "loss": 2.3297, + "step": 4123000 + }, + { + "epoch": 20.43, + "learning_rate": 3.9789041482736586e-05, + "loss": 2.3087, + "step": 4123500 + }, + { + "epoch": 20.43, + "learning_rate": 3.97878028963105e-05, + "loss": 2.3201, + "step": 4124000 + }, + { + "epoch": 20.43, + "learning_rate": 3.978656430988442e-05, + "loss": 2.2992, + "step": 4124500 + }, + { + "epoch": 20.44, + "learning_rate": 3.978532572345833e-05, + "loss": 2.3279, + "step": 4125000 + }, + { + "epoch": 20.44, + "learning_rate": 3.97840896142051e-05, + "loss": 2.3199, + "step": 4125500 + }, + { + "epoch": 20.44, + "learning_rate": 3.9782851027779016e-05, + "loss": 2.3297, + "step": 4126000 + }, + { + "epoch": 20.44, + "learning_rate": 3.978161244135293e-05, + "loss": 2.3423, + "step": 4126500 + }, + { + "epoch": 20.45, + "learning_rate": 3.978037385492685e-05, + "loss": 2.3141, + "step": 4127000 + }, + { + "epoch": 20.45, + "learning_rate": 3.9779135268500766e-05, + "loss": 2.336, + "step": 4127500 + }, + { + "epoch": 20.45, + "learning_rate": 3.977789668207468e-05, + "loss": 2.3278, + "step": 4128000 + }, + { + "epoch": 20.45, + "learning_rate": 3.97766580956486e-05, + "loss": 2.3016, + "step": 4128500 + }, + { + "epoch": 20.46, + "learning_rate": 3.977541950922252e-05, + "loss": 2.3267, + "step": 4129000 + }, + { + "epoch": 20.46, + "learning_rate": 3.9774183399969286e-05, + "loss": 2.3293, + "step": 4129500 + }, + { + "epoch": 20.46, + "learning_rate": 3.97729448135432e-05, + "loss": 2.3389, + "step": 4130000 + }, + { + "epoch": 20.46, + "learning_rate": 3.977170622711712e-05, + "loss": 2.3131, + "step": 4130500 + }, + { + "epoch": 20.47, + "learning_rate": 3.977046764069104e-05, + "loss": 2.3192, + "step": 4131000 + }, + { + "epoch": 20.47, + "learning_rate": 3.9769229054264954e-05, + "loss": 2.3528, + "step": 4131500 + }, + { + "epoch": 20.47, + "learning_rate": 3.9767992945011716e-05, + "loss": 2.3322, + "step": 4132000 + }, + { + "epoch": 20.47, + "learning_rate": 3.976675435858563e-05, + "loss": 2.3396, + "step": 4132500 + }, + { + "epoch": 20.48, + "learning_rate": 3.976551824933241e-05, + "loss": 2.3243, + "step": 4133000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976427966290632e-05, + "loss": 2.3496, + "step": 4133500 + }, + { + "epoch": 20.48, + "learning_rate": 3.9763041076480235e-05, + "loss": 2.3338, + "step": 4134000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976180249005415e-05, + "loss": 2.2961, + "step": 4134500 + }, + { + "epoch": 20.49, + "learning_rate": 3.976056390362807e-05, + "loss": 2.3068, + "step": 4135000 + }, + { + "epoch": 20.49, + "learning_rate": 3.9759325317201986e-05, + "loss": 2.358, + "step": 4135500 + }, + { + "epoch": 20.49, + "learning_rate": 3.97580867307759e-05, + "loss": 2.2942, + "step": 4136000 + }, + { + "epoch": 20.49, + "learning_rate": 3.975684814434982e-05, + "loss": 2.3191, + "step": 4136500 + }, + { + "epoch": 20.5, + "learning_rate": 3.975561203509658e-05, + "loss": 2.3382, + "step": 4137000 + }, + { + "epoch": 20.5, + "learning_rate": 3.97543734486705e-05, + "loss": 2.3275, + "step": 4137500 + }, + { + "epoch": 20.5, + "learning_rate": 3.9753134862244416e-05, + "loss": 2.352, + "step": 4138000 + }, + { + "epoch": 20.5, + "learning_rate": 3.975189627581833e-05, + "loss": 2.3277, + "step": 4138500 + }, + { + "epoch": 20.51, + "learning_rate": 3.975066016656511e-05, + "loss": 2.3214, + "step": 4139000 + }, + { + "epoch": 20.51, + "learning_rate": 3.974942405731187e-05, + "loss": 2.3368, + "step": 4139500 + }, + { + "epoch": 20.51, + "learning_rate": 3.974818547088579e-05, + "loss": 2.3367, + "step": 4140000 + }, + { + "epoch": 20.51, + "learning_rate": 3.9746946884459704e-05, + "loss": 2.3376, + "step": 4140500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974570829803362e-05, + "loss": 2.3505, + "step": 4141000 + }, + { + "epoch": 20.52, + "learning_rate": 3.974447218878039e-05, + "loss": 2.3357, + "step": 4141500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974323607952716e-05, + "loss": 2.326, + "step": 4142000 + }, + { + "epoch": 20.52, + "learning_rate": 3.9741997493101076e-05, + "loss": 2.3177, + "step": 4142500 + }, + { + "epoch": 20.53, + "learning_rate": 3.974075890667499e-05, + "loss": 2.3331, + "step": 4143000 + }, + { + "epoch": 20.53, + "learning_rate": 3.973952032024891e-05, + "loss": 2.3333, + "step": 4143500 + }, + { + "epoch": 20.53, + "learning_rate": 3.9738281733822826e-05, + "loss": 2.3378, + "step": 4144000 + }, + { + "epoch": 20.53, + "learning_rate": 3.9737043147396743e-05, + "loss": 2.3116, + "step": 4144500 + }, + { + "epoch": 20.54, + "learning_rate": 3.973580456097066e-05, + "loss": 2.3071, + "step": 4145000 + }, + { + "epoch": 20.54, + "learning_rate": 3.973456597454458e-05, + "loss": 2.302, + "step": 4145500 + }, + { + "epoch": 20.54, + "learning_rate": 3.9733327388118494e-05, + "loss": 2.3401, + "step": 4146000 + }, + { + "epoch": 20.54, + "learning_rate": 3.9732088801692404e-05, + "loss": 2.3107, + "step": 4146500 + }, + { + "epoch": 20.55, + "learning_rate": 3.973085021526632e-05, + "loss": 2.3115, + "step": 4147000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972961162884024e-05, + "loss": 2.3198, + "step": 4147500 + }, + { + "epoch": 20.55, + "learning_rate": 3.9728373042414155e-05, + "loss": 2.3458, + "step": 4148000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972713445598807e-05, + "loss": 2.3251, + "step": 4148500 + }, + { + "epoch": 20.56, + "learning_rate": 3.972589586956199e-05, + "loss": 2.3449, + "step": 4149000 + }, + { + "epoch": 20.56, + "learning_rate": 3.97246572831359e-05, + "loss": 2.3381, + "step": 4149500 + }, + { + "epoch": 20.56, + "learning_rate": 3.9723421173882675e-05, + "loss": 2.3345, + "step": 4150000 + }, + { + "epoch": 20.56, + "learning_rate": 3.972218258745659e-05, + "loss": 2.2837, + "step": 4150500 + }, + { + "epoch": 20.57, + "learning_rate": 3.972094400103051e-05, + "loss": 2.3166, + "step": 4151000 + }, + { + "epoch": 20.57, + "learning_rate": 3.9719705414604426e-05, + "loss": 2.3352, + "step": 4151500 + }, + { + "epoch": 20.57, + "learning_rate": 3.971846682817834e-05, + "loss": 2.3251, + "step": 4152000 + }, + { + "epoch": 20.57, + "learning_rate": 3.971723071892511e-05, + "loss": 2.3379, + "step": 4152500 + }, + { + "epoch": 20.58, + "learning_rate": 3.971599213249902e-05, + "loss": 2.3292, + "step": 4153000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971475354607294e-05, + "loss": 2.3387, + "step": 4153500 + }, + { + "epoch": 20.58, + "learning_rate": 3.9713514959646855e-05, + "loss": 2.3423, + "step": 4154000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971227637322077e-05, + "loss": 2.3238, + "step": 4154500 + }, + { + "epoch": 20.59, + "learning_rate": 3.971104026396754e-05, + "loss": 2.3356, + "step": 4155000 + }, + { + "epoch": 20.59, + "learning_rate": 3.970980167754146e-05, + "loss": 2.3428, + "step": 4155500 + }, + { + "epoch": 20.59, + "learning_rate": 3.9708563091115375e-05, + "loss": 2.3381, + "step": 4156000 + }, + { + "epoch": 20.59, + "learning_rate": 3.970732450468929e-05, + "loss": 2.335, + "step": 4156500 + }, + { + "epoch": 20.6, + "learning_rate": 3.970608591826321e-05, + "loss": 2.3374, + "step": 4157000 + }, + { + "epoch": 20.6, + "learning_rate": 3.9704847331837126e-05, + "loss": 2.3173, + "step": 4157500 + }, + { + "epoch": 20.6, + "learning_rate": 3.9703611222583894e-05, + "loss": 2.3357, + "step": 4158000 + }, + { + "epoch": 20.6, + "learning_rate": 3.970237263615781e-05, + "loss": 2.3303, + "step": 4158500 + }, + { + "epoch": 20.61, + "learning_rate": 3.9701136526904573e-05, + "loss": 2.3437, + "step": 4159000 + }, + { + "epoch": 20.61, + "learning_rate": 3.969989794047849e-05, + "loss": 2.2999, + "step": 4159500 + }, + { + "epoch": 20.61, + "learning_rate": 3.969865935405241e-05, + "loss": 2.3308, + "step": 4160000 + }, + { + "epoch": 20.61, + "learning_rate": 3.9697420767626324e-05, + "loss": 2.3444, + "step": 4160500 + }, + { + "epoch": 20.62, + "learning_rate": 3.969618218120024e-05, + "loss": 2.3279, + "step": 4161000 + }, + { + "epoch": 20.62, + "learning_rate": 3.969494359477416e-05, + "loss": 2.3423, + "step": 4161500 + }, + { + "epoch": 20.62, + "learning_rate": 3.9693705008348075e-05, + "loss": 2.352, + "step": 4162000 + }, + { + "epoch": 20.62, + "learning_rate": 3.969246642192199e-05, + "loss": 2.3336, + "step": 4162500 + }, + { + "epoch": 20.62, + "learning_rate": 3.969122783549591e-05, + "loss": 2.3079, + "step": 4163000 + }, + { + "epoch": 20.63, + "learning_rate": 3.9689989249069826e-05, + "loss": 2.3315, + "step": 4163500 + }, + { + "epoch": 20.63, + "learning_rate": 3.9688753139816595e-05, + "loss": 2.3208, + "step": 4164000 + }, + { + "epoch": 20.63, + "learning_rate": 3.968751455339051e-05, + "loss": 2.3149, + "step": 4164500 + }, + { + "epoch": 20.63, + "learning_rate": 3.968627596696443e-05, + "loss": 2.3026, + "step": 4165000 + }, + { + "epoch": 20.64, + "learning_rate": 3.9685037380538345e-05, + "loss": 2.3623, + "step": 4165500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968379879411226e-05, + "loss": 2.3316, + "step": 4166000 + }, + { + "epoch": 20.64, + "learning_rate": 3.968256020768617e-05, + "loss": 2.3202, + "step": 4166500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968132162126009e-05, + "loss": 2.3328, + "step": 4167000 + }, + { + "epoch": 20.65, + "learning_rate": 3.9680083034834006e-05, + "loss": 2.3363, + "step": 4167500 + }, + { + "epoch": 20.65, + "learning_rate": 3.9678846925580775e-05, + "loss": 2.339, + "step": 4168000 + }, + { + "epoch": 20.65, + "learning_rate": 3.967760833915469e-05, + "loss": 2.3511, + "step": 4168500 + }, + { + "epoch": 20.65, + "learning_rate": 3.967636975272861e-05, + "loss": 2.3296, + "step": 4169000 + }, + { + "epoch": 20.66, + "learning_rate": 3.967513364347538e-05, + "loss": 2.3214, + "step": 4169500 + }, + { + "epoch": 20.66, + "learning_rate": 3.9673895057049295e-05, + "loss": 2.3289, + "step": 4170000 + }, + { + "epoch": 20.66, + "learning_rate": 3.967265647062321e-05, + "loss": 2.333, + "step": 4170500 + }, + { + "epoch": 20.66, + "learning_rate": 3.967141788419713e-05, + "loss": 2.3358, + "step": 4171000 + }, + { + "epoch": 20.67, + "learning_rate": 3.9670179297771045e-05, + "loss": 2.3285, + "step": 4171500 + }, + { + "epoch": 20.67, + "learning_rate": 3.966894318851781e-05, + "loss": 2.3058, + "step": 4172000 + }, + { + "epoch": 20.67, + "learning_rate": 3.9667704602091724e-05, + "loss": 2.3333, + "step": 4172500 + }, + { + "epoch": 20.67, + "learning_rate": 3.966646601566564e-05, + "loss": 2.3302, + "step": 4173000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966522742923956e-05, + "loss": 2.3255, + "step": 4173500 + }, + { + "epoch": 20.68, + "learning_rate": 3.9663988842813475e-05, + "loss": 2.3237, + "step": 4174000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966275025638739e-05, + "loss": 2.3398, + "step": 4174500 + }, + { + "epoch": 20.68, + "learning_rate": 3.966151166996131e-05, + "loss": 2.3193, + "step": 4175000 + }, + { + "epoch": 20.69, + "learning_rate": 3.9660273083535226e-05, + "loss": 2.3686, + "step": 4175500 + }, + { + "epoch": 20.69, + "learning_rate": 3.9659036974281995e-05, + "loss": 2.3719, + "step": 4176000 + }, + { + "epoch": 20.69, + "learning_rate": 3.965779838785591e-05, + "loss": 2.3404, + "step": 4176500 + }, + { + "epoch": 20.69, + "learning_rate": 3.965655980142983e-05, + "loss": 2.333, + "step": 4177000 + }, + { + "epoch": 20.7, + "learning_rate": 3.965532616934945e-05, + "loss": 2.339, + "step": 4177500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965408758292336e-05, + "loss": 2.3373, + "step": 4178000 + }, + { + "epoch": 20.7, + "learning_rate": 3.9652851473670135e-05, + "loss": 2.3554, + "step": 4178500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965161288724405e-05, + "loss": 2.3353, + "step": 4179000 + }, + { + "epoch": 20.71, + "learning_rate": 3.965037430081797e-05, + "loss": 2.3286, + "step": 4179500 + }, + { + "epoch": 20.71, + "learning_rate": 3.9649135714391886e-05, + "loss": 2.3024, + "step": 4180000 + }, + { + "epoch": 20.71, + "learning_rate": 3.964789960513865e-05, + "loss": 2.3233, + "step": 4180500 + }, + { + "epoch": 20.71, + "learning_rate": 3.9646661018712565e-05, + "loss": 2.328, + "step": 4181000 + }, + { + "epoch": 20.72, + "learning_rate": 3.964542243228648e-05, + "loss": 2.301, + "step": 4181500 + }, + { + "epoch": 20.72, + "learning_rate": 3.96441838458604e-05, + "loss": 2.3744, + "step": 4182000 + }, + { + "epoch": 20.72, + "learning_rate": 3.9642945259434316e-05, + "loss": 2.3801, + "step": 4182500 + }, + { + "epoch": 20.72, + "learning_rate": 3.964170667300823e-05, + "loss": 2.3168, + "step": 4183000 + }, + { + "epoch": 20.73, + "learning_rate": 3.964046808658215e-05, + "loss": 2.3182, + "step": 4183500 + }, + { + "epoch": 20.73, + "learning_rate": 3.9639229500156066e-05, + "loss": 2.3429, + "step": 4184000 + }, + { + "epoch": 20.73, + "learning_rate": 3.9637990913729977e-05, + "loss": 2.3526, + "step": 4184500 + }, + { + "epoch": 20.73, + "learning_rate": 3.9636752327303893e-05, + "loss": 2.3447, + "step": 4185000 + }, + { + "epoch": 20.74, + "learning_rate": 3.963551374087781e-05, + "loss": 2.3361, + "step": 4185500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963427515445173e-05, + "loss": 2.3486, + "step": 4186000 + }, + { + "epoch": 20.74, + "learning_rate": 3.9633036568025644e-05, + "loss": 2.3517, + "step": 4186500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963179798159956e-05, + "loss": 2.3206, + "step": 4187000 + }, + { + "epoch": 20.75, + "learning_rate": 3.963055939517348e-05, + "loss": 2.3178, + "step": 4187500 + }, + { + "epoch": 20.75, + "learning_rate": 3.9629320808747395e-05, + "loss": 2.3548, + "step": 4188000 + }, + { + "epoch": 20.75, + "learning_rate": 3.962808222232131e-05, + "loss": 2.3043, + "step": 4188500 + }, + { + "epoch": 20.75, + "learning_rate": 3.962684859024093e-05, + "loss": 2.3362, + "step": 4189000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962561000381485e-05, + "loss": 2.3417, + "step": 4189500 + }, + { + "epoch": 20.76, + "learning_rate": 3.9624371417388767e-05, + "loss": 2.3329, + "step": 4190000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962313283096268e-05, + "loss": 2.3078, + "step": 4190500 + }, + { + "epoch": 20.76, + "learning_rate": 3.9621894244536594e-05, + "loss": 2.3241, + "step": 4191000 + }, + { + "epoch": 20.77, + "learning_rate": 3.962065565811051e-05, + "loss": 2.3591, + "step": 4191500 + }, + { + "epoch": 20.77, + "learning_rate": 3.961941707168443e-05, + "loss": 2.3285, + "step": 4192000 + }, + { + "epoch": 20.77, + "learning_rate": 3.9618178485258344e-05, + "loss": 2.3298, + "step": 4192500 + }, + { + "epoch": 20.77, + "learning_rate": 3.961694237600512e-05, + "loss": 2.3402, + "step": 4193000 + }, + { + "epoch": 20.78, + "learning_rate": 3.961570378957903e-05, + "loss": 2.3157, + "step": 4193500 + }, + { + "epoch": 20.78, + "learning_rate": 3.961446520315295e-05, + "loss": 2.3364, + "step": 4194000 + }, + { + "epoch": 20.78, + "learning_rate": 3.9613226616726864e-05, + "loss": 2.339, + "step": 4194500 + }, + { + "epoch": 20.78, + "learning_rate": 3.961198803030078e-05, + "loss": 2.3329, + "step": 4195000 + }, + { + "epoch": 20.79, + "learning_rate": 3.961075192104755e-05, + "loss": 2.3085, + "step": 4195500 + }, + { + "epoch": 20.79, + "learning_rate": 3.960951333462147e-05, + "loss": 2.3486, + "step": 4196000 + }, + { + "epoch": 20.79, + "learning_rate": 3.9608277225368235e-05, + "loss": 2.3269, + "step": 4196500 + }, + { + "epoch": 20.79, + "learning_rate": 3.960703863894215e-05, + "loss": 2.3502, + "step": 4197000 + }, + { + "epoch": 20.8, + "learning_rate": 3.960580005251607e-05, + "loss": 2.32, + "step": 4197500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960456642043568e-05, + "loss": 2.3226, + "step": 4198000 + }, + { + "epoch": 20.8, + "learning_rate": 3.96033278340096e-05, + "loss": 2.3389, + "step": 4198500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960208924758352e-05, + "loss": 2.321, + "step": 4199000 + }, + { + "epoch": 20.81, + "learning_rate": 3.9600850661157434e-05, + "loss": 2.3624, + "step": 4199500 + }, + { + "epoch": 20.81, + "learning_rate": 3.959961207473135e-05, + "loss": 2.3265, + "step": 4200000 + }, + { + "epoch": 20.81, + "learning_rate": 3.959837596547812e-05, + "loss": 2.3382, + "step": 4200500 + }, + { + "epoch": 20.81, + "learning_rate": 3.959713737905204e-05, + "loss": 2.3298, + "step": 4201000 + }, + { + "epoch": 20.82, + "learning_rate": 3.9595898792625954e-05, + "loss": 2.3306, + "step": 4201500 + }, + { + "epoch": 20.82, + "learning_rate": 3.959466020619987e-05, + "loss": 2.3273, + "step": 4202000 + }, + { + "epoch": 20.82, + "learning_rate": 3.959342409694664e-05, + "loss": 2.3362, + "step": 4202500 + }, + { + "epoch": 20.82, + "learning_rate": 3.9592185510520556e-05, + "loss": 2.3331, + "step": 4203000 + }, + { + "epoch": 20.83, + "learning_rate": 3.9590946924094466e-05, + "loss": 2.3285, + "step": 4203500 + }, + { + "epoch": 20.83, + "learning_rate": 3.958970833766838e-05, + "loss": 2.3242, + "step": 4204000 + }, + { + "epoch": 20.83, + "learning_rate": 3.95884697512423e-05, + "loss": 2.3425, + "step": 4204500 + }, + { + "epoch": 20.83, + "learning_rate": 3.958723116481622e-05, + "loss": 2.3424, + "step": 4205000 + }, + { + "epoch": 20.84, + "learning_rate": 3.9585992578390134e-05, + "loss": 2.3153, + "step": 4205500 + }, + { + "epoch": 20.84, + "learning_rate": 3.958475399196405e-05, + "loss": 2.3106, + "step": 4206000 + }, + { + "epoch": 20.84, + "learning_rate": 3.958351540553797e-05, + "loss": 2.3452, + "step": 4206500 + }, + { + "epoch": 20.84, + "learning_rate": 3.9582276819111885e-05, + "loss": 2.3583, + "step": 4207000 + }, + { + "epoch": 20.85, + "learning_rate": 3.95810382326858e-05, + "loss": 2.339, + "step": 4207500 + }, + { + "epoch": 20.85, + "learning_rate": 3.957979964625972e-05, + "loss": 2.3194, + "step": 4208000 + }, + { + "epoch": 20.85, + "learning_rate": 3.957856353700649e-05, + "loss": 2.3322, + "step": 4208500 + }, + { + "epoch": 20.85, + "learning_rate": 3.9577324950580405e-05, + "loss": 2.3334, + "step": 4209000 + }, + { + "epoch": 20.86, + "learning_rate": 3.957608636415432e-05, + "loss": 2.3398, + "step": 4209500 + }, + { + "epoch": 20.86, + "learning_rate": 3.957484777772824e-05, + "loss": 2.3255, + "step": 4210000 + }, + { + "epoch": 20.86, + "learning_rate": 3.9573611668475e-05, + "loss": 2.3186, + "step": 4210500 + }, + { + "epoch": 20.86, + "learning_rate": 3.957237308204892e-05, + "loss": 2.3099, + "step": 4211000 + }, + { + "epoch": 20.87, + "learning_rate": 3.9571134495622834e-05, + "loss": 2.3274, + "step": 4211500 + }, + { + "epoch": 20.87, + "learning_rate": 3.956989590919675e-05, + "loss": 2.3277, + "step": 4212000 + }, + { + "epoch": 20.87, + "learning_rate": 3.956865732277067e-05, + "loss": 2.3225, + "step": 4212500 + }, + { + "epoch": 20.87, + "learning_rate": 3.9567418736344585e-05, + "loss": 2.3221, + "step": 4213000 + }, + { + "epoch": 20.88, + "learning_rate": 3.95661801499185e-05, + "loss": 2.337, + "step": 4213500 + }, + { + "epoch": 20.88, + "learning_rate": 3.956494156349242e-05, + "loss": 2.3301, + "step": 4214000 + }, + { + "epoch": 20.88, + "learning_rate": 3.9563702977066336e-05, + "loss": 2.3094, + "step": 4214500 + }, + { + "epoch": 20.88, + "learning_rate": 3.956246439064025e-05, + "loss": 2.353, + "step": 4215000 + }, + { + "epoch": 20.89, + "learning_rate": 3.956122580421417e-05, + "loss": 2.3262, + "step": 4215500 + }, + { + "epoch": 20.89, + "learning_rate": 3.955998969496094e-05, + "loss": 2.3429, + "step": 4216000 + }, + { + "epoch": 20.89, + "learning_rate": 3.95587535857077e-05, + "loss": 2.3003, + "step": 4216500 + }, + { + "epoch": 20.89, + "learning_rate": 3.955751499928162e-05, + "loss": 2.3653, + "step": 4217000 + }, + { + "epoch": 20.89, + "learning_rate": 3.9556276412855534e-05, + "loss": 2.3238, + "step": 4217500 + }, + { + "epoch": 20.9, + "learning_rate": 3.955503782642945e-05, + "loss": 2.3313, + "step": 4218000 + }, + { + "epoch": 20.9, + "learning_rate": 3.955379924000337e-05, + "loss": 2.3488, + "step": 4218500 + }, + { + "epoch": 20.9, + "learning_rate": 3.9552560653577285e-05, + "loss": 2.3553, + "step": 4219000 + }, + { + "epoch": 20.9, + "learning_rate": 3.95513220671512e-05, + "loss": 2.3272, + "step": 4219500 + }, + { + "epoch": 20.91, + "learning_rate": 3.955008348072512e-05, + "loss": 2.3104, + "step": 4220000 + }, + { + "epoch": 20.91, + "learning_rate": 3.9548844894299036e-05, + "loss": 2.3523, + "step": 4220500 + }, + { + "epoch": 20.91, + "learning_rate": 3.954760630787295e-05, + "loss": 2.3315, + "step": 4221000 + }, + { + "epoch": 20.91, + "learning_rate": 3.954636772144687e-05, + "loss": 2.3421, + "step": 4221500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954512913502079e-05, + "loss": 2.3485, + "step": 4222000 + }, + { + "epoch": 20.92, + "learning_rate": 3.9543893025767556e-05, + "loss": 2.3143, + "step": 4222500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954265443934147e-05, + "loss": 2.3271, + "step": 4223000 + }, + { + "epoch": 20.92, + "learning_rate": 3.954141585291539e-05, + "loss": 2.3282, + "step": 4223500 + }, + { + "epoch": 20.93, + "learning_rate": 3.9540177266489306e-05, + "loss": 2.3492, + "step": 4224000 + }, + { + "epoch": 20.93, + "learning_rate": 3.953894115723607e-05, + "loss": 2.3315, + "step": 4224500 + }, + { + "epoch": 20.93, + "learning_rate": 3.9537702570809985e-05, + "loss": 2.3257, + "step": 4225000 + }, + { + "epoch": 20.93, + "learning_rate": 3.95364639843839e-05, + "loss": 2.351, + "step": 4225500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953522539795782e-05, + "loss": 2.3266, + "step": 4226000 + }, + { + "epoch": 20.94, + "learning_rate": 3.9533986811531736e-05, + "loss": 2.3206, + "step": 4226500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953274822510565e-05, + "loss": 2.343, + "step": 4227000 + }, + { + "epoch": 20.94, + "learning_rate": 3.953150963867957e-05, + "loss": 2.3361, + "step": 4227500 + }, + { + "epoch": 20.95, + "learning_rate": 3.953027105225349e-05, + "loss": 2.2889, + "step": 4228000 + }, + { + "epoch": 20.95, + "learning_rate": 3.95290374201731e-05, + "loss": 2.3352, + "step": 4228500 + }, + { + "epoch": 20.95, + "learning_rate": 3.952779883374702e-05, + "loss": 2.3433, + "step": 4229000 + }, + { + "epoch": 20.95, + "learning_rate": 3.9526560247320935e-05, + "loss": 2.326, + "step": 4229500 + }, + { + "epoch": 20.96, + "learning_rate": 3.952532413806771e-05, + "loss": 2.3601, + "step": 4230000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952408555164163e-05, + "loss": 2.314, + "step": 4230500 + }, + { + "epoch": 20.96, + "learning_rate": 3.9522846965215544e-05, + "loss": 2.3232, + "step": 4231000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952160837878946e-05, + "loss": 2.3366, + "step": 4231500 + }, + { + "epoch": 20.97, + "learning_rate": 3.952036979236337e-05, + "loss": 2.3331, + "step": 4232000 + }, + { + "epoch": 20.97, + "learning_rate": 3.951913120593729e-05, + "loss": 2.3387, + "step": 4232500 + }, + { + "epoch": 20.97, + "learning_rate": 3.951789509668406e-05, + "loss": 2.3342, + "step": 4233000 + }, + { + "epoch": 20.97, + "learning_rate": 3.9516656510257974e-05, + "loss": 2.3227, + "step": 4233500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951541792383189e-05, + "loss": 2.3227, + "step": 4234000 + }, + { + "epoch": 20.98, + "learning_rate": 3.951417933740581e-05, + "loss": 2.3304, + "step": 4234500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951294075097972e-05, + "loss": 2.3277, + "step": 4235000 + }, + { + "epoch": 20.98, + "learning_rate": 3.9511702164553635e-05, + "loss": 2.3255, + "step": 4235500 + }, + { + "epoch": 20.99, + "learning_rate": 3.951046357812755e-05, + "loss": 2.3444, + "step": 4236000 + }, + { + "epoch": 20.99, + "learning_rate": 3.950922499170147e-05, + "loss": 2.3233, + "step": 4236500 + }, + { + "epoch": 20.99, + "learning_rate": 3.9507986405275386e-05, + "loss": 2.3227, + "step": 4237000 + }, + { + "epoch": 20.99, + "learning_rate": 3.95067478188493e-05, + "loss": 2.3412, + "step": 4237500 + }, + { + "epoch": 21.0, + "learning_rate": 3.950550923242322e-05, + "loss": 2.336, + "step": 4238000 + }, + { + "epoch": 21.0, + "learning_rate": 3.9504270645997136e-05, + "loss": 2.3633, + "step": 4238500 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.6523621169343081, + "eval_accuracy_mlm": 0.6068608911574469, + "eval_accuracy_nsp": 0.8669707678489482, + "eval_loss": 2.365056276321411, + "eval_runtime": 145.9769, + "eval_samples_per_second": 1746.57, + "eval_steps_per_second": 72.779, + "step": 4238703 + }, + { + "epoch": 21.0, + "learning_rate": 3.9503034536743905e-05, + "loss": 2.2921, + "step": 4239000 + }, + { + "epoch": 21.0, + "learning_rate": 3.950179595031782e-05, + "loss": 2.3045, + "step": 4239500 + }, + { + "epoch": 21.01, + "learning_rate": 3.950055736389174e-05, + "loss": 2.2858, + "step": 4240000 + }, + { + "epoch": 21.01, + "learning_rate": 3.9499318777465656e-05, + "loss": 2.2861, + "step": 4240500 + }, + { + "epoch": 21.01, + "learning_rate": 3.949808019103957e-05, + "loss": 2.2879, + "step": 4241000 + }, + { + "epoch": 21.01, + "learning_rate": 3.949684160461349e-05, + "loss": 2.2669, + "step": 4241500 + }, + { + "epoch": 21.02, + "learning_rate": 3.949560301818741e-05, + "loss": 2.3146, + "step": 4242000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949436690893417e-05, + "loss": 2.2891, + "step": 4242500 + }, + { + "epoch": 21.02, + "learning_rate": 3.9493128322508086e-05, + "loss": 2.3136, + "step": 4243000 + }, + { + "epoch": 21.02, + "learning_rate": 3.9491889736082e-05, + "loss": 2.2931, + "step": 4243500 + }, + { + "epoch": 21.03, + "learning_rate": 3.949065114965592e-05, + "loss": 2.3204, + "step": 4244000 + }, + { + "epoch": 21.03, + "learning_rate": 3.9489412563229836e-05, + "loss": 2.2862, + "step": 4244500 + }, + { + "epoch": 21.03, + "learning_rate": 3.9488176453976605e-05, + "loss": 2.2945, + "step": 4245000 + }, + { + "epoch": 21.03, + "learning_rate": 3.948693786755052e-05, + "loss": 2.3142, + "step": 4245500 + }, + { + "epoch": 21.04, + "learning_rate": 3.948569928112444e-05, + "loss": 2.305, + "step": 4246000 + }, + { + "epoch": 21.04, + "learning_rate": 3.9484460694698356e-05, + "loss": 2.2793, + "step": 4246500 + }, + { + "epoch": 21.04, + "learning_rate": 3.948322210827227e-05, + "loss": 2.3251, + "step": 4247000 + }, + { + "epoch": 21.04, + "learning_rate": 3.948198352184619e-05, + "loss": 2.3238, + "step": 4247500 + }, + { + "epoch": 21.05, + "learning_rate": 3.948074493542011e-05, + "loss": 2.2985, + "step": 4248000 + }, + { + "epoch": 21.05, + "learning_rate": 3.9479506348994024e-05, + "loss": 2.3153, + "step": 4248500 + }, + { + "epoch": 21.05, + "learning_rate": 3.947826776256794e-05, + "loss": 2.3251, + "step": 4249000 + }, + { + "epoch": 21.05, + "learning_rate": 3.947702917614186e-05, + "loss": 2.2942, + "step": 4249500 + }, + { + "epoch": 21.06, + "learning_rate": 3.9475790589715775e-05, + "loss": 2.2967, + "step": 4250000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947455200328969e-05, + "loss": 2.3266, + "step": 4250500 + }, + { + "epoch": 21.06, + "learning_rate": 3.947331341686361e-05, + "loss": 2.3041, + "step": 4251000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947207730761037e-05, + "loss": 2.3075, + "step": 4251500 + }, + { + "epoch": 21.07, + "learning_rate": 3.947083872118429e-05, + "loss": 2.3057, + "step": 4252000 + }, + { + "epoch": 21.07, + "learning_rate": 3.9469600134758204e-05, + "loss": 2.3371, + "step": 4252500 + }, + { + "epoch": 21.07, + "learning_rate": 3.946836154833212e-05, + "loss": 2.2894, + "step": 4253000 + }, + { + "epoch": 21.07, + "learning_rate": 3.946712296190604e-05, + "loss": 2.3192, + "step": 4253500 + }, + { + "epoch": 21.08, + "learning_rate": 3.946588932982566e-05, + "loss": 2.2943, + "step": 4254000 + }, + { + "epoch": 21.08, + "learning_rate": 3.946465322057243e-05, + "loss": 2.3263, + "step": 4254500 + }, + { + "epoch": 21.08, + "learning_rate": 3.9463414634146345e-05, + "loss": 2.292, + "step": 4255000 + }, + { + "epoch": 21.08, + "learning_rate": 3.946217604772026e-05, + "loss": 2.324, + "step": 4255500 + }, + { + "epoch": 21.09, + "learning_rate": 3.946093746129418e-05, + "loss": 2.3155, + "step": 4256000 + }, + { + "epoch": 21.09, + "learning_rate": 3.9459698874868095e-05, + "loss": 2.3074, + "step": 4256500 + }, + { + "epoch": 21.09, + "learning_rate": 3.9458460288442006e-05, + "loss": 2.2982, + "step": 4257000 + }, + { + "epoch": 21.09, + "learning_rate": 3.945722170201592e-05, + "loss": 2.3279, + "step": 4257500 + }, + { + "epoch": 21.1, + "learning_rate": 3.945598311558984e-05, + "loss": 2.3079, + "step": 4258000 + }, + { + "epoch": 21.1, + "learning_rate": 3.9454744529163756e-05, + "loss": 2.2971, + "step": 4258500 + }, + { + "epoch": 21.1, + "learning_rate": 3.945350594273767e-05, + "loss": 2.2837, + "step": 4259000 + }, + { + "epoch": 21.1, + "learning_rate": 3.945226735631159e-05, + "loss": 2.3128, + "step": 4259500 + }, + { + "epoch": 21.11, + "learning_rate": 3.945103124705836e-05, + "loss": 2.2928, + "step": 4260000 + }, + { + "epoch": 21.11, + "learning_rate": 3.9449792660632276e-05, + "loss": 2.3077, + "step": 4260500 + }, + { + "epoch": 21.11, + "learning_rate": 3.944855407420619e-05, + "loss": 2.2944, + "step": 4261000 + }, + { + "epoch": 21.11, + "learning_rate": 3.944731548778011e-05, + "loss": 2.3236, + "step": 4261500 + }, + { + "epoch": 21.12, + "learning_rate": 3.944607690135402e-05, + "loss": 2.3145, + "step": 4262000 + }, + { + "epoch": 21.12, + "learning_rate": 3.944483831492794e-05, + "loss": 2.2849, + "step": 4262500 + }, + { + "epoch": 21.12, + "learning_rate": 3.9443602205674706e-05, + "loss": 2.3208, + "step": 4263000 + }, + { + "epoch": 21.12, + "learning_rate": 3.944236361924862e-05, + "loss": 2.3103, + "step": 4263500 + }, + { + "epoch": 21.13, + "learning_rate": 3.944112503282254e-05, + "loss": 2.3213, + "step": 4264000 + }, + { + "epoch": 21.13, + "learning_rate": 3.9439886446396456e-05, + "loss": 2.3028, + "step": 4264500 + }, + { + "epoch": 21.13, + "learning_rate": 3.943864785997037e-05, + "loss": 2.3208, + "step": 4265000 + }, + { + "epoch": 21.13, + "learning_rate": 3.943740927354429e-05, + "loss": 2.2868, + "step": 4265500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943617316429106e-05, + "loss": 2.3265, + "step": 4266000 + }, + { + "epoch": 21.14, + "learning_rate": 3.9434934577864976e-05, + "loss": 2.3104, + "step": 4266500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943369599143889e-05, + "loss": 2.325, + "step": 4267000 + }, + { + "epoch": 21.14, + "learning_rate": 3.943245740501281e-05, + "loss": 2.3422, + "step": 4267500 + }, + { + "epoch": 21.15, + "learning_rate": 3.943121881858673e-05, + "loss": 2.3074, + "step": 4268000 + }, + { + "epoch": 21.15, + "learning_rate": 3.9429980232160644e-05, + "loss": 2.3229, + "step": 4268500 + }, + { + "epoch": 21.15, + "learning_rate": 3.942874412290741e-05, + "loss": 2.2827, + "step": 4269000 + }, + { + "epoch": 21.15, + "learning_rate": 3.942750553648132e-05, + "loss": 2.3152, + "step": 4269500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942626695005524e-05, + "loss": 2.3225, + "step": 4270000 + }, + { + "epoch": 21.16, + "learning_rate": 3.9425028363629157e-05, + "loss": 2.3046, + "step": 4270500 + }, + { + "epoch": 21.16, + "learning_rate": 3.9423789777203073e-05, + "loss": 2.3032, + "step": 4271000 + }, + { + "epoch": 21.16, + "learning_rate": 3.942255119077699e-05, + "loss": 2.2956, + "step": 4271500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942131260435091e-05, + "loss": 2.3496, + "step": 4272000 + }, + { + "epoch": 21.17, + "learning_rate": 3.9420074017924824e-05, + "loss": 2.3057, + "step": 4272500 + }, + { + "epoch": 21.17, + "learning_rate": 3.941883790867159e-05, + "loss": 2.2873, + "step": 4273000 + }, + { + "epoch": 21.17, + "learning_rate": 3.941759932224551e-05, + "loss": 2.2936, + "step": 4273500 + }, + { + "epoch": 21.17, + "learning_rate": 3.941636073581943e-05, + "loss": 2.3081, + "step": 4274000 + }, + { + "epoch": 21.18, + "learning_rate": 3.9415122149393344e-05, + "loss": 2.2982, + "step": 4274500 + }, + { + "epoch": 21.18, + "learning_rate": 3.941388604014011e-05, + "loss": 2.3144, + "step": 4275000 + }, + { + "epoch": 21.18, + "learning_rate": 3.941264745371402e-05, + "loss": 2.3072, + "step": 4275500 + }, + { + "epoch": 21.18, + "learning_rate": 3.941140886728794e-05, + "loss": 2.3279, + "step": 4276000 + }, + { + "epoch": 21.19, + "learning_rate": 3.941017028086186e-05, + "loss": 2.3102, + "step": 4276500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9408931694435774e-05, + "loss": 2.298, + "step": 4277000 + }, + { + "epoch": 21.19, + "learning_rate": 3.940769558518255e-05, + "loss": 2.3229, + "step": 4277500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9406456998756466e-05, + "loss": 2.3222, + "step": 4278000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940521841233038e-05, + "loss": 2.2881, + "step": 4278500 + }, + { + "epoch": 21.2, + "learning_rate": 3.940397982590429e-05, + "loss": 2.3254, + "step": 4279000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940274123947821e-05, + "loss": 2.3011, + "step": 4279500 + }, + { + "epoch": 21.2, + "learning_rate": 3.940150265305213e-05, + "loss": 2.3101, + "step": 4280000 + }, + { + "epoch": 21.21, + "learning_rate": 3.9400264066626044e-05, + "loss": 2.3173, + "step": 4280500 + }, + { + "epoch": 21.21, + "learning_rate": 3.939902548019996e-05, + "loss": 2.3111, + "step": 4281000 + }, + { + "epoch": 21.21, + "learning_rate": 3.939778937094673e-05, + "loss": 2.3002, + "step": 4281500 + }, + { + "epoch": 21.21, + "learning_rate": 3.93965532616935e-05, + "loss": 2.2919, + "step": 4282000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9395314675267415e-05, + "loss": 2.2976, + "step": 4282500 + }, + { + "epoch": 21.22, + "learning_rate": 3.9394078566014184e-05, + "loss": 2.3086, + "step": 4283000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9392839979588094e-05, + "loss": 2.3153, + "step": 4283500 + }, + { + "epoch": 21.22, + "learning_rate": 3.939160387033486e-05, + "loss": 2.3233, + "step": 4284000 + }, + { + "epoch": 21.23, + "learning_rate": 3.939036528390878e-05, + "loss": 2.3166, + "step": 4284500 + }, + { + "epoch": 21.23, + "learning_rate": 3.9389129174655556e-05, + "loss": 2.2879, + "step": 4285000 + }, + { + "epoch": 21.23, + "learning_rate": 3.9387890588229466e-05, + "loss": 2.2924, + "step": 4285500 + }, + { + "epoch": 21.23, + "learning_rate": 3.938665200180338e-05, + "loss": 2.3315, + "step": 4286000 + }, + { + "epoch": 21.24, + "learning_rate": 3.93854134153773e-05, + "loss": 2.2909, + "step": 4286500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938417482895122e-05, + "loss": 2.3155, + "step": 4287000 + }, + { + "epoch": 21.24, + "learning_rate": 3.9382936242525134e-05, + "loss": 2.3186, + "step": 4287500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938169765609905e-05, + "loss": 2.3072, + "step": 4288000 + }, + { + "epoch": 21.25, + "learning_rate": 3.938046154684582e-05, + "loss": 2.3222, + "step": 4288500 + }, + { + "epoch": 21.25, + "learning_rate": 3.937922296041973e-05, + "loss": 2.3089, + "step": 4289000 + }, + { + "epoch": 21.25, + "learning_rate": 3.9377984373993646e-05, + "loss": 2.3375, + "step": 4289500 + }, + { + "epoch": 21.25, + "learning_rate": 3.937674578756756e-05, + "loss": 2.3187, + "step": 4290000 + }, + { + "epoch": 21.26, + "learning_rate": 3.937550720114148e-05, + "loss": 2.3231, + "step": 4290500 + }, + { + "epoch": 21.26, + "learning_rate": 3.93742686147154e-05, + "loss": 2.2876, + "step": 4291000 + }, + { + "epoch": 21.26, + "learning_rate": 3.9373030028289314e-05, + "loss": 2.3264, + "step": 4291500 + }, + { + "epoch": 21.26, + "learning_rate": 3.937179144186323e-05, + "loss": 2.3282, + "step": 4292000 + }, + { + "epoch": 21.27, + "learning_rate": 3.937055285543715e-05, + "loss": 2.3319, + "step": 4292500 + }, + { + "epoch": 21.27, + "learning_rate": 3.9369314269011065e-05, + "loss": 2.3423, + "step": 4293000 + }, + { + "epoch": 21.27, + "learning_rate": 3.936807568258498e-05, + "loss": 2.286, + "step": 4293500 + }, + { + "epoch": 21.27, + "learning_rate": 3.93668370961589e-05, + "loss": 2.3192, + "step": 4294000 + }, + { + "epoch": 21.28, + "learning_rate": 3.9365598509732816e-05, + "loss": 2.3145, + "step": 4294500 + }, + { + "epoch": 21.28, + "learning_rate": 3.936435992330673e-05, + "loss": 2.3265, + "step": 4295000 + }, + { + "epoch": 21.28, + "learning_rate": 3.936312133688065e-05, + "loss": 2.334, + "step": 4295500 + }, + { + "epoch": 21.28, + "learning_rate": 3.9361882750454566e-05, + "loss": 2.3322, + "step": 4296000 + }, + { + "epoch": 21.29, + "learning_rate": 3.9360646641201335e-05, + "loss": 2.2928, + "step": 4296500 + }, + { + "epoch": 21.29, + "learning_rate": 3.9359408054775245e-05, + "loss": 2.314, + "step": 4297000 + }, + { + "epoch": 21.29, + "learning_rate": 3.935816946834916e-05, + "loss": 2.3296, + "step": 4297500 + }, + { + "epoch": 21.29, + "learning_rate": 3.935693335909593e-05, + "loss": 2.3279, + "step": 4298000 + }, + { + "epoch": 21.3, + "learning_rate": 3.935569477266985e-05, + "loss": 2.2969, + "step": 4298500 + }, + { + "epoch": 21.3, + "learning_rate": 3.9354456186243765e-05, + "loss": 2.3151, + "step": 4299000 + }, + { + "epoch": 21.3, + "learning_rate": 3.935321759981768e-05, + "loss": 2.3044, + "step": 4299500 + }, + { + "epoch": 21.3, + "learning_rate": 3.935198149056445e-05, + "loss": 2.3075, + "step": 4300000 + }, + { + "epoch": 21.31, + "learning_rate": 3.935074290413837e-05, + "loss": 2.2896, + "step": 4300500 + }, + { + "epoch": 21.31, + "learning_rate": 3.9349504317712285e-05, + "loss": 2.3119, + "step": 4301000 + }, + { + "epoch": 21.31, + "learning_rate": 3.93482657312862e-05, + "loss": 2.3079, + "step": 4301500 + }, + { + "epoch": 21.31, + "learning_rate": 3.934702714486012e-05, + "loss": 2.3173, + "step": 4302000 + }, + { + "epoch": 21.32, + "learning_rate": 3.9345788558434035e-05, + "loss": 2.3188, + "step": 4302500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934454997200795e-05, + "loss": 2.2956, + "step": 4303000 + }, + { + "epoch": 21.32, + "learning_rate": 3.934331138558186e-05, + "loss": 2.3025, + "step": 4303500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934207279915578e-05, + "loss": 2.3133, + "step": 4304000 + }, + { + "epoch": 21.33, + "learning_rate": 3.9340834212729696e-05, + "loss": 2.2965, + "step": 4304500 + }, + { + "epoch": 21.33, + "learning_rate": 3.933959562630361e-05, + "loss": 2.3176, + "step": 4305000 + }, + { + "epoch": 21.33, + "learning_rate": 3.933835703987753e-05, + "loss": 2.3177, + "step": 4305500 + }, + { + "epoch": 21.33, + "learning_rate": 3.933711845345145e-05, + "loss": 2.3032, + "step": 4306000 + }, + { + "epoch": 21.34, + "learning_rate": 3.9335879867025364e-05, + "loss": 2.2993, + "step": 4306500 + }, + { + "epoch": 21.34, + "learning_rate": 3.9334641280599274e-05, + "loss": 2.3499, + "step": 4307000 + }, + { + "epoch": 21.34, + "learning_rate": 3.933340269417319e-05, + "loss": 2.3083, + "step": 4307500 + }, + { + "epoch": 21.34, + "learning_rate": 3.933216658491997e-05, + "loss": 2.3264, + "step": 4308000 + }, + { + "epoch": 21.35, + "learning_rate": 3.9330927998493884e-05, + "loss": 2.3329, + "step": 4308500 + }, + { + "epoch": 21.35, + "learning_rate": 3.93296894120678e-05, + "loss": 2.2855, + "step": 4309000 + }, + { + "epoch": 21.35, + "learning_rate": 3.932845082564172e-05, + "loss": 2.3285, + "step": 4309500 + }, + { + "epoch": 21.35, + "learning_rate": 3.932721223921563e-05, + "loss": 2.2971, + "step": 4310000 + }, + { + "epoch": 21.36, + "learning_rate": 3.9325976129962396e-05, + "loss": 2.3157, + "step": 4310500 + }, + { + "epoch": 21.36, + "learning_rate": 3.9324740020709165e-05, + "loss": 2.3484, + "step": 4311000 + }, + { + "epoch": 21.36, + "learning_rate": 3.932350143428308e-05, + "loss": 2.3032, + "step": 4311500 + }, + { + "epoch": 21.36, + "learning_rate": 3.932226532502985e-05, + "loss": 2.3432, + "step": 4312000 + }, + { + "epoch": 21.37, + "learning_rate": 3.932102673860377e-05, + "loss": 2.3068, + "step": 4312500 + }, + { + "epoch": 21.37, + "learning_rate": 3.9319788152177685e-05, + "loss": 2.3444, + "step": 4313000 + }, + { + "epoch": 21.37, + "learning_rate": 3.93185495657516e-05, + "loss": 2.3294, + "step": 4313500 + }, + { + "epoch": 21.37, + "learning_rate": 3.931731097932552e-05, + "loss": 2.3024, + "step": 4314000 + }, + { + "epoch": 21.38, + "learning_rate": 3.9316072392899436e-05, + "loss": 2.3364, + "step": 4314500 + }, + { + "epoch": 21.38, + "learning_rate": 3.931483380647335e-05, + "loss": 2.3322, + "step": 4315000 + }, + { + "epoch": 21.38, + "learning_rate": 3.931359522004727e-05, + "loss": 2.3135, + "step": 4315500 + }, + { + "epoch": 21.38, + "learning_rate": 3.9312356633621186e-05, + "loss": 2.3176, + "step": 4316000 + }, + { + "epoch": 21.39, + "learning_rate": 3.93111180471951e-05, + "loss": 2.34, + "step": 4316500 + }, + { + "epoch": 21.39, + "learning_rate": 3.9309879460769014e-05, + "loss": 2.3245, + "step": 4317000 + }, + { + "epoch": 21.39, + "learning_rate": 3.930864087434293e-05, + "loss": 2.3008, + "step": 4317500 + }, + { + "epoch": 21.39, + "learning_rate": 3.93074047650897e-05, + "loss": 2.2976, + "step": 4318000 + }, + { + "epoch": 21.4, + "learning_rate": 3.9306166178663616e-05, + "loss": 2.3099, + "step": 4318500 + }, + { + "epoch": 21.4, + "learning_rate": 3.9304930069410385e-05, + "loss": 2.3081, + "step": 4319000 + }, + { + "epoch": 21.4, + "learning_rate": 3.93036914829843e-05, + "loss": 2.3236, + "step": 4319500 + }, + { + "epoch": 21.4, + "learning_rate": 3.930245289655822e-05, + "loss": 2.3435, + "step": 4320000 + }, + { + "epoch": 21.41, + "learning_rate": 3.9301214310132136e-05, + "loss": 2.3184, + "step": 4320500 + }, + { + "epoch": 21.41, + "learning_rate": 3.929997572370605e-05, + "loss": 2.3174, + "step": 4321000 + }, + { + "epoch": 21.41, + "learning_rate": 3.929873713727997e-05, + "loss": 2.3308, + "step": 4321500 + }, + { + "epoch": 21.41, + "learning_rate": 3.9297498550853887e-05, + "loss": 2.2802, + "step": 4322000 + }, + { + "epoch": 21.42, + "learning_rate": 3.9296259964427804e-05, + "loss": 2.2863, + "step": 4322500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929502137800172e-05, + "loss": 2.3012, + "step": 4323000 + }, + { + "epoch": 21.42, + "learning_rate": 3.929378526874848e-05, + "loss": 2.3041, + "step": 4323500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929254915949525e-05, + "loss": 2.343, + "step": 4324000 + }, + { + "epoch": 21.43, + "learning_rate": 3.929131057306917e-05, + "loss": 2.3007, + "step": 4324500 + }, + { + "epoch": 21.43, + "learning_rate": 3.9290071986643085e-05, + "loss": 2.3265, + "step": 4325000 + }, + { + "epoch": 21.43, + "learning_rate": 3.9288833400217e-05, + "loss": 2.3181, + "step": 4325500 + }, + { + "epoch": 21.43, + "learning_rate": 3.928759481379092e-05, + "loss": 2.3068, + "step": 4326000 + }, + { + "epoch": 21.43, + "learning_rate": 3.9286356227364836e-05, + "loss": 2.3173, + "step": 4326500 + }, + { + "epoch": 21.44, + "learning_rate": 3.928511764093875e-05, + "loss": 2.3145, + "step": 4327000 + }, + { + "epoch": 21.44, + "learning_rate": 3.9283881531685515e-05, + "loss": 2.3157, + "step": 4327500 + }, + { + "epoch": 21.44, + "learning_rate": 3.928264294525943e-05, + "loss": 2.3428, + "step": 4328000 + }, + { + "epoch": 21.44, + "learning_rate": 3.928140435883335e-05, + "loss": 2.3414, + "step": 4328500 + }, + { + "epoch": 21.45, + "learning_rate": 3.9280165772407266e-05, + "loss": 2.3233, + "step": 4329000 + }, + { + "epoch": 21.45, + "learning_rate": 3.9278929663154034e-05, + "loss": 2.3465, + "step": 4329500 + }, + { + "epoch": 21.45, + "learning_rate": 3.927769107672795e-05, + "loss": 2.3259, + "step": 4330000 + }, + { + "epoch": 21.45, + "learning_rate": 3.927645249030187e-05, + "loss": 2.3362, + "step": 4330500 + }, + { + "epoch": 21.46, + "learning_rate": 3.9275213903875785e-05, + "loss": 2.3229, + "step": 4331000 + }, + { + "epoch": 21.46, + "learning_rate": 3.92739753174497e-05, + "loss": 2.3119, + "step": 4331500 + }, + { + "epoch": 21.46, + "learning_rate": 3.927273673102362e-05, + "loss": 2.3136, + "step": 4332000 + }, + { + "epoch": 21.46, + "learning_rate": 3.9271498144597536e-05, + "loss": 2.332, + "step": 4332500 + }, + { + "epoch": 21.47, + "learning_rate": 3.927025955817145e-05, + "loss": 2.3427, + "step": 4333000 + }, + { + "epoch": 21.47, + "learning_rate": 3.926902097174537e-05, + "loss": 2.348, + "step": 4333500 + }, + { + "epoch": 21.47, + "learning_rate": 3.926778486249213e-05, + "loss": 2.3072, + "step": 4334000 + }, + { + "epoch": 21.47, + "learning_rate": 3.926654627606605e-05, + "loss": 2.2956, + "step": 4334500 + }, + { + "epoch": 21.48, + "learning_rate": 3.9265310166812824e-05, + "loss": 2.3138, + "step": 4335000 + }, + { + "epoch": 21.48, + "learning_rate": 3.9264071580386735e-05, + "loss": 2.296, + "step": 4335500 + }, + { + "epoch": 21.48, + "learning_rate": 3.926283299396065e-05, + "loss": 2.3195, + "step": 4336000 + }, + { + "epoch": 21.48, + "learning_rate": 3.926159440753457e-05, + "loss": 2.3196, + "step": 4336500 + }, + { + "epoch": 21.49, + "learning_rate": 3.9260358298281344e-05, + "loss": 2.2992, + "step": 4337000 + }, + { + "epoch": 21.49, + "learning_rate": 3.925911971185526e-05, + "loss": 2.3306, + "step": 4337500 + }, + { + "epoch": 21.49, + "learning_rate": 3.925788112542918e-05, + "loss": 2.3089, + "step": 4338000 + }, + { + "epoch": 21.49, + "learning_rate": 3.925664253900309e-05, + "loss": 2.3162, + "step": 4338500 + }, + { + "epoch": 21.5, + "learning_rate": 3.9255403952577005e-05, + "loss": 2.3234, + "step": 4339000 + }, + { + "epoch": 21.5, + "learning_rate": 3.925416536615092e-05, + "loss": 2.3258, + "step": 4339500 + }, + { + "epoch": 21.5, + "learning_rate": 3.925292677972484e-05, + "loss": 2.3317, + "step": 4340000 + }, + { + "epoch": 21.5, + "learning_rate": 3.925168819329875e-05, + "loss": 2.3034, + "step": 4340500 + }, + { + "epoch": 21.51, + "learning_rate": 3.9250449606872666e-05, + "loss": 2.3462, + "step": 4341000 + }, + { + "epoch": 21.51, + "learning_rate": 3.924921102044658e-05, + "loss": 2.3138, + "step": 4341500 + }, + { + "epoch": 21.51, + "learning_rate": 3.924797491119335e-05, + "loss": 2.3311, + "step": 4342000 + }, + { + "epoch": 21.51, + "learning_rate": 3.924673632476727e-05, + "loss": 2.3276, + "step": 4342500 + }, + { + "epoch": 21.52, + "learning_rate": 3.9245497738341186e-05, + "loss": 2.3321, + "step": 4343000 + }, + { + "epoch": 21.52, + "learning_rate": 3.924426162908796e-05, + "loss": 2.2764, + "step": 4343500 + }, + { + "epoch": 21.52, + "learning_rate": 3.924302304266188e-05, + "loss": 2.3127, + "step": 4344000 + }, + { + "epoch": 21.52, + "learning_rate": 3.9241784456235795e-05, + "loss": 2.3023, + "step": 4344500 + }, + { + "epoch": 21.53, + "learning_rate": 3.9240545869809705e-05, + "loss": 2.2963, + "step": 4345000 + }, + { + "epoch": 21.53, + "learning_rate": 3.923930728338362e-05, + "loss": 2.3223, + "step": 4345500 + }, + { + "epoch": 21.53, + "learning_rate": 3.923806869695754e-05, + "loss": 2.3039, + "step": 4346000 + }, + { + "epoch": 21.53, + "learning_rate": 3.9236830110531456e-05, + "loss": 2.3454, + "step": 4346500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923559152410537e-05, + "loss": 2.3503, + "step": 4347000 + }, + { + "epoch": 21.54, + "learning_rate": 3.923435293767928e-05, + "loss": 2.3224, + "step": 4347500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923311682842605e-05, + "loss": 2.3291, + "step": 4348000 + }, + { + "epoch": 21.54, + "learning_rate": 3.923187824199997e-05, + "loss": 2.3457, + "step": 4348500 + }, + { + "epoch": 21.55, + "learning_rate": 3.9230639655573886e-05, + "loss": 2.3133, + "step": 4349000 + }, + { + "epoch": 21.55, + "learning_rate": 3.92294010691478e-05, + "loss": 2.3439, + "step": 4349500 + }, + { + "epoch": 21.55, + "learning_rate": 3.922816248272172e-05, + "loss": 2.3106, + "step": 4350000 + }, + { + "epoch": 21.55, + "learning_rate": 3.9226926373468495e-05, + "loss": 2.3065, + "step": 4350500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922569026421526e-05, + "loss": 2.3188, + "step": 4351000 + }, + { + "epoch": 21.56, + "learning_rate": 3.9224451677789174e-05, + "loss": 2.3561, + "step": 4351500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922321309136309e-05, + "loss": 2.2994, + "step": 4352000 + }, + { + "epoch": 21.56, + "learning_rate": 3.922197450493701e-05, + "loss": 2.2983, + "step": 4352500 + }, + { + "epoch": 21.57, + "learning_rate": 3.9220735918510925e-05, + "loss": 2.2753, + "step": 4353000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921949733208484e-05, + "loss": 2.3172, + "step": 4353500 + }, + { + "epoch": 21.57, + "learning_rate": 3.921825874565876e-05, + "loss": 2.3432, + "step": 4354000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921702015923267e-05, + "loss": 2.3251, + "step": 4354500 + }, + { + "epoch": 21.58, + "learning_rate": 3.9215781572806586e-05, + "loss": 2.3145, + "step": 4355000 + }, + { + "epoch": 21.58, + "learning_rate": 3.92145429863805e-05, + "loss": 2.3169, + "step": 4355500 + }, + { + "epoch": 21.58, + "learning_rate": 3.921330687712728e-05, + "loss": 2.3224, + "step": 4356000 + }, + { + "epoch": 21.58, + "learning_rate": 3.9212068290701195e-05, + "loss": 2.2893, + "step": 4356500 + }, + { + "epoch": 21.59, + "learning_rate": 3.921082970427511e-05, + "loss": 2.3134, + "step": 4357000 + }, + { + "epoch": 21.59, + "learning_rate": 3.920959111784902e-05, + "loss": 2.3234, + "step": 4357500 + }, + { + "epoch": 21.59, + "learning_rate": 3.920835500859579e-05, + "loss": 2.2992, + "step": 4358000 + }, + { + "epoch": 21.59, + "learning_rate": 3.920711642216971e-05, + "loss": 2.3117, + "step": 4358500 + }, + { + "epoch": 21.6, + "learning_rate": 3.9205877835743625e-05, + "loss": 2.321, + "step": 4359000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920463924931754e-05, + "loss": 2.2993, + "step": 4359500 + }, + { + "epoch": 21.6, + "learning_rate": 3.920340314006431e-05, + "loss": 2.319, + "step": 4360000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920216455363823e-05, + "loss": 2.3492, + "step": 4360500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9200928444384996e-05, + "loss": 2.3343, + "step": 4361000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919968985795891e-05, + "loss": 2.3362, + "step": 4361500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9198451271532824e-05, + "loss": 2.3155, + "step": 4362000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919721516227959e-05, + "loss": 2.3279, + "step": 4362500 + }, + { + "epoch": 21.62, + "learning_rate": 3.919597657585351e-05, + "loss": 2.3272, + "step": 4363000 + }, + { + "epoch": 21.62, + "learning_rate": 3.9194737989427426e-05, + "loss": 2.3127, + "step": 4363500 + }, + { + "epoch": 21.62, + "learning_rate": 3.919349940300134e-05, + "loss": 2.3002, + "step": 4364000 + }, + { + "epoch": 21.62, + "learning_rate": 3.919226081657526e-05, + "loss": 2.3042, + "step": 4364500 + }, + { + "epoch": 21.63, + "learning_rate": 3.919102223014918e-05, + "loss": 2.302, + "step": 4365000 + }, + { + "epoch": 21.63, + "learning_rate": 3.9189786120895946e-05, + "loss": 2.3265, + "step": 4365500 + }, + { + "epoch": 21.63, + "learning_rate": 3.918854753446986e-05, + "loss": 2.3245, + "step": 4366000 + }, + { + "epoch": 21.63, + "learning_rate": 3.918730894804378e-05, + "loss": 2.3159, + "step": 4366500 + }, + { + "epoch": 21.64, + "learning_rate": 3.918607283879055e-05, + "loss": 2.3157, + "step": 4367000 + }, + { + "epoch": 21.64, + "learning_rate": 3.918483425236446e-05, + "loss": 2.3319, + "step": 4367500 + }, + { + "epoch": 21.64, + "learning_rate": 3.9183595665938376e-05, + "loss": 2.3131, + "step": 4368000 + }, + { + "epoch": 21.64, + "learning_rate": 3.918235707951229e-05, + "loss": 2.3323, + "step": 4368500 + }, + { + "epoch": 21.65, + "learning_rate": 3.918111849308621e-05, + "loss": 2.3466, + "step": 4369000 + }, + { + "epoch": 21.65, + "learning_rate": 3.9179879906660126e-05, + "loss": 2.3123, + "step": 4369500 + }, + { + "epoch": 21.65, + "learning_rate": 3.917864132023404e-05, + "loss": 2.296, + "step": 4370000 + }, + { + "epoch": 21.65, + "learning_rate": 3.917740273380796e-05, + "loss": 2.313, + "step": 4370500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917616414738188e-05, + "loss": 2.3314, + "step": 4371000 + }, + { + "epoch": 21.66, + "learning_rate": 3.9174928038128646e-05, + "loss": 2.2958, + "step": 4371500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917368945170256e-05, + "loss": 2.3307, + "step": 4372000 + }, + { + "epoch": 21.66, + "learning_rate": 3.917245086527648e-05, + "loss": 2.3187, + "step": 4372500 + }, + { + "epoch": 21.67, + "learning_rate": 3.91712122788504e-05, + "loss": 2.3269, + "step": 4373000 + }, + { + "epoch": 21.67, + "learning_rate": 3.9169973692424314e-05, + "loss": 2.3228, + "step": 4373500 + }, + { + "epoch": 21.67, + "learning_rate": 3.916873510599823e-05, + "loss": 2.3128, + "step": 4374000 + }, + { + "epoch": 21.67, + "learning_rate": 3.916749651957215e-05, + "loss": 2.3191, + "step": 4374500 + }, + { + "epoch": 21.68, + "learning_rate": 3.9166257933146064e-05, + "loss": 2.3091, + "step": 4375000 + }, + { + "epoch": 21.68, + "learning_rate": 3.9165019346719975e-05, + "loss": 2.3338, + "step": 4375500 + }, + { + "epoch": 21.68, + "learning_rate": 3.916378076029389e-05, + "loss": 2.3332, + "step": 4376000 + }, + { + "epoch": 21.68, + "learning_rate": 3.916254465104066e-05, + "loss": 2.3182, + "step": 4376500 + }, + { + "epoch": 21.69, + "learning_rate": 3.916130606461458e-05, + "loss": 2.3053, + "step": 4377000 + }, + { + "epoch": 21.69, + "learning_rate": 3.9160067478188494e-05, + "loss": 2.3355, + "step": 4377500 + }, + { + "epoch": 21.69, + "learning_rate": 3.915882889176241e-05, + "loss": 2.3166, + "step": 4378000 + }, + { + "epoch": 21.69, + "learning_rate": 3.915759278250918e-05, + "loss": 2.3133, + "step": 4378500 + }, + { + "epoch": 21.7, + "learning_rate": 3.91563541960831e-05, + "loss": 2.3149, + "step": 4379000 + }, + { + "epoch": 21.7, + "learning_rate": 3.9155115609657014e-05, + "loss": 2.3224, + "step": 4379500 + }, + { + "epoch": 21.7, + "learning_rate": 3.915387702323093e-05, + "loss": 2.3327, + "step": 4380000 + }, + { + "epoch": 21.7, + "learning_rate": 3.915263843680485e-05, + "loss": 2.3332, + "step": 4380500 + }, + { + "epoch": 21.7, + "learning_rate": 3.9151399850378765e-05, + "loss": 2.3271, + "step": 4381000 + }, + { + "epoch": 21.71, + "learning_rate": 3.915016126395268e-05, + "loss": 2.286, + "step": 4381500 + }, + { + "epoch": 21.71, + "learning_rate": 3.914892267752659e-05, + "loss": 2.3318, + "step": 4382000 + }, + { + "epoch": 21.71, + "learning_rate": 3.914768409110051e-05, + "loss": 2.3186, + "step": 4382500 + }, + { + "epoch": 21.71, + "learning_rate": 3.9146445504674425e-05, + "loss": 2.3394, + "step": 4383000 + }, + { + "epoch": 21.72, + "learning_rate": 3.914520691824834e-05, + "loss": 2.3411, + "step": 4383500 + }, + { + "epoch": 21.72, + "learning_rate": 3.914396833182226e-05, + "loss": 2.312, + "step": 4384000 + }, + { + "epoch": 21.72, + "learning_rate": 3.914273222256903e-05, + "loss": 2.3266, + "step": 4384500 + }, + { + "epoch": 21.72, + "learning_rate": 3.9141493636142945e-05, + "loss": 2.3351, + "step": 4385000 + }, + { + "epoch": 21.73, + "learning_rate": 3.914025504971686e-05, + "loss": 2.3116, + "step": 4385500 + }, + { + "epoch": 21.73, + "learning_rate": 3.913901646329078e-05, + "loss": 2.3258, + "step": 4386000 + }, + { + "epoch": 21.73, + "learning_rate": 3.9137777876864696e-05, + "loss": 2.3249, + "step": 4386500 + }, + { + "epoch": 21.73, + "learning_rate": 3.9136541767611465e-05, + "loss": 2.3161, + "step": 4387000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913530318118538e-05, + "loss": 2.3343, + "step": 4387500 + }, + { + "epoch": 21.74, + "learning_rate": 3.91340645947593e-05, + "loss": 2.2996, + "step": 4388000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913282848550606e-05, + "loss": 2.307, + "step": 4388500 + }, + { + "epoch": 21.74, + "learning_rate": 3.913159485342569e-05, + "loss": 2.308, + "step": 4389000 + }, + { + "epoch": 21.75, + "learning_rate": 3.9130356266999605e-05, + "loss": 2.3072, + "step": 4389500 + }, + { + "epoch": 21.75, + "learning_rate": 3.9129117680573515e-05, + "loss": 2.3254, + "step": 4390000 + }, + { + "epoch": 21.75, + "learning_rate": 3.912787909414743e-05, + "loss": 2.3295, + "step": 4390500 + }, + { + "epoch": 21.75, + "learning_rate": 3.912664050772135e-05, + "loss": 2.3067, + "step": 4391000 + }, + { + "epoch": 21.76, + "learning_rate": 3.9125401921295266e-05, + "loss": 2.3233, + "step": 4391500 + }, + { + "epoch": 21.76, + "learning_rate": 3.912416333486918e-05, + "loss": 2.3388, + "step": 4392000 + }, + { + "epoch": 21.76, + "learning_rate": 3.912292474844309e-05, + "loss": 2.2997, + "step": 4392500 + }, + { + "epoch": 21.76, + "learning_rate": 3.912168616201701e-05, + "loss": 2.3244, + "step": 4393000 + }, + { + "epoch": 21.77, + "learning_rate": 3.912044757559093e-05, + "loss": 2.3057, + "step": 4393500 + }, + { + "epoch": 21.77, + "learning_rate": 3.9119208989164844e-05, + "loss": 2.291, + "step": 4394000 + }, + { + "epoch": 21.77, + "learning_rate": 3.911797040273876e-05, + "loss": 2.3322, + "step": 4394500 + }, + { + "epoch": 21.77, + "learning_rate": 3.911673181631268e-05, + "loss": 2.3331, + "step": 4395000 + }, + { + "epoch": 21.78, + "learning_rate": 3.9115493229886595e-05, + "loss": 2.3059, + "step": 4395500 + }, + { + "epoch": 21.78, + "learning_rate": 3.911425464346051e-05, + "loss": 2.3113, + "step": 4396000 + }, + { + "epoch": 21.78, + "learning_rate": 3.911301605703443e-05, + "loss": 2.3046, + "step": 4396500 + }, + { + "epoch": 21.78, + "learning_rate": 3.91117799477812e-05, + "loss": 2.3182, + "step": 4397000 + }, + { + "epoch": 21.79, + "learning_rate": 3.9110541361355114e-05, + "loss": 2.3106, + "step": 4397500 + }, + { + "epoch": 21.79, + "learning_rate": 3.910930277492903e-05, + "loss": 2.3463, + "step": 4398000 + }, + { + "epoch": 21.79, + "learning_rate": 3.910806418850295e-05, + "loss": 2.3151, + "step": 4398500 + }, + { + "epoch": 21.79, + "learning_rate": 3.9106825602076865e-05, + "loss": 2.3151, + "step": 4399000 + }, + { + "epoch": 21.8, + "learning_rate": 3.910558701565078e-05, + "loss": 2.332, + "step": 4399500 + }, + { + "epoch": 21.8, + "learning_rate": 3.91043484292247e-05, + "loss": 2.3448, + "step": 4400000 + }, + { + "epoch": 21.8, + "learning_rate": 3.9103109842798616e-05, + "loss": 2.3485, + "step": 4400500 + }, + { + "epoch": 21.8, + "learning_rate": 3.910187373354538e-05, + "loss": 2.342, + "step": 4401000 + }, + { + "epoch": 21.81, + "learning_rate": 3.9100635147119295e-05, + "loss": 2.3263, + "step": 4401500 + }, + { + "epoch": 21.81, + "learning_rate": 3.9099399037866063e-05, + "loss": 2.3294, + "step": 4402000 + }, + { + "epoch": 21.81, + "learning_rate": 3.909816292861284e-05, + "loss": 2.3133, + "step": 4402500 + }, + { + "epoch": 21.81, + "learning_rate": 3.9096924342186756e-05, + "loss": 2.3127, + "step": 4403000 + }, + { + "epoch": 21.82, + "learning_rate": 3.9095685755760666e-05, + "loss": 2.3094, + "step": 4403500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909444716933458e-05, + "loss": 2.2822, + "step": 4404000 + }, + { + "epoch": 21.82, + "learning_rate": 3.90932085829085e-05, + "loss": 2.3257, + "step": 4404500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909196999648242e-05, + "loss": 2.2952, + "step": 4405000 + }, + { + "epoch": 21.83, + "learning_rate": 3.9090731410056334e-05, + "loss": 2.3526, + "step": 4405500 + }, + { + "epoch": 21.83, + "learning_rate": 3.9089492823630244e-05, + "loss": 2.3249, + "step": 4406000 + }, + { + "epoch": 21.83, + "learning_rate": 3.908825423720416e-05, + "loss": 2.3282, + "step": 4406500 + }, + { + "epoch": 21.83, + "learning_rate": 3.9087018127950936e-05, + "loss": 2.3322, + "step": 4407000 + }, + { + "epoch": 21.84, + "learning_rate": 3.9085779541524853e-05, + "loss": 2.3248, + "step": 4407500 + }, + { + "epoch": 21.84, + "learning_rate": 3.9084540955098764e-05, + "loss": 2.325, + "step": 4408000 + }, + { + "epoch": 21.84, + "learning_rate": 3.908330236867268e-05, + "loss": 2.3289, + "step": 4408500 + }, + { + "epoch": 21.84, + "learning_rate": 3.90820637822466e-05, + "loss": 2.3138, + "step": 4409000 + }, + { + "epoch": 21.85, + "learning_rate": 3.908082767299337e-05, + "loss": 2.3137, + "step": 4409500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907958908656728e-05, + "loss": 2.3257, + "step": 4410000 + }, + { + "epoch": 21.85, + "learning_rate": 3.90783505001412e-05, + "loss": 2.322, + "step": 4410500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907711191371512e-05, + "loss": 2.3273, + "step": 4411000 + }, + { + "epoch": 21.86, + "learning_rate": 3.9075873327289034e-05, + "loss": 2.3332, + "step": 4411500 + }, + { + "epoch": 21.86, + "learning_rate": 3.907463474086295e-05, + "loss": 2.337, + "step": 4412000 + }, + { + "epoch": 21.86, + "learning_rate": 3.907339863160972e-05, + "loss": 2.3351, + "step": 4412500 + }, + { + "epoch": 21.86, + "learning_rate": 3.9072160045183637e-05, + "loss": 2.3349, + "step": 4413000 + }, + { + "epoch": 21.87, + "learning_rate": 3.9070921458757554e-05, + "loss": 2.3297, + "step": 4413500 + }, + { + "epoch": 21.87, + "learning_rate": 3.906968287233147e-05, + "loss": 2.3183, + "step": 4414000 + }, + { + "epoch": 21.87, + "learning_rate": 3.906844428590538e-05, + "loss": 2.3166, + "step": 4414500 + }, + { + "epoch": 21.87, + "learning_rate": 3.90672056994793e-05, + "loss": 2.317, + "step": 4415000 + }, + { + "epoch": 21.88, + "learning_rate": 3.9065967113053214e-05, + "loss": 2.3059, + "step": 4415500 + }, + { + "epoch": 21.88, + "learning_rate": 3.906472852662713e-05, + "loss": 2.3185, + "step": 4416000 + }, + { + "epoch": 21.88, + "learning_rate": 3.906349241737391e-05, + "loss": 2.3452, + "step": 4416500 + }, + { + "epoch": 21.88, + "learning_rate": 3.906225383094782e-05, + "loss": 2.32, + "step": 4417000 + }, + { + "epoch": 21.89, + "learning_rate": 3.9061015244521734e-05, + "loss": 2.3385, + "step": 4417500 + }, + { + "epoch": 21.89, + "learning_rate": 3.905977665809565e-05, + "loss": 2.3351, + "step": 4418000 + }, + { + "epoch": 21.89, + "learning_rate": 3.905853807166957e-05, + "loss": 2.3168, + "step": 4418500 + }, + { + "epoch": 21.89, + "learning_rate": 3.905730196241634e-05, + "loss": 2.3263, + "step": 4419000 + }, + { + "epoch": 21.9, + "learning_rate": 3.9056063375990254e-05, + "loss": 2.331, + "step": 4419500 + }, + { + "epoch": 21.9, + "learning_rate": 3.905482478956417e-05, + "loss": 2.3449, + "step": 4420000 + }, + { + "epoch": 21.9, + "learning_rate": 3.905358868031094e-05, + "loss": 2.327, + "step": 4420500 + }, + { + "epoch": 21.9, + "learning_rate": 3.9052350093884856e-05, + "loss": 2.3243, + "step": 4421000 + }, + { + "epoch": 21.91, + "learning_rate": 3.905111150745877e-05, + "loss": 2.3226, + "step": 4421500 + }, + { + "epoch": 21.91, + "learning_rate": 3.904987292103269e-05, + "loss": 2.3242, + "step": 4422000 + }, + { + "epoch": 21.91, + "learning_rate": 3.904863433460661e-05, + "loss": 2.3421, + "step": 4422500 + }, + { + "epoch": 21.91, + "learning_rate": 3.9047395748180524e-05, + "loss": 2.3393, + "step": 4423000 + }, + { + "epoch": 21.92, + "learning_rate": 3.9046157161754434e-05, + "loss": 2.3457, + "step": 4423500 + }, + { + "epoch": 21.92, + "learning_rate": 3.904491857532835e-05, + "loss": 2.3174, + "step": 4424000 + }, + { + "epoch": 21.92, + "learning_rate": 3.904367998890227e-05, + "loss": 2.3136, + "step": 4424500 + }, + { + "epoch": 21.92, + "learning_rate": 3.904244387964904e-05, + "loss": 2.311, + "step": 4425000 + }, + { + "epoch": 21.93, + "learning_rate": 3.9041205293222954e-05, + "loss": 2.3206, + "step": 4425500 + }, + { + "epoch": 21.93, + "learning_rate": 3.903996670679687e-05, + "loss": 2.3157, + "step": 4426000 + }, + { + "epoch": 21.93, + "learning_rate": 3.903872812037079e-05, + "loss": 2.3178, + "step": 4426500 + }, + { + "epoch": 21.93, + "learning_rate": 3.90374895339447e-05, + "loss": 2.3013, + "step": 4427000 + }, + { + "epoch": 21.94, + "learning_rate": 3.9036250947518615e-05, + "loss": 2.3279, + "step": 4427500 + }, + { + "epoch": 21.94, + "learning_rate": 3.903501236109253e-05, + "loss": 2.3246, + "step": 4428000 + }, + { + "epoch": 21.94, + "learning_rate": 3.903377377466645e-05, + "loss": 2.3211, + "step": 4428500 + }, + { + "epoch": 21.94, + "learning_rate": 3.9032535188240365e-05, + "loss": 2.3386, + "step": 4429000 + }, + { + "epoch": 21.95, + "learning_rate": 3.903129907898714e-05, + "loss": 2.3297, + "step": 4429500 + }, + { + "epoch": 21.95, + "learning_rate": 3.903006049256105e-05, + "loss": 2.3189, + "step": 4430000 + }, + { + "epoch": 21.95, + "learning_rate": 3.902882438330782e-05, + "loss": 2.3271, + "step": 4430500 + }, + { + "epoch": 21.95, + "learning_rate": 3.902758579688174e-05, + "loss": 2.2818, + "step": 4431000 + }, + { + "epoch": 21.96, + "learning_rate": 3.9026347210455654e-05, + "loss": 2.2994, + "step": 4431500 + }, + { + "epoch": 21.96, + "learning_rate": 3.902510862402957e-05, + "loss": 2.3206, + "step": 4432000 + }, + { + "epoch": 21.96, + "learning_rate": 3.902387003760349e-05, + "loss": 2.3146, + "step": 4432500 + }, + { + "epoch": 21.96, + "learning_rate": 3.90226314511774e-05, + "loss": 2.325, + "step": 4433000 + }, + { + "epoch": 21.97, + "learning_rate": 3.9021392864751315e-05, + "loss": 2.3225, + "step": 4433500 + }, + { + "epoch": 21.97, + "learning_rate": 3.902015675549809e-05, + "loss": 2.3511, + "step": 4434000 + }, + { + "epoch": 21.97, + "learning_rate": 3.901891816907201e-05, + "loss": 2.307, + "step": 4434500 + }, + { + "epoch": 21.97, + "learning_rate": 3.9017679582645924e-05, + "loss": 2.3254, + "step": 4435000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901644099621984e-05, + "loss": 2.3352, + "step": 4435500 + }, + { + "epoch": 21.98, + "learning_rate": 3.901520240979376e-05, + "loss": 2.337, + "step": 4436000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901396630054052e-05, + "loss": 2.3028, + "step": 4436500 + }, + { + "epoch": 21.98, + "learning_rate": 3.901272771411444e-05, + "loss": 2.3097, + "step": 4437000 + }, + { + "epoch": 21.98, + "learning_rate": 3.9011489127688354e-05, + "loss": 2.3093, + "step": 4437500 + }, + { + "epoch": 21.99, + "learning_rate": 3.901025054126227e-05, + "loss": 2.348, + "step": 4438000 + }, + { + "epoch": 21.99, + "learning_rate": 3.900901195483619e-05, + "loss": 2.3076, + "step": 4438500 + }, + { + "epoch": 21.99, + "learning_rate": 3.900777584558296e-05, + "loss": 2.3348, + "step": 4439000 + }, + { + "epoch": 21.99, + "learning_rate": 3.9006537259156874e-05, + "loss": 2.3648, + "step": 4439500 + }, + { + "epoch": 22.0, + "learning_rate": 3.900529867273079e-05, + "loss": 2.3346, + "step": 4440000 + }, + { + "epoch": 22.0, + "learning_rate": 3.900406008630471e-05, + "loss": 2.3289, + "step": 4440500 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.6535946905725298, + "eval_accuracy_mlm": 0.6084227722805187, + "eval_accuracy_nsp": 0.86680995767947, + "eval_loss": 2.3525571823120117, + "eval_runtime": 145.9518, + "eval_samples_per_second": 1746.871, + "eval_steps_per_second": 72.791, + "step": 4440546 + }, + { + "epoch": 22.0, + "learning_rate": 3.900282397705147e-05, + "loss": 2.2772, + "step": 4441000 + }, + { + "epoch": 22.0, + "learning_rate": 3.9001585390625386e-05, + "loss": 2.2912, + "step": 4441500 + }, + { + "epoch": 22.01, + "learning_rate": 3.90003468041993e-05, + "loss": 2.282, + "step": 4442000 + }, + { + "epoch": 22.01, + "learning_rate": 3.899911069494607e-05, + "loss": 2.2739, + "step": 4442500 + }, + { + "epoch": 22.01, + "learning_rate": 3.899787210851999e-05, + "loss": 2.2682, + "step": 4443000 + }, + { + "epoch": 22.01, + "learning_rate": 3.8996633522093906e-05, + "loss": 2.2959, + "step": 4443500 + }, + { + "epoch": 22.02, + "learning_rate": 3.899539493566782e-05, + "loss": 2.2891, + "step": 4444000 + }, + { + "epoch": 22.02, + "learning_rate": 3.899415634924174e-05, + "loss": 2.2862, + "step": 4444500 + }, + { + "epoch": 22.02, + "learning_rate": 3.899291776281566e-05, + "loss": 2.3008, + "step": 4445000 + }, + { + "epoch": 22.02, + "learning_rate": 3.8991679176389574e-05, + "loss": 2.2792, + "step": 4445500 + }, + { + "epoch": 22.03, + "learning_rate": 3.899044058996349e-05, + "loss": 2.2976, + "step": 4446000 + }, + { + "epoch": 22.03, + "learning_rate": 3.898920448071026e-05, + "loss": 2.303, + "step": 4446500 + }, + { + "epoch": 22.03, + "learning_rate": 3.898796589428417e-05, + "loss": 2.2843, + "step": 4447000 + }, + { + "epoch": 22.03, + "learning_rate": 3.8986727307858087e-05, + "loss": 2.2869, + "step": 4447500 + }, + { + "epoch": 22.04, + "learning_rate": 3.8985488721432003e-05, + "loss": 2.2573, + "step": 4448000 + }, + { + "epoch": 22.04, + "learning_rate": 3.898425013500592e-05, + "loss": 2.2712, + "step": 4448500 + }, + { + "epoch": 22.04, + "learning_rate": 3.898301154857984e-05, + "loss": 2.275, + "step": 4449000 + }, + { + "epoch": 22.04, + "learning_rate": 3.8981772962153754e-05, + "loss": 2.2868, + "step": 4449500 + }, + { + "epoch": 22.05, + "learning_rate": 3.898053437572767e-05, + "loss": 2.3145, + "step": 4450000 + }, + { + "epoch": 22.05, + "learning_rate": 3.897929578930159e-05, + "loss": 2.3197, + "step": 4450500 + }, + { + "epoch": 22.05, + "learning_rate": 3.8978057202875505e-05, + "loss": 2.2986, + "step": 4451000 + }, + { + "epoch": 22.05, + "learning_rate": 3.8976821093622274e-05, + "loss": 2.2807, + "step": 4451500 + }, + { + "epoch": 22.06, + "learning_rate": 3.897558498436904e-05, + "loss": 2.3027, + "step": 4452000 + }, + { + "epoch": 22.06, + "learning_rate": 3.897434639794296e-05, + "loss": 2.2952, + "step": 4452500 + }, + { + "epoch": 22.06, + "learning_rate": 3.8973107811516877e-05, + "loss": 2.2791, + "step": 4453000 + }, + { + "epoch": 22.06, + "learning_rate": 3.8971869225090793e-05, + "loss": 2.2952, + "step": 4453500 + }, + { + "epoch": 22.07, + "learning_rate": 3.8970630638664704e-05, + "loss": 2.3056, + "step": 4454000 + }, + { + "epoch": 22.07, + "learning_rate": 3.896939205223862e-05, + "loss": 2.3098, + "step": 4454500 + }, + { + "epoch": 22.07, + "learning_rate": 3.896815346581254e-05, + "loss": 2.2978, + "step": 4455000 + }, + { + "epoch": 22.07, + "learning_rate": 3.8966914879386454e-05, + "loss": 2.306, + "step": 4455500 + }, + { + "epoch": 22.08, + "learning_rate": 3.896567629296037e-05, + "loss": 2.2921, + "step": 4456000 + }, + { + "epoch": 22.08, + "learning_rate": 3.896443770653429e-05, + "loss": 2.3061, + "step": 4456500 + }, + { + "epoch": 22.08, + "learning_rate": 3.896320159728106e-05, + "loss": 2.2947, + "step": 4457000 + }, + { + "epoch": 22.08, + "learning_rate": 3.8961963010854974e-05, + "loss": 2.2962, + "step": 4457500 + }, + { + "epoch": 22.09, + "learning_rate": 3.896072442442889e-05, + "loss": 2.2948, + "step": 4458000 + }, + { + "epoch": 22.09, + "learning_rate": 3.895948831517566e-05, + "loss": 2.3185, + "step": 4458500 + }, + { + "epoch": 22.09, + "learning_rate": 3.895824972874958e-05, + "loss": 2.3078, + "step": 4459000 + }, + { + "epoch": 22.09, + "learning_rate": 3.8957011142323494e-05, + "loss": 2.303, + "step": 4459500 + }, + { + "epoch": 22.1, + "learning_rate": 3.8955775033070256e-05, + "loss": 2.2979, + "step": 4460000 + }, + { + "epoch": 22.1, + "learning_rate": 3.895453644664417e-05, + "loss": 2.2898, + "step": 4460500 + }, + { + "epoch": 22.1, + "learning_rate": 3.895330033739095e-05, + "loss": 2.3058, + "step": 4461000 + }, + { + "epoch": 22.1, + "learning_rate": 3.8952061750964865e-05, + "loss": 2.2828, + "step": 4461500 + }, + { + "epoch": 22.11, + "learning_rate": 3.8950823164538775e-05, + "loss": 2.3222, + "step": 4462000 + }, + { + "epoch": 22.11, + "learning_rate": 3.894958457811269e-05, + "loss": 2.3003, + "step": 4462500 + }, + { + "epoch": 22.11, + "learning_rate": 3.894834599168661e-05, + "loss": 2.2981, + "step": 4463000 + }, + { + "epoch": 22.11, + "learning_rate": 3.8947107405260526e-05, + "loss": 2.3091, + "step": 4463500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894586881883444e-05, + "loss": 2.271, + "step": 4464000 + }, + { + "epoch": 22.12, + "learning_rate": 3.894463023240836e-05, + "loss": 2.334, + "step": 4464500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894339164598228e-05, + "loss": 2.2926, + "step": 4465000 + }, + { + "epoch": 22.12, + "learning_rate": 3.8942153059556194e-05, + "loss": 2.3064, + "step": 4465500 + }, + { + "epoch": 22.13, + "learning_rate": 3.894091447313011e-05, + "loss": 2.2886, + "step": 4466000 + }, + { + "epoch": 22.13, + "learning_rate": 3.893967588670403e-05, + "loss": 2.2827, + "step": 4466500 + }, + { + "epoch": 22.13, + "learning_rate": 3.8938437300277944e-05, + "loss": 2.2968, + "step": 4467000 + }, + { + "epoch": 22.13, + "learning_rate": 3.8937201191024707e-05, + "loss": 2.298, + "step": 4467500 + }, + { + "epoch": 22.14, + "learning_rate": 3.8935962604598623e-05, + "loss": 2.2935, + "step": 4468000 + }, + { + "epoch": 22.14, + "learning_rate": 3.893472401817254e-05, + "loss": 2.2881, + "step": 4468500 + }, + { + "epoch": 22.14, + "learning_rate": 3.893348543174646e-05, + "loss": 2.2915, + "step": 4469000 + }, + { + "epoch": 22.14, + "learning_rate": 3.8932246845320374e-05, + "loss": 2.3102, + "step": 4469500 + }, + { + "epoch": 22.15, + "learning_rate": 3.893100825889429e-05, + "loss": 2.3013, + "step": 4470000 + }, + { + "epoch": 22.15, + "learning_rate": 3.892977214964106e-05, + "loss": 2.285, + "step": 4470500 + }, + { + "epoch": 22.15, + "learning_rate": 3.892853356321498e-05, + "loss": 2.2956, + "step": 4471000 + }, + { + "epoch": 22.15, + "learning_rate": 3.892729745396174e-05, + "loss": 2.3063, + "step": 4471500 + }, + { + "epoch": 22.16, + "learning_rate": 3.8926058867535656e-05, + "loss": 2.3184, + "step": 4472000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892482028110957e-05, + "loss": 2.2951, + "step": 4472500 + }, + { + "epoch": 22.16, + "learning_rate": 3.892358169468349e-05, + "loss": 2.2968, + "step": 4473000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892234310825741e-05, + "loss": 2.2868, + "step": 4473500 + }, + { + "epoch": 22.17, + "learning_rate": 3.892110699900418e-05, + "loss": 2.2937, + "step": 4474000 + }, + { + "epoch": 22.17, + "learning_rate": 3.891986841257809e-05, + "loss": 2.2953, + "step": 4474500 + }, + { + "epoch": 22.17, + "learning_rate": 3.891862982615201e-05, + "loss": 2.3181, + "step": 4475000 + }, + { + "epoch": 22.17, + "learning_rate": 3.8917391239725926e-05, + "loss": 2.2859, + "step": 4475500 + }, + { + "epoch": 22.18, + "learning_rate": 3.891615265329984e-05, + "loss": 2.3111, + "step": 4476000 + }, + { + "epoch": 22.18, + "learning_rate": 3.891491406687376e-05, + "loss": 2.3094, + "step": 4476500 + }, + { + "epoch": 22.18, + "learning_rate": 3.891367548044768e-05, + "loss": 2.3165, + "step": 4477000 + }, + { + "epoch": 22.18, + "learning_rate": 3.8912436894021594e-05, + "loss": 2.3021, + "step": 4477500 + }, + { + "epoch": 22.19, + "learning_rate": 3.891119830759551e-05, + "loss": 2.2889, + "step": 4478000 + }, + { + "epoch": 22.19, + "learning_rate": 3.890995972116943e-05, + "loss": 2.3061, + "step": 4478500 + }, + { + "epoch": 22.19, + "learning_rate": 3.8908721134743345e-05, + "loss": 2.303, + "step": 4479000 + }, + { + "epoch": 22.19, + "learning_rate": 3.890748254831726e-05, + "loss": 2.2969, + "step": 4479500 + }, + { + "epoch": 22.2, + "learning_rate": 3.890624891623688e-05, + "loss": 2.2988, + "step": 4480000 + }, + { + "epoch": 22.2, + "learning_rate": 3.890501032981079e-05, + "loss": 2.3114, + "step": 4480500 + }, + { + "epoch": 22.2, + "learning_rate": 3.890377174338471e-05, + "loss": 2.3078, + "step": 4481000 + }, + { + "epoch": 22.2, + "learning_rate": 3.8902533156958626e-05, + "loss": 2.3274, + "step": 4481500 + }, + { + "epoch": 22.21, + "learning_rate": 3.890129457053254e-05, + "loss": 2.296, + "step": 4482000 + }, + { + "epoch": 22.21, + "learning_rate": 3.890005598410646e-05, + "loss": 2.3063, + "step": 4482500 + }, + { + "epoch": 22.21, + "learning_rate": 3.889881739768038e-05, + "loss": 2.2988, + "step": 4483000 + }, + { + "epoch": 22.21, + "learning_rate": 3.8897578811254294e-05, + "loss": 2.2946, + "step": 4483500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889634022482821e-05, + "loss": 2.2692, + "step": 4484000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889510411557497e-05, + "loss": 2.2717, + "step": 4484500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889386552914889e-05, + "loss": 2.3011, + "step": 4485000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889262694272281e-05, + "loss": 2.2948, + "step": 4485500 + }, + { + "epoch": 22.23, + "learning_rate": 3.8891388356296724e-05, + "loss": 2.2936, + "step": 4486000 + }, + { + "epoch": 22.23, + "learning_rate": 3.889014976987064e-05, + "loss": 2.2986, + "step": 4486500 + }, + { + "epoch": 22.23, + "learning_rate": 3.888891366061741e-05, + "loss": 2.3014, + "step": 4487000 + }, + { + "epoch": 22.23, + "learning_rate": 3.8887675074191326e-05, + "loss": 2.3245, + "step": 4487500 + }, + { + "epoch": 22.24, + "learning_rate": 3.8886436487765243e-05, + "loss": 2.3155, + "step": 4488000 + }, + { + "epoch": 22.24, + "learning_rate": 3.888520037851201e-05, + "loss": 2.3137, + "step": 4488500 + }, + { + "epoch": 22.24, + "learning_rate": 3.888396179208593e-05, + "loss": 2.3031, + "step": 4489000 + }, + { + "epoch": 22.24, + "learning_rate": 3.8882723205659846e-05, + "loss": 2.3025, + "step": 4489500 + }, + { + "epoch": 22.25, + "learning_rate": 3.888148461923376e-05, + "loss": 2.3273, + "step": 4490000 + }, + { + "epoch": 22.25, + "learning_rate": 3.888024850998053e-05, + "loss": 2.3053, + "step": 4490500 + }, + { + "epoch": 22.25, + "learning_rate": 3.887900992355445e-05, + "loss": 2.3006, + "step": 4491000 + }, + { + "epoch": 22.25, + "learning_rate": 3.8877771337128366e-05, + "loss": 2.3109, + "step": 4491500 + }, + { + "epoch": 22.25, + "learning_rate": 3.887653275070228e-05, + "loss": 2.3198, + "step": 4492000 + }, + { + "epoch": 22.26, + "learning_rate": 3.887529664144905e-05, + "loss": 2.3127, + "step": 4492500 + }, + { + "epoch": 22.26, + "learning_rate": 3.887405805502297e-05, + "loss": 2.2989, + "step": 4493000 + }, + { + "epoch": 22.26, + "learning_rate": 3.8872819468596885e-05, + "loss": 2.3253, + "step": 4493500 + }, + { + "epoch": 22.26, + "learning_rate": 3.88715808821708e-05, + "loss": 2.2805, + "step": 4494000 + }, + { + "epoch": 22.27, + "learning_rate": 3.887034229574472e-05, + "loss": 2.3011, + "step": 4494500 + }, + { + "epoch": 22.27, + "learning_rate": 3.8869103709318636e-05, + "loss": 2.3047, + "step": 4495000 + }, + { + "epoch": 22.27, + "learning_rate": 3.8867865122892546e-05, + "loss": 2.2922, + "step": 4495500 + }, + { + "epoch": 22.27, + "learning_rate": 3.886662653646646e-05, + "loss": 2.2869, + "step": 4496000 + }, + { + "epoch": 22.28, + "learning_rate": 3.886538795004038e-05, + "loss": 2.3214, + "step": 4496500 + }, + { + "epoch": 22.28, + "learning_rate": 3.88641493636143e-05, + "loss": 2.3244, + "step": 4497000 + }, + { + "epoch": 22.28, + "learning_rate": 3.8862910777188214e-05, + "loss": 2.3028, + "step": 4497500 + }, + { + "epoch": 22.28, + "learning_rate": 3.8861672190762124e-05, + "loss": 2.3, + "step": 4498000 + }, + { + "epoch": 22.29, + "learning_rate": 3.88604360815089e-05, + "loss": 2.2982, + "step": 4498500 + }, + { + "epoch": 22.29, + "learning_rate": 3.8859197495082817e-05, + "loss": 2.3043, + "step": 4499000 + }, + { + "epoch": 22.29, + "learning_rate": 3.885795890865673e-05, + "loss": 2.3042, + "step": 4499500 + }, + { + "epoch": 22.29, + "learning_rate": 3.8856720322230644e-05, + "loss": 2.3022, + "step": 4500000 + }, + { + "epoch": 22.3, + "learning_rate": 3.885548173580456e-05, + "loss": 2.3037, + "step": 4500500 + }, + { + "epoch": 22.3, + "learning_rate": 3.885424314937848e-05, + "loss": 2.2896, + "step": 4501000 + }, + { + "epoch": 22.3, + "learning_rate": 3.88530095172981e-05, + "loss": 2.2943, + "step": 4501500 + }, + { + "epoch": 22.3, + "learning_rate": 3.8851770930872015e-05, + "loss": 2.2988, + "step": 4502000 + }, + { + "epoch": 22.31, + "learning_rate": 3.8850534821618784e-05, + "loss": 2.3111, + "step": 4502500 + }, + { + "epoch": 22.31, + "learning_rate": 3.88492962351927e-05, + "loss": 2.3171, + "step": 4503000 + }, + { + "epoch": 22.31, + "learning_rate": 3.884805764876662e-05, + "loss": 2.2992, + "step": 4503500 + }, + { + "epoch": 22.31, + "learning_rate": 3.8846819062340535e-05, + "loss": 2.3312, + "step": 4504000 + }, + { + "epoch": 22.32, + "learning_rate": 3.884558047591445e-05, + "loss": 2.3045, + "step": 4504500 + }, + { + "epoch": 22.32, + "learning_rate": 3.884434188948837e-05, + "loss": 2.3171, + "step": 4505000 + }, + { + "epoch": 22.32, + "learning_rate": 3.8843103303062286e-05, + "loss": 2.3009, + "step": 4505500 + }, + { + "epoch": 22.32, + "learning_rate": 3.88418647166362e-05, + "loss": 2.3001, + "step": 4506000 + }, + { + "epoch": 22.33, + "learning_rate": 3.884062613021012e-05, + "loss": 2.3133, + "step": 4506500 + }, + { + "epoch": 22.33, + "learning_rate": 3.883939002095688e-05, + "loss": 2.2882, + "step": 4507000 + }, + { + "epoch": 22.33, + "learning_rate": 3.88381514345308e-05, + "loss": 2.3329, + "step": 4507500 + }, + { + "epoch": 22.33, + "learning_rate": 3.8836912848104715e-05, + "loss": 2.3059, + "step": 4508000 + }, + { + "epoch": 22.34, + "learning_rate": 3.883567426167863e-05, + "loss": 2.2917, + "step": 4508500 + }, + { + "epoch": 22.34, + "learning_rate": 3.883443567525255e-05, + "loss": 2.2894, + "step": 4509000 + }, + { + "epoch": 22.34, + "learning_rate": 3.8833197088826466e-05, + "loss": 2.3078, + "step": 4509500 + }, + { + "epoch": 22.34, + "learning_rate": 3.883195850240038e-05, + "loss": 2.3134, + "step": 4510000 + }, + { + "epoch": 22.35, + "learning_rate": 3.88307199159743e-05, + "loss": 2.3113, + "step": 4510500 + }, + { + "epoch": 22.35, + "learning_rate": 3.882948132954822e-05, + "loss": 2.2759, + "step": 4511000 + }, + { + "epoch": 22.35, + "learning_rate": 3.8828242743122134e-05, + "loss": 2.3251, + "step": 4511500 + }, + { + "epoch": 22.35, + "learning_rate": 3.8827004156696044e-05, + "loss": 2.3013, + "step": 4512000 + }, + { + "epoch": 22.36, + "learning_rate": 3.882576557026996e-05, + "loss": 2.2991, + "step": 4512500 + }, + { + "epoch": 22.36, + "learning_rate": 3.882453193818958e-05, + "loss": 2.3148, + "step": 4513000 + }, + { + "epoch": 22.36, + "learning_rate": 3.88232933517635e-05, + "loss": 2.3174, + "step": 4513500 + }, + { + "epoch": 22.36, + "learning_rate": 3.882205724251027e-05, + "loss": 2.2938, + "step": 4514000 + }, + { + "epoch": 22.37, + "learning_rate": 3.8820818656084184e-05, + "loss": 2.3036, + "step": 4514500 + }, + { + "epoch": 22.37, + "learning_rate": 3.88195800696581e-05, + "loss": 2.2946, + "step": 4515000 + }, + { + "epoch": 22.37, + "learning_rate": 3.881834148323202e-05, + "loss": 2.3099, + "step": 4515500 + }, + { + "epoch": 22.37, + "learning_rate": 3.8817102896805935e-05, + "loss": 2.3022, + "step": 4516000 + }, + { + "epoch": 22.38, + "learning_rate": 3.881586431037985e-05, + "loss": 2.309, + "step": 4516500 + }, + { + "epoch": 22.38, + "learning_rate": 3.881462820112662e-05, + "loss": 2.2991, + "step": 4517000 + }, + { + "epoch": 22.38, + "learning_rate": 3.881338961470054e-05, + "loss": 2.2815, + "step": 4517500 + }, + { + "epoch": 22.38, + "learning_rate": 3.8812151028274455e-05, + "loss": 2.3127, + "step": 4518000 + }, + { + "epoch": 22.39, + "learning_rate": 3.881091244184837e-05, + "loss": 2.3122, + "step": 4518500 + }, + { + "epoch": 22.39, + "learning_rate": 3.880967385542228e-05, + "loss": 2.305, + "step": 4519000 + }, + { + "epoch": 22.39, + "learning_rate": 3.88084352689962e-05, + "loss": 2.3096, + "step": 4519500 + }, + { + "epoch": 22.39, + "learning_rate": 3.8807196682570116e-05, + "loss": 2.2711, + "step": 4520000 + }, + { + "epoch": 22.4, + "learning_rate": 3.880595809614403e-05, + "loss": 2.3383, + "step": 4520500 + }, + { + "epoch": 22.4, + "learning_rate": 3.880471950971795e-05, + "loss": 2.2977, + "step": 4521000 + }, + { + "epoch": 22.4, + "learning_rate": 3.8803480923291866e-05, + "loss": 2.3228, + "step": 4521500 + }, + { + "epoch": 22.4, + "learning_rate": 3.880224233686578e-05, + "loss": 2.3133, + "step": 4522000 + }, + { + "epoch": 22.41, + "learning_rate": 3.880100622761255e-05, + "loss": 2.3178, + "step": 4522500 + }, + { + "epoch": 22.41, + "learning_rate": 3.879976764118647e-05, + "loss": 2.2977, + "step": 4523000 + }, + { + "epoch": 22.41, + "learning_rate": 3.8798529054760386e-05, + "loss": 2.3113, + "step": 4523500 + }, + { + "epoch": 22.41, + "learning_rate": 3.87972904683343e-05, + "loss": 2.2864, + "step": 4524000 + }, + { + "epoch": 22.42, + "learning_rate": 3.879605435908107e-05, + "loss": 2.2973, + "step": 4524500 + }, + { + "epoch": 22.42, + "learning_rate": 3.879481577265499e-05, + "loss": 2.3181, + "step": 4525000 + }, + { + "epoch": 22.42, + "learning_rate": 3.8793577186228905e-05, + "loss": 2.3087, + "step": 4525500 + }, + { + "epoch": 22.42, + "learning_rate": 3.8792338599802816e-05, + "loss": 2.3024, + "step": 4526000 + }, + { + "epoch": 22.43, + "learning_rate": 3.879110001337673e-05, + "loss": 2.297, + "step": 4526500 + }, + { + "epoch": 22.43, + "learning_rate": 3.878986142695065e-05, + "loss": 2.2964, + "step": 4527000 + }, + { + "epoch": 22.43, + "learning_rate": 3.878862531769742e-05, + "loss": 2.3192, + "step": 4527500 + }, + { + "epoch": 22.43, + "learning_rate": 3.8787386731271335e-05, + "loss": 2.2818, + "step": 4528000 + }, + { + "epoch": 22.44, + "learning_rate": 3.878614814484525e-05, + "loss": 2.3023, + "step": 4528500 + }, + { + "epoch": 22.44, + "learning_rate": 3.878490955841917e-05, + "loss": 2.3361, + "step": 4529000 + }, + { + "epoch": 22.44, + "learning_rate": 3.8783670971993086e-05, + "loss": 2.3235, + "step": 4529500 + }, + { + "epoch": 22.44, + "learning_rate": 3.8782432385567e-05, + "loss": 2.2829, + "step": 4530000 + }, + { + "epoch": 22.45, + "learning_rate": 3.878119379914092e-05, + "loss": 2.3039, + "step": 4530500 + }, + { + "epoch": 22.45, + "learning_rate": 3.877995521271484e-05, + "loss": 2.2703, + "step": 4531000 + }, + { + "epoch": 22.45, + "learning_rate": 3.8778719103461606e-05, + "loss": 2.2963, + "step": 4531500 + }, + { + "epoch": 22.45, + "learning_rate": 3.877748051703552e-05, + "loss": 2.3257, + "step": 4532000 + }, + { + "epoch": 22.46, + "learning_rate": 3.877624193060943e-05, + "loss": 2.3125, + "step": 4532500 + }, + { + "epoch": 22.46, + "learning_rate": 3.877500334418335e-05, + "loss": 2.3036, + "step": 4533000 + }, + { + "epoch": 22.46, + "learning_rate": 3.877376723493012e-05, + "loss": 2.3172, + "step": 4533500 + }, + { + "epoch": 22.46, + "learning_rate": 3.8772528648504035e-05, + "loss": 2.3257, + "step": 4534000 + }, + { + "epoch": 22.47, + "learning_rate": 3.8771292539250804e-05, + "loss": 2.2861, + "step": 4534500 + }, + { + "epoch": 22.47, + "learning_rate": 3.877005395282472e-05, + "loss": 2.3049, + "step": 4535000 + }, + { + "epoch": 22.47, + "learning_rate": 3.876881536639864e-05, + "loss": 2.3128, + "step": 4535500 + }, + { + "epoch": 22.47, + "learning_rate": 3.8767576779972555e-05, + "loss": 2.3123, + "step": 4536000 + }, + { + "epoch": 22.48, + "learning_rate": 3.8766340670719324e-05, + "loss": 2.3054, + "step": 4536500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876510208429324e-05, + "loss": 2.3278, + "step": 4537000 + }, + { + "epoch": 22.48, + "learning_rate": 3.876386349786715e-05, + "loss": 2.3234, + "step": 4537500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876262491144107e-05, + "loss": 2.3163, + "step": 4538000 + }, + { + "epoch": 22.49, + "learning_rate": 3.8761386325014985e-05, + "loss": 2.3147, + "step": 4538500 + }, + { + "epoch": 22.49, + "learning_rate": 3.87601477385889e-05, + "loss": 2.325, + "step": 4539000 + }, + { + "epoch": 22.49, + "learning_rate": 3.875890915216282e-05, + "loss": 2.3072, + "step": 4539500 + }, + { + "epoch": 22.49, + "learning_rate": 3.8757670565736735e-05, + "loss": 2.3123, + "step": 4540000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875643197931065e-05, + "loss": 2.3073, + "step": 4540500 + }, + { + "epoch": 22.5, + "learning_rate": 3.875519587005742e-05, + "loss": 2.2933, + "step": 4541000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875395976080419e-05, + "loss": 2.3299, + "step": 4541500 + }, + { + "epoch": 22.5, + "learning_rate": 3.875272117437811e-05, + "loss": 2.3161, + "step": 4542000 + }, + { + "epoch": 22.51, + "learning_rate": 3.8751482587952024e-05, + "loss": 2.3171, + "step": 4542500 + }, + { + "epoch": 22.51, + "learning_rate": 3.875024400152594e-05, + "loss": 2.3235, + "step": 4543000 + }, + { + "epoch": 22.51, + "learning_rate": 3.874900541509986e-05, + "loss": 2.3079, + "step": 4543500 + }, + { + "epoch": 22.51, + "learning_rate": 3.8747769305846627e-05, + "loss": 2.313, + "step": 4544000 + }, + { + "epoch": 22.52, + "learning_rate": 3.8746530719420543e-05, + "loss": 2.3353, + "step": 4544500 + }, + { + "epoch": 22.52, + "learning_rate": 3.874529213299446e-05, + "loss": 2.3102, + "step": 4545000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874405354656838e-05, + "loss": 2.3287, + "step": 4545500 + }, + { + "epoch": 22.52, + "learning_rate": 3.8742817437315146e-05, + "loss": 2.3158, + "step": 4546000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874157885088906e-05, + "loss": 2.2897, + "step": 4546500 + }, + { + "epoch": 22.53, + "learning_rate": 3.874034026446297e-05, + "loss": 2.3011, + "step": 4547000 + }, + { + "epoch": 22.53, + "learning_rate": 3.873910167803689e-05, + "loss": 2.2923, + "step": 4547500 + }, + { + "epoch": 22.53, + "learning_rate": 3.873786309161081e-05, + "loss": 2.2992, + "step": 4548000 + }, + { + "epoch": 22.53, + "learning_rate": 3.8736624505184724e-05, + "loss": 2.3284, + "step": 4548500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873538839593149e-05, + "loss": 2.3066, + "step": 4549000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873414980950541e-05, + "loss": 2.3289, + "step": 4549500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873291122307933e-05, + "loss": 2.2994, + "step": 4550000 + }, + { + "epoch": 22.54, + "learning_rate": 3.8731672636653244e-05, + "loss": 2.3128, + "step": 4550500 + }, + { + "epoch": 22.55, + "learning_rate": 3.873043405022716e-05, + "loss": 2.3113, + "step": 4551000 + }, + { + "epoch": 22.55, + "learning_rate": 3.872919794097393e-05, + "loss": 2.3115, + "step": 4551500 + }, + { + "epoch": 22.55, + "learning_rate": 3.8727959354547846e-05, + "loss": 2.3109, + "step": 4552000 + }, + { + "epoch": 22.55, + "learning_rate": 3.872672076812176e-05, + "loss": 2.32, + "step": 4552500 + }, + { + "epoch": 22.56, + "learning_rate": 3.872548218169568e-05, + "loss": 2.3099, + "step": 4553000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872424607244244e-05, + "loss": 2.3342, + "step": 4553500 + }, + { + "epoch": 22.56, + "learning_rate": 3.872300996318921e-05, + "loss": 2.2928, + "step": 4554000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872177137676313e-05, + "loss": 2.3082, + "step": 4554500 + }, + { + "epoch": 22.57, + "learning_rate": 3.8720532790337045e-05, + "loss": 2.326, + "step": 4555000 + }, + { + "epoch": 22.57, + "learning_rate": 3.871929420391096e-05, + "loss": 2.3155, + "step": 4555500 + }, + { + "epoch": 22.57, + "learning_rate": 3.871805561748488e-05, + "loss": 2.299, + "step": 4556000 + }, + { + "epoch": 22.57, + "learning_rate": 3.8716817031058796e-05, + "loss": 2.3201, + "step": 4556500 + }, + { + "epoch": 22.58, + "learning_rate": 3.871557844463271e-05, + "loss": 2.2982, + "step": 4557000 + }, + { + "epoch": 22.58, + "learning_rate": 3.871433985820663e-05, + "loss": 2.3207, + "step": 4557500 + }, + { + "epoch": 22.58, + "learning_rate": 3.8713101271780546e-05, + "loss": 2.316, + "step": 4558000 + }, + { + "epoch": 22.58, + "learning_rate": 3.871186516252731e-05, + "loss": 2.3293, + "step": 4558500 + }, + { + "epoch": 22.59, + "learning_rate": 3.8710626576101225e-05, + "loss": 2.3033, + "step": 4559000 + }, + { + "epoch": 22.59, + "learning_rate": 3.870938798967514e-05, + "loss": 2.3159, + "step": 4559500 + }, + { + "epoch": 22.59, + "learning_rate": 3.870814940324906e-05, + "loss": 2.3366, + "step": 4560000 + }, + { + "epoch": 22.59, + "learning_rate": 3.8706910816822976e-05, + "loss": 2.3203, + "step": 4560500 + }, + { + "epoch": 22.6, + "learning_rate": 3.870567223039689e-05, + "loss": 2.32, + "step": 4561000 + }, + { + "epoch": 22.6, + "learning_rate": 3.870443364397081e-05, + "loss": 2.315, + "step": 4561500 + }, + { + "epoch": 22.6, + "learning_rate": 3.870319505754473e-05, + "loss": 2.3213, + "step": 4562000 + }, + { + "epoch": 22.6, + "learning_rate": 3.8701956471118644e-05, + "loss": 2.3211, + "step": 4562500 + }, + { + "epoch": 22.61, + "learning_rate": 3.870072036186541e-05, + "loss": 2.3087, + "step": 4563000 + }, + { + "epoch": 22.61, + "learning_rate": 3.869948177543933e-05, + "loss": 2.3402, + "step": 4563500 + }, + { + "epoch": 22.61, + "learning_rate": 3.8698243189013247e-05, + "loss": 2.3021, + "step": 4564000 + }, + { + "epoch": 22.61, + "learning_rate": 3.8697004602587163e-05, + "loss": 2.3204, + "step": 4564500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869576601616108e-05, + "loss": 2.2966, + "step": 4565000 + }, + { + "epoch": 22.62, + "learning_rate": 3.869452990690784e-05, + "loss": 2.3114, + "step": 4565500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869329132048176e-05, + "loss": 2.3284, + "step": 4566000 + }, + { + "epoch": 22.62, + "learning_rate": 3.8692052734055676e-05, + "loss": 2.3001, + "step": 4566500 + }, + { + "epoch": 22.63, + "learning_rate": 3.869081414762959e-05, + "loss": 2.3107, + "step": 4567000 + }, + { + "epoch": 22.63, + "learning_rate": 3.868957556120351e-05, + "loss": 2.322, + "step": 4567500 + }, + { + "epoch": 22.63, + "learning_rate": 3.868833697477743e-05, + "loss": 2.3449, + "step": 4568000 + }, + { + "epoch": 22.63, + "learning_rate": 3.8687098388351344e-05, + "loss": 2.3089, + "step": 4568500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868585980192526e-05, + "loss": 2.3135, + "step": 4569000 + }, + { + "epoch": 22.64, + "learning_rate": 3.868462121549918e-05, + "loss": 2.3005, + "step": 4569500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868338510624595e-05, + "loss": 2.3158, + "step": 4570000 + }, + { + "epoch": 22.64, + "learning_rate": 3.868214899699271e-05, + "loss": 2.3041, + "step": 4570500 + }, + { + "epoch": 22.65, + "learning_rate": 3.8680910410566626e-05, + "loss": 2.328, + "step": 4571000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867967182414054e-05, + "loss": 2.3015, + "step": 4571500 + }, + { + "epoch": 22.65, + "learning_rate": 3.867843323771446e-05, + "loss": 2.3389, + "step": 4572000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867719712846123e-05, + "loss": 2.2967, + "step": 4572500 + }, + { + "epoch": 22.66, + "learning_rate": 3.8675961019208004e-05, + "loss": 2.3422, + "step": 4573000 + }, + { + "epoch": 22.66, + "learning_rate": 3.867472243278192e-05, + "loss": 2.3099, + "step": 4573500 + }, + { + "epoch": 22.66, + "learning_rate": 3.867348384635584e-05, + "loss": 2.326, + "step": 4574000 + }, + { + "epoch": 22.66, + "learning_rate": 3.8672245259929755e-05, + "loss": 2.3225, + "step": 4574500 + }, + { + "epoch": 22.67, + "learning_rate": 3.8671006673503665e-05, + "loss": 2.3198, + "step": 4575000 + }, + { + "epoch": 22.67, + "learning_rate": 3.866976808707758e-05, + "loss": 2.3108, + "step": 4575500 + }, + { + "epoch": 22.67, + "learning_rate": 3.86685295006515e-05, + "loss": 2.3139, + "step": 4576000 + }, + { + "epoch": 22.67, + "learning_rate": 3.8667290914225416e-05, + "loss": 2.3271, + "step": 4576500 + }, + { + "epoch": 22.68, + "learning_rate": 3.866605232779933e-05, + "loss": 2.3365, + "step": 4577000 + }, + { + "epoch": 22.68, + "learning_rate": 3.86648162185461e-05, + "loss": 2.316, + "step": 4577500 + }, + { + "epoch": 22.68, + "learning_rate": 3.866357763212002e-05, + "loss": 2.3204, + "step": 4578000 + }, + { + "epoch": 22.68, + "learning_rate": 3.8662339045693935e-05, + "loss": 2.3074, + "step": 4578500 + }, + { + "epoch": 22.69, + "learning_rate": 3.8661100459267845e-05, + "loss": 2.3012, + "step": 4579000 + }, + { + "epoch": 22.69, + "learning_rate": 3.865986187284176e-05, + "loss": 2.3167, + "step": 4579500 + }, + { + "epoch": 22.69, + "learning_rate": 3.865862328641568e-05, + "loss": 2.3111, + "step": 4580000 + }, + { + "epoch": 22.69, + "learning_rate": 3.8657387177162455e-05, + "loss": 2.3244, + "step": 4580500 + }, + { + "epoch": 22.7, + "learning_rate": 3.865614859073637e-05, + "loss": 2.3031, + "step": 4581000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865491000431028e-05, + "loss": 2.3241, + "step": 4581500 + }, + { + "epoch": 22.7, + "learning_rate": 3.86536714178842e-05, + "loss": 2.2923, + "step": 4582000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865243530863097e-05, + "loss": 2.2862, + "step": 4582500 + }, + { + "epoch": 22.71, + "learning_rate": 3.8651196722204885e-05, + "loss": 2.3268, + "step": 4583000 + }, + { + "epoch": 22.71, + "learning_rate": 3.86499581357788e-05, + "loss": 2.3275, + "step": 4583500 + }, + { + "epoch": 22.71, + "learning_rate": 3.864871954935272e-05, + "loss": 2.3048, + "step": 4584000 + }, + { + "epoch": 22.71, + "learning_rate": 3.8647480962926635e-05, + "loss": 2.3134, + "step": 4584500 + }, + { + "epoch": 22.72, + "learning_rate": 3.8646244853673404e-05, + "loss": 2.304, + "step": 4585000 + }, + { + "epoch": 22.72, + "learning_rate": 3.864500626724732e-05, + "loss": 2.2997, + "step": 4585500 + }, + { + "epoch": 22.72, + "learning_rate": 3.864376768082124e-05, + "loss": 2.3093, + "step": 4586000 + }, + { + "epoch": 22.72, + "learning_rate": 3.8642529094395155e-05, + "loss": 2.3179, + "step": 4586500 + }, + { + "epoch": 22.73, + "learning_rate": 3.864129050796907e-05, + "loss": 2.317, + "step": 4587000 + }, + { + "epoch": 22.73, + "learning_rate": 3.8640054398715834e-05, + "loss": 2.3088, + "step": 4587500 + }, + { + "epoch": 22.73, + "learning_rate": 3.86388182894626e-05, + "loss": 2.2975, + "step": 4588000 + }, + { + "epoch": 22.73, + "learning_rate": 3.863757970303652e-05, + "loss": 2.3084, + "step": 4588500 + }, + { + "epoch": 22.74, + "learning_rate": 3.8636341116610437e-05, + "loss": 2.2775, + "step": 4589000 + }, + { + "epoch": 22.74, + "learning_rate": 3.8635102530184353e-05, + "loss": 2.2953, + "step": 4589500 + }, + { + "epoch": 22.74, + "learning_rate": 3.863386642093112e-05, + "loss": 2.3188, + "step": 4590000 + }, + { + "epoch": 22.74, + "learning_rate": 3.863262783450504e-05, + "loss": 2.3119, + "step": 4590500 + }, + { + "epoch": 22.75, + "learning_rate": 3.8631389248078956e-05, + "loss": 2.2956, + "step": 4591000 + }, + { + "epoch": 22.75, + "learning_rate": 3.863015066165287e-05, + "loss": 2.2869, + "step": 4591500 + }, + { + "epoch": 22.75, + "learning_rate": 3.862891207522678e-05, + "loss": 2.3249, + "step": 4592000 + }, + { + "epoch": 22.75, + "learning_rate": 3.86276734888007e-05, + "loss": 2.3244, + "step": 4592500 + }, + { + "epoch": 22.76, + "learning_rate": 3.862643737954747e-05, + "loss": 2.3138, + "step": 4593000 + }, + { + "epoch": 22.76, + "learning_rate": 3.8625198793121386e-05, + "loss": 2.2791, + "step": 4593500 + }, + { + "epoch": 22.76, + "learning_rate": 3.86239602066953e-05, + "loss": 2.313, + "step": 4594000 + }, + { + "epoch": 22.76, + "learning_rate": 3.862272409744208e-05, + "loss": 2.3295, + "step": 4594500 + }, + { + "epoch": 22.77, + "learning_rate": 3.862148551101599e-05, + "loss": 2.3138, + "step": 4595000 + }, + { + "epoch": 22.77, + "learning_rate": 3.8620246924589905e-05, + "loss": 2.2907, + "step": 4595500 + }, + { + "epoch": 22.77, + "learning_rate": 3.861900833816382e-05, + "loss": 2.3033, + "step": 4596000 + }, + { + "epoch": 22.77, + "learning_rate": 3.861776975173774e-05, + "loss": 2.3326, + "step": 4596500 + }, + { + "epoch": 22.78, + "learning_rate": 3.8616531165311656e-05, + "loss": 2.3104, + "step": 4597000 + }, + { + "epoch": 22.78, + "learning_rate": 3.861529257888557e-05, + "loss": 2.3412, + "step": 4597500 + }, + { + "epoch": 22.78, + "learning_rate": 3.861405399245949e-05, + "loss": 2.3166, + "step": 4598000 + }, + { + "epoch": 22.78, + "learning_rate": 3.86128154060334e-05, + "loss": 2.3151, + "step": 4598500 + }, + { + "epoch": 22.79, + "learning_rate": 3.861157681960732e-05, + "loss": 2.325, + "step": 4599000 + }, + { + "epoch": 22.79, + "learning_rate": 3.8610338233181234e-05, + "loss": 2.307, + "step": 4599500 + }, + { + "epoch": 22.79, + "learning_rate": 3.860909964675515e-05, + "loss": 2.3147, + "step": 4600000 + }, + { + "epoch": 22.79, + "learning_rate": 3.860786106032907e-05, + "loss": 2.3078, + "step": 4600500 + }, + { + "epoch": 22.79, + "learning_rate": 3.8606622473902985e-05, + "loss": 2.3078, + "step": 4601000 + }, + { + "epoch": 22.8, + "learning_rate": 3.86053838874769e-05, + "loss": 2.3242, + "step": 4601500 + }, + { + "epoch": 22.8, + "learning_rate": 3.860414777822367e-05, + "loss": 2.3032, + "step": 4602000 + }, + { + "epoch": 22.8, + "learning_rate": 3.860290919179759e-05, + "loss": 2.3121, + "step": 4602500 + }, + { + "epoch": 22.8, + "learning_rate": 3.8601670605371504e-05, + "loss": 2.3143, + "step": 4603000 + }, + { + "epoch": 22.81, + "learning_rate": 3.860043201894542e-05, + "loss": 2.3154, + "step": 4603500 + }, + { + "epoch": 22.81, + "learning_rate": 3.859919343251934e-05, + "loss": 2.327, + "step": 4604000 + }, + { + "epoch": 22.81, + "learning_rate": 3.8597954846093255e-05, + "loss": 2.3061, + "step": 4604500 + }, + { + "epoch": 22.81, + "learning_rate": 3.859671625966717e-05, + "loss": 2.3172, + "step": 4605000 + }, + { + "epoch": 22.82, + "learning_rate": 3.859547767324109e-05, + "loss": 2.3136, + "step": 4605500 + }, + { + "epoch": 22.82, + "learning_rate": 3.8594239086815006e-05, + "loss": 2.3018, + "step": 4606000 + }, + { + "epoch": 22.82, + "learning_rate": 3.859300297756177e-05, + "loss": 2.2998, + "step": 4606500 + }, + { + "epoch": 22.82, + "learning_rate": 3.8591764391135685e-05, + "loss": 2.3314, + "step": 4607000 + }, + { + "epoch": 22.83, + "learning_rate": 3.85905258047096e-05, + "loss": 2.3057, + "step": 4607500 + }, + { + "epoch": 22.83, + "learning_rate": 3.858928721828352e-05, + "loss": 2.3195, + "step": 4608000 + }, + { + "epoch": 22.83, + "learning_rate": 3.8588048631857436e-05, + "loss": 2.3239, + "step": 4608500 + }, + { + "epoch": 22.83, + "learning_rate": 3.8586812522604205e-05, + "loss": 2.293, + "step": 4609000 + }, + { + "epoch": 22.84, + "learning_rate": 3.858557393617812e-05, + "loss": 2.3212, + "step": 4609500 + }, + { + "epoch": 22.84, + "learning_rate": 3.858433534975204e-05, + "loss": 2.3048, + "step": 4610000 + }, + { + "epoch": 22.84, + "learning_rate": 3.8583096763325955e-05, + "loss": 2.2934, + "step": 4610500 + }, + { + "epoch": 22.84, + "learning_rate": 3.858185817689987e-05, + "loss": 2.2999, + "step": 4611000 + }, + { + "epoch": 22.85, + "learning_rate": 3.858062206764664e-05, + "loss": 2.3105, + "step": 4611500 + }, + { + "epoch": 22.85, + "learning_rate": 3.857938348122055e-05, + "loss": 2.3013, + "step": 4612000 + }, + { + "epoch": 22.85, + "learning_rate": 3.857814489479447e-05, + "loss": 2.3245, + "step": 4612500 + }, + { + "epoch": 22.85, + "learning_rate": 3.8576911262714096e-05, + "loss": 2.311, + "step": 4613000 + }, + { + "epoch": 22.86, + "learning_rate": 3.857567267628801e-05, + "loss": 2.3275, + "step": 4613500 + }, + { + "epoch": 22.86, + "learning_rate": 3.857443408986192e-05, + "loss": 2.3123, + "step": 4614000 + }, + { + "epoch": 22.86, + "learning_rate": 3.857319550343584e-05, + "loss": 2.3256, + "step": 4614500 + }, + { + "epoch": 22.86, + "learning_rate": 3.8571956917009757e-05, + "loss": 2.3182, + "step": 4615000 + }, + { + "epoch": 22.87, + "learning_rate": 3.8570718330583674e-05, + "loss": 2.3452, + "step": 4615500 + }, + { + "epoch": 22.87, + "learning_rate": 3.856947974415759e-05, + "loss": 2.3253, + "step": 4616000 + }, + { + "epoch": 22.87, + "learning_rate": 3.856824115773151e-05, + "loss": 2.3012, + "step": 4616500 + }, + { + "epoch": 22.87, + "learning_rate": 3.8567002571305424e-05, + "loss": 2.3173, + "step": 4617000 + }, + { + "epoch": 22.88, + "learning_rate": 3.856576398487934e-05, + "loss": 2.3155, + "step": 4617500 + }, + { + "epoch": 22.88, + "learning_rate": 3.856452539845326e-05, + "loss": 2.2929, + "step": 4618000 + }, + { + "epoch": 22.88, + "learning_rate": 3.8563286812027175e-05, + "loss": 2.3329, + "step": 4618500 + }, + { + "epoch": 22.88, + "learning_rate": 3.8562048225601085e-05, + "loss": 2.3319, + "step": 4619000 + }, + { + "epoch": 22.89, + "learning_rate": 3.8560809639175e-05, + "loss": 2.3235, + "step": 4619500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855957600709462e-05, + "loss": 2.3148, + "step": 4620000 + }, + { + "epoch": 22.89, + "learning_rate": 3.855833742066854e-05, + "loss": 2.3293, + "step": 4620500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855709883424246e-05, + "loss": 2.2721, + "step": 4621000 + }, + { + "epoch": 22.9, + "learning_rate": 3.8555860247816374e-05, + "loss": 2.3437, + "step": 4621500 + }, + { + "epoch": 22.9, + "learning_rate": 3.855462166139029e-05, + "loss": 2.3259, + "step": 4622000 + }, + { + "epoch": 22.9, + "learning_rate": 3.855338307496421e-05, + "loss": 2.3164, + "step": 4622500 + }, + { + "epoch": 22.9, + "learning_rate": 3.8552146965710976e-05, + "loss": 2.3135, + "step": 4623000 + }, + { + "epoch": 22.91, + "learning_rate": 3.8550910856457745e-05, + "loss": 2.3246, + "step": 4623500 + }, + { + "epoch": 22.91, + "learning_rate": 3.854967227003166e-05, + "loss": 2.3097, + "step": 4624000 + }, + { + "epoch": 22.91, + "learning_rate": 3.854843368360558e-05, + "loss": 2.3233, + "step": 4624500 + }, + { + "epoch": 22.91, + "learning_rate": 3.8547195097179496e-05, + "loss": 2.3195, + "step": 4625000 + }, + { + "epoch": 22.92, + "learning_rate": 3.854595651075341e-05, + "loss": 2.3101, + "step": 4625500 + }, + { + "epoch": 22.92, + "learning_rate": 3.854471792432733e-05, + "loss": 2.3006, + "step": 4626000 + }, + { + "epoch": 22.92, + "learning_rate": 3.854347933790124e-05, + "loss": 2.3205, + "step": 4626500 + }, + { + "epoch": 22.92, + "learning_rate": 3.854224075147516e-05, + "loss": 2.3168, + "step": 4627000 + }, + { + "epoch": 22.93, + "learning_rate": 3.8541002165049074e-05, + "loss": 2.3087, + "step": 4627500 + }, + { + "epoch": 22.93, + "learning_rate": 3.853976357862299e-05, + "loss": 2.3106, + "step": 4628000 + }, + { + "epoch": 22.93, + "learning_rate": 3.853852746936976e-05, + "loss": 2.3153, + "step": 4628500 + }, + { + "epoch": 22.93, + "learning_rate": 3.8537288882943676e-05, + "loss": 2.3313, + "step": 4629000 + }, + { + "epoch": 22.94, + "learning_rate": 3.8536050296517587e-05, + "loss": 2.3148, + "step": 4629500 + }, + { + "epoch": 22.94, + "learning_rate": 3.8534811710091504e-05, + "loss": 2.3088, + "step": 4630000 + }, + { + "epoch": 22.94, + "learning_rate": 3.853357312366542e-05, + "loss": 2.3086, + "step": 4630500 + }, + { + "epoch": 22.94, + "learning_rate": 3.853233453723934e-05, + "loss": 2.3283, + "step": 4631000 + }, + { + "epoch": 22.95, + "learning_rate": 3.853109842798611e-05, + "loss": 2.3124, + "step": 4631500 + }, + { + "epoch": 22.95, + "learning_rate": 3.852985984156003e-05, + "loss": 2.3007, + "step": 4632000 + }, + { + "epoch": 22.95, + "learning_rate": 3.852862125513394e-05, + "loss": 2.318, + "step": 4632500 + }, + { + "epoch": 22.95, + "learning_rate": 3.852738266870786e-05, + "loss": 2.3008, + "step": 4633000 + }, + { + "epoch": 22.96, + "learning_rate": 3.8526144082281774e-05, + "loss": 2.3385, + "step": 4633500 + }, + { + "epoch": 22.96, + "learning_rate": 3.852490549585569e-05, + "loss": 2.3005, + "step": 4634000 + }, + { + "epoch": 22.96, + "learning_rate": 3.852366690942961e-05, + "loss": 2.2903, + "step": 4634500 + }, + { + "epoch": 22.96, + "learning_rate": 3.8522428323003525e-05, + "loss": 2.3239, + "step": 4635000 + }, + { + "epoch": 22.97, + "learning_rate": 3.8521192213750294e-05, + "loss": 2.3164, + "step": 4635500 + }, + { + "epoch": 22.97, + "learning_rate": 3.8519953627324204e-05, + "loss": 2.3282, + "step": 4636000 + }, + { + "epoch": 22.97, + "learning_rate": 3.851871504089812e-05, + "loss": 2.3116, + "step": 4636500 + }, + { + "epoch": 22.97, + "learning_rate": 3.851747645447204e-05, + "loss": 2.3066, + "step": 4637000 + }, + { + "epoch": 22.98, + "learning_rate": 3.8516237868045954e-05, + "loss": 2.3298, + "step": 4637500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851499928161987e-05, + "loss": 2.3011, + "step": 4638000 + }, + { + "epoch": 22.98, + "learning_rate": 3.851376069519379e-05, + "loss": 2.33, + "step": 4638500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851252458594056e-05, + "loss": 2.2978, + "step": 4639000 + }, + { + "epoch": 22.99, + "learning_rate": 3.8511285999514474e-05, + "loss": 2.3144, + "step": 4639500 + }, + { + "epoch": 22.99, + "learning_rate": 3.851004741308839e-05, + "loss": 2.3008, + "step": 4640000 + }, + { + "epoch": 22.99, + "learning_rate": 3.850880882666231e-05, + "loss": 2.3002, + "step": 4640500 + }, + { + "epoch": 22.99, + "learning_rate": 3.8507570240236225e-05, + "loss": 2.3052, + "step": 4641000 + }, + { + "epoch": 23.0, + "learning_rate": 3.850633165381014e-05, + "loss": 2.3447, + "step": 4641500 + }, + { + "epoch": 23.0, + "learning_rate": 3.850509306738406e-05, + "loss": 2.2959, + "step": 4642000 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.6535051236946925, + "eval_accuracy_mlm": 0.6085347615669268, + "eval_accuracy_nsp": 0.8653273663608658, + "eval_loss": 2.3427975177764893, + "eval_runtime": 145.7935, + "eval_samples_per_second": 1748.768, + "eval_steps_per_second": 72.87, + "step": 4642389 + }, + { + "epoch": 23.0, + "learning_rate": 3.8503854480957976e-05, + "loss": 2.2864, + "step": 4642500 + }, + { + "epoch": 23.0, + "learning_rate": 3.850261589453189e-05, + "loss": 2.2797, + "step": 4643000 + }, + { + "epoch": 23.01, + "learning_rate": 3.850137730810581e-05, + "loss": 2.2687, + "step": 4643500 + }, + { + "epoch": 23.01, + "learning_rate": 3.8500138721679726e-05, + "loss": 2.2998, + "step": 4644000 + }, + { + "epoch": 23.01, + "learning_rate": 3.849890261242649e-05, + "loss": 2.2772, + "step": 4644500 + }, + { + "epoch": 23.01, + "learning_rate": 3.8497664026000405e-05, + "loss": 2.2775, + "step": 4645000 + }, + { + "epoch": 23.02, + "learning_rate": 3.849642543957432e-05, + "loss": 2.2837, + "step": 4645500 + }, + { + "epoch": 23.02, + "learning_rate": 3.849518685314824e-05, + "loss": 2.2925, + "step": 4646000 + }, + { + "epoch": 23.02, + "learning_rate": 3.849395074389501e-05, + "loss": 2.2727, + "step": 4646500 + }, + { + "epoch": 23.02, + "learning_rate": 3.8492712157468925e-05, + "loss": 2.2708, + "step": 4647000 + }, + { + "epoch": 23.03, + "learning_rate": 3.849147357104284e-05, + "loss": 2.2694, + "step": 4647500 + }, + { + "epoch": 23.03, + "learning_rate": 3.849023498461676e-05, + "loss": 2.2986, + "step": 4648000 + }, + { + "epoch": 23.03, + "learning_rate": 3.8488996398190676e-05, + "loss": 2.2675, + "step": 4648500 + }, + { + "epoch": 23.03, + "learning_rate": 3.848775781176459e-05, + "loss": 2.2932, + "step": 4649000 + }, + { + "epoch": 23.04, + "learning_rate": 3.848651922533851e-05, + "loss": 2.2778, + "step": 4649500 + }, + { + "epoch": 23.04, + "learning_rate": 3.8485280638912427e-05, + "loss": 2.2582, + "step": 4650000 + }, + { + "epoch": 23.04, + "learning_rate": 3.848404452965919e-05, + "loss": 2.2853, + "step": 4650500 + }, + { + "epoch": 23.04, + "learning_rate": 3.8482805943233105e-05, + "loss": 2.2806, + "step": 4651000 + }, + { + "epoch": 23.05, + "learning_rate": 3.848156735680702e-05, + "loss": 2.3017, + "step": 4651500 + }, + { + "epoch": 23.05, + "learning_rate": 3.848032877038094e-05, + "loss": 2.2922, + "step": 4652000 + }, + { + "epoch": 23.05, + "learning_rate": 3.8479090183954856e-05, + "loss": 2.2852, + "step": 4652500 + }, + { + "epoch": 23.05, + "learning_rate": 3.847785159752877e-05, + "loss": 2.2834, + "step": 4653000 + }, + { + "epoch": 23.06, + "learning_rate": 3.847661301110269e-05, + "loss": 2.2794, + "step": 4653500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847537690184946e-05, + "loss": 2.2736, + "step": 4654000 + }, + { + "epoch": 23.06, + "learning_rate": 3.8474138315423376e-05, + "loss": 2.2817, + "step": 4654500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847289972899729e-05, + "loss": 2.3046, + "step": 4655000 + }, + { + "epoch": 23.06, + "learning_rate": 3.847166114257121e-05, + "loss": 2.2827, + "step": 4655500 + }, + { + "epoch": 23.07, + "learning_rate": 3.847042255614513e-05, + "loss": 2.3029, + "step": 4656000 + }, + { + "epoch": 23.07, + "learning_rate": 3.8469183969719044e-05, + "loss": 2.2855, + "step": 4656500 + }, + { + "epoch": 23.07, + "learning_rate": 3.846794538329296e-05, + "loss": 2.2797, + "step": 4657000 + }, + { + "epoch": 23.07, + "learning_rate": 3.846670927403972e-05, + "loss": 2.269, + "step": 4657500 + }, + { + "epoch": 23.08, + "learning_rate": 3.846547068761364e-05, + "loss": 2.3017, + "step": 4658000 + }, + { + "epoch": 23.08, + "learning_rate": 3.8464232101187556e-05, + "loss": 2.3036, + "step": 4658500 + }, + { + "epoch": 23.08, + "learning_rate": 3.846299351476147e-05, + "loss": 2.2968, + "step": 4659000 + }, + { + "epoch": 23.08, + "learning_rate": 3.846175492833539e-05, + "loss": 2.2592, + "step": 4659500 + }, + { + "epoch": 23.09, + "learning_rate": 3.846051634190931e-05, + "loss": 2.2923, + "step": 4660000 + }, + { + "epoch": 23.09, + "learning_rate": 3.8459277755483224e-05, + "loss": 2.2877, + "step": 4660500 + }, + { + "epoch": 23.09, + "learning_rate": 3.845804164622999e-05, + "loss": 2.2687, + "step": 4661000 + }, + { + "epoch": 23.09, + "learning_rate": 3.845680305980391e-05, + "loss": 2.2669, + "step": 4661500 + }, + { + "epoch": 23.1, + "learning_rate": 3.845556447337783e-05, + "loss": 2.2845, + "step": 4662000 + }, + { + "epoch": 23.1, + "learning_rate": 3.8454325886951744e-05, + "loss": 2.2866, + "step": 4662500 + }, + { + "epoch": 23.1, + "learning_rate": 3.845308730052566e-05, + "loss": 2.3037, + "step": 4663000 + }, + { + "epoch": 23.1, + "learning_rate": 3.845184871409958e-05, + "loss": 2.2609, + "step": 4663500 + }, + { + "epoch": 23.11, + "learning_rate": 3.8450610127673494e-05, + "loss": 2.2816, + "step": 4664000 + }, + { + "epoch": 23.11, + "learning_rate": 3.844937154124741e-05, + "loss": 2.2791, + "step": 4664500 + }, + { + "epoch": 23.11, + "learning_rate": 3.8448135431994173e-05, + "loss": 2.3103, + "step": 4665000 + }, + { + "epoch": 23.11, + "learning_rate": 3.844689932274094e-05, + "loss": 2.2907, + "step": 4665500 + }, + { + "epoch": 23.12, + "learning_rate": 3.844566073631486e-05, + "loss": 2.296, + "step": 4666000 + }, + { + "epoch": 23.12, + "learning_rate": 3.8444422149888776e-05, + "loss": 2.2811, + "step": 4666500 + }, + { + "epoch": 23.12, + "learning_rate": 3.844318356346269e-05, + "loss": 2.2881, + "step": 4667000 + }, + { + "epoch": 23.12, + "learning_rate": 3.844194497703661e-05, + "loss": 2.2979, + "step": 4667500 + }, + { + "epoch": 23.13, + "learning_rate": 3.844070886778338e-05, + "loss": 2.3004, + "step": 4668000 + }, + { + "epoch": 23.13, + "learning_rate": 3.8439470281357296e-05, + "loss": 2.2805, + "step": 4668500 + }, + { + "epoch": 23.13, + "learning_rate": 3.843823169493121e-05, + "loss": 2.2995, + "step": 4669000 + }, + { + "epoch": 23.13, + "learning_rate": 3.843699310850512e-05, + "loss": 2.2929, + "step": 4669500 + }, + { + "epoch": 23.14, + "learning_rate": 3.843575452207904e-05, + "loss": 2.3062, + "step": 4670000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843451593565296e-05, + "loss": 2.3233, + "step": 4670500 + }, + { + "epoch": 23.14, + "learning_rate": 3.8433277349226874e-05, + "loss": 2.3191, + "step": 4671000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843203876280079e-05, + "loss": 2.2908, + "step": 4671500 + }, + { + "epoch": 23.15, + "learning_rate": 3.843080265354756e-05, + "loss": 2.267, + "step": 4672000 + }, + { + "epoch": 23.15, + "learning_rate": 3.842956902146718e-05, + "loss": 2.2694, + "step": 4672500 + }, + { + "epoch": 23.15, + "learning_rate": 3.84283304350411e-05, + "loss": 2.2964, + "step": 4673000 + }, + { + "epoch": 23.15, + "learning_rate": 3.8427091848615014e-05, + "loss": 2.3116, + "step": 4673500 + }, + { + "epoch": 23.16, + "learning_rate": 3.842585573936178e-05, + "loss": 2.2689, + "step": 4674000 + }, + { + "epoch": 23.16, + "learning_rate": 3.84246171529357e-05, + "loss": 2.2976, + "step": 4674500 + }, + { + "epoch": 23.16, + "learning_rate": 3.8423378566509617e-05, + "loss": 2.3003, + "step": 4675000 + }, + { + "epoch": 23.16, + "learning_rate": 3.8422139980083533e-05, + "loss": 2.3213, + "step": 4675500 + }, + { + "epoch": 23.17, + "learning_rate": 3.842090139365745e-05, + "loss": 2.287, + "step": 4676000 + }, + { + "epoch": 23.17, + "learning_rate": 3.841966280723137e-05, + "loss": 2.303, + "step": 4676500 + }, + { + "epoch": 23.17, + "learning_rate": 3.8418424220805284e-05, + "loss": 2.2827, + "step": 4677000 + }, + { + "epoch": 23.17, + "learning_rate": 3.84171856343792e-05, + "loss": 2.2881, + "step": 4677500 + }, + { + "epoch": 23.18, + "learning_rate": 3.841594704795312e-05, + "loss": 2.3183, + "step": 4678000 + }, + { + "epoch": 23.18, + "learning_rate": 3.8414708461527035e-05, + "loss": 2.3166, + "step": 4678500 + }, + { + "epoch": 23.18, + "learning_rate": 3.841346987510095e-05, + "loss": 2.3065, + "step": 4679000 + }, + { + "epoch": 23.18, + "learning_rate": 3.841223128867486e-05, + "loss": 2.278, + "step": 4679500 + }, + { + "epoch": 23.19, + "learning_rate": 3.841099270224878e-05, + "loss": 2.3138, + "step": 4680000 + }, + { + "epoch": 23.19, + "learning_rate": 3.8409754115822696e-05, + "loss": 2.3044, + "step": 4680500 + }, + { + "epoch": 23.19, + "learning_rate": 3.840851552939661e-05, + "loss": 2.2684, + "step": 4681000 + }, + { + "epoch": 23.19, + "learning_rate": 3.840727694297053e-05, + "loss": 2.293, + "step": 4681500 + }, + { + "epoch": 23.2, + "learning_rate": 3.840603835654445e-05, + "loss": 2.3256, + "step": 4682000 + }, + { + "epoch": 23.2, + "learning_rate": 3.840480224729121e-05, + "loss": 2.2807, + "step": 4682500 + }, + { + "epoch": 23.2, + "learning_rate": 3.8403563660865126e-05, + "loss": 2.2796, + "step": 4683000 + }, + { + "epoch": 23.2, + "learning_rate": 3.840232507443904e-05, + "loss": 2.3154, + "step": 4683500 + }, + { + "epoch": 23.21, + "learning_rate": 3.840108648801296e-05, + "loss": 2.2889, + "step": 4684000 + }, + { + "epoch": 23.21, + "learning_rate": 3.8399847901586876e-05, + "loss": 2.2843, + "step": 4684500 + }, + { + "epoch": 23.21, + "learning_rate": 3.839861179233365e-05, + "loss": 2.2907, + "step": 4685000 + }, + { + "epoch": 23.21, + "learning_rate": 3.839737320590757e-05, + "loss": 2.3155, + "step": 4685500 + }, + { + "epoch": 23.22, + "learning_rate": 3.839613461948148e-05, + "loss": 2.3057, + "step": 4686000 + }, + { + "epoch": 23.22, + "learning_rate": 3.8394896033055396e-05, + "loss": 2.2936, + "step": 4686500 + }, + { + "epoch": 23.22, + "learning_rate": 3.839365744662931e-05, + "loss": 2.3158, + "step": 4687000 + }, + { + "epoch": 23.22, + "learning_rate": 3.839241886020323e-05, + "loss": 2.2809, + "step": 4687500 + }, + { + "epoch": 23.23, + "learning_rate": 3.839118027377715e-05, + "loss": 2.2988, + "step": 4688000 + }, + { + "epoch": 23.23, + "learning_rate": 3.8389941687351064e-05, + "loss": 2.2952, + "step": 4688500 + }, + { + "epoch": 23.23, + "learning_rate": 3.838870310092498e-05, + "loss": 2.2915, + "step": 4689000 + }, + { + "epoch": 23.23, + "learning_rate": 3.83874645144989e-05, + "loss": 2.2737, + "step": 4689500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838622592807281e-05, + "loss": 2.2757, + "step": 4690000 + }, + { + "epoch": 23.24, + "learning_rate": 3.8384989818819577e-05, + "loss": 2.3108, + "step": 4690500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838375370956635e-05, + "loss": 2.3196, + "step": 4691000 + }, + { + "epoch": 23.24, + "learning_rate": 3.838251512314027e-05, + "loss": 2.2907, + "step": 4691500 + }, + { + "epoch": 23.25, + "learning_rate": 3.838127653671418e-05, + "loss": 2.2718, + "step": 4692000 + }, + { + "epoch": 23.25, + "learning_rate": 3.8380037950288096e-05, + "loss": 2.2993, + "step": 4692500 + }, + { + "epoch": 23.25, + "learning_rate": 3.8378801841034865e-05, + "loss": 2.3027, + "step": 4693000 + }, + { + "epoch": 23.25, + "learning_rate": 3.837756325460878e-05, + "loss": 2.29, + "step": 4693500 + }, + { + "epoch": 23.26, + "learning_rate": 3.83763246681827e-05, + "loss": 2.2988, + "step": 4694000 + }, + { + "epoch": 23.26, + "learning_rate": 3.8375086081756616e-05, + "loss": 2.2946, + "step": 4694500 + }, + { + "epoch": 23.26, + "learning_rate": 3.8373847495330526e-05, + "loss": 2.3021, + "step": 4695000 + }, + { + "epoch": 23.26, + "learning_rate": 3.837260890890444e-05, + "loss": 2.2775, + "step": 4695500 + }, + { + "epoch": 23.27, + "learning_rate": 3.837137032247836e-05, + "loss": 2.2802, + "step": 4696000 + }, + { + "epoch": 23.27, + "learning_rate": 3.837013173605228e-05, + "loss": 2.3041, + "step": 4696500 + }, + { + "epoch": 23.27, + "learning_rate": 3.8368893149626194e-05, + "loss": 2.2622, + "step": 4697000 + }, + { + "epoch": 23.27, + "learning_rate": 3.836765704037297e-05, + "loss": 2.305, + "step": 4697500 + }, + { + "epoch": 23.28, + "learning_rate": 3.8366418453946886e-05, + "loss": 2.3045, + "step": 4698000 + }, + { + "epoch": 23.28, + "learning_rate": 3.836518234469365e-05, + "loss": 2.282, + "step": 4698500 + }, + { + "epoch": 23.28, + "learning_rate": 3.8363943758267565e-05, + "loss": 2.2845, + "step": 4699000 + }, + { + "epoch": 23.28, + "learning_rate": 3.836270517184148e-05, + "loss": 2.2755, + "step": 4699500 + }, + { + "epoch": 23.29, + "learning_rate": 3.83614665854154e-05, + "loss": 2.2895, + "step": 4700000 + }, + { + "epoch": 23.29, + "learning_rate": 3.8360227998989316e-05, + "loss": 2.2926, + "step": 4700500 + }, + { + "epoch": 23.29, + "learning_rate": 3.835898941256323e-05, + "loss": 2.2743, + "step": 4701000 + }, + { + "epoch": 23.29, + "learning_rate": 3.835775330331e-05, + "loss": 2.2907, + "step": 4701500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835651471688392e-05, + "loss": 2.2967, + "step": 4702000 + }, + { + "epoch": 23.3, + "learning_rate": 3.8355276130457836e-05, + "loss": 2.3121, + "step": 4702500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835403754403175e-05, + "loss": 2.3018, + "step": 4703000 + }, + { + "epoch": 23.3, + "learning_rate": 3.835279895760567e-05, + "loss": 2.2903, + "step": 4703500 + }, + { + "epoch": 23.31, + "learning_rate": 3.8351560371179586e-05, + "loss": 2.3101, + "step": 4704000 + }, + { + "epoch": 23.31, + "learning_rate": 3.8350321784753496e-05, + "loss": 2.2976, + "step": 4704500 + }, + { + "epoch": 23.31, + "learning_rate": 3.8349085675500265e-05, + "loss": 2.2819, + "step": 4705000 + }, + { + "epoch": 23.31, + "learning_rate": 3.834784708907418e-05, + "loss": 2.2727, + "step": 4705500 + }, + { + "epoch": 23.32, + "learning_rate": 3.834661097982095e-05, + "loss": 2.2911, + "step": 4706000 + }, + { + "epoch": 23.32, + "learning_rate": 3.834537239339487e-05, + "loss": 2.3118, + "step": 4706500 + }, + { + "epoch": 23.32, + "learning_rate": 3.8344133806968785e-05, + "loss": 2.3047, + "step": 4707000 + }, + { + "epoch": 23.32, + "learning_rate": 3.83428952205427e-05, + "loss": 2.3097, + "step": 4707500 + }, + { + "epoch": 23.33, + "learning_rate": 3.834165663411662e-05, + "loss": 2.2727, + "step": 4708000 + }, + { + "epoch": 23.33, + "learning_rate": 3.8340418047690536e-05, + "loss": 2.313, + "step": 4708500 + }, + { + "epoch": 23.33, + "learning_rate": 3.833917946126445e-05, + "loss": 2.295, + "step": 4709000 + }, + { + "epoch": 23.33, + "learning_rate": 3.833794087483837e-05, + "loss": 2.3, + "step": 4709500 + }, + { + "epoch": 23.33, + "learning_rate": 3.8336702288412286e-05, + "loss": 2.287, + "step": 4710000 + }, + { + "epoch": 23.34, + "learning_rate": 3.83354637019862e-05, + "loss": 2.2963, + "step": 4710500 + }, + { + "epoch": 23.34, + "learning_rate": 3.8334227592732965e-05, + "loss": 2.2942, + "step": 4711000 + }, + { + "epoch": 23.34, + "learning_rate": 3.833298900630688e-05, + "loss": 2.2964, + "step": 4711500 + }, + { + "epoch": 23.34, + "learning_rate": 3.83317504198808e-05, + "loss": 2.3011, + "step": 4712000 + }, + { + "epoch": 23.35, + "learning_rate": 3.8330511833454716e-05, + "loss": 2.2922, + "step": 4712500 + }, + { + "epoch": 23.35, + "learning_rate": 3.832927324702863e-05, + "loss": 2.2892, + "step": 4713000 + }, + { + "epoch": 23.35, + "learning_rate": 3.832803466060255e-05, + "loss": 2.3098, + "step": 4713500 + }, + { + "epoch": 23.35, + "learning_rate": 3.832679607417646e-05, + "loss": 2.2946, + "step": 4714000 + }, + { + "epoch": 23.36, + "learning_rate": 3.8325559964923236e-05, + "loss": 2.2958, + "step": 4714500 + }, + { + "epoch": 23.36, + "learning_rate": 3.832432137849715e-05, + "loss": 2.3095, + "step": 4715000 + }, + { + "epoch": 23.36, + "learning_rate": 3.832308279207107e-05, + "loss": 2.2891, + "step": 4715500 + }, + { + "epoch": 23.36, + "learning_rate": 3.8321844205644987e-05, + "loss": 2.2835, + "step": 4716000 + }, + { + "epoch": 23.37, + "learning_rate": 3.8320605619218903e-05, + "loss": 2.3211, + "step": 4716500 + }, + { + "epoch": 23.37, + "learning_rate": 3.831936950996567e-05, + "loss": 2.2844, + "step": 4717000 + }, + { + "epoch": 23.37, + "learning_rate": 3.831813092353958e-05, + "loss": 2.298, + "step": 4717500 + }, + { + "epoch": 23.37, + "learning_rate": 3.83168923371135e-05, + "loss": 2.2937, + "step": 4718000 + }, + { + "epoch": 23.38, + "learning_rate": 3.8315653750687416e-05, + "loss": 2.2833, + "step": 4718500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831441516426133e-05, + "loss": 2.2817, + "step": 4719000 + }, + { + "epoch": 23.38, + "learning_rate": 3.831317657783525e-05, + "loss": 2.2641, + "step": 4719500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831193799140916e-05, + "loss": 2.3006, + "step": 4720000 + }, + { + "epoch": 23.39, + "learning_rate": 3.8310701882155936e-05, + "loss": 2.3117, + "step": 4720500 + }, + { + "epoch": 23.39, + "learning_rate": 3.830946329572985e-05, + "loss": 2.2952, + "step": 4721000 + }, + { + "epoch": 23.39, + "learning_rate": 3.830822470930377e-05, + "loss": 2.2941, + "step": 4721500 + }, + { + "epoch": 23.39, + "learning_rate": 3.830698612287769e-05, + "loss": 2.303, + "step": 4722000 + }, + { + "epoch": 23.4, + "learning_rate": 3.8305747536451604e-05, + "loss": 2.2916, + "step": 4722500 + }, + { + "epoch": 23.4, + "learning_rate": 3.830451142719837e-05, + "loss": 2.2864, + "step": 4723000 + }, + { + "epoch": 23.4, + "learning_rate": 3.830327284077229e-05, + "loss": 2.2994, + "step": 4723500 + }, + { + "epoch": 23.4, + "learning_rate": 3.8302034254346206e-05, + "loss": 2.3089, + "step": 4724000 + }, + { + "epoch": 23.41, + "learning_rate": 3.830079814509297e-05, + "loss": 2.3033, + "step": 4724500 + }, + { + "epoch": 23.41, + "learning_rate": 3.8299559558666885e-05, + "loss": 2.2952, + "step": 4725000 + }, + { + "epoch": 23.41, + "learning_rate": 3.82983209722408e-05, + "loss": 2.3225, + "step": 4725500 + }, + { + "epoch": 23.41, + "learning_rate": 3.829708238581472e-05, + "loss": 2.2881, + "step": 4726000 + }, + { + "epoch": 23.42, + "learning_rate": 3.8295843799388636e-05, + "loss": 2.2867, + "step": 4726500 + }, + { + "epoch": 23.42, + "learning_rate": 3.829460521296255e-05, + "loss": 2.2851, + "step": 4727000 + }, + { + "epoch": 23.42, + "learning_rate": 3.829336662653647e-05, + "loss": 2.2913, + "step": 4727500 + }, + { + "epoch": 23.42, + "learning_rate": 3.829212804011039e-05, + "loss": 2.2991, + "step": 4728000 + }, + { + "epoch": 23.43, + "learning_rate": 3.8290889453684304e-05, + "loss": 2.2934, + "step": 4728500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828965086725822e-05, + "loss": 2.2761, + "step": 4729000 + }, + { + "epoch": 23.43, + "learning_rate": 3.828841228083213e-05, + "loss": 2.3159, + "step": 4729500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828717369440605e-05, + "loss": 2.3028, + "step": 4730000 + }, + { + "epoch": 23.44, + "learning_rate": 3.8285935107979965e-05, + "loss": 2.3088, + "step": 4730500 + }, + { + "epoch": 23.44, + "learning_rate": 3.828469652155388e-05, + "loss": 2.2843, + "step": 4731000 + }, + { + "epoch": 23.44, + "learning_rate": 3.828346041230065e-05, + "loss": 2.3045, + "step": 4731500 + }, + { + "epoch": 23.44, + "learning_rate": 3.828222182587457e-05, + "loss": 2.2686, + "step": 4732000 + }, + { + "epoch": 23.45, + "learning_rate": 3.8280983239448484e-05, + "loss": 2.2848, + "step": 4732500 + }, + { + "epoch": 23.45, + "learning_rate": 3.82797446530224e-05, + "loss": 2.2904, + "step": 4733000 + }, + { + "epoch": 23.45, + "learning_rate": 3.827850854376917e-05, + "loss": 2.3025, + "step": 4733500 + }, + { + "epoch": 23.45, + "learning_rate": 3.827726995734309e-05, + "loss": 2.312, + "step": 4734000 + }, + { + "epoch": 23.46, + "learning_rate": 3.8276031370917004e-05, + "loss": 2.2943, + "step": 4734500 + }, + { + "epoch": 23.46, + "learning_rate": 3.827479526166377e-05, + "loss": 2.2959, + "step": 4735000 + }, + { + "epoch": 23.46, + "learning_rate": 3.8273559152410535e-05, + "loss": 2.284, + "step": 4735500 + }, + { + "epoch": 23.46, + "learning_rate": 3.827232056598445e-05, + "loss": 2.2884, + "step": 4736000 + }, + { + "epoch": 23.47, + "learning_rate": 3.827108197955837e-05, + "loss": 2.3079, + "step": 4736500 + }, + { + "epoch": 23.47, + "learning_rate": 3.8269843393132285e-05, + "loss": 2.2866, + "step": 4737000 + }, + { + "epoch": 23.47, + "learning_rate": 3.82686048067062e-05, + "loss": 2.283, + "step": 4737500 + }, + { + "epoch": 23.47, + "learning_rate": 3.826736622028012e-05, + "loss": 2.3011, + "step": 4738000 + }, + { + "epoch": 23.48, + "learning_rate": 3.8266127633854036e-05, + "loss": 2.2811, + "step": 4738500 + }, + { + "epoch": 23.48, + "learning_rate": 3.826488904742795e-05, + "loss": 2.2937, + "step": 4739000 + }, + { + "epoch": 23.48, + "learning_rate": 3.826365046100187e-05, + "loss": 2.2939, + "step": 4739500 + }, + { + "epoch": 23.48, + "learning_rate": 3.826241187457579e-05, + "loss": 2.3019, + "step": 4740000 + }, + { + "epoch": 23.49, + "learning_rate": 3.8261173288149704e-05, + "loss": 2.3265, + "step": 4740500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825993470172362e-05, + "loss": 2.2916, + "step": 4741000 + }, + { + "epoch": 23.49, + "learning_rate": 3.825869611529754e-05, + "loss": 2.2738, + "step": 4741500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825746000604431e-05, + "loss": 2.2937, + "step": 4742000 + }, + { + "epoch": 23.5, + "learning_rate": 3.8256221419618224e-05, + "loss": 2.2827, + "step": 4742500 + }, + { + "epoch": 23.5, + "learning_rate": 3.825498283319214e-05, + "loss": 2.2835, + "step": 4743000 + }, + { + "epoch": 23.5, + "learning_rate": 3.825374424676606e-05, + "loss": 2.3097, + "step": 4743500 + }, + { + "epoch": 23.5, + "learning_rate": 3.8252505660339974e-05, + "loss": 2.3038, + "step": 4744000 + }, + { + "epoch": 23.51, + "learning_rate": 3.8251267073913884e-05, + "loss": 2.3029, + "step": 4744500 + }, + { + "epoch": 23.51, + "learning_rate": 3.82500284874878e-05, + "loss": 2.327, + "step": 4745000 + }, + { + "epoch": 23.51, + "learning_rate": 3.824878990106172e-05, + "loss": 2.3, + "step": 4745500 + }, + { + "epoch": 23.51, + "learning_rate": 3.8247551314635635e-05, + "loss": 2.2879, + "step": 4746000 + }, + { + "epoch": 23.52, + "learning_rate": 3.8246315205382404e-05, + "loss": 2.2921, + "step": 4746500 + }, + { + "epoch": 23.52, + "learning_rate": 3.824507661895632e-05, + "loss": 2.2748, + "step": 4747000 + }, + { + "epoch": 23.52, + "learning_rate": 3.824383803253024e-05, + "loss": 2.26, + "step": 4747500 + }, + { + "epoch": 23.52, + "learning_rate": 3.8242599446104155e-05, + "loss": 2.3142, + "step": 4748000 + }, + { + "epoch": 23.53, + "learning_rate": 3.8241360859678065e-05, + "loss": 2.2673, + "step": 4748500 + }, + { + "epoch": 23.53, + "learning_rate": 3.824012475042484e-05, + "loss": 2.2915, + "step": 4749000 + }, + { + "epoch": 23.53, + "learning_rate": 3.823888616399876e-05, + "loss": 2.2943, + "step": 4749500 + }, + { + "epoch": 23.53, + "learning_rate": 3.8237647577572674e-05, + "loss": 2.2901, + "step": 4750000 + }, + { + "epoch": 23.54, + "learning_rate": 3.823640899114659e-05, + "loss": 2.3014, + "step": 4750500 + }, + { + "epoch": 23.54, + "learning_rate": 3.8235172881893353e-05, + "loss": 2.3051, + "step": 4751000 + }, + { + "epoch": 23.54, + "learning_rate": 3.823393429546727e-05, + "loss": 2.3029, + "step": 4751500 + }, + { + "epoch": 23.54, + "learning_rate": 3.823269570904119e-05, + "loss": 2.3158, + "step": 4752000 + }, + { + "epoch": 23.55, + "learning_rate": 3.8231459599787956e-05, + "loss": 2.3026, + "step": 4752500 + }, + { + "epoch": 23.55, + "learning_rate": 3.823022101336187e-05, + "loss": 2.2962, + "step": 4753000 + }, + { + "epoch": 23.55, + "learning_rate": 3.822898242693579e-05, + "loss": 2.3032, + "step": 4753500 + }, + { + "epoch": 23.55, + "learning_rate": 3.822774384050971e-05, + "loss": 2.2858, + "step": 4754000 + }, + { + "epoch": 23.56, + "learning_rate": 3.8226505254083624e-05, + "loss": 2.312, + "step": 4754500 + }, + { + "epoch": 23.56, + "learning_rate": 3.8225269144830386e-05, + "loss": 2.3003, + "step": 4755000 + }, + { + "epoch": 23.56, + "learning_rate": 3.82240305584043e-05, + "loss": 2.3145, + "step": 4755500 + }, + { + "epoch": 23.56, + "learning_rate": 3.822279197197822e-05, + "loss": 2.2898, + "step": 4756000 + }, + { + "epoch": 23.57, + "learning_rate": 3.822155338555214e-05, + "loss": 2.2888, + "step": 4756500 + }, + { + "epoch": 23.57, + "learning_rate": 3.8220314799126054e-05, + "loss": 2.3015, + "step": 4757000 + }, + { + "epoch": 23.57, + "learning_rate": 3.821907621269997e-05, + "loss": 2.2855, + "step": 4757500 + }, + { + "epoch": 23.57, + "learning_rate": 3.821784010344674e-05, + "loss": 2.3261, + "step": 4758000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8216601517020656e-05, + "loss": 2.2987, + "step": 4758500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821536293059457e-05, + "loss": 2.2882, + "step": 4759000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8214129298514194e-05, + "loss": 2.3058, + "step": 4759500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821289071208811e-05, + "loss": 2.3102, + "step": 4760000 + }, + { + "epoch": 23.59, + "learning_rate": 3.821165460283488e-05, + "loss": 2.3256, + "step": 4760500 + }, + { + "epoch": 23.59, + "learning_rate": 3.8210416016408796e-05, + "loss": 2.3053, + "step": 4761000 + }, + { + "epoch": 23.59, + "learning_rate": 3.8209177429982713e-05, + "loss": 2.3171, + "step": 4761500 + }, + { + "epoch": 23.59, + "learning_rate": 3.820793884355663e-05, + "loss": 2.3362, + "step": 4762000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820670025713055e-05, + "loss": 2.3121, + "step": 4762500 + }, + { + "epoch": 23.6, + "learning_rate": 3.8205461670704464e-05, + "loss": 2.3041, + "step": 4763000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820422308427838e-05, + "loss": 2.2931, + "step": 4763500 + }, + { + "epoch": 23.6, + "learning_rate": 3.82029844978523e-05, + "loss": 2.3172, + "step": 4764000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820174591142621e-05, + "loss": 2.3078, + "step": 4764500 + }, + { + "epoch": 23.61, + "learning_rate": 3.8200507325000125e-05, + "loss": 2.2917, + "step": 4765000 + }, + { + "epoch": 23.61, + "learning_rate": 3.819926873857404e-05, + "loss": 2.3073, + "step": 4765500 + }, + { + "epoch": 23.61, + "learning_rate": 3.819803015214796e-05, + "loss": 2.2881, + "step": 4766000 + }, + { + "epoch": 23.61, + "learning_rate": 3.8196791565721876e-05, + "loss": 2.335, + "step": 4766500 + }, + { + "epoch": 23.62, + "learning_rate": 3.819555297929579e-05, + "loss": 2.2791, + "step": 4767000 + }, + { + "epoch": 23.62, + "learning_rate": 3.819431439286971e-05, + "loss": 2.2827, + "step": 4767500 + }, + { + "epoch": 23.62, + "learning_rate": 3.819307580644363e-05, + "loss": 2.3115, + "step": 4768000 + }, + { + "epoch": 23.62, + "learning_rate": 3.819183722001754e-05, + "loss": 2.3083, + "step": 4768500 + }, + { + "epoch": 23.63, + "learning_rate": 3.8190598633591454e-05, + "loss": 2.3059, + "step": 4769000 + }, + { + "epoch": 23.63, + "learning_rate": 3.818936252433822e-05, + "loss": 2.2958, + "step": 4769500 + }, + { + "epoch": 23.63, + "learning_rate": 3.8188126415085e-05, + "loss": 2.3038, + "step": 4770000 + }, + { + "epoch": 23.63, + "learning_rate": 3.8186887828658915e-05, + "loss": 2.3175, + "step": 4770500 + }, + { + "epoch": 23.64, + "learning_rate": 3.8185649242232825e-05, + "loss": 2.2936, + "step": 4771000 + }, + { + "epoch": 23.64, + "learning_rate": 3.818441065580674e-05, + "loss": 2.2973, + "step": 4771500 + }, + { + "epoch": 23.64, + "learning_rate": 3.818317206938066e-05, + "loss": 2.2993, + "step": 4772000 + }, + { + "epoch": 23.64, + "learning_rate": 3.8181933482954576e-05, + "loss": 2.3115, + "step": 4772500 + }, + { + "epoch": 23.65, + "learning_rate": 3.818069489652849e-05, + "loss": 2.3108, + "step": 4773000 + }, + { + "epoch": 23.65, + "learning_rate": 3.817945631010241e-05, + "loss": 2.3075, + "step": 4773500 + }, + { + "epoch": 23.65, + "learning_rate": 3.817821772367633e-05, + "loss": 2.2759, + "step": 4774000 + }, + { + "epoch": 23.65, + "learning_rate": 3.817698409159595e-05, + "loss": 2.3221, + "step": 4774500 + }, + { + "epoch": 23.66, + "learning_rate": 3.8175747982342716e-05, + "loss": 2.2834, + "step": 4775000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817450939591663e-05, + "loss": 2.2967, + "step": 4775500 + }, + { + "epoch": 23.66, + "learning_rate": 3.8173270809490543e-05, + "loss": 2.2973, + "step": 4776000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817203222306446e-05, + "loss": 2.2763, + "step": 4776500 + }, + { + "epoch": 23.67, + "learning_rate": 3.817079611381123e-05, + "loss": 2.2766, + "step": 4777000 + }, + { + "epoch": 23.67, + "learning_rate": 3.8169557527385146e-05, + "loss": 2.3263, + "step": 4777500 + }, + { + "epoch": 23.67, + "learning_rate": 3.816831894095906e-05, + "loss": 2.2982, + "step": 4778000 + }, + { + "epoch": 23.67, + "learning_rate": 3.816708035453298e-05, + "loss": 2.3112, + "step": 4778500 + }, + { + "epoch": 23.68, + "learning_rate": 3.81658417681069e-05, + "loss": 2.3014, + "step": 4779000 + }, + { + "epoch": 23.68, + "learning_rate": 3.8164603181680814e-05, + "loss": 2.286, + "step": 4779500 + }, + { + "epoch": 23.68, + "learning_rate": 3.816336459525473e-05, + "loss": 2.3127, + "step": 4780000 + }, + { + "epoch": 23.68, + "learning_rate": 3.816212600882865e-05, + "loss": 2.3024, + "step": 4780500 + }, + { + "epoch": 23.69, + "learning_rate": 3.8160887422402565e-05, + "loss": 2.2597, + "step": 4781000 + }, + { + "epoch": 23.69, + "learning_rate": 3.815964883597648e-05, + "loss": 2.2918, + "step": 4781500 + }, + { + "epoch": 23.69, + "learning_rate": 3.81584102495504e-05, + "loss": 2.2857, + "step": 4782000 + }, + { + "epoch": 23.69, + "learning_rate": 3.8157171663124315e-05, + "loss": 2.3056, + "step": 4782500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815593307669823e-05, + "loss": 2.2823, + "step": 4783000 + }, + { + "epoch": 23.7, + "learning_rate": 3.815469449027214e-05, + "loss": 2.2994, + "step": 4783500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815345590384606e-05, + "loss": 2.2764, + "step": 4784000 + }, + { + "epoch": 23.7, + "learning_rate": 3.8152217317419976e-05, + "loss": 2.2975, + "step": 4784500 + }, + { + "epoch": 23.71, + "learning_rate": 3.815097873099389e-05, + "loss": 2.2998, + "step": 4785000 + }, + { + "epoch": 23.71, + "learning_rate": 3.814974014456781e-05, + "loss": 2.293, + "step": 4785500 + }, + { + "epoch": 23.71, + "learning_rate": 3.814850403531458e-05, + "loss": 2.2987, + "step": 4786000 + }, + { + "epoch": 23.71, + "learning_rate": 3.814726544888849e-05, + "loss": 2.2947, + "step": 4786500 + }, + { + "epoch": 23.72, + "learning_rate": 3.8146026862462406e-05, + "loss": 2.296, + "step": 4787000 + }, + { + "epoch": 23.72, + "learning_rate": 3.814478827603632e-05, + "loss": 2.3184, + "step": 4787500 + }, + { + "epoch": 23.72, + "learning_rate": 3.814354968961024e-05, + "loss": 2.3023, + "step": 4788000 + }, + { + "epoch": 23.72, + "learning_rate": 3.814231110318416e-05, + "loss": 2.2848, + "step": 4788500 + }, + { + "epoch": 23.73, + "learning_rate": 3.8141072516758074e-05, + "loss": 2.2869, + "step": 4789000 + }, + { + "epoch": 23.73, + "learning_rate": 3.813983640750484e-05, + "loss": 2.3372, + "step": 4789500 + }, + { + "epoch": 23.73, + "learning_rate": 3.813859782107876e-05, + "loss": 2.3091, + "step": 4790000 + }, + { + "epoch": 23.73, + "learning_rate": 3.8137359234652676e-05, + "loss": 2.2714, + "step": 4790500 + }, + { + "epoch": 23.74, + "learning_rate": 3.813612064822659e-05, + "loss": 2.2939, + "step": 4791000 + }, + { + "epoch": 23.74, + "learning_rate": 3.813488206180051e-05, + "loss": 2.3059, + "step": 4791500 + }, + { + "epoch": 23.74, + "learning_rate": 3.813364347537443e-05, + "loss": 2.2798, + "step": 4792000 + }, + { + "epoch": 23.74, + "learning_rate": 3.8132407366121196e-05, + "loss": 2.2939, + "step": 4792500 + }, + { + "epoch": 23.75, + "learning_rate": 3.8131168779695106e-05, + "loss": 2.2965, + "step": 4793000 + }, + { + "epoch": 23.75, + "learning_rate": 3.812993019326902e-05, + "loss": 2.2802, + "step": 4793500 + }, + { + "epoch": 23.75, + "learning_rate": 3.81286940840158e-05, + "loss": 2.3213, + "step": 4794000 + }, + { + "epoch": 23.75, + "learning_rate": 3.8127455497589716e-05, + "loss": 2.3187, + "step": 4794500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812621691116363e-05, + "loss": 2.314, + "step": 4795000 + }, + { + "epoch": 23.76, + "learning_rate": 3.812497832473755e-05, + "loss": 2.283, + "step": 4795500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812373973831146e-05, + "loss": 2.3006, + "step": 4796000 + }, + { + "epoch": 23.76, + "learning_rate": 3.812250362905823e-05, + "loss": 2.3053, + "step": 4796500 + }, + { + "epoch": 23.77, + "learning_rate": 3.8121265042632145e-05, + "loss": 2.2898, + "step": 4797000 + }, + { + "epoch": 23.77, + "learning_rate": 3.812002645620606e-05, + "loss": 2.3042, + "step": 4797500 + }, + { + "epoch": 23.77, + "learning_rate": 3.811878786977998e-05, + "loss": 2.3032, + "step": 4798000 + }, + { + "epoch": 23.77, + "learning_rate": 3.8117549283353896e-05, + "loss": 2.2994, + "step": 4798500 + }, + { + "epoch": 23.78, + "learning_rate": 3.8116310696927806e-05, + "loss": 2.3109, + "step": 4799000 + }, + { + "epoch": 23.78, + "learning_rate": 3.811507211050172e-05, + "loss": 2.3052, + "step": 4799500 + }, + { + "epoch": 23.78, + "learning_rate": 3.81138360012485e-05, + "loss": 2.2868, + "step": 4800000 + }, + { + "epoch": 23.78, + "learning_rate": 3.8112597414822416e-05, + "loss": 2.3109, + "step": 4800500 + }, + { + "epoch": 23.79, + "learning_rate": 3.811135882839633e-05, + "loss": 2.3048, + "step": 4801000 + }, + { + "epoch": 23.79, + "learning_rate": 3.811012024197025e-05, + "loss": 2.3028, + "step": 4801500 + }, + { + "epoch": 23.79, + "learning_rate": 3.810888165554416e-05, + "loss": 2.3225, + "step": 4802000 + }, + { + "epoch": 23.79, + "learning_rate": 3.810764306911808e-05, + "loss": 2.2936, + "step": 4802500 + }, + { + "epoch": 23.8, + "learning_rate": 3.8106404482691994e-05, + "loss": 2.3293, + "step": 4803000 + }, + { + "epoch": 23.8, + "learning_rate": 3.810516589626591e-05, + "loss": 2.3076, + "step": 4803500 + }, + { + "epoch": 23.8, + "learning_rate": 3.810392730983983e-05, + "loss": 2.2963, + "step": 4804000 + }, + { + "epoch": 23.8, + "learning_rate": 3.8102688723413744e-05, + "loss": 2.2903, + "step": 4804500 + }, + { + "epoch": 23.81, + "learning_rate": 3.810145261416051e-05, + "loss": 2.3059, + "step": 4805000 + }, + { + "epoch": 23.81, + "learning_rate": 3.810021402773442e-05, + "loss": 2.2973, + "step": 4805500 + }, + { + "epoch": 23.81, + "learning_rate": 3.809897544130834e-05, + "loss": 2.2988, + "step": 4806000 + }, + { + "epoch": 23.81, + "learning_rate": 3.809773685488226e-05, + "loss": 2.2914, + "step": 4806500 + }, + { + "epoch": 23.82, + "learning_rate": 3.8096498268456174e-05, + "loss": 2.3179, + "step": 4807000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809525968203009e-05, + "loss": 2.3134, + "step": 4807500 + }, + { + "epoch": 23.82, + "learning_rate": 3.809402357277687e-05, + "loss": 2.28, + "step": 4808000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809278498635078e-05, + "loss": 2.3142, + "step": 4808500 + }, + { + "epoch": 23.83, + "learning_rate": 3.8091546399924694e-05, + "loss": 2.2888, + "step": 4809000 + }, + { + "epoch": 23.83, + "learning_rate": 3.809030781349861e-05, + "loss": 2.3241, + "step": 4809500 + }, + { + "epoch": 23.83, + "learning_rate": 3.808906922707253e-05, + "loss": 2.3173, + "step": 4810000 + }, + { + "epoch": 23.83, + "learning_rate": 3.8087830640646445e-05, + "loss": 2.2905, + "step": 4810500 + }, + { + "epoch": 23.84, + "learning_rate": 3.8086597008566065e-05, + "loss": 2.3409, + "step": 4811000 + }, + { + "epoch": 23.84, + "learning_rate": 3.808535842213998e-05, + "loss": 2.3207, + "step": 4811500 + }, + { + "epoch": 23.84, + "learning_rate": 3.80841198357139e-05, + "loss": 2.3175, + "step": 4812000 + }, + { + "epoch": 23.84, + "learning_rate": 3.808288372646067e-05, + "loss": 2.3065, + "step": 4812500 + }, + { + "epoch": 23.85, + "learning_rate": 3.8081645140034585e-05, + "loss": 2.2876, + "step": 4813000 + }, + { + "epoch": 23.85, + "learning_rate": 3.80804065536085e-05, + "loss": 2.3141, + "step": 4813500 + }, + { + "epoch": 23.85, + "learning_rate": 3.8079170444355264e-05, + "loss": 2.2991, + "step": 4814000 + }, + { + "epoch": 23.85, + "learning_rate": 3.807793185792918e-05, + "loss": 2.2971, + "step": 4814500 + }, + { + "epoch": 23.86, + "learning_rate": 3.80766932715031e-05, + "loss": 2.2939, + "step": 4815000 + }, + { + "epoch": 23.86, + "learning_rate": 3.8075454685077015e-05, + "loss": 2.3154, + "step": 4815500 + }, + { + "epoch": 23.86, + "learning_rate": 3.807421609865093e-05, + "loss": 2.3036, + "step": 4816000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807297751222485e-05, + "loss": 2.3105, + "step": 4816500 + }, + { + "epoch": 23.87, + "learning_rate": 3.8071738925798765e-05, + "loss": 2.281, + "step": 4817000 + }, + { + "epoch": 23.87, + "learning_rate": 3.807050033937268e-05, + "loss": 2.2819, + "step": 4817500 + }, + { + "epoch": 23.87, + "learning_rate": 3.80692617529466e-05, + "loss": 2.2728, + "step": 4818000 + }, + { + "epoch": 23.87, + "learning_rate": 3.8068023166520516e-05, + "loss": 2.3142, + "step": 4818500 + }, + { + "epoch": 23.87, + "learning_rate": 3.806678458009443e-05, + "loss": 2.3092, + "step": 4819000 + }, + { + "epoch": 23.88, + "learning_rate": 3.806554599366835e-05, + "loss": 2.3183, + "step": 4819500 + }, + { + "epoch": 23.88, + "learning_rate": 3.806430740724227e-05, + "loss": 2.2989, + "step": 4820000 + }, + { + "epoch": 23.88, + "learning_rate": 3.8063071297989036e-05, + "loss": 2.2973, + "step": 4820500 + }, + { + "epoch": 23.88, + "learning_rate": 3.8061837665908656e-05, + "loss": 2.3134, + "step": 4821000 + }, + { + "epoch": 23.89, + "learning_rate": 3.8060599079482567e-05, + "loss": 2.3101, + "step": 4821500 + }, + { + "epoch": 23.89, + "learning_rate": 3.8059360493056483e-05, + "loss": 2.3073, + "step": 4822000 + }, + { + "epoch": 23.89, + "learning_rate": 3.80581219066304e-05, + "loss": 2.2903, + "step": 4822500 + }, + { + "epoch": 23.89, + "learning_rate": 3.805688332020432e-05, + "loss": 2.2952, + "step": 4823000 + }, + { + "epoch": 23.9, + "learning_rate": 3.8055644733778234e-05, + "loss": 2.334, + "step": 4823500 + }, + { + "epoch": 23.9, + "learning_rate": 3.805440614735215e-05, + "loss": 2.3168, + "step": 4824000 + }, + { + "epoch": 23.9, + "learning_rate": 3.805316756092607e-05, + "loss": 2.3011, + "step": 4824500 + }, + { + "epoch": 23.9, + "learning_rate": 3.805193145167284e-05, + "loss": 2.3194, + "step": 4825000 + }, + { + "epoch": 23.91, + "learning_rate": 3.8050692865246754e-05, + "loss": 2.313, + "step": 4825500 + }, + { + "epoch": 23.91, + "learning_rate": 3.804945427882067e-05, + "loss": 2.3036, + "step": 4826000 + }, + { + "epoch": 23.91, + "learning_rate": 3.804821569239458e-05, + "loss": 2.2979, + "step": 4826500 + }, + { + "epoch": 23.91, + "learning_rate": 3.80469771059685e-05, + "loss": 2.2836, + "step": 4827000 + }, + { + "epoch": 23.92, + "learning_rate": 3.8045738519542415e-05, + "loss": 2.3093, + "step": 4827500 + }, + { + "epoch": 23.92, + "learning_rate": 3.8044502410289184e-05, + "loss": 2.2908, + "step": 4828000 + }, + { + "epoch": 23.92, + "learning_rate": 3.80432638238631e-05, + "loss": 2.3154, + "step": 4828500 + }, + { + "epoch": 23.92, + "learning_rate": 3.804202523743702e-05, + "loss": 2.3275, + "step": 4829000 + }, + { + "epoch": 23.93, + "learning_rate": 3.8040786651010934e-05, + "loss": 2.3259, + "step": 4829500 + }, + { + "epoch": 23.93, + "learning_rate": 3.803954806458485e-05, + "loss": 2.3006, + "step": 4830000 + }, + { + "epoch": 23.93, + "learning_rate": 3.803830947815877e-05, + "loss": 2.3199, + "step": 4830500 + }, + { + "epoch": 23.93, + "learning_rate": 3.8037070891732685e-05, + "loss": 2.3036, + "step": 4831000 + }, + { + "epoch": 23.94, + "learning_rate": 3.80358323053066e-05, + "loss": 2.2679, + "step": 4831500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803459619605337e-05, + "loss": 2.3195, + "step": 4832000 + }, + { + "epoch": 23.94, + "learning_rate": 3.803335760962729e-05, + "loss": 2.3192, + "step": 4832500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803212150037406e-05, + "loss": 2.3352, + "step": 4833000 + }, + { + "epoch": 23.95, + "learning_rate": 3.8030885391120825e-05, + "loss": 2.3116, + "step": 4833500 + }, + { + "epoch": 23.95, + "learning_rate": 3.802964680469474e-05, + "loss": 2.3202, + "step": 4834000 + }, + { + "epoch": 23.95, + "learning_rate": 3.802840821826866e-05, + "loss": 2.3153, + "step": 4834500 + }, + { + "epoch": 23.95, + "learning_rate": 3.8027169631842576e-05, + "loss": 2.3291, + "step": 4835000 + }, + { + "epoch": 23.96, + "learning_rate": 3.802593104541649e-05, + "loss": 2.2869, + "step": 4835500 + }, + { + "epoch": 23.96, + "learning_rate": 3.802469245899041e-05, + "loss": 2.3052, + "step": 4836000 + }, + { + "epoch": 23.96, + "learning_rate": 3.802345387256433e-05, + "loss": 2.313, + "step": 4836500 + }, + { + "epoch": 23.96, + "learning_rate": 3.802221776331109e-05, + "loss": 2.2973, + "step": 4837000 + }, + { + "epoch": 23.97, + "learning_rate": 3.8020979176885006e-05, + "loss": 2.3024, + "step": 4837500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801974059045892e-05, + "loss": 2.3348, + "step": 4838000 + }, + { + "epoch": 23.97, + "learning_rate": 3.801850200403284e-05, + "loss": 2.3053, + "step": 4838500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801726341760676e-05, + "loss": 2.3055, + "step": 4839000 + }, + { + "epoch": 23.98, + "learning_rate": 3.8016024831180674e-05, + "loss": 2.3312, + "step": 4839500 + }, + { + "epoch": 23.98, + "learning_rate": 3.801478872192744e-05, + "loss": 2.2953, + "step": 4840000 + }, + { + "epoch": 23.98, + "learning_rate": 3.801355013550136e-05, + "loss": 2.3206, + "step": 4840500 + }, + { + "epoch": 23.98, + "learning_rate": 3.8012311549075276e-05, + "loss": 2.3084, + "step": 4841000 + }, + { + "epoch": 23.99, + "learning_rate": 3.801107296264919e-05, + "loss": 2.2991, + "step": 4841500 + }, + { + "epoch": 23.99, + "learning_rate": 3.800983437622311e-05, + "loss": 2.2901, + "step": 4842000 + }, + { + "epoch": 23.99, + "learning_rate": 3.800859578979703e-05, + "loss": 2.3141, + "step": 4842500 + }, + { + "epoch": 23.99, + "learning_rate": 3.8007357203370944e-05, + "loss": 2.3149, + "step": 4843000 + }, + { + "epoch": 24.0, + "learning_rate": 3.8006118616944854e-05, + "loss": 2.2997, + "step": 4843500 + }, + { + "epoch": 24.0, + "learning_rate": 3.800488003051877e-05, + "loss": 2.3124, + "step": 4844000 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6541513737695038, + "eval_accuracy_mlm": 0.608915092455769, + "eval_accuracy_nsp": 0.8673943653685494, + "eval_loss": 2.3393051624298096, + "eval_runtime": 146.2902, + "eval_samples_per_second": 1742.831, + "eval_steps_per_second": 72.623, + "step": 4844232 + }, + { + "epoch": 24.0, + "learning_rate": 3.800364144409269e-05, + "loss": 2.2969, + "step": 4844500 + }, + { + "epoch": 24.0, + "learning_rate": 3.8002402857666605e-05, + "loss": 2.2555, + "step": 4845000 + }, + { + "epoch": 24.01, + "learning_rate": 3.800116427124052e-05, + "loss": 2.2708, + "step": 4845500 + }, + { + "epoch": 24.01, + "learning_rate": 3.799992568481444e-05, + "loss": 2.2948, + "step": 4846000 + }, + { + "epoch": 24.01, + "learning_rate": 3.7998687098388356e-05, + "loss": 2.277, + "step": 4846500 + }, + { + "epoch": 24.01, + "learning_rate": 3.7997448511962266e-05, + "loss": 2.249, + "step": 4847000 + }, + { + "epoch": 24.02, + "learning_rate": 3.7996212402709035e-05, + "loss": 2.2613, + "step": 4847500 + }, + { + "epoch": 24.02, + "learning_rate": 3.799497629345581e-05, + "loss": 2.2796, + "step": 4848000 + }, + { + "epoch": 24.02, + "learning_rate": 3.799373770702973e-05, + "loss": 2.2798, + "step": 4848500 + }, + { + "epoch": 24.02, + "learning_rate": 3.7992499120603644e-05, + "loss": 2.2457, + "step": 4849000 + }, + { + "epoch": 24.03, + "learning_rate": 3.7991260534177554e-05, + "loss": 2.2718, + "step": 4849500 + }, + { + "epoch": 24.03, + "learning_rate": 3.799002194775147e-05, + "loss": 2.2644, + "step": 4850000 + }, + { + "epoch": 24.03, + "learning_rate": 3.798878336132539e-05, + "loss": 2.282, + "step": 4850500 + }, + { + "epoch": 24.03, + "learning_rate": 3.7987544774899305e-05, + "loss": 2.2948, + "step": 4851000 + }, + { + "epoch": 24.04, + "learning_rate": 3.7986308665646074e-05, + "loss": 2.2776, + "step": 4851500 + }, + { + "epoch": 24.04, + "learning_rate": 3.798507007921999e-05, + "loss": 2.2824, + "step": 4852000 + }, + { + "epoch": 24.04, + "learning_rate": 3.798383149279391e-05, + "loss": 2.2988, + "step": 4852500 + }, + { + "epoch": 24.04, + "learning_rate": 3.798259290636782e-05, + "loss": 2.296, + "step": 4853000 + }, + { + "epoch": 24.05, + "learning_rate": 3.7981356797114594e-05, + "loss": 2.2601, + "step": 4853500 + }, + { + "epoch": 24.05, + "learning_rate": 3.798011821068851e-05, + "loss": 2.2779, + "step": 4854000 + }, + { + "epoch": 24.05, + "learning_rate": 3.797887962426243e-05, + "loss": 2.2901, + "step": 4854500 + }, + { + "epoch": 24.05, + "learning_rate": 3.7977641037836344e-05, + "loss": 2.286, + "step": 4855000 + }, + { + "epoch": 24.06, + "learning_rate": 3.797640245141026e-05, + "loss": 2.2828, + "step": 4855500 + }, + { + "epoch": 24.06, + "learning_rate": 3.797516386498417e-05, + "loss": 2.2735, + "step": 4856000 + }, + { + "epoch": 24.06, + "learning_rate": 3.797392527855809e-05, + "loss": 2.2708, + "step": 4856500 + }, + { + "epoch": 24.06, + "learning_rate": 3.7972686692132005e-05, + "loss": 2.2948, + "step": 4857000 + }, + { + "epoch": 24.07, + "learning_rate": 3.7971450582878774e-05, + "loss": 2.2892, + "step": 4857500 + }, + { + "epoch": 24.07, + "learning_rate": 3.797021199645269e-05, + "loss": 2.2911, + "step": 4858000 + }, + { + "epoch": 24.07, + "learning_rate": 3.796897341002661e-05, + "loss": 2.2749, + "step": 4858500 + }, + { + "epoch": 24.07, + "learning_rate": 3.796773482360052e-05, + "loss": 2.3011, + "step": 4859000 + }, + { + "epoch": 24.08, + "learning_rate": 3.7966496237174435e-05, + "loss": 2.2884, + "step": 4859500 + }, + { + "epoch": 24.08, + "learning_rate": 3.796525765074835e-05, + "loss": 2.2755, + "step": 4860000 + }, + { + "epoch": 24.08, + "learning_rate": 3.796401906432227e-05, + "loss": 2.3002, + "step": 4860500 + }, + { + "epoch": 24.08, + "learning_rate": 3.7962780477896186e-05, + "loss": 2.2786, + "step": 4861000 + }, + { + "epoch": 24.09, + "learning_rate": 3.79615418914701e-05, + "loss": 2.2814, + "step": 4861500 + }, + { + "epoch": 24.09, + "learning_rate": 3.796030330504402e-05, + "loss": 2.2544, + "step": 4862000 + }, + { + "epoch": 24.09, + "learning_rate": 3.795906719579079e-05, + "loss": 2.2841, + "step": 4862500 + }, + { + "epoch": 24.09, + "learning_rate": 3.7957828609364705e-05, + "loss": 2.3162, + "step": 4863000 + }, + { + "epoch": 24.1, + "learning_rate": 3.7956592500111474e-05, + "loss": 2.2993, + "step": 4863500 + }, + { + "epoch": 24.1, + "learning_rate": 3.795535391368539e-05, + "loss": 2.288, + "step": 4864000 + }, + { + "epoch": 24.1, + "learning_rate": 3.795411532725931e-05, + "loss": 2.2672, + "step": 4864500 + }, + { + "epoch": 24.1, + "learning_rate": 3.795287921800608e-05, + "loss": 2.2889, + "step": 4865000 + }, + { + "epoch": 24.11, + "learning_rate": 3.7951640631579994e-05, + "loss": 2.2822, + "step": 4865500 + }, + { + "epoch": 24.11, + "learning_rate": 3.795040204515391e-05, + "loss": 2.271, + "step": 4866000 + }, + { + "epoch": 24.11, + "learning_rate": 3.794916345872783e-05, + "loss": 2.2691, + "step": 4866500 + }, + { + "epoch": 24.11, + "learning_rate": 3.7947924872301745e-05, + "loss": 2.2968, + "step": 4867000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794668876304851e-05, + "loss": 2.2602, + "step": 4867500 + }, + { + "epoch": 24.12, + "learning_rate": 3.7945450176622424e-05, + "loss": 2.2793, + "step": 4868000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794421159019634e-05, + "loss": 2.2726, + "step": 4868500 + }, + { + "epoch": 24.12, + "learning_rate": 3.794297300377026e-05, + "loss": 2.2902, + "step": 4869000 + }, + { + "epoch": 24.13, + "learning_rate": 3.7941734417344174e-05, + "loss": 2.2575, + "step": 4869500 + }, + { + "epoch": 24.13, + "learning_rate": 3.794049583091809e-05, + "loss": 2.2759, + "step": 4870000 + }, + { + "epoch": 24.13, + "learning_rate": 3.793925724449201e-05, + "loss": 2.2869, + "step": 4870500 + }, + { + "epoch": 24.13, + "learning_rate": 3.7938018658065925e-05, + "loss": 2.2826, + "step": 4871000 + }, + { + "epoch": 24.14, + "learning_rate": 3.7936780071639835e-05, + "loss": 2.2913, + "step": 4871500 + }, + { + "epoch": 24.14, + "learning_rate": 3.793554396238661e-05, + "loss": 2.3063, + "step": 4872000 + }, + { + "epoch": 24.14, + "learning_rate": 3.793430537596053e-05, + "loss": 2.2777, + "step": 4872500 + }, + { + "epoch": 24.14, + "learning_rate": 3.7933066789534445e-05, + "loss": 2.2694, + "step": 4873000 + }, + { + "epoch": 24.15, + "learning_rate": 3.793182820310836e-05, + "loss": 2.2867, + "step": 4873500 + }, + { + "epoch": 24.15, + "learning_rate": 3.793058961668228e-05, + "loss": 2.2729, + "step": 4874000 + }, + { + "epoch": 24.15, + "learning_rate": 3.792935350742905e-05, + "loss": 2.2849, + "step": 4874500 + }, + { + "epoch": 24.15, + "learning_rate": 3.792811492100296e-05, + "loss": 2.2694, + "step": 4875000 + }, + { + "epoch": 24.15, + "learning_rate": 3.7926876334576874e-05, + "loss": 2.263, + "step": 4875500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792563774815079e-05, + "loss": 2.2894, + "step": 4876000 + }, + { + "epoch": 24.16, + "learning_rate": 3.792439916172471e-05, + "loss": 2.2946, + "step": 4876500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792316305247148e-05, + "loss": 2.2824, + "step": 4877000 + }, + { + "epoch": 24.16, + "learning_rate": 3.7921924466045394e-05, + "loss": 2.2882, + "step": 4877500 + }, + { + "epoch": 24.17, + "learning_rate": 3.792068587961931e-05, + "loss": 2.2966, + "step": 4878000 + }, + { + "epoch": 24.17, + "learning_rate": 3.791944729319323e-05, + "loss": 2.2884, + "step": 4878500 + }, + { + "epoch": 24.17, + "learning_rate": 3.7918208706767145e-05, + "loss": 2.2832, + "step": 4879000 + }, + { + "epoch": 24.17, + "learning_rate": 3.7916972597513914e-05, + "loss": 2.268, + "step": 4879500 + }, + { + "epoch": 24.18, + "learning_rate": 3.791573401108783e-05, + "loss": 2.2686, + "step": 4880000 + }, + { + "epoch": 24.18, + "learning_rate": 3.791449542466175e-05, + "loss": 2.2891, + "step": 4880500 + }, + { + "epoch": 24.18, + "learning_rate": 3.7913256838235664e-05, + "loss": 2.2771, + "step": 4881000 + }, + { + "epoch": 24.18, + "learning_rate": 3.7912018251809575e-05, + "loss": 2.3122, + "step": 4881500 + }, + { + "epoch": 24.19, + "learning_rate": 3.791077966538349e-05, + "loss": 2.2735, + "step": 4882000 + }, + { + "epoch": 24.19, + "learning_rate": 3.790954107895741e-05, + "loss": 2.2336, + "step": 4882500 + }, + { + "epoch": 24.19, + "learning_rate": 3.7908302492531325e-05, + "loss": 2.2546, + "step": 4883000 + }, + { + "epoch": 24.19, + "learning_rate": 3.7907066383278094e-05, + "loss": 2.2733, + "step": 4883500 + }, + { + "epoch": 24.2, + "learning_rate": 3.790582779685201e-05, + "loss": 2.2813, + "step": 4884000 + }, + { + "epoch": 24.2, + "learning_rate": 3.790459168759878e-05, + "loss": 2.2616, + "step": 4884500 + }, + { + "epoch": 24.2, + "learning_rate": 3.79033531011727e-05, + "loss": 2.2972, + "step": 4885000 + }, + { + "epoch": 24.2, + "learning_rate": 3.7902114514746614e-05, + "loss": 2.2637, + "step": 4885500 + }, + { + "epoch": 24.21, + "learning_rate": 3.790087592832053e-05, + "loss": 2.2805, + "step": 4886000 + }, + { + "epoch": 24.21, + "learning_rate": 3.789963734189445e-05, + "loss": 2.2822, + "step": 4886500 + }, + { + "epoch": 24.21, + "learning_rate": 3.7898398755468365e-05, + "loss": 2.2969, + "step": 4887000 + }, + { + "epoch": 24.21, + "learning_rate": 3.789716016904228e-05, + "loss": 2.2901, + "step": 4887500 + }, + { + "epoch": 24.22, + "learning_rate": 3.78959215826162e-05, + "loss": 2.2994, + "step": 4888000 + }, + { + "epoch": 24.22, + "learning_rate": 3.789468299619011e-05, + "loss": 2.2735, + "step": 4888500 + }, + { + "epoch": 24.22, + "learning_rate": 3.7893444409764025e-05, + "loss": 2.2762, + "step": 4889000 + }, + { + "epoch": 24.22, + "learning_rate": 3.789220582333794e-05, + "loss": 2.3055, + "step": 4889500 + }, + { + "epoch": 24.23, + "learning_rate": 3.789096971408471e-05, + "loss": 2.2829, + "step": 4890000 + }, + { + "epoch": 24.23, + "learning_rate": 3.788973112765863e-05, + "loss": 2.2869, + "step": 4890500 + }, + { + "epoch": 24.23, + "learning_rate": 3.78884950184054e-05, + "loss": 2.284, + "step": 4891000 + }, + { + "epoch": 24.23, + "learning_rate": 3.7887256431979314e-05, + "loss": 2.2953, + "step": 4891500 + }, + { + "epoch": 24.24, + "learning_rate": 3.788601784555323e-05, + "loss": 2.2887, + "step": 4892000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788477925912715e-05, + "loss": 2.2865, + "step": 4892500 + }, + { + "epoch": 24.24, + "learning_rate": 3.7883540672701065e-05, + "loss": 2.2958, + "step": 4893000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788230208627498e-05, + "loss": 2.2692, + "step": 4893500 + }, + { + "epoch": 24.25, + "learning_rate": 3.7881065977021744e-05, + "loss": 2.2848, + "step": 4894000 + }, + { + "epoch": 24.25, + "learning_rate": 3.787982986776851e-05, + "loss": 2.2996, + "step": 4894500 + }, + { + "epoch": 24.25, + "learning_rate": 3.787859128134243e-05, + "loss": 2.2844, + "step": 4895000 + }, + { + "epoch": 24.25, + "learning_rate": 3.7877352694916346e-05, + "loss": 2.2728, + "step": 4895500 + }, + { + "epoch": 24.26, + "learning_rate": 3.7876116585663115e-05, + "loss": 2.2742, + "step": 4896000 + }, + { + "epoch": 24.26, + "learning_rate": 3.787487799923703e-05, + "loss": 2.2734, + "step": 4896500 + }, + { + "epoch": 24.26, + "learning_rate": 3.787363941281095e-05, + "loss": 2.3034, + "step": 4897000 + }, + { + "epoch": 24.26, + "learning_rate": 3.7872400826384866e-05, + "loss": 2.3104, + "step": 4897500 + }, + { + "epoch": 24.27, + "learning_rate": 3.787116223995878e-05, + "loss": 2.2996, + "step": 4898000 + }, + { + "epoch": 24.27, + "learning_rate": 3.786992365353269e-05, + "loss": 2.2914, + "step": 4898500 + }, + { + "epoch": 24.27, + "learning_rate": 3.786868506710661e-05, + "loss": 2.2983, + "step": 4899000 + }, + { + "epoch": 24.27, + "learning_rate": 3.786744648068053e-05, + "loss": 2.3012, + "step": 4899500 + }, + { + "epoch": 24.28, + "learning_rate": 3.78662103714273e-05, + "loss": 2.2913, + "step": 4900000 + }, + { + "epoch": 24.28, + "learning_rate": 3.786497178500121e-05, + "loss": 2.2849, + "step": 4900500 + }, + { + "epoch": 24.28, + "learning_rate": 3.786373319857513e-05, + "loss": 2.2893, + "step": 4901000 + }, + { + "epoch": 24.28, + "learning_rate": 3.7862494612149046e-05, + "loss": 2.2917, + "step": 4901500 + }, + { + "epoch": 24.29, + "learning_rate": 3.786125602572296e-05, + "loss": 2.2711, + "step": 4902000 + }, + { + "epoch": 24.29, + "learning_rate": 3.786001743929688e-05, + "loss": 2.2723, + "step": 4902500 + }, + { + "epoch": 24.29, + "learning_rate": 3.78587788528708e-05, + "loss": 2.2921, + "step": 4903000 + }, + { + "epoch": 24.29, + "learning_rate": 3.7857540266444714e-05, + "loss": 2.2999, + "step": 4903500 + }, + { + "epoch": 24.3, + "learning_rate": 3.785630168001863e-05, + "loss": 2.2939, + "step": 4904000 + }, + { + "epoch": 24.3, + "learning_rate": 3.78550655707654e-05, + "loss": 2.3084, + "step": 4904500 + }, + { + "epoch": 24.3, + "learning_rate": 3.785382698433931e-05, + "loss": 2.311, + "step": 4905000 + }, + { + "epoch": 24.3, + "learning_rate": 3.785258839791323e-05, + "loss": 2.2878, + "step": 4905500 + }, + { + "epoch": 24.31, + "learning_rate": 3.7851349811487144e-05, + "loss": 2.273, + "step": 4906000 + }, + { + "epoch": 24.31, + "learning_rate": 3.785011122506106e-05, + "loss": 2.2828, + "step": 4906500 + }, + { + "epoch": 24.31, + "learning_rate": 3.784887263863498e-05, + "loss": 2.2966, + "step": 4907000 + }, + { + "epoch": 24.31, + "learning_rate": 3.7847634052208895e-05, + "loss": 2.2957, + "step": 4907500 + }, + { + "epoch": 24.32, + "learning_rate": 3.784639546578281e-05, + "loss": 2.2923, + "step": 4908000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784515687935673e-05, + "loss": 2.2919, + "step": 4908500 + }, + { + "epoch": 24.32, + "learning_rate": 3.7843918292930645e-05, + "loss": 2.2741, + "step": 4909000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784267970650456e-05, + "loss": 2.3029, + "step": 4909500 + }, + { + "epoch": 24.33, + "learning_rate": 3.784144112007848e-05, + "loss": 2.2969, + "step": 4910000 + }, + { + "epoch": 24.33, + "learning_rate": 3.784020501082525e-05, + "loss": 2.2778, + "step": 4910500 + }, + { + "epoch": 24.33, + "learning_rate": 3.7838966424399165e-05, + "loss": 2.3072, + "step": 4911000 + }, + { + "epoch": 24.33, + "learning_rate": 3.783772783797308e-05, + "loss": 2.2775, + "step": 4911500 + }, + { + "epoch": 24.34, + "learning_rate": 3.7836489251547e-05, + "loss": 2.2819, + "step": 4912000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783525314229376e-05, + "loss": 2.2748, + "step": 4912500 + }, + { + "epoch": 24.34, + "learning_rate": 3.783401703304053e-05, + "loss": 2.3047, + "step": 4913000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783277844661445e-05, + "loss": 2.2986, + "step": 4913500 + }, + { + "epoch": 24.35, + "learning_rate": 3.7831539860188364e-05, + "loss": 2.3051, + "step": 4914000 + }, + { + "epoch": 24.35, + "learning_rate": 3.783030127376228e-05, + "loss": 2.2865, + "step": 4914500 + }, + { + "epoch": 24.35, + "learning_rate": 3.78290626873362e-05, + "loss": 2.3101, + "step": 4915000 + }, + { + "epoch": 24.35, + "learning_rate": 3.7827824100910114e-05, + "loss": 2.3132, + "step": 4915500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782658551448403e-05, + "loss": 2.2878, + "step": 4916000 + }, + { + "epoch": 24.36, + "learning_rate": 3.78253494052308e-05, + "loss": 2.2744, + "step": 4916500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782411329597757e-05, + "loss": 2.2969, + "step": 4917000 + }, + { + "epoch": 24.36, + "learning_rate": 3.7822874709551486e-05, + "loss": 2.2918, + "step": 4917500 + }, + { + "epoch": 24.37, + "learning_rate": 3.7821638600298255e-05, + "loss": 2.2919, + "step": 4918000 + }, + { + "epoch": 24.37, + "learning_rate": 3.782040001387217e-05, + "loss": 2.3031, + "step": 4918500 + }, + { + "epoch": 24.37, + "learning_rate": 3.781916142744609e-05, + "loss": 2.2955, + "step": 4919000 + }, + { + "epoch": 24.37, + "learning_rate": 3.7817922841020005e-05, + "loss": 2.2644, + "step": 4919500 + }, + { + "epoch": 24.38, + "learning_rate": 3.781668425459392e-05, + "loss": 2.3126, + "step": 4920000 + }, + { + "epoch": 24.38, + "learning_rate": 3.781544566816784e-05, + "loss": 2.2968, + "step": 4920500 + }, + { + "epoch": 24.38, + "learning_rate": 3.7814207081741756e-05, + "loss": 2.2943, + "step": 4921000 + }, + { + "epoch": 24.38, + "learning_rate": 3.781296849531567e-05, + "loss": 2.3009, + "step": 4921500 + }, + { + "epoch": 24.39, + "learning_rate": 3.781172990888958e-05, + "loss": 2.2804, + "step": 4922000 + }, + { + "epoch": 24.39, + "learning_rate": 3.781049379963635e-05, + "loss": 2.2773, + "step": 4922500 + }, + { + "epoch": 24.39, + "learning_rate": 3.780925521321027e-05, + "loss": 2.3084, + "step": 4923000 + }, + { + "epoch": 24.39, + "learning_rate": 3.7808016626784186e-05, + "loss": 2.3082, + "step": 4923500 + }, + { + "epoch": 24.4, + "learning_rate": 3.78067780403581e-05, + "loss": 2.2789, + "step": 4924000 + }, + { + "epoch": 24.4, + "learning_rate": 3.780554193110487e-05, + "loss": 2.2776, + "step": 4924500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780430334467879e-05, + "loss": 2.2606, + "step": 4925000 + }, + { + "epoch": 24.4, + "learning_rate": 3.7803064758252706e-05, + "loss": 2.3084, + "step": 4925500 + }, + { + "epoch": 24.41, + "learning_rate": 3.780182617182662e-05, + "loss": 2.2744, + "step": 4926000 + }, + { + "epoch": 24.41, + "learning_rate": 3.780058758540054e-05, + "loss": 2.2805, + "step": 4926500 + }, + { + "epoch": 24.41, + "learning_rate": 3.7799348998974456e-05, + "loss": 2.2899, + "step": 4927000 + }, + { + "epoch": 24.41, + "learning_rate": 3.779811041254837e-05, + "loss": 2.2927, + "step": 4927500 + }, + { + "epoch": 24.42, + "learning_rate": 3.7796874303295135e-05, + "loss": 2.3032, + "step": 4928000 + }, + { + "epoch": 24.42, + "learning_rate": 3.779563571686905e-05, + "loss": 2.2916, + "step": 4928500 + }, + { + "epoch": 24.42, + "learning_rate": 3.779439713044297e-05, + "loss": 2.284, + "step": 4929000 + }, + { + "epoch": 24.42, + "learning_rate": 3.7793158544016886e-05, + "loss": 2.2702, + "step": 4929500 + }, + { + "epoch": 24.42, + "learning_rate": 3.7791922434763655e-05, + "loss": 2.2966, + "step": 4930000 + }, + { + "epoch": 24.43, + "learning_rate": 3.779068384833757e-05, + "loss": 2.2612, + "step": 4930500 + }, + { + "epoch": 24.43, + "learning_rate": 3.778944526191149e-05, + "loss": 2.304, + "step": 4931000 + }, + { + "epoch": 24.43, + "learning_rate": 3.7788206675485406e-05, + "loss": 2.2901, + "step": 4931500 + }, + { + "epoch": 24.43, + "learning_rate": 3.778696808905932e-05, + "loss": 2.2847, + "step": 4932000 + }, + { + "epoch": 24.44, + "learning_rate": 3.778572950263324e-05, + "loss": 2.2852, + "step": 4932500 + }, + { + "epoch": 24.44, + "learning_rate": 3.7784490916207156e-05, + "loss": 2.2935, + "step": 4933000 + }, + { + "epoch": 24.44, + "learning_rate": 3.7783252329781073e-05, + "loss": 2.2765, + "step": 4933500 + }, + { + "epoch": 24.44, + "learning_rate": 3.778201374335499e-05, + "loss": 2.3069, + "step": 4934000 + }, + { + "epoch": 24.45, + "learning_rate": 3.77807751569289e-05, + "loss": 2.3149, + "step": 4934500 + }, + { + "epoch": 24.45, + "learning_rate": 3.777953657050282e-05, + "loss": 2.3075, + "step": 4935000 + }, + { + "epoch": 24.45, + "learning_rate": 3.7778297984076734e-05, + "loss": 2.2755, + "step": 4935500 + }, + { + "epoch": 24.45, + "learning_rate": 3.777705939765065e-05, + "loss": 2.2864, + "step": 4936000 + }, + { + "epoch": 24.46, + "learning_rate": 3.777582328839742e-05, + "loss": 2.3105, + "step": 4936500 + }, + { + "epoch": 24.46, + "learning_rate": 3.777458470197134e-05, + "loss": 2.2645, + "step": 4937000 + }, + { + "epoch": 24.46, + "learning_rate": 3.7773346115545254e-05, + "loss": 2.2963, + "step": 4937500 + }, + { + "epoch": 24.46, + "learning_rate": 3.7772107529119164e-05, + "loss": 2.3063, + "step": 4938000 + }, + { + "epoch": 24.47, + "learning_rate": 3.777086894269308e-05, + "loss": 2.309, + "step": 4938500 + }, + { + "epoch": 24.47, + "learning_rate": 3.7769630356267e-05, + "loss": 2.309, + "step": 4939000 + }, + { + "epoch": 24.47, + "learning_rate": 3.7768391769840915e-05, + "loss": 2.2765, + "step": 4939500 + }, + { + "epoch": 24.47, + "learning_rate": 3.776715318341483e-05, + "loss": 2.2682, + "step": 4940000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776591459698875e-05, + "loss": 2.2867, + "step": 4940500 + }, + { + "epoch": 24.48, + "learning_rate": 3.7764676010562666e-05, + "loss": 2.2626, + "step": 4941000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776343742413658e-05, + "loss": 2.2847, + "step": 4941500 + }, + { + "epoch": 24.48, + "learning_rate": 3.776220131488335e-05, + "loss": 2.2891, + "step": 4942000 + }, + { + "epoch": 24.49, + "learning_rate": 3.776096272845727e-05, + "loss": 2.2844, + "step": 4942500 + }, + { + "epoch": 24.49, + "learning_rate": 3.7759724142031185e-05, + "loss": 2.2788, + "step": 4943000 + }, + { + "epoch": 24.49, + "learning_rate": 3.7758488032777954e-05, + "loss": 2.2986, + "step": 4943500 + }, + { + "epoch": 24.49, + "learning_rate": 3.7757249446351864e-05, + "loss": 2.2886, + "step": 4944000 + }, + { + "epoch": 24.5, + "learning_rate": 3.775601085992578e-05, + "loss": 2.3065, + "step": 4944500 + }, + { + "epoch": 24.5, + "learning_rate": 3.77547722734997e-05, + "loss": 2.2977, + "step": 4945000 + }, + { + "epoch": 24.5, + "learning_rate": 3.7753536164246474e-05, + "loss": 2.2616, + "step": 4945500 + }, + { + "epoch": 24.5, + "learning_rate": 3.775229757782039e-05, + "loss": 2.2727, + "step": 4946000 + }, + { + "epoch": 24.51, + "learning_rate": 3.775105899139431e-05, + "loss": 2.2846, + "step": 4946500 + }, + { + "epoch": 24.51, + "learning_rate": 3.774982040496822e-05, + "loss": 2.2827, + "step": 4947000 + }, + { + "epoch": 24.51, + "learning_rate": 3.7748581818542135e-05, + "loss": 2.2623, + "step": 4947500 + }, + { + "epoch": 24.51, + "learning_rate": 3.774734323211605e-05, + "loss": 2.3012, + "step": 4948000 + }, + { + "epoch": 24.52, + "learning_rate": 3.774610712286282e-05, + "loss": 2.2657, + "step": 4948500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774486853643674e-05, + "loss": 2.279, + "step": 4949000 + }, + { + "epoch": 24.52, + "learning_rate": 3.7743629950010654e-05, + "loss": 2.3157, + "step": 4949500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774239136358457e-05, + "loss": 2.2974, + "step": 4950000 + }, + { + "epoch": 24.53, + "learning_rate": 3.774115277715848e-05, + "loss": 2.2873, + "step": 4950500 + }, + { + "epoch": 24.53, + "learning_rate": 3.773991666790526e-05, + "loss": 2.2983, + "step": 4951000 + }, + { + "epoch": 24.53, + "learning_rate": 3.7738678081479174e-05, + "loss": 2.2926, + "step": 4951500 + }, + { + "epoch": 24.53, + "learning_rate": 3.773743949505309e-05, + "loss": 2.2734, + "step": 4952000 + }, + { + "epoch": 24.54, + "learning_rate": 3.773620090862701e-05, + "loss": 2.3017, + "step": 4952500 + }, + { + "epoch": 24.54, + "learning_rate": 3.7734962322200925e-05, + "loss": 2.3132, + "step": 4953000 + }, + { + "epoch": 24.54, + "learning_rate": 3.7733723735774835e-05, + "loss": 2.2891, + "step": 4953500 + }, + { + "epoch": 24.54, + "learning_rate": 3.773248514934875e-05, + "loss": 2.2889, + "step": 4954000 + }, + { + "epoch": 24.55, + "learning_rate": 3.773124656292267e-05, + "loss": 2.3073, + "step": 4954500 + }, + { + "epoch": 24.55, + "learning_rate": 3.7730007976496585e-05, + "loss": 2.2684, + "step": 4955000 + }, + { + "epoch": 24.55, + "learning_rate": 3.7728774344416206e-05, + "loss": 2.2905, + "step": 4955500 + }, + { + "epoch": 24.55, + "learning_rate": 3.772753575799012e-05, + "loss": 2.2836, + "step": 4956000 + }, + { + "epoch": 24.56, + "learning_rate": 3.772629717156404e-05, + "loss": 2.2936, + "step": 4956500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772505858513796e-05, + "loss": 2.3092, + "step": 4957000 + }, + { + "epoch": 24.56, + "learning_rate": 3.7723822475884726e-05, + "loss": 2.2971, + "step": 4957500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772258388945864e-05, + "loss": 2.2959, + "step": 4958000 + }, + { + "epoch": 24.57, + "learning_rate": 3.772134530303256e-05, + "loss": 2.2656, + "step": 4958500 + }, + { + "epoch": 24.57, + "learning_rate": 3.7720106716606477e-05, + "loss": 2.2685, + "step": 4959000 + }, + { + "epoch": 24.57, + "learning_rate": 3.7718868130180393e-05, + "loss": 2.2871, + "step": 4959500 + }, + { + "epoch": 24.57, + "learning_rate": 3.7717629543754304e-05, + "loss": 2.2768, + "step": 4960000 + }, + { + "epoch": 24.58, + "learning_rate": 3.771639095732822e-05, + "loss": 2.3032, + "step": 4960500 + }, + { + "epoch": 24.58, + "learning_rate": 3.771515237090214e-05, + "loss": 2.2941, + "step": 4961000 + }, + { + "epoch": 24.58, + "learning_rate": 3.7713913784476054e-05, + "loss": 2.2861, + "step": 4961500 + }, + { + "epoch": 24.58, + "learning_rate": 3.771267519804997e-05, + "loss": 2.2827, + "step": 4962000 + }, + { + "epoch": 24.59, + "learning_rate": 3.771143661162389e-05, + "loss": 2.2945, + "step": 4962500 + }, + { + "epoch": 24.59, + "learning_rate": 3.77101980251978e-05, + "loss": 2.292, + "step": 4963000 + }, + { + "epoch": 24.59, + "learning_rate": 3.7708959438771715e-05, + "loss": 2.3007, + "step": 4963500 + }, + { + "epoch": 24.59, + "learning_rate": 3.770772085234563e-05, + "loss": 2.2972, + "step": 4964000 + }, + { + "epoch": 24.6, + "learning_rate": 3.770648226591955e-05, + "loss": 2.2883, + "step": 4964500 + }, + { + "epoch": 24.6, + "learning_rate": 3.7705246156666325e-05, + "loss": 2.3001, + "step": 4965000 + }, + { + "epoch": 24.6, + "learning_rate": 3.7704010047413094e-05, + "loss": 2.2894, + "step": 4965500 + }, + { + "epoch": 24.6, + "learning_rate": 3.770277146098701e-05, + "loss": 2.2897, + "step": 4966000 + }, + { + "epoch": 24.61, + "learning_rate": 3.770153287456093e-05, + "loss": 2.267, + "step": 4966500 + }, + { + "epoch": 24.61, + "learning_rate": 3.770029428813484e-05, + "loss": 2.2917, + "step": 4967000 + }, + { + "epoch": 24.61, + "learning_rate": 3.7699055701708755e-05, + "loss": 2.3047, + "step": 4967500 + }, + { + "epoch": 24.61, + "learning_rate": 3.769781711528267e-05, + "loss": 2.2945, + "step": 4968000 + }, + { + "epoch": 24.62, + "learning_rate": 3.769657852885659e-05, + "loss": 2.2779, + "step": 4968500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769534241960336e-05, + "loss": 2.2889, + "step": 4969000 + }, + { + "epoch": 24.62, + "learning_rate": 3.7694103833177274e-05, + "loss": 2.2736, + "step": 4969500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769286524675119e-05, + "loss": 2.2839, + "step": 4970000 + }, + { + "epoch": 24.63, + "learning_rate": 3.769162666032511e-05, + "loss": 2.2858, + "step": 4970500 + }, + { + "epoch": 24.63, + "learning_rate": 3.7690388073899025e-05, + "loss": 2.2925, + "step": 4971000 + }, + { + "epoch": 24.63, + "learning_rate": 3.768914948747294e-05, + "loss": 2.2977, + "step": 4971500 + }, + { + "epoch": 24.63, + "learning_rate": 3.768791090104685e-05, + "loss": 2.2755, + "step": 4972000 + }, + { + "epoch": 24.64, + "learning_rate": 3.768667231462077e-05, + "loss": 2.2916, + "step": 4972500 + }, + { + "epoch": 24.64, + "learning_rate": 3.7685433728194686e-05, + "loss": 2.2846, + "step": 4973000 + }, + { + "epoch": 24.64, + "learning_rate": 3.76841951417686e-05, + "loss": 2.3258, + "step": 4973500 + }, + { + "epoch": 24.64, + "learning_rate": 3.768295903251537e-05, + "loss": 2.3021, + "step": 4974000 + }, + { + "epoch": 24.65, + "learning_rate": 3.768172044608929e-05, + "loss": 2.29, + "step": 4974500 + }, + { + "epoch": 24.65, + "learning_rate": 3.7680481859663205e-05, + "loss": 2.2926, + "step": 4975000 + }, + { + "epoch": 24.65, + "learning_rate": 3.7679245750409974e-05, + "loss": 2.3141, + "step": 4975500 + }, + { + "epoch": 24.65, + "learning_rate": 3.767800716398389e-05, + "loss": 2.3305, + "step": 4976000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767676857755781e-05, + "loss": 2.298, + "step": 4976500 + }, + { + "epoch": 24.66, + "learning_rate": 3.7675529991131725e-05, + "loss": 2.2815, + "step": 4977000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767429140470564e-05, + "loss": 2.2933, + "step": 4977500 + }, + { + "epoch": 24.66, + "learning_rate": 3.767305281827956e-05, + "loss": 2.2923, + "step": 4978000 + }, + { + "epoch": 24.67, + "learning_rate": 3.767181423185347e-05, + "loss": 2.2987, + "step": 4978500 + }, + { + "epoch": 24.67, + "learning_rate": 3.7670575645427386e-05, + "loss": 2.2733, + "step": 4979000 + }, + { + "epoch": 24.67, + "learning_rate": 3.76693370590013e-05, + "loss": 2.2781, + "step": 4979500 + }, + { + "epoch": 24.67, + "learning_rate": 3.766809847257522e-05, + "loss": 2.2993, + "step": 4980000 + }, + { + "epoch": 24.68, + "learning_rate": 3.766685988614914e-05, + "loss": 2.2971, + "step": 4980500 + }, + { + "epoch": 24.68, + "learning_rate": 3.7665621299723054e-05, + "loss": 2.2793, + "step": 4981000 + }, + { + "epoch": 24.68, + "learning_rate": 3.766438519046982e-05, + "loss": 2.3047, + "step": 4981500 + }, + { + "epoch": 24.68, + "learning_rate": 3.766314908121659e-05, + "loss": 2.2659, + "step": 4982000 + }, + { + "epoch": 24.69, + "learning_rate": 3.766191049479051e-05, + "loss": 2.2947, + "step": 4982500 + }, + { + "epoch": 24.69, + "learning_rate": 3.7660671908364425e-05, + "loss": 2.2981, + "step": 4983000 + }, + { + "epoch": 24.69, + "learning_rate": 3.765943332193834e-05, + "loss": 2.3044, + "step": 4983500 + }, + { + "epoch": 24.69, + "learning_rate": 3.765819473551226e-05, + "loss": 2.2545, + "step": 4984000 + }, + { + "epoch": 24.69, + "learning_rate": 3.765695614908617e-05, + "loss": 2.2985, + "step": 4984500 + }, + { + "epoch": 24.7, + "learning_rate": 3.7655717562660086e-05, + "loss": 2.2902, + "step": 4985000 + }, + { + "epoch": 24.7, + "learning_rate": 3.765448145340686e-05, + "loss": 2.2621, + "step": 4985500 + }, + { + "epoch": 24.7, + "learning_rate": 3.765324286698078e-05, + "loss": 2.2897, + "step": 4986000 + }, + { + "epoch": 24.7, + "learning_rate": 3.7652004280554696e-05, + "loss": 2.2919, + "step": 4986500 + }, + { + "epoch": 24.71, + "learning_rate": 3.7650765694128606e-05, + "loss": 2.2849, + "step": 4987000 + }, + { + "epoch": 24.71, + "learning_rate": 3.764952710770252e-05, + "loss": 2.2823, + "step": 4987500 + }, + { + "epoch": 24.71, + "learning_rate": 3.764828852127644e-05, + "loss": 2.3095, + "step": 4988000 + }, + { + "epoch": 24.71, + "learning_rate": 3.764705241202321e-05, + "loss": 2.3133, + "step": 4988500 + }, + { + "epoch": 24.72, + "learning_rate": 3.7645813825597125e-05, + "loss": 2.2789, + "step": 4989000 + }, + { + "epoch": 24.72, + "learning_rate": 3.764457523917104e-05, + "loss": 2.2912, + "step": 4989500 + }, + { + "epoch": 24.72, + "learning_rate": 3.764333665274496e-05, + "loss": 2.2882, + "step": 4990000 + }, + { + "epoch": 24.72, + "learning_rate": 3.7642098066318876e-05, + "loss": 2.2744, + "step": 4990500 + }, + { + "epoch": 24.73, + "learning_rate": 3.7640859479892786e-05, + "loss": 2.2954, + "step": 4991000 + }, + { + "epoch": 24.73, + "learning_rate": 3.76396208934667e-05, + "loss": 2.2657, + "step": 4991500 + }, + { + "epoch": 24.73, + "learning_rate": 3.763838230704062e-05, + "loss": 2.2872, + "step": 4992000 + }, + { + "epoch": 24.73, + "learning_rate": 3.763714372061454e-05, + "loss": 2.3018, + "step": 4992500 + }, + { + "epoch": 24.74, + "learning_rate": 3.7635905134188454e-05, + "loss": 2.276, + "step": 4993000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763466654776237e-05, + "loss": 2.2895, + "step": 4993500 + }, + { + "epoch": 24.74, + "learning_rate": 3.763342796133629e-05, + "loss": 2.3095, + "step": 4994000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763219185208306e-05, + "loss": 2.2876, + "step": 4994500 + }, + { + "epoch": 24.75, + "learning_rate": 3.7630955742829825e-05, + "loss": 2.3048, + "step": 4995000 + }, + { + "epoch": 24.75, + "learning_rate": 3.762971715640374e-05, + "loss": 2.296, + "step": 4995500 + }, + { + "epoch": 24.75, + "learning_rate": 3.762847856997766e-05, + "loss": 2.2919, + "step": 4996000 + }, + { + "epoch": 24.75, + "learning_rate": 3.7627239983551576e-05, + "loss": 2.2907, + "step": 4996500 + }, + { + "epoch": 24.76, + "learning_rate": 3.7626001397125486e-05, + "loss": 2.3036, + "step": 4997000 + }, + { + "epoch": 24.76, + "learning_rate": 3.76247628106994e-05, + "loss": 2.2775, + "step": 4997500 + }, + { + "epoch": 24.76, + "learning_rate": 3.762352422427332e-05, + "loss": 2.3023, + "step": 4998000 + }, + { + "epoch": 24.76, + "learning_rate": 3.762228563784724e-05, + "loss": 2.2945, + "step": 4998500 + }, + { + "epoch": 24.77, + "learning_rate": 3.762104952859401e-05, + "loss": 2.2875, + "step": 4999000 + }, + { + "epoch": 24.77, + "learning_rate": 3.761981094216793e-05, + "loss": 2.3163, + "step": 4999500 + }, + { + "epoch": 24.77, + "learning_rate": 3.7618572355741847e-05, + "loss": 2.3003, + "step": 5000000 + }, + { + "epoch": 24.77, + "learning_rate": 3.761733376931576e-05, + "loss": 2.2923, + "step": 5000500 + }, + { + "epoch": 24.78, + "learning_rate": 3.7616095182889674e-05, + "loss": 2.3032, + "step": 5001000 + }, + { + "epoch": 24.78, + "learning_rate": 3.761485659646359e-05, + "loss": 2.2881, + "step": 5001500 + }, + { + "epoch": 24.78, + "learning_rate": 3.761361801003751e-05, + "loss": 2.2852, + "step": 5002000 + }, + { + "epoch": 24.78, + "learning_rate": 3.7612379423611424e-05, + "loss": 2.2943, + "step": 5002500 + }, + { + "epoch": 24.79, + "learning_rate": 3.7611140837185335e-05, + "loss": 2.2967, + "step": 5003000 + }, + { + "epoch": 24.79, + "learning_rate": 3.760990225075925e-05, + "loss": 2.3114, + "step": 5003500 + }, + { + "epoch": 24.79, + "learning_rate": 3.760866614150602e-05, + "loss": 2.301, + "step": 5004000 + }, + { + "epoch": 24.79, + "learning_rate": 3.7607430032252796e-05, + "loss": 2.2613, + "step": 5004500 + }, + { + "epoch": 24.8, + "learning_rate": 3.760619144582671e-05, + "loss": 2.2814, + "step": 5005000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760495285940063e-05, + "loss": 2.2881, + "step": 5005500 + }, + { + "epoch": 24.8, + "learning_rate": 3.760371427297455e-05, + "loss": 2.2897, + "step": 5006000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760247568654846e-05, + "loss": 2.2766, + "step": 5006500 + }, + { + "epoch": 24.81, + "learning_rate": 3.7601237100122374e-05, + "loss": 2.2921, + "step": 5007000 + }, + { + "epoch": 24.81, + "learning_rate": 3.759999851369629e-05, + "loss": 2.3077, + "step": 5007500 + }, + { + "epoch": 24.81, + "learning_rate": 3.759875992727021e-05, + "loss": 2.2995, + "step": 5008000 + }, + { + "epoch": 24.81, + "learning_rate": 3.7597521340844125e-05, + "loss": 2.3194, + "step": 5008500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759628275441804e-05, + "loss": 2.2753, + "step": 5009000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759504416799196e-05, + "loss": 2.2873, + "step": 5009500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759380558156587e-05, + "loss": 2.279, + "step": 5010000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759256947231264e-05, + "loss": 2.2831, + "step": 5010500 + }, + { + "epoch": 24.83, + "learning_rate": 3.7591330885886554e-05, + "loss": 2.2996, + "step": 5011000 + }, + { + "epoch": 24.83, + "learning_rate": 3.759009229946047e-05, + "loss": 2.3105, + "step": 5011500 + }, + { + "epoch": 24.83, + "learning_rate": 3.758885371303439e-05, + "loss": 2.3228, + "step": 5012000 + }, + { + "epoch": 24.83, + "learning_rate": 3.7587615126608305e-05, + "loss": 2.3095, + "step": 5012500 + }, + { + "epoch": 24.84, + "learning_rate": 3.7586379017355074e-05, + "loss": 2.2962, + "step": 5013000 + }, + { + "epoch": 24.84, + "learning_rate": 3.758514043092899e-05, + "loss": 2.3076, + "step": 5013500 + }, + { + "epoch": 24.84, + "learning_rate": 3.758390432167576e-05, + "loss": 2.2853, + "step": 5014000 + }, + { + "epoch": 24.84, + "learning_rate": 3.7582665735249677e-05, + "loss": 2.2836, + "step": 5014500 + }, + { + "epoch": 24.85, + "learning_rate": 3.7581427148823594e-05, + "loss": 2.2983, + "step": 5015000 + }, + { + "epoch": 24.85, + "learning_rate": 3.758018856239751e-05, + "loss": 2.2958, + "step": 5015500 + }, + { + "epoch": 24.85, + "learning_rate": 3.757894997597142e-05, + "loss": 2.312, + "step": 5016000 + }, + { + "epoch": 24.85, + "learning_rate": 3.757771138954534e-05, + "loss": 2.2927, + "step": 5016500 + }, + { + "epoch": 24.86, + "learning_rate": 3.7576472803119254e-05, + "loss": 2.3107, + "step": 5017000 + }, + { + "epoch": 24.86, + "learning_rate": 3.757523421669317e-05, + "loss": 2.2668, + "step": 5017500 + }, + { + "epoch": 24.86, + "learning_rate": 3.757399563026709e-05, + "loss": 2.29, + "step": 5018000 + }, + { + "epoch": 24.86, + "learning_rate": 3.7572757043841005e-05, + "loss": 2.2804, + "step": 5018500 + }, + { + "epoch": 24.87, + "learning_rate": 3.757151845741492e-05, + "loss": 2.301, + "step": 5019000 + }, + { + "epoch": 24.87, + "learning_rate": 3.757027987098884e-05, + "loss": 2.2906, + "step": 5019500 + }, + { + "epoch": 24.87, + "learning_rate": 3.756904376173561e-05, + "loss": 2.264, + "step": 5020000 + }, + { + "epoch": 24.87, + "learning_rate": 3.756780765248238e-05, + "loss": 2.3048, + "step": 5020500 + }, + { + "epoch": 24.88, + "learning_rate": 3.7566571543229146e-05, + "loss": 2.2908, + "step": 5021000 + }, + { + "epoch": 24.88, + "learning_rate": 3.756533295680306e-05, + "loss": 2.31, + "step": 5021500 + }, + { + "epoch": 24.88, + "learning_rate": 3.756409437037698e-05, + "loss": 2.2986, + "step": 5022000 + }, + { + "epoch": 24.88, + "learning_rate": 3.7562855783950896e-05, + "loss": 2.2729, + "step": 5022500 + }, + { + "epoch": 24.89, + "learning_rate": 3.756161719752481e-05, + "loss": 2.2964, + "step": 5023000 + }, + { + "epoch": 24.89, + "learning_rate": 3.756038108827158e-05, + "loss": 2.2741, + "step": 5023500 + }, + { + "epoch": 24.89, + "learning_rate": 3.75591425018455e-05, + "loss": 2.315, + "step": 5024000 + }, + { + "epoch": 24.89, + "learning_rate": 3.755790391541941e-05, + "loss": 2.2977, + "step": 5024500 + }, + { + "epoch": 24.9, + "learning_rate": 3.7556665328993326e-05, + "loss": 2.292, + "step": 5025000 + }, + { + "epoch": 24.9, + "learning_rate": 3.755542674256724e-05, + "loss": 2.285, + "step": 5025500 + }, + { + "epoch": 24.9, + "learning_rate": 3.755418815614116e-05, + "loss": 2.3066, + "step": 5026000 + }, + { + "epoch": 24.9, + "learning_rate": 3.755295204688793e-05, + "loss": 2.2951, + "step": 5026500 + }, + { + "epoch": 24.91, + "learning_rate": 3.7551713460461846e-05, + "loss": 2.2866, + "step": 5027000 + }, + { + "epoch": 24.91, + "learning_rate": 3.755047487403576e-05, + "loss": 2.2931, + "step": 5027500 + }, + { + "epoch": 24.91, + "learning_rate": 3.754923628760968e-05, + "loss": 2.2865, + "step": 5028000 + }, + { + "epoch": 24.91, + "learning_rate": 3.7547997701183596e-05, + "loss": 2.2993, + "step": 5028500 + }, + { + "epoch": 24.92, + "learning_rate": 3.7546761591930365e-05, + "loss": 2.3104, + "step": 5029000 + }, + { + "epoch": 24.92, + "learning_rate": 3.754552300550428e-05, + "loss": 2.2981, + "step": 5029500 + }, + { + "epoch": 24.92, + "learning_rate": 3.75442844190782e-05, + "loss": 2.3134, + "step": 5030000 + }, + { + "epoch": 24.92, + "learning_rate": 3.7543045832652116e-05, + "loss": 2.3015, + "step": 5030500 + }, + { + "epoch": 24.93, + "learning_rate": 3.7541807246226026e-05, + "loss": 2.2976, + "step": 5031000 + }, + { + "epoch": 24.93, + "learning_rate": 3.754056865979994e-05, + "loss": 2.3091, + "step": 5031500 + }, + { + "epoch": 24.93, + "learning_rate": 3.753933007337386e-05, + "loss": 2.2988, + "step": 5032000 + }, + { + "epoch": 24.93, + "learning_rate": 3.753809396412063e-05, + "loss": 2.2839, + "step": 5032500 + }, + { + "epoch": 24.94, + "learning_rate": 3.75368578548674e-05, + "loss": 2.297, + "step": 5033000 + }, + { + "epoch": 24.94, + "learning_rate": 3.7535619268441315e-05, + "loss": 2.2998, + "step": 5033500 + }, + { + "epoch": 24.94, + "learning_rate": 3.753438068201523e-05, + "loss": 2.2997, + "step": 5034000 + }, + { + "epoch": 24.94, + "learning_rate": 3.753314209558915e-05, + "loss": 2.2748, + "step": 5034500 + }, + { + "epoch": 24.95, + "learning_rate": 3.7531903509163065e-05, + "loss": 2.3065, + "step": 5035000 + }, + { + "epoch": 24.95, + "learning_rate": 3.753066492273698e-05, + "loss": 2.3051, + "step": 5035500 + }, + { + "epoch": 24.95, + "learning_rate": 3.75294263363109e-05, + "loss": 2.2992, + "step": 5036000 + }, + { + "epoch": 24.95, + "learning_rate": 3.7528187749884816e-05, + "loss": 2.3059, + "step": 5036500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752694916345873e-05, + "loss": 2.2977, + "step": 5037000 + }, + { + "epoch": 24.96, + "learning_rate": 3.752571057703265e-05, + "loss": 2.289, + "step": 5037500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752447199060656e-05, + "loss": 2.2699, + "step": 5038000 + }, + { + "epoch": 24.96, + "learning_rate": 3.752323340418048e-05, + "loss": 2.2974, + "step": 5038500 + }, + { + "epoch": 24.96, + "learning_rate": 3.7521994817754394e-05, + "loss": 2.2897, + "step": 5039000 + }, + { + "epoch": 24.97, + "learning_rate": 3.752075623132831e-05, + "loss": 2.3178, + "step": 5039500 + }, + { + "epoch": 24.97, + "learning_rate": 3.751951764490223e-05, + "loss": 2.3062, + "step": 5040000 + }, + { + "epoch": 24.97, + "learning_rate": 3.7518279058476145e-05, + "loss": 2.3189, + "step": 5040500 + }, + { + "epoch": 24.97, + "learning_rate": 3.7517040472050055e-05, + "loss": 2.299, + "step": 5041000 + }, + { + "epoch": 24.98, + "learning_rate": 3.751580188562397e-05, + "loss": 2.3129, + "step": 5041500 + }, + { + "epoch": 24.98, + "learning_rate": 3.751456329919789e-05, + "loss": 2.3282, + "step": 5042000 + }, + { + "epoch": 24.98, + "learning_rate": 3.7513324712771806e-05, + "loss": 2.2736, + "step": 5042500 + }, + { + "epoch": 24.98, + "learning_rate": 3.751208612634572e-05, + "loss": 2.3151, + "step": 5043000 + }, + { + "epoch": 24.99, + "learning_rate": 3.751085249426535e-05, + "loss": 2.2894, + "step": 5043500 + }, + { + "epoch": 24.99, + "learning_rate": 3.750961390783927e-05, + "loss": 2.3101, + "step": 5044000 + }, + { + "epoch": 24.99, + "learning_rate": 3.750837532141318e-05, + "loss": 2.3174, + "step": 5044500 + }, + { + "epoch": 24.99, + "learning_rate": 3.7507136734987094e-05, + "loss": 2.311, + "step": 5045000 + }, + { + "epoch": 25.0, + "learning_rate": 3.750589814856101e-05, + "loss": 2.3085, + "step": 5045500 + }, + { + "epoch": 25.0, + "learning_rate": 3.750465956213493e-05, + "loss": 2.2712, + "step": 5046000 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.654987755169183, + "eval_accuracy_mlm": 0.6101664086430171, + "eval_accuracy_nsp": 0.8666530697092474, + "eval_loss": 2.337043523788452, + "eval_runtime": 145.9167, + "eval_samples_per_second": 1747.291, + "eval_steps_per_second": 72.809, + "step": 5046075 + }, + { + "epoch": 25.0, + "learning_rate": 3.7503420975708845e-05, + "loss": 2.2487, + "step": 5046500 + }, + { + "epoch": 25.0, + "learning_rate": 3.7502184866455614e-05, + "loss": 2.254, + "step": 5047000 + }, + { + "epoch": 25.01, + "learning_rate": 3.750094628002953e-05, + "loss": 2.2641, + "step": 5047500 + }, + { + "epoch": 25.01, + "learning_rate": 3.74997101707763e-05, + "loss": 2.2432, + "step": 5048000 + }, + { + "epoch": 25.01, + "learning_rate": 3.7498471584350216e-05, + "loss": 2.2432, + "step": 5048500 + }, + { + "epoch": 25.01, + "learning_rate": 3.749723299792413e-05, + "loss": 2.2643, + "step": 5049000 + }, + { + "epoch": 25.02, + "learning_rate": 3.749599441149805e-05, + "loss": 2.2568, + "step": 5049500 + }, + { + "epoch": 25.02, + "learning_rate": 3.749475582507197e-05, + "loss": 2.2875, + "step": 5050000 + }, + { + "epoch": 25.02, + "learning_rate": 3.749351971581873e-05, + "loss": 2.2394, + "step": 5050500 + }, + { + "epoch": 25.02, + "learning_rate": 3.74922836065655e-05, + "loss": 2.2771, + "step": 5051000 + }, + { + "epoch": 25.03, + "learning_rate": 3.7491045020139415e-05, + "loss": 2.3011, + "step": 5051500 + }, + { + "epoch": 25.03, + "learning_rate": 3.748980643371333e-05, + "loss": 2.2375, + "step": 5052000 + }, + { + "epoch": 25.03, + "learning_rate": 3.748856784728725e-05, + "loss": 2.2646, + "step": 5052500 + }, + { + "epoch": 25.03, + "learning_rate": 3.7487329260861166e-05, + "loss": 2.2578, + "step": 5053000 + }, + { + "epoch": 25.04, + "learning_rate": 3.748609067443508e-05, + "loss": 2.2628, + "step": 5053500 + }, + { + "epoch": 25.04, + "learning_rate": 3.7484852088009e-05, + "loss": 2.2763, + "step": 5054000 + }, + { + "epoch": 25.04, + "learning_rate": 3.7483613501582917e-05, + "loss": 2.2344, + "step": 5054500 + }, + { + "epoch": 25.04, + "learning_rate": 3.7482374915156833e-05, + "loss": 2.2395, + "step": 5055000 + }, + { + "epoch": 25.05, + "learning_rate": 3.7481138805903595e-05, + "loss": 2.2747, + "step": 5055500 + }, + { + "epoch": 25.05, + "learning_rate": 3.747990021947751e-05, + "loss": 2.2727, + "step": 5056000 + }, + { + "epoch": 25.05, + "learning_rate": 3.747866411022429e-05, + "loss": 2.2932, + "step": 5056500 + }, + { + "epoch": 25.05, + "learning_rate": 3.7477425523798205e-05, + "loss": 2.2713, + "step": 5057000 + }, + { + "epoch": 25.06, + "learning_rate": 3.7476186937372115e-05, + "loss": 2.2643, + "step": 5057500 + }, + { + "epoch": 25.06, + "learning_rate": 3.747494835094603e-05, + "loss": 2.2898, + "step": 5058000 + }, + { + "epoch": 25.06, + "learning_rate": 3.747370976451995e-05, + "loss": 2.2548, + "step": 5058500 + }, + { + "epoch": 25.06, + "learning_rate": 3.7472471178093866e-05, + "loss": 2.2443, + "step": 5059000 + }, + { + "epoch": 25.07, + "learning_rate": 3.747123259166778e-05, + "loss": 2.2565, + "step": 5059500 + }, + { + "epoch": 25.07, + "learning_rate": 3.74699940052417e-05, + "loss": 2.2639, + "step": 5060000 + }, + { + "epoch": 25.07, + "learning_rate": 3.746875541881562e-05, + "loss": 2.266, + "step": 5060500 + }, + { + "epoch": 25.07, + "learning_rate": 3.7467516832389534e-05, + "loss": 2.2691, + "step": 5061000 + }, + { + "epoch": 25.08, + "learning_rate": 3.746627824596345e-05, + "loss": 2.2544, + "step": 5061500 + }, + { + "epoch": 25.08, + "learning_rate": 3.746504213671021e-05, + "loss": 2.2926, + "step": 5062000 + }, + { + "epoch": 25.08, + "learning_rate": 3.746380355028413e-05, + "loss": 2.2788, + "step": 5062500 + }, + { + "epoch": 25.08, + "learning_rate": 3.7462564963858046e-05, + "loss": 2.2783, + "step": 5063000 + }, + { + "epoch": 25.09, + "learning_rate": 3.7461328854604815e-05, + "loss": 2.279, + "step": 5063500 + }, + { + "epoch": 25.09, + "learning_rate": 3.746009274535159e-05, + "loss": 2.3008, + "step": 5064000 + }, + { + "epoch": 25.09, + "learning_rate": 3.745885415892551e-05, + "loss": 2.2739, + "step": 5064500 + }, + { + "epoch": 25.09, + "learning_rate": 3.7457615572499425e-05, + "loss": 2.2622, + "step": 5065000 + }, + { + "epoch": 25.1, + "learning_rate": 3.745637698607334e-05, + "loss": 2.2654, + "step": 5065500 + }, + { + "epoch": 25.1, + "learning_rate": 3.745513839964725e-05, + "loss": 2.2805, + "step": 5066000 + }, + { + "epoch": 25.1, + "learning_rate": 3.745389981322117e-05, + "loss": 2.2743, + "step": 5066500 + }, + { + "epoch": 25.1, + "learning_rate": 3.7452661226795086e-05, + "loss": 2.2902, + "step": 5067000 + }, + { + "epoch": 25.11, + "learning_rate": 3.7451422640369e-05, + "loss": 2.2832, + "step": 5067500 + }, + { + "epoch": 25.11, + "learning_rate": 3.745018405394292e-05, + "loss": 2.2714, + "step": 5068000 + }, + { + "epoch": 25.11, + "learning_rate": 3.744894546751683e-05, + "loss": 2.2632, + "step": 5068500 + }, + { + "epoch": 25.11, + "learning_rate": 3.7447706881090747e-05, + "loss": 2.2622, + "step": 5069000 + }, + { + "epoch": 25.12, + "learning_rate": 3.7446468294664663e-05, + "loss": 2.2784, + "step": 5069500 + }, + { + "epoch": 25.12, + "learning_rate": 3.744522970823858e-05, + "loss": 2.257, + "step": 5070000 + }, + { + "epoch": 25.12, + "learning_rate": 3.744399607615821e-05, + "loss": 2.2936, + "step": 5070500 + }, + { + "epoch": 25.12, + "learning_rate": 3.7442757489732125e-05, + "loss": 2.2487, + "step": 5071000 + }, + { + "epoch": 25.13, + "learning_rate": 3.744151890330604e-05, + "loss": 2.281, + "step": 5071500 + }, + { + "epoch": 25.13, + "learning_rate": 3.744028031687996e-05, + "loss": 2.2606, + "step": 5072000 + }, + { + "epoch": 25.13, + "learning_rate": 3.743904173045387e-05, + "loss": 2.2643, + "step": 5072500 + }, + { + "epoch": 25.13, + "learning_rate": 3.7437803144027786e-05, + "loss": 2.2806, + "step": 5073000 + }, + { + "epoch": 25.14, + "learning_rate": 3.74365645576017e-05, + "loss": 2.2664, + "step": 5073500 + }, + { + "epoch": 25.14, + "learning_rate": 3.743532844834847e-05, + "loss": 2.261, + "step": 5074000 + }, + { + "epoch": 25.14, + "learning_rate": 3.743408986192239e-05, + "loss": 2.2754, + "step": 5074500 + }, + { + "epoch": 25.14, + "learning_rate": 3.743285375266916e-05, + "loss": 2.2802, + "step": 5075000 + }, + { + "epoch": 25.15, + "learning_rate": 3.7431615166243074e-05, + "loss": 2.2682, + "step": 5075500 + }, + { + "epoch": 25.15, + "learning_rate": 3.743037657981699e-05, + "loss": 2.2538, + "step": 5076000 + }, + { + "epoch": 25.15, + "learning_rate": 3.742913799339091e-05, + "loss": 2.2676, + "step": 5076500 + }, + { + "epoch": 25.15, + "learning_rate": 3.7427899406964825e-05, + "loss": 2.291, + "step": 5077000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742666082053874e-05, + "loss": 2.2751, + "step": 5077500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742542223411266e-05, + "loss": 2.2786, + "step": 5078000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742418612485942e-05, + "loss": 2.2768, + "step": 5078500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742294753843334e-05, + "loss": 2.2702, + "step": 5079000 + }, + { + "epoch": 25.17, + "learning_rate": 3.7421708952007255e-05, + "loss": 2.2698, + "step": 5079500 + }, + { + "epoch": 25.17, + "learning_rate": 3.7420472842754023e-05, + "loss": 2.2873, + "step": 5080000 + }, + { + "epoch": 25.17, + "learning_rate": 3.741923425632794e-05, + "loss": 2.2968, + "step": 5080500 + }, + { + "epoch": 25.17, + "learning_rate": 3.741799566990186e-05, + "loss": 2.2721, + "step": 5081000 + }, + { + "epoch": 25.18, + "learning_rate": 3.7416757083475774e-05, + "loss": 2.2607, + "step": 5081500 + }, + { + "epoch": 25.18, + "learning_rate": 3.741551849704969e-05, + "loss": 2.2801, + "step": 5082000 + }, + { + "epoch": 25.18, + "learning_rate": 3.741427991062361e-05, + "loss": 2.2657, + "step": 5082500 + }, + { + "epoch": 25.18, + "learning_rate": 3.7413041324197525e-05, + "loss": 2.2593, + "step": 5083000 + }, + { + "epoch": 25.19, + "learning_rate": 3.741180273777144e-05, + "loss": 2.2759, + "step": 5083500 + }, + { + "epoch": 25.19, + "learning_rate": 3.741056415134536e-05, + "loss": 2.2836, + "step": 5084000 + }, + { + "epoch": 25.19, + "learning_rate": 3.7409325564919276e-05, + "loss": 2.2825, + "step": 5084500 + }, + { + "epoch": 25.19, + "learning_rate": 3.740808697849319e-05, + "loss": 2.2724, + "step": 5085000 + }, + { + "epoch": 25.2, + "learning_rate": 3.74068483920671e-05, + "loss": 2.268, + "step": 5085500 + }, + { + "epoch": 25.2, + "learning_rate": 3.740560980564102e-05, + "loss": 2.2915, + "step": 5086000 + }, + { + "epoch": 25.2, + "learning_rate": 3.740437121921494e-05, + "loss": 2.2578, + "step": 5086500 + }, + { + "epoch": 25.2, + "learning_rate": 3.7403132632788854e-05, + "loss": 2.2763, + "step": 5087000 + }, + { + "epoch": 25.21, + "learning_rate": 3.740189404636277e-05, + "loss": 2.2847, + "step": 5087500 + }, + { + "epoch": 25.21, + "learning_rate": 3.740065545993669e-05, + "loss": 2.2892, + "step": 5088000 + }, + { + "epoch": 25.21, + "learning_rate": 3.73994168735106e-05, + "loss": 2.2638, + "step": 5088500 + }, + { + "epoch": 25.21, + "learning_rate": 3.7398178287084515e-05, + "loss": 2.2464, + "step": 5089000 + }, + { + "epoch": 25.22, + "learning_rate": 3.739693970065843e-05, + "loss": 2.2865, + "step": 5089500 + }, + { + "epoch": 25.22, + "learning_rate": 3.739570111423235e-05, + "loss": 2.2407, + "step": 5090000 + }, + { + "epoch": 25.22, + "learning_rate": 3.7394462527806265e-05, + "loss": 2.2847, + "step": 5090500 + }, + { + "epoch": 25.22, + "learning_rate": 3.739322394138018e-05, + "loss": 2.2936, + "step": 5091000 + }, + { + "epoch": 25.23, + "learning_rate": 3.73919853549541e-05, + "loss": 2.2751, + "step": 5091500 + }, + { + "epoch": 25.23, + "learning_rate": 3.7390746768528016e-05, + "loss": 2.2734, + "step": 5092000 + }, + { + "epoch": 25.23, + "learning_rate": 3.738951313644764e-05, + "loss": 2.2856, + "step": 5092500 + }, + { + "epoch": 25.23, + "learning_rate": 3.7388274550021554e-05, + "loss": 2.2731, + "step": 5093000 + }, + { + "epoch": 25.23, + "learning_rate": 3.738703596359547e-05, + "loss": 2.2916, + "step": 5093500 + }, + { + "epoch": 25.24, + "learning_rate": 3.738579737716939e-05, + "loss": 2.2879, + "step": 5094000 + }, + { + "epoch": 25.24, + "learning_rate": 3.7384561267916156e-05, + "loss": 2.2828, + "step": 5094500 + }, + { + "epoch": 25.24, + "learning_rate": 3.7383325158662925e-05, + "loss": 2.2666, + "step": 5095000 + }, + { + "epoch": 25.24, + "learning_rate": 3.738208657223684e-05, + "loss": 2.2738, + "step": 5095500 + }, + { + "epoch": 25.25, + "learning_rate": 3.738084798581076e-05, + "loss": 2.2947, + "step": 5096000 + }, + { + "epoch": 25.25, + "learning_rate": 3.7379609399384676e-05, + "loss": 2.2574, + "step": 5096500 + }, + { + "epoch": 25.25, + "learning_rate": 3.737837081295859e-05, + "loss": 2.2668, + "step": 5097000 + }, + { + "epoch": 25.25, + "learning_rate": 3.737713222653251e-05, + "loss": 2.2884, + "step": 5097500 + }, + { + "epoch": 25.26, + "learning_rate": 3.737589364010642e-05, + "loss": 2.2407, + "step": 5098000 + }, + { + "epoch": 25.26, + "learning_rate": 3.737465505368034e-05, + "loss": 2.2656, + "step": 5098500 + }, + { + "epoch": 25.26, + "learning_rate": 3.7373416467254254e-05, + "loss": 2.2799, + "step": 5099000 + }, + { + "epoch": 25.26, + "learning_rate": 3.737217788082817e-05, + "loss": 2.2688, + "step": 5099500 + }, + { + "epoch": 25.27, + "learning_rate": 3.737093929440209e-05, + "loss": 2.284, + "step": 5100000 + }, + { + "epoch": 25.27, + "learning_rate": 3.7369703185148857e-05, + "loss": 2.2631, + "step": 5100500 + }, + { + "epoch": 25.27, + "learning_rate": 3.736846459872277e-05, + "loss": 2.3051, + "step": 5101000 + }, + { + "epoch": 25.27, + "learning_rate": 3.7367226012296684e-05, + "loss": 2.2549, + "step": 5101500 + }, + { + "epoch": 25.28, + "learning_rate": 3.73659874258706e-05, + "loss": 2.2622, + "step": 5102000 + }, + { + "epoch": 25.28, + "learning_rate": 3.736474883944452e-05, + "loss": 2.2823, + "step": 5102500 + }, + { + "epoch": 25.28, + "learning_rate": 3.7363510253018434e-05, + "loss": 2.2786, + "step": 5103000 + }, + { + "epoch": 25.28, + "learning_rate": 3.736227166659235e-05, + "loss": 2.3078, + "step": 5103500 + }, + { + "epoch": 25.29, + "learning_rate": 3.736103803451197e-05, + "loss": 2.3087, + "step": 5104000 + }, + { + "epoch": 25.29, + "learning_rate": 3.735980192525874e-05, + "loss": 2.2929, + "step": 5104500 + }, + { + "epoch": 25.29, + "learning_rate": 3.735856333883266e-05, + "loss": 2.2852, + "step": 5105000 + }, + { + "epoch": 25.29, + "learning_rate": 3.7357324752406575e-05, + "loss": 2.2776, + "step": 5105500 + }, + { + "epoch": 25.3, + "learning_rate": 3.735608616598049e-05, + "loss": 2.2995, + "step": 5106000 + }, + { + "epoch": 25.3, + "learning_rate": 3.735484757955441e-05, + "loss": 2.2672, + "step": 5106500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7353608993128326e-05, + "loss": 2.2895, + "step": 5107000 + }, + { + "epoch": 25.3, + "learning_rate": 3.735237040670224e-05, + "loss": 2.277, + "step": 5107500 + }, + { + "epoch": 25.31, + "learning_rate": 3.735113182027616e-05, + "loss": 2.275, + "step": 5108000 + }, + { + "epoch": 25.31, + "learning_rate": 3.7349893233850076e-05, + "loss": 2.2546, + "step": 5108500 + }, + { + "epoch": 25.31, + "learning_rate": 3.734865464742399e-05, + "loss": 2.295, + "step": 5109000 + }, + { + "epoch": 25.31, + "learning_rate": 3.7347418538170755e-05, + "loss": 2.2752, + "step": 5109500 + }, + { + "epoch": 25.32, + "learning_rate": 3.734617995174467e-05, + "loss": 2.2777, + "step": 5110000 + }, + { + "epoch": 25.32, + "learning_rate": 3.734494136531859e-05, + "loss": 2.2843, + "step": 5110500 + }, + { + "epoch": 25.32, + "learning_rate": 3.7343702778892506e-05, + "loss": 2.2714, + "step": 5111000 + }, + { + "epoch": 25.32, + "learning_rate": 3.734246419246642e-05, + "loss": 2.2732, + "step": 5111500 + }, + { + "epoch": 25.33, + "learning_rate": 3.734122560604034e-05, + "loss": 2.3063, + "step": 5112000 + }, + { + "epoch": 25.33, + "learning_rate": 3.733998701961426e-05, + "loss": 2.2975, + "step": 5112500 + }, + { + "epoch": 25.33, + "learning_rate": 3.7338748433188174e-05, + "loss": 2.291, + "step": 5113000 + }, + { + "epoch": 25.33, + "learning_rate": 3.7337509846762084e-05, + "loss": 2.2564, + "step": 5113500 + }, + { + "epoch": 25.34, + "learning_rate": 3.7336271260336e-05, + "loss": 2.2872, + "step": 5114000 + }, + { + "epoch": 25.34, + "learning_rate": 3.733503267390992e-05, + "loss": 2.2867, + "step": 5114500 + }, + { + "epoch": 25.34, + "learning_rate": 3.7333794087483835e-05, + "loss": 2.243, + "step": 5115000 + }, + { + "epoch": 25.34, + "learning_rate": 3.733255797823061e-05, + "loss": 2.2655, + "step": 5115500 + }, + { + "epoch": 25.35, + "learning_rate": 3.733131939180453e-05, + "loss": 2.2926, + "step": 5116000 + }, + { + "epoch": 25.35, + "learning_rate": 3.733008080537844e-05, + "loss": 2.2715, + "step": 5116500 + }, + { + "epoch": 25.35, + "learning_rate": 3.7328842218952354e-05, + "loss": 2.2757, + "step": 5117000 + }, + { + "epoch": 25.35, + "learning_rate": 3.732760363252627e-05, + "loss": 2.2806, + "step": 5117500 + }, + { + "epoch": 25.36, + "learning_rate": 3.732636752327304e-05, + "loss": 2.2805, + "step": 5118000 + }, + { + "epoch": 25.36, + "learning_rate": 3.732513141401981e-05, + "loss": 2.2542, + "step": 5118500 + }, + { + "epoch": 25.36, + "learning_rate": 3.7323892827593726e-05, + "loss": 2.277, + "step": 5119000 + }, + { + "epoch": 25.36, + "learning_rate": 3.732265424116764e-05, + "loss": 2.2745, + "step": 5119500 + }, + { + "epoch": 25.37, + "learning_rate": 3.732141565474156e-05, + "loss": 2.2858, + "step": 5120000 + }, + { + "epoch": 25.37, + "learning_rate": 3.7320177068315477e-05, + "loss": 2.2666, + "step": 5120500 + }, + { + "epoch": 25.37, + "learning_rate": 3.7318938481889393e-05, + "loss": 2.2903, + "step": 5121000 + }, + { + "epoch": 25.37, + "learning_rate": 3.731769989546331e-05, + "loss": 2.2642, + "step": 5121500 + }, + { + "epoch": 25.38, + "learning_rate": 3.731646130903723e-05, + "loss": 2.2661, + "step": 5122000 + }, + { + "epoch": 25.38, + "learning_rate": 3.7315222722611144e-05, + "loss": 2.2305, + "step": 5122500 + }, + { + "epoch": 25.38, + "learning_rate": 3.7313984136185054e-05, + "loss": 2.2755, + "step": 5123000 + }, + { + "epoch": 25.38, + "learning_rate": 3.731274554975897e-05, + "loss": 2.2832, + "step": 5123500 + }, + { + "epoch": 25.39, + "learning_rate": 3.731150696333289e-05, + "loss": 2.2592, + "step": 5124000 + }, + { + "epoch": 25.39, + "learning_rate": 3.7310268376906805e-05, + "loss": 2.2704, + "step": 5124500 + }, + { + "epoch": 25.39, + "learning_rate": 3.7309032267653574e-05, + "loss": 2.2827, + "step": 5125000 + }, + { + "epoch": 25.39, + "learning_rate": 3.730779368122749e-05, + "loss": 2.2715, + "step": 5125500 + }, + { + "epoch": 25.4, + "learning_rate": 3.73065550948014e-05, + "loss": 2.2773, + "step": 5126000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730531650837532e-05, + "loss": 2.2667, + "step": 5126500 + }, + { + "epoch": 25.4, + "learning_rate": 3.7304080399122094e-05, + "loss": 2.3061, + "step": 5127000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730284428986886e-05, + "loss": 2.2737, + "step": 5127500 + }, + { + "epoch": 25.41, + "learning_rate": 3.730160570344278e-05, + "loss": 2.2715, + "step": 5128000 + }, + { + "epoch": 25.41, + "learning_rate": 3.7300367117016696e-05, + "loss": 2.2898, + "step": 5128500 + }, + { + "epoch": 25.41, + "learning_rate": 3.729912853059061e-05, + "loss": 2.2592, + "step": 5129000 + }, + { + "epoch": 25.41, + "learning_rate": 3.729788994416453e-05, + "loss": 2.2816, + "step": 5129500 + }, + { + "epoch": 25.42, + "learning_rate": 3.729665135773844e-05, + "loss": 2.2692, + "step": 5130000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729541277131236e-05, + "loss": 2.2715, + "step": 5130500 + }, + { + "epoch": 25.42, + "learning_rate": 3.7294174184886274e-05, + "loss": 2.28, + "step": 5131000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729293807563304e-05, + "loss": 2.2883, + "step": 5131500 + }, + { + "epoch": 25.43, + "learning_rate": 3.729169948920696e-05, + "loss": 2.2569, + "step": 5132000 + }, + { + "epoch": 25.43, + "learning_rate": 3.729046337995373e-05, + "loss": 2.2704, + "step": 5132500 + }, + { + "epoch": 25.43, + "learning_rate": 3.7289224793527646e-05, + "loss": 2.279, + "step": 5133000 + }, + { + "epoch": 25.43, + "learning_rate": 3.728798620710156e-05, + "loss": 2.2866, + "step": 5133500 + }, + { + "epoch": 25.44, + "learning_rate": 3.728674762067548e-05, + "loss": 2.2773, + "step": 5134000 + }, + { + "epoch": 25.44, + "learning_rate": 3.7285509034249396e-05, + "loss": 2.3014, + "step": 5134500 + }, + { + "epoch": 25.44, + "learning_rate": 3.728427044782331e-05, + "loss": 2.269, + "step": 5135000 + }, + { + "epoch": 25.44, + "learning_rate": 3.728303186139723e-05, + "loss": 2.2995, + "step": 5135500 + }, + { + "epoch": 25.45, + "learning_rate": 3.728179327497115e-05, + "loss": 2.2661, + "step": 5136000 + }, + { + "epoch": 25.45, + "learning_rate": 3.7280554688545064e-05, + "loss": 2.2761, + "step": 5136500 + }, + { + "epoch": 25.45, + "learning_rate": 3.7279316102118974e-05, + "loss": 2.2874, + "step": 5137000 + }, + { + "epoch": 25.45, + "learning_rate": 3.727807751569289e-05, + "loss": 2.2651, + "step": 5137500 + }, + { + "epoch": 25.46, + "learning_rate": 3.727683892926681e-05, + "loss": 2.2692, + "step": 5138000 + }, + { + "epoch": 25.46, + "learning_rate": 3.7275600342840725e-05, + "loss": 2.2918, + "step": 5138500 + }, + { + "epoch": 25.46, + "learning_rate": 3.7274364233587494e-05, + "loss": 2.288, + "step": 5139000 + }, + { + "epoch": 25.46, + "learning_rate": 3.727312564716141e-05, + "loss": 2.2863, + "step": 5139500 + }, + { + "epoch": 25.47, + "learning_rate": 3.727188706073533e-05, + "loss": 2.2988, + "step": 5140000 + }, + { + "epoch": 25.47, + "learning_rate": 3.7270648474309245e-05, + "loss": 2.2731, + "step": 5140500 + }, + { + "epoch": 25.47, + "learning_rate": 3.726940988788316e-05, + "loss": 2.2773, + "step": 5141000 + }, + { + "epoch": 25.47, + "learning_rate": 3.726817130145707e-05, + "loss": 2.2942, + "step": 5141500 + }, + { + "epoch": 25.48, + "learning_rate": 3.726693519220385e-05, + "loss": 2.2827, + "step": 5142000 + }, + { + "epoch": 25.48, + "learning_rate": 3.7265696605777764e-05, + "loss": 2.2954, + "step": 5142500 + }, + { + "epoch": 25.48, + "learning_rate": 3.726445801935168e-05, + "loss": 2.2909, + "step": 5143000 + }, + { + "epoch": 25.48, + "learning_rate": 3.726322191009844e-05, + "loss": 2.3019, + "step": 5143500 + }, + { + "epoch": 25.49, + "learning_rate": 3.726198332367236e-05, + "loss": 2.2943, + "step": 5144000 + }, + { + "epoch": 25.49, + "learning_rate": 3.726074473724628e-05, + "loss": 2.2736, + "step": 5144500 + }, + { + "epoch": 25.49, + "learning_rate": 3.7259506150820194e-05, + "loss": 2.2876, + "step": 5145000 + }, + { + "epoch": 25.49, + "learning_rate": 3.725826756439411e-05, + "loss": 2.2856, + "step": 5145500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725702897796803e-05, + "loss": 2.272, + "step": 5146000 + }, + { + "epoch": 25.5, + "learning_rate": 3.7255790391541945e-05, + "loss": 2.2921, + "step": 5146500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725455180511586e-05, + "loss": 2.2833, + "step": 5147000 + }, + { + "epoch": 25.5, + "learning_rate": 3.725331321868978e-05, + "loss": 2.2587, + "step": 5147500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725207463226369e-05, + "loss": 2.2883, + "step": 5148000 + }, + { + "epoch": 25.51, + "learning_rate": 3.7250836045837606e-05, + "loss": 2.2676, + "step": 5148500 + }, + { + "epoch": 25.51, + "learning_rate": 3.724959993658438e-05, + "loss": 2.2797, + "step": 5149000 + }, + { + "epoch": 25.51, + "learning_rate": 3.72483613501583e-05, + "loss": 2.2961, + "step": 5149500 + }, + { + "epoch": 25.51, + "learning_rate": 3.724712524090506e-05, + "loss": 2.2984, + "step": 5150000 + }, + { + "epoch": 25.52, + "learning_rate": 3.724588665447898e-05, + "loss": 2.2816, + "step": 5150500 + }, + { + "epoch": 25.52, + "learning_rate": 3.7244648068052894e-05, + "loss": 2.2632, + "step": 5151000 + }, + { + "epoch": 25.52, + "learning_rate": 3.724340948162681e-05, + "loss": 2.2847, + "step": 5151500 + }, + { + "epoch": 25.52, + "learning_rate": 3.724217089520073e-05, + "loss": 2.2749, + "step": 5152000 + }, + { + "epoch": 25.53, + "learning_rate": 3.7240932308774645e-05, + "loss": 2.3042, + "step": 5152500 + }, + { + "epoch": 25.53, + "learning_rate": 3.723969372234856e-05, + "loss": 2.2551, + "step": 5153000 + }, + { + "epoch": 25.53, + "learning_rate": 3.723845513592248e-05, + "loss": 2.2692, + "step": 5153500 + }, + { + "epoch": 25.53, + "learning_rate": 3.723721654949639e-05, + "loss": 2.2742, + "step": 5154000 + }, + { + "epoch": 25.54, + "learning_rate": 3.7235980440243164e-05, + "loss": 2.2607, + "step": 5154500 + }, + { + "epoch": 25.54, + "learning_rate": 3.7234744330989927e-05, + "loss": 2.3025, + "step": 5155000 + }, + { + "epoch": 25.54, + "learning_rate": 3.7233505744563843e-05, + "loss": 2.2741, + "step": 5155500 + }, + { + "epoch": 25.54, + "learning_rate": 3.723226715813776e-05, + "loss": 2.2887, + "step": 5156000 + }, + { + "epoch": 25.55, + "learning_rate": 3.723102857171168e-05, + "loss": 2.2849, + "step": 5156500 + }, + { + "epoch": 25.55, + "learning_rate": 3.7229789985285594e-05, + "loss": 2.2911, + "step": 5157000 + }, + { + "epoch": 25.55, + "learning_rate": 3.722855139885951e-05, + "loss": 2.266, + "step": 5157500 + }, + { + "epoch": 25.55, + "learning_rate": 3.722731281243343e-05, + "loss": 2.2631, + "step": 5158000 + }, + { + "epoch": 25.56, + "learning_rate": 3.7226074226007345e-05, + "loss": 2.2779, + "step": 5158500 + }, + { + "epoch": 25.56, + "learning_rate": 3.722483563958126e-05, + "loss": 2.2959, + "step": 5159000 + }, + { + "epoch": 25.56, + "learning_rate": 3.722359705315518e-05, + "loss": 2.306, + "step": 5159500 + }, + { + "epoch": 25.56, + "learning_rate": 3.7222358466729096e-05, + "loss": 2.2844, + "step": 5160000 + }, + { + "epoch": 25.57, + "learning_rate": 3.7221119880303006e-05, + "loss": 2.2897, + "step": 5160500 + }, + { + "epoch": 25.57, + "learning_rate": 3.721988377104978e-05, + "loss": 2.2778, + "step": 5161000 + }, + { + "epoch": 25.57, + "learning_rate": 3.7218647661796544e-05, + "loss": 2.2916, + "step": 5161500 + }, + { + "epoch": 25.57, + "learning_rate": 3.721740907537046e-05, + "loss": 2.2833, + "step": 5162000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721617048894438e-05, + "loss": 2.2951, + "step": 5162500 + }, + { + "epoch": 25.58, + "learning_rate": 3.7214931902518294e-05, + "loss": 2.273, + "step": 5163000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721369331609221e-05, + "loss": 2.2695, + "step": 5163500 + }, + { + "epoch": 25.58, + "learning_rate": 3.721245720683898e-05, + "loss": 2.2833, + "step": 5164000 + }, + { + "epoch": 25.59, + "learning_rate": 3.72112186204129e-05, + "loss": 2.2957, + "step": 5164500 + }, + { + "epoch": 25.59, + "learning_rate": 3.7209980033986814e-05, + "loss": 2.2693, + "step": 5165000 + }, + { + "epoch": 25.59, + "learning_rate": 3.720874144756073e-05, + "loss": 2.2783, + "step": 5165500 + }, + { + "epoch": 25.59, + "learning_rate": 3.720750286113465e-05, + "loss": 2.2865, + "step": 5166000 + }, + { + "epoch": 25.6, + "learning_rate": 3.7206264274708565e-05, + "loss": 2.2855, + "step": 5166500 + }, + { + "epoch": 25.6, + "learning_rate": 3.720502568828248e-05, + "loss": 2.2972, + "step": 5167000 + }, + { + "epoch": 25.6, + "learning_rate": 3.72037871018564e-05, + "loss": 2.2696, + "step": 5167500 + }, + { + "epoch": 25.6, + "learning_rate": 3.720255099260316e-05, + "loss": 2.2739, + "step": 5168000 + }, + { + "epoch": 25.61, + "learning_rate": 3.720131488334993e-05, + "loss": 2.2988, + "step": 5168500 + }, + { + "epoch": 25.61, + "learning_rate": 3.7200078774096705e-05, + "loss": 2.2543, + "step": 5169000 + }, + { + "epoch": 25.61, + "learning_rate": 3.719884018767062e-05, + "loss": 2.2768, + "step": 5169500 + }, + { + "epoch": 25.61, + "learning_rate": 3.719760160124454e-05, + "loss": 2.2944, + "step": 5170000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719636301481845e-05, + "loss": 2.2929, + "step": 5170500 + }, + { + "epoch": 25.62, + "learning_rate": 3.7195124428392366e-05, + "loss": 2.273, + "step": 5171000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719388584196628e-05, + "loss": 2.2615, + "step": 5171500 + }, + { + "epoch": 25.62, + "learning_rate": 3.71926472555402e-05, + "loss": 2.2707, + "step": 5172000 + }, + { + "epoch": 25.63, + "learning_rate": 3.719140866911412e-05, + "loss": 2.288, + "step": 5172500 + }, + { + "epoch": 25.63, + "learning_rate": 3.7190170082688034e-05, + "loss": 2.2828, + "step": 5173000 + }, + { + "epoch": 25.63, + "learning_rate": 3.718893149626195e-05, + "loss": 2.2858, + "step": 5173500 + }, + { + "epoch": 25.63, + "learning_rate": 3.718769290983586e-05, + "loss": 2.2594, + "step": 5174000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718645432340978e-05, + "loss": 2.2852, + "step": 5174500 + }, + { + "epoch": 25.64, + "learning_rate": 3.7185215736983695e-05, + "loss": 2.2674, + "step": 5175000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718398210490332e-05, + "loss": 2.2819, + "step": 5175500 + }, + { + "epoch": 25.64, + "learning_rate": 3.718274351847724e-05, + "loss": 2.2753, + "step": 5176000 + }, + { + "epoch": 25.65, + "learning_rate": 3.718150493205115e-05, + "loss": 2.2723, + "step": 5176500 + }, + { + "epoch": 25.65, + "learning_rate": 3.7180266345625066e-05, + "loss": 2.2838, + "step": 5177000 + }, + { + "epoch": 25.65, + "learning_rate": 3.717902775919898e-05, + "loss": 2.2957, + "step": 5177500 + }, + { + "epoch": 25.65, + "learning_rate": 3.717779164994575e-05, + "loss": 2.2762, + "step": 5178000 + }, + { + "epoch": 25.66, + "learning_rate": 3.717655306351967e-05, + "loss": 2.2829, + "step": 5178500 + }, + { + "epoch": 25.66, + "learning_rate": 3.7175314477093586e-05, + "loss": 2.2563, + "step": 5179000 + }, + { + "epoch": 25.66, + "learning_rate": 3.71740758906675e-05, + "loss": 2.2887, + "step": 5179500 + }, + { + "epoch": 25.66, + "learning_rate": 3.717283730424141e-05, + "loss": 2.283, + "step": 5180000 + }, + { + "epoch": 25.67, + "learning_rate": 3.717160119498819e-05, + "loss": 2.2762, + "step": 5180500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7170362608562105e-05, + "loss": 2.2929, + "step": 5181000 + }, + { + "epoch": 25.67, + "learning_rate": 3.716912649930887e-05, + "loss": 2.2826, + "step": 5181500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7167887912882784e-05, + "loss": 2.3166, + "step": 5182000 + }, + { + "epoch": 25.68, + "learning_rate": 3.71666493264567e-05, + "loss": 2.2852, + "step": 5182500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716541074003062e-05, + "loss": 2.2891, + "step": 5183000 + }, + { + "epoch": 25.68, + "learning_rate": 3.7164172153604535e-05, + "loss": 2.2888, + "step": 5183500 + }, + { + "epoch": 25.68, + "learning_rate": 3.7162936044351304e-05, + "loss": 2.2808, + "step": 5184000 + }, + { + "epoch": 25.69, + "learning_rate": 3.716169745792522e-05, + "loss": 2.2693, + "step": 5184500 + }, + { + "epoch": 25.69, + "learning_rate": 3.716046134867199e-05, + "loss": 2.2618, + "step": 5185000 + }, + { + "epoch": 25.69, + "learning_rate": 3.7159222762245906e-05, + "loss": 2.2966, + "step": 5185500 + }, + { + "epoch": 25.69, + "learning_rate": 3.7157984175819823e-05, + "loss": 2.2962, + "step": 5186000 + }, + { + "epoch": 25.7, + "learning_rate": 3.715674558939374e-05, + "loss": 2.2637, + "step": 5186500 + }, + { + "epoch": 25.7, + "learning_rate": 3.715550700296766e-05, + "loss": 2.3125, + "step": 5187000 + }, + { + "epoch": 25.7, + "learning_rate": 3.715427089371442e-05, + "loss": 2.2799, + "step": 5187500 + }, + { + "epoch": 25.7, + "learning_rate": 3.7153032307288336e-05, + "loss": 2.3013, + "step": 5188000 + }, + { + "epoch": 25.71, + "learning_rate": 3.715179372086225e-05, + "loss": 2.2987, + "step": 5188500 + }, + { + "epoch": 25.71, + "learning_rate": 3.715055513443617e-05, + "loss": 2.3231, + "step": 5189000 + }, + { + "epoch": 25.71, + "learning_rate": 3.714931654801009e-05, + "loss": 2.2836, + "step": 5189500 + }, + { + "epoch": 25.71, + "learning_rate": 3.7148077961584004e-05, + "loss": 2.2828, + "step": 5190000 + }, + { + "epoch": 25.72, + "learning_rate": 3.7146844329503625e-05, + "loss": 2.2943, + "step": 5190500 + }, + { + "epoch": 25.72, + "learning_rate": 3.714560574307754e-05, + "loss": 2.2569, + "step": 5191000 + }, + { + "epoch": 25.72, + "learning_rate": 3.714436715665146e-05, + "loss": 2.2996, + "step": 5191500 + }, + { + "epoch": 25.72, + "learning_rate": 3.7143128570225375e-05, + "loss": 2.2977, + "step": 5192000 + }, + { + "epoch": 25.73, + "learning_rate": 3.714188998379929e-05, + "loss": 2.2883, + "step": 5192500 + }, + { + "epoch": 25.73, + "learning_rate": 3.71406513973732e-05, + "loss": 2.2551, + "step": 5193000 + }, + { + "epoch": 25.73, + "learning_rate": 3.713941281094712e-05, + "loss": 2.2708, + "step": 5193500 + }, + { + "epoch": 25.73, + "learning_rate": 3.7138174224521036e-05, + "loss": 2.2823, + "step": 5194000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713693563809495e-05, + "loss": 2.274, + "step": 5194500 + }, + { + "epoch": 25.74, + "learning_rate": 3.713569705166887e-05, + "loss": 2.2976, + "step": 5195000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713445846524279e-05, + "loss": 2.2881, + "step": 5195500 + }, + { + "epoch": 25.74, + "learning_rate": 3.7133219878816704e-05, + "loss": 2.2842, + "step": 5196000 + }, + { + "epoch": 25.75, + "learning_rate": 3.713198129239062e-05, + "loss": 2.2757, + "step": 5196500 + }, + { + "epoch": 25.75, + "learning_rate": 3.713074270596454e-05, + "loss": 2.2781, + "step": 5197000 + }, + { + "epoch": 25.75, + "learning_rate": 3.7129504119538455e-05, + "loss": 2.2648, + "step": 5197500 + }, + { + "epoch": 25.75, + "learning_rate": 3.712826553311237e-05, + "loss": 2.2698, + "step": 5198000 + }, + { + "epoch": 25.76, + "learning_rate": 3.712702942385914e-05, + "loss": 2.2989, + "step": 5198500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712579083743306e-05, + "loss": 2.2976, + "step": 5199000 + }, + { + "epoch": 25.76, + "learning_rate": 3.7124552251006974e-05, + "loss": 2.2934, + "step": 5199500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712331366458089e-05, + "loss": 2.2713, + "step": 5200000 + }, + { + "epoch": 25.77, + "learning_rate": 3.7122077555327653e-05, + "loss": 2.3012, + "step": 5200500 + }, + { + "epoch": 25.77, + "learning_rate": 3.712083896890157e-05, + "loss": 2.2833, + "step": 5201000 + }, + { + "epoch": 25.77, + "learning_rate": 3.711960038247549e-05, + "loss": 2.2911, + "step": 5201500 + }, + { + "epoch": 25.77, + "learning_rate": 3.7118361796049404e-05, + "loss": 2.2916, + "step": 5202000 + }, + { + "epoch": 25.77, + "learning_rate": 3.711712320962332e-05, + "loss": 2.2596, + "step": 5202500 + }, + { + "epoch": 25.78, + "learning_rate": 3.711588710037009e-05, + "loss": 2.2809, + "step": 5203000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711464851394401e-05, + "loss": 2.3006, + "step": 5203500 + }, + { + "epoch": 25.78, + "learning_rate": 3.7113409927517924e-05, + "loss": 2.2742, + "step": 5204000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711217134109184e-05, + "loss": 2.2681, + "step": 5204500 + }, + { + "epoch": 25.79, + "learning_rate": 3.711093275466576e-05, + "loss": 2.3087, + "step": 5205000 + }, + { + "epoch": 25.79, + "learning_rate": 3.7109694168239675e-05, + "loss": 2.3044, + "step": 5205500 + }, + { + "epoch": 25.79, + "learning_rate": 3.710845558181359e-05, + "loss": 2.277, + "step": 5206000 + }, + { + "epoch": 25.79, + "learning_rate": 3.710721699538751e-05, + "loss": 2.2873, + "step": 5206500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710598088613427e-05, + "loss": 2.2831, + "step": 5207000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710474229970819e-05, + "loss": 2.2702, + "step": 5207500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710350619045496e-05, + "loss": 2.2777, + "step": 5208000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710226760402887e-05, + "loss": 2.2703, + "step": 5208500 + }, + { + "epoch": 25.81, + "learning_rate": 3.710102901760279e-05, + "loss": 2.2656, + "step": 5209000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709979043117671e-05, + "loss": 2.3058, + "step": 5209500 + }, + { + "epoch": 25.81, + "learning_rate": 3.7098551844750624e-05, + "loss": 2.3062, + "step": 5210000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709731573549739e-05, + "loss": 2.2778, + "step": 5210500 + }, + { + "epoch": 25.82, + "learning_rate": 3.709607714907131e-05, + "loss": 2.2868, + "step": 5211000 + }, + { + "epoch": 25.82, + "learning_rate": 3.7094838562645227e-05, + "loss": 2.2837, + "step": 5211500 + }, + { + "epoch": 25.82, + "learning_rate": 3.709359997621914e-05, + "loss": 2.2617, + "step": 5212000 + }, + { + "epoch": 25.82, + "learning_rate": 3.7092361389793054e-05, + "loss": 2.2789, + "step": 5212500 + }, + { + "epoch": 25.83, + "learning_rate": 3.709112528053983e-05, + "loss": 2.3062, + "step": 5213000 + }, + { + "epoch": 25.83, + "learning_rate": 3.7089886694113746e-05, + "loss": 2.2737, + "step": 5213500 + }, + { + "epoch": 25.83, + "learning_rate": 3.708864810768766e-05, + "loss": 2.2862, + "step": 5214000 + }, + { + "epoch": 25.83, + "learning_rate": 3.708740952126158e-05, + "loss": 2.2864, + "step": 5214500 + }, + { + "epoch": 25.84, + "learning_rate": 3.708617093483549e-05, + "loss": 2.3172, + "step": 5215000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708493234840941e-05, + "loss": 2.2644, + "step": 5215500 + }, + { + "epoch": 25.84, + "learning_rate": 3.7083693761983324e-05, + "loss": 2.2906, + "step": 5216000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708245765273009e-05, + "loss": 2.277, + "step": 5216500 + }, + { + "epoch": 25.85, + "learning_rate": 3.708121906630401e-05, + "loss": 2.2867, + "step": 5217000 + }, + { + "epoch": 25.85, + "learning_rate": 3.707998047987793e-05, + "loss": 2.2931, + "step": 5217500 + }, + { + "epoch": 25.85, + "learning_rate": 3.7078744370624695e-05, + "loss": 2.2907, + "step": 5218000 + }, + { + "epoch": 25.85, + "learning_rate": 3.707750578419861e-05, + "loss": 2.2581, + "step": 5218500 + }, + { + "epoch": 25.86, + "learning_rate": 3.707626719777253e-05, + "loss": 2.3109, + "step": 5219000 + }, + { + "epoch": 25.86, + "learning_rate": 3.7075028611346446e-05, + "loss": 2.293, + "step": 5219500 + }, + { + "epoch": 25.86, + "learning_rate": 3.707379002492036e-05, + "loss": 2.311, + "step": 5220000 + }, + { + "epoch": 25.86, + "learning_rate": 3.707255143849428e-05, + "loss": 2.2732, + "step": 5220500 + }, + { + "epoch": 25.87, + "learning_rate": 3.707131285206819e-05, + "loss": 2.2977, + "step": 5221000 + }, + { + "epoch": 25.87, + "learning_rate": 3.707007426564211e-05, + "loss": 2.2984, + "step": 5221500 + }, + { + "epoch": 25.87, + "learning_rate": 3.706883815638888e-05, + "loss": 2.2718, + "step": 5222000 + }, + { + "epoch": 25.87, + "learning_rate": 3.7067602047135645e-05, + "loss": 2.2732, + "step": 5222500 + }, + { + "epoch": 25.88, + "learning_rate": 3.706636346070956e-05, + "loss": 2.2941, + "step": 5223000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706512487428348e-05, + "loss": 2.2739, + "step": 5223500 + }, + { + "epoch": 25.88, + "learning_rate": 3.7063886287857396e-05, + "loss": 2.265, + "step": 5224000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706264770143131e-05, + "loss": 2.2956, + "step": 5224500 + }, + { + "epoch": 25.89, + "learning_rate": 3.706140911500523e-05, + "loss": 2.2913, + "step": 5225000 + }, + { + "epoch": 25.89, + "learning_rate": 3.7060170528579146e-05, + "loss": 2.2667, + "step": 5225500 + }, + { + "epoch": 25.89, + "learning_rate": 3.705893194215306e-05, + "loss": 2.3244, + "step": 5226000 + }, + { + "epoch": 25.89, + "learning_rate": 3.705769335572698e-05, + "loss": 2.2802, + "step": 5226500 + }, + { + "epoch": 25.9, + "learning_rate": 3.70564547693009e-05, + "loss": 2.2817, + "step": 5227000 + }, + { + "epoch": 25.9, + "learning_rate": 3.705521618287481e-05, + "loss": 2.3061, + "step": 5227500 + }, + { + "epoch": 25.9, + "learning_rate": 3.7053977596448724e-05, + "loss": 2.2814, + "step": 5228000 + }, + { + "epoch": 25.9, + "learning_rate": 3.705273901002264e-05, + "loss": 2.2876, + "step": 5228500 + }, + { + "epoch": 25.91, + "learning_rate": 3.705150042359656e-05, + "loss": 2.2637, + "step": 5229000 + }, + { + "epoch": 25.91, + "learning_rate": 3.7050261837170475e-05, + "loss": 2.2767, + "step": 5229500 + }, + { + "epoch": 25.91, + "learning_rate": 3.704902325074439e-05, + "loss": 2.2874, + "step": 5230000 + }, + { + "epoch": 25.91, + "learning_rate": 3.704778466431831e-05, + "loss": 2.2886, + "step": 5230500 + }, + { + "epoch": 25.92, + "learning_rate": 3.7046546077892226e-05, + "loss": 2.2975, + "step": 5231000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704530749146614e-05, + "loss": 2.2706, + "step": 5231500 + }, + { + "epoch": 25.92, + "learning_rate": 3.7044071382212905e-05, + "loss": 2.2877, + "step": 5232000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704283279578682e-05, + "loss": 2.3207, + "step": 5232500 + }, + { + "epoch": 25.93, + "learning_rate": 3.704159420936074e-05, + "loss": 2.2841, + "step": 5233000 + }, + { + "epoch": 25.93, + "learning_rate": 3.7040355622934656e-05, + "loss": 2.2769, + "step": 5233500 + }, + { + "epoch": 25.93, + "learning_rate": 3.7039119513681424e-05, + "loss": 2.2918, + "step": 5234000 + }, + { + "epoch": 25.93, + "learning_rate": 3.703788092725534e-05, + "loss": 2.288, + "step": 5234500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703664234082926e-05, + "loss": 2.2764, + "step": 5235000 + }, + { + "epoch": 25.94, + "learning_rate": 3.7035403754403175e-05, + "loss": 2.283, + "step": 5235500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703416764514995e-05, + "loss": 2.2929, + "step": 5236000 + }, + { + "epoch": 25.94, + "learning_rate": 3.703292905872386e-05, + "loss": 2.2911, + "step": 5236500 + }, + { + "epoch": 25.95, + "learning_rate": 3.703169047229778e-05, + "loss": 2.2815, + "step": 5237000 + }, + { + "epoch": 25.95, + "learning_rate": 3.7030451885871695e-05, + "loss": 2.3038, + "step": 5237500 + }, + { + "epoch": 25.95, + "learning_rate": 3.702921329944561e-05, + "loss": 2.2901, + "step": 5238000 + }, + { + "epoch": 25.95, + "learning_rate": 3.702797471301953e-05, + "loss": 2.3041, + "step": 5238500 + }, + { + "epoch": 25.96, + "learning_rate": 3.702673612659344e-05, + "loss": 2.2909, + "step": 5239000 + }, + { + "epoch": 25.96, + "learning_rate": 3.7025497540167356e-05, + "loss": 2.3005, + "step": 5239500 + }, + { + "epoch": 25.96, + "learning_rate": 3.7024261430914125e-05, + "loss": 2.3043, + "step": 5240000 + }, + { + "epoch": 25.96, + "learning_rate": 3.702302284448804e-05, + "loss": 2.3069, + "step": 5240500 + }, + { + "epoch": 25.97, + "learning_rate": 3.702178425806196e-05, + "loss": 2.2887, + "step": 5241000 + }, + { + "epoch": 25.97, + "learning_rate": 3.7020545671635875e-05, + "loss": 2.2661, + "step": 5241500 + }, + { + "epoch": 25.97, + "learning_rate": 3.701930708520979e-05, + "loss": 2.2777, + "step": 5242000 + }, + { + "epoch": 25.97, + "learning_rate": 3.701806849878371e-05, + "loss": 2.2992, + "step": 5242500 + }, + { + "epoch": 25.98, + "learning_rate": 3.701683238953048e-05, + "loss": 2.2748, + "step": 5243000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701559628027725e-05, + "loss": 2.2756, + "step": 5243500 + }, + { + "epoch": 25.98, + "learning_rate": 3.7014357693851164e-05, + "loss": 2.2784, + "step": 5244000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701311910742508e-05, + "loss": 2.307, + "step": 5244500 + }, + { + "epoch": 25.99, + "learning_rate": 3.7011880520999e-05, + "loss": 2.2752, + "step": 5245000 + }, + { + "epoch": 25.99, + "learning_rate": 3.7010641934572914e-05, + "loss": 2.2948, + "step": 5245500 + }, + { + "epoch": 25.99, + "learning_rate": 3.7009403348146825e-05, + "loss": 2.3109, + "step": 5246000 + }, + { + "epoch": 25.99, + "learning_rate": 3.700816476172074e-05, + "loss": 2.2798, + "step": 5246500 + }, + { + "epoch": 26.0, + "learning_rate": 3.700692617529466e-05, + "loss": 2.3082, + "step": 5247000 + }, + { + "epoch": 26.0, + "learning_rate": 3.7005687588868575e-05, + "loss": 2.3089, + "step": 5247500 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6544683759370017, + "eval_accuracy_mlm": 0.6095992095473112, + "eval_accuracy_nsp": 0.8660647398209124, + "eval_loss": 2.3457705974578857, + "eval_runtime": 146.2347, + "eval_samples_per_second": 1743.491, + "eval_steps_per_second": 72.65, + "step": 5247918 + }, + { + "epoch": 26.0, + "learning_rate": 3.700444900244249e-05, + "loss": 2.275, + "step": 5248000 + }, + { + "epoch": 26.0, + "learning_rate": 3.700321041601641e-05, + "loss": 2.2462, + "step": 5248500 + }, + { + "epoch": 26.01, + "learning_rate": 3.700197430676318e-05, + "loss": 2.2828, + "step": 5249000 + }, + { + "epoch": 26.01, + "learning_rate": 3.7000735720337095e-05, + "loss": 2.2497, + "step": 5249500 + }, + { + "epoch": 26.01, + "learning_rate": 3.699949713391101e-05, + "loss": 2.2561, + "step": 5250000 + }, + { + "epoch": 26.01, + "learning_rate": 3.699825854748493e-05, + "loss": 2.2692, + "step": 5250500 + }, + { + "epoch": 26.02, + "learning_rate": 3.6997019961058846e-05, + "loss": 2.2704, + "step": 5251000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699578137463276e-05, + "loss": 2.2654, + "step": 5251500 + }, + { + "epoch": 26.02, + "learning_rate": 3.699454278820668e-05, + "loss": 2.2692, + "step": 5252000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699330420178059e-05, + "loss": 2.2392, + "step": 5252500 + }, + { + "epoch": 26.03, + "learning_rate": 3.699206561535451e-05, + "loss": 2.2405, + "step": 5253000 + }, + { + "epoch": 26.03, + "learning_rate": 3.6990831983274134e-05, + "loss": 2.2633, + "step": 5253500 + }, + { + "epoch": 26.03, + "learning_rate": 3.6989595874020896e-05, + "loss": 2.2523, + "step": 5254000 + }, + { + "epoch": 26.03, + "learning_rate": 3.698835728759481e-05, + "loss": 2.2422, + "step": 5254500 + }, + { + "epoch": 26.04, + "learning_rate": 3.698711870116873e-05, + "loss": 2.2852, + "step": 5255000 + }, + { + "epoch": 26.04, + "learning_rate": 3.698588011474265e-05, + "loss": 2.2703, + "step": 5255500 + }, + { + "epoch": 26.04, + "learning_rate": 3.6984641528316564e-05, + "loss": 2.2614, + "step": 5256000 + }, + { + "epoch": 26.04, + "learning_rate": 3.698340541906333e-05, + "loss": 2.2694, + "step": 5256500 + }, + { + "epoch": 26.04, + "learning_rate": 3.698216683263725e-05, + "loss": 2.2442, + "step": 5257000 + }, + { + "epoch": 26.05, + "learning_rate": 3.698092824621117e-05, + "loss": 2.263, + "step": 5257500 + }, + { + "epoch": 26.05, + "learning_rate": 3.6979689659785084e-05, + "loss": 2.2511, + "step": 5258000 + }, + { + "epoch": 26.05, + "learning_rate": 3.6978451073359e-05, + "loss": 2.2405, + "step": 5258500 + }, + { + "epoch": 26.05, + "learning_rate": 3.697721248693292e-05, + "loss": 2.2604, + "step": 5259000 + }, + { + "epoch": 26.06, + "learning_rate": 3.6975973900506834e-05, + "loss": 2.2556, + "step": 5259500 + }, + { + "epoch": 26.06, + "learning_rate": 3.697473531408075e-05, + "loss": 2.2919, + "step": 5260000 + }, + { + "epoch": 26.06, + "learning_rate": 3.697349672765467e-05, + "loss": 2.2512, + "step": 5260500 + }, + { + "epoch": 26.06, + "learning_rate": 3.6972258141228585e-05, + "loss": 2.2531, + "step": 5261000 + }, + { + "epoch": 26.07, + "learning_rate": 3.697102203197535e-05, + "loss": 2.2625, + "step": 5261500 + }, + { + "epoch": 26.07, + "learning_rate": 3.6969783445549264e-05, + "loss": 2.2725, + "step": 5262000 + }, + { + "epoch": 26.07, + "learning_rate": 3.696854733629603e-05, + "loss": 2.2693, + "step": 5262500 + }, + { + "epoch": 26.07, + "learning_rate": 3.696730874986995e-05, + "loss": 2.2746, + "step": 5263000 + }, + { + "epoch": 26.08, + "learning_rate": 3.696607016344387e-05, + "loss": 2.2666, + "step": 5263500 + }, + { + "epoch": 26.08, + "learning_rate": 3.6964831577017784e-05, + "loss": 2.271, + "step": 5264000 + }, + { + "epoch": 26.08, + "learning_rate": 3.69635929905917e-05, + "loss": 2.2448, + "step": 5264500 + }, + { + "epoch": 26.08, + "learning_rate": 3.696235440416562e-05, + "loss": 2.283, + "step": 5265000 + }, + { + "epoch": 26.09, + "learning_rate": 3.6961118294912386e-05, + "loss": 2.2575, + "step": 5265500 + }, + { + "epoch": 26.09, + "learning_rate": 3.695988218565915e-05, + "loss": 2.2743, + "step": 5266000 + }, + { + "epoch": 26.09, + "learning_rate": 3.6958643599233065e-05, + "loss": 2.2608, + "step": 5266500 + }, + { + "epoch": 26.09, + "learning_rate": 3.695740501280698e-05, + "loss": 2.2905, + "step": 5267000 + }, + { + "epoch": 26.1, + "learning_rate": 3.69561664263809e-05, + "loss": 2.2947, + "step": 5267500 + }, + { + "epoch": 26.1, + "learning_rate": 3.6954927839954816e-05, + "loss": 2.2509, + "step": 5268000 + }, + { + "epoch": 26.1, + "learning_rate": 3.695368925352873e-05, + "loss": 2.2685, + "step": 5268500 + }, + { + "epoch": 26.1, + "learning_rate": 3.695245066710265e-05, + "loss": 2.2794, + "step": 5269000 + }, + { + "epoch": 26.11, + "learning_rate": 3.695121208067657e-05, + "loss": 2.2516, + "step": 5269500 + }, + { + "epoch": 26.11, + "learning_rate": 3.6949973494250484e-05, + "loss": 2.2744, + "step": 5270000 + }, + { + "epoch": 26.11, + "learning_rate": 3.69487349078244e-05, + "loss": 2.2572, + "step": 5270500 + }, + { + "epoch": 26.11, + "learning_rate": 3.694749632139832e-05, + "loss": 2.2396, + "step": 5271000 + }, + { + "epoch": 26.12, + "learning_rate": 3.6946257734972235e-05, + "loss": 2.2823, + "step": 5271500 + }, + { + "epoch": 26.12, + "learning_rate": 3.694501914854615e-05, + "loss": 2.2637, + "step": 5272000 + }, + { + "epoch": 26.12, + "learning_rate": 3.694378056212007e-05, + "loss": 2.2612, + "step": 5272500 + }, + { + "epoch": 26.12, + "learning_rate": 3.6942541975693985e-05, + "loss": 2.2399, + "step": 5273000 + }, + { + "epoch": 26.13, + "learning_rate": 3.69413033892679e-05, + "loss": 2.2862, + "step": 5273500 + }, + { + "epoch": 26.13, + "learning_rate": 3.6940067280014664e-05, + "loss": 2.284, + "step": 5274000 + }, + { + "epoch": 26.13, + "learning_rate": 3.693882869358858e-05, + "loss": 2.2642, + "step": 5274500 + }, + { + "epoch": 26.13, + "learning_rate": 3.69375901071625e-05, + "loss": 2.2629, + "step": 5275000 + }, + { + "epoch": 26.14, + "learning_rate": 3.6936351520736415e-05, + "loss": 2.2659, + "step": 5275500 + }, + { + "epoch": 26.14, + "learning_rate": 3.693511293431033e-05, + "loss": 2.2365, + "step": 5276000 + }, + { + "epoch": 26.14, + "learning_rate": 3.693387434788425e-05, + "loss": 2.2769, + "step": 5276500 + }, + { + "epoch": 26.14, + "learning_rate": 3.693263823863102e-05, + "loss": 2.2433, + "step": 5277000 + }, + { + "epoch": 26.15, + "learning_rate": 3.6931399652204935e-05, + "loss": 2.2665, + "step": 5277500 + }, + { + "epoch": 26.15, + "learning_rate": 3.693016106577885e-05, + "loss": 2.2601, + "step": 5278000 + }, + { + "epoch": 26.15, + "learning_rate": 3.692892247935277e-05, + "loss": 2.3155, + "step": 5278500 + }, + { + "epoch": 26.15, + "learning_rate": 3.6927683892926685e-05, + "loss": 2.2518, + "step": 5279000 + }, + { + "epoch": 26.16, + "learning_rate": 3.69264453065006e-05, + "loss": 2.2622, + "step": 5279500 + }, + { + "epoch": 26.16, + "learning_rate": 3.692520672007452e-05, + "loss": 2.2623, + "step": 5280000 + }, + { + "epoch": 26.16, + "learning_rate": 3.692397061082128e-05, + "loss": 2.2732, + "step": 5280500 + }, + { + "epoch": 26.16, + "learning_rate": 3.69227320243952e-05, + "loss": 2.2425, + "step": 5281000 + }, + { + "epoch": 26.17, + "learning_rate": 3.6921493437969115e-05, + "loss": 2.2449, + "step": 5281500 + }, + { + "epoch": 26.17, + "learning_rate": 3.692025485154303e-05, + "loss": 2.2487, + "step": 5282000 + }, + { + "epoch": 26.17, + "learning_rate": 3.691901626511695e-05, + "loss": 2.2494, + "step": 5282500 + }, + { + "epoch": 26.17, + "learning_rate": 3.6917777678690866e-05, + "loss": 2.2683, + "step": 5283000 + }, + { + "epoch": 26.18, + "learning_rate": 3.6916539092264776e-05, + "loss": 2.2414, + "step": 5283500 + }, + { + "epoch": 26.18, + "learning_rate": 3.691530050583869e-05, + "loss": 2.2555, + "step": 5284000 + }, + { + "epoch": 26.18, + "learning_rate": 3.691406439658547e-05, + "loss": 2.246, + "step": 5284500 + }, + { + "epoch": 26.18, + "learning_rate": 3.6912825810159386e-05, + "loss": 2.2715, + "step": 5285000 + }, + { + "epoch": 26.19, + "learning_rate": 3.69115872237333e-05, + "loss": 2.2586, + "step": 5285500 + }, + { + "epoch": 26.19, + "learning_rate": 3.691034863730722e-05, + "loss": 2.2493, + "step": 5286000 + }, + { + "epoch": 26.19, + "learning_rate": 3.690911500522683e-05, + "loss": 2.2628, + "step": 5286500 + }, + { + "epoch": 26.19, + "learning_rate": 3.690787641880075e-05, + "loss": 2.2646, + "step": 5287000 + }, + { + "epoch": 26.2, + "learning_rate": 3.690663783237467e-05, + "loss": 2.245, + "step": 5287500 + }, + { + "epoch": 26.2, + "learning_rate": 3.6905399245948584e-05, + "loss": 2.2657, + "step": 5288000 + }, + { + "epoch": 26.2, + "learning_rate": 3.69041606595225e-05, + "loss": 2.2547, + "step": 5288500 + }, + { + "epoch": 26.2, + "learning_rate": 3.690292207309642e-05, + "loss": 2.2863, + "step": 5289000 + }, + { + "epoch": 26.21, + "learning_rate": 3.6901683486670335e-05, + "loss": 2.2627, + "step": 5289500 + }, + { + "epoch": 26.21, + "learning_rate": 3.690044490024425e-05, + "loss": 2.2421, + "step": 5290000 + }, + { + "epoch": 26.21, + "learning_rate": 3.689920631381817e-05, + "loss": 2.2575, + "step": 5290500 + }, + { + "epoch": 26.21, + "learning_rate": 3.6897967727392086e-05, + "loss": 2.2554, + "step": 5291000 + }, + { + "epoch": 26.22, + "learning_rate": 3.6896729140966e-05, + "loss": 2.2682, + "step": 5291500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689549055453992e-05, + "loss": 2.257, + "step": 5292000 + }, + { + "epoch": 26.22, + "learning_rate": 3.6894256922459534e-05, + "loss": 2.2817, + "step": 5292500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689301833603345e-05, + "loss": 2.2867, + "step": 5293000 + }, + { + "epoch": 26.23, + "learning_rate": 3.689177974960737e-05, + "loss": 2.2552, + "step": 5293500 + }, + { + "epoch": 26.23, + "learning_rate": 3.6890541163181284e-05, + "loss": 2.2692, + "step": 5294000 + }, + { + "epoch": 26.23, + "learning_rate": 3.68893025767552e-05, + "loss": 2.2514, + "step": 5294500 + }, + { + "epoch": 26.23, + "learning_rate": 3.688806399032912e-05, + "loss": 2.2587, + "step": 5295000 + }, + { + "epoch": 26.24, + "learning_rate": 3.6886825403903035e-05, + "loss": 2.2817, + "step": 5295500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688558681747695e-05, + "loss": 2.2631, + "step": 5296000 + }, + { + "epoch": 26.24, + "learning_rate": 3.688434823105087e-05, + "loss": 2.2627, + "step": 5296500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688311212179764e-05, + "loss": 2.2805, + "step": 5297000 + }, + { + "epoch": 26.25, + "learning_rate": 3.6881873535371555e-05, + "loss": 2.2687, + "step": 5297500 + }, + { + "epoch": 26.25, + "learning_rate": 3.688063494894547e-05, + "loss": 2.2834, + "step": 5298000 + }, + { + "epoch": 26.25, + "learning_rate": 3.687939636251939e-05, + "loss": 2.2436, + "step": 5298500 + }, + { + "epoch": 26.25, + "learning_rate": 3.687816025326615e-05, + "loss": 2.2389, + "step": 5299000 + }, + { + "epoch": 26.26, + "learning_rate": 3.687692166684007e-05, + "loss": 2.2489, + "step": 5299500 + }, + { + "epoch": 26.26, + "learning_rate": 3.6875683080413984e-05, + "loss": 2.2658, + "step": 5300000 + }, + { + "epoch": 26.26, + "learning_rate": 3.68744444939879e-05, + "loss": 2.2578, + "step": 5300500 + }, + { + "epoch": 26.26, + "learning_rate": 3.687320590756182e-05, + "loss": 2.2667, + "step": 5301000 + }, + { + "epoch": 26.27, + "learning_rate": 3.6871967321135735e-05, + "loss": 2.2481, + "step": 5301500 + }, + { + "epoch": 26.27, + "learning_rate": 3.687072873470965e-05, + "loss": 2.2475, + "step": 5302000 + }, + { + "epoch": 26.27, + "learning_rate": 3.686949014828357e-05, + "loss": 2.2658, + "step": 5302500 + }, + { + "epoch": 26.27, + "learning_rate": 3.6868251561857486e-05, + "loss": 2.2964, + "step": 5303000 + }, + { + "epoch": 26.28, + "learning_rate": 3.68670129754314e-05, + "loss": 2.2901, + "step": 5303500 + }, + { + "epoch": 26.28, + "learning_rate": 3.686577438900532e-05, + "loss": 2.2668, + "step": 5304000 + }, + { + "epoch": 26.28, + "learning_rate": 3.686453827975209e-05, + "loss": 2.289, + "step": 5304500 + }, + { + "epoch": 26.28, + "learning_rate": 3.6863299693326006e-05, + "loss": 2.2806, + "step": 5305000 + }, + { + "epoch": 26.29, + "learning_rate": 3.686206110689992e-05, + "loss": 2.2532, + "step": 5305500 + }, + { + "epoch": 26.29, + "learning_rate": 3.686082252047384e-05, + "loss": 2.2487, + "step": 5306000 + }, + { + "epoch": 26.29, + "learning_rate": 3.6859583934047756e-05, + "loss": 2.2815, + "step": 5306500 + }, + { + "epoch": 26.29, + "learning_rate": 3.685834534762167e-05, + "loss": 2.2663, + "step": 5307000 + }, + { + "epoch": 26.3, + "learning_rate": 3.6857109238368435e-05, + "loss": 2.2659, + "step": 5307500 + }, + { + "epoch": 26.3, + "learning_rate": 3.685587065194235e-05, + "loss": 2.2551, + "step": 5308000 + }, + { + "epoch": 26.3, + "learning_rate": 3.685463206551627e-05, + "loss": 2.2631, + "step": 5308500 + }, + { + "epoch": 26.3, + "learning_rate": 3.6853393479090186e-05, + "loss": 2.2611, + "step": 5309000 + }, + { + "epoch": 26.31, + "learning_rate": 3.68521548926641e-05, + "loss": 2.2766, + "step": 5309500 + }, + { + "epoch": 26.31, + "learning_rate": 3.685091630623802e-05, + "loss": 2.2725, + "step": 5310000 + }, + { + "epoch": 26.31, + "learning_rate": 3.684967771981194e-05, + "loss": 2.2788, + "step": 5310500 + }, + { + "epoch": 26.31, + "learning_rate": 3.6848439133385854e-05, + "loss": 2.2321, + "step": 5311000 + }, + { + "epoch": 26.32, + "learning_rate": 3.684720302413262e-05, + "loss": 2.2733, + "step": 5311500 + }, + { + "epoch": 26.32, + "learning_rate": 3.684596443770654e-05, + "loss": 2.2867, + "step": 5312000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6844725851280456e-05, + "loss": 2.2839, + "step": 5312500 + }, + { + "epoch": 26.32, + "learning_rate": 3.6843487264854373e-05, + "loss": 2.2721, + "step": 5313000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6842251155601135e-05, + "loss": 2.2814, + "step": 5313500 + }, + { + "epoch": 26.33, + "learning_rate": 3.6841015046347904e-05, + "loss": 2.2994, + "step": 5314000 + }, + { + "epoch": 26.33, + "learning_rate": 3.683977645992182e-05, + "loss": 2.2346, + "step": 5314500 + }, + { + "epoch": 26.33, + "learning_rate": 3.683853787349574e-05, + "loss": 2.272, + "step": 5315000 + }, + { + "epoch": 26.33, + "learning_rate": 3.6837299287069655e-05, + "loss": 2.2787, + "step": 5315500 + }, + { + "epoch": 26.34, + "learning_rate": 3.683606070064357e-05, + "loss": 2.2815, + "step": 5316000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683482211421749e-05, + "loss": 2.2951, + "step": 5316500 + }, + { + "epoch": 26.34, + "learning_rate": 3.6833583527791406e-05, + "loss": 2.2583, + "step": 5317000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683234741853817e-05, + "loss": 2.2752, + "step": 5317500 + }, + { + "epoch": 26.35, + "learning_rate": 3.6831111309284943e-05, + "loss": 2.245, + "step": 5318000 + }, + { + "epoch": 26.35, + "learning_rate": 3.682987520003171e-05, + "loss": 2.2642, + "step": 5318500 + }, + { + "epoch": 26.35, + "learning_rate": 3.6828639090778474e-05, + "loss": 2.2769, + "step": 5319000 + }, + { + "epoch": 26.35, + "learning_rate": 3.682740050435239e-05, + "loss": 2.2546, + "step": 5319500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682616191792631e-05, + "loss": 2.2726, + "step": 5320000 + }, + { + "epoch": 26.36, + "learning_rate": 3.6824923331500225e-05, + "loss": 2.247, + "step": 5320500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682368474507414e-05, + "loss": 2.2547, + "step": 5321000 + }, + { + "epoch": 26.36, + "learning_rate": 3.682244615864806e-05, + "loss": 2.2627, + "step": 5321500 + }, + { + "epoch": 26.37, + "learning_rate": 3.6821207572221976e-05, + "loss": 2.2813, + "step": 5322000 + }, + { + "epoch": 26.37, + "learning_rate": 3.681996898579589e-05, + "loss": 2.2566, + "step": 5322500 + }, + { + "epoch": 26.37, + "learning_rate": 3.681873039936981e-05, + "loss": 2.2719, + "step": 5323000 + }, + { + "epoch": 26.37, + "learning_rate": 3.681749181294373e-05, + "loss": 2.2736, + "step": 5323500 + }, + { + "epoch": 26.38, + "learning_rate": 3.6816253226517644e-05, + "loss": 2.2618, + "step": 5324000 + }, + { + "epoch": 26.38, + "learning_rate": 3.681501464009156e-05, + "loss": 2.2542, + "step": 5324500 + }, + { + "epoch": 26.38, + "learning_rate": 3.681377605366547e-05, + "loss": 2.262, + "step": 5325000 + }, + { + "epoch": 26.38, + "learning_rate": 3.6812539944412246e-05, + "loss": 2.2745, + "step": 5325500 + }, + { + "epoch": 26.39, + "learning_rate": 3.681130135798616e-05, + "loss": 2.2565, + "step": 5326000 + }, + { + "epoch": 26.39, + "learning_rate": 3.681006277156008e-05, + "loss": 2.2563, + "step": 5326500 + }, + { + "epoch": 26.39, + "learning_rate": 3.6808824185134e-05, + "loss": 2.2395, + "step": 5327000 + }, + { + "epoch": 26.39, + "learning_rate": 3.6807585598707914e-05, + "loss": 2.2644, + "step": 5327500 + }, + { + "epoch": 26.4, + "learning_rate": 3.6806347012281824e-05, + "loss": 2.2437, + "step": 5328000 + }, + { + "epoch": 26.4, + "learning_rate": 3.680510842585574e-05, + "loss": 2.2721, + "step": 5328500 + }, + { + "epoch": 26.4, + "learning_rate": 3.680386983942966e-05, + "loss": 2.273, + "step": 5329000 + }, + { + "epoch": 26.4, + "learning_rate": 3.6802631253003575e-05, + "loss": 2.2778, + "step": 5329500 + }, + { + "epoch": 26.41, + "learning_rate": 3.680139266657749e-05, + "loss": 2.2782, + "step": 5330000 + }, + { + "epoch": 26.41, + "learning_rate": 3.680015408015141e-05, + "loss": 2.2676, + "step": 5330500 + }, + { + "epoch": 26.41, + "learning_rate": 3.679891549372532e-05, + "loss": 2.2632, + "step": 5331000 + }, + { + "epoch": 26.41, + "learning_rate": 3.6797676907299236e-05, + "loss": 2.2902, + "step": 5331500 + }, + { + "epoch": 26.42, + "learning_rate": 3.679643832087315e-05, + "loss": 2.2698, + "step": 5332000 + }, + { + "epoch": 26.42, + "learning_rate": 3.679519973444707e-05, + "loss": 2.2682, + "step": 5332500 + }, + { + "epoch": 26.42, + "learning_rate": 3.6793961148020987e-05, + "loss": 2.2695, + "step": 5333000 + }, + { + "epoch": 26.42, + "learning_rate": 3.6792722561594904e-05, + "loss": 2.2699, + "step": 5333500 + }, + { + "epoch": 26.43, + "learning_rate": 3.679148645234167e-05, + "loss": 2.2628, + "step": 5334000 + }, + { + "epoch": 26.43, + "learning_rate": 3.679025034308844e-05, + "loss": 2.2823, + "step": 5334500 + }, + { + "epoch": 26.43, + "learning_rate": 3.678901175666236e-05, + "loss": 2.2675, + "step": 5335000 + }, + { + "epoch": 26.43, + "learning_rate": 3.6787773170236275e-05, + "loss": 2.2469, + "step": 5335500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678653458381019e-05, + "loss": 2.2607, + "step": 5336000 + }, + { + "epoch": 26.44, + "learning_rate": 3.678529599738411e-05, + "loss": 2.2687, + "step": 5336500 + }, + { + "epoch": 26.44, + "learning_rate": 3.6784057410958026e-05, + "loss": 2.2789, + "step": 5337000 + }, + { + "epoch": 26.44, + "learning_rate": 3.678281882453194e-05, + "loss": 2.2953, + "step": 5337500 + }, + { + "epoch": 26.45, + "learning_rate": 3.678158023810585e-05, + "loss": 2.2404, + "step": 5338000 + }, + { + "epoch": 26.45, + "learning_rate": 3.678034165167977e-05, + "loss": 2.282, + "step": 5338500 + }, + { + "epoch": 26.45, + "learning_rate": 3.677910306525369e-05, + "loss": 2.2767, + "step": 5339000 + }, + { + "epoch": 26.45, + "learning_rate": 3.6777864478827604e-05, + "loss": 2.2794, + "step": 5339500 + }, + { + "epoch": 26.46, + "learning_rate": 3.677662589240152e-05, + "loss": 2.2838, + "step": 5340000 + }, + { + "epoch": 26.46, + "learning_rate": 3.677538730597544e-05, + "loss": 2.2583, + "step": 5340500 + }, + { + "epoch": 26.46, + "learning_rate": 3.6774151196722206e-05, + "loss": 2.2644, + "step": 5341000 + }, + { + "epoch": 26.46, + "learning_rate": 3.677291261029612e-05, + "loss": 2.2895, + "step": 5341500 + }, + { + "epoch": 26.47, + "learning_rate": 3.677167402387004e-05, + "loss": 2.2884, + "step": 5342000 + }, + { + "epoch": 26.47, + "learning_rate": 3.677043543744396e-05, + "loss": 2.2567, + "step": 5342500 + }, + { + "epoch": 26.47, + "learning_rate": 3.6769196851017874e-05, + "loss": 2.2824, + "step": 5343000 + }, + { + "epoch": 26.47, + "learning_rate": 3.676796074176464e-05, + "loss": 2.2754, + "step": 5343500 + }, + { + "epoch": 26.48, + "learning_rate": 3.676672215533856e-05, + "loss": 2.2883, + "step": 5344000 + }, + { + "epoch": 26.48, + "learning_rate": 3.676548604608532e-05, + "loss": 2.2732, + "step": 5344500 + }, + { + "epoch": 26.48, + "learning_rate": 3.676424745965924e-05, + "loss": 2.2879, + "step": 5345000 + }, + { + "epoch": 26.48, + "learning_rate": 3.6763008873233156e-05, + "loss": 2.2564, + "step": 5345500 + }, + { + "epoch": 26.49, + "learning_rate": 3.676177028680707e-05, + "loss": 2.2885, + "step": 5346000 + }, + { + "epoch": 26.49, + "learning_rate": 3.676053170038099e-05, + "loss": 2.2796, + "step": 5346500 + }, + { + "epoch": 26.49, + "learning_rate": 3.6759293113954906e-05, + "loss": 2.2713, + "step": 5347000 + }, + { + "epoch": 26.49, + "learning_rate": 3.6758057004701675e-05, + "loss": 2.2911, + "step": 5347500 + }, + { + "epoch": 26.5, + "learning_rate": 3.6756823372621296e-05, + "loss": 2.2539, + "step": 5348000 + }, + { + "epoch": 26.5, + "learning_rate": 3.675558478619521e-05, + "loss": 2.2742, + "step": 5348500 + }, + { + "epoch": 26.5, + "learning_rate": 3.675434619976913e-05, + "loss": 2.246, + "step": 5349000 + }, + { + "epoch": 26.5, + "learning_rate": 3.675310761334305e-05, + "loss": 2.2775, + "step": 5349500 + }, + { + "epoch": 26.51, + "learning_rate": 3.6751871504089816e-05, + "loss": 2.2543, + "step": 5350000 + }, + { + "epoch": 26.51, + "learning_rate": 3.675063291766373e-05, + "loss": 2.2928, + "step": 5350500 + }, + { + "epoch": 26.51, + "learning_rate": 3.674939433123765e-05, + "loss": 2.2686, + "step": 5351000 + }, + { + "epoch": 26.51, + "learning_rate": 3.6748155744811566e-05, + "loss": 2.2733, + "step": 5351500 + }, + { + "epoch": 26.52, + "learning_rate": 3.674691715838548e-05, + "loss": 2.2779, + "step": 5352000 + }, + { + "epoch": 26.52, + "learning_rate": 3.6745678571959393e-05, + "loss": 2.2912, + "step": 5352500 + }, + { + "epoch": 26.52, + "learning_rate": 3.674444246270616e-05, + "loss": 2.2687, + "step": 5353000 + }, + { + "epoch": 26.52, + "learning_rate": 3.674320387628008e-05, + "loss": 2.2662, + "step": 5353500 + }, + { + "epoch": 26.53, + "learning_rate": 3.674196776702685e-05, + "loss": 2.2396, + "step": 5354000 + }, + { + "epoch": 26.53, + "learning_rate": 3.6740729180600765e-05, + "loss": 2.2742, + "step": 5354500 + }, + { + "epoch": 26.53, + "learning_rate": 3.673949059417468e-05, + "loss": 2.2846, + "step": 5355000 + }, + { + "epoch": 26.53, + "learning_rate": 3.67382520077486e-05, + "loss": 2.2667, + "step": 5355500 + }, + { + "epoch": 26.54, + "learning_rate": 3.6737013421322516e-05, + "loss": 2.2765, + "step": 5356000 + }, + { + "epoch": 26.54, + "learning_rate": 3.673577483489643e-05, + "loss": 2.251, + "step": 5356500 + }, + { + "epoch": 26.54, + "learning_rate": 3.673453624847035e-05, + "loss": 2.2586, + "step": 5357000 + }, + { + "epoch": 26.54, + "learning_rate": 3.6733297662044266e-05, + "loss": 2.2858, + "step": 5357500 + }, + { + "epoch": 26.55, + "learning_rate": 3.673205907561818e-05, + "loss": 2.2807, + "step": 5358000 + }, + { + "epoch": 26.55, + "learning_rate": 3.6730822966364945e-05, + "loss": 2.2795, + "step": 5358500 + }, + { + "epoch": 26.55, + "learning_rate": 3.672958437993886e-05, + "loss": 2.2698, + "step": 5359000 + }, + { + "epoch": 26.55, + "learning_rate": 3.672834579351278e-05, + "loss": 2.2692, + "step": 5359500 + }, + { + "epoch": 26.56, + "learning_rate": 3.6727107207086696e-05, + "loss": 2.2697, + "step": 5360000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672586862066061e-05, + "loss": 2.2799, + "step": 5360500 + }, + { + "epoch": 26.56, + "learning_rate": 3.672463003423453e-05, + "loss": 2.2751, + "step": 5361000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672339144780845e-05, + "loss": 2.2689, + "step": 5361500 + }, + { + "epoch": 26.57, + "learning_rate": 3.6722152861382364e-05, + "loss": 2.2778, + "step": 5362000 + }, + { + "epoch": 26.57, + "learning_rate": 3.672091675212913e-05, + "loss": 2.2961, + "step": 5362500 + }, + { + "epoch": 26.57, + "learning_rate": 3.671967816570305e-05, + "loss": 2.2765, + "step": 5363000 + }, + { + "epoch": 26.57, + "learning_rate": 3.6718439579276967e-05, + "loss": 2.2756, + "step": 5363500 + }, + { + "epoch": 26.58, + "learning_rate": 3.6717200992850884e-05, + "loss": 2.3013, + "step": 5364000 + }, + { + "epoch": 26.58, + "learning_rate": 3.67159624064248e-05, + "loss": 2.2892, + "step": 5364500 + }, + { + "epoch": 26.58, + "learning_rate": 3.671473125151727e-05, + "loss": 2.3011, + "step": 5365000 + }, + { + "epoch": 26.58, + "learning_rate": 3.671349266509119e-05, + "loss": 2.3148, + "step": 5365500 + }, + { + "epoch": 26.59, + "learning_rate": 3.671225407866511e-05, + "loss": 2.2778, + "step": 5366000 + }, + { + "epoch": 26.59, + "learning_rate": 3.671101549223902e-05, + "loss": 2.2993, + "step": 5366500 + }, + { + "epoch": 26.59, + "learning_rate": 3.6709776905812934e-05, + "loss": 2.2779, + "step": 5367000 + }, + { + "epoch": 26.59, + "learning_rate": 3.670853831938685e-05, + "loss": 2.2696, + "step": 5367500 + }, + { + "epoch": 26.59, + "learning_rate": 3.670729973296077e-05, + "loss": 2.2576, + "step": 5368000 + }, + { + "epoch": 26.6, + "learning_rate": 3.6706061146534685e-05, + "loss": 2.2611, + "step": 5368500 + }, + { + "epoch": 26.6, + "learning_rate": 3.6704825037281454e-05, + "loss": 2.2756, + "step": 5369000 + }, + { + "epoch": 26.6, + "learning_rate": 3.670358645085537e-05, + "loss": 2.2703, + "step": 5369500 + }, + { + "epoch": 26.6, + "learning_rate": 3.670234786442929e-05, + "loss": 2.2862, + "step": 5370000 + }, + { + "epoch": 26.61, + "learning_rate": 3.6701109278003204e-05, + "loss": 2.3082, + "step": 5370500 + }, + { + "epoch": 26.61, + "learning_rate": 3.669987069157712e-05, + "loss": 2.2528, + "step": 5371000 + }, + { + "epoch": 26.61, + "learning_rate": 3.669863210515104e-05, + "loss": 2.2901, + "step": 5371500 + }, + { + "epoch": 26.61, + "learning_rate": 3.6697393518724955e-05, + "loss": 2.2658, + "step": 5372000 + }, + { + "epoch": 26.62, + "learning_rate": 3.6696154932298865e-05, + "loss": 2.2874, + "step": 5372500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669491634587278e-05, + "loss": 2.2565, + "step": 5373000 + }, + { + "epoch": 26.62, + "learning_rate": 3.66936777594467e-05, + "loss": 2.2681, + "step": 5373500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669244412736632e-05, + "loss": 2.2731, + "step": 5374000 + }, + { + "epoch": 26.63, + "learning_rate": 3.669120554094024e-05, + "loss": 2.3088, + "step": 5374500 + }, + { + "epoch": 26.63, + "learning_rate": 3.6689966954514154e-05, + "loss": 2.2603, + "step": 5375000 + }, + { + "epoch": 26.63, + "learning_rate": 3.668872836808807e-05, + "loss": 2.2773, + "step": 5375500 + }, + { + "epoch": 26.63, + "learning_rate": 3.668748978166199e-05, + "loss": 2.2609, + "step": 5376000 + }, + { + "epoch": 26.64, + "learning_rate": 3.6686251195235904e-05, + "loss": 2.2708, + "step": 5376500 + }, + { + "epoch": 26.64, + "learning_rate": 3.668501260880982e-05, + "loss": 2.2872, + "step": 5377000 + }, + { + "epoch": 26.64, + "learning_rate": 3.668377402238374e-05, + "loss": 2.2752, + "step": 5377500 + }, + { + "epoch": 26.64, + "learning_rate": 3.6682535435957655e-05, + "loss": 2.2825, + "step": 5378000 + }, + { + "epoch": 26.65, + "learning_rate": 3.6681296849531565e-05, + "loss": 2.2562, + "step": 5378500 + }, + { + "epoch": 26.65, + "learning_rate": 3.668005826310548e-05, + "loss": 2.2711, + "step": 5379000 + }, + { + "epoch": 26.65, + "learning_rate": 3.667882215385226e-05, + "loss": 2.2773, + "step": 5379500 + }, + { + "epoch": 26.65, + "learning_rate": 3.667758356742617e-05, + "loss": 2.2731, + "step": 5380000 + }, + { + "epoch": 26.66, + "learning_rate": 3.6676344981000085e-05, + "loss": 2.253, + "step": 5380500 + }, + { + "epoch": 26.66, + "learning_rate": 3.6675106394574e-05, + "loss": 2.2824, + "step": 5381000 + }, + { + "epoch": 26.66, + "learning_rate": 3.667387028532077e-05, + "loss": 2.2933, + "step": 5381500 + }, + { + "epoch": 26.66, + "learning_rate": 3.667263417606754e-05, + "loss": 2.2788, + "step": 5382000 + }, + { + "epoch": 26.67, + "learning_rate": 3.6671395589641456e-05, + "loss": 2.2608, + "step": 5382500 + }, + { + "epoch": 26.67, + "learning_rate": 3.667015700321537e-05, + "loss": 2.2762, + "step": 5383000 + }, + { + "epoch": 26.67, + "learning_rate": 3.666891841678929e-05, + "loss": 2.2692, + "step": 5383500 + }, + { + "epoch": 26.67, + "learning_rate": 3.666767983036321e-05, + "loss": 2.2757, + "step": 5384000 + }, + { + "epoch": 26.68, + "learning_rate": 3.666644372110997e-05, + "loss": 2.2969, + "step": 5384500 + }, + { + "epoch": 26.68, + "learning_rate": 3.6665205134683886e-05, + "loss": 2.2756, + "step": 5385000 + }, + { + "epoch": 26.68, + "learning_rate": 3.66639665482578e-05, + "loss": 2.3052, + "step": 5385500 + }, + { + "epoch": 26.68, + "learning_rate": 3.666272796183172e-05, + "loss": 2.2807, + "step": 5386000 + }, + { + "epoch": 26.69, + "learning_rate": 3.666148937540564e-05, + "loss": 2.2769, + "step": 5386500 + }, + { + "epoch": 26.69, + "learning_rate": 3.6660250788979554e-05, + "loss": 2.2956, + "step": 5387000 + }, + { + "epoch": 26.69, + "learning_rate": 3.665901220255347e-05, + "loss": 2.2773, + "step": 5387500 + }, + { + "epoch": 26.69, + "learning_rate": 3.665777361612739e-05, + "loss": 2.2996, + "step": 5388000 + }, + { + "epoch": 26.7, + "learning_rate": 3.6656535029701305e-05, + "loss": 2.2919, + "step": 5388500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665529644327522e-05, + "loss": 2.2895, + "step": 5389000 + }, + { + "epoch": 26.7, + "learning_rate": 3.665406033402199e-05, + "loss": 2.2703, + "step": 5389500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665282174759591e-05, + "loss": 2.2458, + "step": 5390000 + }, + { + "epoch": 26.71, + "learning_rate": 3.6651583161169824e-05, + "loss": 2.2799, + "step": 5390500 + }, + { + "epoch": 26.71, + "learning_rate": 3.665034457474374e-05, + "loss": 2.272, + "step": 5391000 + }, + { + "epoch": 26.71, + "learning_rate": 3.664910598831766e-05, + "loss": 2.2755, + "step": 5391500 + }, + { + "epoch": 26.71, + "learning_rate": 3.6647867401891575e-05, + "loss": 2.284, + "step": 5392000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664662881546549e-05, + "loss": 2.2968, + "step": 5392500 + }, + { + "epoch": 26.72, + "learning_rate": 3.664539022903941e-05, + "loss": 2.2897, + "step": 5393000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664415164261332e-05, + "loss": 2.2828, + "step": 5393500 + }, + { + "epoch": 26.72, + "learning_rate": 3.6642913056187236e-05, + "loss": 2.2667, + "step": 5394000 + }, + { + "epoch": 26.73, + "learning_rate": 3.6641676946934005e-05, + "loss": 2.2811, + "step": 5394500 + }, + { + "epoch": 26.73, + "learning_rate": 3.6640440837680774e-05, + "loss": 2.2899, + "step": 5395000 + }, + { + "epoch": 26.73, + "learning_rate": 3.663920225125469e-05, + "loss": 2.2946, + "step": 5395500 + }, + { + "epoch": 26.73, + "learning_rate": 3.663796366482861e-05, + "loss": 2.2743, + "step": 5396000 + }, + { + "epoch": 26.74, + "learning_rate": 3.6636725078402524e-05, + "loss": 2.2719, + "step": 5396500 + }, + { + "epoch": 26.74, + "learning_rate": 3.663548649197644e-05, + "loss": 2.2736, + "step": 5397000 + }, + { + "epoch": 26.74, + "learning_rate": 3.663424790555036e-05, + "loss": 2.2547, + "step": 5397500 + }, + { + "epoch": 26.74, + "learning_rate": 3.6633009319124275e-05, + "loss": 2.2961, + "step": 5398000 + }, + { + "epoch": 26.75, + "learning_rate": 3.663177073269819e-05, + "loss": 2.2816, + "step": 5398500 + }, + { + "epoch": 26.75, + "learning_rate": 3.663053214627211e-05, + "loss": 2.2523, + "step": 5399000 + }, + { + "epoch": 26.75, + "learning_rate": 3.662929603701887e-05, + "loss": 2.2643, + "step": 5399500 + }, + { + "epoch": 26.75, + "learning_rate": 3.662805745059279e-05, + "loss": 2.2791, + "step": 5400000 + }, + { + "epoch": 26.76, + "learning_rate": 3.6626818864166705e-05, + "loss": 2.2853, + "step": 5400500 + }, + { + "epoch": 26.76, + "learning_rate": 3.662558027774062e-05, + "loss": 2.282, + "step": 5401000 + }, + { + "epoch": 26.76, + "learning_rate": 3.662434169131454e-05, + "loss": 2.2665, + "step": 5401500 + }, + { + "epoch": 26.76, + "learning_rate": 3.662310558206131e-05, + "loss": 2.2855, + "step": 5402000 + }, + { + "epoch": 26.77, + "learning_rate": 3.6621866995635225e-05, + "loss": 2.2654, + "step": 5402500 + }, + { + "epoch": 26.77, + "learning_rate": 3.662062840920914e-05, + "loss": 2.2647, + "step": 5403000 + }, + { + "epoch": 26.77, + "learning_rate": 3.661938982278306e-05, + "loss": 2.2608, + "step": 5403500 + }, + { + "epoch": 26.77, + "learning_rate": 3.661815371352982e-05, + "loss": 2.2639, + "step": 5404000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661691512710374e-05, + "loss": 2.274, + "step": 5404500 + }, + { + "epoch": 26.78, + "learning_rate": 3.6615676540677654e-05, + "loss": 2.2579, + "step": 5405000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661443795425157e-05, + "loss": 2.2698, + "step": 5405500 + }, + { + "epoch": 26.78, + "learning_rate": 3.661319936782549e-05, + "loss": 2.2665, + "step": 5406000 + }, + { + "epoch": 26.79, + "learning_rate": 3.6611960781399405e-05, + "loss": 2.2833, + "step": 5406500 + }, + { + "epoch": 26.79, + "learning_rate": 3.6610724672146174e-05, + "loss": 2.2807, + "step": 5407000 + }, + { + "epoch": 26.79, + "learning_rate": 3.660948608572009e-05, + "loss": 2.2814, + "step": 5407500 + }, + { + "epoch": 26.79, + "learning_rate": 3.660824749929401e-05, + "loss": 2.2917, + "step": 5408000 + }, + { + "epoch": 26.8, + "learning_rate": 3.6607008912867925e-05, + "loss": 2.2824, + "step": 5408500 + }, + { + "epoch": 26.8, + "learning_rate": 3.660577032644184e-05, + "loss": 2.2577, + "step": 5409000 + }, + { + "epoch": 26.8, + "learning_rate": 3.660453174001576e-05, + "loss": 2.2902, + "step": 5409500 + }, + { + "epoch": 26.8, + "learning_rate": 3.6603293153589675e-05, + "loss": 2.2665, + "step": 5410000 + }, + { + "epoch": 26.81, + "learning_rate": 3.660205456716359e-05, + "loss": 2.2558, + "step": 5410500 + }, + { + "epoch": 26.81, + "learning_rate": 3.660081598073751e-05, + "loss": 2.291, + "step": 5411000 + }, + { + "epoch": 26.81, + "learning_rate": 3.6599577394311426e-05, + "loss": 2.2716, + "step": 5411500 + }, + { + "epoch": 26.81, + "learning_rate": 3.659833880788534e-05, + "loss": 2.286, + "step": 5412000 + }, + { + "epoch": 26.82, + "learning_rate": 3.6597102698632105e-05, + "loss": 2.2877, + "step": 5412500 + }, + { + "epoch": 26.82, + "learning_rate": 3.659586411220602e-05, + "loss": 2.272, + "step": 5413000 + }, + { + "epoch": 26.82, + "learning_rate": 3.659462552577994e-05, + "loss": 2.2849, + "step": 5413500 + }, + { + "epoch": 26.82, + "learning_rate": 3.6593386939353856e-05, + "loss": 2.2769, + "step": 5414000 + }, + { + "epoch": 26.83, + "learning_rate": 3.659214835292777e-05, + "loss": 2.2784, + "step": 5414500 + }, + { + "epoch": 26.83, + "learning_rate": 3.659090976650169e-05, + "loss": 2.2649, + "step": 5415000 + }, + { + "epoch": 26.83, + "learning_rate": 3.658967365724846e-05, + "loss": 2.2982, + "step": 5415500 + }, + { + "epoch": 26.83, + "learning_rate": 3.658843754799523e-05, + "loss": 2.2614, + "step": 5416000 + }, + { + "epoch": 26.84, + "learning_rate": 3.6587198961569144e-05, + "loss": 2.3042, + "step": 5416500 + }, + { + "epoch": 26.84, + "learning_rate": 3.658596037514306e-05, + "loss": 2.3011, + "step": 5417000 + }, + { + "epoch": 26.84, + "learning_rate": 3.658472178871697e-05, + "loss": 2.2909, + "step": 5417500 + }, + { + "epoch": 26.84, + "learning_rate": 3.658348320229089e-05, + "loss": 2.2738, + "step": 5418000 + }, + { + "epoch": 26.85, + "learning_rate": 3.6582244615864805e-05, + "loss": 2.2702, + "step": 5418500 + }, + { + "epoch": 26.85, + "learning_rate": 3.658100602943872e-05, + "loss": 2.2858, + "step": 5419000 + }, + { + "epoch": 26.85, + "learning_rate": 3.657976744301264e-05, + "loss": 2.2621, + "step": 5419500 + }, + { + "epoch": 26.85, + "learning_rate": 3.6578528856586556e-05, + "loss": 2.2872, + "step": 5420000 + }, + { + "epoch": 26.86, + "learning_rate": 3.657729027016047e-05, + "loss": 2.2968, + "step": 5420500 + }, + { + "epoch": 26.86, + "learning_rate": 3.657605416090724e-05, + "loss": 2.2733, + "step": 5421000 + }, + { + "epoch": 26.86, + "learning_rate": 3.657481557448116e-05, + "loss": 2.2552, + "step": 5421500 + }, + { + "epoch": 26.86, + "learning_rate": 3.6573576988055076e-05, + "loss": 2.2804, + "step": 5422000 + }, + { + "epoch": 26.86, + "learning_rate": 3.657233840162899e-05, + "loss": 2.2929, + "step": 5422500 + }, + { + "epoch": 26.87, + "learning_rate": 3.657110229237576e-05, + "loss": 2.2556, + "step": 5423000 + }, + { + "epoch": 26.87, + "learning_rate": 3.656986370594968e-05, + "loss": 2.2761, + "step": 5423500 + }, + { + "epoch": 26.87, + "learning_rate": 3.656862511952359e-05, + "loss": 2.289, + "step": 5424000 + }, + { + "epoch": 26.87, + "learning_rate": 3.6567386533097505e-05, + "loss": 2.2732, + "step": 5424500 + }, + { + "epoch": 26.88, + "learning_rate": 3.6566150423844274e-05, + "loss": 2.3058, + "step": 5425000 + }, + { + "epoch": 26.88, + "learning_rate": 3.656491183741819e-05, + "loss": 2.2575, + "step": 5425500 + }, + { + "epoch": 26.88, + "learning_rate": 3.656367325099211e-05, + "loss": 2.2856, + "step": 5426000 + }, + { + "epoch": 26.88, + "learning_rate": 3.6562434664566025e-05, + "loss": 2.2945, + "step": 5426500 + }, + { + "epoch": 26.89, + "learning_rate": 3.656119607813994e-05, + "loss": 2.2855, + "step": 5427000 + }, + { + "epoch": 26.89, + "learning_rate": 3.655995749171386e-05, + "loss": 2.2709, + "step": 5427500 + }, + { + "epoch": 26.89, + "learning_rate": 3.655872138246063e-05, + "loss": 2.2528, + "step": 5428000 + }, + { + "epoch": 26.89, + "learning_rate": 3.6557485273207397e-05, + "loss": 2.2848, + "step": 5428500 + }, + { + "epoch": 26.9, + "learning_rate": 3.6556246686781313e-05, + "loss": 2.2729, + "step": 5429000 + }, + { + "epoch": 26.9, + "learning_rate": 3.6555008100355224e-05, + "loss": 2.3043, + "step": 5429500 + }, + { + "epoch": 26.9, + "learning_rate": 3.655376951392914e-05, + "loss": 2.2512, + "step": 5430000 + }, + { + "epoch": 26.9, + "learning_rate": 3.655253092750306e-05, + "loss": 2.2953, + "step": 5430500 + }, + { + "epoch": 26.91, + "learning_rate": 3.6551292341076974e-05, + "loss": 2.2616, + "step": 5431000 + }, + { + "epoch": 26.91, + "learning_rate": 3.655005375465089e-05, + "loss": 2.2824, + "step": 5431500 + }, + { + "epoch": 26.91, + "learning_rate": 3.654881516822481e-05, + "loss": 2.2861, + "step": 5432000 + }, + { + "epoch": 26.91, + "learning_rate": 3.6547576581798725e-05, + "loss": 2.2852, + "step": 5432500 + }, + { + "epoch": 26.92, + "learning_rate": 3.654633799537264e-05, + "loss": 2.2819, + "step": 5433000 + }, + { + "epoch": 26.92, + "learning_rate": 3.654509940894656e-05, + "loss": 2.2683, + "step": 5433500 + }, + { + "epoch": 26.92, + "learning_rate": 3.6543860822520476e-05, + "loss": 2.2694, + "step": 5434000 + }, + { + "epoch": 26.92, + "learning_rate": 3.654262223609439e-05, + "loss": 2.2698, + "step": 5434500 + }, + { + "epoch": 26.93, + "learning_rate": 3.654138364966831e-05, + "loss": 2.2743, + "step": 5435000 + }, + { + "epoch": 26.93, + "learning_rate": 3.654014506324223e-05, + "loss": 2.2672, + "step": 5435500 + }, + { + "epoch": 26.93, + "learning_rate": 3.6538906476816144e-05, + "loss": 2.2739, + "step": 5436000 + }, + { + "epoch": 26.93, + "learning_rate": 3.653767036756291e-05, + "loss": 2.2586, + "step": 5436500 + }, + { + "epoch": 26.94, + "learning_rate": 3.653643178113683e-05, + "loss": 2.29, + "step": 5437000 + }, + { + "epoch": 26.94, + "learning_rate": 3.653519319471074e-05, + "loss": 2.2828, + "step": 5437500 + }, + { + "epoch": 26.94, + "learning_rate": 3.6533954608284656e-05, + "loss": 2.2699, + "step": 5438000 + }, + { + "epoch": 26.94, + "learning_rate": 3.6532716021858573e-05, + "loss": 2.2833, + "step": 5438500 + }, + { + "epoch": 26.95, + "learning_rate": 3.653147743543249e-05, + "loss": 2.2968, + "step": 5439000 + }, + { + "epoch": 26.95, + "learning_rate": 3.653023884900641e-05, + "loss": 2.2807, + "step": 5439500 + }, + { + "epoch": 26.95, + "learning_rate": 3.6529000262580324e-05, + "loss": 2.2711, + "step": 5440000 + }, + { + "epoch": 26.95, + "learning_rate": 3.652776167615424e-05, + "loss": 2.2559, + "step": 5440500 + }, + { + "epoch": 26.96, + "learning_rate": 3.652652308972815e-05, + "loss": 2.2752, + "step": 5441000 + }, + { + "epoch": 26.96, + "learning_rate": 3.652528450330207e-05, + "loss": 2.2879, + "step": 5441500 + }, + { + "epoch": 26.96, + "learning_rate": 3.6524050871221696e-05, + "loss": 2.2852, + "step": 5442000 + }, + { + "epoch": 26.96, + "learning_rate": 3.652281228479561e-05, + "loss": 2.2834, + "step": 5442500 + }, + { + "epoch": 26.97, + "learning_rate": 3.652157369836953e-05, + "loss": 2.2683, + "step": 5443000 + }, + { + "epoch": 26.97, + "learning_rate": 3.6520335111943446e-05, + "loss": 2.2996, + "step": 5443500 + }, + { + "epoch": 26.97, + "learning_rate": 3.651909652551736e-05, + "loss": 2.2578, + "step": 5444000 + }, + { + "epoch": 26.97, + "learning_rate": 3.6517857939091274e-05, + "loss": 2.2798, + "step": 5444500 + }, + { + "epoch": 26.98, + "learning_rate": 3.651661935266519e-05, + "loss": 2.287, + "step": 5445000 + }, + { + "epoch": 26.98, + "learning_rate": 3.651538076623911e-05, + "loss": 2.2635, + "step": 5445500 + }, + { + "epoch": 26.98, + "learning_rate": 3.6514144656985876e-05, + "loss": 2.2767, + "step": 5446000 + }, + { + "epoch": 26.98, + "learning_rate": 3.651290607055979e-05, + "loss": 2.2714, + "step": 5446500 + }, + { + "epoch": 26.99, + "learning_rate": 3.651166748413371e-05, + "loss": 2.2901, + "step": 5447000 + }, + { + "epoch": 26.99, + "learning_rate": 3.651042889770763e-05, + "loss": 2.2857, + "step": 5447500 + }, + { + "epoch": 26.99, + "learning_rate": 3.6509190311281544e-05, + "loss": 2.2839, + "step": 5448000 + }, + { + "epoch": 26.99, + "learning_rate": 3.650795172485546e-05, + "loss": 2.2809, + "step": 5448500 + }, + { + "epoch": 27.0, + "learning_rate": 3.650671313842938e-05, + "loss": 2.2747, + "step": 5449000 + }, + { + "epoch": 27.0, + "learning_rate": 3.6505474552003295e-05, + "loss": 2.291, + "step": 5449500 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.6562619972139041, + "eval_accuracy_mlm": 0.6113849397655421, + "eval_accuracy_nsp": 0.8681356610278516, + "eval_loss": 2.3292908668518066, + "eval_runtime": 146.2467, + "eval_samples_per_second": 1743.349, + "eval_steps_per_second": 72.644, + "step": 5449761 + }, + { + "epoch": 27.0, + "learning_rate": 3.650423596557721e-05, + "loss": 2.2555, + "step": 5450000 + }, + { + "epoch": 27.0, + "learning_rate": 3.650299985632398e-05, + "loss": 2.257, + "step": 5450500 + }, + { + "epoch": 27.01, + "learning_rate": 3.6501766224243594e-05, + "loss": 2.2329, + "step": 5451000 + }, + { + "epoch": 27.01, + "learning_rate": 3.650052763781751e-05, + "loss": 2.2794, + "step": 5451500 + }, + { + "epoch": 27.01, + "learning_rate": 3.649928905139143e-05, + "loss": 2.2362, + "step": 5452000 + }, + { + "epoch": 27.01, + "learning_rate": 3.6498050464965345e-05, + "loss": 2.2553, + "step": 5452500 + }, + { + "epoch": 27.02, + "learning_rate": 3.649681187853926e-05, + "loss": 2.2592, + "step": 5453000 + }, + { + "epoch": 27.02, + "learning_rate": 3.649557329211318e-05, + "loss": 2.2158, + "step": 5453500 + }, + { + "epoch": 27.02, + "learning_rate": 3.6494334705687096e-05, + "loss": 2.2312, + "step": 5454000 + }, + { + "epoch": 27.02, + "learning_rate": 3.649309611926101e-05, + "loss": 2.2288, + "step": 5454500 + }, + { + "epoch": 27.03, + "learning_rate": 3.649185753283493e-05, + "loss": 2.2151, + "step": 5455000 + }, + { + "epoch": 27.03, + "learning_rate": 3.649061894640885e-05, + "loss": 2.2561, + "step": 5455500 + }, + { + "epoch": 27.03, + "learning_rate": 3.6489380359982764e-05, + "loss": 2.2237, + "step": 5456000 + }, + { + "epoch": 27.03, + "learning_rate": 3.648814177355668e-05, + "loss": 2.2344, + "step": 5456500 + }, + { + "epoch": 27.04, + "learning_rate": 3.64869031871306e-05, + "loss": 2.2628, + "step": 5457000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648566707787736e-05, + "loss": 2.2354, + "step": 5457500 + }, + { + "epoch": 27.04, + "learning_rate": 3.6484428491451276e-05, + "loss": 2.2272, + "step": 5458000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648318990502519e-05, + "loss": 2.2534, + "step": 5458500 + }, + { + "epoch": 27.05, + "learning_rate": 3.648195131859911e-05, + "loss": 2.2329, + "step": 5459000 + }, + { + "epoch": 27.05, + "learning_rate": 3.648071273217303e-05, + "loss": 2.2415, + "step": 5459500 + }, + { + "epoch": 27.05, + "learning_rate": 3.6479474145746944e-05, + "loss": 2.2537, + "step": 5460000 + }, + { + "epoch": 27.05, + "learning_rate": 3.647823803649371e-05, + "loss": 2.2636, + "step": 5460500 + }, + { + "epoch": 27.06, + "learning_rate": 3.647699945006763e-05, + "loss": 2.2439, + "step": 5461000 + }, + { + "epoch": 27.06, + "learning_rate": 3.647576086364155e-05, + "loss": 2.264, + "step": 5461500 + }, + { + "epoch": 27.06, + "learning_rate": 3.6474522277215464e-05, + "loss": 2.2518, + "step": 5462000 + }, + { + "epoch": 27.06, + "learning_rate": 3.6473286167962226e-05, + "loss": 2.2465, + "step": 5462500 + }, + { + "epoch": 27.07, + "learning_rate": 3.647204758153614e-05, + "loss": 2.246, + "step": 5463000 + }, + { + "epoch": 27.07, + "learning_rate": 3.647080899511006e-05, + "loss": 2.2395, + "step": 5463500 + }, + { + "epoch": 27.07, + "learning_rate": 3.6469570408683977e-05, + "loss": 2.2537, + "step": 5464000 + }, + { + "epoch": 27.07, + "learning_rate": 3.6468331822257893e-05, + "loss": 2.2474, + "step": 5464500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646709323583181e-05, + "loss": 2.2647, + "step": 5465000 + }, + { + "epoch": 27.08, + "learning_rate": 3.646585712657858e-05, + "loss": 2.2496, + "step": 5465500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646462101732535e-05, + "loss": 2.2495, + "step": 5466000 + }, + { + "epoch": 27.08, + "learning_rate": 3.6463382430899265e-05, + "loss": 2.2468, + "step": 5466500 + }, + { + "epoch": 27.09, + "learning_rate": 3.646214384447318e-05, + "loss": 2.2772, + "step": 5467000 + }, + { + "epoch": 27.09, + "learning_rate": 3.64609052580471e-05, + "loss": 2.2372, + "step": 5467500 + }, + { + "epoch": 27.09, + "learning_rate": 3.645966914879387e-05, + "loss": 2.2775, + "step": 5468000 + }, + { + "epoch": 27.09, + "learning_rate": 3.6458430562367785e-05, + "loss": 2.2419, + "step": 5468500 + }, + { + "epoch": 27.1, + "learning_rate": 3.64571919759417e-05, + "loss": 2.2398, + "step": 5469000 + }, + { + "epoch": 27.1, + "learning_rate": 3.645595586668847e-05, + "loss": 2.2505, + "step": 5469500 + }, + { + "epoch": 27.1, + "learning_rate": 3.645471728026239e-05, + "loss": 2.259, + "step": 5470000 + }, + { + "epoch": 27.1, + "learning_rate": 3.6453478693836304e-05, + "loss": 2.2549, + "step": 5470500 + }, + { + "epoch": 27.11, + "learning_rate": 3.645224010741022e-05, + "loss": 2.256, + "step": 5471000 + }, + { + "epoch": 27.11, + "learning_rate": 3.645100152098414e-05, + "loss": 2.2535, + "step": 5471500 + }, + { + "epoch": 27.11, + "learning_rate": 3.6449762934558055e-05, + "loss": 2.2698, + "step": 5472000 + }, + { + "epoch": 27.11, + "learning_rate": 3.6448524348131965e-05, + "loss": 2.2286, + "step": 5472500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644728576170588e-05, + "loss": 2.2582, + "step": 5473000 + }, + { + "epoch": 27.12, + "learning_rate": 3.64460471752798e-05, + "loss": 2.2486, + "step": 5473500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644481106602657e-05, + "loss": 2.261, + "step": 5474000 + }, + { + "epoch": 27.12, + "learning_rate": 3.6443572479600485e-05, + "loss": 2.2363, + "step": 5474500 + }, + { + "epoch": 27.13, + "learning_rate": 3.64423338931744e-05, + "loss": 2.2753, + "step": 5475000 + }, + { + "epoch": 27.13, + "learning_rate": 3.644109530674832e-05, + "loss": 2.2233, + "step": 5475500 + }, + { + "epoch": 27.13, + "learning_rate": 3.643985672032223e-05, + "loss": 2.2671, + "step": 5476000 + }, + { + "epoch": 27.13, + "learning_rate": 3.6438618133896146e-05, + "loss": 2.2589, + "step": 5476500 + }, + { + "epoch": 27.13, + "learning_rate": 3.643737954747006e-05, + "loss": 2.2706, + "step": 5477000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643614096104398e-05, + "loss": 2.247, + "step": 5477500 + }, + { + "epoch": 27.14, + "learning_rate": 3.6434902374617896e-05, + "loss": 2.2405, + "step": 5478000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643366378819181e-05, + "loss": 2.2682, + "step": 5478500 + }, + { + "epoch": 27.14, + "learning_rate": 3.643242520176573e-05, + "loss": 2.2618, + "step": 5479000 + }, + { + "epoch": 27.15, + "learning_rate": 3.643118661533965e-05, + "loss": 2.2553, + "step": 5479500 + }, + { + "epoch": 27.15, + "learning_rate": 3.6429948028913564e-05, + "loss": 2.2585, + "step": 5480000 + }, + { + "epoch": 27.15, + "learning_rate": 3.642870944248748e-05, + "loss": 2.2591, + "step": 5480500 + }, + { + "epoch": 27.15, + "learning_rate": 3.642747333323425e-05, + "loss": 2.2141, + "step": 5481000 + }, + { + "epoch": 27.16, + "learning_rate": 3.642623474680816e-05, + "loss": 2.2618, + "step": 5481500 + }, + { + "epoch": 27.16, + "learning_rate": 3.642499616038208e-05, + "loss": 2.2731, + "step": 5482000 + }, + { + "epoch": 27.16, + "learning_rate": 3.6423757573955994e-05, + "loss": 2.2593, + "step": 5482500 + }, + { + "epoch": 27.16, + "learning_rate": 3.642252146470276e-05, + "loss": 2.2595, + "step": 5483000 + }, + { + "epoch": 27.17, + "learning_rate": 3.642128287827668e-05, + "loss": 2.2376, + "step": 5483500 + }, + { + "epoch": 27.17, + "learning_rate": 3.6420044291850597e-05, + "loss": 2.2437, + "step": 5484000 + }, + { + "epoch": 27.17, + "learning_rate": 3.6418805705424513e-05, + "loss": 2.2427, + "step": 5484500 + }, + { + "epoch": 27.17, + "learning_rate": 3.641756711899843e-05, + "loss": 2.2614, + "step": 5485000 + }, + { + "epoch": 27.18, + "learning_rate": 3.641633348691805e-05, + "loss": 2.2662, + "step": 5485500 + }, + { + "epoch": 27.18, + "learning_rate": 3.641509490049197e-05, + "loss": 2.232, + "step": 5486000 + }, + { + "epoch": 27.18, + "learning_rate": 3.6413856314065885e-05, + "loss": 2.2682, + "step": 5486500 + }, + { + "epoch": 27.18, + "learning_rate": 3.64126177276398e-05, + "loss": 2.2515, + "step": 5487000 + }, + { + "epoch": 27.19, + "learning_rate": 3.641138161838657e-05, + "loss": 2.2346, + "step": 5487500 + }, + { + "epoch": 27.19, + "learning_rate": 3.641014303196049e-05, + "loss": 2.2527, + "step": 5488000 + }, + { + "epoch": 27.19, + "learning_rate": 3.6408904445534405e-05, + "loss": 2.2485, + "step": 5488500 + }, + { + "epoch": 27.19, + "learning_rate": 3.640766585910832e-05, + "loss": 2.2585, + "step": 5489000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640642727268224e-05, + "loss": 2.2364, + "step": 5489500 + }, + { + "epoch": 27.2, + "learning_rate": 3.6405188686256155e-05, + "loss": 2.257, + "step": 5490000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640395009983007e-05, + "loss": 2.2554, + "step": 5490500 + }, + { + "epoch": 27.2, + "learning_rate": 3.640271151340399e-05, + "loss": 2.2422, + "step": 5491000 + }, + { + "epoch": 27.21, + "learning_rate": 3.6401472926977906e-05, + "loss": 2.2549, + "step": 5491500 + }, + { + "epoch": 27.21, + "learning_rate": 3.640023681772467e-05, + "loss": 2.2699, + "step": 5492000 + }, + { + "epoch": 27.21, + "learning_rate": 3.6398998231298585e-05, + "loss": 2.266, + "step": 5492500 + }, + { + "epoch": 27.21, + "learning_rate": 3.63977596448725e-05, + "loss": 2.2627, + "step": 5493000 + }, + { + "epoch": 27.22, + "learning_rate": 3.639652105844642e-05, + "loss": 2.2377, + "step": 5493500 + }, + { + "epoch": 27.22, + "learning_rate": 3.6395282472020336e-05, + "loss": 2.2849, + "step": 5494000 + }, + { + "epoch": 27.22, + "learning_rate": 3.6394046362767105e-05, + "loss": 2.2537, + "step": 5494500 + }, + { + "epoch": 27.22, + "learning_rate": 3.6392810253513873e-05, + "loss": 2.2768, + "step": 5495000 + }, + { + "epoch": 27.23, + "learning_rate": 3.639157166708779e-05, + "loss": 2.2535, + "step": 5495500 + }, + { + "epoch": 27.23, + "learning_rate": 3.63903330806617e-05, + "loss": 2.2404, + "step": 5496000 + }, + { + "epoch": 27.23, + "learning_rate": 3.638909697140847e-05, + "loss": 2.2792, + "step": 5496500 + }, + { + "epoch": 27.23, + "learning_rate": 3.6387858384982386e-05, + "loss": 2.2611, + "step": 5497000 + }, + { + "epoch": 27.24, + "learning_rate": 3.63866197985563e-05, + "loss": 2.2887, + "step": 5497500 + }, + { + "epoch": 27.24, + "learning_rate": 3.638538121213022e-05, + "loss": 2.2696, + "step": 5498000 + }, + { + "epoch": 27.24, + "learning_rate": 3.638414262570414e-05, + "loss": 2.2436, + "step": 5498500 + }, + { + "epoch": 27.24, + "learning_rate": 3.6382904039278054e-05, + "loss": 2.2483, + "step": 5499000 + }, + { + "epoch": 27.25, + "learning_rate": 3.638166545285197e-05, + "loss": 2.2444, + "step": 5499500 + }, + { + "epoch": 27.25, + "learning_rate": 3.638042686642589e-05, + "loss": 2.2416, + "step": 5500000 + }, + { + "epoch": 27.25, + "learning_rate": 3.637919075717266e-05, + "loss": 2.2547, + "step": 5500500 + }, + { + "epoch": 27.25, + "learning_rate": 3.6377952170746574e-05, + "loss": 2.2881, + "step": 5501000 + }, + { + "epoch": 27.26, + "learning_rate": 3.637671358432049e-05, + "loss": 2.2535, + "step": 5501500 + }, + { + "epoch": 27.26, + "learning_rate": 3.637547747506725e-05, + "loss": 2.2535, + "step": 5502000 + }, + { + "epoch": 27.26, + "learning_rate": 3.637423888864117e-05, + "loss": 2.2756, + "step": 5502500 + }, + { + "epoch": 27.26, + "learning_rate": 3.6373000302215086e-05, + "loss": 2.2454, + "step": 5503000 + }, + { + "epoch": 27.27, + "learning_rate": 3.6371761715789e-05, + "loss": 2.2552, + "step": 5503500 + }, + { + "epoch": 27.27, + "learning_rate": 3.637052312936292e-05, + "loss": 2.2726, + "step": 5504000 + }, + { + "epoch": 27.27, + "learning_rate": 3.636928454293684e-05, + "loss": 2.2531, + "step": 5504500 + }, + { + "epoch": 27.27, + "learning_rate": 3.6368045956510754e-05, + "loss": 2.2683, + "step": 5505000 + }, + { + "epoch": 27.28, + "learning_rate": 3.636680737008467e-05, + "loss": 2.2521, + "step": 5505500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636556878365859e-05, + "loss": 2.25, + "step": 5506000 + }, + { + "epoch": 27.28, + "learning_rate": 3.6364330197232505e-05, + "loss": 2.2742, + "step": 5506500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636309161080642e-05, + "loss": 2.261, + "step": 5507000 + }, + { + "epoch": 27.29, + "learning_rate": 3.636185302438034e-05, + "loss": 2.2644, + "step": 5507500 + }, + { + "epoch": 27.29, + "learning_rate": 3.636061691512711e-05, + "loss": 2.2741, + "step": 5508000 + }, + { + "epoch": 27.29, + "learning_rate": 3.6359378328701024e-05, + "loss": 2.2615, + "step": 5508500 + }, + { + "epoch": 27.29, + "learning_rate": 3.6358142219447787e-05, + "loss": 2.235, + "step": 5509000 + }, + { + "epoch": 27.3, + "learning_rate": 3.6356903633021703e-05, + "loss": 2.2733, + "step": 5509500 + }, + { + "epoch": 27.3, + "learning_rate": 3.635566504659562e-05, + "loss": 2.254, + "step": 5510000 + }, + { + "epoch": 27.3, + "learning_rate": 3.635442646016954e-05, + "loss": 2.2436, + "step": 5510500 + }, + { + "epoch": 27.3, + "learning_rate": 3.6353187873743454e-05, + "loss": 2.2736, + "step": 5511000 + }, + { + "epoch": 27.31, + "learning_rate": 3.635194928731737e-05, + "loss": 2.2374, + "step": 5511500 + }, + { + "epoch": 27.31, + "learning_rate": 3.635071070089129e-05, + "loss": 2.2574, + "step": 5512000 + }, + { + "epoch": 27.31, + "learning_rate": 3.6349472114465205e-05, + "loss": 2.253, + "step": 5512500 + }, + { + "epoch": 27.31, + "learning_rate": 3.634823352803912e-05, + "loss": 2.2539, + "step": 5513000 + }, + { + "epoch": 27.32, + "learning_rate": 3.634699494161304e-05, + "loss": 2.2727, + "step": 5513500 + }, + { + "epoch": 27.32, + "learning_rate": 3.6345756355186956e-05, + "loss": 2.2654, + "step": 5514000 + }, + { + "epoch": 27.32, + "learning_rate": 3.634451776876087e-05, + "loss": 2.2546, + "step": 5514500 + }, + { + "epoch": 27.32, + "learning_rate": 3.634327918233479e-05, + "loss": 2.2389, + "step": 5515000 + }, + { + "epoch": 27.33, + "learning_rate": 3.6342040595908707e-05, + "loss": 2.2643, + "step": 5515500 + }, + { + "epoch": 27.33, + "learning_rate": 3.6340802009482624e-05, + "loss": 2.2547, + "step": 5516000 + }, + { + "epoch": 27.33, + "learning_rate": 3.633956342305654e-05, + "loss": 2.2357, + "step": 5516500 + }, + { + "epoch": 27.33, + "learning_rate": 3.633832483663045e-05, + "loss": 2.2576, + "step": 5517000 + }, + { + "epoch": 27.34, + "learning_rate": 3.633708625020437e-05, + "loss": 2.2713, + "step": 5517500 + }, + { + "epoch": 27.34, + "learning_rate": 3.633585261812399e-05, + "loss": 2.2306, + "step": 5518000 + }, + { + "epoch": 27.34, + "learning_rate": 3.6334614031697905e-05, + "loss": 2.2457, + "step": 5518500 + }, + { + "epoch": 27.34, + "learning_rate": 3.633337544527182e-05, + "loss": 2.2603, + "step": 5519000 + }, + { + "epoch": 27.35, + "learning_rate": 3.633213685884574e-05, + "loss": 2.2669, + "step": 5519500 + }, + { + "epoch": 27.35, + "learning_rate": 3.6330898272419656e-05, + "loss": 2.2569, + "step": 5520000 + }, + { + "epoch": 27.35, + "learning_rate": 3.632965968599357e-05, + "loss": 2.2568, + "step": 5520500 + }, + { + "epoch": 27.35, + "learning_rate": 3.632842109956749e-05, + "loss": 2.2558, + "step": 5521000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632718251314141e-05, + "loss": 2.2727, + "step": 5521500 + }, + { + "epoch": 27.36, + "learning_rate": 3.6325943926715324e-05, + "loss": 2.2767, + "step": 5522000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632470534028924e-05, + "loss": 2.2559, + "step": 5522500 + }, + { + "epoch": 27.36, + "learning_rate": 3.632346675386315e-05, + "loss": 2.2577, + "step": 5523000 + }, + { + "epoch": 27.37, + "learning_rate": 3.632223064460992e-05, + "loss": 2.2861, + "step": 5523500 + }, + { + "epoch": 27.37, + "learning_rate": 3.6320992058183836e-05, + "loss": 2.256, + "step": 5524000 + }, + { + "epoch": 27.37, + "learning_rate": 3.6319753471757753e-05, + "loss": 2.2271, + "step": 5524500 + }, + { + "epoch": 27.37, + "learning_rate": 3.631851488533167e-05, + "loss": 2.261, + "step": 5525000 + }, + { + "epoch": 27.38, + "learning_rate": 3.631727629890559e-05, + "loss": 2.245, + "step": 5525500 + }, + { + "epoch": 27.38, + "learning_rate": 3.63160377124795e-05, + "loss": 2.2664, + "step": 5526000 + }, + { + "epoch": 27.38, + "learning_rate": 3.6314799126053414e-05, + "loss": 2.2493, + "step": 5526500 + }, + { + "epoch": 27.38, + "learning_rate": 3.631356301680019e-05, + "loss": 2.2382, + "step": 5527000 + }, + { + "epoch": 27.39, + "learning_rate": 3.631232443037411e-05, + "loss": 2.252, + "step": 5527500 + }, + { + "epoch": 27.39, + "learning_rate": 3.6311085843948024e-05, + "loss": 2.2755, + "step": 5528000 + }, + { + "epoch": 27.39, + "learning_rate": 3.630984725752194e-05, + "loss": 2.2891, + "step": 5528500 + }, + { + "epoch": 27.39, + "learning_rate": 3.630861114826871e-05, + "loss": 2.2629, + "step": 5529000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630737256184262e-05, + "loss": 2.2718, + "step": 5529500 + }, + { + "epoch": 27.4, + "learning_rate": 3.630613892976224e-05, + "loss": 2.2516, + "step": 5530000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630490034333616e-05, + "loss": 2.2641, + "step": 5530500 + }, + { + "epoch": 27.4, + "learning_rate": 3.6303661756910074e-05, + "loss": 2.2786, + "step": 5531000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630242317048399e-05, + "loss": 2.2593, + "step": 5531500 + }, + { + "epoch": 27.41, + "learning_rate": 3.630118458405791e-05, + "loss": 2.2493, + "step": 5532000 + }, + { + "epoch": 27.41, + "learning_rate": 3.6299945997631825e-05, + "loss": 2.2607, + "step": 5532500 + }, + { + "epoch": 27.41, + "learning_rate": 3.629870741120574e-05, + "loss": 2.2367, + "step": 5533000 + }, + { + "epoch": 27.41, + "learning_rate": 3.629746882477966e-05, + "loss": 2.2638, + "step": 5533500 + }, + { + "epoch": 27.42, + "learning_rate": 3.6296230238353576e-05, + "loss": 2.2509, + "step": 5534000 + }, + { + "epoch": 27.42, + "learning_rate": 3.629499165192749e-05, + "loss": 2.2706, + "step": 5534500 + }, + { + "epoch": 27.42, + "learning_rate": 3.6293755542674255e-05, + "loss": 2.2787, + "step": 5535000 + }, + { + "epoch": 27.42, + "learning_rate": 3.629251695624817e-05, + "loss": 2.254, + "step": 5535500 + }, + { + "epoch": 27.43, + "learning_rate": 3.629127836982209e-05, + "loss": 2.2543, + "step": 5536000 + }, + { + "epoch": 27.43, + "learning_rate": 3.629004226056886e-05, + "loss": 2.2467, + "step": 5536500 + }, + { + "epoch": 27.43, + "learning_rate": 3.6288803674142774e-05, + "loss": 2.2442, + "step": 5537000 + }, + { + "epoch": 27.43, + "learning_rate": 3.628756508771669e-05, + "loss": 2.2459, + "step": 5537500 + }, + { + "epoch": 27.44, + "learning_rate": 3.628632897846346e-05, + "loss": 2.285, + "step": 5538000 + }, + { + "epoch": 27.44, + "learning_rate": 3.628509039203738e-05, + "loss": 2.2431, + "step": 5538500 + }, + { + "epoch": 27.44, + "learning_rate": 3.6283851805611294e-05, + "loss": 2.2626, + "step": 5539000 + }, + { + "epoch": 27.44, + "learning_rate": 3.628261321918521e-05, + "loss": 2.2379, + "step": 5539500 + }, + { + "epoch": 27.45, + "learning_rate": 3.628137710993198e-05, + "loss": 2.2729, + "step": 5540000 + }, + { + "epoch": 27.45, + "learning_rate": 3.6280138523505897e-05, + "loss": 2.2661, + "step": 5540500 + }, + { + "epoch": 27.45, + "learning_rate": 3.6278899937079814e-05, + "loss": 2.2461, + "step": 5541000 + }, + { + "epoch": 27.45, + "learning_rate": 3.627766135065373e-05, + "loss": 2.2775, + "step": 5541500 + }, + { + "epoch": 27.46, + "learning_rate": 3.627642276422765e-05, + "loss": 2.2716, + "step": 5542000 + }, + { + "epoch": 27.46, + "learning_rate": 3.627518417780156e-05, + "loss": 2.2505, + "step": 5542500 + }, + { + "epoch": 27.46, + "learning_rate": 3.6273945591375474e-05, + "loss": 2.2486, + "step": 5543000 + }, + { + "epoch": 27.46, + "learning_rate": 3.627270700494939e-05, + "loss": 2.2634, + "step": 5543500 + }, + { + "epoch": 27.47, + "learning_rate": 3.627146841852331e-05, + "loss": 2.2735, + "step": 5544000 + }, + { + "epoch": 27.47, + "learning_rate": 3.6270229832097225e-05, + "loss": 2.2689, + "step": 5544500 + }, + { + "epoch": 27.47, + "learning_rate": 3.626899124567114e-05, + "loss": 2.2402, + "step": 5545000 + }, + { + "epoch": 27.47, + "learning_rate": 3.626775265924506e-05, + "loss": 2.2754, + "step": 5545500 + }, + { + "epoch": 27.48, + "learning_rate": 3.6266514072818976e-05, + "loss": 2.2596, + "step": 5546000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626527548639289e-05, + "loss": 2.2925, + "step": 5546500 + }, + { + "epoch": 27.48, + "learning_rate": 3.626403689996681e-05, + "loss": 2.2827, + "step": 5547000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626279831354073e-05, + "loss": 2.2446, + "step": 5547500 + }, + { + "epoch": 27.49, + "learning_rate": 3.6261559727114644e-05, + "loss": 2.2501, + "step": 5548000 + }, + { + "epoch": 27.49, + "learning_rate": 3.626032114068856e-05, + "loss": 2.2617, + "step": 5548500 + }, + { + "epoch": 27.49, + "learning_rate": 3.625908255426248e-05, + "loss": 2.2591, + "step": 5549000 + }, + { + "epoch": 27.49, + "learning_rate": 3.6257843967836395e-05, + "loss": 2.2573, + "step": 5549500 + }, + { + "epoch": 27.5, + "learning_rate": 3.6256605381410305e-05, + "loss": 2.2601, + "step": 5550000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625536679498422e-05, + "loss": 2.2669, + "step": 5550500 + }, + { + "epoch": 27.5, + "learning_rate": 3.625412820855814e-05, + "loss": 2.2688, + "step": 5551000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625289209930491e-05, + "loss": 2.256, + "step": 5551500 + }, + { + "epoch": 27.51, + "learning_rate": 3.6251653512878824e-05, + "loss": 2.2706, + "step": 5552000 + }, + { + "epoch": 27.51, + "learning_rate": 3.625041492645274e-05, + "loss": 2.2595, + "step": 5552500 + }, + { + "epoch": 27.51, + "learning_rate": 3.624917634002666e-05, + "loss": 2.271, + "step": 5553000 + }, + { + "epoch": 27.51, + "learning_rate": 3.6247937753600575e-05, + "loss": 2.2814, + "step": 5553500 + }, + { + "epoch": 27.52, + "learning_rate": 3.624669916717449e-05, + "loss": 2.277, + "step": 5554000 + }, + { + "epoch": 27.52, + "learning_rate": 3.62454605807484e-05, + "loss": 2.2732, + "step": 5554500 + }, + { + "epoch": 27.52, + "learning_rate": 3.624422199432232e-05, + "loss": 2.2527, + "step": 5555000 + }, + { + "epoch": 27.52, + "learning_rate": 3.624298836224194e-05, + "loss": 2.2457, + "step": 5555500 + }, + { + "epoch": 27.53, + "learning_rate": 3.624174977581586e-05, + "loss": 2.2607, + "step": 5556000 + }, + { + "epoch": 27.53, + "learning_rate": 3.6240511189389774e-05, + "loss": 2.2696, + "step": 5556500 + }, + { + "epoch": 27.53, + "learning_rate": 3.623927260296369e-05, + "loss": 2.2735, + "step": 5557000 + }, + { + "epoch": 27.53, + "learning_rate": 3.623803401653761e-05, + "loss": 2.2682, + "step": 5557500 + }, + { + "epoch": 27.54, + "learning_rate": 3.6236795430111524e-05, + "loss": 2.2556, + "step": 5558000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623555684368544e-05, + "loss": 2.263, + "step": 5558500 + }, + { + "epoch": 27.54, + "learning_rate": 3.623432073443221e-05, + "loss": 2.2585, + "step": 5559000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623308214800613e-05, + "loss": 2.2543, + "step": 5559500 + }, + { + "epoch": 27.55, + "learning_rate": 3.6231843561580044e-05, + "loss": 2.2579, + "step": 5560000 + }, + { + "epoch": 27.55, + "learning_rate": 3.623060497515396e-05, + "loss": 2.2519, + "step": 5560500 + }, + { + "epoch": 27.55, + "learning_rate": 3.622936638872788e-05, + "loss": 2.2483, + "step": 5561000 + }, + { + "epoch": 27.55, + "learning_rate": 3.6228127802301795e-05, + "loss": 2.2644, + "step": 5561500 + }, + { + "epoch": 27.56, + "learning_rate": 3.622688921587571e-05, + "loss": 2.2766, + "step": 5562000 + }, + { + "epoch": 27.56, + "learning_rate": 3.622565062944963e-05, + "loss": 2.2594, + "step": 5562500 + }, + { + "epoch": 27.56, + "learning_rate": 3.6224412043023546e-05, + "loss": 2.2581, + "step": 5563000 + }, + { + "epoch": 27.56, + "learning_rate": 3.6223173456597456e-05, + "loss": 2.2555, + "step": 5563500 + }, + { + "epoch": 27.57, + "learning_rate": 3.6221937347344225e-05, + "loss": 2.2705, + "step": 5564000 + }, + { + "epoch": 27.57, + "learning_rate": 3.622069876091814e-05, + "loss": 2.2884, + "step": 5564500 + }, + { + "epoch": 27.57, + "learning_rate": 3.621946017449206e-05, + "loss": 2.2761, + "step": 5565000 + }, + { + "epoch": 27.57, + "learning_rate": 3.6218221588065975e-05, + "loss": 2.2661, + "step": 5565500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621698300163989e-05, + "loss": 2.2659, + "step": 5566000 + }, + { + "epoch": 27.58, + "learning_rate": 3.621574441521381e-05, + "loss": 2.2419, + "step": 5566500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621450582878772e-05, + "loss": 2.2708, + "step": 5567000 + }, + { + "epoch": 27.58, + "learning_rate": 3.6213269719534495e-05, + "loss": 2.2641, + "step": 5567500 + }, + { + "epoch": 27.59, + "learning_rate": 3.621203113310841e-05, + "loss": 2.2466, + "step": 5568000 + }, + { + "epoch": 27.59, + "learning_rate": 3.621079254668233e-05, + "loss": 2.2507, + "step": 5568500 + }, + { + "epoch": 27.59, + "learning_rate": 3.6209553960256246e-05, + "loss": 2.2843, + "step": 5569000 + }, + { + "epoch": 27.59, + "learning_rate": 3.620831785100301e-05, + "loss": 2.2604, + "step": 5569500 + }, + { + "epoch": 27.6, + "learning_rate": 3.6207079264576925e-05, + "loss": 2.2593, + "step": 5570000 + }, + { + "epoch": 27.6, + "learning_rate": 3.620584067815084e-05, + "loss": 2.2368, + "step": 5570500 + }, + { + "epoch": 27.6, + "learning_rate": 3.620460209172476e-05, + "loss": 2.2762, + "step": 5571000 + }, + { + "epoch": 27.6, + "learning_rate": 3.6203363505298675e-05, + "loss": 2.2359, + "step": 5571500 + }, + { + "epoch": 27.61, + "learning_rate": 3.6202127396045444e-05, + "loss": 2.2649, + "step": 5572000 + }, + { + "epoch": 27.61, + "learning_rate": 3.6200893763965065e-05, + "loss": 2.2789, + "step": 5572500 + }, + { + "epoch": 27.61, + "learning_rate": 3.619965517753898e-05, + "loss": 2.2694, + "step": 5573000 + }, + { + "epoch": 27.61, + "learning_rate": 3.61984165911129e-05, + "loss": 2.3009, + "step": 5573500 + }, + { + "epoch": 27.62, + "learning_rate": 3.619717800468681e-05, + "loss": 2.2679, + "step": 5574000 + }, + { + "epoch": 27.62, + "learning_rate": 3.6195939418260726e-05, + "loss": 2.2534, + "step": 5574500 + }, + { + "epoch": 27.62, + "learning_rate": 3.619470083183464e-05, + "loss": 2.2697, + "step": 5575000 + }, + { + "epoch": 27.62, + "learning_rate": 3.619346224540856e-05, + "loss": 2.2451, + "step": 5575500 + }, + { + "epoch": 27.63, + "learning_rate": 3.619222365898248e-05, + "loss": 2.2733, + "step": 5576000 + }, + { + "epoch": 27.63, + "learning_rate": 3.6190985072556394e-05, + "loss": 2.2543, + "step": 5576500 + }, + { + "epoch": 27.63, + "learning_rate": 3.618974896330316e-05, + "loss": 2.2541, + "step": 5577000 + }, + { + "epoch": 27.63, + "learning_rate": 3.618851037687708e-05, + "loss": 2.2615, + "step": 5577500 + }, + { + "epoch": 27.64, + "learning_rate": 3.6187271790450996e-05, + "loss": 2.2746, + "step": 5578000 + }, + { + "epoch": 27.64, + "learning_rate": 3.6186035681197765e-05, + "loss": 2.2567, + "step": 5578500 + }, + { + "epoch": 27.64, + "learning_rate": 3.6184799571944534e-05, + "loss": 2.2616, + "step": 5579000 + }, + { + "epoch": 27.64, + "learning_rate": 3.618356098551845e-05, + "loss": 2.2827, + "step": 5579500 + }, + { + "epoch": 27.65, + "learning_rate": 3.618232239909237e-05, + "loss": 2.2811, + "step": 5580000 + }, + { + "epoch": 27.65, + "learning_rate": 3.6181083812666285e-05, + "loss": 2.2912, + "step": 5580500 + }, + { + "epoch": 27.65, + "learning_rate": 3.61798452262402e-05, + "loss": 2.31, + "step": 5581000 + }, + { + "epoch": 27.65, + "learning_rate": 3.617860663981412e-05, + "loss": 2.2568, + "step": 5581500 + }, + { + "epoch": 27.66, + "learning_rate": 3.6177368053388035e-05, + "loss": 2.273, + "step": 5582000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617612946696195e-05, + "loss": 2.2506, + "step": 5582500 + }, + { + "epoch": 27.66, + "learning_rate": 3.617489088053586e-05, + "loss": 2.2841, + "step": 5583000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617365477128263e-05, + "loss": 2.2624, + "step": 5583500 + }, + { + "epoch": 27.67, + "learning_rate": 3.617241618485655e-05, + "loss": 2.2689, + "step": 5584000 + }, + { + "epoch": 27.67, + "learning_rate": 3.6171177598430465e-05, + "loss": 2.262, + "step": 5584500 + }, + { + "epoch": 27.67, + "learning_rate": 3.616993901200438e-05, + "loss": 2.2513, + "step": 5585000 + }, + { + "epoch": 27.67, + "learning_rate": 3.61687004255783e-05, + "loss": 2.2472, + "step": 5585500 + }, + { + "epoch": 27.67, + "learning_rate": 3.6167461839152216e-05, + "loss": 2.2559, + "step": 5586000 + }, + { + "epoch": 27.68, + "learning_rate": 3.6166223252726126e-05, + "loss": 2.2452, + "step": 5586500 + }, + { + "epoch": 27.68, + "learning_rate": 3.616498466630004e-05, + "loss": 2.2461, + "step": 5587000 + }, + { + "epoch": 27.68, + "learning_rate": 3.616374607987396e-05, + "loss": 2.2635, + "step": 5587500 + }, + { + "epoch": 27.68, + "learning_rate": 3.616251244779358e-05, + "loss": 2.2708, + "step": 5588000 + }, + { + "epoch": 27.69, + "learning_rate": 3.61612738613675e-05, + "loss": 2.2443, + "step": 5588500 + }, + { + "epoch": 27.69, + "learning_rate": 3.6160035274941415e-05, + "loss": 2.2791, + "step": 5589000 + }, + { + "epoch": 27.69, + "learning_rate": 3.615879668851533e-05, + "loss": 2.2916, + "step": 5589500 + }, + { + "epoch": 27.69, + "learning_rate": 3.615755810208925e-05, + "loss": 2.2805, + "step": 5590000 + }, + { + "epoch": 27.7, + "learning_rate": 3.6156319515663165e-05, + "loss": 2.2724, + "step": 5590500 + }, + { + "epoch": 27.7, + "learning_rate": 3.615508092923708e-05, + "loss": 2.2532, + "step": 5591000 + }, + { + "epoch": 27.7, + "learning_rate": 3.6153842342811e-05, + "loss": 2.2645, + "step": 5591500 + }, + { + "epoch": 27.7, + "learning_rate": 3.6152603756384916e-05, + "loss": 2.264, + "step": 5592000 + }, + { + "epoch": 27.71, + "learning_rate": 3.6151365169958826e-05, + "loss": 2.2729, + "step": 5592500 + }, + { + "epoch": 27.71, + "learning_rate": 3.615012658353274e-05, + "loss": 2.3001, + "step": 5593000 + }, + { + "epoch": 27.71, + "learning_rate": 3.614888799710666e-05, + "loss": 2.2556, + "step": 5593500 + }, + { + "epoch": 27.71, + "learning_rate": 3.6147651887853436e-05, + "loss": 2.2536, + "step": 5594000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614641330142735e-05, + "loss": 2.2779, + "step": 5594500 + }, + { + "epoch": 27.72, + "learning_rate": 3.614517471500127e-05, + "loss": 2.2704, + "step": 5595000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614393612857518e-05, + "loss": 2.2851, + "step": 5595500 + }, + { + "epoch": 27.72, + "learning_rate": 3.6142697542149097e-05, + "loss": 2.2879, + "step": 5596000 + }, + { + "epoch": 27.73, + "learning_rate": 3.6141461432895865e-05, + "loss": 2.2571, + "step": 5596500 + }, + { + "epoch": 27.73, + "learning_rate": 3.614022284646978e-05, + "loss": 2.2481, + "step": 5597000 + }, + { + "epoch": 27.73, + "learning_rate": 3.61389842600437e-05, + "loss": 2.2497, + "step": 5597500 + }, + { + "epoch": 27.73, + "learning_rate": 3.6137745673617616e-05, + "loss": 2.2729, + "step": 5598000 + }, + { + "epoch": 27.74, + "learning_rate": 3.6136509564364385e-05, + "loss": 2.2782, + "step": 5598500 + }, + { + "epoch": 27.74, + "learning_rate": 3.61352709779383e-05, + "loss": 2.2591, + "step": 5599000 + }, + { + "epoch": 27.74, + "learning_rate": 3.613403239151222e-05, + "loss": 2.2868, + "step": 5599500 + }, + { + "epoch": 27.74, + "learning_rate": 3.6132793805086136e-05, + "loss": 2.2517, + "step": 5600000 + }, + { + "epoch": 27.75, + "learning_rate": 3.613155521866005e-05, + "loss": 2.2596, + "step": 5600500 + }, + { + "epoch": 27.75, + "learning_rate": 3.613031663223397e-05, + "loss": 2.2664, + "step": 5601000 + }, + { + "epoch": 27.75, + "learning_rate": 3.6129078045807887e-05, + "loss": 2.2664, + "step": 5601500 + }, + { + "epoch": 27.75, + "learning_rate": 3.61278394593818e-05, + "loss": 2.2545, + "step": 5602000 + }, + { + "epoch": 27.76, + "learning_rate": 3.6126600872955714e-05, + "loss": 2.2648, + "step": 5602500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612536476370248e-05, + "loss": 2.2745, + "step": 5603000 + }, + { + "epoch": 27.76, + "learning_rate": 3.612412865444925e-05, + "loss": 2.2538, + "step": 5603500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612289006802317e-05, + "loss": 2.2435, + "step": 5604000 + }, + { + "epoch": 27.77, + "learning_rate": 3.6121651481597085e-05, + "loss": 2.2784, + "step": 5604500 + }, + { + "epoch": 27.77, + "learning_rate": 3.6120412895171e-05, + "loss": 2.2727, + "step": 5605000 + }, + { + "epoch": 27.77, + "learning_rate": 3.611917430874492e-05, + "loss": 2.2786, + "step": 5605500 + }, + { + "epoch": 27.77, + "learning_rate": 3.6117935722318836e-05, + "loss": 2.2708, + "step": 5606000 + }, + { + "epoch": 27.78, + "learning_rate": 3.6116699613065605e-05, + "loss": 2.268, + "step": 5606500 + }, + { + "epoch": 27.78, + "learning_rate": 3.611546102663952e-05, + "loss": 2.2608, + "step": 5607000 + }, + { + "epoch": 27.78, + "learning_rate": 3.611422244021344e-05, + "loss": 2.2927, + "step": 5607500 + }, + { + "epoch": 27.78, + "learning_rate": 3.6112983853787356e-05, + "loss": 2.2539, + "step": 5608000 + }, + { + "epoch": 27.79, + "learning_rate": 3.6111745267361266e-05, + "loss": 2.2372, + "step": 5608500 + }, + { + "epoch": 27.79, + "learning_rate": 3.611050668093518e-05, + "loss": 2.2446, + "step": 5609000 + }, + { + "epoch": 27.79, + "learning_rate": 3.61092680945091e-05, + "loss": 2.2775, + "step": 5609500 + }, + { + "epoch": 27.79, + "learning_rate": 3.610803446242872e-05, + "loss": 2.2643, + "step": 5610000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610679587600264e-05, + "loss": 2.2885, + "step": 5610500 + }, + { + "epoch": 27.8, + "learning_rate": 3.6105557289576554e-05, + "loss": 2.2784, + "step": 5611000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610431870315047e-05, + "loss": 2.2686, + "step": 5611500 + }, + { + "epoch": 27.8, + "learning_rate": 3.610308011672439e-05, + "loss": 2.2745, + "step": 5612000 + }, + { + "epoch": 27.81, + "learning_rate": 3.6101841530298305e-05, + "loss": 2.2697, + "step": 5612500 + }, + { + "epoch": 27.81, + "learning_rate": 3.610060294387222e-05, + "loss": 2.2637, + "step": 5613000 + }, + { + "epoch": 27.81, + "learning_rate": 3.609936435744614e-05, + "loss": 2.2522, + "step": 5613500 + }, + { + "epoch": 27.81, + "learning_rate": 3.6098125771020056e-05, + "loss": 2.2637, + "step": 5614000 + }, + { + "epoch": 27.82, + "learning_rate": 3.609688718459397e-05, + "loss": 2.275, + "step": 5614500 + }, + { + "epoch": 27.82, + "learning_rate": 3.6095653552513586e-05, + "loss": 2.2715, + "step": 5615000 + }, + { + "epoch": 27.82, + "learning_rate": 3.6094414966087503e-05, + "loss": 2.2733, + "step": 5615500 + }, + { + "epoch": 27.82, + "learning_rate": 3.609317637966142e-05, + "loss": 2.2502, + "step": 5616000 + }, + { + "epoch": 27.83, + "learning_rate": 3.609193779323534e-05, + "loss": 2.2829, + "step": 5616500 + }, + { + "epoch": 27.83, + "learning_rate": 3.6090699206809254e-05, + "loss": 2.2659, + "step": 5617000 + }, + { + "epoch": 27.83, + "learning_rate": 3.608946062038317e-05, + "loss": 2.2579, + "step": 5617500 + }, + { + "epoch": 27.83, + "learning_rate": 3.608822203395709e-05, + "loss": 2.2465, + "step": 5618000 + }, + { + "epoch": 27.84, + "learning_rate": 3.6086983447531005e-05, + "loss": 2.2796, + "step": 5618500 + }, + { + "epoch": 27.84, + "learning_rate": 3.608574486110492e-05, + "loss": 2.2645, + "step": 5619000 + }, + { + "epoch": 27.84, + "learning_rate": 3.608450627467884e-05, + "loss": 2.2538, + "step": 5619500 + }, + { + "epoch": 27.84, + "learning_rate": 3.6083267688252756e-05, + "loss": 2.2571, + "step": 5620000 + }, + { + "epoch": 27.85, + "learning_rate": 3.608202910182667e-05, + "loss": 2.2596, + "step": 5620500 + }, + { + "epoch": 27.85, + "learning_rate": 3.608079051540059e-05, + "loss": 2.2624, + "step": 5621000 + }, + { + "epoch": 27.85, + "learning_rate": 3.6079551928974507e-05, + "loss": 2.2717, + "step": 5621500 + }, + { + "epoch": 27.85, + "learning_rate": 3.607831829689412e-05, + "loss": 2.2788, + "step": 5622000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607707971046804e-05, + "loss": 2.2695, + "step": 5622500 + }, + { + "epoch": 27.86, + "learning_rate": 3.6075841124041954e-05, + "loss": 2.2771, + "step": 5623000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607460253761587e-05, + "loss": 2.2837, + "step": 5623500 + }, + { + "epoch": 27.86, + "learning_rate": 3.607336395118979e-05, + "loss": 2.2757, + "step": 5624000 + }, + { + "epoch": 27.87, + "learning_rate": 3.6072125364763705e-05, + "loss": 2.2864, + "step": 5624500 + }, + { + "epoch": 27.87, + "learning_rate": 3.607088677833762e-05, + "loss": 2.2869, + "step": 5625000 + }, + { + "epoch": 27.87, + "learning_rate": 3.606964819191154e-05, + "loss": 2.2667, + "step": 5625500 + }, + { + "epoch": 27.87, + "learning_rate": 3.6068409605485456e-05, + "loss": 2.2932, + "step": 5626000 + }, + { + "epoch": 27.88, + "learning_rate": 3.606717349623222e-05, + "loss": 2.2459, + "step": 5626500 + }, + { + "epoch": 27.88, + "learning_rate": 3.6065934909806135e-05, + "loss": 2.2514, + "step": 5627000 + }, + { + "epoch": 27.88, + "learning_rate": 3.606469632338005e-05, + "loss": 2.2673, + "step": 5627500 + }, + { + "epoch": 27.88, + "learning_rate": 3.606345773695397e-05, + "loss": 2.2727, + "step": 5628000 + }, + { + "epoch": 27.89, + "learning_rate": 3.6062219150527886e-05, + "loss": 2.2612, + "step": 5628500 + }, + { + "epoch": 27.89, + "learning_rate": 3.6060983041274654e-05, + "loss": 2.2404, + "step": 5629000 + }, + { + "epoch": 27.89, + "learning_rate": 3.605974445484857e-05, + "loss": 2.2698, + "step": 5629500 + }, + { + "epoch": 27.89, + "learning_rate": 3.605850834559534e-05, + "loss": 2.2836, + "step": 5630000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605726975916926e-05, + "loss": 2.2787, + "step": 5630500 + }, + { + "epoch": 27.9, + "learning_rate": 3.6056031172743174e-05, + "loss": 2.2894, + "step": 5631000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605479506348994e-05, + "loss": 2.249, + "step": 5631500 + }, + { + "epoch": 27.9, + "learning_rate": 3.605355647706386e-05, + "loss": 2.2716, + "step": 5632000 + }, + { + "epoch": 27.91, + "learning_rate": 3.605231789063778e-05, + "loss": 2.2512, + "step": 5632500 + }, + { + "epoch": 27.91, + "learning_rate": 3.6051079304211694e-05, + "loss": 2.2884, + "step": 5633000 + }, + { + "epoch": 27.91, + "learning_rate": 3.604984071778561e-05, + "loss": 2.2531, + "step": 5633500 + }, + { + "epoch": 27.91, + "learning_rate": 3.604860213135952e-05, + "loss": 2.2674, + "step": 5634000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604736354493344e-05, + "loss": 2.2461, + "step": 5634500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604612743568021e-05, + "loss": 2.2815, + "step": 5635000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604488884925413e-05, + "loss": 2.2463, + "step": 5635500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604365026282804e-05, + "loss": 2.2841, + "step": 5636000 + }, + { + "epoch": 27.93, + "learning_rate": 3.604241167640196e-05, + "loss": 2.2488, + "step": 5636500 + }, + { + "epoch": 27.93, + "learning_rate": 3.6041173089975874e-05, + "loss": 2.2632, + "step": 5637000 + }, + { + "epoch": 27.93, + "learning_rate": 3.603993698072264e-05, + "loss": 2.2737, + "step": 5637500 + }, + { + "epoch": 27.93, + "learning_rate": 3.603869839429656e-05, + "loss": 2.2868, + "step": 5638000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603745980787048e-05, + "loss": 2.244, + "step": 5638500 + }, + { + "epoch": 27.94, + "learning_rate": 3.6036221221444394e-05, + "loss": 2.2787, + "step": 5639000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603498263501831e-05, + "loss": 2.2593, + "step": 5639500 + }, + { + "epoch": 27.94, + "learning_rate": 3.603374404859222e-05, + "loss": 2.2728, + "step": 5640000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603250546216614e-05, + "loss": 2.2731, + "step": 5640500 + }, + { + "epoch": 27.95, + "learning_rate": 3.6031266875740055e-05, + "loss": 2.2936, + "step": 5641000 + }, + { + "epoch": 27.95, + "learning_rate": 3.603003076648683e-05, + "loss": 2.2646, + "step": 5641500 + }, + { + "epoch": 27.95, + "learning_rate": 3.602879218006075e-05, + "loss": 2.3049, + "step": 5642000 + }, + { + "epoch": 27.95, + "learning_rate": 3.6027553593634664e-05, + "loss": 2.2743, + "step": 5642500 + }, + { + "epoch": 27.96, + "learning_rate": 3.6026315007208574e-05, + "loss": 2.2442, + "step": 5643000 + }, + { + "epoch": 27.96, + "learning_rate": 3.602507642078249e-05, + "loss": 2.2908, + "step": 5643500 + }, + { + "epoch": 27.96, + "learning_rate": 3.602383783435641e-05, + "loss": 2.2864, + "step": 5644000 + }, + { + "epoch": 27.96, + "learning_rate": 3.6022599247930325e-05, + "loss": 2.2823, + "step": 5644500 + }, + { + "epoch": 27.97, + "learning_rate": 3.602136066150424e-05, + "loss": 2.2878, + "step": 5645000 + }, + { + "epoch": 27.97, + "learning_rate": 3.602012207507815e-05, + "loss": 2.2742, + "step": 5645500 + }, + { + "epoch": 27.97, + "learning_rate": 3.601888844299778e-05, + "loss": 2.2998, + "step": 5646000 + }, + { + "epoch": 27.97, + "learning_rate": 3.6017649856571697e-05, + "loss": 2.2767, + "step": 5646500 + }, + { + "epoch": 27.98, + "learning_rate": 3.6016411270145613e-05, + "loss": 2.264, + "step": 5647000 + }, + { + "epoch": 27.98, + "learning_rate": 3.601517268371953e-05, + "loss": 2.2855, + "step": 5647500 + }, + { + "epoch": 27.98, + "learning_rate": 3.601393409729345e-05, + "loss": 2.2364, + "step": 5648000 + }, + { + "epoch": 27.98, + "learning_rate": 3.6012695510867364e-05, + "loss": 2.2728, + "step": 5648500 + }, + { + "epoch": 27.99, + "learning_rate": 3.601145692444128e-05, + "loss": 2.2712, + "step": 5649000 + }, + { + "epoch": 27.99, + "learning_rate": 3.601021833801519e-05, + "loss": 2.2799, + "step": 5649500 + }, + { + "epoch": 27.99, + "learning_rate": 3.600897975158911e-05, + "loss": 2.2532, + "step": 5650000 + }, + { + "epoch": 27.99, + "learning_rate": 3.6007741165163025e-05, + "loss": 2.2599, + "step": 5650500 + }, + { + "epoch": 28.0, + "learning_rate": 3.600650257873694e-05, + "loss": 2.2928, + "step": 5651000 + }, + { + "epoch": 28.0, + "learning_rate": 3.600526399231086e-05, + "loss": 2.2934, + "step": 5651500 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.6571310181504103, + "eval_accuracy_mlm": 0.6123743861697156, + "eval_accuracy_nsp": 0.8682572492047741, + "eval_loss": 2.3233656883239746, + "eval_runtime": 146.0438, + "eval_samples_per_second": 1745.771, + "eval_steps_per_second": 72.745, + "step": 5651604 + }, + { + "epoch": 28.0, + "learning_rate": 3.600402540588477e-05, + "loss": 2.2378, + "step": 5652000 + }, + { + "epoch": 28.0, + "learning_rate": 3.6002786819458686e-05, + "loss": 2.2507, + "step": 5652500 + }, + { + "epoch": 28.01, + "learning_rate": 3.6001550710205455e-05, + "loss": 2.2047, + "step": 5653000 + }, + { + "epoch": 28.01, + "learning_rate": 3.600031212377937e-05, + "loss": 2.2663, + "step": 5653500 + }, + { + "epoch": 28.01, + "learning_rate": 3.599907353735329e-05, + "loss": 2.2253, + "step": 5654000 + }, + { + "epoch": 28.01, + "learning_rate": 3.5997834950927206e-05, + "loss": 2.2129, + "step": 5654500 + }, + { + "epoch": 28.02, + "learning_rate": 3.599659636450112e-05, + "loss": 2.2342, + "step": 5655000 + }, + { + "epoch": 28.02, + "learning_rate": 3.599535777807504e-05, + "loss": 2.2216, + "step": 5655500 + }, + { + "epoch": 28.02, + "learning_rate": 3.5994119191648957e-05, + "loss": 2.211, + "step": 5656000 + }, + { + "epoch": 28.02, + "learning_rate": 3.5992883082395725e-05, + "loss": 2.2144, + "step": 5656500 + }, + { + "epoch": 28.03, + "learning_rate": 3.599164449596964e-05, + "loss": 2.2563, + "step": 5657000 + }, + { + "epoch": 28.03, + "learning_rate": 3.599040590954356e-05, + "loss": 2.2285, + "step": 5657500 + }, + { + "epoch": 28.03, + "learning_rate": 3.5989167323117476e-05, + "loss": 2.2173, + "step": 5658000 + }, + { + "epoch": 28.03, + "learning_rate": 3.598792873669139e-05, + "loss": 2.2218, + "step": 5658500 + }, + { + "epoch": 28.04, + "learning_rate": 3.5986692627438155e-05, + "loss": 2.2319, + "step": 5659000 + }, + { + "epoch": 28.04, + "learning_rate": 3.598545404101207e-05, + "loss": 2.2306, + "step": 5659500 + }, + { + "epoch": 28.04, + "learning_rate": 3.598421793175885e-05, + "loss": 2.2447, + "step": 5660000 + }, + { + "epoch": 28.04, + "learning_rate": 3.5982979345332765e-05, + "loss": 2.26, + "step": 5660500 + }, + { + "epoch": 28.05, + "learning_rate": 3.598174075890668e-05, + "loss": 2.2248, + "step": 5661000 + }, + { + "epoch": 28.05, + "learning_rate": 3.59805021724806e-05, + "loss": 2.2226, + "step": 5661500 + }, + { + "epoch": 28.05, + "learning_rate": 3.597926358605451e-05, + "loss": 2.2503, + "step": 5662000 + }, + { + "epoch": 28.05, + "learning_rate": 3.5978024999628425e-05, + "loss": 2.2375, + "step": 5662500 + }, + { + "epoch": 28.06, + "learning_rate": 3.597678641320234e-05, + "loss": 2.2391, + "step": 5663000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597554782677626e-05, + "loss": 2.2428, + "step": 5663500 + }, + { + "epoch": 28.06, + "learning_rate": 3.5974309240350176e-05, + "loss": 2.2463, + "step": 5664000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597307065392409e-05, + "loss": 2.2465, + "step": 5664500 + }, + { + "epoch": 28.07, + "learning_rate": 3.597183206749801e-05, + "loss": 2.2468, + "step": 5665000 + }, + { + "epoch": 28.07, + "learning_rate": 3.597059348107192e-05, + "loss": 2.2688, + "step": 5665500 + }, + { + "epoch": 28.07, + "learning_rate": 3.596935737181869e-05, + "loss": 2.2244, + "step": 5666000 + }, + { + "epoch": 28.07, + "learning_rate": 3.5968118785392606e-05, + "loss": 2.2423, + "step": 5666500 + }, + { + "epoch": 28.08, + "learning_rate": 3.596688019896652e-05, + "loss": 2.2341, + "step": 5667000 + }, + { + "epoch": 28.08, + "learning_rate": 3.596564161254044e-05, + "loss": 2.2489, + "step": 5667500 + }, + { + "epoch": 28.08, + "learning_rate": 3.596440302611436e-05, + "loss": 2.2332, + "step": 5668000 + }, + { + "epoch": 28.08, + "learning_rate": 3.596316939403398e-05, + "loss": 2.2225, + "step": 5668500 + }, + { + "epoch": 28.09, + "learning_rate": 3.5961930807607894e-05, + "loss": 2.2174, + "step": 5669000 + }, + { + "epoch": 28.09, + "learning_rate": 3.596069222118181e-05, + "loss": 2.2281, + "step": 5669500 + }, + { + "epoch": 28.09, + "learning_rate": 3.595945363475573e-05, + "loss": 2.2196, + "step": 5670000 + }, + { + "epoch": 28.09, + "learning_rate": 3.5958215048329645e-05, + "loss": 2.273, + "step": 5670500 + }, + { + "epoch": 28.1, + "learning_rate": 3.595697646190356e-05, + "loss": 2.2395, + "step": 5671000 + }, + { + "epoch": 28.1, + "learning_rate": 3.595573787547747e-05, + "loss": 2.2184, + "step": 5671500 + }, + { + "epoch": 28.1, + "learning_rate": 3.595449928905139e-05, + "loss": 2.2504, + "step": 5672000 + }, + { + "epoch": 28.1, + "learning_rate": 3.5953260702625306e-05, + "loss": 2.2414, + "step": 5672500 + }, + { + "epoch": 28.11, + "learning_rate": 3.595202459337208e-05, + "loss": 2.2489, + "step": 5673000 + }, + { + "epoch": 28.11, + "learning_rate": 3.5950786006946e-05, + "loss": 2.2339, + "step": 5673500 + }, + { + "epoch": 28.11, + "learning_rate": 3.5949547420519916e-05, + "loss": 2.2422, + "step": 5674000 + }, + { + "epoch": 28.11, + "learning_rate": 3.594831626561239e-05, + "loss": 2.2523, + "step": 5674500 + }, + { + "epoch": 28.12, + "learning_rate": 3.59470776791863e-05, + "loss": 2.2267, + "step": 5675000 + }, + { + "epoch": 28.12, + "learning_rate": 3.594584156993307e-05, + "loss": 2.2262, + "step": 5675500 + }, + { + "epoch": 28.12, + "learning_rate": 3.5944602983506984e-05, + "loss": 2.2335, + "step": 5676000 + }, + { + "epoch": 28.12, + "learning_rate": 3.59433643970809e-05, + "loss": 2.2257, + "step": 5676500 + }, + { + "epoch": 28.13, + "learning_rate": 3.594212581065482e-05, + "loss": 2.2219, + "step": 5677000 + }, + { + "epoch": 28.13, + "learning_rate": 3.5940887224228735e-05, + "loss": 2.2501, + "step": 5677500 + }, + { + "epoch": 28.13, + "learning_rate": 3.5939648637802645e-05, + "loss": 2.2395, + "step": 5678000 + }, + { + "epoch": 28.13, + "learning_rate": 3.593841005137656e-05, + "loss": 2.2518, + "step": 5678500 + }, + { + "epoch": 28.14, + "learning_rate": 3.593717394212334e-05, + "loss": 2.2567, + "step": 5679000 + }, + { + "epoch": 28.14, + "learning_rate": 3.5935935355697254e-05, + "loss": 2.2426, + "step": 5679500 + }, + { + "epoch": 28.14, + "learning_rate": 3.593469676927117e-05, + "loss": 2.2331, + "step": 5680000 + }, + { + "epoch": 28.14, + "learning_rate": 3.593346066001794e-05, + "loss": 2.2449, + "step": 5680500 + }, + { + "epoch": 28.15, + "learning_rate": 3.593222207359185e-05, + "loss": 2.2522, + "step": 5681000 + }, + { + "epoch": 28.15, + "learning_rate": 3.593098348716577e-05, + "loss": 2.238, + "step": 5681500 + }, + { + "epoch": 28.15, + "learning_rate": 3.5929744900739684e-05, + "loss": 2.2348, + "step": 5682000 + }, + { + "epoch": 28.15, + "learning_rate": 3.59285063143136e-05, + "loss": 2.2584, + "step": 5682500 + }, + { + "epoch": 28.16, + "learning_rate": 3.592726772788752e-05, + "loss": 2.2451, + "step": 5683000 + }, + { + "epoch": 28.16, + "learning_rate": 3.5926029141461435e-05, + "loss": 2.2386, + "step": 5683500 + }, + { + "epoch": 28.16, + "learning_rate": 3.592479055503535e-05, + "loss": 2.2602, + "step": 5684000 + }, + { + "epoch": 28.16, + "learning_rate": 3.592355196860926e-05, + "loss": 2.2475, + "step": 5684500 + }, + { + "epoch": 28.17, + "learning_rate": 3.592231338218318e-05, + "loss": 2.2594, + "step": 5685000 + }, + { + "epoch": 28.17, + "learning_rate": 3.5921074795757096e-05, + "loss": 2.2606, + "step": 5685500 + }, + { + "epoch": 28.17, + "learning_rate": 3.591983620933101e-05, + "loss": 2.2404, + "step": 5686000 + }, + { + "epoch": 28.17, + "learning_rate": 3.591859762290493e-05, + "loss": 2.219, + "step": 5686500 + }, + { + "epoch": 28.18, + "learning_rate": 3.591735903647885e-05, + "loss": 2.2316, + "step": 5687000 + }, + { + "epoch": 28.18, + "learning_rate": 3.5916122927225615e-05, + "loss": 2.2293, + "step": 5687500 + }, + { + "epoch": 28.18, + "learning_rate": 3.591488434079953e-05, + "loss": 2.2417, + "step": 5688000 + }, + { + "epoch": 28.18, + "learning_rate": 3.591364575437345e-05, + "loss": 2.2511, + "step": 5688500 + }, + { + "epoch": 28.19, + "learning_rate": 3.5912407167947366e-05, + "loss": 2.2389, + "step": 5689000 + }, + { + "epoch": 28.19, + "learning_rate": 3.591116858152128e-05, + "loss": 2.2493, + "step": 5689500 + }, + { + "epoch": 28.19, + "learning_rate": 3.59099299950952e-05, + "loss": 2.2295, + "step": 5690000 + }, + { + "epoch": 28.19, + "learning_rate": 3.590869140866912e-05, + "loss": 2.2424, + "step": 5690500 + }, + { + "epoch": 28.2, + "learning_rate": 3.5907452822243034e-05, + "loss": 2.267, + "step": 5691000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590621423581695e-05, + "loss": 2.2283, + "step": 5691500 + }, + { + "epoch": 28.2, + "learning_rate": 3.590497564939087e-05, + "loss": 2.2434, + "step": 5692000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590373954013763e-05, + "loss": 2.2637, + "step": 5692500 + }, + { + "epoch": 28.21, + "learning_rate": 3.590250095371155e-05, + "loss": 2.2305, + "step": 5693000 + }, + { + "epoch": 28.21, + "learning_rate": 3.5901262367285464e-05, + "loss": 2.2433, + "step": 5693500 + }, + { + "epoch": 28.21, + "learning_rate": 3.590002873520509e-05, + "loss": 2.2236, + "step": 5694000 + }, + { + "epoch": 28.21, + "learning_rate": 3.5898790148779e-05, + "loss": 2.261, + "step": 5694500 + }, + { + "epoch": 28.21, + "learning_rate": 3.589755156235292e-05, + "loss": 2.2614, + "step": 5695000 + }, + { + "epoch": 28.22, + "learning_rate": 3.5896312975926835e-05, + "loss": 2.2338, + "step": 5695500 + }, + { + "epoch": 28.22, + "learning_rate": 3.589507438950075e-05, + "loss": 2.2459, + "step": 5696000 + }, + { + "epoch": 28.22, + "learning_rate": 3.589383580307467e-05, + "loss": 2.2287, + "step": 5696500 + }, + { + "epoch": 28.22, + "learning_rate": 3.589259721664858e-05, + "loss": 2.2181, + "step": 5697000 + }, + { + "epoch": 28.23, + "learning_rate": 3.5891358630222496e-05, + "loss": 2.2656, + "step": 5697500 + }, + { + "epoch": 28.23, + "learning_rate": 3.589012252096927e-05, + "loss": 2.2618, + "step": 5698000 + }, + { + "epoch": 28.23, + "learning_rate": 3.588888393454319e-05, + "loss": 2.2414, + "step": 5698500 + }, + { + "epoch": 28.23, + "learning_rate": 3.5887645348117106e-05, + "loss": 2.2461, + "step": 5699000 + }, + { + "epoch": 28.24, + "learning_rate": 3.588640676169102e-05, + "loss": 2.2572, + "step": 5699500 + }, + { + "epoch": 28.24, + "learning_rate": 3.588516817526493e-05, + "loss": 2.2578, + "step": 5700000 + }, + { + "epoch": 28.24, + "learning_rate": 3.588392958883885e-05, + "loss": 2.2657, + "step": 5700500 + }, + { + "epoch": 28.24, + "learning_rate": 3.5882691002412766e-05, + "loss": 2.2494, + "step": 5701000 + }, + { + "epoch": 28.25, + "learning_rate": 3.5881452415986683e-05, + "loss": 2.259, + "step": 5701500 + }, + { + "epoch": 28.25, + "learning_rate": 3.58802138295606e-05, + "loss": 2.2313, + "step": 5702000 + }, + { + "epoch": 28.25, + "learning_rate": 3.587897772030737e-05, + "loss": 2.2567, + "step": 5702500 + }, + { + "epoch": 28.25, + "learning_rate": 3.5877739133881286e-05, + "loss": 2.2508, + "step": 5703000 + }, + { + "epoch": 28.26, + "learning_rate": 3.58765005474552e-05, + "loss": 2.2611, + "step": 5703500 + }, + { + "epoch": 28.26, + "learning_rate": 3.587526196102911e-05, + "loss": 2.2495, + "step": 5704000 + }, + { + "epoch": 28.26, + "learning_rate": 3.587402337460303e-05, + "loss": 2.2509, + "step": 5704500 + }, + { + "epoch": 28.26, + "learning_rate": 3.587278478817695e-05, + "loss": 2.2434, + "step": 5705000 + }, + { + "epoch": 28.27, + "learning_rate": 3.5871546201750864e-05, + "loss": 2.2263, + "step": 5705500 + }, + { + "epoch": 28.27, + "learning_rate": 3.587030761532478e-05, + "loss": 2.2574, + "step": 5706000 + }, + { + "epoch": 28.27, + "learning_rate": 3.58690690288987e-05, + "loss": 2.2488, + "step": 5706500 + }, + { + "epoch": 28.27, + "learning_rate": 3.5867835396818325e-05, + "loss": 2.2452, + "step": 5707000 + }, + { + "epoch": 28.28, + "learning_rate": 3.586659681039224e-05, + "loss": 2.2347, + "step": 5707500 + }, + { + "epoch": 28.28, + "learning_rate": 3.586535822396615e-05, + "loss": 2.2418, + "step": 5708000 + }, + { + "epoch": 28.28, + "learning_rate": 3.586411963754007e-05, + "loss": 2.2475, + "step": 5708500 + }, + { + "epoch": 28.28, + "learning_rate": 3.5862881051113986e-05, + "loss": 2.2471, + "step": 5709000 + }, + { + "epoch": 28.29, + "learning_rate": 3.58616424646879e-05, + "loss": 2.2539, + "step": 5709500 + }, + { + "epoch": 28.29, + "learning_rate": 3.586040387826182e-05, + "loss": 2.2612, + "step": 5710000 + }, + { + "epoch": 28.29, + "learning_rate": 3.585916529183573e-05, + "loss": 2.248, + "step": 5710500 + }, + { + "epoch": 28.29, + "learning_rate": 3.5857929182582506e-05, + "loss": 2.2418, + "step": 5711000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585669059615642e-05, + "loss": 2.2432, + "step": 5711500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585545200973034e-05, + "loss": 2.2694, + "step": 5712000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585421342330425e-05, + "loss": 2.2284, + "step": 5712500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585297483687817e-05, + "loss": 2.2573, + "step": 5713000 + }, + { + "epoch": 28.31, + "learning_rate": 3.585173872762494e-05, + "loss": 2.2553, + "step": 5713500 + }, + { + "epoch": 28.31, + "learning_rate": 3.585050014119886e-05, + "loss": 2.2186, + "step": 5714000 + }, + { + "epoch": 28.31, + "learning_rate": 3.5849261554772776e-05, + "loss": 2.2606, + "step": 5714500 + }, + { + "epoch": 28.31, + "learning_rate": 3.5848022968346686e-05, + "loss": 2.2582, + "step": 5715000 + }, + { + "epoch": 28.32, + "learning_rate": 3.58467843819206e-05, + "loss": 2.2227, + "step": 5715500 + }, + { + "epoch": 28.32, + "learning_rate": 3.584554579549452e-05, + "loss": 2.254, + "step": 5716000 + }, + { + "epoch": 28.32, + "learning_rate": 3.584430720906844e-05, + "loss": 2.2496, + "step": 5716500 + }, + { + "epoch": 28.32, + "learning_rate": 3.5843068622642354e-05, + "loss": 2.2388, + "step": 5717000 + }, + { + "epoch": 28.33, + "learning_rate": 3.5841830036216264e-05, + "loss": 2.243, + "step": 5717500 + }, + { + "epoch": 28.33, + "learning_rate": 3.584059144979018e-05, + "loss": 2.2567, + "step": 5718000 + }, + { + "epoch": 28.33, + "learning_rate": 3.58393528633641e-05, + "loss": 2.2261, + "step": 5718500 + }, + { + "epoch": 28.33, + "learning_rate": 3.5838114276938015e-05, + "loss": 2.2367, + "step": 5719000 + }, + { + "epoch": 28.34, + "learning_rate": 3.5836878167684784e-05, + "loss": 2.272, + "step": 5719500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583564205843156e-05, + "loss": 2.2329, + "step": 5720000 + }, + { + "epoch": 28.34, + "learning_rate": 3.583440594917832e-05, + "loss": 2.2338, + "step": 5720500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583316736275224e-05, + "loss": 2.2532, + "step": 5721000 + }, + { + "epoch": 28.35, + "learning_rate": 3.583193125349901e-05, + "loss": 2.2604, + "step": 5721500 + }, + { + "epoch": 28.35, + "learning_rate": 3.5830692667072924e-05, + "loss": 2.274, + "step": 5722000 + }, + { + "epoch": 28.35, + "learning_rate": 3.582945408064684e-05, + "loss": 2.243, + "step": 5722500 + }, + { + "epoch": 28.35, + "learning_rate": 3.582821549422076e-05, + "loss": 2.2256, + "step": 5723000 + }, + { + "epoch": 28.36, + "learning_rate": 3.5826976907794675e-05, + "loss": 2.2452, + "step": 5723500 + }, + { + "epoch": 28.36, + "learning_rate": 3.582573832136859e-05, + "loss": 2.2638, + "step": 5724000 + }, + { + "epoch": 28.36, + "learning_rate": 3.582449973494251e-05, + "loss": 2.2412, + "step": 5724500 + }, + { + "epoch": 28.36, + "learning_rate": 3.5823261148516426e-05, + "loss": 2.2771, + "step": 5725000 + }, + { + "epoch": 28.37, + "learning_rate": 3.582202256209034e-05, + "loss": 2.2621, + "step": 5725500 + }, + { + "epoch": 28.37, + "learning_rate": 3.5820788930009956e-05, + "loss": 2.23, + "step": 5726000 + }, + { + "epoch": 28.37, + "learning_rate": 3.5819550343583873e-05, + "loss": 2.2686, + "step": 5726500 + }, + { + "epoch": 28.37, + "learning_rate": 3.581831175715779e-05, + "loss": 2.2551, + "step": 5727000 + }, + { + "epoch": 28.38, + "learning_rate": 3.581707317073171e-05, + "loss": 2.2563, + "step": 5727500 + }, + { + "epoch": 28.38, + "learning_rate": 3.5815834584305624e-05, + "loss": 2.2388, + "step": 5728000 + }, + { + "epoch": 28.38, + "learning_rate": 3.581459599787954e-05, + "loss": 2.2399, + "step": 5728500 + }, + { + "epoch": 28.38, + "learning_rate": 3.581335741145346e-05, + "loss": 2.2514, + "step": 5729000 + }, + { + "epoch": 28.39, + "learning_rate": 3.581212130220023e-05, + "loss": 2.2183, + "step": 5729500 + }, + { + "epoch": 28.39, + "learning_rate": 3.5810885192946996e-05, + "loss": 2.236, + "step": 5730000 + }, + { + "epoch": 28.39, + "learning_rate": 3.580964660652091e-05, + "loss": 2.2386, + "step": 5730500 + }, + { + "epoch": 28.39, + "learning_rate": 3.580840802009483e-05, + "loss": 2.251, + "step": 5731000 + }, + { + "epoch": 28.4, + "learning_rate": 3.5807169433668746e-05, + "loss": 2.2459, + "step": 5731500 + }, + { + "epoch": 28.4, + "learning_rate": 3.5805930847242657e-05, + "loss": 2.2252, + "step": 5732000 + }, + { + "epoch": 28.4, + "learning_rate": 3.5804692260816574e-05, + "loss": 2.247, + "step": 5732500 + }, + { + "epoch": 28.4, + "learning_rate": 3.580345367439049e-05, + "loss": 2.2305, + "step": 5733000 + }, + { + "epoch": 28.41, + "learning_rate": 3.580221508796441e-05, + "loss": 2.2407, + "step": 5733500 + }, + { + "epoch": 28.41, + "learning_rate": 3.5800976501538324e-05, + "loss": 2.251, + "step": 5734000 + }, + { + "epoch": 28.41, + "learning_rate": 3.579973791511224e-05, + "loss": 2.2658, + "step": 5734500 + }, + { + "epoch": 28.41, + "learning_rate": 3.579849932868616e-05, + "loss": 2.2874, + "step": 5735000 + }, + { + "epoch": 28.42, + "learning_rate": 3.5797260742260075e-05, + "loss": 2.2551, + "step": 5735500 + }, + { + "epoch": 28.42, + "learning_rate": 3.579602215583399e-05, + "loss": 2.2694, + "step": 5736000 + }, + { + "epoch": 28.42, + "learning_rate": 3.579478356940791e-05, + "loss": 2.2369, + "step": 5736500 + }, + { + "epoch": 28.42, + "learning_rate": 3.5793544982981826e-05, + "loss": 2.2427, + "step": 5737000 + }, + { + "epoch": 28.43, + "learning_rate": 3.579230639655574e-05, + "loss": 2.2528, + "step": 5737500 + }, + { + "epoch": 28.43, + "learning_rate": 3.579106781012966e-05, + "loss": 2.252, + "step": 5738000 + }, + { + "epoch": 28.43, + "learning_rate": 3.578982922370358e-05, + "loss": 2.25, + "step": 5738500 + }, + { + "epoch": 28.43, + "learning_rate": 3.5788590637277494e-05, + "loss": 2.278, + "step": 5739000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578735205085141e-05, + "loss": 2.2768, + "step": 5739500 + }, + { + "epoch": 28.44, + "learning_rate": 3.578611346442533e-05, + "loss": 2.2398, + "step": 5740000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578487735517209e-05, + "loss": 2.2404, + "step": 5740500 + }, + { + "epoch": 28.44, + "learning_rate": 3.5783638768746006e-05, + "loss": 2.2455, + "step": 5741000 + }, + { + "epoch": 28.45, + "learning_rate": 3.578240018231992e-05, + "loss": 2.2767, + "step": 5741500 + }, + { + "epoch": 28.45, + "learning_rate": 3.578116159589384e-05, + "loss": 2.2687, + "step": 5742000 + }, + { + "epoch": 28.45, + "learning_rate": 3.577992300946776e-05, + "loss": 2.2557, + "step": 5742500 + }, + { + "epoch": 28.45, + "learning_rate": 3.5778684423041674e-05, + "loss": 2.25, + "step": 5743000 + }, + { + "epoch": 28.46, + "learning_rate": 3.577744583661559e-05, + "loss": 2.2547, + "step": 5743500 + }, + { + "epoch": 28.46, + "learning_rate": 3.57762072501895e-05, + "loss": 2.2511, + "step": 5744000 + }, + { + "epoch": 28.46, + "learning_rate": 3.577497114093628e-05, + "loss": 2.2485, + "step": 5744500 + }, + { + "epoch": 28.46, + "learning_rate": 3.577373503168304e-05, + "loss": 2.2567, + "step": 5745000 + }, + { + "epoch": 28.47, + "learning_rate": 3.577249892242981e-05, + "loss": 2.2858, + "step": 5745500 + }, + { + "epoch": 28.47, + "learning_rate": 3.5771260336003725e-05, + "loss": 2.2511, + "step": 5746000 + }, + { + "epoch": 28.47, + "learning_rate": 3.577002174957764e-05, + "loss": 2.248, + "step": 5746500 + }, + { + "epoch": 28.47, + "learning_rate": 3.576878564032442e-05, + "loss": 2.2604, + "step": 5747000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576754705389833e-05, + "loss": 2.2871, + "step": 5747500 + }, + { + "epoch": 28.48, + "learning_rate": 3.5766308467472244e-05, + "loss": 2.2474, + "step": 5748000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576506988104616e-05, + "loss": 2.2581, + "step": 5748500 + }, + { + "epoch": 28.48, + "learning_rate": 3.576383129462008e-05, + "loss": 2.228, + "step": 5749000 + }, + { + "epoch": 28.49, + "learning_rate": 3.5762592708193995e-05, + "loss": 2.2505, + "step": 5749500 + }, + { + "epoch": 28.49, + "learning_rate": 3.576135412176791e-05, + "loss": 2.2651, + "step": 5750000 + }, + { + "epoch": 28.49, + "learning_rate": 3.576011553534183e-05, + "loss": 2.24, + "step": 5750500 + }, + { + "epoch": 28.49, + "learning_rate": 3.5758876948915746e-05, + "loss": 2.2268, + "step": 5751000 + }, + { + "epoch": 28.49, + "learning_rate": 3.575763836248966e-05, + "loss": 2.2565, + "step": 5751500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575639977606357e-05, + "loss": 2.25, + "step": 5752000 + }, + { + "epoch": 28.5, + "learning_rate": 3.575516118963749e-05, + "loss": 2.2529, + "step": 5752500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575392508038426e-05, + "loss": 2.2352, + "step": 5753000 + }, + { + "epoch": 28.5, + "learning_rate": 3.5752686493958175e-05, + "loss": 2.2627, + "step": 5753500 + }, + { + "epoch": 28.51, + "learning_rate": 3.575144790753209e-05, + "loss": 2.2574, + "step": 5754000 + }, + { + "epoch": 28.51, + "learning_rate": 3.575020932110601e-05, + "loss": 2.2656, + "step": 5754500 + }, + { + "epoch": 28.51, + "learning_rate": 3.574897321185278e-05, + "loss": 2.2663, + "step": 5755000 + }, + { + "epoch": 28.51, + "learning_rate": 3.5747734625426695e-05, + "loss": 2.2273, + "step": 5755500 + }, + { + "epoch": 28.52, + "learning_rate": 3.574649603900061e-05, + "loss": 2.2567, + "step": 5756000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574525745257453e-05, + "loss": 2.2755, + "step": 5756500 + }, + { + "epoch": 28.52, + "learning_rate": 3.5744018866148446e-05, + "loss": 2.246, + "step": 5757000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574278027972236e-05, + "loss": 2.2647, + "step": 5757500 + }, + { + "epoch": 28.53, + "learning_rate": 3.574154169329628e-05, + "loss": 2.2595, + "step": 5758000 + }, + { + "epoch": 28.53, + "learning_rate": 3.574030558404304e-05, + "loss": 2.2468, + "step": 5758500 + }, + { + "epoch": 28.53, + "learning_rate": 3.573906699761696e-05, + "loss": 2.2384, + "step": 5759000 + }, + { + "epoch": 28.53, + "learning_rate": 3.5737828411190876e-05, + "loss": 2.2599, + "step": 5759500 + }, + { + "epoch": 28.54, + "learning_rate": 3.573658982476479e-05, + "loss": 2.2706, + "step": 5760000 + }, + { + "epoch": 28.54, + "learning_rate": 3.573535123833871e-05, + "loss": 2.2765, + "step": 5760500 + }, + { + "epoch": 28.54, + "learning_rate": 3.5734112651912626e-05, + "loss": 2.2622, + "step": 5761000 + }, + { + "epoch": 28.54, + "learning_rate": 3.573287406548654e-05, + "loss": 2.2588, + "step": 5761500 + }, + { + "epoch": 28.55, + "learning_rate": 3.573163547906046e-05, + "loss": 2.2274, + "step": 5762000 + }, + { + "epoch": 28.55, + "learning_rate": 3.573039689263438e-05, + "loss": 2.264, + "step": 5762500 + }, + { + "epoch": 28.55, + "learning_rate": 3.5729158306208294e-05, + "loss": 2.2544, + "step": 5763000 + }, + { + "epoch": 28.55, + "learning_rate": 3.572792219695506e-05, + "loss": 2.2477, + "step": 5763500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572668361052898e-05, + "loss": 2.2682, + "step": 5764000 + }, + { + "epoch": 28.56, + "learning_rate": 3.57254450241029e-05, + "loss": 2.238, + "step": 5764500 + }, + { + "epoch": 28.56, + "learning_rate": 3.5724206437676814e-05, + "loss": 2.2259, + "step": 5765000 + }, + { + "epoch": 28.56, + "learning_rate": 3.5722970328423576e-05, + "loss": 2.2703, + "step": 5765500 + }, + { + "epoch": 28.57, + "learning_rate": 3.572173174199749e-05, + "loss": 2.2302, + "step": 5766000 + }, + { + "epoch": 28.57, + "learning_rate": 3.572049315557141e-05, + "loss": 2.2499, + "step": 5766500 + }, + { + "epoch": 28.57, + "learning_rate": 3.5719254569145326e-05, + "loss": 2.2483, + "step": 5767000 + }, + { + "epoch": 28.57, + "learning_rate": 3.5718015982719243e-05, + "loss": 2.272, + "step": 5767500 + }, + { + "epoch": 28.58, + "learning_rate": 3.571677987346601e-05, + "loss": 2.2575, + "step": 5768000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571554128703993e-05, + "loss": 2.2488, + "step": 5768500 + }, + { + "epoch": 28.58, + "learning_rate": 3.5714302700613846e-05, + "loss": 2.2621, + "step": 5769000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571306411418776e-05, + "loss": 2.2267, + "step": 5769500 + }, + { + "epoch": 28.59, + "learning_rate": 3.571182552776168e-05, + "loss": 2.2648, + "step": 5770000 + }, + { + "epoch": 28.59, + "learning_rate": 3.57105869413356e-05, + "loss": 2.235, + "step": 5770500 + }, + { + "epoch": 28.59, + "learning_rate": 3.5709348354909514e-05, + "loss": 2.2069, + "step": 5771000 + }, + { + "epoch": 28.59, + "learning_rate": 3.570810976848343e-05, + "loss": 2.2329, + "step": 5771500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570687365923019e-05, + "loss": 2.2703, + "step": 5772000 + }, + { + "epoch": 28.6, + "learning_rate": 3.570563507280411e-05, + "loss": 2.2646, + "step": 5772500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570439648637803e-05, + "loss": 2.2603, + "step": 5773000 + }, + { + "epoch": 28.6, + "learning_rate": 3.5703160377124795e-05, + "loss": 2.2573, + "step": 5773500 + }, + { + "epoch": 28.61, + "learning_rate": 3.5701924267871564e-05, + "loss": 2.2794, + "step": 5774000 + }, + { + "epoch": 28.61, + "learning_rate": 3.570068568144548e-05, + "loss": 2.247, + "step": 5774500 + }, + { + "epoch": 28.61, + "learning_rate": 3.56994470950194e-05, + "loss": 2.2595, + "step": 5775000 + }, + { + "epoch": 28.61, + "learning_rate": 3.569820850859331e-05, + "loss": 2.2653, + "step": 5775500 + }, + { + "epoch": 28.62, + "learning_rate": 3.5696972399340084e-05, + "loss": 2.2649, + "step": 5776000 + }, + { + "epoch": 28.62, + "learning_rate": 3.5695733812914e-05, + "loss": 2.2686, + "step": 5776500 + }, + { + "epoch": 28.62, + "learning_rate": 3.569449522648792e-05, + "loss": 2.2608, + "step": 5777000 + }, + { + "epoch": 28.62, + "learning_rate": 3.5693256640061835e-05, + "loss": 2.2304, + "step": 5777500 + }, + { + "epoch": 28.63, + "learning_rate": 3.5692020530808603e-05, + "loss": 2.2519, + "step": 5778000 + }, + { + "epoch": 28.63, + "learning_rate": 3.569078194438252e-05, + "loss": 2.2614, + "step": 5778500 + }, + { + "epoch": 28.63, + "learning_rate": 3.568954335795644e-05, + "loss": 2.2653, + "step": 5779000 + }, + { + "epoch": 28.63, + "learning_rate": 3.5688304771530354e-05, + "loss": 2.2743, + "step": 5779500 + }, + { + "epoch": 28.64, + "learning_rate": 3.5687066185104264e-05, + "loss": 2.2705, + "step": 5780000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568583007585103e-05, + "loss": 2.2531, + "step": 5780500 + }, + { + "epoch": 28.64, + "learning_rate": 3.568459148942495e-05, + "loss": 2.2766, + "step": 5781000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568335290299887e-05, + "loss": 2.2445, + "step": 5781500 + }, + { + "epoch": 28.65, + "learning_rate": 3.5682114316572784e-05, + "loss": 2.2499, + "step": 5782000 + }, + { + "epoch": 28.65, + "learning_rate": 3.56808757301467e-05, + "loss": 2.2532, + "step": 5782500 + }, + { + "epoch": 28.65, + "learning_rate": 3.567963714372062e-05, + "loss": 2.2463, + "step": 5783000 + }, + { + "epoch": 28.65, + "learning_rate": 3.5678398557294535e-05, + "loss": 2.2268, + "step": 5783500 + }, + { + "epoch": 28.66, + "learning_rate": 3.567715997086845e-05, + "loss": 2.2623, + "step": 5784000 + }, + { + "epoch": 28.66, + "learning_rate": 3.567592138444237e-05, + "loss": 2.2633, + "step": 5784500 + }, + { + "epoch": 28.66, + "learning_rate": 3.567468279801628e-05, + "loss": 2.2451, + "step": 5785000 + }, + { + "epoch": 28.66, + "learning_rate": 3.5673444211590196e-05, + "loss": 2.2643, + "step": 5785500 + }, + { + "epoch": 28.67, + "learning_rate": 3.567220562516411e-05, + "loss": 2.2295, + "step": 5786000 + }, + { + "epoch": 28.67, + "learning_rate": 3.567096703873803e-05, + "loss": 2.2327, + "step": 5786500 + }, + { + "epoch": 28.67, + "learning_rate": 3.5669728452311946e-05, + "loss": 2.2591, + "step": 5787000 + }, + { + "epoch": 28.67, + "learning_rate": 3.5668489865885863e-05, + "loss": 2.2324, + "step": 5787500 + }, + { + "epoch": 28.68, + "learning_rate": 3.566725375663263e-05, + "loss": 2.279, + "step": 5788000 + }, + { + "epoch": 28.68, + "learning_rate": 3.566601517020655e-05, + "loss": 2.2687, + "step": 5788500 + }, + { + "epoch": 28.68, + "learning_rate": 3.566477658378046e-05, + "loss": 2.2327, + "step": 5789000 + }, + { + "epoch": 28.68, + "learning_rate": 3.5663537997354376e-05, + "loss": 2.2751, + "step": 5789500 + }, + { + "epoch": 28.69, + "learning_rate": 3.566230188810115e-05, + "loss": 2.2476, + "step": 5790000 + }, + { + "epoch": 28.69, + "learning_rate": 3.566106330167507e-05, + "loss": 2.2564, + "step": 5790500 + }, + { + "epoch": 28.69, + "learning_rate": 3.5659824715248986e-05, + "loss": 2.2629, + "step": 5791000 + }, + { + "epoch": 28.69, + "learning_rate": 3.5658586128822896e-05, + "loss": 2.2568, + "step": 5791500 + }, + { + "epoch": 28.7, + "learning_rate": 3.565735001956967e-05, + "loss": 2.2414, + "step": 5792000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565611143314359e-05, + "loss": 2.2787, + "step": 5792500 + }, + { + "epoch": 28.7, + "learning_rate": 3.565487532389035e-05, + "loss": 2.2533, + "step": 5793000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565363673746427e-05, + "loss": 2.2566, + "step": 5793500 + }, + { + "epoch": 28.71, + "learning_rate": 3.5652400628211036e-05, + "loss": 2.2658, + "step": 5794000 + }, + { + "epoch": 28.71, + "learning_rate": 3.565116204178495e-05, + "loss": 2.2591, + "step": 5794500 + }, + { + "epoch": 28.71, + "learning_rate": 3.564992345535887e-05, + "loss": 2.2673, + "step": 5795000 + }, + { + "epoch": 28.71, + "learning_rate": 3.564868486893279e-05, + "loss": 2.2577, + "step": 5795500 + }, + { + "epoch": 28.72, + "learning_rate": 3.5647446282506704e-05, + "loss": 2.2438, + "step": 5796000 + }, + { + "epoch": 28.72, + "learning_rate": 3.564620769608062e-05, + "loss": 2.2575, + "step": 5796500 + }, + { + "epoch": 28.72, + "learning_rate": 3.564496910965454e-05, + "loss": 2.2445, + "step": 5797000 + }, + { + "epoch": 28.72, + "learning_rate": 3.5643730523228455e-05, + "loss": 2.2777, + "step": 5797500 + }, + { + "epoch": 28.73, + "learning_rate": 3.564249193680237e-05, + "loss": 2.2571, + "step": 5798000 + }, + { + "epoch": 28.73, + "learning_rate": 3.564125335037629e-05, + "loss": 2.2519, + "step": 5798500 + }, + { + "epoch": 28.73, + "learning_rate": 3.5640014763950205e-05, + "loss": 2.2186, + "step": 5799000 + }, + { + "epoch": 28.73, + "learning_rate": 3.563877617752412e-05, + "loss": 2.2601, + "step": 5799500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563753759109803e-05, + "loss": 2.2653, + "step": 5800000 + }, + { + "epoch": 28.74, + "learning_rate": 3.563629900467195e-05, + "loss": 2.2519, + "step": 5800500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563506289541872e-05, + "loss": 2.2545, + "step": 5801000 + }, + { + "epoch": 28.74, + "learning_rate": 3.5633824308992635e-05, + "loss": 2.2799, + "step": 5801500 + }, + { + "epoch": 28.75, + "learning_rate": 3.563258572256655e-05, + "loss": 2.2447, + "step": 5802000 + }, + { + "epoch": 28.75, + "learning_rate": 3.563134961331332e-05, + "loss": 2.297, + "step": 5802500 + }, + { + "epoch": 28.75, + "learning_rate": 3.563011102688724e-05, + "loss": 2.2558, + "step": 5803000 + }, + { + "epoch": 28.75, + "learning_rate": 3.5628872440461155e-05, + "loss": 2.2682, + "step": 5803500 + }, + { + "epoch": 28.76, + "learning_rate": 3.562763385403507e-05, + "loss": 2.2454, + "step": 5804000 + }, + { + "epoch": 28.76, + "learning_rate": 3.562639526760899e-05, + "loss": 2.2382, + "step": 5804500 + }, + { + "epoch": 28.76, + "learning_rate": 3.5625156681182905e-05, + "loss": 2.2444, + "step": 5805000 + }, + { + "epoch": 28.76, + "learning_rate": 3.562391809475682e-05, + "loss": 2.2791, + "step": 5805500 + }, + { + "epoch": 28.76, + "learning_rate": 3.562267950833074e-05, + "loss": 2.2489, + "step": 5806000 + }, + { + "epoch": 28.77, + "learning_rate": 3.56214433990775e-05, + "loss": 2.2662, + "step": 5806500 + }, + { + "epoch": 28.77, + "learning_rate": 3.562020481265142e-05, + "loss": 2.241, + "step": 5807000 + }, + { + "epoch": 28.77, + "learning_rate": 3.5618966226225335e-05, + "loss": 2.2429, + "step": 5807500 + }, + { + "epoch": 28.77, + "learning_rate": 3.561772763979925e-05, + "loss": 2.2777, + "step": 5808000 + }, + { + "epoch": 28.78, + "learning_rate": 3.561648905337317e-05, + "loss": 2.2731, + "step": 5808500 + }, + { + "epoch": 28.78, + "learning_rate": 3.5615250466947086e-05, + "loss": 2.2572, + "step": 5809000 + }, + { + "epoch": 28.78, + "learning_rate": 3.5614014357693855e-05, + "loss": 2.2283, + "step": 5809500 + }, + { + "epoch": 28.78, + "learning_rate": 3.561277577126777e-05, + "loss": 2.2644, + "step": 5810000 + }, + { + "epoch": 28.79, + "learning_rate": 3.561153718484169e-05, + "loss": 2.2584, + "step": 5810500 + }, + { + "epoch": 28.79, + "learning_rate": 3.5610298598415606e-05, + "loss": 2.2588, + "step": 5811000 + }, + { + "epoch": 28.79, + "learning_rate": 3.560906001198952e-05, + "loss": 2.263, + "step": 5811500 + }, + { + "epoch": 28.79, + "learning_rate": 3.5607826379909136e-05, + "loss": 2.2713, + "step": 5812000 + }, + { + "epoch": 28.8, + "learning_rate": 3.560658779348305e-05, + "loss": 2.2487, + "step": 5812500 + }, + { + "epoch": 28.8, + "learning_rate": 3.560534920705697e-05, + "loss": 2.2539, + "step": 5813000 + }, + { + "epoch": 28.8, + "learning_rate": 3.560411062063089e-05, + "loss": 2.2511, + "step": 5813500 + }, + { + "epoch": 28.8, + "learning_rate": 3.5602872034204804e-05, + "loss": 2.2785, + "step": 5814000 + }, + { + "epoch": 28.81, + "learning_rate": 3.560163592495157e-05, + "loss": 2.2514, + "step": 5814500 + }, + { + "epoch": 28.81, + "learning_rate": 3.560039733852549e-05, + "loss": 2.2603, + "step": 5815000 + }, + { + "epoch": 28.81, + "learning_rate": 3.559915875209941e-05, + "loss": 2.2606, + "step": 5815500 + }, + { + "epoch": 28.81, + "learning_rate": 3.5597920165673324e-05, + "loss": 2.247, + "step": 5816000 + }, + { + "epoch": 28.82, + "learning_rate": 3.559668157924724e-05, + "loss": 2.2564, + "step": 5816500 + }, + { + "epoch": 28.82, + "learning_rate": 3.559544299282115e-05, + "loss": 2.2528, + "step": 5817000 + }, + { + "epoch": 28.82, + "learning_rate": 3.559420688356792e-05, + "loss": 2.2559, + "step": 5817500 + }, + { + "epoch": 28.82, + "learning_rate": 3.5592968297141837e-05, + "loss": 2.2693, + "step": 5818000 + }, + { + "epoch": 28.83, + "learning_rate": 3.559173218788861e-05, + "loss": 2.2497, + "step": 5818500 + }, + { + "epoch": 28.83, + "learning_rate": 3.559049360146253e-05, + "loss": 2.2595, + "step": 5819000 + }, + { + "epoch": 28.83, + "learning_rate": 3.5589255015036446e-05, + "loss": 2.2414, + "step": 5819500 + }, + { + "epoch": 28.83, + "learning_rate": 3.5588016428610356e-05, + "loss": 2.2523, + "step": 5820000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558677784218427e-05, + "loss": 2.2844, + "step": 5820500 + }, + { + "epoch": 28.84, + "learning_rate": 3.558553925575819e-05, + "loss": 2.2532, + "step": 5821000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558430314650496e-05, + "loss": 2.245, + "step": 5821500 + }, + { + "epoch": 28.84, + "learning_rate": 3.5583064560078876e-05, + "loss": 2.2501, + "step": 5822000 + }, + { + "epoch": 28.85, + "learning_rate": 3.558182597365279e-05, + "loss": 2.2561, + "step": 5822500 + }, + { + "epoch": 28.85, + "learning_rate": 3.558058738722671e-05, + "loss": 2.2602, + "step": 5823000 + }, + { + "epoch": 28.85, + "learning_rate": 3.557935127797348e-05, + "loss": 2.2512, + "step": 5823500 + }, + { + "epoch": 28.85, + "learning_rate": 3.5578112691547395e-05, + "loss": 2.2484, + "step": 5824000 + }, + { + "epoch": 28.86, + "learning_rate": 3.557687410512131e-05, + "loss": 2.2785, + "step": 5824500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557563551869523e-05, + "loss": 2.2271, + "step": 5825000 + }, + { + "epoch": 28.86, + "learning_rate": 3.5574396932269146e-05, + "loss": 2.264, + "step": 5825500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557315834584306e-05, + "loss": 2.2774, + "step": 5826000 + }, + { + "epoch": 28.87, + "learning_rate": 3.557191975941697e-05, + "loss": 2.2599, + "step": 5826500 + }, + { + "epoch": 28.87, + "learning_rate": 3.557068117299089e-05, + "loss": 2.2745, + "step": 5827000 + }, + { + "epoch": 28.87, + "learning_rate": 3.556944258656481e-05, + "loss": 2.2626, + "step": 5827500 + }, + { + "epoch": 28.87, + "learning_rate": 3.5568206477311576e-05, + "loss": 2.2708, + "step": 5828000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556696789088549e-05, + "loss": 2.277, + "step": 5828500 + }, + { + "epoch": 28.88, + "learning_rate": 3.556572930445941e-05, + "loss": 2.2848, + "step": 5829000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556449319520618e-05, + "loss": 2.2448, + "step": 5829500 + }, + { + "epoch": 28.88, + "learning_rate": 3.5563254608780095e-05, + "loss": 2.2617, + "step": 5830000 + }, + { + "epoch": 28.89, + "learning_rate": 3.556201602235401e-05, + "loss": 2.2809, + "step": 5830500 + }, + { + "epoch": 28.89, + "learning_rate": 3.556077743592793e-05, + "loss": 2.2586, + "step": 5831000 + }, + { + "epoch": 28.89, + "learning_rate": 3.5559538849501846e-05, + "loss": 2.232, + "step": 5831500 + }, + { + "epoch": 28.89, + "learning_rate": 3.555830026307576e-05, + "loss": 2.2542, + "step": 5832000 + }, + { + "epoch": 28.9, + "learning_rate": 3.555706167664967e-05, + "loss": 2.2311, + "step": 5832500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555582556739644e-05, + "loss": 2.2316, + "step": 5833000 + }, + { + "epoch": 28.9, + "learning_rate": 3.555458945814321e-05, + "loss": 2.2274, + "step": 5833500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555335087171713e-05, + "loss": 2.2638, + "step": 5834000 + }, + { + "epoch": 28.91, + "learning_rate": 3.5552112285291045e-05, + "loss": 2.2413, + "step": 5834500 + }, + { + "epoch": 28.91, + "learning_rate": 3.555087369886496e-05, + "loss": 2.2671, + "step": 5835000 + }, + { + "epoch": 28.91, + "learning_rate": 3.554963511243888e-05, + "loss": 2.2601, + "step": 5835500 + }, + { + "epoch": 28.91, + "learning_rate": 3.5548396526012796e-05, + "loss": 2.2831, + "step": 5836000 + }, + { + "epoch": 28.92, + "learning_rate": 3.554715793958671e-05, + "loss": 2.2749, + "step": 5836500 + }, + { + "epoch": 28.92, + "learning_rate": 3.554591935316063e-05, + "loss": 2.2563, + "step": 5837000 + }, + { + "epoch": 28.92, + "learning_rate": 3.5544680766734546e-05, + "loss": 2.285, + "step": 5837500 + }, + { + "epoch": 28.92, + "learning_rate": 3.554344713465416e-05, + "loss": 2.2429, + "step": 5838000 + }, + { + "epoch": 28.93, + "learning_rate": 3.554220854822808e-05, + "loss": 2.2898, + "step": 5838500 + }, + { + "epoch": 28.93, + "learning_rate": 3.5540969961801994e-05, + "loss": 2.243, + "step": 5839000 + }, + { + "epoch": 28.93, + "learning_rate": 3.553973137537591e-05, + "loss": 2.2507, + "step": 5839500 + }, + { + "epoch": 28.93, + "learning_rate": 3.553849278894983e-05, + "loss": 2.2531, + "step": 5840000 + }, + { + "epoch": 28.94, + "learning_rate": 3.5537254202523745e-05, + "loss": 2.2665, + "step": 5840500 + }, + { + "epoch": 28.94, + "learning_rate": 3.553601561609766e-05, + "loss": 2.2682, + "step": 5841000 + }, + { + "epoch": 28.94, + "learning_rate": 3.553477702967158e-05, + "loss": 2.2489, + "step": 5841500 + }, + { + "epoch": 28.94, + "learning_rate": 3.5533538443245496e-05, + "loss": 2.2418, + "step": 5842000 + }, + { + "epoch": 28.95, + "learning_rate": 3.5532302333992265e-05, + "loss": 2.2649, + "step": 5842500 + }, + { + "epoch": 28.95, + "learning_rate": 3.553106374756618e-05, + "loss": 2.2535, + "step": 5843000 + }, + { + "epoch": 28.95, + "learning_rate": 3.55298251611401e-05, + "loss": 2.2434, + "step": 5843500 + }, + { + "epoch": 28.95, + "learning_rate": 3.5528586574714015e-05, + "loss": 2.2766, + "step": 5844000 + }, + { + "epoch": 28.96, + "learning_rate": 3.552734798828793e-05, + "loss": 2.2398, + "step": 5844500 + }, + { + "epoch": 28.96, + "learning_rate": 3.552610940186184e-05, + "loss": 2.2472, + "step": 5845000 + }, + { + "epoch": 28.96, + "learning_rate": 3.552487081543576e-05, + "loss": 2.2474, + "step": 5845500 + }, + { + "epoch": 28.96, + "learning_rate": 3.5523632229009676e-05, + "loss": 2.2566, + "step": 5846000 + }, + { + "epoch": 28.97, + "learning_rate": 3.552239364258359e-05, + "loss": 2.2593, + "step": 5846500 + }, + { + "epoch": 28.97, + "learning_rate": 3.552115505615751e-05, + "loss": 2.2721, + "step": 5847000 + }, + { + "epoch": 28.97, + "learning_rate": 3.551992142407713e-05, + "loss": 2.2743, + "step": 5847500 + }, + { + "epoch": 28.97, + "learning_rate": 3.551868283765105e-05, + "loss": 2.2448, + "step": 5848000 + }, + { + "epoch": 28.98, + "learning_rate": 3.5517444251224965e-05, + "loss": 2.289, + "step": 5848500 + }, + { + "epoch": 28.98, + "learning_rate": 3.551620566479888e-05, + "loss": 2.264, + "step": 5849000 + }, + { + "epoch": 28.98, + "learning_rate": 3.55149670783728e-05, + "loss": 2.2393, + "step": 5849500 + }, + { + "epoch": 28.98, + "learning_rate": 3.5513728491946715e-05, + "loss": 2.241, + "step": 5850000 + }, + { + "epoch": 28.99, + "learning_rate": 3.551249238269348e-05, + "loss": 2.232, + "step": 5850500 + }, + { + "epoch": 28.99, + "learning_rate": 3.5511253796267394e-05, + "loss": 2.2528, + "step": 5851000 + }, + { + "epoch": 28.99, + "learning_rate": 3.551001520984131e-05, + "loss": 2.2253, + "step": 5851500 + }, + { + "epoch": 28.99, + "learning_rate": 3.550877662341523e-05, + "loss": 2.2537, + "step": 5852000 + }, + { + "epoch": 29.0, + "learning_rate": 3.5507538036989145e-05, + "loss": 2.2496, + "step": 5852500 + }, + { + "epoch": 29.0, + "learning_rate": 3.550629945056306e-05, + "loss": 2.2503, + "step": 5853000 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.6575265605183357, + "eval_accuracy_mlm": 0.6131820592582494, + "eval_accuracy_nsp": 0.8667511246906365, + "eval_loss": 2.3342039585113525, + "eval_runtime": 146.0117, + "eval_samples_per_second": 1746.155, + "eval_steps_per_second": 72.761, + "step": 5853447 + }, + { + "epoch": 29.0, + "learning_rate": 3.550506581848268e-05, + "loss": 2.2472, + "step": 5853500 + }, + { + "epoch": 29.0, + "learning_rate": 3.55038272320566e-05, + "loss": 2.2281, + "step": 5854000 + }, + { + "epoch": 29.01, + "learning_rate": 3.550258864563052e-05, + "loss": 2.233, + "step": 5854500 + }, + { + "epoch": 29.01, + "learning_rate": 3.550135005920443e-05, + "loss": 2.2422, + "step": 5855000 + }, + { + "epoch": 29.01, + "learning_rate": 3.5500111472778344e-05, + "loss": 2.2156, + "step": 5855500 + }, + { + "epoch": 29.01, + "learning_rate": 3.549887288635226e-05, + "loss": 2.2308, + "step": 5856000 + }, + { + "epoch": 29.02, + "learning_rate": 3.549763429992618e-05, + "loss": 2.2383, + "step": 5856500 + }, + { + "epoch": 29.02, + "learning_rate": 3.5496395713500095e-05, + "loss": 2.2424, + "step": 5857000 + }, + { + "epoch": 29.02, + "learning_rate": 3.549515712707401e-05, + "loss": 2.2365, + "step": 5857500 + }, + { + "epoch": 29.02, + "learning_rate": 3.549391854064793e-05, + "loss": 2.2243, + "step": 5858000 + }, + { + "epoch": 29.03, + "learning_rate": 3.54926824313947e-05, + "loss": 2.2149, + "step": 5858500 + }, + { + "epoch": 29.03, + "learning_rate": 3.549144632214147e-05, + "loss": 2.2196, + "step": 5859000 + }, + { + "epoch": 29.03, + "learning_rate": 3.549020773571538e-05, + "loss": 2.2177, + "step": 5859500 + }, + { + "epoch": 29.03, + "learning_rate": 3.54889691492893e-05, + "loss": 2.2114, + "step": 5860000 + }, + { + "epoch": 29.03, + "learning_rate": 3.548773056286322e-05, + "loss": 2.2293, + "step": 5860500 + }, + { + "epoch": 29.04, + "learning_rate": 3.5486491976437134e-05, + "loss": 2.2463, + "step": 5861000 + }, + { + "epoch": 29.04, + "learning_rate": 3.548525339001105e-05, + "loss": 2.2409, + "step": 5861500 + }, + { + "epoch": 29.04, + "learning_rate": 3.548401480358496e-05, + "loss": 2.2488, + "step": 5862000 + }, + { + "epoch": 29.04, + "learning_rate": 3.548277621715888e-05, + "loss": 2.2524, + "step": 5862500 + }, + { + "epoch": 29.05, + "learning_rate": 3.5481537630732795e-05, + "loss": 2.2146, + "step": 5863000 + }, + { + "epoch": 29.05, + "learning_rate": 3.548029904430671e-05, + "loss": 2.2241, + "step": 5863500 + }, + { + "epoch": 29.05, + "learning_rate": 3.547906045788063e-05, + "loss": 2.2167, + "step": 5864000 + }, + { + "epoch": 29.05, + "learning_rate": 3.5477821871454545e-05, + "loss": 2.2205, + "step": 5864500 + }, + { + "epoch": 29.06, + "learning_rate": 3.547658328502846e-05, + "loss": 2.2322, + "step": 5865000 + }, + { + "epoch": 29.06, + "learning_rate": 3.547534469860238e-05, + "loss": 2.2218, + "step": 5865500 + }, + { + "epoch": 29.06, + "learning_rate": 3.5474106112176296e-05, + "loss": 2.2323, + "step": 5866000 + }, + { + "epoch": 29.06, + "learning_rate": 3.547286752575021e-05, + "loss": 2.2263, + "step": 5866500 + }, + { + "epoch": 29.07, + "learning_rate": 3.547162893932413e-05, + "loss": 2.1892, + "step": 5867000 + }, + { + "epoch": 29.07, + "learning_rate": 3.547039035289805e-05, + "loss": 2.2291, + "step": 5867500 + }, + { + "epoch": 29.07, + "learning_rate": 3.5469151766471964e-05, + "loss": 2.2354, + "step": 5868000 + }, + { + "epoch": 29.07, + "learning_rate": 3.546791318004588e-05, + "loss": 2.2302, + "step": 5868500 + }, + { + "epoch": 29.08, + "learning_rate": 3.54666745936198e-05, + "loss": 2.2066, + "step": 5869000 + }, + { + "epoch": 29.08, + "learning_rate": 3.5465436007193715e-05, + "loss": 2.219, + "step": 5869500 + }, + { + "epoch": 29.08, + "learning_rate": 3.5464197420767625e-05, + "loss": 2.2361, + "step": 5870000 + }, + { + "epoch": 29.08, + "learning_rate": 3.54629613115144e-05, + "loss": 2.2123, + "step": 5870500 + }, + { + "epoch": 29.09, + "learning_rate": 3.546172272508832e-05, + "loss": 2.2175, + "step": 5871000 + }, + { + "epoch": 29.09, + "learning_rate": 3.546048661583508e-05, + "loss": 2.2331, + "step": 5871500 + }, + { + "epoch": 29.09, + "learning_rate": 3.545925050658185e-05, + "loss": 2.2405, + "step": 5872000 + }, + { + "epoch": 29.09, + "learning_rate": 3.5458011920155765e-05, + "loss": 2.2258, + "step": 5872500 + }, + { + "epoch": 29.1, + "learning_rate": 3.5456775810902534e-05, + "loss": 2.2055, + "step": 5873000 + }, + { + "epoch": 29.1, + "learning_rate": 3.545553722447645e-05, + "loss": 2.2264, + "step": 5873500 + }, + { + "epoch": 29.1, + "learning_rate": 3.545429863805037e-05, + "loss": 2.2461, + "step": 5874000 + }, + { + "epoch": 29.1, + "learning_rate": 3.5453060051624285e-05, + "loss": 2.2322, + "step": 5874500 + }, + { + "epoch": 29.11, + "learning_rate": 3.54518214651982e-05, + "loss": 2.2506, + "step": 5875000 + }, + { + "epoch": 29.11, + "learning_rate": 3.545058287877211e-05, + "loss": 2.2381, + "step": 5875500 + }, + { + "epoch": 29.11, + "learning_rate": 3.544934429234603e-05, + "loss": 2.2179, + "step": 5876000 + }, + { + "epoch": 29.11, + "learning_rate": 3.5448105705919946e-05, + "loss": 2.2353, + "step": 5876500 + }, + { + "epoch": 29.12, + "learning_rate": 3.544686711949386e-05, + "loss": 2.2316, + "step": 5877000 + }, + { + "epoch": 29.12, + "learning_rate": 3.544563101024063e-05, + "loss": 2.2306, + "step": 5877500 + }, + { + "epoch": 29.12, + "learning_rate": 3.544439242381455e-05, + "loss": 2.2229, + "step": 5878000 + }, + { + "epoch": 29.12, + "learning_rate": 3.5443153837388465e-05, + "loss": 2.2512, + "step": 5878500 + }, + { + "epoch": 29.13, + "learning_rate": 3.544191525096238e-05, + "loss": 2.2306, + "step": 5879000 + }, + { + "epoch": 29.13, + "learning_rate": 3.54406766645363e-05, + "loss": 2.2534, + "step": 5879500 + }, + { + "epoch": 29.13, + "learning_rate": 3.543944055528307e-05, + "loss": 2.2212, + "step": 5880000 + }, + { + "epoch": 29.13, + "learning_rate": 3.5438201968856985e-05, + "loss": 2.2609, + "step": 5880500 + }, + { + "epoch": 29.14, + "learning_rate": 3.54369633824309e-05, + "loss": 2.2195, + "step": 5881000 + }, + { + "epoch": 29.14, + "learning_rate": 3.543572727317767e-05, + "loss": 2.2432, + "step": 5881500 + }, + { + "epoch": 29.14, + "learning_rate": 3.543448868675159e-05, + "loss": 2.2379, + "step": 5882000 + }, + { + "epoch": 29.14, + "learning_rate": 3.5433250100325504e-05, + "loss": 2.2313, + "step": 5882500 + }, + { + "epoch": 29.15, + "learning_rate": 3.543201151389942e-05, + "loss": 2.2383, + "step": 5883000 + }, + { + "epoch": 29.15, + "learning_rate": 3.543077292747333e-05, + "loss": 2.2507, + "step": 5883500 + }, + { + "epoch": 29.15, + "learning_rate": 3.542953434104725e-05, + "loss": 2.2331, + "step": 5884000 + }, + { + "epoch": 29.15, + "learning_rate": 3.5428295754621165e-05, + "loss": 2.2345, + "step": 5884500 + }, + { + "epoch": 29.16, + "learning_rate": 3.542705964536794e-05, + "loss": 2.2113, + "step": 5885000 + }, + { + "epoch": 29.16, + "learning_rate": 3.542582105894186e-05, + "loss": 2.2003, + "step": 5885500 + }, + { + "epoch": 29.16, + "learning_rate": 3.5424582472515775e-05, + "loss": 2.2238, + "step": 5886000 + }, + { + "epoch": 29.16, + "learning_rate": 3.5423343886089685e-05, + "loss": 2.2471, + "step": 5886500 + }, + { + "epoch": 29.17, + "learning_rate": 3.54221052996636e-05, + "loss": 2.1966, + "step": 5887000 + }, + { + "epoch": 29.17, + "learning_rate": 3.542086671323752e-05, + "loss": 2.2397, + "step": 5887500 + }, + { + "epoch": 29.17, + "learning_rate": 3.5419628126811436e-05, + "loss": 2.2606, + "step": 5888000 + }, + { + "epoch": 29.17, + "learning_rate": 3.541838954038535e-05, + "loss": 2.2447, + "step": 5888500 + }, + { + "epoch": 29.18, + "learning_rate": 3.541715095395926e-05, + "loss": 2.2425, + "step": 5889000 + }, + { + "epoch": 29.18, + "learning_rate": 3.541591236753318e-05, + "loss": 2.226, + "step": 5889500 + }, + { + "epoch": 29.18, + "learning_rate": 3.54146737811071e-05, + "loss": 2.2204, + "step": 5890000 + }, + { + "epoch": 29.18, + "learning_rate": 3.5413437671853866e-05, + "loss": 2.2296, + "step": 5890500 + }, + { + "epoch": 29.19, + "learning_rate": 3.541220156260064e-05, + "loss": 2.2481, + "step": 5891000 + }, + { + "epoch": 29.19, + "learning_rate": 3.541096297617456e-05, + "loss": 2.2228, + "step": 5891500 + }, + { + "epoch": 29.19, + "learning_rate": 3.5409724389748475e-05, + "loss": 2.2576, + "step": 5892000 + }, + { + "epoch": 29.19, + "learning_rate": 3.5408485803322385e-05, + "loss": 2.2459, + "step": 5892500 + }, + { + "epoch": 29.2, + "learning_rate": 3.54072472168963e-05, + "loss": 2.2061, + "step": 5893000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540600863047022e-05, + "loss": 2.2236, + "step": 5893500 + }, + { + "epoch": 29.2, + "learning_rate": 3.5404770044044136e-05, + "loss": 2.2591, + "step": 5894000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540353145761805e-05, + "loss": 2.2513, + "step": 5894500 + }, + { + "epoch": 29.21, + "learning_rate": 3.540229534836482e-05, + "loss": 2.2299, + "step": 5895000 + }, + { + "epoch": 29.21, + "learning_rate": 3.540105676193874e-05, + "loss": 2.2314, + "step": 5895500 + }, + { + "epoch": 29.21, + "learning_rate": 3.539981817551265e-05, + "loss": 2.2248, + "step": 5896000 + }, + { + "epoch": 29.21, + "learning_rate": 3.5398579589086566e-05, + "loss": 2.2565, + "step": 5896500 + }, + { + "epoch": 29.22, + "learning_rate": 3.539734100266048e-05, + "loss": 2.207, + "step": 5897000 + }, + { + "epoch": 29.22, + "learning_rate": 3.53961024162344e-05, + "loss": 2.2213, + "step": 5897500 + }, + { + "epoch": 29.22, + "learning_rate": 3.5394863829808316e-05, + "loss": 2.2324, + "step": 5898000 + }, + { + "epoch": 29.22, + "learning_rate": 3.539362524338223e-05, + "loss": 2.2222, + "step": 5898500 + }, + { + "epoch": 29.23, + "learning_rate": 3.5392389134129e-05, + "loss": 2.2263, + "step": 5899000 + }, + { + "epoch": 29.23, + "learning_rate": 3.539115054770292e-05, + "loss": 2.2123, + "step": 5899500 + }, + { + "epoch": 29.23, + "learning_rate": 3.5389911961276836e-05, + "loss": 2.2744, + "step": 5900000 + }, + { + "epoch": 29.23, + "learning_rate": 3.538867337485075e-05, + "loss": 2.231, + "step": 5900500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538743478842467e-05, + "loss": 2.2395, + "step": 5901000 + }, + { + "epoch": 29.24, + "learning_rate": 3.538619867917144e-05, + "loss": 2.2213, + "step": 5901500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538496256991821e-05, + "loss": 2.2558, + "step": 5902000 + }, + { + "epoch": 29.24, + "learning_rate": 3.5383723983492124e-05, + "loss": 2.234, + "step": 5902500 + }, + { + "epoch": 29.25, + "learning_rate": 3.538248539706604e-05, + "loss": 2.2555, + "step": 5903000 + }, + { + "epoch": 29.25, + "learning_rate": 3.538124681063996e-05, + "loss": 2.245, + "step": 5903500 + }, + { + "epoch": 29.25, + "learning_rate": 3.5380008224213875e-05, + "loss": 2.239, + "step": 5904000 + }, + { + "epoch": 29.25, + "learning_rate": 3.537876963778779e-05, + "loss": 2.2479, + "step": 5904500 + }, + { + "epoch": 29.26, + "learning_rate": 3.53775310513617e-05, + "loss": 2.2461, + "step": 5905000 + }, + { + "epoch": 29.26, + "learning_rate": 3.537629494210847e-05, + "loss": 2.2416, + "step": 5905500 + }, + { + "epoch": 29.26, + "learning_rate": 3.537505635568239e-05, + "loss": 2.2077, + "step": 5906000 + }, + { + "epoch": 29.26, + "learning_rate": 3.5373817769256305e-05, + "loss": 2.2354, + "step": 5906500 + }, + { + "epoch": 29.27, + "learning_rate": 3.537257918283022e-05, + "loss": 2.2289, + "step": 5907000 + }, + { + "epoch": 29.27, + "learning_rate": 3.537134059640414e-05, + "loss": 2.2317, + "step": 5907500 + }, + { + "epoch": 29.27, + "learning_rate": 3.5370102009978056e-05, + "loss": 2.2199, + "step": 5908000 + }, + { + "epoch": 29.27, + "learning_rate": 3.5368863423551966e-05, + "loss": 2.2475, + "step": 5908500 + }, + { + "epoch": 29.28, + "learning_rate": 3.536762483712588e-05, + "loss": 2.2429, + "step": 5909000 + }, + { + "epoch": 29.28, + "learning_rate": 3.53663862506998e-05, + "loss": 2.2527, + "step": 5909500 + }, + { + "epoch": 29.28, + "learning_rate": 3.5365150141446575e-05, + "loss": 2.2502, + "step": 5910000 + }, + { + "epoch": 29.28, + "learning_rate": 3.536391403219334e-05, + "loss": 2.2442, + "step": 5910500 + }, + { + "epoch": 29.29, + "learning_rate": 3.5362675445767254e-05, + "loss": 2.2257, + "step": 5911000 + }, + { + "epoch": 29.29, + "learning_rate": 3.536143685934117e-05, + "loss": 2.2407, + "step": 5911500 + }, + { + "epoch": 29.29, + "learning_rate": 3.536019827291509e-05, + "loss": 2.238, + "step": 5912000 + }, + { + "epoch": 29.29, + "learning_rate": 3.5358959686489005e-05, + "loss": 2.2623, + "step": 5912500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535772110006292e-05, + "loss": 2.228, + "step": 5913000 + }, + { + "epoch": 29.3, + "learning_rate": 3.535648251363684e-05, + "loss": 2.2201, + "step": 5913500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535524640438361e-05, + "loss": 2.2372, + "step": 5914000 + }, + { + "epoch": 29.3, + "learning_rate": 3.5354007817957525e-05, + "loss": 2.2437, + "step": 5914500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535276923153144e-05, + "loss": 2.2395, + "step": 5915000 + }, + { + "epoch": 29.31, + "learning_rate": 3.535153312227821e-05, + "loss": 2.223, + "step": 5915500 + }, + { + "epoch": 29.31, + "learning_rate": 3.535029453585213e-05, + "loss": 2.2415, + "step": 5916000 + }, + { + "epoch": 29.31, + "learning_rate": 3.5349055949426044e-05, + "loss": 2.2512, + "step": 5916500 + }, + { + "epoch": 29.31, + "learning_rate": 3.5347817362999954e-05, + "loss": 2.2071, + "step": 5917000 + }, + { + "epoch": 29.32, + "learning_rate": 3.534657877657387e-05, + "loss": 2.2317, + "step": 5917500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534534019014779e-05, + "loss": 2.2262, + "step": 5918000 + }, + { + "epoch": 29.32, + "learning_rate": 3.5344101603721705e-05, + "loss": 2.2189, + "step": 5918500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534286301729562e-05, + "loss": 2.2357, + "step": 5919000 + }, + { + "epoch": 29.33, + "learning_rate": 3.534162690804239e-05, + "loss": 2.245, + "step": 5919500 + }, + { + "epoch": 29.33, + "learning_rate": 3.534038832161631e-05, + "loss": 2.2388, + "step": 5920000 + }, + { + "epoch": 29.33, + "learning_rate": 3.5339149735190225e-05, + "loss": 2.2373, + "step": 5920500 + }, + { + "epoch": 29.33, + "learning_rate": 3.533791114876414e-05, + "loss": 2.2406, + "step": 5921000 + }, + { + "epoch": 29.34, + "learning_rate": 3.533667256233806e-05, + "loss": 2.2509, + "step": 5921500 + }, + { + "epoch": 29.34, + "learning_rate": 3.5335433975911976e-05, + "loss": 2.2308, + "step": 5922000 + }, + { + "epoch": 29.34, + "learning_rate": 3.533419538948589e-05, + "loss": 2.2571, + "step": 5922500 + }, + { + "epoch": 29.34, + "learning_rate": 3.533295680305981e-05, + "loss": 2.2244, + "step": 5923000 + }, + { + "epoch": 29.35, + "learning_rate": 3.5331718216633726e-05, + "loss": 2.2334, + "step": 5923500 + }, + { + "epoch": 29.35, + "learning_rate": 3.5330479630207637e-05, + "loss": 2.2752, + "step": 5924000 + }, + { + "epoch": 29.35, + "learning_rate": 3.5329241043781553e-05, + "loss": 2.2395, + "step": 5924500 + }, + { + "epoch": 29.35, + "learning_rate": 3.532800245735547e-05, + "loss": 2.2595, + "step": 5925000 + }, + { + "epoch": 29.36, + "learning_rate": 3.532676387092939e-05, + "loss": 2.2495, + "step": 5925500 + }, + { + "epoch": 29.36, + "learning_rate": 3.5325525284503304e-05, + "loss": 2.2548, + "step": 5926000 + }, + { + "epoch": 29.36, + "learning_rate": 3.532428669807722e-05, + "loss": 2.2415, + "step": 5926500 + }, + { + "epoch": 29.36, + "learning_rate": 3.532304811165114e-05, + "loss": 2.2515, + "step": 5927000 + }, + { + "epoch": 29.37, + "learning_rate": 3.532181447957076e-05, + "loss": 2.2455, + "step": 5927500 + }, + { + "epoch": 29.37, + "learning_rate": 3.532057837031753e-05, + "loss": 2.2333, + "step": 5928000 + }, + { + "epoch": 29.37, + "learning_rate": 3.5319339783891445e-05, + "loss": 2.2581, + "step": 5928500 + }, + { + "epoch": 29.37, + "learning_rate": 3.531810119746536e-05, + "loss": 2.2516, + "step": 5929000 + }, + { + "epoch": 29.38, + "learning_rate": 3.5316865088212123e-05, + "loss": 2.2583, + "step": 5929500 + }, + { + "epoch": 29.38, + "learning_rate": 3.53156289789589e-05, + "loss": 2.2352, + "step": 5930000 + }, + { + "epoch": 29.38, + "learning_rate": 3.5314390392532816e-05, + "loss": 2.2317, + "step": 5930500 + }, + { + "epoch": 29.38, + "learning_rate": 3.5313151806106726e-05, + "loss": 2.2426, + "step": 5931000 + }, + { + "epoch": 29.39, + "learning_rate": 3.531191321968064e-05, + "loss": 2.2369, + "step": 5931500 + }, + { + "epoch": 29.39, + "learning_rate": 3.531067463325456e-05, + "loss": 2.2255, + "step": 5932000 + }, + { + "epoch": 29.39, + "learning_rate": 3.530943604682848e-05, + "loss": 2.2445, + "step": 5932500 + }, + { + "epoch": 29.39, + "learning_rate": 3.5308197460402394e-05, + "loss": 2.2298, + "step": 5933000 + }, + { + "epoch": 29.4, + "learning_rate": 3.530695887397631e-05, + "loss": 2.2297, + "step": 5933500 + }, + { + "epoch": 29.4, + "learning_rate": 3.530572028755023e-05, + "loss": 2.2726, + "step": 5934000 + }, + { + "epoch": 29.4, + "learning_rate": 3.5304481701124145e-05, + "loss": 2.2399, + "step": 5934500 + }, + { + "epoch": 29.4, + "learning_rate": 3.530324311469806e-05, + "loss": 2.262, + "step": 5935000 + }, + { + "epoch": 29.41, + "learning_rate": 3.530200452827198e-05, + "loss": 2.2255, + "step": 5935500 + }, + { + "epoch": 29.41, + "learning_rate": 3.5300765941845895e-05, + "loss": 2.2357, + "step": 5936000 + }, + { + "epoch": 29.41, + "learning_rate": 3.529952735541981e-05, + "loss": 2.2195, + "step": 5936500 + }, + { + "epoch": 29.41, + "learning_rate": 3.5298291246166574e-05, + "loss": 2.2428, + "step": 5937000 + }, + { + "epoch": 29.42, + "learning_rate": 3.529705513691334e-05, + "loss": 2.2513, + "step": 5937500 + }, + { + "epoch": 29.42, + "learning_rate": 3.529581655048726e-05, + "loss": 2.2211, + "step": 5938000 + }, + { + "epoch": 29.42, + "learning_rate": 3.529457796406118e-05, + "loss": 2.2356, + "step": 5938500 + }, + { + "epoch": 29.42, + "learning_rate": 3.5293339377635094e-05, + "loss": 2.2263, + "step": 5939000 + }, + { + "epoch": 29.43, + "learning_rate": 3.529210079120901e-05, + "loss": 2.2276, + "step": 5939500 + }, + { + "epoch": 29.43, + "learning_rate": 3.529086220478293e-05, + "loss": 2.2462, + "step": 5940000 + }, + { + "epoch": 29.43, + "learning_rate": 3.5289623618356845e-05, + "loss": 2.1902, + "step": 5940500 + }, + { + "epoch": 29.43, + "learning_rate": 3.528838503193076e-05, + "loss": 2.26, + "step": 5941000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528714644550468e-05, + "loss": 2.2554, + "step": 5941500 + }, + { + "epoch": 29.44, + "learning_rate": 3.5285907859078596e-05, + "loss": 2.2377, + "step": 5942000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528466927265251e-05, + "loss": 2.2355, + "step": 5942500 + }, + { + "epoch": 29.44, + "learning_rate": 3.528343068622643e-05, + "loss": 2.2532, + "step": 5943000 + }, + { + "epoch": 29.45, + "learning_rate": 3.528219705414604e-05, + "loss": 2.2435, + "step": 5943500 + }, + { + "epoch": 29.45, + "learning_rate": 3.528096094489282e-05, + "loss": 2.2458, + "step": 5944000 + }, + { + "epoch": 29.45, + "learning_rate": 3.527972235846673e-05, + "loss": 2.2236, + "step": 5944500 + }, + { + "epoch": 29.45, + "learning_rate": 3.5278483772040646e-05, + "loss": 2.2477, + "step": 5945000 + }, + { + "epoch": 29.46, + "learning_rate": 3.527724518561456e-05, + "loss": 2.2348, + "step": 5945500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527600659918848e-05, + "loss": 2.2568, + "step": 5946000 + }, + { + "epoch": 29.46, + "learning_rate": 3.52747680127624e-05, + "loss": 2.2553, + "step": 5946500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527352942633631e-05, + "loss": 2.246, + "step": 5947000 + }, + { + "epoch": 29.47, + "learning_rate": 3.5272290839910224e-05, + "loss": 2.258, + "step": 5947500 + }, + { + "epoch": 29.47, + "learning_rate": 3.527105225348414e-05, + "loss": 2.2518, + "step": 5948000 + }, + { + "epoch": 29.47, + "learning_rate": 3.5269816144230916e-05, + "loss": 2.2239, + "step": 5948500 + }, + { + "epoch": 29.47, + "learning_rate": 3.526857755780483e-05, + "loss": 2.2415, + "step": 5949000 + }, + { + "epoch": 29.48, + "learning_rate": 3.5267338971378743e-05, + "loss": 2.256, + "step": 5949500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526610038495266e-05, + "loss": 2.2548, + "step": 5950000 + }, + { + "epoch": 29.48, + "learning_rate": 3.5264864275699436e-05, + "loss": 2.2458, + "step": 5950500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526362568927335e-05, + "loss": 2.2636, + "step": 5951000 + }, + { + "epoch": 29.49, + "learning_rate": 3.526238710284726e-05, + "loss": 2.221, + "step": 5951500 + }, + { + "epoch": 29.49, + "learning_rate": 3.526114851642118e-05, + "loss": 2.2384, + "step": 5952000 + }, + { + "epoch": 29.49, + "learning_rate": 3.525991240716795e-05, + "loss": 2.2514, + "step": 5952500 + }, + { + "epoch": 29.49, + "learning_rate": 3.5258673820741866e-05, + "loss": 2.2481, + "step": 5953000 + }, + { + "epoch": 29.5, + "learning_rate": 3.525743523431578e-05, + "loss": 2.205, + "step": 5953500 + }, + { + "epoch": 29.5, + "learning_rate": 3.52561966478897e-05, + "loss": 2.2412, + "step": 5954000 + }, + { + "epoch": 29.5, + "learning_rate": 3.5254958061463617e-05, + "loss": 2.2481, + "step": 5954500 + }, + { + "epoch": 29.5, + "learning_rate": 3.5253719475037533e-05, + "loss": 2.239, + "step": 5955000 + }, + { + "epoch": 29.51, + "learning_rate": 3.52524833657843e-05, + "loss": 2.2468, + "step": 5955500 + }, + { + "epoch": 29.51, + "learning_rate": 3.525124477935822e-05, + "loss": 2.2548, + "step": 5956000 + }, + { + "epoch": 29.51, + "learning_rate": 3.5250006192932136e-05, + "loss": 2.2493, + "step": 5956500 + }, + { + "epoch": 29.51, + "learning_rate": 3.524876760650605e-05, + "loss": 2.2602, + "step": 5957000 + }, + { + "epoch": 29.52, + "learning_rate": 3.5247531497252815e-05, + "loss": 2.255, + "step": 5957500 + }, + { + "epoch": 29.52, + "learning_rate": 3.524629291082673e-05, + "loss": 2.2757, + "step": 5958000 + }, + { + "epoch": 29.52, + "learning_rate": 3.524505432440065e-05, + "loss": 2.2548, + "step": 5958500 + }, + { + "epoch": 29.52, + "learning_rate": 3.5243815737974566e-05, + "loss": 2.2651, + "step": 5959000 + }, + { + "epoch": 29.53, + "learning_rate": 3.524257715154848e-05, + "loss": 2.2414, + "step": 5959500 + }, + { + "epoch": 29.53, + "learning_rate": 3.52413385651224e-05, + "loss": 2.2512, + "step": 5960000 + }, + { + "epoch": 29.53, + "learning_rate": 3.524009997869632e-05, + "loss": 2.2631, + "step": 5960500 + }, + { + "epoch": 29.53, + "learning_rate": 3.5238861392270234e-05, + "loss": 2.254, + "step": 5961000 + }, + { + "epoch": 29.54, + "learning_rate": 3.523762280584415e-05, + "loss": 2.2334, + "step": 5961500 + }, + { + "epoch": 29.54, + "learning_rate": 3.523638421941806e-05, + "loss": 2.2553, + "step": 5962000 + }, + { + "epoch": 29.54, + "learning_rate": 3.5235148110164836e-05, + "loss": 2.2297, + "step": 5962500 + }, + { + "epoch": 29.54, + "learning_rate": 3.523390952373875e-05, + "loss": 2.2426, + "step": 5963000 + }, + { + "epoch": 29.55, + "learning_rate": 3.5232673414485515e-05, + "loss": 2.2523, + "step": 5963500 + }, + { + "epoch": 29.55, + "learning_rate": 3.523143482805943e-05, + "loss": 2.2493, + "step": 5964000 + }, + { + "epoch": 29.55, + "learning_rate": 3.523019624163335e-05, + "loss": 2.2559, + "step": 5964500 + }, + { + "epoch": 29.55, + "learning_rate": 3.5228957655207266e-05, + "loss": 2.2758, + "step": 5965000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522771906878118e-05, + "loss": 2.2565, + "step": 5965500 + }, + { + "epoch": 29.56, + "learning_rate": 3.52264804823551e-05, + "loss": 2.2483, + "step": 5966000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522524189592902e-05, + "loss": 2.2588, + "step": 5966500 + }, + { + "epoch": 29.56, + "learning_rate": 3.5224003309502934e-05, + "loss": 2.2445, + "step": 5967000 + }, + { + "epoch": 29.57, + "learning_rate": 3.52227672002497e-05, + "loss": 2.274, + "step": 5967500 + }, + { + "epoch": 29.57, + "learning_rate": 3.522152861382362e-05, + "loss": 2.2289, + "step": 5968000 + }, + { + "epoch": 29.57, + "learning_rate": 3.5220290027397536e-05, + "loss": 2.2546, + "step": 5968500 + }, + { + "epoch": 29.57, + "learning_rate": 3.52190539181443e-05, + "loss": 2.2503, + "step": 5969000 + }, + { + "epoch": 29.57, + "learning_rate": 3.5217815331718215e-05, + "loss": 2.2506, + "step": 5969500 + }, + { + "epoch": 29.58, + "learning_rate": 3.521657674529213e-05, + "loss": 2.251, + "step": 5970000 + }, + { + "epoch": 29.58, + "learning_rate": 3.52153406360389e-05, + "loss": 2.2336, + "step": 5970500 + }, + { + "epoch": 29.58, + "learning_rate": 3.521410204961282e-05, + "loss": 2.2587, + "step": 5971000 + }, + { + "epoch": 29.58, + "learning_rate": 3.5212863463186735e-05, + "loss": 2.2595, + "step": 5971500 + }, + { + "epoch": 29.59, + "learning_rate": 3.5211627353933504e-05, + "loss": 2.2517, + "step": 5972000 + }, + { + "epoch": 29.59, + "learning_rate": 3.521038876750742e-05, + "loss": 2.258, + "step": 5972500 + }, + { + "epoch": 29.59, + "learning_rate": 3.520915018108134e-05, + "loss": 2.251, + "step": 5973000 + }, + { + "epoch": 29.59, + "learning_rate": 3.5207911594655255e-05, + "loss": 2.2584, + "step": 5973500 + }, + { + "epoch": 29.6, + "learning_rate": 3.520667300822917e-05, + "loss": 2.2444, + "step": 5974000 + }, + { + "epoch": 29.6, + "learning_rate": 3.520543442180309e-05, + "loss": 2.2307, + "step": 5974500 + }, + { + "epoch": 29.6, + "learning_rate": 3.5204195835377e-05, + "loss": 2.2548, + "step": 5975000 + }, + { + "epoch": 29.6, + "learning_rate": 3.5202957248950915e-05, + "loss": 2.2333, + "step": 5975500 + }, + { + "epoch": 29.61, + "learning_rate": 3.520171866252483e-05, + "loss": 2.2488, + "step": 5976000 + }, + { + "epoch": 29.61, + "learning_rate": 3.520048007609875e-05, + "loss": 2.2246, + "step": 5976500 + }, + { + "epoch": 29.61, + "learning_rate": 3.5199241489672666e-05, + "loss": 2.2331, + "step": 5977000 + }, + { + "epoch": 29.61, + "learning_rate": 3.519800290324658e-05, + "loss": 2.2524, + "step": 5977500 + }, + { + "epoch": 29.62, + "learning_rate": 3.51967643168205e-05, + "loss": 2.2479, + "step": 5978000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519552573039442e-05, + "loss": 2.2346, + "step": 5978500 + }, + { + "epoch": 29.62, + "learning_rate": 3.5194287143968334e-05, + "loss": 2.2478, + "step": 5979000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519304855754225e-05, + "loss": 2.2465, + "step": 5979500 + }, + { + "epoch": 29.63, + "learning_rate": 3.519180997111617e-05, + "loss": 2.2337, + "step": 5980000 + }, + { + "epoch": 29.63, + "learning_rate": 3.5190571384690085e-05, + "loss": 2.2392, + "step": 5980500 + }, + { + "epoch": 29.63, + "learning_rate": 3.5189335275436854e-05, + "loss": 2.2563, + "step": 5981000 + }, + { + "epoch": 29.63, + "learning_rate": 3.518809668901077e-05, + "loss": 2.2376, + "step": 5981500 + }, + { + "epoch": 29.64, + "learning_rate": 3.5186863056930384e-05, + "loss": 2.2474, + "step": 5982000 + }, + { + "epoch": 29.64, + "learning_rate": 3.51856244705043e-05, + "loss": 2.2415, + "step": 5982500 + }, + { + "epoch": 29.64, + "learning_rate": 3.518438588407822e-05, + "loss": 2.2424, + "step": 5983000 + }, + { + "epoch": 29.64, + "learning_rate": 3.5183147297652135e-05, + "loss": 2.2372, + "step": 5983500 + }, + { + "epoch": 29.65, + "learning_rate": 3.518190871122605e-05, + "loss": 2.2606, + "step": 5984000 + }, + { + "epoch": 29.65, + "learning_rate": 3.518067260197282e-05, + "loss": 2.2476, + "step": 5984500 + }, + { + "epoch": 29.65, + "learning_rate": 3.517943401554674e-05, + "loss": 2.2595, + "step": 5985000 + }, + { + "epoch": 29.65, + "learning_rate": 3.5178195429120655e-05, + "loss": 2.2325, + "step": 5985500 + }, + { + "epoch": 29.66, + "learning_rate": 3.517695684269457e-05, + "loss": 2.255, + "step": 5986000 + }, + { + "epoch": 29.66, + "learning_rate": 3.517571825626849e-05, + "loss": 2.2224, + "step": 5986500 + }, + { + "epoch": 29.66, + "learning_rate": 3.5174479669842406e-05, + "loss": 2.2385, + "step": 5987000 + }, + { + "epoch": 29.66, + "learning_rate": 3.517324108341632e-05, + "loss": 2.2482, + "step": 5987500 + }, + { + "epoch": 29.67, + "learning_rate": 3.517200249699024e-05, + "loss": 2.2485, + "step": 5988000 + }, + { + "epoch": 29.67, + "learning_rate": 3.517076391056415e-05, + "loss": 2.2526, + "step": 5988500 + }, + { + "epoch": 29.67, + "learning_rate": 3.5169525324138066e-05, + "loss": 2.2193, + "step": 5989000 + }, + { + "epoch": 29.67, + "learning_rate": 3.5168286737711983e-05, + "loss": 2.2374, + "step": 5989500 + }, + { + "epoch": 29.68, + "learning_rate": 3.51670481512859e-05, + "loss": 2.2634, + "step": 5990000 + }, + { + "epoch": 29.68, + "learning_rate": 3.516580956485982e-05, + "loss": 2.24, + "step": 5990500 + }, + { + "epoch": 29.68, + "learning_rate": 3.5164570978433734e-05, + "loss": 2.2492, + "step": 5991000 + }, + { + "epoch": 29.68, + "learning_rate": 3.516333239200765e-05, + "loss": 2.265, + "step": 5991500 + }, + { + "epoch": 29.69, + "learning_rate": 3.516209380558157e-05, + "loss": 2.2368, + "step": 5992000 + }, + { + "epoch": 29.69, + "learning_rate": 3.5160855219155485e-05, + "loss": 2.2397, + "step": 5992500 + }, + { + "epoch": 29.69, + "learning_rate": 3.51596166327294e-05, + "loss": 2.2589, + "step": 5993000 + }, + { + "epoch": 29.69, + "learning_rate": 3.515837804630331e-05, + "loss": 2.246, + "step": 5993500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515714193705009e-05, + "loss": 2.2362, + "step": 5994000 + }, + { + "epoch": 29.7, + "learning_rate": 3.5155905827796856e-05, + "loss": 2.2473, + "step": 5994500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515466724137077e-05, + "loss": 2.2488, + "step": 5995000 + }, + { + "epoch": 29.7, + "learning_rate": 3.5153428654944684e-05, + "loss": 2.2277, + "step": 5995500 + }, + { + "epoch": 29.71, + "learning_rate": 3.51521900685186e-05, + "loss": 2.2283, + "step": 5996000 + }, + { + "epoch": 29.71, + "learning_rate": 3.515095148209252e-05, + "loss": 2.245, + "step": 5996500 + }, + { + "epoch": 29.71, + "learning_rate": 3.5149715372839286e-05, + "loss": 2.2349, + "step": 5997000 + }, + { + "epoch": 29.71, + "learning_rate": 3.51484767864132e-05, + "loss": 2.2465, + "step": 5997500 + }, + { + "epoch": 29.72, + "learning_rate": 3.514723819998712e-05, + "loss": 2.2382, + "step": 5998000 + }, + { + "epoch": 29.72, + "learning_rate": 3.514599961356104e-05, + "loss": 2.2591, + "step": 5998500 + }, + { + "epoch": 29.72, + "learning_rate": 3.5144761027134954e-05, + "loss": 2.2273, + "step": 5999000 + }, + { + "epoch": 29.72, + "learning_rate": 3.514352244070887e-05, + "loss": 2.2438, + "step": 5999500 + }, + { + "epoch": 29.73, + "learning_rate": 3.514228385428279e-05, + "loss": 2.2216, + "step": 6000000 + }, + { + "epoch": 29.73, + "learning_rate": 3.5141045267856705e-05, + "loss": 2.2397, + "step": 6000500 + }, + { + "epoch": 29.73, + "learning_rate": 3.513980668143062e-05, + "loss": 2.2442, + "step": 6001000 + }, + { + "epoch": 29.73, + "learning_rate": 3.513856809500454e-05, + "loss": 2.2634, + "step": 6001500 + }, + { + "epoch": 29.74, + "learning_rate": 3.5137329508578455e-05, + "loss": 2.2353, + "step": 6002000 + }, + { + "epoch": 29.74, + "learning_rate": 3.5136090922152366e-05, + "loss": 2.2581, + "step": 6002500 + }, + { + "epoch": 29.74, + "learning_rate": 3.513485233572628e-05, + "loss": 2.2491, + "step": 6003000 + }, + { + "epoch": 29.74, + "learning_rate": 3.51336137493002e-05, + "loss": 2.2587, + "step": 6003500 + }, + { + "epoch": 29.75, + "learning_rate": 3.513238011721982e-05, + "loss": 2.2485, + "step": 6004000 + }, + { + "epoch": 29.75, + "learning_rate": 3.513114153079374e-05, + "loss": 2.2202, + "step": 6004500 + }, + { + "epoch": 29.75, + "learning_rate": 3.5129902944367654e-05, + "loss": 2.2374, + "step": 6005000 + }, + { + "epoch": 29.75, + "learning_rate": 3.512866435794157e-05, + "loss": 2.2464, + "step": 6005500 + }, + { + "epoch": 29.76, + "learning_rate": 3.512742824868834e-05, + "loss": 2.2466, + "step": 6006000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512618966226226e-05, + "loss": 2.2135, + "step": 6006500 + }, + { + "epoch": 29.76, + "learning_rate": 3.5124951075836174e-05, + "loss": 2.2327, + "step": 6007000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512371248941009e-05, + "loss": 2.2538, + "step": 6007500 + }, + { + "epoch": 29.77, + "learning_rate": 3.512247390298401e-05, + "loss": 2.241, + "step": 6008000 + }, + { + "epoch": 29.77, + "learning_rate": 3.512123779373077e-05, + "loss": 2.234, + "step": 6008500 + }, + { + "epoch": 29.77, + "learning_rate": 3.5119999207304686e-05, + "loss": 2.2631, + "step": 6009000 + }, + { + "epoch": 29.77, + "learning_rate": 3.51187606208786e-05, + "loss": 2.2543, + "step": 6009500 + }, + { + "epoch": 29.78, + "learning_rate": 3.511752203445252e-05, + "loss": 2.2295, + "step": 6010000 + }, + { + "epoch": 29.78, + "learning_rate": 3.511628344802644e-05, + "loss": 2.2397, + "step": 6010500 + }, + { + "epoch": 29.78, + "learning_rate": 3.5115044861600354e-05, + "loss": 2.2528, + "step": 6011000 + }, + { + "epoch": 29.78, + "learning_rate": 3.511380627517427e-05, + "loss": 2.2429, + "step": 6011500 + }, + { + "epoch": 29.79, + "learning_rate": 3.511256768874819e-05, + "loss": 2.2785, + "step": 6012000 + }, + { + "epoch": 29.79, + "learning_rate": 3.5111329102322105e-05, + "loss": 2.2572, + "step": 6012500 + }, + { + "epoch": 29.79, + "learning_rate": 3.511009051589602e-05, + "loss": 2.2329, + "step": 6013000 + }, + { + "epoch": 29.79, + "learning_rate": 3.510885192946994e-05, + "loss": 2.2503, + "step": 6013500 + }, + { + "epoch": 29.8, + "learning_rate": 3.5107613343043856e-05, + "loss": 2.2507, + "step": 6014000 + }, + { + "epoch": 29.8, + "learning_rate": 3.510637475661777e-05, + "loss": 2.2361, + "step": 6014500 + }, + { + "epoch": 29.8, + "learning_rate": 3.5105141124537387e-05, + "loss": 2.2559, + "step": 6015000 + }, + { + "epoch": 29.8, + "learning_rate": 3.5103902538111303e-05, + "loss": 2.2426, + "step": 6015500 + }, + { + "epoch": 29.81, + "learning_rate": 3.510266395168522e-05, + "loss": 2.2477, + "step": 6016000 + }, + { + "epoch": 29.81, + "learning_rate": 3.510142536525914e-05, + "loss": 2.235, + "step": 6016500 + }, + { + "epoch": 29.81, + "learning_rate": 3.5100186778833054e-05, + "loss": 2.2402, + "step": 6017000 + }, + { + "epoch": 29.81, + "learning_rate": 3.509894819240697e-05, + "loss": 2.2432, + "step": 6017500 + }, + { + "epoch": 29.82, + "learning_rate": 3.509770960598089e-05, + "loss": 2.2734, + "step": 6018000 + }, + { + "epoch": 29.82, + "learning_rate": 3.5096471019554805e-05, + "loss": 2.2639, + "step": 6018500 + }, + { + "epoch": 29.82, + "learning_rate": 3.5095234910301574e-05, + "loss": 2.2591, + "step": 6019000 + }, + { + "epoch": 29.82, + "learning_rate": 3.509399632387549e-05, + "loss": 2.2149, + "step": 6019500 + }, + { + "epoch": 29.83, + "learning_rate": 3.509276021462225e-05, + "loss": 2.2437, + "step": 6020000 + }, + { + "epoch": 29.83, + "learning_rate": 3.509152162819617e-05, + "loss": 2.2636, + "step": 6020500 + }, + { + "epoch": 29.83, + "learning_rate": 3.509028304177009e-05, + "loss": 2.2386, + "step": 6021000 + }, + { + "epoch": 29.83, + "learning_rate": 3.5089044455344004e-05, + "loss": 2.2542, + "step": 6021500 + }, + { + "epoch": 29.84, + "learning_rate": 3.508780586891792e-05, + "loss": 2.2627, + "step": 6022000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508656975966469e-05, + "loss": 2.2434, + "step": 6022500 + }, + { + "epoch": 29.84, + "learning_rate": 3.5085331173238606e-05, + "loss": 2.2404, + "step": 6023000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508409258681252e-05, + "loss": 2.2144, + "step": 6023500 + }, + { + "epoch": 29.84, + "learning_rate": 3.508285400038644e-05, + "loss": 2.2302, + "step": 6024000 + }, + { + "epoch": 29.85, + "learning_rate": 3.508161541396036e-05, + "loss": 2.2015, + "step": 6024500 + }, + { + "epoch": 29.85, + "learning_rate": 3.5080379304707126e-05, + "loss": 2.2531, + "step": 6025000 + }, + { + "epoch": 29.85, + "learning_rate": 3.507914071828104e-05, + "loss": 2.2415, + "step": 6025500 + }, + { + "epoch": 29.85, + "learning_rate": 3.507790213185495e-05, + "loss": 2.2345, + "step": 6026000 + }, + { + "epoch": 29.86, + "learning_rate": 3.507666354542887e-05, + "loss": 2.246, + "step": 6026500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507542495900279e-05, + "loss": 2.2626, + "step": 6027000 + }, + { + "epoch": 29.86, + "learning_rate": 3.5074186372576704e-05, + "loss": 2.2182, + "step": 6027500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507295026332348e-05, + "loss": 2.2315, + "step": 6028000 + }, + { + "epoch": 29.87, + "learning_rate": 3.507171167689739e-05, + "loss": 2.26, + "step": 6028500 + }, + { + "epoch": 29.87, + "learning_rate": 3.5070473090471306e-05, + "loss": 2.2435, + "step": 6029000 + }, + { + "epoch": 29.87, + "learning_rate": 3.506923450404522e-05, + "loss": 2.2568, + "step": 6029500 + }, + { + "epoch": 29.87, + "learning_rate": 3.506799591761914e-05, + "loss": 2.2584, + "step": 6030000 + }, + { + "epoch": 29.88, + "learning_rate": 3.506675733119306e-05, + "loss": 2.2313, + "step": 6030500 + }, + { + "epoch": 29.88, + "learning_rate": 3.5065518744766974e-05, + "loss": 2.2419, + "step": 6031000 + }, + { + "epoch": 29.88, + "learning_rate": 3.506428015834089e-05, + "loss": 2.2516, + "step": 6031500 + }, + { + "epoch": 29.88, + "learning_rate": 3.506304157191481e-05, + "loss": 2.2528, + "step": 6032000 + }, + { + "epoch": 29.89, + "learning_rate": 3.506180793983443e-05, + "loss": 2.2518, + "step": 6032500 + }, + { + "epoch": 29.89, + "learning_rate": 3.5060569353408346e-05, + "loss": 2.2389, + "step": 6033000 + }, + { + "epoch": 29.89, + "learning_rate": 3.505933076698226e-05, + "loss": 2.2587, + "step": 6033500 + }, + { + "epoch": 29.89, + "learning_rate": 3.505809218055618e-05, + "loss": 2.2524, + "step": 6034000 + }, + { + "epoch": 29.9, + "learning_rate": 3.505685607130295e-05, + "loss": 2.2516, + "step": 6034500 + }, + { + "epoch": 29.9, + "learning_rate": 3.5055617484876865e-05, + "loss": 2.244, + "step": 6035000 + }, + { + "epoch": 29.9, + "learning_rate": 3.505437889845078e-05, + "loss": 2.2784, + "step": 6035500 + }, + { + "epoch": 29.9, + "learning_rate": 3.50531403120247e-05, + "loss": 2.2512, + "step": 6036000 + }, + { + "epoch": 29.91, + "learning_rate": 3.5051901725598616e-05, + "loss": 2.2421, + "step": 6036500 + }, + { + "epoch": 29.91, + "learning_rate": 3.5050663139172526e-05, + "loss": 2.2322, + "step": 6037000 + }, + { + "epoch": 29.91, + "learning_rate": 3.5049427029919295e-05, + "loss": 2.2487, + "step": 6037500 + }, + { + "epoch": 29.91, + "learning_rate": 3.5048190920666064e-05, + "loss": 2.2687, + "step": 6038000 + }, + { + "epoch": 29.92, + "learning_rate": 3.504695233423998e-05, + "loss": 2.2331, + "step": 6038500 + }, + { + "epoch": 29.92, + "learning_rate": 3.50457137478139e-05, + "loss": 2.2588, + "step": 6039000 + }, + { + "epoch": 29.92, + "learning_rate": 3.5044475161387815e-05, + "loss": 2.2409, + "step": 6039500 + }, + { + "epoch": 29.92, + "learning_rate": 3.504323657496173e-05, + "loss": 2.2369, + "step": 6040000 + }, + { + "epoch": 29.93, + "learning_rate": 3.504199798853565e-05, + "loss": 2.2599, + "step": 6040500 + }, + { + "epoch": 29.93, + "learning_rate": 3.504076683362812e-05, + "loss": 2.2484, + "step": 6041000 + }, + { + "epoch": 29.93, + "learning_rate": 3.503952824720204e-05, + "loss": 2.2689, + "step": 6041500 + }, + { + "epoch": 29.93, + "learning_rate": 3.5038289660775955e-05, + "loss": 2.2367, + "step": 6042000 + }, + { + "epoch": 29.94, + "learning_rate": 3.503705107434987e-05, + "loss": 2.2461, + "step": 6042500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503581248792379e-05, + "loss": 2.2407, + "step": 6043000 + }, + { + "epoch": 29.94, + "learning_rate": 3.5034573901497706e-05, + "loss": 2.2564, + "step": 6043500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503333531507162e-05, + "loss": 2.2676, + "step": 6044000 + }, + { + "epoch": 29.95, + "learning_rate": 3.503209672864553e-05, + "loss": 2.2478, + "step": 6044500 + }, + { + "epoch": 29.95, + "learning_rate": 3.503085814221945e-05, + "loss": 2.2507, + "step": 6045000 + }, + { + "epoch": 29.95, + "learning_rate": 3.5029619555793367e-05, + "loss": 2.2605, + "step": 6045500 + }, + { + "epoch": 29.95, + "learning_rate": 3.5028383446540135e-05, + "loss": 2.2413, + "step": 6046000 + }, + { + "epoch": 29.96, + "learning_rate": 3.502714486011405e-05, + "loss": 2.2463, + "step": 6046500 + }, + { + "epoch": 29.96, + "learning_rate": 3.502590627368797e-05, + "loss": 2.2444, + "step": 6047000 + }, + { + "epoch": 29.96, + "learning_rate": 3.5024667687261886e-05, + "loss": 2.2214, + "step": 6047500 + }, + { + "epoch": 29.96, + "learning_rate": 3.5023429100835796e-05, + "loss": 2.2366, + "step": 6048000 + }, + { + "epoch": 29.97, + "learning_rate": 3.502219051440971e-05, + "loss": 2.2388, + "step": 6048500 + }, + { + "epoch": 29.97, + "learning_rate": 3.502095192798363e-05, + "loss": 2.265, + "step": 6049000 + }, + { + "epoch": 29.97, + "learning_rate": 3.501971334155755e-05, + "loss": 2.2427, + "step": 6049500 + }, + { + "epoch": 29.97, + "learning_rate": 3.5018474755131464e-05, + "loss": 2.243, + "step": 6050000 + }, + { + "epoch": 29.98, + "learning_rate": 3.501723616870538e-05, + "loss": 2.2914, + "step": 6050500 + }, + { + "epoch": 29.98, + "learning_rate": 3.50159975822793e-05, + "loss": 2.2452, + "step": 6051000 + }, + { + "epoch": 29.98, + "learning_rate": 3.5014758995853215e-05, + "loss": 2.2627, + "step": 6051500 + }, + { + "epoch": 29.98, + "learning_rate": 3.501352040942713e-05, + "loss": 2.2594, + "step": 6052000 + }, + { + "epoch": 29.99, + "learning_rate": 3.50122843001739e-05, + "loss": 2.2361, + "step": 6052500 + }, + { + "epoch": 29.99, + "learning_rate": 3.501104571374782e-05, + "loss": 2.2669, + "step": 6053000 + }, + { + "epoch": 29.99, + "learning_rate": 3.500980712732173e-05, + "loss": 2.264, + "step": 6053500 + }, + { + "epoch": 29.99, + "learning_rate": 3.5008568540895645e-05, + "loss": 2.244, + "step": 6054000 + }, + { + "epoch": 30.0, + "learning_rate": 3.500732995446956e-05, + "loss": 2.2693, + "step": 6054500 + }, + { + "epoch": 30.0, + "learning_rate": 3.500609384521633e-05, + "loss": 2.2239, + "step": 6055000 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.6577156623596847, + "eval_accuracy_mlm": 0.6136358534754317, + "eval_accuracy_nsp": 0.8656725198953558, + "eval_loss": 2.330178737640381, + "eval_runtime": 146.0024, + "eval_samples_per_second": 1746.266, + "eval_steps_per_second": 72.766, + "step": 6055290 + }, + { + "epoch": 30.0, + "learning_rate": 3.500485525879025e-05, + "loss": 2.2413, + "step": 6055500 + }, + { + "epoch": 30.0, + "learning_rate": 3.5003616672364164e-05, + "loss": 2.2212, + "step": 6056000 + }, + { + "epoch": 30.01, + "learning_rate": 3.500237808593808e-05, + "loss": 2.2128, + "step": 6056500 + }, + { + "epoch": 30.01, + "learning_rate": 3.500114197668485e-05, + "loss": 2.225, + "step": 6057000 + }, + { + "epoch": 30.01, + "learning_rate": 3.499990339025877e-05, + "loss": 2.2007, + "step": 6057500 + }, + { + "epoch": 30.01, + "learning_rate": 3.4998664803832684e-05, + "loss": 2.2145, + "step": 6058000 + }, + { + "epoch": 30.02, + "learning_rate": 3.499742869457945e-05, + "loss": 2.2361, + "step": 6058500 + }, + { + "epoch": 30.02, + "learning_rate": 3.499619010815337e-05, + "loss": 2.2315, + "step": 6059000 + }, + { + "epoch": 30.02, + "learning_rate": 3.4994951521727286e-05, + "loss": 2.2215, + "step": 6059500 + }, + { + "epoch": 30.02, + "learning_rate": 3.49937129353012e-05, + "loss": 2.2348, + "step": 6060000 + }, + { + "epoch": 30.03, + "learning_rate": 3.4992474348875113e-05, + "loss": 2.198, + "step": 6060500 + }, + { + "epoch": 30.03, + "learning_rate": 3.499123576244903e-05, + "loss": 2.2208, + "step": 6061000 + }, + { + "epoch": 30.03, + "learning_rate": 3.498999717602295e-05, + "loss": 2.2319, + "step": 6061500 + }, + { + "epoch": 30.03, + "learning_rate": 3.4988758589596864e-05, + "loss": 2.1845, + "step": 6062000 + }, + { + "epoch": 30.04, + "learning_rate": 3.498752000317078e-05, + "loss": 2.2193, + "step": 6062500 + }, + { + "epoch": 30.04, + "learning_rate": 3.49862814167447e-05, + "loss": 2.215, + "step": 6063000 + }, + { + "epoch": 30.04, + "learning_rate": 3.4985042830318615e-05, + "loss": 2.2272, + "step": 6063500 + }, + { + "epoch": 30.04, + "learning_rate": 3.4983806721065384e-05, + "loss": 2.2079, + "step": 6064000 + }, + { + "epoch": 30.05, + "learning_rate": 3.49825681346393e-05, + "loss": 2.2012, + "step": 6064500 + }, + { + "epoch": 30.05, + "learning_rate": 3.498132954821322e-05, + "loss": 2.2302, + "step": 6065000 + }, + { + "epoch": 30.05, + "learning_rate": 3.4980090961787135e-05, + "loss": 2.2268, + "step": 6065500 + }, + { + "epoch": 30.05, + "learning_rate": 3.497885237536105e-05, + "loss": 2.2134, + "step": 6066000 + }, + { + "epoch": 30.06, + "learning_rate": 3.497761378893497e-05, + "loss": 2.2258, + "step": 6066500 + }, + { + "epoch": 30.06, + "learning_rate": 3.497637520250888e-05, + "loss": 2.244, + "step": 6067000 + }, + { + "epoch": 30.06, + "learning_rate": 3.4975136616082796e-05, + "loss": 2.2202, + "step": 6067500 + }, + { + "epoch": 30.06, + "learning_rate": 3.4973900506829564e-05, + "loss": 2.2133, + "step": 6068000 + }, + { + "epoch": 30.07, + "learning_rate": 3.497266192040348e-05, + "loss": 2.2548, + "step": 6068500 + }, + { + "epoch": 30.07, + "learning_rate": 3.49714233339774e-05, + "loss": 2.2356, + "step": 6069000 + }, + { + "epoch": 30.07, + "learning_rate": 3.497018970189702e-05, + "loss": 2.2452, + "step": 6069500 + }, + { + "epoch": 30.07, + "learning_rate": 3.4968951115470936e-05, + "loss": 2.2254, + "step": 6070000 + }, + { + "epoch": 30.08, + "learning_rate": 3.4967715006217705e-05, + "loss": 2.2346, + "step": 6070500 + }, + { + "epoch": 30.08, + "learning_rate": 3.496647641979162e-05, + "loss": 2.2092, + "step": 6071000 + }, + { + "epoch": 30.08, + "learning_rate": 3.496523783336554e-05, + "loss": 2.2229, + "step": 6071500 + }, + { + "epoch": 30.08, + "learning_rate": 3.4963999246939455e-05, + "loss": 2.2513, + "step": 6072000 + }, + { + "epoch": 30.09, + "learning_rate": 3.496276066051337e-05, + "loss": 2.2273, + "step": 6072500 + }, + { + "epoch": 30.09, + "learning_rate": 3.496152207408729e-05, + "loss": 2.2222, + "step": 6073000 + }, + { + "epoch": 30.09, + "learning_rate": 3.4960283487661206e-05, + "loss": 2.2396, + "step": 6073500 + }, + { + "epoch": 30.09, + "learning_rate": 3.495904490123512e-05, + "loss": 2.2165, + "step": 6074000 + }, + { + "epoch": 30.1, + "learning_rate": 3.495780879198189e-05, + "loss": 2.1869, + "step": 6074500 + }, + { + "epoch": 30.1, + "learning_rate": 3.49565702055558e-05, + "loss": 2.2352, + "step": 6075000 + }, + { + "epoch": 30.1, + "learning_rate": 3.495533161912972e-05, + "loss": 2.2344, + "step": 6075500 + }, + { + "epoch": 30.1, + "learning_rate": 3.4954093032703636e-05, + "loss": 2.2467, + "step": 6076000 + }, + { + "epoch": 30.11, + "learning_rate": 3.495285444627755e-05, + "loss": 2.2179, + "step": 6076500 + }, + { + "epoch": 30.11, + "learning_rate": 3.495161585985147e-05, + "loss": 2.2305, + "step": 6077000 + }, + { + "epoch": 30.11, + "learning_rate": 3.495037727342539e-05, + "loss": 2.2301, + "step": 6077500 + }, + { + "epoch": 30.11, + "learning_rate": 3.494914364134501e-05, + "loss": 2.2311, + "step": 6078000 + }, + { + "epoch": 30.11, + "learning_rate": 3.4947905054918924e-05, + "loss": 2.2474, + "step": 6078500 + }, + { + "epoch": 30.12, + "learning_rate": 3.494666646849284e-05, + "loss": 2.2372, + "step": 6079000 + }, + { + "epoch": 30.12, + "learning_rate": 3.494542788206676e-05, + "loss": 2.2275, + "step": 6079500 + }, + { + "epoch": 30.12, + "learning_rate": 3.4944189295640675e-05, + "loss": 2.2308, + "step": 6080000 + }, + { + "epoch": 30.12, + "learning_rate": 3.494295070921459e-05, + "loss": 2.2416, + "step": 6080500 + }, + { + "epoch": 30.13, + "learning_rate": 3.494171212278851e-05, + "loss": 2.2317, + "step": 6081000 + }, + { + "epoch": 30.13, + "learning_rate": 3.494047353636242e-05, + "loss": 2.1961, + "step": 6081500 + }, + { + "epoch": 30.13, + "learning_rate": 3.4939234949936336e-05, + "loss": 2.2102, + "step": 6082000 + }, + { + "epoch": 30.13, + "learning_rate": 3.493799636351025e-05, + "loss": 2.2449, + "step": 6082500 + }, + { + "epoch": 30.14, + "learning_rate": 3.493676025425702e-05, + "loss": 2.2392, + "step": 6083000 + }, + { + "epoch": 30.14, + "learning_rate": 3.493552166783094e-05, + "loss": 2.2121, + "step": 6083500 + }, + { + "epoch": 30.14, + "learning_rate": 3.4934283081404856e-05, + "loss": 2.2322, + "step": 6084000 + }, + { + "epoch": 30.14, + "learning_rate": 3.493304449497877e-05, + "loss": 2.2107, + "step": 6084500 + }, + { + "epoch": 30.15, + "learning_rate": 3.493180590855269e-05, + "loss": 2.2081, + "step": 6085000 + }, + { + "epoch": 30.15, + "learning_rate": 3.4930567322126606e-05, + "loss": 2.2319, + "step": 6085500 + }, + { + "epoch": 30.15, + "learning_rate": 3.4929328735700523e-05, + "loss": 2.2277, + "step": 6086000 + }, + { + "epoch": 30.15, + "learning_rate": 3.492809262644729e-05, + "loss": 2.2258, + "step": 6086500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492685404002121e-05, + "loss": 2.224, + "step": 6087000 + }, + { + "epoch": 30.16, + "learning_rate": 3.4925615453595126e-05, + "loss": 2.2234, + "step": 6087500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492437934434189e-05, + "loss": 2.2201, + "step": 6088000 + }, + { + "epoch": 30.16, + "learning_rate": 3.4923140757915805e-05, + "loss": 2.2361, + "step": 6088500 + }, + { + "epoch": 30.17, + "learning_rate": 3.492190217148972e-05, + "loss": 2.2149, + "step": 6089000 + }, + { + "epoch": 30.17, + "learning_rate": 3.492066358506364e-05, + "loss": 2.2179, + "step": 6089500 + }, + { + "epoch": 30.17, + "learning_rate": 3.4919424998637556e-05, + "loss": 2.2521, + "step": 6090000 + }, + { + "epoch": 30.17, + "learning_rate": 3.491818641221147e-05, + "loss": 2.2198, + "step": 6090500 + }, + { + "epoch": 30.18, + "learning_rate": 3.491694782578539e-05, + "loss": 2.2251, + "step": 6091000 + }, + { + "epoch": 30.18, + "learning_rate": 3.4915709239359307e-05, + "loss": 2.2303, + "step": 6091500 + }, + { + "epoch": 30.18, + "learning_rate": 3.4914470652933224e-05, + "loss": 2.2546, + "step": 6092000 + }, + { + "epoch": 30.18, + "learning_rate": 3.491323206650714e-05, + "loss": 2.2206, + "step": 6092500 + }, + { + "epoch": 30.19, + "learning_rate": 3.491199595725391e-05, + "loss": 2.2407, + "step": 6093000 + }, + { + "epoch": 30.19, + "learning_rate": 3.4910757370827826e-05, + "loss": 2.2124, + "step": 6093500 + }, + { + "epoch": 30.19, + "learning_rate": 3.490951878440174e-05, + "loss": 2.2374, + "step": 6094000 + }, + { + "epoch": 30.19, + "learning_rate": 3.490828019797566e-05, + "loss": 2.2252, + "step": 6094500 + }, + { + "epoch": 30.2, + "learning_rate": 3.490704161154957e-05, + "loss": 2.2356, + "step": 6095000 + }, + { + "epoch": 30.2, + "learning_rate": 3.490580302512349e-05, + "loss": 2.2251, + "step": 6095500 + }, + { + "epoch": 30.2, + "learning_rate": 3.4904564438697404e-05, + "loss": 2.2153, + "step": 6096000 + }, + { + "epoch": 30.2, + "learning_rate": 3.490332585227132e-05, + "loss": 2.2476, + "step": 6096500 + }, + { + "epoch": 30.21, + "learning_rate": 3.490208726584524e-05, + "loss": 2.2666, + "step": 6097000 + }, + { + "epoch": 30.21, + "learning_rate": 3.4900848679419155e-05, + "loss": 2.2188, + "step": 6097500 + }, + { + "epoch": 30.21, + "learning_rate": 3.4899610092993065e-05, + "loss": 2.2538, + "step": 6098000 + }, + { + "epoch": 30.21, + "learning_rate": 3.489837398373984e-05, + "loss": 2.2291, + "step": 6098500 + }, + { + "epoch": 30.22, + "learning_rate": 3.489713539731376e-05, + "loss": 2.2363, + "step": 6099000 + }, + { + "epoch": 30.22, + "learning_rate": 3.4895896810887674e-05, + "loss": 2.2546, + "step": 6099500 + }, + { + "epoch": 30.22, + "learning_rate": 3.489465822446159e-05, + "loss": 2.2155, + "step": 6100000 + }, + { + "epoch": 30.22, + "learning_rate": 3.489341963803551e-05, + "loss": 2.2316, + "step": 6100500 + }, + { + "epoch": 30.23, + "learning_rate": 3.489218105160942e-05, + "loss": 2.2429, + "step": 6101000 + }, + { + "epoch": 30.23, + "learning_rate": 3.4890944942356194e-05, + "loss": 2.2284, + "step": 6101500 + }, + { + "epoch": 30.23, + "learning_rate": 3.4889708833102956e-05, + "loss": 2.2208, + "step": 6102000 + }, + { + "epoch": 30.23, + "learning_rate": 3.488847024667687e-05, + "loss": 2.2401, + "step": 6102500 + }, + { + "epoch": 30.24, + "learning_rate": 3.488723166025079e-05, + "loss": 2.2289, + "step": 6103000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488599307382471e-05, + "loss": 2.2329, + "step": 6103500 + }, + { + "epoch": 30.24, + "learning_rate": 3.4884754487398624e-05, + "loss": 2.2288, + "step": 6104000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488351590097254e-05, + "loss": 2.2316, + "step": 6104500 + }, + { + "epoch": 30.25, + "learning_rate": 3.488227731454646e-05, + "loss": 2.2553, + "step": 6105000 + }, + { + "epoch": 30.25, + "learning_rate": 3.4881038728120375e-05, + "loss": 2.21, + "step": 6105500 + }, + { + "epoch": 30.25, + "learning_rate": 3.487980261886714e-05, + "loss": 2.2134, + "step": 6106000 + }, + { + "epoch": 30.25, + "learning_rate": 3.487856403244106e-05, + "loss": 2.2151, + "step": 6106500 + }, + { + "epoch": 30.26, + "learning_rate": 3.487732544601498e-05, + "loss": 2.229, + "step": 6107000 + }, + { + "epoch": 30.26, + "learning_rate": 3.4876086859588894e-05, + "loss": 2.2177, + "step": 6107500 + }, + { + "epoch": 30.26, + "learning_rate": 3.4874850750335656e-05, + "loss": 2.2384, + "step": 6108000 + }, + { + "epoch": 30.26, + "learning_rate": 3.487361216390957e-05, + "loss": 2.2151, + "step": 6108500 + }, + { + "epoch": 30.27, + "learning_rate": 3.487237357748349e-05, + "loss": 2.2436, + "step": 6109000 + }, + { + "epoch": 30.27, + "learning_rate": 3.487113746823026e-05, + "loss": 2.2404, + "step": 6109500 + }, + { + "epoch": 30.27, + "learning_rate": 3.4869898881804176e-05, + "loss": 2.2555, + "step": 6110000 + }, + { + "epoch": 30.27, + "learning_rate": 3.486866029537809e-05, + "loss": 2.2404, + "step": 6110500 + }, + { + "epoch": 30.28, + "learning_rate": 3.486742170895201e-05, + "loss": 2.2173, + "step": 6111000 + }, + { + "epoch": 30.28, + "learning_rate": 3.486618559969878e-05, + "loss": 2.2282, + "step": 6111500 + }, + { + "epoch": 30.28, + "learning_rate": 3.486494701327269e-05, + "loss": 2.2582, + "step": 6112000 + }, + { + "epoch": 30.28, + "learning_rate": 3.4863708426846606e-05, + "loss": 2.2264, + "step": 6112500 + }, + { + "epoch": 30.29, + "learning_rate": 3.486246984042052e-05, + "loss": 2.2294, + "step": 6113000 + }, + { + "epoch": 30.29, + "learning_rate": 3.486123125399444e-05, + "loss": 2.2493, + "step": 6113500 + }, + { + "epoch": 30.29, + "learning_rate": 3.4859992667568356e-05, + "loss": 2.2327, + "step": 6114000 + }, + { + "epoch": 30.29, + "learning_rate": 3.485875408114227e-05, + "loss": 2.2406, + "step": 6114500 + }, + { + "epoch": 30.3, + "learning_rate": 3.485751549471619e-05, + "loss": 2.2053, + "step": 6115000 + }, + { + "epoch": 30.3, + "learning_rate": 3.485627690829011e-05, + "loss": 2.2343, + "step": 6115500 + }, + { + "epoch": 30.3, + "learning_rate": 3.4855040799036876e-05, + "loss": 2.2061, + "step": 6116000 + }, + { + "epoch": 30.3, + "learning_rate": 3.4853804689783645e-05, + "loss": 2.2168, + "step": 6116500 + }, + { + "epoch": 30.31, + "learning_rate": 3.485256610335756e-05, + "loss": 2.2477, + "step": 6117000 + }, + { + "epoch": 30.31, + "learning_rate": 3.485132751693148e-05, + "loss": 2.233, + "step": 6117500 + }, + { + "epoch": 30.31, + "learning_rate": 3.4850088930505395e-05, + "loss": 2.2193, + "step": 6118000 + }, + { + "epoch": 30.31, + "learning_rate": 3.484885034407931e-05, + "loss": 2.224, + "step": 6118500 + }, + { + "epoch": 30.32, + "learning_rate": 3.484761175765322e-05, + "loss": 2.221, + "step": 6119000 + }, + { + "epoch": 30.32, + "learning_rate": 3.48463756484e-05, + "loss": 2.2206, + "step": 6119500 + }, + { + "epoch": 30.32, + "learning_rate": 3.4845137061973915e-05, + "loss": 2.2106, + "step": 6120000 + }, + { + "epoch": 30.32, + "learning_rate": 3.4843898475547825e-05, + "loss": 2.2688, + "step": 6120500 + }, + { + "epoch": 30.33, + "learning_rate": 3.484265988912174e-05, + "loss": 2.224, + "step": 6121000 + }, + { + "epoch": 30.33, + "learning_rate": 3.484142130269566e-05, + "loss": 2.2378, + "step": 6121500 + }, + { + "epoch": 30.33, + "learning_rate": 3.4840182716269576e-05, + "loss": 2.1927, + "step": 6122000 + }, + { + "epoch": 30.33, + "learning_rate": 3.483894412984349e-05, + "loss": 2.2423, + "step": 6122500 + }, + { + "epoch": 30.34, + "learning_rate": 3.483770554341741e-05, + "loss": 2.2092, + "step": 6123000 + }, + { + "epoch": 30.34, + "learning_rate": 3.483646695699133e-05, + "loss": 2.242, + "step": 6123500 + }, + { + "epoch": 30.34, + "learning_rate": 3.4835228370565244e-05, + "loss": 2.217, + "step": 6124000 + }, + { + "epoch": 30.34, + "learning_rate": 3.483398978413916e-05, + "loss": 2.2285, + "step": 6124500 + }, + { + "epoch": 30.35, + "learning_rate": 3.483275119771308e-05, + "loss": 2.2339, + "step": 6125000 + }, + { + "epoch": 30.35, + "learning_rate": 3.4831512611286995e-05, + "loss": 2.235, + "step": 6125500 + }, + { + "epoch": 30.35, + "learning_rate": 3.483027402486091e-05, + "loss": 2.2277, + "step": 6126000 + }, + { + "epoch": 30.35, + "learning_rate": 3.482903543843483e-05, + "loss": 2.2282, + "step": 6126500 + }, + { + "epoch": 30.36, + "learning_rate": 3.482779932918159e-05, + "loss": 2.2184, + "step": 6127000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482656321992836e-05, + "loss": 2.2337, + "step": 6127500 + }, + { + "epoch": 30.36, + "learning_rate": 3.4825324633502276e-05, + "loss": 2.256, + "step": 6128000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482408852424905e-05, + "loss": 2.2668, + "step": 6128500 + }, + { + "epoch": 30.37, + "learning_rate": 3.482284993782297e-05, + "loss": 2.254, + "step": 6129000 + }, + { + "epoch": 30.37, + "learning_rate": 3.482161382856973e-05, + "loss": 2.2228, + "step": 6129500 + }, + { + "epoch": 30.37, + "learning_rate": 3.482037524214365e-05, + "loss": 2.2304, + "step": 6130000 + }, + { + "epoch": 30.37, + "learning_rate": 3.4819136655717565e-05, + "loss": 2.2172, + "step": 6130500 + }, + { + "epoch": 30.38, + "learning_rate": 3.481789806929148e-05, + "loss": 2.2406, + "step": 6131000 + }, + { + "epoch": 30.38, + "learning_rate": 3.481666196003825e-05, + "loss": 2.2339, + "step": 6131500 + }, + { + "epoch": 30.38, + "learning_rate": 3.481542337361217e-05, + "loss": 2.2278, + "step": 6132000 + }, + { + "epoch": 30.38, + "learning_rate": 3.4814184787186084e-05, + "loss": 2.2436, + "step": 6132500 + }, + { + "epoch": 30.39, + "learning_rate": 3.481294620076e-05, + "loss": 2.2639, + "step": 6133000 + }, + { + "epoch": 30.39, + "learning_rate": 3.481170761433392e-05, + "loss": 2.2498, + "step": 6133500 + }, + { + "epoch": 30.39, + "learning_rate": 3.481047150508068e-05, + "loss": 2.2274, + "step": 6134000 + }, + { + "epoch": 30.39, + "learning_rate": 3.48092329186546e-05, + "loss": 2.251, + "step": 6134500 + }, + { + "epoch": 30.39, + "learning_rate": 3.4807994332228514e-05, + "loss": 2.2089, + "step": 6135000 + }, + { + "epoch": 30.4, + "learning_rate": 3.480675574580243e-05, + "loss": 2.2332, + "step": 6135500 + }, + { + "epoch": 30.4, + "learning_rate": 3.48055196365492e-05, + "loss": 2.2195, + "step": 6136000 + }, + { + "epoch": 30.4, + "learning_rate": 3.4804281050123117e-05, + "loss": 2.2192, + "step": 6136500 + }, + { + "epoch": 30.4, + "learning_rate": 3.4803042463697033e-05, + "loss": 2.2242, + "step": 6137000 + }, + { + "epoch": 30.41, + "learning_rate": 3.480180387727095e-05, + "loss": 2.2118, + "step": 6137500 + }, + { + "epoch": 30.41, + "learning_rate": 3.480056529084487e-05, + "loss": 2.2459, + "step": 6138000 + }, + { + "epoch": 30.41, + "learning_rate": 3.4799326704418784e-05, + "loss": 2.2424, + "step": 6138500 + }, + { + "epoch": 30.41, + "learning_rate": 3.47980881179927e-05, + "loss": 2.2348, + "step": 6139000 + }, + { + "epoch": 30.42, + "learning_rate": 3.479684953156662e-05, + "loss": 2.2165, + "step": 6139500 + }, + { + "epoch": 30.42, + "learning_rate": 3.4795610945140535e-05, + "loss": 2.2224, + "step": 6140000 + }, + { + "epoch": 30.42, + "learning_rate": 3.47943748358873e-05, + "loss": 2.2588, + "step": 6140500 + }, + { + "epoch": 30.42, + "learning_rate": 3.4793138726634066e-05, + "loss": 2.2282, + "step": 6141000 + }, + { + "epoch": 30.43, + "learning_rate": 3.479190014020798e-05, + "loss": 2.2549, + "step": 6141500 + }, + { + "epoch": 30.43, + "learning_rate": 3.47906615537819e-05, + "loss": 2.2456, + "step": 6142000 + }, + { + "epoch": 30.43, + "learning_rate": 3.478942296735582e-05, + "loss": 2.2379, + "step": 6142500 + }, + { + "epoch": 30.43, + "learning_rate": 3.4788184380929734e-05, + "loss": 2.2362, + "step": 6143000 + }, + { + "epoch": 30.44, + "learning_rate": 3.478694579450365e-05, + "loss": 2.2371, + "step": 6143500 + }, + { + "epoch": 30.44, + "learning_rate": 3.478570720807757e-05, + "loss": 2.2568, + "step": 6144000 + }, + { + "epoch": 30.44, + "learning_rate": 3.4784468621651484e-05, + "loss": 2.2387, + "step": 6144500 + }, + { + "epoch": 30.44, + "learning_rate": 3.47832300352254e-05, + "loss": 2.2459, + "step": 6145000 + }, + { + "epoch": 30.45, + "learning_rate": 3.478199144879932e-05, + "loss": 2.2056, + "step": 6145500 + }, + { + "epoch": 30.45, + "learning_rate": 3.4780752862373235e-05, + "loss": 2.2531, + "step": 6146000 + }, + { + "epoch": 30.45, + "learning_rate": 3.477951427594715e-05, + "loss": 2.2194, + "step": 6146500 + }, + { + "epoch": 30.45, + "learning_rate": 3.477827568952107e-05, + "loss": 2.2402, + "step": 6147000 + }, + { + "epoch": 30.46, + "learning_rate": 3.4777037103094986e-05, + "loss": 2.2502, + "step": 6147500 + }, + { + "epoch": 30.46, + "learning_rate": 3.47757985166689e-05, + "loss": 2.2541, + "step": 6148000 + }, + { + "epoch": 30.46, + "learning_rate": 3.477455993024281e-05, + "loss": 2.2304, + "step": 6148500 + }, + { + "epoch": 30.46, + "learning_rate": 3.477332134381673e-05, + "loss": 2.2356, + "step": 6149000 + }, + { + "epoch": 30.47, + "learning_rate": 3.47720852345635e-05, + "loss": 2.2525, + "step": 6149500 + }, + { + "epoch": 30.47, + "learning_rate": 3.477084912531027e-05, + "loss": 2.2437, + "step": 6150000 + }, + { + "epoch": 30.47, + "learning_rate": 3.4769610538884185e-05, + "loss": 2.2182, + "step": 6150500 + }, + { + "epoch": 30.47, + "learning_rate": 3.47683719524581e-05, + "loss": 2.2214, + "step": 6151000 + }, + { + "epoch": 30.48, + "learning_rate": 3.476713336603202e-05, + "loss": 2.2403, + "step": 6151500 + }, + { + "epoch": 30.48, + "learning_rate": 3.4765894779605935e-05, + "loss": 2.2509, + "step": 6152000 + }, + { + "epoch": 30.48, + "learning_rate": 3.476465619317985e-05, + "loss": 2.2426, + "step": 6152500 + }, + { + "epoch": 30.48, + "learning_rate": 3.476342008392662e-05, + "loss": 2.2147, + "step": 6153000 + }, + { + "epoch": 30.49, + "learning_rate": 3.476218149750053e-05, + "loss": 2.2379, + "step": 6153500 + }, + { + "epoch": 30.49, + "learning_rate": 3.476094291107445e-05, + "loss": 2.1983, + "step": 6154000 + }, + { + "epoch": 30.49, + "learning_rate": 3.4759704324648365e-05, + "loss": 2.2076, + "step": 6154500 + }, + { + "epoch": 30.49, + "learning_rate": 3.4758468215395134e-05, + "loss": 2.22, + "step": 6155000 + }, + { + "epoch": 30.5, + "learning_rate": 3.475722962896905e-05, + "loss": 2.2341, + "step": 6155500 + }, + { + "epoch": 30.5, + "learning_rate": 3.475599351971582e-05, + "loss": 2.2196, + "step": 6156000 + }, + { + "epoch": 30.5, + "learning_rate": 3.475475741046259e-05, + "loss": 2.2361, + "step": 6156500 + }, + { + "epoch": 30.5, + "learning_rate": 3.4753518824036505e-05, + "loss": 2.2396, + "step": 6157000 + }, + { + "epoch": 30.51, + "learning_rate": 3.475228023761042e-05, + "loss": 2.2363, + "step": 6157500 + }, + { + "epoch": 30.51, + "learning_rate": 3.475104165118434e-05, + "loss": 2.235, + "step": 6158000 + }, + { + "epoch": 30.51, + "learning_rate": 3.474980306475825e-05, + "loss": 2.247, + "step": 6158500 + }, + { + "epoch": 30.51, + "learning_rate": 3.4748564478332166e-05, + "loss": 2.2427, + "step": 6159000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474732589190608e-05, + "loss": 2.2194, + "step": 6159500 + }, + { + "epoch": 30.52, + "learning_rate": 3.474608730548e-05, + "loss": 2.2395, + "step": 6160000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474484871905392e-05, + "loss": 2.2607, + "step": 6160500 + }, + { + "epoch": 30.52, + "learning_rate": 3.4743610132627834e-05, + "loss": 2.2377, + "step": 6161000 + }, + { + "epoch": 30.53, + "learning_rate": 3.474237154620175e-05, + "loss": 2.2333, + "step": 6161500 + }, + { + "epoch": 30.53, + "learning_rate": 3.474113295977567e-05, + "loss": 2.226, + "step": 6162000 + }, + { + "epoch": 30.53, + "learning_rate": 3.4739894373349585e-05, + "loss": 2.2342, + "step": 6162500 + }, + { + "epoch": 30.53, + "learning_rate": 3.4738658264096354e-05, + "loss": 2.2176, + "step": 6163000 + }, + { + "epoch": 30.54, + "learning_rate": 3.473741967767027e-05, + "loss": 2.2501, + "step": 6163500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473618109124419e-05, + "loss": 2.2182, + "step": 6164000 + }, + { + "epoch": 30.54, + "learning_rate": 3.4734942504818104e-05, + "loss": 2.2549, + "step": 6164500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473370391839202e-05, + "loss": 2.2135, + "step": 6165000 + }, + { + "epoch": 30.55, + "learning_rate": 3.473246533196594e-05, + "loss": 2.2249, + "step": 6165500 + }, + { + "epoch": 30.55, + "learning_rate": 3.47312292227127e-05, + "loss": 2.2566, + "step": 6166000 + }, + { + "epoch": 30.55, + "learning_rate": 3.4729993113459476e-05, + "loss": 2.2149, + "step": 6166500 + }, + { + "epoch": 30.55, + "learning_rate": 3.472875452703339e-05, + "loss": 2.2436, + "step": 6167000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472751594060731e-05, + "loss": 2.2307, + "step": 6167500 + }, + { + "epoch": 30.56, + "learning_rate": 3.472627983135407e-05, + "loss": 2.2567, + "step": 6168000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472504124492799e-05, + "loss": 2.198, + "step": 6168500 + }, + { + "epoch": 30.56, + "learning_rate": 3.4723802658501906e-05, + "loss": 2.2303, + "step": 6169000 + }, + { + "epoch": 30.57, + "learning_rate": 3.472256407207582e-05, + "loss": 2.229, + "step": 6169500 + }, + { + "epoch": 30.57, + "learning_rate": 3.472132548564974e-05, + "loss": 2.2487, + "step": 6170000 + }, + { + "epoch": 30.57, + "learning_rate": 3.4720086899223656e-05, + "loss": 2.2399, + "step": 6170500 + }, + { + "epoch": 30.57, + "learning_rate": 3.4718848312797567e-05, + "loss": 2.2207, + "step": 6171000 + }, + { + "epoch": 30.58, + "learning_rate": 3.4717609726371483e-05, + "loss": 2.2181, + "step": 6171500 + }, + { + "epoch": 30.58, + "learning_rate": 3.47163711399454e-05, + "loss": 2.2427, + "step": 6172000 + }, + { + "epoch": 30.58, + "learning_rate": 3.471513255351932e-05, + "loss": 2.2214, + "step": 6172500 + }, + { + "epoch": 30.58, + "learning_rate": 3.4713893967093234e-05, + "loss": 2.2395, + "step": 6173000 + }, + { + "epoch": 30.59, + "learning_rate": 3.471265538066715e-05, + "loss": 2.2376, + "step": 6173500 + }, + { + "epoch": 30.59, + "learning_rate": 3.471141679424107e-05, + "loss": 2.2301, + "step": 6174000 + }, + { + "epoch": 30.59, + "learning_rate": 3.4710178207814985e-05, + "loss": 2.2256, + "step": 6174500 + }, + { + "epoch": 30.59, + "learning_rate": 3.47089396213889e-05, + "loss": 2.2518, + "step": 6175000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470770103496282e-05, + "loss": 2.2303, + "step": 6175500 + }, + { + "epoch": 30.6, + "learning_rate": 3.4706462448536736e-05, + "loss": 2.2201, + "step": 6176000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470522386211065e-05, + "loss": 2.224, + "step": 6176500 + }, + { + "epoch": 30.6, + "learning_rate": 3.470398527568457e-05, + "loss": 2.2206, + "step": 6177000 + }, + { + "epoch": 30.61, + "learning_rate": 3.4702746689258487e-05, + "loss": 2.2475, + "step": 6177500 + }, + { + "epoch": 30.61, + "learning_rate": 3.4701510580005255e-05, + "loss": 2.2503, + "step": 6178000 + }, + { + "epoch": 30.61, + "learning_rate": 3.470027199357917e-05, + "loss": 2.2358, + "step": 6178500 + }, + { + "epoch": 30.61, + "learning_rate": 3.4699035884325934e-05, + "loss": 2.2493, + "step": 6179000 + }, + { + "epoch": 30.62, + "learning_rate": 3.469779729789985e-05, + "loss": 2.2415, + "step": 6179500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469655871147377e-05, + "loss": 2.1972, + "step": 6180000 + }, + { + "epoch": 30.62, + "learning_rate": 3.4695325079393396e-05, + "loss": 2.2483, + "step": 6180500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469408649296731e-05, + "loss": 2.2393, + "step": 6181000 + }, + { + "epoch": 30.63, + "learning_rate": 3.469284790654122e-05, + "loss": 2.2498, + "step": 6181500 + }, + { + "epoch": 30.63, + "learning_rate": 3.469160932011514e-05, + "loss": 2.2415, + "step": 6182000 + }, + { + "epoch": 30.63, + "learning_rate": 3.4690370733689057e-05, + "loss": 2.2731, + "step": 6182500 + }, + { + "epoch": 30.63, + "learning_rate": 3.4689132147262974e-05, + "loss": 2.265, + "step": 6183000 + }, + { + "epoch": 30.64, + "learning_rate": 3.468789356083689e-05, + "loss": 2.2388, + "step": 6183500 + }, + { + "epoch": 30.64, + "learning_rate": 3.468665745158366e-05, + "loss": 2.2341, + "step": 6184000 + }, + { + "epoch": 30.64, + "learning_rate": 3.4685418865157576e-05, + "loss": 2.2543, + "step": 6184500 + }, + { + "epoch": 30.64, + "learning_rate": 3.468418027873149e-05, + "loss": 2.2344, + "step": 6185000 + }, + { + "epoch": 30.65, + "learning_rate": 3.468294169230541e-05, + "loss": 2.2658, + "step": 6185500 + }, + { + "epoch": 30.65, + "learning_rate": 3.468170310587933e-05, + "loss": 2.2603, + "step": 6186000 + }, + { + "epoch": 30.65, + "learning_rate": 3.4680466996626096e-05, + "loss": 2.2264, + "step": 6186500 + }, + { + "epoch": 30.65, + "learning_rate": 3.467922841020001e-05, + "loss": 2.2426, + "step": 6187000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467798982377393e-05, + "loss": 2.2392, + "step": 6187500 + }, + { + "epoch": 30.66, + "learning_rate": 3.467675123734784e-05, + "loss": 2.2032, + "step": 6188000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467551265092176e-05, + "loss": 2.2448, + "step": 6188500 + }, + { + "epoch": 30.66, + "learning_rate": 3.4674274064495674e-05, + "loss": 2.2308, + "step": 6189000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467303547806959e-05, + "loss": 2.2433, + "step": 6189500 + }, + { + "epoch": 30.67, + "learning_rate": 3.467179689164351e-05, + "loss": 2.2387, + "step": 6190000 + }, + { + "epoch": 30.67, + "learning_rate": 3.4670560782390276e-05, + "loss": 2.2341, + "step": 6190500 + }, + { + "epoch": 30.67, + "learning_rate": 3.466932219596419e-05, + "loss": 2.2467, + "step": 6191000 + }, + { + "epoch": 30.67, + "learning_rate": 3.466808360953811e-05, + "loss": 2.2455, + "step": 6191500 + }, + { + "epoch": 30.68, + "learning_rate": 3.466684502311203e-05, + "loss": 2.2521, + "step": 6192000 + }, + { + "epoch": 30.68, + "learning_rate": 3.4665606436685944e-05, + "loss": 2.2215, + "step": 6192500 + }, + { + "epoch": 30.68, + "learning_rate": 3.4664367850259854e-05, + "loss": 2.2362, + "step": 6193000 + }, + { + "epoch": 30.68, + "learning_rate": 3.466313174100663e-05, + "loss": 2.2413, + "step": 6193500 + }, + { + "epoch": 30.69, + "learning_rate": 3.466189315458055e-05, + "loss": 2.2268, + "step": 6194000 + }, + { + "epoch": 30.69, + "learning_rate": 3.4660654568154464e-05, + "loss": 2.2273, + "step": 6194500 + }, + { + "epoch": 30.69, + "learning_rate": 3.4659415981728374e-05, + "loss": 2.2315, + "step": 6195000 + }, + { + "epoch": 30.69, + "learning_rate": 3.465817987247514e-05, + "loss": 2.2424, + "step": 6195500 + }, + { + "epoch": 30.7, + "learning_rate": 3.465694128604906e-05, + "loss": 2.2435, + "step": 6196000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465570517679583e-05, + "loss": 2.2266, + "step": 6196500 + }, + { + "epoch": 30.7, + "learning_rate": 3.4654466590369745e-05, + "loss": 2.2315, + "step": 6197000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465322800394366e-05, + "loss": 2.2434, + "step": 6197500 + }, + { + "epoch": 30.71, + "learning_rate": 3.465198941751758e-05, + "loss": 2.2493, + "step": 6198000 + }, + { + "epoch": 30.71, + "learning_rate": 3.4650750831091496e-05, + "loss": 2.2375, + "step": 6198500 + }, + { + "epoch": 30.71, + "learning_rate": 3.464951224466541e-05, + "loss": 2.2524, + "step": 6199000 + }, + { + "epoch": 30.71, + "learning_rate": 3.4648276135412175e-05, + "loss": 2.2466, + "step": 6199500 + }, + { + "epoch": 30.72, + "learning_rate": 3.464703754898609e-05, + "loss": 2.2237, + "step": 6200000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464579896256001e-05, + "loss": 2.2402, + "step": 6200500 + }, + { + "epoch": 30.72, + "learning_rate": 3.4644560376133926e-05, + "loss": 2.246, + "step": 6201000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464332178970784e-05, + "loss": 2.2472, + "step": 6201500 + }, + { + "epoch": 30.73, + "learning_rate": 3.464208320328176e-05, + "loss": 2.2377, + "step": 6202000 + }, + { + "epoch": 30.73, + "learning_rate": 3.4640844616855677e-05, + "loss": 2.27, + "step": 6202500 + }, + { + "epoch": 30.73, + "learning_rate": 3.4639606030429594e-05, + "loss": 2.2133, + "step": 6203000 + }, + { + "epoch": 30.73, + "learning_rate": 3.463836744400351e-05, + "loss": 2.2362, + "step": 6203500 + }, + { + "epoch": 30.74, + "learning_rate": 3.463712885757743e-05, + "loss": 2.2287, + "step": 6204000 + }, + { + "epoch": 30.74, + "learning_rate": 3.4635892748324196e-05, + "loss": 2.2377, + "step": 6204500 + }, + { + "epoch": 30.74, + "learning_rate": 3.463465416189811e-05, + "loss": 2.2474, + "step": 6205000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463341557547203e-05, + "loss": 2.2293, + "step": 6205500 + }, + { + "epoch": 30.75, + "learning_rate": 3.463217698904595e-05, + "loss": 2.2539, + "step": 6206000 + }, + { + "epoch": 30.75, + "learning_rate": 3.463094087979271e-05, + "loss": 2.2161, + "step": 6206500 + }, + { + "epoch": 30.75, + "learning_rate": 3.4629702293366626e-05, + "loss": 2.2283, + "step": 6207000 + }, + { + "epoch": 30.75, + "learning_rate": 3.462846370694054e-05, + "loss": 2.2341, + "step": 6207500 + }, + { + "epoch": 30.76, + "learning_rate": 3.462722512051446e-05, + "loss": 2.2445, + "step": 6208000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462598653408838e-05, + "loss": 2.2316, + "step": 6208500 + }, + { + "epoch": 30.76, + "learning_rate": 3.4624747947662294e-05, + "loss": 2.242, + "step": 6209000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462350936123621e-05, + "loss": 2.245, + "step": 6209500 + }, + { + "epoch": 30.77, + "learning_rate": 3.462227325198298e-05, + "loss": 2.253, + "step": 6210000 + }, + { + "epoch": 30.77, + "learning_rate": 3.4621034665556896e-05, + "loss": 2.2291, + "step": 6210500 + }, + { + "epoch": 30.77, + "learning_rate": 3.461979607913081e-05, + "loss": 2.2505, + "step": 6211000 + }, + { + "epoch": 30.77, + "learning_rate": 3.461855996987758e-05, + "loss": 2.2503, + "step": 6211500 + }, + { + "epoch": 30.78, + "learning_rate": 3.461732138345149e-05, + "loss": 2.2289, + "step": 6212000 + }, + { + "epoch": 30.78, + "learning_rate": 3.461608527419826e-05, + "loss": 2.2613, + "step": 6212500 + }, + { + "epoch": 30.78, + "learning_rate": 3.461484668777218e-05, + "loss": 2.2444, + "step": 6213000 + }, + { + "epoch": 30.78, + "learning_rate": 3.4613608101346095e-05, + "loss": 2.2416, + "step": 6213500 + }, + { + "epoch": 30.79, + "learning_rate": 3.461236951492001e-05, + "loss": 2.2211, + "step": 6214000 + }, + { + "epoch": 30.79, + "learning_rate": 3.461113092849393e-05, + "loss": 2.2169, + "step": 6214500 + }, + { + "epoch": 30.79, + "learning_rate": 3.4609892342067846e-05, + "loss": 2.2292, + "step": 6215000 + }, + { + "epoch": 30.79, + "learning_rate": 3.460865375564176e-05, + "loss": 2.2373, + "step": 6215500 + }, + { + "epoch": 30.8, + "learning_rate": 3.460741516921568e-05, + "loss": 2.2433, + "step": 6216000 + }, + { + "epoch": 30.8, + "learning_rate": 3.4606176582789596e-05, + "loss": 2.2271, + "step": 6216500 + }, + { + "epoch": 30.8, + "learning_rate": 3.460493799636351e-05, + "loss": 2.2382, + "step": 6217000 + }, + { + "epoch": 30.8, + "learning_rate": 3.460369940993743e-05, + "loss": 2.2384, + "step": 6217500 + }, + { + "epoch": 30.81, + "learning_rate": 3.460246082351135e-05, + "loss": 2.241, + "step": 6218000 + }, + { + "epoch": 30.81, + "learning_rate": 3.4601222237085264e-05, + "loss": 2.2461, + "step": 6218500 + }, + { + "epoch": 30.81, + "learning_rate": 3.459998365065918e-05, + "loss": 2.2499, + "step": 6219000 + }, + { + "epoch": 30.81, + "learning_rate": 3.45987450642331e-05, + "loss": 2.2335, + "step": 6219500 + }, + { + "epoch": 30.82, + "learning_rate": 3.4597506477807015e-05, + "loss": 2.2417, + "step": 6220000 + }, + { + "epoch": 30.82, + "learning_rate": 3.459626789138093e-05, + "loss": 2.2655, + "step": 6220500 + }, + { + "epoch": 30.82, + "learning_rate": 3.4595034259300546e-05, + "loss": 2.2529, + "step": 6221000 + }, + { + "epoch": 30.82, + "learning_rate": 3.459379567287446e-05, + "loss": 2.2362, + "step": 6221500 + }, + { + "epoch": 30.83, + "learning_rate": 3.459255708644838e-05, + "loss": 2.2316, + "step": 6222000 + }, + { + "epoch": 30.83, + "learning_rate": 3.4591318500022297e-05, + "loss": 2.2179, + "step": 6222500 + }, + { + "epoch": 30.83, + "learning_rate": 3.4590079913596213e-05, + "loss": 2.2264, + "step": 6223000 + }, + { + "epoch": 30.83, + "learning_rate": 3.458884132717013e-05, + "loss": 2.2518, + "step": 6223500 + }, + { + "epoch": 30.84, + "learning_rate": 3.45876052179169e-05, + "loss": 2.2177, + "step": 6224000 + }, + { + "epoch": 30.84, + "learning_rate": 3.4586366631490816e-05, + "loss": 2.2561, + "step": 6224500 + }, + { + "epoch": 30.84, + "learning_rate": 3.458512804506473e-05, + "loss": 2.2601, + "step": 6225000 + }, + { + "epoch": 30.84, + "learning_rate": 3.458388945863864e-05, + "loss": 2.2479, + "step": 6225500 + }, + { + "epoch": 30.85, + "learning_rate": 3.458265582655827e-05, + "loss": 2.2314, + "step": 6226000 + }, + { + "epoch": 30.85, + "learning_rate": 3.458141724013219e-05, + "loss": 2.2399, + "step": 6226500 + }, + { + "epoch": 30.85, + "learning_rate": 3.4580178653706105e-05, + "loss": 2.2339, + "step": 6227000 + }, + { + "epoch": 30.85, + "learning_rate": 3.457894006728002e-05, + "loss": 2.2389, + "step": 6227500 + }, + { + "epoch": 30.86, + "learning_rate": 3.457770148085393e-05, + "loss": 2.2303, + "step": 6228000 + }, + { + "epoch": 30.86, + "learning_rate": 3.457646289442785e-05, + "loss": 2.2388, + "step": 6228500 + }, + { + "epoch": 30.86, + "learning_rate": 3.4575224308001765e-05, + "loss": 2.2413, + "step": 6229000 + }, + { + "epoch": 30.86, + "learning_rate": 3.457398572157568e-05, + "loss": 2.2108, + "step": 6229500 + }, + { + "epoch": 30.87, + "learning_rate": 3.45727471351496e-05, + "loss": 2.2406, + "step": 6230000 + }, + { + "epoch": 30.87, + "learning_rate": 3.457151102589637e-05, + "loss": 2.2665, + "step": 6230500 + }, + { + "epoch": 30.87, + "learning_rate": 3.457027243947028e-05, + "loss": 2.2316, + "step": 6231000 + }, + { + "epoch": 30.87, + "learning_rate": 3.4569033853044195e-05, + "loss": 2.2667, + "step": 6231500 + }, + { + "epoch": 30.88, + "learning_rate": 3.456779526661811e-05, + "loss": 2.2364, + "step": 6232000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456655668019203e-05, + "loss": 2.2286, + "step": 6232500 + }, + { + "epoch": 30.88, + "learning_rate": 3.4565318093765946e-05, + "loss": 2.251, + "step": 6233000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456407950733986e-05, + "loss": 2.2181, + "step": 6233500 + }, + { + "epoch": 30.89, + "learning_rate": 3.456284092091378e-05, + "loss": 2.2578, + "step": 6234000 + }, + { + "epoch": 30.89, + "learning_rate": 3.456160481166055e-05, + "loss": 2.268, + "step": 6234500 + }, + { + "epoch": 30.89, + "learning_rate": 3.4560366225234466e-05, + "loss": 2.2286, + "step": 6235000 + }, + { + "epoch": 30.89, + "learning_rate": 3.455912763880838e-05, + "loss": 2.2199, + "step": 6235500 + }, + { + "epoch": 30.9, + "learning_rate": 3.45578890523823e-05, + "loss": 2.2377, + "step": 6236000 + }, + { + "epoch": 30.9, + "learning_rate": 3.4556650465956216e-05, + "loss": 2.238, + "step": 6236500 + }, + { + "epoch": 30.9, + "learning_rate": 3.455541187953013e-05, + "loss": 2.2317, + "step": 6237000 + }, + { + "epoch": 30.9, + "learning_rate": 3.455417329310405e-05, + "loss": 2.2309, + "step": 6237500 + }, + { + "epoch": 30.91, + "learning_rate": 3.455293470667797e-05, + "loss": 2.2461, + "step": 6238000 + }, + { + "epoch": 30.91, + "learning_rate": 3.4551696120251884e-05, + "loss": 2.2172, + "step": 6238500 + }, + { + "epoch": 30.91, + "learning_rate": 3.4550457533825794e-05, + "loss": 2.2666, + "step": 6239000 + }, + { + "epoch": 30.91, + "learning_rate": 3.454921894739971e-05, + "loss": 2.2173, + "step": 6239500 + }, + { + "epoch": 30.92, + "learning_rate": 3.454798036097363e-05, + "loss": 2.2493, + "step": 6240000 + }, + { + "epoch": 30.92, + "learning_rate": 3.4546741774547545e-05, + "loss": 2.2422, + "step": 6240500 + }, + { + "epoch": 30.92, + "learning_rate": 3.4545505665294314e-05, + "loss": 2.2461, + "step": 6241000 + }, + { + "epoch": 30.92, + "learning_rate": 3.454426707886823e-05, + "loss": 2.2144, + "step": 6241500 + }, + { + "epoch": 30.93, + "learning_rate": 3.4543030969615e-05, + "loss": 2.241, + "step": 6242000 + }, + { + "epoch": 30.93, + "learning_rate": 3.4541792383188917e-05, + "loss": 2.2454, + "step": 6242500 + }, + { + "epoch": 30.93, + "learning_rate": 3.4540553796762833e-05, + "loss": 2.2331, + "step": 6243000 + }, + { + "epoch": 30.93, + "learning_rate": 3.453931521033675e-05, + "loss": 2.241, + "step": 6243500 + }, + { + "epoch": 30.93, + "learning_rate": 3.453807910108351e-05, + "loss": 2.2426, + "step": 6244000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453684051465743e-05, + "loss": 2.2353, + "step": 6244500 + }, + { + "epoch": 30.94, + "learning_rate": 3.4535601928231346e-05, + "loss": 2.2339, + "step": 6245000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453436334180526e-05, + "loss": 2.2464, + "step": 6245500 + }, + { + "epoch": 30.94, + "learning_rate": 3.453312723255204e-05, + "loss": 2.247, + "step": 6246000 + }, + { + "epoch": 30.95, + "learning_rate": 3.453188864612595e-05, + "loss": 2.242, + "step": 6246500 + }, + { + "epoch": 30.95, + "learning_rate": 3.4530650059699866e-05, + "loss": 2.2535, + "step": 6247000 + }, + { + "epoch": 30.95, + "learning_rate": 3.452941147327378e-05, + "loss": 2.2252, + "step": 6247500 + }, + { + "epoch": 30.95, + "learning_rate": 3.45281728868477e-05, + "loss": 2.2676, + "step": 6248000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452693677759447e-05, + "loss": 2.2438, + "step": 6248500 + }, + { + "epoch": 30.96, + "learning_rate": 3.4525698191168385e-05, + "loss": 2.2296, + "step": 6249000 + }, + { + "epoch": 30.96, + "learning_rate": 3.45244596047423e-05, + "loss": 2.254, + "step": 6249500 + }, + { + "epoch": 30.96, + "learning_rate": 3.452322101831621e-05, + "loss": 2.2367, + "step": 6250000 + }, + { + "epoch": 30.97, + "learning_rate": 3.452198243189013e-05, + "loss": 2.2447, + "step": 6250500 + }, + { + "epoch": 30.97, + "learning_rate": 3.4520746322636905e-05, + "loss": 2.2428, + "step": 6251000 + }, + { + "epoch": 30.97, + "learning_rate": 3.451950773621082e-05, + "loss": 2.224, + "step": 6251500 + }, + { + "epoch": 30.97, + "learning_rate": 3.451826914978474e-05, + "loss": 2.2347, + "step": 6252000 + }, + { + "epoch": 30.98, + "learning_rate": 3.4517030563358656e-05, + "loss": 2.2252, + "step": 6252500 + }, + { + "epoch": 30.98, + "learning_rate": 3.451579445410542e-05, + "loss": 2.2325, + "step": 6253000 + }, + { + "epoch": 30.98, + "learning_rate": 3.4514555867679335e-05, + "loss": 2.2505, + "step": 6253500 + }, + { + "epoch": 30.98, + "learning_rate": 3.4513319758426104e-05, + "loss": 2.25, + "step": 6254000 + }, + { + "epoch": 30.99, + "learning_rate": 3.451208117200002e-05, + "loss": 2.2485, + "step": 6254500 + }, + { + "epoch": 30.99, + "learning_rate": 3.451084258557394e-05, + "loss": 2.2436, + "step": 6255000 + }, + { + "epoch": 30.99, + "learning_rate": 3.4509603999147854e-05, + "loss": 2.2469, + "step": 6255500 + }, + { + "epoch": 30.99, + "learning_rate": 3.450836788989462e-05, + "loss": 2.2563, + "step": 6256000 + }, + { + "epoch": 31.0, + "learning_rate": 3.450712930346854e-05, + "loss": 2.2587, + "step": 6256500 + }, + { + "epoch": 31.0, + "learning_rate": 3.450589071704246e-05, + "loss": 2.235, + "step": 6257000 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.6597478495877519, + "eval_accuracy_mlm": 0.6156466741652056, + "eval_accuracy_nsp": 0.8678179628881506, + "eval_loss": 2.3193306922912598, + "eval_runtime": 145.8562, + "eval_samples_per_second": 1748.017, + "eval_steps_per_second": 72.839, + "step": 6257133 + }, + { + "epoch": 31.0, + "learning_rate": 3.4504652130616374e-05, + "loss": 2.1808, + "step": 6257500 + }, + { + "epoch": 31.0, + "learning_rate": 3.4503416021363136e-05, + "loss": 2.213, + "step": 6258000 + }, + { + "epoch": 31.01, + "learning_rate": 3.450217743493705e-05, + "loss": 2.2195, + "step": 6258500 + }, + { + "epoch": 31.01, + "learning_rate": 3.450093884851097e-05, + "loss": 2.2085, + "step": 6259000 + }, + { + "epoch": 31.01, + "learning_rate": 3.449970026208489e-05, + "loss": 2.2248, + "step": 6259500 + }, + { + "epoch": 31.01, + "learning_rate": 3.4498461675658804e-05, + "loss": 2.2321, + "step": 6260000 + }, + { + "epoch": 31.02, + "learning_rate": 3.449722308923272e-05, + "loss": 2.2127, + "step": 6260500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449598450280664e-05, + "loss": 2.1961, + "step": 6261000 + }, + { + "epoch": 31.02, + "learning_rate": 3.4494745916380554e-05, + "loss": 2.2115, + "step": 6261500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449350980712732e-05, + "loss": 2.1928, + "step": 6262000 + }, + { + "epoch": 31.03, + "learning_rate": 3.449227122070124e-05, + "loss": 2.2092, + "step": 6262500 + }, + { + "epoch": 31.03, + "learning_rate": 3.449103263427516e-05, + "loss": 2.2159, + "step": 6263000 + }, + { + "epoch": 31.03, + "learning_rate": 3.4489794047849074e-05, + "loss": 2.189, + "step": 6263500 + }, + { + "epoch": 31.03, + "learning_rate": 3.448855546142299e-05, + "loss": 2.21, + "step": 6264000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448731687499691e-05, + "loss": 2.2087, + "step": 6264500 + }, + { + "epoch": 31.04, + "learning_rate": 3.4486078288570825e-05, + "loss": 2.2178, + "step": 6265000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448483970214474e-05, + "loss": 2.2034, + "step": 6265500 + }, + { + "epoch": 31.04, + "learning_rate": 3.448360111571866e-05, + "loss": 2.2245, + "step": 6266000 + }, + { + "epoch": 31.05, + "learning_rate": 3.448236252929257e-05, + "loss": 2.203, + "step": 6266500 + }, + { + "epoch": 31.05, + "learning_rate": 3.4481123942866486e-05, + "loss": 2.1872, + "step": 6267000 + }, + { + "epoch": 31.05, + "learning_rate": 3.4479887833613255e-05, + "loss": 2.1915, + "step": 6267500 + }, + { + "epoch": 31.05, + "learning_rate": 3.447864924718717e-05, + "loss": 2.2446, + "step": 6268000 + }, + { + "epoch": 31.06, + "learning_rate": 3.447741066076109e-05, + "loss": 2.2023, + "step": 6268500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447617455150786e-05, + "loss": 2.2039, + "step": 6269000 + }, + { + "epoch": 31.06, + "learning_rate": 3.4474935965081774e-05, + "loss": 2.2216, + "step": 6269500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447369737865569e-05, + "loss": 2.2161, + "step": 6270000 + }, + { + "epoch": 31.07, + "learning_rate": 3.447245879222961e-05, + "loss": 2.1849, + "step": 6270500 + }, + { + "epoch": 31.07, + "learning_rate": 3.4471220205803525e-05, + "loss": 2.2113, + "step": 6271000 + }, + { + "epoch": 31.07, + "learning_rate": 3.446998161937744e-05, + "loss": 2.2109, + "step": 6271500 + }, + { + "epoch": 31.07, + "learning_rate": 3.446874303295136e-05, + "loss": 2.2381, + "step": 6272000 + }, + { + "epoch": 31.08, + "learning_rate": 3.4467504446525276e-05, + "loss": 2.2078, + "step": 6272500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446626586009919e-05, + "loss": 2.2187, + "step": 6273000 + }, + { + "epoch": 31.08, + "learning_rate": 3.44650272736731e-05, + "loss": 2.2024, + "step": 6273500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446378868724702e-05, + "loss": 2.2256, + "step": 6274000 + }, + { + "epoch": 31.09, + "learning_rate": 3.446255010082094e-05, + "loss": 2.2071, + "step": 6274500 + }, + { + "epoch": 31.09, + "learning_rate": 3.4461311514394854e-05, + "loss": 2.2189, + "step": 6275000 + }, + { + "epoch": 31.09, + "learning_rate": 3.446007540514162e-05, + "loss": 2.2198, + "step": 6275500 + }, + { + "epoch": 31.09, + "learning_rate": 3.445883929588839e-05, + "loss": 2.219, + "step": 6276000 + }, + { + "epoch": 31.1, + "learning_rate": 3.445760070946231e-05, + "loss": 2.218, + "step": 6276500 + }, + { + "epoch": 31.1, + "learning_rate": 3.4456362123036225e-05, + "loss": 2.2116, + "step": 6277000 + }, + { + "epoch": 31.1, + "learning_rate": 3.445512353661014e-05, + "loss": 2.2026, + "step": 6277500 + }, + { + "epoch": 31.1, + "learning_rate": 3.445388495018406e-05, + "loss": 2.2124, + "step": 6278000 + }, + { + "epoch": 31.11, + "learning_rate": 3.4452646363757976e-05, + "loss": 2.2316, + "step": 6278500 + }, + { + "epoch": 31.11, + "learning_rate": 3.445140777733189e-05, + "loss": 2.2332, + "step": 6279000 + }, + { + "epoch": 31.11, + "learning_rate": 3.445016919090581e-05, + "loss": 2.2116, + "step": 6279500 + }, + { + "epoch": 31.11, + "learning_rate": 3.444893060447973e-05, + "loss": 2.2152, + "step": 6280000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444769201805364e-05, + "loss": 2.211, + "step": 6280500 + }, + { + "epoch": 31.12, + "learning_rate": 3.4446453431627554e-05, + "loss": 2.2078, + "step": 6281000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444521484520147e-05, + "loss": 2.2311, + "step": 6281500 + }, + { + "epoch": 31.12, + "learning_rate": 3.444397625877539e-05, + "loss": 2.208, + "step": 6282000 + }, + { + "epoch": 31.13, + "learning_rate": 3.44427376723493e-05, + "loss": 2.2156, + "step": 6282500 + }, + { + "epoch": 31.13, + "learning_rate": 3.444150156309607e-05, + "loss": 2.2341, + "step": 6283000 + }, + { + "epoch": 31.13, + "learning_rate": 3.444026545384284e-05, + "loss": 2.2018, + "step": 6283500 + }, + { + "epoch": 31.13, + "learning_rate": 3.4439029344589604e-05, + "loss": 2.2093, + "step": 6284000 + }, + { + "epoch": 31.14, + "learning_rate": 3.443779075816352e-05, + "loss": 2.2189, + "step": 6284500 + }, + { + "epoch": 31.14, + "learning_rate": 3.443655217173744e-05, + "loss": 2.2098, + "step": 6285000 + }, + { + "epoch": 31.14, + "learning_rate": 3.4435318539657066e-05, + "loss": 2.2191, + "step": 6285500 + }, + { + "epoch": 31.14, + "learning_rate": 3.443407995323098e-05, + "loss": 2.2183, + "step": 6286000 + }, + { + "epoch": 31.15, + "learning_rate": 3.44328413668049e-05, + "loss": 2.2258, + "step": 6286500 + }, + { + "epoch": 31.15, + "learning_rate": 3.4431602780378816e-05, + "loss": 2.2116, + "step": 6287000 + }, + { + "epoch": 31.15, + "learning_rate": 3.443036419395273e-05, + "loss": 2.2322, + "step": 6287500 + }, + { + "epoch": 31.15, + "learning_rate": 3.4429128084699495e-05, + "loss": 2.212, + "step": 6288000 + }, + { + "epoch": 31.16, + "learning_rate": 3.442788949827341e-05, + "loss": 2.2144, + "step": 6288500 + }, + { + "epoch": 31.16, + "learning_rate": 3.442665091184733e-05, + "loss": 2.1998, + "step": 6289000 + }, + { + "epoch": 31.16, + "learning_rate": 3.4425412325421246e-05, + "loss": 2.1758, + "step": 6289500 + }, + { + "epoch": 31.16, + "learning_rate": 3.442417373899516e-05, + "loss": 2.2297, + "step": 6290000 + }, + { + "epoch": 31.17, + "learning_rate": 3.442293515256908e-05, + "loss": 2.211, + "step": 6290500 + }, + { + "epoch": 31.17, + "learning_rate": 3.442169656614299e-05, + "loss": 2.212, + "step": 6291000 + }, + { + "epoch": 31.17, + "learning_rate": 3.442045797971691e-05, + "loss": 2.2227, + "step": 6291500 + }, + { + "epoch": 31.17, + "learning_rate": 3.4419219393290824e-05, + "loss": 2.2025, + "step": 6292000 + }, + { + "epoch": 31.18, + "learning_rate": 3.441798080686474e-05, + "loss": 2.2118, + "step": 6292500 + }, + { + "epoch": 31.18, + "learning_rate": 3.441674222043866e-05, + "loss": 2.235, + "step": 6293000 + }, + { + "epoch": 31.18, + "learning_rate": 3.4415503634012575e-05, + "loss": 2.2199, + "step": 6293500 + }, + { + "epoch": 31.18, + "learning_rate": 3.441426504758649e-05, + "loss": 2.2552, + "step": 6294000 + }, + { + "epoch": 31.19, + "learning_rate": 3.441302646116041e-05, + "loss": 2.207, + "step": 6294500 + }, + { + "epoch": 31.19, + "learning_rate": 3.4411787874734325e-05, + "loss": 2.2162, + "step": 6295000 + }, + { + "epoch": 31.19, + "learning_rate": 3.4410551765481094e-05, + "loss": 2.2181, + "step": 6295500 + }, + { + "epoch": 31.19, + "learning_rate": 3.440931565622786e-05, + "loss": 2.228, + "step": 6296000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440807706980178e-05, + "loss": 2.2119, + "step": 6296500 + }, + { + "epoch": 31.2, + "learning_rate": 3.44068384833757e-05, + "loss": 2.2169, + "step": 6297000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440559989694961e-05, + "loss": 2.2164, + "step": 6297500 + }, + { + "epoch": 31.2, + "learning_rate": 3.4404361310523524e-05, + "loss": 2.2253, + "step": 6298000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440312272409744e-05, + "loss": 2.2121, + "step": 6298500 + }, + { + "epoch": 31.21, + "learning_rate": 3.440188413767136e-05, + "loss": 2.2298, + "step": 6299000 + }, + { + "epoch": 31.21, + "learning_rate": 3.4400645551245275e-05, + "loss": 2.2243, + "step": 6299500 + }, + { + "epoch": 31.21, + "learning_rate": 3.439940696481919e-05, + "loss": 2.2202, + "step": 6300000 + }, + { + "epoch": 31.21, + "learning_rate": 3.439817085556596e-05, + "loss": 2.2288, + "step": 6300500 + }, + { + "epoch": 31.22, + "learning_rate": 3.439693226913988e-05, + "loss": 2.2252, + "step": 6301000 + }, + { + "epoch": 31.22, + "learning_rate": 3.4395693682713794e-05, + "loss": 2.2148, + "step": 6301500 + }, + { + "epoch": 31.22, + "learning_rate": 3.439445509628771e-05, + "loss": 2.2291, + "step": 6302000 + }, + { + "epoch": 31.22, + "learning_rate": 3.439321650986163e-05, + "loss": 2.2196, + "step": 6302500 + }, + { + "epoch": 31.23, + "learning_rate": 3.4391977923435545e-05, + "loss": 2.2144, + "step": 6303000 + }, + { + "epoch": 31.23, + "learning_rate": 3.439073933700946e-05, + "loss": 2.2116, + "step": 6303500 + }, + { + "epoch": 31.23, + "learning_rate": 3.438950075058337e-05, + "loss": 2.2102, + "step": 6304000 + }, + { + "epoch": 31.23, + "learning_rate": 3.438826216415729e-05, + "loss": 2.2304, + "step": 6304500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438702605490406e-05, + "loss": 2.2181, + "step": 6305000 + }, + { + "epoch": 31.24, + "learning_rate": 3.438579242282368e-05, + "loss": 2.2039, + "step": 6305500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438455631357045e-05, + "loss": 2.2286, + "step": 6306000 + }, + { + "epoch": 31.24, + "learning_rate": 3.4383317727144364e-05, + "loss": 2.2183, + "step": 6306500 + }, + { + "epoch": 31.25, + "learning_rate": 3.438207914071828e-05, + "loss": 2.2127, + "step": 6307000 + }, + { + "epoch": 31.25, + "learning_rate": 3.43808405542922e-05, + "loss": 2.2101, + "step": 6307500 + }, + { + "epoch": 31.25, + "learning_rate": 3.4379601967866115e-05, + "loss": 2.203, + "step": 6308000 + }, + { + "epoch": 31.25, + "learning_rate": 3.437836338144003e-05, + "loss": 2.2003, + "step": 6308500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437712479501395e-05, + "loss": 2.1954, + "step": 6309000 + }, + { + "epoch": 31.26, + "learning_rate": 3.4375886208587866e-05, + "loss": 2.2006, + "step": 6309500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437464762216178e-05, + "loss": 2.2085, + "step": 6310000 + }, + { + "epoch": 31.26, + "learning_rate": 3.43734090357357e-05, + "loss": 2.2127, + "step": 6310500 + }, + { + "epoch": 31.27, + "learning_rate": 3.437217292648247e-05, + "loss": 2.2205, + "step": 6311000 + }, + { + "epoch": 31.27, + "learning_rate": 3.437093434005638e-05, + "loss": 2.2085, + "step": 6311500 + }, + { + "epoch": 31.27, + "learning_rate": 3.4369695753630296e-05, + "loss": 2.2141, + "step": 6312000 + }, + { + "epoch": 31.27, + "learning_rate": 3.436845716720421e-05, + "loss": 2.226, + "step": 6312500 + }, + { + "epoch": 31.28, + "learning_rate": 3.436722105795098e-05, + "loss": 2.2442, + "step": 6313000 + }, + { + "epoch": 31.28, + "learning_rate": 3.43659824715249e-05, + "loss": 2.2363, + "step": 6313500 + }, + { + "epoch": 31.28, + "learning_rate": 3.436474636227167e-05, + "loss": 2.208, + "step": 6314000 + }, + { + "epoch": 31.28, + "learning_rate": 3.4363507775845584e-05, + "loss": 2.233, + "step": 6314500 + }, + { + "epoch": 31.29, + "learning_rate": 3.43622691894195e-05, + "loss": 2.2111, + "step": 6315000 + }, + { + "epoch": 31.29, + "learning_rate": 3.436103060299342e-05, + "loss": 2.2063, + "step": 6315500 + }, + { + "epoch": 31.29, + "learning_rate": 3.4359792016567335e-05, + "loss": 2.2115, + "step": 6316000 + }, + { + "epoch": 31.29, + "learning_rate": 3.435855343014125e-05, + "loss": 2.1919, + "step": 6316500 + }, + { + "epoch": 31.3, + "learning_rate": 3.435731484371517e-05, + "loss": 2.2389, + "step": 6317000 + }, + { + "epoch": 31.3, + "learning_rate": 3.4356076257289086e-05, + "loss": 2.2189, + "step": 6317500 + }, + { + "epoch": 31.3, + "learning_rate": 3.435484014803585e-05, + "loss": 2.2316, + "step": 6318000 + }, + { + "epoch": 31.3, + "learning_rate": 3.4353601561609765e-05, + "loss": 2.2294, + "step": 6318500 + }, + { + "epoch": 31.31, + "learning_rate": 3.435236297518368e-05, + "loss": 2.228, + "step": 6319000 + }, + { + "epoch": 31.31, + "learning_rate": 3.43511243887576e-05, + "loss": 2.2099, + "step": 6319500 + }, + { + "epoch": 31.31, + "learning_rate": 3.4349885802331515e-05, + "loss": 2.2214, + "step": 6320000 + }, + { + "epoch": 31.31, + "learning_rate": 3.434864721590543e-05, + "loss": 2.2146, + "step": 6320500 + }, + { + "epoch": 31.32, + "learning_rate": 3.434740862947935e-05, + "loss": 2.2259, + "step": 6321000 + }, + { + "epoch": 31.32, + "learning_rate": 3.4346170043053266e-05, + "loss": 2.2005, + "step": 6321500 + }, + { + "epoch": 31.32, + "learning_rate": 3.434493145662718e-05, + "loss": 2.2291, + "step": 6322000 + }, + { + "epoch": 31.32, + "learning_rate": 3.43436928702011e-05, + "loss": 2.219, + "step": 6322500 + }, + { + "epoch": 31.33, + "learning_rate": 3.434245428377502e-05, + "loss": 2.1981, + "step": 6323000 + }, + { + "epoch": 31.33, + "learning_rate": 3.4341215697348934e-05, + "loss": 2.2405, + "step": 6323500 + }, + { + "epoch": 31.33, + "learning_rate": 3.433997711092285e-05, + "loss": 2.1992, + "step": 6324000 + }, + { + "epoch": 31.33, + "learning_rate": 3.433874100166962e-05, + "loss": 2.2415, + "step": 6324500 + }, + { + "epoch": 31.34, + "learning_rate": 3.433750241524353e-05, + "loss": 2.2222, + "step": 6325000 + }, + { + "epoch": 31.34, + "learning_rate": 3.433626382881745e-05, + "loss": 2.2247, + "step": 6325500 + }, + { + "epoch": 31.34, + "learning_rate": 3.4335025242391364e-05, + "loss": 2.2394, + "step": 6326000 + }, + { + "epoch": 31.34, + "learning_rate": 3.433378665596528e-05, + "loss": 2.2441, + "step": 6326500 + }, + { + "epoch": 31.35, + "learning_rate": 3.43325480695392e-05, + "loss": 2.2145, + "step": 6327000 + }, + { + "epoch": 31.35, + "learning_rate": 3.4331309483113115e-05, + "loss": 2.2068, + "step": 6327500 + }, + { + "epoch": 31.35, + "learning_rate": 3.433007089668703e-05, + "loss": 2.2079, + "step": 6328000 + }, + { + "epoch": 31.35, + "learning_rate": 3.432883231026094e-05, + "loss": 2.2071, + "step": 6328500 + }, + { + "epoch": 31.36, + "learning_rate": 3.432759620100772e-05, + "loss": 2.2209, + "step": 6329000 + }, + { + "epoch": 31.36, + "learning_rate": 3.4326357614581634e-05, + "loss": 2.2367, + "step": 6329500 + }, + { + "epoch": 31.36, + "learning_rate": 3.432511902815555e-05, + "loss": 2.2124, + "step": 6330000 + }, + { + "epoch": 31.36, + "learning_rate": 3.432388044172947e-05, + "loss": 2.2072, + "step": 6330500 + }, + { + "epoch": 31.37, + "learning_rate": 3.4322641855303385e-05, + "loss": 2.2324, + "step": 6331000 + }, + { + "epoch": 31.37, + "learning_rate": 3.4321408223223e-05, + "loss": 2.2163, + "step": 6331500 + }, + { + "epoch": 31.37, + "learning_rate": 3.4320172113969774e-05, + "loss": 2.2097, + "step": 6332000 + }, + { + "epoch": 31.37, + "learning_rate": 3.4318936004716536e-05, + "loss": 2.2137, + "step": 6332500 + }, + { + "epoch": 31.38, + "learning_rate": 3.431769741829045e-05, + "loss": 2.2035, + "step": 6333000 + }, + { + "epoch": 31.38, + "learning_rate": 3.431645883186437e-05, + "loss": 2.2252, + "step": 6333500 + }, + { + "epoch": 31.38, + "learning_rate": 3.431522024543829e-05, + "loss": 2.2297, + "step": 6334000 + }, + { + "epoch": 31.38, + "learning_rate": 3.4313981659012204e-05, + "loss": 2.2349, + "step": 6334500 + }, + { + "epoch": 31.39, + "learning_rate": 3.431274307258612e-05, + "loss": 2.2302, + "step": 6335000 + }, + { + "epoch": 31.39, + "learning_rate": 3.431150696333289e-05, + "loss": 2.2064, + "step": 6335500 + }, + { + "epoch": 31.39, + "learning_rate": 3.431026837690681e-05, + "loss": 2.2251, + "step": 6336000 + }, + { + "epoch": 31.39, + "learning_rate": 3.4309029790480724e-05, + "loss": 2.238, + "step": 6336500 + }, + { + "epoch": 31.4, + "learning_rate": 3.430779120405464e-05, + "loss": 2.2249, + "step": 6337000 + }, + { + "epoch": 31.4, + "learning_rate": 3.430655261762856e-05, + "loss": 2.2352, + "step": 6337500 + }, + { + "epoch": 31.4, + "learning_rate": 3.4305316508375326e-05, + "loss": 2.2189, + "step": 6338000 + }, + { + "epoch": 31.4, + "learning_rate": 3.430407792194924e-05, + "loss": 2.2131, + "step": 6338500 + }, + { + "epoch": 31.41, + "learning_rate": 3.430283933552316e-05, + "loss": 2.2507, + "step": 6339000 + }, + { + "epoch": 31.41, + "learning_rate": 3.430160074909707e-05, + "loss": 2.2302, + "step": 6339500 + }, + { + "epoch": 31.41, + "learning_rate": 3.430036216267099e-05, + "loss": 2.2123, + "step": 6340000 + }, + { + "epoch": 31.41, + "learning_rate": 3.4299123576244904e-05, + "loss": 2.2245, + "step": 6340500 + }, + { + "epoch": 31.42, + "learning_rate": 3.429788498981882e-05, + "loss": 2.2152, + "step": 6341000 + }, + { + "epoch": 31.42, + "learning_rate": 3.429664888056559e-05, + "loss": 2.2311, + "step": 6341500 + }, + { + "epoch": 31.42, + "learning_rate": 3.429541029413951e-05, + "loss": 2.2054, + "step": 6342000 + }, + { + "epoch": 31.42, + "learning_rate": 3.4294171707713424e-05, + "loss": 2.2194, + "step": 6342500 + }, + { + "epoch": 31.43, + "learning_rate": 3.429293312128734e-05, + "loss": 2.2208, + "step": 6343000 + }, + { + "epoch": 31.43, + "learning_rate": 3.429169453486126e-05, + "loss": 2.2581, + "step": 6343500 + }, + { + "epoch": 31.43, + "learning_rate": 3.4290455948435175e-05, + "loss": 2.1989, + "step": 6344000 + }, + { + "epoch": 31.43, + "learning_rate": 3.428921736200909e-05, + "loss": 2.2191, + "step": 6344500 + }, + { + "epoch": 31.44, + "learning_rate": 3.4287978775583e-05, + "loss": 2.239, + "step": 6345000 + }, + { + "epoch": 31.44, + "learning_rate": 3.428674018915692e-05, + "loss": 2.2267, + "step": 6345500 + }, + { + "epoch": 31.44, + "learning_rate": 3.428550407990369e-05, + "loss": 2.2187, + "step": 6346000 + }, + { + "epoch": 31.44, + "learning_rate": 3.4284265493477604e-05, + "loss": 2.2207, + "step": 6346500 + }, + { + "epoch": 31.45, + "learning_rate": 3.428302690705152e-05, + "loss": 2.2389, + "step": 6347000 + }, + { + "epoch": 31.45, + "learning_rate": 3.428178832062544e-05, + "loss": 2.2213, + "step": 6347500 + }, + { + "epoch": 31.45, + "learning_rate": 3.4280549734199355e-05, + "loss": 2.2062, + "step": 6348000 + }, + { + "epoch": 31.45, + "learning_rate": 3.4279311147773265e-05, + "loss": 2.218, + "step": 6348500 + }, + { + "epoch": 31.46, + "learning_rate": 3.427807256134718e-05, + "loss": 2.2122, + "step": 6349000 + }, + { + "epoch": 31.46, + "learning_rate": 3.42768339749211e-05, + "loss": 2.2257, + "step": 6349500 + }, + { + "epoch": 31.46, + "learning_rate": 3.4275595388495016e-05, + "loss": 2.2072, + "step": 6350000 + }, + { + "epoch": 31.46, + "learning_rate": 3.427435927924179e-05, + "loss": 2.2411, + "step": 6350500 + }, + { + "epoch": 31.47, + "learning_rate": 3.427312316998856e-05, + "loss": 2.2063, + "step": 6351000 + }, + { + "epoch": 31.47, + "learning_rate": 3.427188458356248e-05, + "loss": 2.2173, + "step": 6351500 + }, + { + "epoch": 31.47, + "learning_rate": 3.4270645997136394e-05, + "loss": 2.2306, + "step": 6352000 + }, + { + "epoch": 31.47, + "learning_rate": 3.426940741071031e-05, + "loss": 2.2277, + "step": 6352500 + }, + { + "epoch": 31.47, + "learning_rate": 3.426816882428422e-05, + "loss": 2.229, + "step": 6353000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426693023785814e-05, + "loss": 2.2136, + "step": 6353500 + }, + { + "epoch": 31.48, + "learning_rate": 3.4265691651432055e-05, + "loss": 2.2245, + "step": 6354000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426445306500597e-05, + "loss": 2.2111, + "step": 6354500 + }, + { + "epoch": 31.48, + "learning_rate": 3.426321447857989e-05, + "loss": 2.2525, + "step": 6355000 + }, + { + "epoch": 31.49, + "learning_rate": 3.42619758921538e-05, + "loss": 2.2219, + "step": 6355500 + }, + { + "epoch": 31.49, + "learning_rate": 3.4260737305727716e-05, + "loss": 2.2086, + "step": 6356000 + }, + { + "epoch": 31.49, + "learning_rate": 3.425949871930163e-05, + "loss": 2.2263, + "step": 6356500 + }, + { + "epoch": 31.49, + "learning_rate": 3.425826013287555e-05, + "loss": 2.2265, + "step": 6357000 + }, + { + "epoch": 31.5, + "learning_rate": 3.425702154644947e-05, + "loss": 2.2005, + "step": 6357500 + }, + { + "epoch": 31.5, + "learning_rate": 3.4255782960023384e-05, + "loss": 2.2232, + "step": 6358000 + }, + { + "epoch": 31.5, + "learning_rate": 3.42545443735973e-05, + "loss": 2.2305, + "step": 6358500 + }, + { + "epoch": 31.5, + "learning_rate": 3.425330826434407e-05, + "loss": 2.1797, + "step": 6359000 + }, + { + "epoch": 31.51, + "learning_rate": 3.425206967791799e-05, + "loss": 2.2228, + "step": 6359500 + }, + { + "epoch": 31.51, + "learning_rate": 3.4250831091491904e-05, + "loss": 2.2035, + "step": 6360000 + }, + { + "epoch": 31.51, + "learning_rate": 3.424959498223867e-05, + "loss": 2.2232, + "step": 6360500 + }, + { + "epoch": 31.51, + "learning_rate": 3.424835639581259e-05, + "loss": 2.2096, + "step": 6361000 + }, + { + "epoch": 31.52, + "learning_rate": 3.4247117809386506e-05, + "loss": 2.203, + "step": 6361500 + }, + { + "epoch": 31.52, + "learning_rate": 3.4245879222960416e-05, + "loss": 2.2527, + "step": 6362000 + }, + { + "epoch": 31.52, + "learning_rate": 3.424464063653433e-05, + "loss": 2.2428, + "step": 6362500 + }, + { + "epoch": 31.52, + "learning_rate": 3.424340452728111e-05, + "loss": 2.1951, + "step": 6363000 + }, + { + "epoch": 31.53, + "learning_rate": 3.424216594085502e-05, + "loss": 2.2294, + "step": 6363500 + }, + { + "epoch": 31.53, + "learning_rate": 3.4240927354428936e-05, + "loss": 2.2303, + "step": 6364000 + }, + { + "epoch": 31.53, + "learning_rate": 3.423968876800285e-05, + "loss": 2.2402, + "step": 6364500 + }, + { + "epoch": 31.53, + "learning_rate": 3.423845265874963e-05, + "loss": 2.2149, + "step": 6365000 + }, + { + "epoch": 31.54, + "learning_rate": 3.423721654949639e-05, + "loss": 2.2285, + "step": 6365500 + }, + { + "epoch": 31.54, + "learning_rate": 3.423597796307031e-05, + "loss": 2.2355, + "step": 6366000 + }, + { + "epoch": 31.54, + "learning_rate": 3.4234739376644224e-05, + "loss": 2.2315, + "step": 6366500 + }, + { + "epoch": 31.54, + "learning_rate": 3.423350079021814e-05, + "loss": 2.2164, + "step": 6367000 + }, + { + "epoch": 31.55, + "learning_rate": 3.423226220379206e-05, + "loss": 2.2494, + "step": 6367500 + }, + { + "epoch": 31.55, + "learning_rate": 3.4231023617365975e-05, + "loss": 2.21, + "step": 6368000 + }, + { + "epoch": 31.55, + "learning_rate": 3.4229787508112744e-05, + "loss": 2.2279, + "step": 6368500 + }, + { + "epoch": 31.55, + "learning_rate": 3.422854892168666e-05, + "loss": 2.2203, + "step": 6369000 + }, + { + "epoch": 31.56, + "learning_rate": 3.422731033526058e-05, + "loss": 2.2472, + "step": 6369500 + }, + { + "epoch": 31.56, + "learning_rate": 3.4226071748834495e-05, + "loss": 2.2179, + "step": 6370000 + }, + { + "epoch": 31.56, + "learning_rate": 3.422483316240841e-05, + "loss": 2.2375, + "step": 6370500 + }, + { + "epoch": 31.56, + "learning_rate": 3.422359457598233e-05, + "loss": 2.2469, + "step": 6371000 + }, + { + "epoch": 31.57, + "learning_rate": 3.4222355989556246e-05, + "loss": 2.2507, + "step": 6371500 + }, + { + "epoch": 31.57, + "learning_rate": 3.422111740313016e-05, + "loss": 2.2195, + "step": 6372000 + }, + { + "epoch": 31.57, + "learning_rate": 3.421987881670408e-05, + "loss": 2.2346, + "step": 6372500 + }, + { + "epoch": 31.57, + "learning_rate": 3.421864023027799e-05, + "loss": 2.2264, + "step": 6373000 + }, + { + "epoch": 31.58, + "learning_rate": 3.4217401643851906e-05, + "loss": 2.2479, + "step": 6373500 + }, + { + "epoch": 31.58, + "learning_rate": 3.421616305742582e-05, + "loss": 2.2097, + "step": 6374000 + }, + { + "epoch": 31.58, + "learning_rate": 3.421492694817259e-05, + "loss": 2.23, + "step": 6374500 + }, + { + "epoch": 31.58, + "learning_rate": 3.421368836174651e-05, + "loss": 2.2047, + "step": 6375000 + }, + { + "epoch": 31.59, + "learning_rate": 3.4212449775320426e-05, + "loss": 2.2001, + "step": 6375500 + }, + { + "epoch": 31.59, + "learning_rate": 3.4211211188894336e-05, + "loss": 2.1968, + "step": 6376000 + }, + { + "epoch": 31.59, + "learning_rate": 3.420997507964111e-05, + "loss": 2.219, + "step": 6376500 + }, + { + "epoch": 31.59, + "learning_rate": 3.420873649321503e-05, + "loss": 2.2418, + "step": 6377000 + }, + { + "epoch": 31.6, + "learning_rate": 3.4207497906788946e-05, + "loss": 2.2385, + "step": 6377500 + }, + { + "epoch": 31.6, + "learning_rate": 3.420625932036286e-05, + "loss": 2.2359, + "step": 6378000 + }, + { + "epoch": 31.6, + "learning_rate": 3.420502073393678e-05, + "loss": 2.2245, + "step": 6378500 + }, + { + "epoch": 31.6, + "learning_rate": 3.420378462468354e-05, + "loss": 2.231, + "step": 6379000 + }, + { + "epoch": 31.61, + "learning_rate": 3.420254603825746e-05, + "loss": 2.1829, + "step": 6379500 + }, + { + "epoch": 31.61, + "learning_rate": 3.420130992900423e-05, + "loss": 2.2169, + "step": 6380000 + }, + { + "epoch": 31.61, + "learning_rate": 3.4200071342578144e-05, + "loss": 2.2441, + "step": 6380500 + }, + { + "epoch": 31.61, + "learning_rate": 3.419883275615206e-05, + "loss": 2.2208, + "step": 6381000 + }, + { + "epoch": 31.62, + "learning_rate": 3.419759416972598e-05, + "loss": 2.2438, + "step": 6381500 + }, + { + "epoch": 31.62, + "learning_rate": 3.4196355583299895e-05, + "loss": 2.2072, + "step": 6382000 + }, + { + "epoch": 31.62, + "learning_rate": 3.419511699687381e-05, + "loss": 2.2324, + "step": 6382500 + }, + { + "epoch": 31.62, + "learning_rate": 3.419387841044773e-05, + "loss": 2.2065, + "step": 6383000 + }, + { + "epoch": 31.63, + "learning_rate": 3.4192639824021646e-05, + "loss": 2.2306, + "step": 6383500 + }, + { + "epoch": 31.63, + "learning_rate": 3.419140123759556e-05, + "loss": 2.2126, + "step": 6384000 + }, + { + "epoch": 31.63, + "learning_rate": 3.419016265116948e-05, + "loss": 2.2266, + "step": 6384500 + }, + { + "epoch": 31.63, + "learning_rate": 3.4188924064743397e-05, + "loss": 2.2249, + "step": 6385000 + }, + { + "epoch": 31.64, + "learning_rate": 3.418768547831731e-05, + "loss": 2.221, + "step": 6385500 + }, + { + "epoch": 31.64, + "learning_rate": 3.4186446891891224e-05, + "loss": 2.245, + "step": 6386000 + }, + { + "epoch": 31.64, + "learning_rate": 3.418520830546514e-05, + "loss": 2.2187, + "step": 6386500 + }, + { + "epoch": 31.64, + "learning_rate": 3.418396971903906e-05, + "loss": 2.2144, + "step": 6387000 + }, + { + "epoch": 31.65, + "learning_rate": 3.4182731132612974e-05, + "loss": 2.2362, + "step": 6387500 + }, + { + "epoch": 31.65, + "learning_rate": 3.418149254618689e-05, + "loss": 2.1898, + "step": 6388000 + }, + { + "epoch": 31.65, + "learning_rate": 3.418025643693365e-05, + "loss": 2.2426, + "step": 6388500 + }, + { + "epoch": 31.65, + "learning_rate": 3.417901785050757e-05, + "loss": 2.2156, + "step": 6389000 + }, + { + "epoch": 31.66, + "learning_rate": 3.417777926408149e-05, + "loss": 2.2327, + "step": 6389500 + }, + { + "epoch": 31.66, + "learning_rate": 3.4176540677655404e-05, + "loss": 2.2054, + "step": 6390000 + }, + { + "epoch": 31.66, + "learning_rate": 3.417530209122932e-05, + "loss": 2.2216, + "step": 6390500 + }, + { + "epoch": 31.66, + "learning_rate": 3.417406350480324e-05, + "loss": 2.2077, + "step": 6391000 + }, + { + "epoch": 31.67, + "learning_rate": 3.4172824918377155e-05, + "loss": 2.2122, + "step": 6391500 + }, + { + "epoch": 31.67, + "learning_rate": 3.417158633195107e-05, + "loss": 2.2206, + "step": 6392000 + }, + { + "epoch": 31.67, + "learning_rate": 3.417034774552499e-05, + "loss": 2.2276, + "step": 6392500 + }, + { + "epoch": 31.67, + "learning_rate": 3.4169109159098906e-05, + "loss": 2.2207, + "step": 6393000 + }, + { + "epoch": 31.68, + "learning_rate": 3.416787057267282e-05, + "loss": 2.2294, + "step": 6393500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416663446341959e-05, + "loss": 2.2208, + "step": 6394000 + }, + { + "epoch": 31.68, + "learning_rate": 3.416539835416636e-05, + "loss": 2.2432, + "step": 6394500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416415976774027e-05, + "loss": 2.2231, + "step": 6395000 + }, + { + "epoch": 31.69, + "learning_rate": 3.416292118131419e-05, + "loss": 2.2351, + "step": 6395500 + }, + { + "epoch": 31.69, + "learning_rate": 3.4161682594888104e-05, + "loss": 2.219, + "step": 6396000 + }, + { + "epoch": 31.69, + "learning_rate": 3.416044400846202e-05, + "loss": 2.2291, + "step": 6396500 + }, + { + "epoch": 31.69, + "learning_rate": 3.415920542203594e-05, + "loss": 2.2375, + "step": 6397000 + }, + { + "epoch": 31.7, + "learning_rate": 3.4157966835609855e-05, + "loss": 2.2292, + "step": 6397500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415672824918377e-05, + "loss": 2.2253, + "step": 6398000 + }, + { + "epoch": 31.7, + "learning_rate": 3.415548966275769e-05, + "loss": 2.2333, + "step": 6398500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415425603067731e-05, + "loss": 2.2152, + "step": 6399000 + }, + { + "epoch": 31.71, + "learning_rate": 3.415301992142408e-05, + "loss": 2.2123, + "step": 6399500 + }, + { + "epoch": 31.71, + "learning_rate": 3.415178381217085e-05, + "loss": 2.235, + "step": 6400000 + }, + { + "epoch": 31.71, + "learning_rate": 3.4150545225744764e-05, + "loss": 2.2219, + "step": 6400500 + }, + { + "epoch": 31.71, + "learning_rate": 3.414930663931868e-05, + "loss": 2.2168, + "step": 6401000 + }, + { + "epoch": 31.72, + "learning_rate": 3.41480680528926e-05, + "loss": 2.2214, + "step": 6401500 + }, + { + "epoch": 31.72, + "learning_rate": 3.4146829466466515e-05, + "loss": 2.2568, + "step": 6402000 + }, + { + "epoch": 31.72, + "learning_rate": 3.414559088004043e-05, + "loss": 2.1955, + "step": 6402500 + }, + { + "epoch": 31.72, + "learning_rate": 3.4144354770787194e-05, + "loss": 2.2304, + "step": 6403000 + }, + { + "epoch": 31.73, + "learning_rate": 3.414311618436111e-05, + "loss": 2.2254, + "step": 6403500 + }, + { + "epoch": 31.73, + "learning_rate": 3.414187759793503e-05, + "loss": 2.2343, + "step": 6404000 + }, + { + "epoch": 31.73, + "learning_rate": 3.4140639011508945e-05, + "loss": 2.2122, + "step": 6404500 + }, + { + "epoch": 31.73, + "learning_rate": 3.413940042508286e-05, + "loss": 2.2378, + "step": 6405000 + }, + { + "epoch": 31.74, + "learning_rate": 3.413816431582963e-05, + "loss": 2.2572, + "step": 6405500 + }, + { + "epoch": 31.74, + "learning_rate": 3.413692572940355e-05, + "loss": 2.2494, + "step": 6406000 + }, + { + "epoch": 31.74, + "learning_rate": 3.4135687142977464e-05, + "loss": 2.2201, + "step": 6406500 + }, + { + "epoch": 31.74, + "learning_rate": 3.413444855655138e-05, + "loss": 2.2259, + "step": 6407000 + }, + { + "epoch": 31.74, + "learning_rate": 3.41332099701253e-05, + "loss": 2.2272, + "step": 6407500 + }, + { + "epoch": 31.75, + "learning_rate": 3.4131971383699215e-05, + "loss": 2.235, + "step": 6408000 + }, + { + "epoch": 31.75, + "learning_rate": 3.413073279727313e-05, + "loss": 2.2403, + "step": 6408500 + }, + { + "epoch": 31.75, + "learning_rate": 3.412949421084705e-05, + "loss": 2.246, + "step": 6409000 + }, + { + "epoch": 31.75, + "learning_rate": 3.4128255624420966e-05, + "loss": 2.2335, + "step": 6409500 + }, + { + "epoch": 31.76, + "learning_rate": 3.412701703799488e-05, + "loss": 2.247, + "step": 6410000 + }, + { + "epoch": 31.76, + "learning_rate": 3.412577845156879e-05, + "loss": 2.2347, + "step": 6410500 + }, + { + "epoch": 31.76, + "learning_rate": 3.412453986514271e-05, + "loss": 2.2364, + "step": 6411000 + }, + { + "epoch": 31.76, + "learning_rate": 3.412330127871663e-05, + "loss": 2.2555, + "step": 6411500 + }, + { + "epoch": 31.77, + "learning_rate": 3.4122062692290544e-05, + "loss": 2.2152, + "step": 6412000 + }, + { + "epoch": 31.77, + "learning_rate": 3.412082658303731e-05, + "loss": 2.2121, + "step": 6412500 + }, + { + "epoch": 31.77, + "learning_rate": 3.411959047378408e-05, + "loss": 2.2103, + "step": 6413000 + }, + { + "epoch": 31.77, + "learning_rate": 3.411835436453085e-05, + "loss": 2.211, + "step": 6413500 + }, + { + "epoch": 31.78, + "learning_rate": 3.411711577810477e-05, + "loss": 2.222, + "step": 6414000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411587719167868e-05, + "loss": 2.2353, + "step": 6414500 + }, + { + "epoch": 31.78, + "learning_rate": 3.4114638605252594e-05, + "loss": 2.2561, + "step": 6415000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411340001882651e-05, + "loss": 2.2272, + "step": 6415500 + }, + { + "epoch": 31.79, + "learning_rate": 3.411216143240043e-05, + "loss": 2.2308, + "step": 6416000 + }, + { + "epoch": 31.79, + "learning_rate": 3.4110922845974345e-05, + "loss": 2.2271, + "step": 6416500 + }, + { + "epoch": 31.79, + "learning_rate": 3.410968425954826e-05, + "loss": 2.203, + "step": 6417000 + }, + { + "epoch": 31.79, + "learning_rate": 3.410844567312218e-05, + "loss": 2.2542, + "step": 6417500 + }, + { + "epoch": 31.8, + "learning_rate": 3.4107207086696096e-05, + "loss": 2.2169, + "step": 6418000 + }, + { + "epoch": 31.8, + "learning_rate": 3.410596850027001e-05, + "loss": 2.225, + "step": 6418500 + }, + { + "epoch": 31.8, + "learning_rate": 3.410473239101678e-05, + "loss": 2.225, + "step": 6419000 + }, + { + "epoch": 31.8, + "learning_rate": 3.41034938045907e-05, + "loss": 2.24, + "step": 6419500 + }, + { + "epoch": 31.81, + "learning_rate": 3.4102255218164615e-05, + "loss": 2.2394, + "step": 6420000 + }, + { + "epoch": 31.81, + "learning_rate": 3.410101663173853e-05, + "loss": 2.255, + "step": 6420500 + }, + { + "epoch": 31.81, + "learning_rate": 3.409977804531245e-05, + "loss": 2.2297, + "step": 6421000 + }, + { + "epoch": 31.81, + "learning_rate": 3.4098539458886366e-05, + "loss": 2.2249, + "step": 6421500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409730087246028e-05, + "loss": 2.2136, + "step": 6422000 + }, + { + "epoch": 31.82, + "learning_rate": 3.40960622860342e-05, + "loss": 2.252, + "step": 6422500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409482369960812e-05, + "loss": 2.213, + "step": 6423000 + }, + { + "epoch": 31.82, + "learning_rate": 3.4093585113182034e-05, + "loss": 2.2281, + "step": 6423500 + }, + { + "epoch": 31.83, + "learning_rate": 3.4092346526755944e-05, + "loss": 2.2149, + "step": 6424000 + }, + { + "epoch": 31.83, + "learning_rate": 3.409110794032986e-05, + "loss": 2.2163, + "step": 6424500 + }, + { + "epoch": 31.83, + "learning_rate": 3.408987183107663e-05, + "loss": 2.2166, + "step": 6425000 + }, + { + "epoch": 31.83, + "learning_rate": 3.408863324465055e-05, + "loss": 2.2333, + "step": 6425500 + }, + { + "epoch": 31.84, + "learning_rate": 3.4087397135397315e-05, + "loss": 2.2312, + "step": 6426000 + }, + { + "epoch": 31.84, + "learning_rate": 3.4086161026144084e-05, + "loss": 2.226, + "step": 6426500 + }, + { + "epoch": 31.84, + "learning_rate": 3.4084922439718e-05, + "loss": 2.2535, + "step": 6427000 + }, + { + "epoch": 31.84, + "learning_rate": 3.408368385329191e-05, + "loss": 2.2257, + "step": 6427500 + }, + { + "epoch": 31.85, + "learning_rate": 3.408244526686583e-05, + "loss": 2.232, + "step": 6428000 + }, + { + "epoch": 31.85, + "learning_rate": 3.4081206680439745e-05, + "loss": 2.2277, + "step": 6428500 + }, + { + "epoch": 31.85, + "learning_rate": 3.407996809401366e-05, + "loss": 2.215, + "step": 6429000 + }, + { + "epoch": 31.85, + "learning_rate": 3.407872950758758e-05, + "loss": 2.2315, + "step": 6429500 + }, + { + "epoch": 31.86, + "learning_rate": 3.4077490921161496e-05, + "loss": 2.2276, + "step": 6430000 + }, + { + "epoch": 31.86, + "learning_rate": 3.4076254811908265e-05, + "loss": 2.2121, + "step": 6430500 + }, + { + "epoch": 31.86, + "learning_rate": 3.407501622548218e-05, + "loss": 2.2239, + "step": 6431000 + }, + { + "epoch": 31.86, + "learning_rate": 3.40737776390561e-05, + "loss": 2.2147, + "step": 6431500 + }, + { + "epoch": 31.87, + "learning_rate": 3.4072539052630016e-05, + "loss": 2.2294, + "step": 6432000 + }, + { + "epoch": 31.87, + "learning_rate": 3.407130046620393e-05, + "loss": 2.2209, + "step": 6432500 + }, + { + "epoch": 31.87, + "learning_rate": 3.407006187977785e-05, + "loss": 2.2195, + "step": 6433000 + }, + { + "epoch": 31.87, + "learning_rate": 3.4068823293351766e-05, + "loss": 2.231, + "step": 6433500 + }, + { + "epoch": 31.88, + "learning_rate": 3.406758470692568e-05, + "loss": 2.2281, + "step": 6434000 + }, + { + "epoch": 31.88, + "learning_rate": 3.40663461204996e-05, + "loss": 2.2155, + "step": 6434500 + }, + { + "epoch": 31.88, + "learning_rate": 3.406511248841922e-05, + "loss": 2.2162, + "step": 6435000 + }, + { + "epoch": 31.88, + "learning_rate": 3.406387390199314e-05, + "loss": 2.217, + "step": 6435500 + }, + { + "epoch": 31.89, + "learning_rate": 3.406263531556705e-05, + "loss": 2.1886, + "step": 6436000 + }, + { + "epoch": 31.89, + "learning_rate": 3.4061396729140965e-05, + "loss": 2.2471, + "step": 6436500 + }, + { + "epoch": 31.89, + "learning_rate": 3.406015814271488e-05, + "loss": 2.2321, + "step": 6437000 + }, + { + "epoch": 31.89, + "learning_rate": 3.40589195562888e-05, + "loss": 2.2454, + "step": 6437500 + }, + { + "epoch": 31.9, + "learning_rate": 3.4057680969862716e-05, + "loss": 2.2335, + "step": 6438000 + }, + { + "epoch": 31.9, + "learning_rate": 3.4056444860609485e-05, + "loss": 2.2461, + "step": 6438500 + }, + { + "epoch": 31.9, + "learning_rate": 3.40552062741834e-05, + "loss": 2.2311, + "step": 6439000 + }, + { + "epoch": 31.9, + "learning_rate": 3.405396768775732e-05, + "loss": 2.2144, + "step": 6439500 + }, + { + "epoch": 31.91, + "learning_rate": 3.4052729101331235e-05, + "loss": 2.2356, + "step": 6440000 + }, + { + "epoch": 31.91, + "learning_rate": 3.405149051490515e-05, + "loss": 2.2321, + "step": 6440500 + }, + { + "epoch": 31.91, + "learning_rate": 3.405025440565192e-05, + "loss": 2.2336, + "step": 6441000 + }, + { + "epoch": 31.91, + "learning_rate": 3.404901581922584e-05, + "loss": 2.2452, + "step": 6441500 + }, + { + "epoch": 31.92, + "learning_rate": 3.4047777232799755e-05, + "loss": 2.2066, + "step": 6442000 + }, + { + "epoch": 31.92, + "learning_rate": 3.4046538646373665e-05, + "loss": 2.2472, + "step": 6442500 + }, + { + "epoch": 31.92, + "learning_rate": 3.404530005994758e-05, + "loss": 2.2411, + "step": 6443000 + }, + { + "epoch": 31.92, + "learning_rate": 3.40440664278672e-05, + "loss": 2.2134, + "step": 6443500 + }, + { + "epoch": 31.93, + "learning_rate": 3.404282784144112e-05, + "loss": 2.2387, + "step": 6444000 + }, + { + "epoch": 31.93, + "learning_rate": 3.4041589255015037e-05, + "loss": 2.2511, + "step": 6444500 + }, + { + "epoch": 31.93, + "learning_rate": 3.4040350668588953e-05, + "loss": 2.1958, + "step": 6445000 + }, + { + "epoch": 31.93, + "learning_rate": 3.403911208216287e-05, + "loss": 2.2224, + "step": 6445500 + }, + { + "epoch": 31.94, + "learning_rate": 3.403787349573679e-05, + "loss": 2.1971, + "step": 6446000 + }, + { + "epoch": 31.94, + "learning_rate": 3.4036634909310704e-05, + "loss": 2.2369, + "step": 6446500 + }, + { + "epoch": 31.94, + "learning_rate": 3.403539880005747e-05, + "loss": 2.2274, + "step": 6447000 + }, + { + "epoch": 31.94, + "learning_rate": 3.403416021363139e-05, + "loss": 2.2175, + "step": 6447500 + }, + { + "epoch": 31.95, + "learning_rate": 3.403292162720531e-05, + "loss": 2.2479, + "step": 6448000 + }, + { + "epoch": 31.95, + "learning_rate": 3.403168551795207e-05, + "loss": 2.2391, + "step": 6448500 + }, + { + "epoch": 31.95, + "learning_rate": 3.4030449408698845e-05, + "loss": 2.2423, + "step": 6449000 + }, + { + "epoch": 31.95, + "learning_rate": 3.4029210822272755e-05, + "loss": 2.2215, + "step": 6449500 + }, + { + "epoch": 31.96, + "learning_rate": 3.402797223584667e-05, + "loss": 2.2542, + "step": 6450000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402673364942059e-05, + "loss": 2.2291, + "step": 6450500 + }, + { + "epoch": 31.96, + "learning_rate": 3.4025495062994505e-05, + "loss": 2.2185, + "step": 6451000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402425895374128e-05, + "loss": 2.2083, + "step": 6451500 + }, + { + "epoch": 31.97, + "learning_rate": 3.40230203673152e-05, + "loss": 2.2151, + "step": 6452000 + }, + { + "epoch": 31.97, + "learning_rate": 3.402178178088911e-05, + "loss": 2.2216, + "step": 6452500 + }, + { + "epoch": 31.97, + "learning_rate": 3.4020543194463025e-05, + "loss": 2.2025, + "step": 6453000 + }, + { + "epoch": 31.97, + "learning_rate": 3.401930460803694e-05, + "loss": 2.2272, + "step": 6453500 + }, + { + "epoch": 31.98, + "learning_rate": 3.401806602161086e-05, + "loss": 2.2315, + "step": 6454000 + }, + { + "epoch": 31.98, + "learning_rate": 3.4016827435184776e-05, + "loss": 2.2421, + "step": 6454500 + }, + { + "epoch": 31.98, + "learning_rate": 3.4015588848758686e-05, + "loss": 2.229, + "step": 6455000 + }, + { + "epoch": 31.98, + "learning_rate": 3.40143502623326e-05, + "loss": 2.2382, + "step": 6455500 + }, + { + "epoch": 31.99, + "learning_rate": 3.401311167590652e-05, + "loss": 2.2209, + "step": 6456000 + }, + { + "epoch": 31.99, + "learning_rate": 3.401187308948044e-05, + "loss": 2.2126, + "step": 6456500 + }, + { + "epoch": 31.99, + "learning_rate": 3.4010634503054354e-05, + "loss": 2.2322, + "step": 6457000 + }, + { + "epoch": 31.99, + "learning_rate": 3.400939591662827e-05, + "loss": 2.2372, + "step": 6457500 + }, + { + "epoch": 32.0, + "learning_rate": 3.400815733020219e-05, + "loss": 2.236, + "step": 6458000 + }, + { + "epoch": 32.0, + "learning_rate": 3.4006918743776104e-05, + "loss": 2.2363, + "step": 6458500 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.6593229081640353, + "eval_accuracy_mlm": 0.6153610829620848, + "eval_accuracy_nsp": 0.8667393580928698, + "eval_loss": 2.3164103031158447, + "eval_runtime": 145.6685, + "eval_samples_per_second": 1750.269, + "eval_steps_per_second": 72.933, + "step": 6458976 + }, + { + "epoch": 32.0, + "learning_rate": 3.400568015735002e-05, + "loss": 2.2388, + "step": 6459000 + }, + { + "epoch": 32.0, + "learning_rate": 3.400444652526964e-05, + "loss": 2.2307, + "step": 6459500 + }, + { + "epoch": 32.01, + "learning_rate": 3.400320793884356e-05, + "loss": 2.203, + "step": 6460000 + }, + { + "epoch": 32.01, + "learning_rate": 3.4001969352417476e-05, + "loss": 2.1939, + "step": 6460500 + }, + { + "epoch": 32.01, + "learning_rate": 3.400073076599139e-05, + "loss": 2.1834, + "step": 6461000 + }, + { + "epoch": 32.01, + "learning_rate": 3.399949217956531e-05, + "loss": 2.2082, + "step": 6461500 + }, + { + "epoch": 32.01, + "learning_rate": 3.399825607031207e-05, + "loss": 2.2135, + "step": 6462000 + }, + { + "epoch": 32.02, + "learning_rate": 3.399701748388599e-05, + "loss": 2.2041, + "step": 6462500 + }, + { + "epoch": 32.02, + "learning_rate": 3.3995778897459906e-05, + "loss": 2.1927, + "step": 6463000 + }, + { + "epoch": 32.02, + "learning_rate": 3.399454031103382e-05, + "loss": 2.1848, + "step": 6463500 + }, + { + "epoch": 32.02, + "learning_rate": 3.399330172460774e-05, + "loss": 2.2194, + "step": 6464000 + }, + { + "epoch": 32.03, + "learning_rate": 3.3992063138181656e-05, + "loss": 2.1935, + "step": 6464500 + }, + { + "epoch": 32.03, + "learning_rate": 3.3990824551755573e-05, + "loss": 2.22, + "step": 6465000 + }, + { + "epoch": 32.03, + "learning_rate": 3.398958596532949e-05, + "loss": 2.1954, + "step": 6465500 + }, + { + "epoch": 32.03, + "learning_rate": 3.398834737890341e-05, + "loss": 2.2214, + "step": 6466000 + }, + { + "epoch": 32.04, + "learning_rate": 3.3987111269650176e-05, + "loss": 2.1903, + "step": 6466500 + }, + { + "epoch": 32.04, + "learning_rate": 3.398587268322409e-05, + "loss": 2.1986, + "step": 6467000 + }, + { + "epoch": 32.04, + "learning_rate": 3.398463409679801e-05, + "loss": 2.1881, + "step": 6467500 + }, + { + "epoch": 32.04, + "learning_rate": 3.398339798754477e-05, + "loss": 2.2115, + "step": 6468000 + }, + { + "epoch": 32.05, + "learning_rate": 3.398215940111869e-05, + "loss": 2.2028, + "step": 6468500 + }, + { + "epoch": 32.05, + "learning_rate": 3.3980923291865464e-05, + "loss": 2.1965, + "step": 6469000 + }, + { + "epoch": 32.05, + "learning_rate": 3.3979687182612227e-05, + "loss": 2.2209, + "step": 6469500 + }, + { + "epoch": 32.05, + "learning_rate": 3.3978448596186143e-05, + "loss": 2.1943, + "step": 6470000 + }, + { + "epoch": 32.06, + "learning_rate": 3.397721000976006e-05, + "loss": 2.2164, + "step": 6470500 + }, + { + "epoch": 32.06, + "learning_rate": 3.397597390050683e-05, + "loss": 2.1946, + "step": 6471000 + }, + { + "epoch": 32.06, + "learning_rate": 3.3974735314080746e-05, + "loss": 2.2251, + "step": 6471500 + }, + { + "epoch": 32.06, + "learning_rate": 3.397349672765466e-05, + "loss": 2.1968, + "step": 6472000 + }, + { + "epoch": 32.07, + "learning_rate": 3.397225814122858e-05, + "loss": 2.212, + "step": 6472500 + }, + { + "epoch": 32.07, + "learning_rate": 3.39710195548025e-05, + "loss": 2.2174, + "step": 6473000 + }, + { + "epoch": 32.07, + "learning_rate": 3.3969780968376414e-05, + "loss": 2.2001, + "step": 6473500 + }, + { + "epoch": 32.07, + "learning_rate": 3.396854238195033e-05, + "loss": 2.2193, + "step": 6474000 + }, + { + "epoch": 32.08, + "learning_rate": 3.396730379552425e-05, + "loss": 2.2152, + "step": 6474500 + }, + { + "epoch": 32.08, + "learning_rate": 3.3966065209098165e-05, + "loss": 2.1806, + "step": 6475000 + }, + { + "epoch": 32.08, + "learning_rate": 3.3964829099844933e-05, + "loss": 2.2087, + "step": 6475500 + }, + { + "epoch": 32.08, + "learning_rate": 3.396359051341885e-05, + "loss": 2.1978, + "step": 6476000 + }, + { + "epoch": 32.09, + "learning_rate": 3.396235192699276e-05, + "loss": 2.2147, + "step": 6476500 + }, + { + "epoch": 32.09, + "learning_rate": 3.396111334056668e-05, + "loss": 2.2109, + "step": 6477000 + }, + { + "epoch": 32.09, + "learning_rate": 3.3959874754140594e-05, + "loss": 2.2149, + "step": 6477500 + }, + { + "epoch": 32.09, + "learning_rate": 3.395863616771451e-05, + "loss": 2.2203, + "step": 6478000 + }, + { + "epoch": 32.1, + "learning_rate": 3.395739758128843e-05, + "loss": 2.208, + "step": 6478500 + }, + { + "epoch": 32.1, + "learning_rate": 3.3956158994862345e-05, + "loss": 2.2123, + "step": 6479000 + }, + { + "epoch": 32.1, + "learning_rate": 3.395492040843626e-05, + "loss": 2.1831, + "step": 6479500 + }, + { + "epoch": 32.1, + "learning_rate": 3.395368182201018e-05, + "loss": 2.2049, + "step": 6480000 + }, + { + "epoch": 32.11, + "learning_rate": 3.395244323558409e-05, + "loss": 2.2029, + "step": 6480500 + }, + { + "epoch": 32.11, + "learning_rate": 3.3951207126330865e-05, + "loss": 2.1977, + "step": 6481000 + }, + { + "epoch": 32.11, + "learning_rate": 3.394996853990478e-05, + "loss": 2.2033, + "step": 6481500 + }, + { + "epoch": 32.11, + "learning_rate": 3.39487299534787e-05, + "loss": 2.2213, + "step": 6482000 + }, + { + "epoch": 32.12, + "learning_rate": 3.394749384422547e-05, + "loss": 2.2247, + "step": 6482500 + }, + { + "epoch": 32.12, + "learning_rate": 3.394625525779938e-05, + "loss": 2.2051, + "step": 6483000 + }, + { + "epoch": 32.12, + "learning_rate": 3.3945016671373294e-05, + "loss": 2.1888, + "step": 6483500 + }, + { + "epoch": 32.12, + "learning_rate": 3.394377808494721e-05, + "loss": 2.2, + "step": 6484000 + }, + { + "epoch": 32.13, + "learning_rate": 3.394253949852113e-05, + "loss": 2.2133, + "step": 6484500 + }, + { + "epoch": 32.13, + "learning_rate": 3.3941300912095045e-05, + "loss": 2.207, + "step": 6485000 + }, + { + "epoch": 32.13, + "learning_rate": 3.394006232566896e-05, + "loss": 2.2215, + "step": 6485500 + }, + { + "epoch": 32.13, + "learning_rate": 3.393882373924288e-05, + "loss": 2.2094, + "step": 6486000 + }, + { + "epoch": 32.14, + "learning_rate": 3.3937585152816796e-05, + "loss": 2.1998, + "step": 6486500 + }, + { + "epoch": 32.14, + "learning_rate": 3.3936346566390706e-05, + "loss": 2.1855, + "step": 6487000 + }, + { + "epoch": 32.14, + "learning_rate": 3.393510797996462e-05, + "loss": 2.2094, + "step": 6487500 + }, + { + "epoch": 32.14, + "learning_rate": 3.393386939353854e-05, + "loss": 2.1967, + "step": 6488000 + }, + { + "epoch": 32.15, + "learning_rate": 3.393263080711246e-05, + "loss": 2.2105, + "step": 6488500 + }, + { + "epoch": 32.15, + "learning_rate": 3.3931392220686374e-05, + "loss": 2.2006, + "step": 6489000 + }, + { + "epoch": 32.15, + "learning_rate": 3.393015363426029e-05, + "loss": 2.1944, + "step": 6489500 + }, + { + "epoch": 32.15, + "learning_rate": 3.392891504783421e-05, + "loss": 2.1904, + "step": 6490000 + }, + { + "epoch": 32.16, + "learning_rate": 3.3927676461408125e-05, + "loss": 2.1949, + "step": 6490500 + }, + { + "epoch": 32.16, + "learning_rate": 3.392643787498204e-05, + "loss": 2.23, + "step": 6491000 + }, + { + "epoch": 32.16, + "learning_rate": 3.392519928855596e-05, + "loss": 2.2266, + "step": 6491500 + }, + { + "epoch": 32.16, + "learning_rate": 3.392396317930273e-05, + "loss": 2.2044, + "step": 6492000 + }, + { + "epoch": 32.17, + "learning_rate": 3.3922724592876644e-05, + "loss": 2.2013, + "step": 6492500 + }, + { + "epoch": 32.17, + "learning_rate": 3.392148600645056e-05, + "loss": 2.2037, + "step": 6493000 + }, + { + "epoch": 32.17, + "learning_rate": 3.392024742002448e-05, + "loss": 2.213, + "step": 6493500 + }, + { + "epoch": 32.17, + "learning_rate": 3.3919008833598395e-05, + "loss": 2.2009, + "step": 6494000 + }, + { + "epoch": 32.18, + "learning_rate": 3.391777272434516e-05, + "loss": 2.1952, + "step": 6494500 + }, + { + "epoch": 32.18, + "learning_rate": 3.3916534137919074e-05, + "loss": 2.2143, + "step": 6495000 + }, + { + "epoch": 32.18, + "learning_rate": 3.391529555149299e-05, + "loss": 2.208, + "step": 6495500 + }, + { + "epoch": 32.18, + "learning_rate": 3.391405696506691e-05, + "loss": 2.1843, + "step": 6496000 + }, + { + "epoch": 32.19, + "learning_rate": 3.3912818378640825e-05, + "loss": 2.1988, + "step": 6496500 + }, + { + "epoch": 32.19, + "learning_rate": 3.391157979221474e-05, + "loss": 2.2156, + "step": 6497000 + }, + { + "epoch": 32.19, + "learning_rate": 3.391034120578866e-05, + "loss": 2.1969, + "step": 6497500 + }, + { + "epoch": 32.19, + "learning_rate": 3.3909102619362576e-05, + "loss": 2.1982, + "step": 6498000 + }, + { + "epoch": 32.2, + "learning_rate": 3.3907866510109344e-05, + "loss": 2.1881, + "step": 6498500 + }, + { + "epoch": 32.2, + "learning_rate": 3.390663040085611e-05, + "loss": 2.198, + "step": 6499000 + }, + { + "epoch": 32.2, + "learning_rate": 3.390539181443002e-05, + "loss": 2.2107, + "step": 6499500 + }, + { + "epoch": 32.2, + "learning_rate": 3.390415322800394e-05, + "loss": 2.2016, + "step": 6500000 + }, + { + "epoch": 32.21, + "learning_rate": 3.390291464157786e-05, + "loss": 2.2143, + "step": 6500500 + }, + { + "epoch": 32.21, + "learning_rate": 3.390167853232463e-05, + "loss": 2.2103, + "step": 6501000 + }, + { + "epoch": 32.21, + "learning_rate": 3.390043994589855e-05, + "loss": 2.1963, + "step": 6501500 + }, + { + "epoch": 32.21, + "learning_rate": 3.389920383664532e-05, + "loss": 2.2025, + "step": 6502000 + }, + { + "epoch": 32.22, + "learning_rate": 3.3897965250219235e-05, + "loss": 2.2115, + "step": 6502500 + }, + { + "epoch": 32.22, + "learning_rate": 3.389672666379315e-05, + "loss": 2.2162, + "step": 6503000 + }, + { + "epoch": 32.22, + "learning_rate": 3.389548807736706e-05, + "loss": 2.2093, + "step": 6503500 + }, + { + "epoch": 32.22, + "learning_rate": 3.389424949094098e-05, + "loss": 2.2149, + "step": 6504000 + }, + { + "epoch": 32.23, + "learning_rate": 3.3893010904514896e-05, + "loss": 2.2041, + "step": 6504500 + }, + { + "epoch": 32.23, + "learning_rate": 3.389177231808881e-05, + "loss": 2.1998, + "step": 6505000 + }, + { + "epoch": 32.23, + "learning_rate": 3.389053373166273e-05, + "loss": 2.1898, + "step": 6505500 + }, + { + "epoch": 32.23, + "learning_rate": 3.388929514523664e-05, + "loss": 2.2154, + "step": 6506000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388805655881056e-05, + "loss": 2.2269, + "step": 6506500 + }, + { + "epoch": 32.24, + "learning_rate": 3.3886817972384474e-05, + "loss": 2.2098, + "step": 6507000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388557938595839e-05, + "loss": 2.2206, + "step": 6507500 + }, + { + "epoch": 32.24, + "learning_rate": 3.388434079953231e-05, + "loss": 2.2283, + "step": 6508000 + }, + { + "epoch": 32.25, + "learning_rate": 3.388310469027908e-05, + "loss": 2.2005, + "step": 6508500 + }, + { + "epoch": 32.25, + "learning_rate": 3.3881866103852994e-05, + "loss": 2.2254, + "step": 6509000 + }, + { + "epoch": 32.25, + "learning_rate": 3.388062751742691e-05, + "loss": 2.1927, + "step": 6509500 + }, + { + "epoch": 32.25, + "learning_rate": 3.387938893100083e-05, + "loss": 2.201, + "step": 6510000 + }, + { + "epoch": 32.26, + "learning_rate": 3.3878150344574745e-05, + "loss": 2.1961, + "step": 6510500 + }, + { + "epoch": 32.26, + "learning_rate": 3.387691175814866e-05, + "loss": 2.1983, + "step": 6511000 + }, + { + "epoch": 32.26, + "learning_rate": 3.387567317172258e-05, + "loss": 2.2046, + "step": 6511500 + }, + { + "epoch": 32.26, + "learning_rate": 3.3874434585296495e-05, + "loss": 2.2265, + "step": 6512000 + }, + { + "epoch": 32.27, + "learning_rate": 3.387319599887041e-05, + "loss": 2.214, + "step": 6512500 + }, + { + "epoch": 32.27, + "learning_rate": 3.387195741244433e-05, + "loss": 2.194, + "step": 6513000 + }, + { + "epoch": 32.27, + "learning_rate": 3.3870718826018246e-05, + "loss": 2.2226, + "step": 6513500 + }, + { + "epoch": 32.27, + "learning_rate": 3.386948271676501e-05, + "loss": 2.1941, + "step": 6514000 + }, + { + "epoch": 32.28, + "learning_rate": 3.3868246607511784e-05, + "loss": 2.1788, + "step": 6514500 + }, + { + "epoch": 32.28, + "learning_rate": 3.3867008021085694e-05, + "loss": 2.205, + "step": 6515000 + }, + { + "epoch": 32.28, + "learning_rate": 3.386576943465961e-05, + "loss": 2.2079, + "step": 6515500 + }, + { + "epoch": 32.28, + "learning_rate": 3.386453084823353e-05, + "loss": 2.1918, + "step": 6516000 + }, + { + "epoch": 32.28, + "learning_rate": 3.3863294738980303e-05, + "loss": 2.2067, + "step": 6516500 + }, + { + "epoch": 32.29, + "learning_rate": 3.3862056152554214e-05, + "loss": 2.1896, + "step": 6517000 + }, + { + "epoch": 32.29, + "learning_rate": 3.386081756612813e-05, + "loss": 2.1909, + "step": 6517500 + }, + { + "epoch": 32.29, + "learning_rate": 3.385957897970205e-05, + "loss": 2.2219, + "step": 6518000 + }, + { + "epoch": 32.29, + "learning_rate": 3.3858342870448816e-05, + "loss": 2.2144, + "step": 6518500 + }, + { + "epoch": 32.3, + "learning_rate": 3.385710428402273e-05, + "loss": 2.2162, + "step": 6519000 + }, + { + "epoch": 32.3, + "learning_rate": 3.385586569759665e-05, + "loss": 2.2064, + "step": 6519500 + }, + { + "epoch": 32.3, + "learning_rate": 3.385462958834342e-05, + "loss": 2.2063, + "step": 6520000 + }, + { + "epoch": 32.3, + "learning_rate": 3.3853391001917336e-05, + "loss": 2.2012, + "step": 6520500 + }, + { + "epoch": 32.31, + "learning_rate": 3.385215241549125e-05, + "loss": 2.2028, + "step": 6521000 + }, + { + "epoch": 32.31, + "learning_rate": 3.385091382906517e-05, + "loss": 2.1945, + "step": 6521500 + }, + { + "epoch": 32.31, + "learning_rate": 3.3849680196984784e-05, + "loss": 2.2054, + "step": 6522000 + }, + { + "epoch": 32.31, + "learning_rate": 3.38484416105587e-05, + "loss": 2.2139, + "step": 6522500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384720302413262e-05, + "loss": 2.203, + "step": 6523000 + }, + { + "epoch": 32.32, + "learning_rate": 3.3845964437706534e-05, + "loss": 2.2034, + "step": 6523500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384472585128045e-05, + "loss": 2.2103, + "step": 6524000 + }, + { + "epoch": 32.32, + "learning_rate": 3.384348726485437e-05, + "loss": 2.2098, + "step": 6524500 + }, + { + "epoch": 32.33, + "learning_rate": 3.3842248678428285e-05, + "loss": 2.1964, + "step": 6525000 + }, + { + "epoch": 32.33, + "learning_rate": 3.38410100920022e-05, + "loss": 2.1948, + "step": 6525500 + }, + { + "epoch": 32.33, + "learning_rate": 3.383977150557612e-05, + "loss": 2.2176, + "step": 6526000 + }, + { + "epoch": 32.33, + "learning_rate": 3.3838532919150036e-05, + "loss": 2.2094, + "step": 6526500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383729433272395e-05, + "loss": 2.2111, + "step": 6527000 + }, + { + "epoch": 32.34, + "learning_rate": 3.383605574629787e-05, + "loss": 2.2003, + "step": 6527500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383481715987179e-05, + "loss": 2.2115, + "step": 6528000 + }, + { + "epoch": 32.34, + "learning_rate": 3.3833578573445704e-05, + "loss": 2.2292, + "step": 6528500 + }, + { + "epoch": 32.35, + "learning_rate": 3.3832342464192466e-05, + "loss": 2.2087, + "step": 6529000 + }, + { + "epoch": 32.35, + "learning_rate": 3.383110387776638e-05, + "loss": 2.2061, + "step": 6529500 + }, + { + "epoch": 32.35, + "learning_rate": 3.38298652913403e-05, + "loss": 2.2259, + "step": 6530000 + }, + { + "epoch": 32.35, + "learning_rate": 3.3828626704914217e-05, + "loss": 2.203, + "step": 6530500 + }, + { + "epoch": 32.36, + "learning_rate": 3.3827388118488133e-05, + "loss": 2.2353, + "step": 6531000 + }, + { + "epoch": 32.36, + "learning_rate": 3.382614953206205e-05, + "loss": 2.2215, + "step": 6531500 + }, + { + "epoch": 32.36, + "learning_rate": 3.382491342280882e-05, + "loss": 2.2307, + "step": 6532000 + }, + { + "epoch": 32.36, + "learning_rate": 3.3823674836382736e-05, + "loss": 2.2312, + "step": 6532500 + }, + { + "epoch": 32.37, + "learning_rate": 3.382243624995665e-05, + "loss": 2.2237, + "step": 6533000 + }, + { + "epoch": 32.37, + "learning_rate": 3.382119766353057e-05, + "loss": 2.2023, + "step": 6533500 + }, + { + "epoch": 32.37, + "learning_rate": 3.381996155427733e-05, + "loss": 2.2172, + "step": 6534000 + }, + { + "epoch": 32.37, + "learning_rate": 3.381872296785125e-05, + "loss": 2.2087, + "step": 6534500 + }, + { + "epoch": 32.38, + "learning_rate": 3.3817484381425166e-05, + "loss": 2.2205, + "step": 6535000 + }, + { + "epoch": 32.38, + "learning_rate": 3.381624579499908e-05, + "loss": 2.2083, + "step": 6535500 + }, + { + "epoch": 32.38, + "learning_rate": 3.3815007208573e-05, + "loss": 2.2052, + "step": 6536000 + }, + { + "epoch": 32.38, + "learning_rate": 3.381376862214692e-05, + "loss": 2.2252, + "step": 6536500 + }, + { + "epoch": 32.39, + "learning_rate": 3.3812530035720834e-05, + "loss": 2.1906, + "step": 6537000 + }, + { + "epoch": 32.39, + "learning_rate": 3.381129144929475e-05, + "loss": 2.2277, + "step": 6537500 + }, + { + "epoch": 32.39, + "learning_rate": 3.381005286286867e-05, + "loss": 2.2075, + "step": 6538000 + }, + { + "epoch": 32.39, + "learning_rate": 3.3808816753615436e-05, + "loss": 2.2181, + "step": 6538500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380757816718935e-05, + "loss": 2.2153, + "step": 6539000 + }, + { + "epoch": 32.4, + "learning_rate": 3.380634205793612e-05, + "loss": 2.1944, + "step": 6539500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380510842585574e-05, + "loss": 2.2176, + "step": 6540000 + }, + { + "epoch": 32.4, + "learning_rate": 3.380387231660251e-05, + "loss": 2.2233, + "step": 6540500 + }, + { + "epoch": 32.41, + "learning_rate": 3.380263373017643e-05, + "loss": 2.2152, + "step": 6541000 + }, + { + "epoch": 32.41, + "learning_rate": 3.380139514375034e-05, + "loss": 2.2154, + "step": 6541500 + }, + { + "epoch": 32.41, + "learning_rate": 3.3800156557324255e-05, + "loss": 2.2299, + "step": 6542000 + }, + { + "epoch": 32.41, + "learning_rate": 3.379891797089817e-05, + "loss": 2.2184, + "step": 6542500 + }, + { + "epoch": 32.42, + "learning_rate": 3.379767938447209e-05, + "loss": 2.2195, + "step": 6543000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379644327521886e-05, + "loss": 2.2001, + "step": 6543500 + }, + { + "epoch": 32.42, + "learning_rate": 3.3795204688792775e-05, + "loss": 2.2322, + "step": 6544000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379396610236669e-05, + "loss": 2.207, + "step": 6544500 + }, + { + "epoch": 32.43, + "learning_rate": 3.379272751594061e-05, + "loss": 2.2361, + "step": 6545000 + }, + { + "epoch": 32.43, + "learning_rate": 3.3791488929514526e-05, + "loss": 2.2226, + "step": 6545500 + }, + { + "epoch": 32.43, + "learning_rate": 3.379025034308844e-05, + "loss": 2.2174, + "step": 6546000 + }, + { + "epoch": 32.43, + "learning_rate": 3.378901175666236e-05, + "loss": 2.2221, + "step": 6546500 + }, + { + "epoch": 32.44, + "learning_rate": 3.378777317023628e-05, + "loss": 2.1688, + "step": 6547000 + }, + { + "epoch": 32.44, + "learning_rate": 3.3786534583810194e-05, + "loss": 2.2214, + "step": 6547500 + }, + { + "epoch": 32.44, + "learning_rate": 3.378529599738411e-05, + "loss": 2.2168, + "step": 6548000 + }, + { + "epoch": 32.44, + "learning_rate": 3.378405741095803e-05, + "loss": 2.2249, + "step": 6548500 + }, + { + "epoch": 32.45, + "learning_rate": 3.3782818824531944e-05, + "loss": 2.2091, + "step": 6549000 + }, + { + "epoch": 32.45, + "learning_rate": 3.378158023810586e-05, + "loss": 2.2079, + "step": 6549500 + }, + { + "epoch": 32.45, + "learning_rate": 3.378034165167977e-05, + "loss": 2.236, + "step": 6550000 + }, + { + "epoch": 32.45, + "learning_rate": 3.377910306525369e-05, + "loss": 2.219, + "step": 6550500 + }, + { + "epoch": 32.46, + "learning_rate": 3.3777864478827605e-05, + "loss": 2.212, + "step": 6551000 + }, + { + "epoch": 32.46, + "learning_rate": 3.377662589240152e-05, + "loss": 2.2248, + "step": 6551500 + }, + { + "epoch": 32.46, + "learning_rate": 3.377538730597544e-05, + "loss": 2.2177, + "step": 6552000 + }, + { + "epoch": 32.46, + "learning_rate": 3.3774148719549356e-05, + "loss": 2.2114, + "step": 6552500 + }, + { + "epoch": 32.47, + "learning_rate": 3.377291013312327e-05, + "loss": 2.2005, + "step": 6553000 + }, + { + "epoch": 32.47, + "learning_rate": 3.377167154669719e-05, + "loss": 2.2251, + "step": 6553500 + }, + { + "epoch": 32.47, + "learning_rate": 3.37704329602711e-05, + "loss": 2.2158, + "step": 6554000 + }, + { + "epoch": 32.47, + "learning_rate": 3.376919685101787e-05, + "loss": 2.1986, + "step": 6554500 + }, + { + "epoch": 32.48, + "learning_rate": 3.3767960741764644e-05, + "loss": 2.2362, + "step": 6555000 + }, + { + "epoch": 32.48, + "learning_rate": 3.376672215533856e-05, + "loss": 2.2072, + "step": 6555500 + }, + { + "epoch": 32.48, + "learning_rate": 3.3765488523258175e-05, + "loss": 2.2369, + "step": 6556000 + }, + { + "epoch": 32.48, + "learning_rate": 3.376424993683209e-05, + "loss": 2.2121, + "step": 6556500 + }, + { + "epoch": 32.49, + "learning_rate": 3.376301135040601e-05, + "loss": 2.2245, + "step": 6557000 + }, + { + "epoch": 32.49, + "learning_rate": 3.3761772763979926e-05, + "loss": 2.2273, + "step": 6557500 + }, + { + "epoch": 32.49, + "learning_rate": 3.376053417755384e-05, + "loss": 2.2315, + "step": 6558000 + }, + { + "epoch": 32.49, + "learning_rate": 3.375929559112776e-05, + "loss": 2.221, + "step": 6558500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375805700470168e-05, + "loss": 2.2076, + "step": 6559000 + }, + { + "epoch": 32.5, + "learning_rate": 3.3756820895448446e-05, + "loss": 2.2375, + "step": 6559500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375558230902236e-05, + "loss": 2.233, + "step": 6560000 + }, + { + "epoch": 32.5, + "learning_rate": 3.375434372259628e-05, + "loss": 2.2013, + "step": 6560500 + }, + { + "epoch": 32.51, + "learning_rate": 3.3753105136170196e-05, + "loss": 2.224, + "step": 6561000 + }, + { + "epoch": 32.51, + "learning_rate": 3.375186654974411e-05, + "loss": 2.2265, + "step": 6561500 + }, + { + "epoch": 32.51, + "learning_rate": 3.3750627963318024e-05, + "loss": 2.2233, + "step": 6562000 + }, + { + "epoch": 32.51, + "learning_rate": 3.374938937689194e-05, + "loss": 2.22, + "step": 6562500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374815079046586e-05, + "loss": 2.2073, + "step": 6563000 + }, + { + "epoch": 32.52, + "learning_rate": 3.3746912204039774e-05, + "loss": 2.2204, + "step": 6563500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374567609478654e-05, + "loss": 2.1987, + "step": 6564000 + }, + { + "epoch": 32.52, + "learning_rate": 3.374443750836046e-05, + "loss": 2.252, + "step": 6564500 + }, + { + "epoch": 32.53, + "learning_rate": 3.374319892193438e-05, + "loss": 2.2045, + "step": 6565000 + }, + { + "epoch": 32.53, + "learning_rate": 3.3741960335508294e-05, + "loss": 2.2147, + "step": 6565500 + }, + { + "epoch": 32.53, + "learning_rate": 3.374072174908221e-05, + "loss": 2.2299, + "step": 6566000 + }, + { + "epoch": 32.53, + "learning_rate": 3.373948316265613e-05, + "loss": 2.2087, + "step": 6566500 + }, + { + "epoch": 32.54, + "learning_rate": 3.3738244576230045e-05, + "loss": 2.2143, + "step": 6567000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373700598980396e-05, + "loss": 2.2074, + "step": 6567500 + }, + { + "epoch": 32.54, + "learning_rate": 3.373576740337788e-05, + "loss": 2.2112, + "step": 6568000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373453129412464e-05, + "loss": 2.1904, + "step": 6568500 + }, + { + "epoch": 32.55, + "learning_rate": 3.373329518487141e-05, + "loss": 2.1953, + "step": 6569000 + }, + { + "epoch": 32.55, + "learning_rate": 3.3732056598445326e-05, + "loss": 2.2283, + "step": 6569500 + }, + { + "epoch": 32.55, + "learning_rate": 3.373081801201924e-05, + "loss": 2.2039, + "step": 6570000 + }, + { + "epoch": 32.55, + "learning_rate": 3.372957942559316e-05, + "loss": 2.2101, + "step": 6570500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372834083916708e-05, + "loss": 2.2203, + "step": 6571000 + }, + { + "epoch": 32.56, + "learning_rate": 3.3727102252740994e-05, + "loss": 2.1973, + "step": 6571500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372586366631491e-05, + "loss": 2.212, + "step": 6572000 + }, + { + "epoch": 32.56, + "learning_rate": 3.372462755706168e-05, + "loss": 2.2298, + "step": 6572500 + }, + { + "epoch": 32.56, + "learning_rate": 3.37233889706356e-05, + "loss": 2.201, + "step": 6573000 + }, + { + "epoch": 32.57, + "learning_rate": 3.372215286138236e-05, + "loss": 2.2432, + "step": 6573500 + }, + { + "epoch": 32.57, + "learning_rate": 3.3720914274956276e-05, + "loss": 2.2075, + "step": 6574000 + }, + { + "epoch": 32.57, + "learning_rate": 3.371967568853019e-05, + "loss": 2.2052, + "step": 6574500 + }, + { + "epoch": 32.57, + "learning_rate": 3.371843710210411e-05, + "loss": 2.201, + "step": 6575000 + }, + { + "epoch": 32.58, + "learning_rate": 3.3717198515678026e-05, + "loss": 2.2347, + "step": 6575500 + }, + { + "epoch": 32.58, + "learning_rate": 3.3715959929251943e-05, + "loss": 2.2136, + "step": 6576000 + }, + { + "epoch": 32.58, + "learning_rate": 3.371472134282586e-05, + "loss": 2.2285, + "step": 6576500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371348275639978e-05, + "loss": 2.2092, + "step": 6577000 + }, + { + "epoch": 32.59, + "learning_rate": 3.3712244169973694e-05, + "loss": 2.2303, + "step": 6577500 + }, + { + "epoch": 32.59, + "learning_rate": 3.371100806072046e-05, + "loss": 2.2372, + "step": 6578000 + }, + { + "epoch": 32.59, + "learning_rate": 3.370976947429438e-05, + "loss": 2.2142, + "step": 6578500 + }, + { + "epoch": 32.59, + "learning_rate": 3.37085308878683e-05, + "loss": 2.2163, + "step": 6579000 + }, + { + "epoch": 32.6, + "learning_rate": 3.3707292301442214e-05, + "loss": 2.2067, + "step": 6579500 + }, + { + "epoch": 32.6, + "learning_rate": 3.370605371501613e-05, + "loss": 2.2277, + "step": 6580000 + }, + { + "epoch": 32.6, + "learning_rate": 3.370481512859005e-05, + "loss": 2.2055, + "step": 6580500 + }, + { + "epoch": 32.6, + "learning_rate": 3.3703576542163965e-05, + "loss": 2.1938, + "step": 6581000 + }, + { + "epoch": 32.61, + "learning_rate": 3.370233795573788e-05, + "loss": 2.2145, + "step": 6581500 + }, + { + "epoch": 32.61, + "learning_rate": 3.370109936931179e-05, + "loss": 2.1997, + "step": 6582000 + }, + { + "epoch": 32.61, + "learning_rate": 3.369986078288571e-05, + "loss": 2.2109, + "step": 6582500 + }, + { + "epoch": 32.61, + "learning_rate": 3.3698622196459625e-05, + "loss": 2.2218, + "step": 6583000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369738361003354e-05, + "loss": 2.2179, + "step": 6583500 + }, + { + "epoch": 32.62, + "learning_rate": 3.369614502360746e-05, + "loss": 2.2124, + "step": 6584000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369490891435423e-05, + "loss": 2.218, + "step": 6584500 + }, + { + "epoch": 32.62, + "learning_rate": 3.3693670327928145e-05, + "loss": 2.2309, + "step": 6585000 + }, + { + "epoch": 32.63, + "learning_rate": 3.369243174150206e-05, + "loss": 2.1999, + "step": 6585500 + }, + { + "epoch": 32.63, + "learning_rate": 3.369119315507598e-05, + "loss": 2.2156, + "step": 6586000 + }, + { + "epoch": 32.63, + "learning_rate": 3.368995704582275e-05, + "loss": 2.2088, + "step": 6586500 + }, + { + "epoch": 32.63, + "learning_rate": 3.3688718459396665e-05, + "loss": 2.2409, + "step": 6587000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368748235014343e-05, + "loss": 2.2263, + "step": 6587500 + }, + { + "epoch": 32.64, + "learning_rate": 3.3686243763717344e-05, + "loss": 2.2184, + "step": 6588000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368500517729126e-05, + "loss": 2.2329, + "step": 6588500 + }, + { + "epoch": 32.64, + "learning_rate": 3.368376659086518e-05, + "loss": 2.2595, + "step": 6589000 + }, + { + "epoch": 32.65, + "learning_rate": 3.3682528004439094e-05, + "loss": 2.2049, + "step": 6589500 + }, + { + "epoch": 32.65, + "learning_rate": 3.368128941801301e-05, + "loss": 2.2098, + "step": 6590000 + }, + { + "epoch": 32.65, + "learning_rate": 3.368005083158693e-05, + "loss": 2.2009, + "step": 6590500 + }, + { + "epoch": 32.65, + "learning_rate": 3.3678812245160845e-05, + "loss": 2.2141, + "step": 6591000 + }, + { + "epoch": 32.66, + "learning_rate": 3.367757365873476e-05, + "loss": 2.2486, + "step": 6591500 + }, + { + "epoch": 32.66, + "learning_rate": 3.367633507230868e-05, + "loss": 2.1941, + "step": 6592000 + }, + { + "epoch": 32.66, + "learning_rate": 3.3675096485882596e-05, + "loss": 2.2165, + "step": 6592500 + }, + { + "epoch": 32.66, + "learning_rate": 3.367385789945651e-05, + "loss": 2.1934, + "step": 6593000 + }, + { + "epoch": 32.67, + "learning_rate": 3.367261931303042e-05, + "loss": 2.2103, + "step": 6593500 + }, + { + "epoch": 32.67, + "learning_rate": 3.367138072660434e-05, + "loss": 2.2143, + "step": 6594000 + }, + { + "epoch": 32.67, + "learning_rate": 3.367014214017826e-05, + "loss": 2.2099, + "step": 6594500 + }, + { + "epoch": 32.67, + "learning_rate": 3.366890603092503e-05, + "loss": 2.1974, + "step": 6595000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366766744449894e-05, + "loss": 2.2372, + "step": 6595500 + }, + { + "epoch": 32.68, + "learning_rate": 3.366643133524571e-05, + "loss": 2.2234, + "step": 6596000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366519274881963e-05, + "loss": 2.2023, + "step": 6596500 + }, + { + "epoch": 32.68, + "learning_rate": 3.3663954162393545e-05, + "loss": 2.2142, + "step": 6597000 + }, + { + "epoch": 32.69, + "learning_rate": 3.366271557596746e-05, + "loss": 2.2005, + "step": 6597500 + }, + { + "epoch": 32.69, + "learning_rate": 3.366147698954138e-05, + "loss": 2.219, + "step": 6598000 + }, + { + "epoch": 32.69, + "learning_rate": 3.366024088028815e-05, + "loss": 2.2181, + "step": 6598500 + }, + { + "epoch": 32.69, + "learning_rate": 3.3659002293862065e-05, + "loss": 2.2129, + "step": 6599000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365776618460883e-05, + "loss": 2.2157, + "step": 6599500 + }, + { + "epoch": 32.7, + "learning_rate": 3.3656527598182744e-05, + "loss": 2.2164, + "step": 6600000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365528901175666e-05, + "loss": 2.1974, + "step": 6600500 + }, + { + "epoch": 32.7, + "learning_rate": 3.365405042533058e-05, + "loss": 2.2306, + "step": 6601000 + }, + { + "epoch": 32.71, + "learning_rate": 3.3652814316077347e-05, + "loss": 2.2077, + "step": 6601500 + }, + { + "epoch": 32.71, + "learning_rate": 3.3651575729651263e-05, + "loss": 2.2149, + "step": 6602000 + }, + { + "epoch": 32.71, + "learning_rate": 3.365033714322518e-05, + "loss": 2.2067, + "step": 6602500 + }, + { + "epoch": 32.71, + "learning_rate": 3.36490985567991e-05, + "loss": 2.2259, + "step": 6603000 + }, + { + "epoch": 32.72, + "learning_rate": 3.3647859970373014e-05, + "loss": 2.2163, + "step": 6603500 + }, + { + "epoch": 32.72, + "learning_rate": 3.364662138394693e-05, + "loss": 2.2088, + "step": 6604000 + }, + { + "epoch": 32.72, + "learning_rate": 3.364538279752085e-05, + "loss": 2.2304, + "step": 6604500 + }, + { + "epoch": 32.72, + "learning_rate": 3.3644144211094765e-05, + "loss": 2.2071, + "step": 6605000 + }, + { + "epoch": 32.73, + "learning_rate": 3.364290562466868e-05, + "loss": 2.2017, + "step": 6605500 + }, + { + "epoch": 32.73, + "learning_rate": 3.36416670382426e-05, + "loss": 2.2221, + "step": 6606000 + }, + { + "epoch": 32.73, + "learning_rate": 3.3640428451816516e-05, + "loss": 2.2516, + "step": 6606500 + }, + { + "epoch": 32.73, + "learning_rate": 3.363918986539043e-05, + "loss": 2.2249, + "step": 6607000 + }, + { + "epoch": 32.74, + "learning_rate": 3.363795127896435e-05, + "loss": 2.2237, + "step": 6607500 + }, + { + "epoch": 32.74, + "learning_rate": 3.363671269253827e-05, + "loss": 2.2048, + "step": 6608000 + }, + { + "epoch": 32.74, + "learning_rate": 3.3635474106112184e-05, + "loss": 2.2148, + "step": 6608500 + }, + { + "epoch": 32.74, + "learning_rate": 3.3634235519686094e-05, + "loss": 2.2256, + "step": 6609000 + }, + { + "epoch": 32.75, + "learning_rate": 3.363299693326001e-05, + "loss": 2.2463, + "step": 6609500 + }, + { + "epoch": 32.75, + "learning_rate": 3.363175834683393e-05, + "loss": 2.2072, + "step": 6610000 + }, + { + "epoch": 32.75, + "learning_rate": 3.3630519760407844e-05, + "loss": 2.2111, + "step": 6610500 + }, + { + "epoch": 32.75, + "learning_rate": 3.362928365115461e-05, + "loss": 2.2146, + "step": 6611000 + }, + { + "epoch": 32.76, + "learning_rate": 3.362804506472853e-05, + "loss": 2.252, + "step": 6611500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362680647830245e-05, + "loss": 2.2347, + "step": 6612000 + }, + { + "epoch": 32.76, + "learning_rate": 3.362556789187636e-05, + "loss": 2.2109, + "step": 6612500 + }, + { + "epoch": 32.76, + "learning_rate": 3.3624329305450274e-05, + "loss": 2.2187, + "step": 6613000 + }, + { + "epoch": 32.77, + "learning_rate": 3.362309319619705e-05, + "loss": 2.2218, + "step": 6613500 + }, + { + "epoch": 32.77, + "learning_rate": 3.362185460977097e-05, + "loss": 2.226, + "step": 6614000 + }, + { + "epoch": 32.77, + "learning_rate": 3.3620616023344884e-05, + "loss": 2.2319, + "step": 6614500 + }, + { + "epoch": 32.77, + "learning_rate": 3.36193774369188e-05, + "loss": 2.2312, + "step": 6615000 + }, + { + "epoch": 32.78, + "learning_rate": 3.361814132766556e-05, + "loss": 2.224, + "step": 6615500 + }, + { + "epoch": 32.78, + "learning_rate": 3.361690274123948e-05, + "loss": 2.242, + "step": 6616000 + }, + { + "epoch": 32.78, + "learning_rate": 3.3615664154813396e-05, + "loss": 2.2007, + "step": 6616500 + }, + { + "epoch": 32.78, + "learning_rate": 3.361443052273302e-05, + "loss": 2.2512, + "step": 6617000 + }, + { + "epoch": 32.79, + "learning_rate": 3.3613191936306934e-05, + "loss": 2.2141, + "step": 6617500 + }, + { + "epoch": 32.79, + "learning_rate": 3.361195334988085e-05, + "loss": 2.221, + "step": 6618000 + }, + { + "epoch": 32.79, + "learning_rate": 3.361071476345477e-05, + "loss": 2.2071, + "step": 6618500 + }, + { + "epoch": 32.79, + "learning_rate": 3.360947617702868e-05, + "loss": 2.2425, + "step": 6619000 + }, + { + "epoch": 32.8, + "learning_rate": 3.3608237590602595e-05, + "loss": 2.223, + "step": 6619500 + }, + { + "epoch": 32.8, + "learning_rate": 3.360699900417651e-05, + "loss": 2.2144, + "step": 6620000 + }, + { + "epoch": 32.8, + "learning_rate": 3.360576041775043e-05, + "loss": 2.2007, + "step": 6620500 + }, + { + "epoch": 32.8, + "learning_rate": 3.3604521831324346e-05, + "loss": 2.204, + "step": 6621000 + }, + { + "epoch": 32.81, + "learning_rate": 3.3603285722071115e-05, + "loss": 2.2528, + "step": 6621500 + }, + { + "epoch": 32.81, + "learning_rate": 3.360204713564503e-05, + "loss": 2.21, + "step": 6622000 + }, + { + "epoch": 32.81, + "learning_rate": 3.360080854921895e-05, + "loss": 2.2036, + "step": 6622500 + }, + { + "epoch": 32.81, + "learning_rate": 3.3599569962792865e-05, + "loss": 2.2467, + "step": 6623000 + }, + { + "epoch": 32.82, + "learning_rate": 3.3598336330712486e-05, + "loss": 2.214, + "step": 6623500 + }, + { + "epoch": 32.82, + "learning_rate": 3.35970977442864e-05, + "loss": 2.2221, + "step": 6624000 + }, + { + "epoch": 32.82, + "learning_rate": 3.359585915786032e-05, + "loss": 2.2069, + "step": 6624500 + }, + { + "epoch": 32.82, + "learning_rate": 3.359462057143424e-05, + "loss": 2.228, + "step": 6625000 + }, + { + "epoch": 32.83, + "learning_rate": 3.359338198500815e-05, + "loss": 2.2318, + "step": 6625500 + }, + { + "epoch": 32.83, + "learning_rate": 3.359214587575492e-05, + "loss": 2.2248, + "step": 6626000 + }, + { + "epoch": 32.83, + "learning_rate": 3.359090728932884e-05, + "loss": 2.2005, + "step": 6626500 + }, + { + "epoch": 32.83, + "learning_rate": 3.3589673657248453e-05, + "loss": 2.2361, + "step": 6627000 + }, + { + "epoch": 32.83, + "learning_rate": 3.358843507082237e-05, + "loss": 2.2029, + "step": 6627500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358719648439629e-05, + "loss": 2.2289, + "step": 6628000 + }, + { + "epoch": 32.84, + "learning_rate": 3.3585957897970204e-05, + "loss": 2.2083, + "step": 6628500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358471931154412e-05, + "loss": 2.2152, + "step": 6629000 + }, + { + "epoch": 32.84, + "learning_rate": 3.358348072511804e-05, + "loss": 2.2215, + "step": 6629500 + }, + { + "epoch": 32.85, + "learning_rate": 3.3582242138691955e-05, + "loss": 2.1913, + "step": 6630000 + }, + { + "epoch": 32.85, + "learning_rate": 3.358100355226587e-05, + "loss": 2.2186, + "step": 6630500 + }, + { + "epoch": 32.85, + "learning_rate": 3.357976496583979e-05, + "loss": 2.2363, + "step": 6631000 + }, + { + "epoch": 32.85, + "learning_rate": 3.3578526379413706e-05, + "loss": 2.2145, + "step": 6631500 + }, + { + "epoch": 32.86, + "learning_rate": 3.357728779298762e-05, + "loss": 2.2131, + "step": 6632000 + }, + { + "epoch": 32.86, + "learning_rate": 3.357604920656154e-05, + "loss": 2.2287, + "step": 6632500 + }, + { + "epoch": 32.86, + "learning_rate": 3.357481062013546e-05, + "loss": 2.2282, + "step": 6633000 + }, + { + "epoch": 32.86, + "learning_rate": 3.3573572033709374e-05, + "loss": 2.222, + "step": 6633500 + }, + { + "epoch": 32.87, + "learning_rate": 3.357233344728329e-05, + "loss": 2.2288, + "step": 6634000 + }, + { + "epoch": 32.87, + "learning_rate": 3.357109486085721e-05, + "loss": 2.2067, + "step": 6634500 + }, + { + "epoch": 32.87, + "learning_rate": 3.356985627443112e-05, + "loss": 2.2245, + "step": 6635000 + }, + { + "epoch": 32.87, + "learning_rate": 3.3568620165177886e-05, + "loss": 2.2202, + "step": 6635500 + }, + { + "epoch": 32.88, + "learning_rate": 3.35673815787518e-05, + "loss": 2.2116, + "step": 6636000 + }, + { + "epoch": 32.88, + "learning_rate": 3.356614546949857e-05, + "loss": 2.2298, + "step": 6636500 + }, + { + "epoch": 32.88, + "learning_rate": 3.356490688307249e-05, + "loss": 2.2243, + "step": 6637000 + }, + { + "epoch": 32.88, + "learning_rate": 3.3563668296646406e-05, + "loss": 2.2296, + "step": 6637500 + }, + { + "epoch": 32.89, + "learning_rate": 3.356242971022032e-05, + "loss": 2.2225, + "step": 6638000 + }, + { + "epoch": 32.89, + "learning_rate": 3.356119112379424e-05, + "loss": 2.1984, + "step": 6638500 + }, + { + "epoch": 32.89, + "learning_rate": 3.355995253736816e-05, + "loss": 2.2297, + "step": 6639000 + }, + { + "epoch": 32.89, + "learning_rate": 3.3558713950942074e-05, + "loss": 2.2113, + "step": 6639500 + }, + { + "epoch": 32.9, + "learning_rate": 3.355747536451599e-05, + "loss": 2.2124, + "step": 6640000 + }, + { + "epoch": 32.9, + "learning_rate": 3.355623677808991e-05, + "loss": 2.213, + "step": 6640500 + }, + { + "epoch": 32.9, + "learning_rate": 3.355500066883667e-05, + "loss": 2.1908, + "step": 6641000 + }, + { + "epoch": 32.9, + "learning_rate": 3.3553762082410586e-05, + "loss": 2.2046, + "step": 6641500 + }, + { + "epoch": 32.91, + "learning_rate": 3.3552523495984503e-05, + "loss": 2.2032, + "step": 6642000 + }, + { + "epoch": 32.91, + "learning_rate": 3.355128738673127e-05, + "loss": 2.1987, + "step": 6642500 + }, + { + "epoch": 32.91, + "learning_rate": 3.355004880030519e-05, + "loss": 2.2311, + "step": 6643000 + }, + { + "epoch": 32.91, + "learning_rate": 3.3548810213879106e-05, + "loss": 2.2356, + "step": 6643500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354757162745302e-05, + "loss": 2.2262, + "step": 6644000 + }, + { + "epoch": 32.92, + "learning_rate": 3.354633304102694e-05, + "loss": 2.196, + "step": 6644500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354509445460086e-05, + "loss": 2.2177, + "step": 6645000 + }, + { + "epoch": 32.92, + "learning_rate": 3.3543855868174774e-05, + "loss": 2.2002, + "step": 6645500 + }, + { + "epoch": 32.93, + "learning_rate": 3.354261728174869e-05, + "loss": 2.2402, + "step": 6646000 + }, + { + "epoch": 32.93, + "learning_rate": 3.354137869532261e-05, + "loss": 2.2201, + "step": 6646500 + }, + { + "epoch": 32.93, + "learning_rate": 3.3540140108896525e-05, + "loss": 2.2249, + "step": 6647000 + }, + { + "epoch": 32.93, + "learning_rate": 3.3538901522470435e-05, + "loss": 2.2081, + "step": 6647500 + }, + { + "epoch": 32.94, + "learning_rate": 3.3537665413217204e-05, + "loss": 2.2199, + "step": 6648000 + }, + { + "epoch": 32.94, + "learning_rate": 3.353642682679112e-05, + "loss": 2.2042, + "step": 6648500 + }, + { + "epoch": 32.94, + "learning_rate": 3.353518824036504e-05, + "loss": 2.2251, + "step": 6649000 + }, + { + "epoch": 32.94, + "learning_rate": 3.3533952131111806e-05, + "loss": 2.2405, + "step": 6649500 + }, + { + "epoch": 32.95, + "learning_rate": 3.353271354468572e-05, + "loss": 2.2288, + "step": 6650000 + }, + { + "epoch": 32.95, + "learning_rate": 3.353147495825964e-05, + "loss": 2.2055, + "step": 6650500 + }, + { + "epoch": 32.95, + "learning_rate": 3.353023637183356e-05, + "loss": 2.2243, + "step": 6651000 + }, + { + "epoch": 32.95, + "learning_rate": 3.3528997785407474e-05, + "loss": 2.2378, + "step": 6651500 + }, + { + "epoch": 32.96, + "learning_rate": 3.352775919898139e-05, + "loss": 2.2031, + "step": 6652000 + }, + { + "epoch": 32.96, + "learning_rate": 3.352652061255531e-05, + "loss": 2.2246, + "step": 6652500 + }, + { + "epoch": 32.96, + "learning_rate": 3.3525282026129225e-05, + "loss": 2.1836, + "step": 6653000 + }, + { + "epoch": 32.96, + "learning_rate": 3.3524045916875994e-05, + "loss": 2.2215, + "step": 6653500 + }, + { + "epoch": 32.97, + "learning_rate": 3.3522807330449904e-05, + "loss": 2.2132, + "step": 6654000 + }, + { + "epoch": 32.97, + "learning_rate": 3.352157122119667e-05, + "loss": 2.2263, + "step": 6654500 + }, + { + "epoch": 32.97, + "learning_rate": 3.352033511194344e-05, + "loss": 2.2455, + "step": 6655000 + }, + { + "epoch": 32.97, + "learning_rate": 3.351909652551736e-05, + "loss": 2.2298, + "step": 6655500 + }, + { + "epoch": 32.98, + "learning_rate": 3.3517857939091275e-05, + "loss": 2.2223, + "step": 6656000 + }, + { + "epoch": 32.98, + "learning_rate": 3.351661935266519e-05, + "loss": 2.2317, + "step": 6656500 + }, + { + "epoch": 32.98, + "learning_rate": 3.351538324341196e-05, + "loss": 2.2343, + "step": 6657000 + }, + { + "epoch": 32.98, + "learning_rate": 3.351414465698587e-05, + "loss": 2.2235, + "step": 6657500 + }, + { + "epoch": 32.99, + "learning_rate": 3.351290607055979e-05, + "loss": 2.2268, + "step": 6658000 + }, + { + "epoch": 32.99, + "learning_rate": 3.3511667484133705e-05, + "loss": 2.2339, + "step": 6658500 + }, + { + "epoch": 32.99, + "learning_rate": 3.351042889770762e-05, + "loss": 2.2136, + "step": 6659000 + }, + { + "epoch": 32.99, + "learning_rate": 3.350919031128154e-05, + "loss": 2.1893, + "step": 6659500 + }, + { + "epoch": 33.0, + "learning_rate": 3.3507951724855456e-05, + "loss": 2.2058, + "step": 6660000 + }, + { + "epoch": 33.0, + "learning_rate": 3.350671313842937e-05, + "loss": 2.2326, + "step": 6660500 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.660817023764832, + "eval_accuracy_mlm": 0.6167854855212918, + "eval_accuracy_nsp": 0.8684219815735079, + "eval_loss": 2.316751480102539, + "eval_runtime": 145.7034, + "eval_samples_per_second": 1749.849, + "eval_steps_per_second": 72.915, + "step": 6660819 + }, + { + "epoch": 33.0, + "learning_rate": 3.350547455200329e-05, + "loss": 2.1953, + "step": 6661000 + }, + { + "epoch": 33.0, + "learning_rate": 3.3504235965577206e-05, + "loss": 2.1906, + "step": 6661500 + }, + { + "epoch": 33.01, + "learning_rate": 3.350299737915112e-05, + "loss": 2.1715, + "step": 6662000 + }, + { + "epoch": 33.01, + "learning_rate": 3.350175879272504e-05, + "loss": 2.1679, + "step": 6662500 + }, + { + "epoch": 33.01, + "learning_rate": 3.350052020629896e-05, + "loss": 2.2096, + "step": 6663000 + }, + { + "epoch": 33.01, + "learning_rate": 3.3499281619872874e-05, + "loss": 2.1684, + "step": 6663500 + }, + { + "epoch": 33.02, + "learning_rate": 3.349804551061964e-05, + "loss": 2.1789, + "step": 6664000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349680692419356e-05, + "loss": 2.1883, + "step": 6664500 + }, + { + "epoch": 33.02, + "learning_rate": 3.349557081494032e-05, + "loss": 2.1976, + "step": 6665000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349433222851424e-05, + "loss": 2.1609, + "step": 6665500 + }, + { + "epoch": 33.03, + "learning_rate": 3.3493093642088156e-05, + "loss": 2.1907, + "step": 6666000 + }, + { + "epoch": 33.03, + "learning_rate": 3.349185505566207e-05, + "loss": 2.1754, + "step": 6666500 + }, + { + "epoch": 33.03, + "learning_rate": 3.349061894640884e-05, + "loss": 2.1871, + "step": 6667000 + }, + { + "epoch": 33.03, + "learning_rate": 3.348938035998276e-05, + "loss": 2.1858, + "step": 6667500 + }, + { + "epoch": 33.04, + "learning_rate": 3.348814425072953e-05, + "loss": 2.1992, + "step": 6668000 + }, + { + "epoch": 33.04, + "learning_rate": 3.3486905664303444e-05, + "loss": 2.1737, + "step": 6668500 + }, + { + "epoch": 33.04, + "learning_rate": 3.348566707787736e-05, + "loss": 2.1944, + "step": 6669000 + }, + { + "epoch": 33.04, + "learning_rate": 3.348442849145128e-05, + "loss": 2.1859, + "step": 6669500 + }, + { + "epoch": 33.05, + "learning_rate": 3.348319238219805e-05, + "loss": 2.1786, + "step": 6670000 + }, + { + "epoch": 33.05, + "learning_rate": 3.3481953795771964e-05, + "loss": 2.1959, + "step": 6670500 + }, + { + "epoch": 33.05, + "learning_rate": 3.348071520934588e-05, + "loss": 2.2086, + "step": 6671000 + }, + { + "epoch": 33.05, + "learning_rate": 3.34794766229198e-05, + "loss": 2.1758, + "step": 6671500 + }, + { + "epoch": 33.06, + "learning_rate": 3.3478238036493715e-05, + "loss": 2.1904, + "step": 6672000 + }, + { + "epoch": 33.06, + "learning_rate": 3.347699945006763e-05, + "loss": 2.1964, + "step": 6672500 + }, + { + "epoch": 33.06, + "learning_rate": 3.347576086364154e-05, + "loss": 2.1856, + "step": 6673000 + }, + { + "epoch": 33.06, + "learning_rate": 3.347452227721546e-05, + "loss": 2.1881, + "step": 6673500 + }, + { + "epoch": 33.07, + "learning_rate": 3.3473283690789376e-05, + "loss": 2.1798, + "step": 6674000 + }, + { + "epoch": 33.07, + "learning_rate": 3.347204510436329e-05, + "loss": 2.1917, + "step": 6674500 + }, + { + "epoch": 33.07, + "learning_rate": 3.347080651793721e-05, + "loss": 2.1751, + "step": 6675000 + }, + { + "epoch": 33.07, + "learning_rate": 3.3469567931511126e-05, + "loss": 2.2065, + "step": 6675500 + }, + { + "epoch": 33.08, + "learning_rate": 3.346832934508504e-05, + "loss": 2.1857, + "step": 6676000 + }, + { + "epoch": 33.08, + "learning_rate": 3.346709075865896e-05, + "loss": 2.1952, + "step": 6676500 + }, + { + "epoch": 33.08, + "learning_rate": 3.346585217223288e-05, + "loss": 2.1908, + "step": 6677000 + }, + { + "epoch": 33.08, + "learning_rate": 3.3464613585806794e-05, + "loss": 2.1971, + "step": 6677500 + }, + { + "epoch": 33.09, + "learning_rate": 3.346337499938071e-05, + "loss": 2.194, + "step": 6678000 + }, + { + "epoch": 33.09, + "learning_rate": 3.346213889012747e-05, + "loss": 2.164, + "step": 6678500 + }, + { + "epoch": 33.09, + "learning_rate": 3.346090030370139e-05, + "loss": 2.1589, + "step": 6679000 + }, + { + "epoch": 33.09, + "learning_rate": 3.345966171727531e-05, + "loss": 2.216, + "step": 6679500 + }, + { + "epoch": 33.1, + "learning_rate": 3.3458423130849224e-05, + "loss": 2.1777, + "step": 6680000 + }, + { + "epoch": 33.1, + "learning_rate": 3.345718454442314e-05, + "loss": 2.2036, + "step": 6680500 + }, + { + "epoch": 33.1, + "learning_rate": 3.345594595799706e-05, + "loss": 2.1821, + "step": 6681000 + }, + { + "epoch": 33.1, + "learning_rate": 3.3454707371570975e-05, + "loss": 2.1714, + "step": 6681500 + }, + { + "epoch": 33.1, + "learning_rate": 3.345346878514489e-05, + "loss": 2.199, + "step": 6682000 + }, + { + "epoch": 33.11, + "learning_rate": 3.345223019871881e-05, + "loss": 2.1905, + "step": 6682500 + }, + { + "epoch": 33.11, + "learning_rate": 3.3450991612292725e-05, + "loss": 2.1875, + "step": 6683000 + }, + { + "epoch": 33.11, + "learning_rate": 3.344975302586664e-05, + "loss": 2.1735, + "step": 6683500 + }, + { + "epoch": 33.11, + "learning_rate": 3.344851443944056e-05, + "loss": 2.2194, + "step": 6684000 + }, + { + "epoch": 33.12, + "learning_rate": 3.3447275853014476e-05, + "loss": 2.1537, + "step": 6684500 + }, + { + "epoch": 33.12, + "learning_rate": 3.3446037266588386e-05, + "loss": 2.209, + "step": 6685000 + }, + { + "epoch": 33.12, + "learning_rate": 3.344480115733516e-05, + "loss": 2.1662, + "step": 6685500 + }, + { + "epoch": 33.12, + "learning_rate": 3.344356257090908e-05, + "loss": 2.1802, + "step": 6686000 + }, + { + "epoch": 33.13, + "learning_rate": 3.344232646165584e-05, + "loss": 2.211, + "step": 6686500 + }, + { + "epoch": 33.13, + "learning_rate": 3.344108787522976e-05, + "loss": 2.1945, + "step": 6687000 + }, + { + "epoch": 33.13, + "learning_rate": 3.3439849288803675e-05, + "loss": 2.1963, + "step": 6687500 + }, + { + "epoch": 33.13, + "learning_rate": 3.343861070237759e-05, + "loss": 2.1963, + "step": 6688000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343737211595151e-05, + "loss": 2.1822, + "step": 6688500 + }, + { + "epoch": 33.14, + "learning_rate": 3.3436133529525425e-05, + "loss": 2.1843, + "step": 6689000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343489494309934e-05, + "loss": 2.1904, + "step": 6689500 + }, + { + "epoch": 33.14, + "learning_rate": 3.343365635667326e-05, + "loss": 2.1861, + "step": 6690000 + }, + { + "epoch": 33.15, + "learning_rate": 3.3432417770247176e-05, + "loss": 2.2003, + "step": 6690500 + }, + { + "epoch": 33.15, + "learning_rate": 3.3431181660993945e-05, + "loss": 2.1901, + "step": 6691000 + }, + { + "epoch": 33.15, + "learning_rate": 3.342994307456786e-05, + "loss": 2.2043, + "step": 6691500 + }, + { + "epoch": 33.15, + "learning_rate": 3.342870448814178e-05, + "loss": 2.198, + "step": 6692000 + }, + { + "epoch": 33.16, + "learning_rate": 3.342746837888854e-05, + "loss": 2.1838, + "step": 6692500 + }, + { + "epoch": 33.16, + "learning_rate": 3.342622979246246e-05, + "loss": 2.196, + "step": 6693000 + }, + { + "epoch": 33.16, + "learning_rate": 3.342499368320923e-05, + "loss": 2.1899, + "step": 6693500 + }, + { + "epoch": 33.16, + "learning_rate": 3.3423757573956e-05, + "loss": 2.1982, + "step": 6694000 + }, + { + "epoch": 33.17, + "learning_rate": 3.342251898752992e-05, + "loss": 2.1961, + "step": 6694500 + }, + { + "epoch": 33.17, + "learning_rate": 3.342128040110383e-05, + "loss": 2.2007, + "step": 6695000 + }, + { + "epoch": 33.17, + "learning_rate": 3.3420041814677746e-05, + "loss": 2.1791, + "step": 6695500 + }, + { + "epoch": 33.17, + "learning_rate": 3.341880322825166e-05, + "loss": 2.2048, + "step": 6696000 + }, + { + "epoch": 33.18, + "learning_rate": 3.341756464182558e-05, + "loss": 2.197, + "step": 6696500 + }, + { + "epoch": 33.18, + "learning_rate": 3.34163260553995e-05, + "loss": 2.2261, + "step": 6697000 + }, + { + "epoch": 33.18, + "learning_rate": 3.3415089946146266e-05, + "loss": 2.1916, + "step": 6697500 + }, + { + "epoch": 33.18, + "learning_rate": 3.3413851359720176e-05, + "loss": 2.1948, + "step": 6698000 + }, + { + "epoch": 33.19, + "learning_rate": 3.341261277329409e-05, + "loss": 2.1977, + "step": 6698500 + }, + { + "epoch": 33.19, + "learning_rate": 3.341137418686801e-05, + "loss": 2.1964, + "step": 6699000 + }, + { + "epoch": 33.19, + "learning_rate": 3.341013560044193e-05, + "loss": 2.2078, + "step": 6699500 + }, + { + "epoch": 33.19, + "learning_rate": 3.3408897014015844e-05, + "loss": 2.1995, + "step": 6700000 + }, + { + "epoch": 33.2, + "learning_rate": 3.340765842758976e-05, + "loss": 2.1763, + "step": 6700500 + }, + { + "epoch": 33.2, + "learning_rate": 3.340641984116368e-05, + "loss": 2.1989, + "step": 6701000 + }, + { + "epoch": 33.2, + "learning_rate": 3.3405181254737595e-05, + "loss": 2.1885, + "step": 6701500 + }, + { + "epoch": 33.2, + "learning_rate": 3.340394266831151e-05, + "loss": 2.1716, + "step": 6702000 + }, + { + "epoch": 33.21, + "learning_rate": 3.340270408188543e-05, + "loss": 2.2069, + "step": 6702500 + }, + { + "epoch": 33.21, + "learning_rate": 3.3401465495459345e-05, + "loss": 2.1995, + "step": 6703000 + }, + { + "epoch": 33.21, + "learning_rate": 3.3400229386206114e-05, + "loss": 2.2251, + "step": 6703500 + }, + { + "epoch": 33.21, + "learning_rate": 3.339899079978003e-05, + "loss": 2.2079, + "step": 6704000 + }, + { + "epoch": 33.22, + "learning_rate": 3.339775221335394e-05, + "loss": 2.1914, + "step": 6704500 + }, + { + "epoch": 33.22, + "learning_rate": 3.339651610410071e-05, + "loss": 2.1867, + "step": 6705000 + }, + { + "epoch": 33.22, + "learning_rate": 3.339527751767463e-05, + "loss": 2.2132, + "step": 6705500 + }, + { + "epoch": 33.22, + "learning_rate": 3.33940414084214e-05, + "loss": 2.1824, + "step": 6706000 + }, + { + "epoch": 33.23, + "learning_rate": 3.339280282199532e-05, + "loss": 2.1999, + "step": 6706500 + }, + { + "epoch": 33.23, + "learning_rate": 3.3391564235569236e-05, + "loss": 2.1785, + "step": 6707000 + }, + { + "epoch": 33.23, + "learning_rate": 3.3390325649143147e-05, + "loss": 2.2085, + "step": 6707500 + }, + { + "epoch": 33.23, + "learning_rate": 3.3389087062717063e-05, + "loss": 2.2031, + "step": 6708000 + }, + { + "epoch": 33.24, + "learning_rate": 3.338784847629098e-05, + "loss": 2.181, + "step": 6708500 + }, + { + "epoch": 33.24, + "learning_rate": 3.33866098898649e-05, + "loss": 2.2132, + "step": 6709000 + }, + { + "epoch": 33.24, + "learning_rate": 3.3385371303438814e-05, + "loss": 2.1918, + "step": 6709500 + }, + { + "epoch": 33.24, + "learning_rate": 3.338413271701273e-05, + "loss": 2.1732, + "step": 6710000 + }, + { + "epoch": 33.25, + "learning_rate": 3.338289413058665e-05, + "loss": 2.1797, + "step": 6710500 + }, + { + "epoch": 33.25, + "learning_rate": 3.3381655544160565e-05, + "loss": 2.2188, + "step": 6711000 + }, + { + "epoch": 33.25, + "learning_rate": 3.3380416957734475e-05, + "loss": 2.1945, + "step": 6711500 + }, + { + "epoch": 33.25, + "learning_rate": 3.3379180848481244e-05, + "loss": 2.1775, + "step": 6712000 + }, + { + "epoch": 33.26, + "learning_rate": 3.337794226205516e-05, + "loss": 2.2078, + "step": 6712500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337670367562908e-05, + "loss": 2.1919, + "step": 6713000 + }, + { + "epoch": 33.26, + "learning_rate": 3.3375465089202995e-05, + "loss": 2.2168, + "step": 6713500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337422650277691e-05, + "loss": 2.1926, + "step": 6714000 + }, + { + "epoch": 33.27, + "learning_rate": 3.337299039352368e-05, + "loss": 2.2111, + "step": 6714500 + }, + { + "epoch": 33.27, + "learning_rate": 3.33717518070976e-05, + "loss": 2.1717, + "step": 6715000 + }, + { + "epoch": 33.27, + "learning_rate": 3.3370515697844366e-05, + "loss": 2.1892, + "step": 6715500 + }, + { + "epoch": 33.27, + "learning_rate": 3.336927711141828e-05, + "loss": 2.1819, + "step": 6716000 + }, + { + "epoch": 33.28, + "learning_rate": 3.33680385249922e-05, + "loss": 2.1656, + "step": 6716500 + }, + { + "epoch": 33.28, + "learning_rate": 3.336680241573897e-05, + "loss": 2.1817, + "step": 6717000 + }, + { + "epoch": 33.28, + "learning_rate": 3.3365563829312886e-05, + "loss": 2.2084, + "step": 6717500 + }, + { + "epoch": 33.28, + "learning_rate": 3.33643252428868e-05, + "loss": 2.1876, + "step": 6718000 + }, + { + "epoch": 33.29, + "learning_rate": 3.336308665646072e-05, + "loss": 2.2138, + "step": 6718500 + }, + { + "epoch": 33.29, + "learning_rate": 3.3361848070034637e-05, + "loss": 2.2026, + "step": 6719000 + }, + { + "epoch": 33.29, + "learning_rate": 3.3360609483608554e-05, + "loss": 2.1882, + "step": 6719500 + }, + { + "epoch": 33.29, + "learning_rate": 3.3359370897182464e-05, + "loss": 2.2171, + "step": 6720000 + }, + { + "epoch": 33.3, + "learning_rate": 3.335813231075638e-05, + "loss": 2.1742, + "step": 6720500 + }, + { + "epoch": 33.3, + "learning_rate": 3.33568937243303e-05, + "loss": 2.2045, + "step": 6721000 + }, + { + "epoch": 33.3, + "learning_rate": 3.3355655137904214e-05, + "loss": 2.2029, + "step": 6721500 + }, + { + "epoch": 33.3, + "learning_rate": 3.335441655147813e-05, + "loss": 2.1792, + "step": 6722000 + }, + { + "epoch": 33.31, + "learning_rate": 3.335317796505205e-05, + "loss": 2.1961, + "step": 6722500 + }, + { + "epoch": 33.31, + "learning_rate": 3.3351939378625965e-05, + "loss": 2.2094, + "step": 6723000 + }, + { + "epoch": 33.31, + "learning_rate": 3.335070079219988e-05, + "loss": 2.2066, + "step": 6723500 + }, + { + "epoch": 33.31, + "learning_rate": 3.33494622057738e-05, + "loss": 2.1974, + "step": 6724000 + }, + { + "epoch": 33.32, + "learning_rate": 3.3348223619347716e-05, + "loss": 2.1959, + "step": 6724500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334698998726734e-05, + "loss": 2.2206, + "step": 6725000 + }, + { + "epoch": 33.32, + "learning_rate": 3.3345751400841254e-05, + "loss": 2.1876, + "step": 6725500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334451281441517e-05, + "loss": 2.1978, + "step": 6726000 + }, + { + "epoch": 33.33, + "learning_rate": 3.334327422798908e-05, + "loss": 2.1713, + "step": 6726500 + }, + { + "epoch": 33.33, + "learning_rate": 3.3342035641563e-05, + "loss": 2.2117, + "step": 6727000 + }, + { + "epoch": 33.33, + "learning_rate": 3.3340799532309766e-05, + "loss": 2.2119, + "step": 6727500 + }, + { + "epoch": 33.33, + "learning_rate": 3.3339560945883683e-05, + "loss": 2.191, + "step": 6728000 + }, + { + "epoch": 33.34, + "learning_rate": 3.33383223594576e-05, + "loss": 2.1935, + "step": 6728500 + }, + { + "epoch": 33.34, + "learning_rate": 3.333708377303152e-05, + "loss": 2.1783, + "step": 6729000 + }, + { + "epoch": 33.34, + "learning_rate": 3.333584518660543e-05, + "loss": 2.2009, + "step": 6729500 + }, + { + "epoch": 33.34, + "learning_rate": 3.3334606600179344e-05, + "loss": 2.1902, + "step": 6730000 + }, + { + "epoch": 33.35, + "learning_rate": 3.333336801375326e-05, + "loss": 2.2013, + "step": 6730500 + }, + { + "epoch": 33.35, + "learning_rate": 3.333212942732718e-05, + "loss": 2.2053, + "step": 6731000 + }, + { + "epoch": 33.35, + "learning_rate": 3.3330893318073954e-05, + "loss": 2.2068, + "step": 6731500 + }, + { + "epoch": 33.35, + "learning_rate": 3.332965473164787e-05, + "loss": 2.1893, + "step": 6732000 + }, + { + "epoch": 33.36, + "learning_rate": 3.332841614522178e-05, + "loss": 2.1971, + "step": 6732500 + }, + { + "epoch": 33.36, + "learning_rate": 3.33271775587957e-05, + "loss": 2.2038, + "step": 6733000 + }, + { + "epoch": 33.36, + "learning_rate": 3.3325938972369615e-05, + "loss": 2.1941, + "step": 6733500 + }, + { + "epoch": 33.36, + "learning_rate": 3.3324705340289235e-05, + "loss": 2.2214, + "step": 6734000 + }, + { + "epoch": 33.37, + "learning_rate": 3.332346675386315e-05, + "loss": 2.1986, + "step": 6734500 + }, + { + "epoch": 33.37, + "learning_rate": 3.332222816743707e-05, + "loss": 2.2197, + "step": 6735000 + }, + { + "epoch": 33.37, + "learning_rate": 3.332099205818384e-05, + "loss": 2.198, + "step": 6735500 + }, + { + "epoch": 33.37, + "learning_rate": 3.3319753471757755e-05, + "loss": 2.2016, + "step": 6736000 + }, + { + "epoch": 33.37, + "learning_rate": 3.331851488533167e-05, + "loss": 2.2062, + "step": 6736500 + }, + { + "epoch": 33.38, + "learning_rate": 3.331727629890559e-05, + "loss": 2.2213, + "step": 6737000 + }, + { + "epoch": 33.38, + "learning_rate": 3.3316037712479506e-05, + "loss": 2.2173, + "step": 6737500 + }, + { + "epoch": 33.38, + "learning_rate": 3.331479912605342e-05, + "loss": 2.196, + "step": 6738000 + }, + { + "epoch": 33.38, + "learning_rate": 3.331356053962734e-05, + "loss": 2.2056, + "step": 6738500 + }, + { + "epoch": 33.39, + "learning_rate": 3.331232195320125e-05, + "loss": 2.2122, + "step": 6739000 + }, + { + "epoch": 33.39, + "learning_rate": 3.331108336677517e-05, + "loss": 2.1967, + "step": 6739500 + }, + { + "epoch": 33.39, + "learning_rate": 3.3309844780349084e-05, + "loss": 2.2116, + "step": 6740000 + }, + { + "epoch": 33.39, + "learning_rate": 3.3308606193923e-05, + "loss": 2.2288, + "step": 6740500 + }, + { + "epoch": 33.4, + "learning_rate": 3.330736760749692e-05, + "loss": 2.1971, + "step": 6741000 + }, + { + "epoch": 33.4, + "learning_rate": 3.3306129021070834e-05, + "loss": 2.2274, + "step": 6741500 + }, + { + "epoch": 33.4, + "learning_rate": 3.33048929118176e-05, + "loss": 2.2, + "step": 6742000 + }, + { + "epoch": 33.4, + "learning_rate": 3.330365432539152e-05, + "loss": 2.2053, + "step": 6742500 + }, + { + "epoch": 33.41, + "learning_rate": 3.330241573896544e-05, + "loss": 2.2427, + "step": 6743000 + }, + { + "epoch": 33.41, + "learning_rate": 3.3301177152539354e-05, + "loss": 2.2006, + "step": 6743500 + }, + { + "epoch": 33.41, + "learning_rate": 3.329994104328612e-05, + "loss": 2.1955, + "step": 6744000 + }, + { + "epoch": 33.41, + "learning_rate": 3.329870245686004e-05, + "loss": 2.2025, + "step": 6744500 + }, + { + "epoch": 33.42, + "learning_rate": 3.329746387043396e-05, + "loss": 2.2052, + "step": 6745000 + }, + { + "epoch": 33.42, + "learning_rate": 3.3296225284007874e-05, + "loss": 2.199, + "step": 6745500 + }, + { + "epoch": 33.42, + "learning_rate": 3.3294986697581784e-05, + "loss": 2.2119, + "step": 6746000 + }, + { + "epoch": 33.42, + "learning_rate": 3.329375058832855e-05, + "loss": 2.1805, + "step": 6746500 + }, + { + "epoch": 33.43, + "learning_rate": 3.329251200190247e-05, + "loss": 2.2214, + "step": 6747000 + }, + { + "epoch": 33.43, + "learning_rate": 3.3291273415476386e-05, + "loss": 2.1932, + "step": 6747500 + }, + { + "epoch": 33.43, + "learning_rate": 3.32900348290503e-05, + "loss": 2.21, + "step": 6748000 + }, + { + "epoch": 33.43, + "learning_rate": 3.328879871979707e-05, + "loss": 2.1815, + "step": 6748500 + }, + { + "epoch": 33.44, + "learning_rate": 3.328756013337099e-05, + "loss": 2.2231, + "step": 6749000 + }, + { + "epoch": 33.44, + "learning_rate": 3.3286321546944906e-05, + "loss": 2.1896, + "step": 6749500 + }, + { + "epoch": 33.44, + "learning_rate": 3.328508296051882e-05, + "loss": 2.1942, + "step": 6750000 + }, + { + "epoch": 33.44, + "learning_rate": 3.328384437409274e-05, + "loss": 2.2055, + "step": 6750500 + }, + { + "epoch": 33.45, + "learning_rate": 3.328260578766666e-05, + "loss": 2.2069, + "step": 6751000 + }, + { + "epoch": 33.45, + "learning_rate": 3.3281367201240574e-05, + "loss": 2.2045, + "step": 6751500 + }, + { + "epoch": 33.45, + "learning_rate": 3.3280131091987336e-05, + "loss": 2.2114, + "step": 6752000 + }, + { + "epoch": 33.45, + "learning_rate": 3.327889250556125e-05, + "loss": 2.2043, + "step": 6752500 + }, + { + "epoch": 33.46, + "learning_rate": 3.327765391913517e-05, + "loss": 2.2053, + "step": 6753000 + }, + { + "epoch": 33.46, + "learning_rate": 3.3276415332709087e-05, + "loss": 2.2035, + "step": 6753500 + }, + { + "epoch": 33.46, + "learning_rate": 3.3275179223455855e-05, + "loss": 2.1932, + "step": 6754000 + }, + { + "epoch": 33.46, + "learning_rate": 3.327394063702977e-05, + "loss": 2.2068, + "step": 6754500 + }, + { + "epoch": 33.47, + "learning_rate": 3.327270205060369e-05, + "loss": 2.1834, + "step": 6755000 + }, + { + "epoch": 33.47, + "learning_rate": 3.3271463464177606e-05, + "loss": 2.1792, + "step": 6755500 + }, + { + "epoch": 33.47, + "learning_rate": 3.327022735492437e-05, + "loss": 2.1915, + "step": 6756000 + }, + { + "epoch": 33.47, + "learning_rate": 3.3268988768498285e-05, + "loss": 2.2222, + "step": 6756500 + }, + { + "epoch": 33.48, + "learning_rate": 3.32677501820722e-05, + "loss": 2.2125, + "step": 6757000 + }, + { + "epoch": 33.48, + "learning_rate": 3.326651159564612e-05, + "loss": 2.1967, + "step": 6757500 + }, + { + "epoch": 33.48, + "learning_rate": 3.3265273009220036e-05, + "loss": 2.2272, + "step": 6758000 + }, + { + "epoch": 33.48, + "learning_rate": 3.326403442279395e-05, + "loss": 2.2209, + "step": 6758500 + }, + { + "epoch": 33.49, + "learning_rate": 3.326279831354072e-05, + "loss": 2.2223, + "step": 6759000 + }, + { + "epoch": 33.49, + "learning_rate": 3.326155972711464e-05, + "loss": 2.21, + "step": 6759500 + }, + { + "epoch": 33.49, + "learning_rate": 3.3260321140688556e-05, + "loss": 2.2063, + "step": 6760000 + }, + { + "epoch": 33.49, + "learning_rate": 3.325908255426247e-05, + "loss": 2.2272, + "step": 6760500 + }, + { + "epoch": 33.5, + "learning_rate": 3.325784396783639e-05, + "loss": 2.1844, + "step": 6761000 + }, + { + "epoch": 33.5, + "learning_rate": 3.3256605381410306e-05, + "loss": 2.2001, + "step": 6761500 + }, + { + "epoch": 33.5, + "learning_rate": 3.3255369272157075e-05, + "loss": 2.1846, + "step": 6762000 + }, + { + "epoch": 33.5, + "learning_rate": 3.3254133162903844e-05, + "loss": 2.2296, + "step": 6762500 + }, + { + "epoch": 33.51, + "learning_rate": 3.325289457647776e-05, + "loss": 2.2138, + "step": 6763000 + }, + { + "epoch": 33.51, + "learning_rate": 3.325165599005168e-05, + "loss": 2.2029, + "step": 6763500 + }, + { + "epoch": 33.51, + "learning_rate": 3.3250417403625595e-05, + "loss": 2.1842, + "step": 6764000 + }, + { + "epoch": 33.51, + "learning_rate": 3.3249181294372364e-05, + "loss": 2.216, + "step": 6764500 + }, + { + "epoch": 33.52, + "learning_rate": 3.324794270794628e-05, + "loss": 2.2137, + "step": 6765000 + }, + { + "epoch": 33.52, + "learning_rate": 3.32467041215202e-05, + "loss": 2.1953, + "step": 6765500 + }, + { + "epoch": 33.52, + "learning_rate": 3.324546801226696e-05, + "loss": 2.209, + "step": 6766000 + }, + { + "epoch": 33.52, + "learning_rate": 3.3244229425840876e-05, + "loss": 2.2116, + "step": 6766500 + }, + { + "epoch": 33.53, + "learning_rate": 3.324299083941479e-05, + "loss": 2.2116, + "step": 6767000 + }, + { + "epoch": 33.53, + "learning_rate": 3.324175225298871e-05, + "loss": 2.2068, + "step": 6767500 + }, + { + "epoch": 33.53, + "learning_rate": 3.324051366656263e-05, + "loss": 2.1877, + "step": 6768000 + }, + { + "epoch": 33.53, + "learning_rate": 3.3239275080136544e-05, + "loss": 2.2073, + "step": 6768500 + }, + { + "epoch": 33.54, + "learning_rate": 3.323803649371046e-05, + "loss": 2.1873, + "step": 6769000 + }, + { + "epoch": 33.54, + "learning_rate": 3.323679790728438e-05, + "loss": 2.2067, + "step": 6769500 + }, + { + "epoch": 33.54, + "learning_rate": 3.3235559320858295e-05, + "loss": 2.1925, + "step": 6770000 + }, + { + "epoch": 33.54, + "learning_rate": 3.3234320734432205e-05, + "loss": 2.2227, + "step": 6770500 + }, + { + "epoch": 33.55, + "learning_rate": 3.323308214800612e-05, + "loss": 2.1975, + "step": 6771000 + }, + { + "epoch": 33.55, + "learning_rate": 3.323184356158004e-05, + "loss": 2.1838, + "step": 6771500 + }, + { + "epoch": 33.55, + "learning_rate": 3.3230604975153956e-05, + "loss": 2.2219, + "step": 6772000 + }, + { + "epoch": 33.55, + "learning_rate": 3.322936638872787e-05, + "loss": 2.1908, + "step": 6772500 + }, + { + "epoch": 33.56, + "learning_rate": 3.322812780230179e-05, + "loss": 2.1813, + "step": 6773000 + }, + { + "epoch": 33.56, + "learning_rate": 3.3226889215875707e-05, + "loss": 2.1919, + "step": 6773500 + }, + { + "epoch": 33.56, + "learning_rate": 3.3225650629449623e-05, + "loss": 2.2287, + "step": 6774000 + }, + { + "epoch": 33.56, + "learning_rate": 3.322441204302354e-05, + "loss": 2.2185, + "step": 6774500 + }, + { + "epoch": 33.57, + "learning_rate": 3.322317345659746e-05, + "loss": 2.1899, + "step": 6775000 + }, + { + "epoch": 33.57, + "learning_rate": 3.3221934870171374e-05, + "loss": 2.204, + "step": 6775500 + }, + { + "epoch": 33.57, + "learning_rate": 3.322069628374529e-05, + "loss": 2.2152, + "step": 6776000 + }, + { + "epoch": 33.57, + "learning_rate": 3.321945769731921e-05, + "loss": 2.1904, + "step": 6776500 + }, + { + "epoch": 33.58, + "learning_rate": 3.3218219110893125e-05, + "loss": 2.2197, + "step": 6777000 + }, + { + "epoch": 33.58, + "learning_rate": 3.321698052446704e-05, + "loss": 2.1919, + "step": 6777500 + }, + { + "epoch": 33.58, + "learning_rate": 3.321574193804096e-05, + "loss": 2.2174, + "step": 6778000 + }, + { + "epoch": 33.58, + "learning_rate": 3.3214503351614876e-05, + "loss": 2.1984, + "step": 6778500 + }, + { + "epoch": 33.59, + "learning_rate": 3.321326971953449e-05, + "loss": 2.2013, + "step": 6779000 + }, + { + "epoch": 33.59, + "learning_rate": 3.3212033610281265e-05, + "loss": 2.2408, + "step": 6779500 + }, + { + "epoch": 33.59, + "learning_rate": 3.3210795023855175e-05, + "loss": 2.2103, + "step": 6780000 + }, + { + "epoch": 33.59, + "learning_rate": 3.320955643742909e-05, + "loss": 2.1746, + "step": 6780500 + }, + { + "epoch": 33.6, + "learning_rate": 3.320831785100301e-05, + "loss": 2.2278, + "step": 6781000 + }, + { + "epoch": 33.6, + "learning_rate": 3.3207079264576926e-05, + "loss": 2.1785, + "step": 6781500 + }, + { + "epoch": 33.6, + "learning_rate": 3.320584067815084e-05, + "loss": 2.2099, + "step": 6782000 + }, + { + "epoch": 33.6, + "learning_rate": 3.320460209172476e-05, + "loss": 2.1942, + "step": 6782500 + }, + { + "epoch": 33.61, + "learning_rate": 3.320336845964438e-05, + "loss": 2.2025, + "step": 6783000 + }, + { + "epoch": 33.61, + "learning_rate": 3.32021298732183e-05, + "loss": 2.2365, + "step": 6783500 + }, + { + "epoch": 33.61, + "learning_rate": 3.3200891286792215e-05, + "loss": 2.2065, + "step": 6784000 + }, + { + "epoch": 33.61, + "learning_rate": 3.319965270036613e-05, + "loss": 2.1954, + "step": 6784500 + }, + { + "epoch": 33.62, + "learning_rate": 3.319841411394005e-05, + "loss": 2.2004, + "step": 6785000 + }, + { + "epoch": 33.62, + "learning_rate": 3.3197175527513965e-05, + "loss": 2.211, + "step": 6785500 + }, + { + "epoch": 33.62, + "learning_rate": 3.319593694108788e-05, + "loss": 2.2276, + "step": 6786000 + }, + { + "epoch": 33.62, + "learning_rate": 3.319469835466179e-05, + "loss": 2.2108, + "step": 6786500 + }, + { + "epoch": 33.63, + "learning_rate": 3.319345976823571e-05, + "loss": 2.2176, + "step": 6787000 + }, + { + "epoch": 33.63, + "learning_rate": 3.3192221181809626e-05, + "loss": 2.2349, + "step": 6787500 + }, + { + "epoch": 33.63, + "learning_rate": 3.3190985072556395e-05, + "loss": 2.2048, + "step": 6788000 + }, + { + "epoch": 33.63, + "learning_rate": 3.318974648613031e-05, + "loss": 2.1891, + "step": 6788500 + }, + { + "epoch": 33.64, + "learning_rate": 3.318850789970423e-05, + "loss": 2.2304, + "step": 6789000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318726931327814e-05, + "loss": 2.21, + "step": 6789500 + }, + { + "epoch": 33.64, + "learning_rate": 3.3186030726852056e-05, + "loss": 2.1899, + "step": 6790000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318479214042597e-05, + "loss": 2.2241, + "step": 6790500 + }, + { + "epoch": 33.64, + "learning_rate": 3.318355355399989e-05, + "loss": 2.2191, + "step": 6791000 + }, + { + "epoch": 33.65, + "learning_rate": 3.318231496757381e-05, + "loss": 2.2102, + "step": 6791500 + }, + { + "epoch": 33.65, + "learning_rate": 3.318107885832058e-05, + "loss": 2.1883, + "step": 6792000 + }, + { + "epoch": 33.65, + "learning_rate": 3.317984027189449e-05, + "loss": 2.1964, + "step": 6792500 + }, + { + "epoch": 33.65, + "learning_rate": 3.317860168546841e-05, + "loss": 2.2054, + "step": 6793000 + }, + { + "epoch": 33.66, + "learning_rate": 3.3177363099042327e-05, + "loss": 2.1987, + "step": 6793500 + }, + { + "epoch": 33.66, + "learning_rate": 3.3176126989789095e-05, + "loss": 2.2227, + "step": 6794000 + }, + { + "epoch": 33.66, + "learning_rate": 3.317488840336301e-05, + "loss": 2.217, + "step": 6794500 + }, + { + "epoch": 33.66, + "learning_rate": 3.317364981693693e-05, + "loss": 2.1929, + "step": 6795000 + }, + { + "epoch": 33.67, + "learning_rate": 3.317241123051084e-05, + "loss": 2.2129, + "step": 6795500 + }, + { + "epoch": 33.67, + "learning_rate": 3.3171172644084756e-05, + "loss": 2.2122, + "step": 6796000 + }, + { + "epoch": 33.67, + "learning_rate": 3.316993405765867e-05, + "loss": 2.2232, + "step": 6796500 + }, + { + "epoch": 33.67, + "learning_rate": 3.316869547123259e-05, + "loss": 2.2027, + "step": 6797000 + }, + { + "epoch": 33.68, + "learning_rate": 3.316745688480651e-05, + "loss": 2.1981, + "step": 6797500 + }, + { + "epoch": 33.68, + "learning_rate": 3.3166218298380424e-05, + "loss": 2.1982, + "step": 6798000 + }, + { + "epoch": 33.68, + "learning_rate": 3.316497971195434e-05, + "loss": 2.1927, + "step": 6798500 + }, + { + "epoch": 33.68, + "learning_rate": 3.316374112552826e-05, + "loss": 2.1843, + "step": 6799000 + }, + { + "epoch": 33.69, + "learning_rate": 3.3162502539102175e-05, + "loss": 2.2093, + "step": 6799500 + }, + { + "epoch": 33.69, + "learning_rate": 3.3161266429848944e-05, + "loss": 2.1841, + "step": 6800000 + }, + { + "epoch": 33.69, + "learning_rate": 3.316003032059571e-05, + "loss": 2.2062, + "step": 6800500 + }, + { + "epoch": 33.69, + "learning_rate": 3.315879173416963e-05, + "loss": 2.1887, + "step": 6801000 + }, + { + "epoch": 33.7, + "learning_rate": 3.3157553147743546e-05, + "loss": 2.2124, + "step": 6801500 + }, + { + "epoch": 33.7, + "learning_rate": 3.3156314561317456e-05, + "loss": 2.18, + "step": 6802000 + }, + { + "epoch": 33.7, + "learning_rate": 3.315507597489137e-05, + "loss": 2.2071, + "step": 6802500 + }, + { + "epoch": 33.7, + "learning_rate": 3.315383986563815e-05, + "loss": 2.1993, + "step": 6803000 + }, + { + "epoch": 33.71, + "learning_rate": 3.315260375638492e-05, + "loss": 2.1961, + "step": 6803500 + }, + { + "epoch": 33.71, + "learning_rate": 3.3151365169958835e-05, + "loss": 2.1915, + "step": 6804000 + }, + { + "epoch": 33.71, + "learning_rate": 3.3150126583532745e-05, + "loss": 2.1792, + "step": 6804500 + }, + { + "epoch": 33.71, + "learning_rate": 3.314888799710666e-05, + "loss": 2.192, + "step": 6805000 + }, + { + "epoch": 33.72, + "learning_rate": 3.314764941068058e-05, + "loss": 2.1913, + "step": 6805500 + }, + { + "epoch": 33.72, + "learning_rate": 3.3146410824254496e-05, + "loss": 2.1953, + "step": 6806000 + }, + { + "epoch": 33.72, + "learning_rate": 3.314517223782841e-05, + "loss": 2.1918, + "step": 6806500 + }, + { + "epoch": 33.72, + "learning_rate": 3.314393365140233e-05, + "loss": 2.189, + "step": 6807000 + }, + { + "epoch": 33.73, + "learning_rate": 3.31426975421491e-05, + "loss": 2.1915, + "step": 6807500 + }, + { + "epoch": 33.73, + "learning_rate": 3.314146143289587e-05, + "loss": 2.2173, + "step": 6808000 + }, + { + "epoch": 33.73, + "learning_rate": 3.3140222846469784e-05, + "loss": 2.2431, + "step": 6808500 + }, + { + "epoch": 33.73, + "learning_rate": 3.31389842600437e-05, + "loss": 2.2125, + "step": 6809000 + }, + { + "epoch": 33.74, + "learning_rate": 3.313774815079046e-05, + "loss": 2.224, + "step": 6809500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313651204153724e-05, + "loss": 2.2055, + "step": 6810000 + }, + { + "epoch": 33.74, + "learning_rate": 3.3135273455111155e-05, + "loss": 2.21, + "step": 6810500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313403486868507e-05, + "loss": 2.2161, + "step": 6811000 + }, + { + "epoch": 33.75, + "learning_rate": 3.313279628225899e-05, + "loss": 2.1847, + "step": 6811500 + }, + { + "epoch": 33.75, + "learning_rate": 3.31315576958329e-05, + "loss": 2.2189, + "step": 6812000 + }, + { + "epoch": 33.75, + "learning_rate": 3.3130319109406816e-05, + "loss": 2.2207, + "step": 6812500 + }, + { + "epoch": 33.75, + "learning_rate": 3.312908052298073e-05, + "loss": 2.1814, + "step": 6813000 + }, + { + "epoch": 33.76, + "learning_rate": 3.312784193655465e-05, + "loss": 2.2075, + "step": 6813500 + }, + { + "epoch": 33.76, + "learning_rate": 3.312660335012857e-05, + "loss": 2.2068, + "step": 6814000 + }, + { + "epoch": 33.76, + "learning_rate": 3.3125364763702484e-05, + "loss": 2.2041, + "step": 6814500 + }, + { + "epoch": 33.76, + "learning_rate": 3.3124128654449246e-05, + "loss": 2.2088, + "step": 6815000 + }, + { + "epoch": 33.77, + "learning_rate": 3.312289006802316e-05, + "loss": 2.2029, + "step": 6815500 + }, + { + "epoch": 33.77, + "learning_rate": 3.312165148159708e-05, + "loss": 2.1921, + "step": 6816000 + }, + { + "epoch": 33.77, + "learning_rate": 3.3120412895171e-05, + "loss": 2.216, + "step": 6816500 + }, + { + "epoch": 33.77, + "learning_rate": 3.3119174308744914e-05, + "loss": 2.195, + "step": 6817000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311793572231883e-05, + "loss": 2.2238, + "step": 6817500 + }, + { + "epoch": 33.78, + "learning_rate": 3.311669713589275e-05, + "loss": 2.2144, + "step": 6818000 + }, + { + "epoch": 33.78, + "learning_rate": 3.3115458549466665e-05, + "loss": 2.2092, + "step": 6818500 + }, + { + "epoch": 33.78, + "learning_rate": 3.3114222440213433e-05, + "loss": 2.2108, + "step": 6819000 + }, + { + "epoch": 33.79, + "learning_rate": 3.311298385378735e-05, + "loss": 2.2043, + "step": 6819500 + }, + { + "epoch": 33.79, + "learning_rate": 3.311174526736127e-05, + "loss": 2.1984, + "step": 6820000 + }, + { + "epoch": 33.79, + "learning_rate": 3.3110509158108036e-05, + "loss": 2.1975, + "step": 6820500 + }, + { + "epoch": 33.79, + "learning_rate": 3.310927057168195e-05, + "loss": 2.1769, + "step": 6821000 + }, + { + "epoch": 33.8, + "learning_rate": 3.310803198525586e-05, + "loss": 2.1828, + "step": 6821500 + }, + { + "epoch": 33.8, + "learning_rate": 3.310679339882978e-05, + "loss": 2.2122, + "step": 6822000 + }, + { + "epoch": 33.8, + "learning_rate": 3.31055548124037e-05, + "loss": 2.1925, + "step": 6822500 + }, + { + "epoch": 33.8, + "learning_rate": 3.3104316225977614e-05, + "loss": 2.1946, + "step": 6823000 + }, + { + "epoch": 33.81, + "learning_rate": 3.310307763955153e-05, + "loss": 2.2058, + "step": 6823500 + }, + { + "epoch": 33.81, + "learning_rate": 3.310183905312545e-05, + "loss": 2.2011, + "step": 6824000 + }, + { + "epoch": 33.81, + "learning_rate": 3.3100600466699365e-05, + "loss": 2.2053, + "step": 6824500 + }, + { + "epoch": 33.81, + "learning_rate": 3.3099364357446134e-05, + "loss": 2.2091, + "step": 6825000 + }, + { + "epoch": 33.82, + "learning_rate": 3.309812577102005e-05, + "loss": 2.2024, + "step": 6825500 + }, + { + "epoch": 33.82, + "learning_rate": 3.309688718459397e-05, + "loss": 2.1989, + "step": 6826000 + }, + { + "epoch": 33.82, + "learning_rate": 3.3095648598167884e-05, + "loss": 2.197, + "step": 6826500 + }, + { + "epoch": 33.82, + "learning_rate": 3.30944100117418e-05, + "loss": 2.2224, + "step": 6827000 + }, + { + "epoch": 33.83, + "learning_rate": 3.309317142531572e-05, + "loss": 2.2263, + "step": 6827500 + }, + { + "epoch": 33.83, + "learning_rate": 3.3091932838889635e-05, + "loss": 2.2002, + "step": 6828000 + }, + { + "epoch": 33.83, + "learning_rate": 3.309069425246355e-05, + "loss": 2.2009, + "step": 6828500 + }, + { + "epoch": 33.83, + "learning_rate": 3.308945566603747e-05, + "loss": 2.2113, + "step": 6829000 + }, + { + "epoch": 33.84, + "learning_rate": 3.3088217079611386e-05, + "loss": 2.2273, + "step": 6829500 + }, + { + "epoch": 33.84, + "learning_rate": 3.30869784931853e-05, + "loss": 2.2292, + "step": 6830000 + }, + { + "epoch": 33.84, + "learning_rate": 3.308573990675922e-05, + "loss": 2.1992, + "step": 6830500 + }, + { + "epoch": 33.84, + "learning_rate": 3.308450132033314e-05, + "loss": 2.217, + "step": 6831000 + }, + { + "epoch": 33.85, + "learning_rate": 3.308326273390705e-05, + "loss": 2.2222, + "step": 6831500 + }, + { + "epoch": 33.85, + "learning_rate": 3.3082026624653816e-05, + "loss": 2.1777, + "step": 6832000 + }, + { + "epoch": 33.85, + "learning_rate": 3.308078803822773e-05, + "loss": 2.1957, + "step": 6832500 + }, + { + "epoch": 33.85, + "learning_rate": 3.30795519289745e-05, + "loss": 2.2035, + "step": 6833000 + }, + { + "epoch": 33.86, + "learning_rate": 3.307831334254842e-05, + "loss": 2.2322, + "step": 6833500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3077074756122335e-05, + "loss": 2.2006, + "step": 6834000 + }, + { + "epoch": 33.86, + "learning_rate": 3.307583616969625e-05, + "loss": 2.2075, + "step": 6834500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3074600060443014e-05, + "loss": 2.2022, + "step": 6835000 + }, + { + "epoch": 33.87, + "learning_rate": 3.307336147401693e-05, + "loss": 2.239, + "step": 6835500 + }, + { + "epoch": 33.87, + "learning_rate": 3.307212288759085e-05, + "loss": 2.2284, + "step": 6836000 + }, + { + "epoch": 33.87, + "learning_rate": 3.3070884301164765e-05, + "loss": 2.2016, + "step": 6836500 + }, + { + "epoch": 33.87, + "learning_rate": 3.3069648191911534e-05, + "loss": 2.2318, + "step": 6837000 + }, + { + "epoch": 33.88, + "learning_rate": 3.306840960548545e-05, + "loss": 2.2189, + "step": 6837500 + }, + { + "epoch": 33.88, + "learning_rate": 3.306717101905937e-05, + "loss": 2.2088, + "step": 6838000 + }, + { + "epoch": 33.88, + "learning_rate": 3.3065932432633285e-05, + "loss": 2.215, + "step": 6838500 + }, + { + "epoch": 33.88, + "learning_rate": 3.30646938462072e-05, + "loss": 2.2143, + "step": 6839000 + }, + { + "epoch": 33.89, + "learning_rate": 3.306345525978112e-05, + "loss": 2.2258, + "step": 6839500 + }, + { + "epoch": 33.89, + "learning_rate": 3.3062216673355035e-05, + "loss": 2.2073, + "step": 6840000 + }, + { + "epoch": 33.89, + "learning_rate": 3.306097808692895e-05, + "loss": 2.2141, + "step": 6840500 + }, + { + "epoch": 33.89, + "learning_rate": 3.305973950050287e-05, + "loss": 2.2215, + "step": 6841000 + }, + { + "epoch": 33.9, + "learning_rate": 3.3058500914076786e-05, + "loss": 2.2281, + "step": 6841500 + }, + { + "epoch": 33.9, + "learning_rate": 3.30572623276507e-05, + "loss": 2.1636, + "step": 6842000 + }, + { + "epoch": 33.9, + "learning_rate": 3.305602374122462e-05, + "loss": 2.2143, + "step": 6842500 + }, + { + "epoch": 33.9, + "learning_rate": 3.305478515479854e-05, + "loss": 2.2349, + "step": 6843000 + }, + { + "epoch": 33.91, + "learning_rate": 3.30535490455453e-05, + "loss": 2.2016, + "step": 6843500 + }, + { + "epoch": 33.91, + "learning_rate": 3.305231293629207e-05, + "loss": 2.1975, + "step": 6844000 + }, + { + "epoch": 33.91, + "learning_rate": 3.3051074349865985e-05, + "loss": 2.2012, + "step": 6844500 + }, + { + "epoch": 33.91, + "learning_rate": 3.30498357634399e-05, + "loss": 2.2201, + "step": 6845000 + }, + { + "epoch": 33.91, + "learning_rate": 3.304859717701382e-05, + "loss": 2.1905, + "step": 6845500 + }, + { + "epoch": 33.92, + "learning_rate": 3.3047358590587735e-05, + "loss": 2.2118, + "step": 6846000 + }, + { + "epoch": 33.92, + "learning_rate": 3.304612000416165e-05, + "loss": 2.2002, + "step": 6846500 + }, + { + "epoch": 33.92, + "learning_rate": 3.304488141773557e-05, + "loss": 2.2046, + "step": 6847000 + }, + { + "epoch": 33.92, + "learning_rate": 3.3043642831309486e-05, + "loss": 2.192, + "step": 6847500 + }, + { + "epoch": 33.93, + "learning_rate": 3.304240672205625e-05, + "loss": 2.2318, + "step": 6848000 + }, + { + "epoch": 33.93, + "learning_rate": 3.3041168135630165e-05, + "loss": 2.2128, + "step": 6848500 + }, + { + "epoch": 33.93, + "learning_rate": 3.303992954920408e-05, + "loss": 2.2345, + "step": 6849000 + }, + { + "epoch": 33.93, + "learning_rate": 3.303869343995085e-05, + "loss": 2.2106, + "step": 6849500 + }, + { + "epoch": 33.94, + "learning_rate": 3.303745485352477e-05, + "loss": 2.2147, + "step": 6850000 + }, + { + "epoch": 33.94, + "learning_rate": 3.3036216267098685e-05, + "loss": 2.2091, + "step": 6850500 + }, + { + "epoch": 33.94, + "learning_rate": 3.30349776806726e-05, + "loss": 2.1914, + "step": 6851000 + }, + { + "epoch": 33.94, + "learning_rate": 3.303373909424652e-05, + "loss": 2.2098, + "step": 6851500 + }, + { + "epoch": 33.95, + "learning_rate": 3.3032500507820436e-05, + "loss": 2.1919, + "step": 6852000 + }, + { + "epoch": 33.95, + "learning_rate": 3.303126192139435e-05, + "loss": 2.1861, + "step": 6852500 + }, + { + "epoch": 33.95, + "learning_rate": 3.303002828931397e-05, + "loss": 2.202, + "step": 6853000 + }, + { + "epoch": 33.95, + "learning_rate": 3.302878970288789e-05, + "loss": 2.2175, + "step": 6853500 + }, + { + "epoch": 33.96, + "learning_rate": 3.302755111646181e-05, + "loss": 2.2061, + "step": 6854000 + }, + { + "epoch": 33.96, + "learning_rate": 3.3026312530035724e-05, + "loss": 2.1985, + "step": 6854500 + }, + { + "epoch": 33.96, + "learning_rate": 3.302507394360964e-05, + "loss": 2.2186, + "step": 6855000 + }, + { + "epoch": 33.96, + "learning_rate": 3.302383783435641e-05, + "loss": 2.2056, + "step": 6855500 + }, + { + "epoch": 33.97, + "learning_rate": 3.302259924793033e-05, + "loss": 2.2069, + "step": 6856000 + }, + { + "epoch": 33.97, + "learning_rate": 3.3021360661504244e-05, + "loss": 2.2249, + "step": 6856500 + }, + { + "epoch": 33.97, + "learning_rate": 3.302012207507816e-05, + "loss": 2.2189, + "step": 6857000 + }, + { + "epoch": 33.97, + "learning_rate": 3.301888348865208e-05, + "loss": 2.2219, + "step": 6857500 + }, + { + "epoch": 33.98, + "learning_rate": 3.3017644902225994e-05, + "loss": 2.1775, + "step": 6858000 + }, + { + "epoch": 33.98, + "learning_rate": 3.301640631579991e-05, + "loss": 2.1861, + "step": 6858500 + }, + { + "epoch": 33.98, + "learning_rate": 3.301516772937382e-05, + "loss": 2.2028, + "step": 6859000 + }, + { + "epoch": 33.98, + "learning_rate": 3.301393162012059e-05, + "loss": 2.1903, + "step": 6859500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301269303369451e-05, + "loss": 2.2199, + "step": 6860000 + }, + { + "epoch": 33.99, + "learning_rate": 3.3011454447268424e-05, + "loss": 2.2266, + "step": 6860500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301021586084234e-05, + "loss": 2.223, + "step": 6861000 + }, + { + "epoch": 33.99, + "learning_rate": 3.300897975158911e-05, + "loss": 2.2017, + "step": 6861500 + }, + { + "epoch": 34.0, + "learning_rate": 3.300774116516303e-05, + "loss": 2.2275, + "step": 6862000 + }, + { + "epoch": 34.0, + "learning_rate": 3.3006502578736944e-05, + "loss": 2.1995, + "step": 6862500 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.6615834582009188, + "eval_accuracy_mlm": 0.6176810786763514, + "eval_accuracy_nsp": 0.8686612357280975, + "eval_loss": 2.2982945442199707, + "eval_runtime": 145.6589, + "eval_samples_per_second": 1750.384, + "eval_steps_per_second": 72.938, + "step": 6862662 + } + ], + "max_steps": 20184300, + "num_train_epochs": 100, + "total_flos": 8.884340001928284e+18, + "trial_name": null, + "trial_params": null +}