{ "best_metric": null, "best_model_checkpoint": null, "epoch": 70.0, "global_step": 14129010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9998768845092475e-05, "loss": 3.3856, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999753025866639e-05, "loss": 3.1861, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.999629167224031e-05, "loss": 3.1416, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9995053085814226e-05, "loss": 3.0688, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.999381449938814e-05, "loss": 3.0467, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.999257591296206e-05, "loss": 3.0166, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.999133980370883e-05, "loss": 3.0395, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.9990101217282745e-05, "loss": 2.9982, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.9988862630856655e-05, "loss": 2.9531, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.9987626521603424e-05, "loss": 2.985, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.998638793517734e-05, "loss": 2.9528, "step": 5500 }, { "epoch": 0.03, "learning_rate": 4.998514934875126e-05, "loss": 2.9339, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.9983910762325175e-05, "loss": 2.9353, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.998267217589909e-05, "loss": 2.9251, "step": 7000 }, { "epoch": 0.04, "learning_rate": 4.998143358947301e-05, "loss": 2.9249, "step": 7500 }, { "epoch": 0.04, "learning_rate": 4.9980195003046926e-05, "loss": 2.922, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.997895641662084e-05, "loss": 2.9129, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.997771783019476e-05, "loss": 2.8805, "step": 9000 }, { "epoch": 0.05, "learning_rate": 4.997647924376868e-05, "loss": 2.8891, "step": 9500 }, { "epoch": 0.05, "learning_rate": 4.9975240657342594e-05, "loss": 2.8833, "step": 10000 }, { "epoch": 0.05, "learning_rate": 4.997400207091651e-05, "loss": 2.9069, "step": 10500 }, { "epoch": 0.05, "learning_rate": 4.997276596166328e-05, "loss": 2.8774, "step": 11000 }, { "epoch": 0.06, "learning_rate": 4.997152737523719e-05, "loss": 2.8608, "step": 11500 }, { "epoch": 0.06, "learning_rate": 4.9970288788811106e-05, "loss": 2.8853, "step": 12000 }, { "epoch": 0.06, "learning_rate": 4.996905020238502e-05, "loss": 2.854, "step": 12500 }, { "epoch": 0.06, "learning_rate": 4.996781161595894e-05, "loss": 2.8447, "step": 13000 }, { "epoch": 0.07, "learning_rate": 4.996657550670571e-05, "loss": 2.8466, "step": 13500 }, { "epoch": 0.07, "learning_rate": 4.9965336920279626e-05, "loss": 2.8537, "step": 14000 }, { "epoch": 0.07, "learning_rate": 4.996409833385354e-05, "loss": 2.8332, "step": 14500 }, { "epoch": 0.07, "learning_rate": 4.996285974742746e-05, "loss": 2.8452, "step": 15000 }, { "epoch": 0.08, "learning_rate": 4.996162363817423e-05, "loss": 2.8432, "step": 15500 }, { "epoch": 0.08, "learning_rate": 4.9960385051748146e-05, "loss": 2.846, "step": 16000 }, { "epoch": 0.08, "learning_rate": 4.995914646532206e-05, "loss": 2.8418, "step": 16500 }, { "epoch": 0.08, "learning_rate": 4.995790787889598e-05, "loss": 2.8401, "step": 17000 }, { "epoch": 0.09, "learning_rate": 4.9956669292469896e-05, "loss": 2.8253, "step": 17500 }, { "epoch": 0.09, "learning_rate": 4.9955430706043807e-05, "loss": 2.8343, "step": 18000 }, { "epoch": 0.09, "learning_rate": 4.9954192119617723e-05, "loss": 2.8231, "step": 18500 }, { "epoch": 0.09, "learning_rate": 4.995295353319164e-05, "loss": 2.7981, "step": 19000 }, { "epoch": 0.1, "learning_rate": 4.995171742393841e-05, "loss": 2.8358, "step": 19500 }, { "epoch": 0.1, "learning_rate": 4.9950478837512326e-05, "loss": 2.825, "step": 20000 }, { "epoch": 0.1, "learning_rate": 4.994924025108624e-05, "loss": 2.7965, "step": 20500 }, { "epoch": 0.1, "learning_rate": 4.994800166466016e-05, "loss": 2.8224, "step": 21000 }, { "epoch": 0.11, "learning_rate": 4.9946768032579774e-05, "loss": 2.8108, "step": 21500 }, { "epoch": 0.11, "learning_rate": 4.994552944615369e-05, "loss": 2.8076, "step": 22000 }, { "epoch": 0.11, "learning_rate": 4.994429085972761e-05, "loss": 2.8013, "step": 22500 }, { "epoch": 0.11, "learning_rate": 4.9943052273301525e-05, "loss": 2.818, "step": 23000 }, { "epoch": 0.12, "learning_rate": 4.99418161640483e-05, "loss": 2.8436, "step": 23500 }, { "epoch": 0.12, "learning_rate": 4.994057757762222e-05, "loss": 2.8163, "step": 24000 }, { "epoch": 0.12, "learning_rate": 4.993933899119613e-05, "loss": 2.7991, "step": 24500 }, { "epoch": 0.12, "learning_rate": 4.9938100404770044e-05, "loss": 2.7893, "step": 25000 }, { "epoch": 0.13, "learning_rate": 4.993686181834396e-05, "loss": 2.7931, "step": 25500 }, { "epoch": 0.13, "learning_rate": 4.993562323191788e-05, "loss": 2.8015, "step": 26000 }, { "epoch": 0.13, "learning_rate": 4.9934384645491795e-05, "loss": 2.7764, "step": 26500 }, { "epoch": 0.13, "learning_rate": 4.993314605906571e-05, "loss": 2.7875, "step": 27000 }, { "epoch": 0.14, "learning_rate": 4.993190747263963e-05, "loss": 2.8176, "step": 27500 }, { "epoch": 0.14, "learning_rate": 4.9930668886213546e-05, "loss": 2.7744, "step": 28000 }, { "epoch": 0.14, "learning_rate": 4.992943029978746e-05, "loss": 2.8011, "step": 28500 }, { "epoch": 0.14, "learning_rate": 4.9928194190534225e-05, "loss": 2.7897, "step": 29000 }, { "epoch": 0.15, "learning_rate": 4.992695560410814e-05, "loss": 2.7773, "step": 29500 }, { "epoch": 0.15, "learning_rate": 4.992571701768206e-05, "loss": 2.7895, "step": 30000 }, { "epoch": 0.15, "learning_rate": 4.9924478431255976e-05, "loss": 2.795, "step": 30500 }, { "epoch": 0.15, "learning_rate": 4.992323984482989e-05, "loss": 2.7796, "step": 31000 }, { "epoch": 0.16, "learning_rate": 4.992200125840381e-05, "loss": 2.7868, "step": 31500 }, { "epoch": 0.16, "learning_rate": 4.9920762671977726e-05, "loss": 2.8024, "step": 32000 }, { "epoch": 0.16, "learning_rate": 4.991952408555164e-05, "loss": 2.7609, "step": 32500 }, { "epoch": 0.16, "learning_rate": 4.9918290453471264e-05, "loss": 2.7815, "step": 33000 }, { "epoch": 0.17, "learning_rate": 4.991705186704518e-05, "loss": 2.7409, "step": 33500 }, { "epoch": 0.17, "learning_rate": 4.99158132806191e-05, "loss": 2.7607, "step": 34000 }, { "epoch": 0.17, "learning_rate": 4.9914574694193015e-05, "loss": 2.7929, "step": 34500 }, { "epoch": 0.17, "learning_rate": 4.9913336107766925e-05, "loss": 2.782, "step": 35000 }, { "epoch": 0.18, "learning_rate": 4.99120999985137e-05, "loss": 2.785, "step": 35500 }, { "epoch": 0.18, "learning_rate": 4.991086141208762e-05, "loss": 2.7697, "step": 36000 }, { "epoch": 0.18, "learning_rate": 4.9909622825661534e-05, "loss": 2.7626, "step": 36500 }, { "epoch": 0.18, "learning_rate": 4.99083867164083e-05, "loss": 2.7679, "step": 37000 }, { "epoch": 0.19, "learning_rate": 4.990714812998222e-05, "loss": 2.7565, "step": 37500 }, { "epoch": 0.19, "learning_rate": 4.990590954355614e-05, "loss": 2.7848, "step": 38000 }, { "epoch": 0.19, "learning_rate": 4.99046734343029e-05, "loss": 2.7555, "step": 38500 }, { "epoch": 0.19, "learning_rate": 4.9903434847876816e-05, "loss": 2.7742, "step": 39000 }, { "epoch": 0.2, "learning_rate": 4.990219626145073e-05, "loss": 2.7821, "step": 39500 }, { "epoch": 0.2, "learning_rate": 4.990095767502465e-05, "loss": 2.7616, "step": 40000 }, { "epoch": 0.2, "learning_rate": 4.989971908859857e-05, "loss": 2.7711, "step": 40500 }, { "epoch": 0.2, "learning_rate": 4.9898480502172484e-05, "loss": 2.7499, "step": 41000 }, { "epoch": 0.21, "learning_rate": 4.98972419157464e-05, "loss": 2.7549, "step": 41500 }, { "epoch": 0.21, "learning_rate": 4.989600332932032e-05, "loss": 2.7782, "step": 42000 }, { "epoch": 0.21, "learning_rate": 4.9894764742894234e-05, "loss": 2.748, "step": 42500 }, { "epoch": 0.21, "learning_rate": 4.9893528633641e-05, "loss": 2.7325, "step": 43000 }, { "epoch": 0.22, "learning_rate": 4.989229004721492e-05, "loss": 2.7488, "step": 43500 }, { "epoch": 0.22, "learning_rate": 4.989105146078884e-05, "loss": 2.7606, "step": 44000 }, { "epoch": 0.22, "learning_rate": 4.9889812874362754e-05, "loss": 2.7445, "step": 44500 }, { "epoch": 0.22, "learning_rate": 4.988857428793667e-05, "loss": 2.76, "step": 45000 }, { "epoch": 0.23, "learning_rate": 4.988733570151059e-05, "loss": 2.7375, "step": 45500 }, { "epoch": 0.23, "learning_rate": 4.98860971150845e-05, "loss": 2.7624, "step": 46000 }, { "epoch": 0.23, "learning_rate": 4.9884858528658415e-05, "loss": 2.7588, "step": 46500 }, { "epoch": 0.23, "learning_rate": 4.9883622419405184e-05, "loss": 2.7508, "step": 47000 }, { "epoch": 0.24, "learning_rate": 4.988238631015195e-05, "loss": 2.7562, "step": 47500 }, { "epoch": 0.24, "learning_rate": 4.988114772372587e-05, "loss": 2.7546, "step": 48000 }, { "epoch": 0.24, "learning_rate": 4.9879909137299787e-05, "loss": 2.7657, "step": 48500 }, { "epoch": 0.24, "learning_rate": 4.9878670550873703e-05, "loss": 2.741, "step": 49000 }, { "epoch": 0.25, "learning_rate": 4.9877434441620465e-05, "loss": 2.7813, "step": 49500 }, { "epoch": 0.25, "learning_rate": 4.987619585519438e-05, "loss": 2.7575, "step": 50000 }, { "epoch": 0.25, "learning_rate": 4.98749572687683e-05, "loss": 2.7419, "step": 50500 }, { "epoch": 0.25, "learning_rate": 4.9873718682342216e-05, "loss": 2.7422, "step": 51000 }, { "epoch": 0.26, "learning_rate": 4.987248009591613e-05, "loss": 2.7333, "step": 51500 }, { "epoch": 0.26, "learning_rate": 4.987124150949005e-05, "loss": 2.743, "step": 52000 }, { "epoch": 0.26, "learning_rate": 4.987000540023682e-05, "loss": 2.7217, "step": 52500 }, { "epoch": 0.26, "learning_rate": 4.9868766813810736e-05, "loss": 2.7355, "step": 53000 }, { "epoch": 0.27, "learning_rate": 4.986752822738465e-05, "loss": 2.7393, "step": 53500 }, { "epoch": 0.27, "learning_rate": 4.986628964095857e-05, "loss": 2.7279, "step": 54000 }, { "epoch": 0.27, "learning_rate": 4.986505105453249e-05, "loss": 2.7535, "step": 54500 }, { "epoch": 0.27, "learning_rate": 4.9863812468106404e-05, "loss": 2.7389, "step": 55000 }, { "epoch": 0.27, "learning_rate": 4.986257635885317e-05, "loss": 2.7488, "step": 55500 }, { "epoch": 0.28, "learning_rate": 4.986134024959994e-05, "loss": 2.7452, "step": 56000 }, { "epoch": 0.28, "learning_rate": 4.986010166317385e-05, "loss": 2.7503, "step": 56500 }, { "epoch": 0.28, "learning_rate": 4.985886307674777e-05, "loss": 2.7213, "step": 57000 }, { "epoch": 0.28, "learning_rate": 4.9857624490321685e-05, "loss": 2.7421, "step": 57500 }, { "epoch": 0.29, "learning_rate": 4.98563859038956e-05, "loss": 2.7295, "step": 58000 }, { "epoch": 0.29, "learning_rate": 4.985514731746952e-05, "loss": 2.7164, "step": 58500 }, { "epoch": 0.29, "learning_rate": 4.9853908731043436e-05, "loss": 2.7262, "step": 59000 }, { "epoch": 0.29, "learning_rate": 4.985267014461735e-05, "loss": 2.734, "step": 59500 }, { "epoch": 0.3, "learning_rate": 4.985143155819127e-05, "loss": 2.7186, "step": 60000 }, { "epoch": 0.3, "learning_rate": 4.985019297176519e-05, "loss": 2.739, "step": 60500 }, { "epoch": 0.3, "learning_rate": 4.9848954385339104e-05, "loss": 2.7356, "step": 61000 }, { "epoch": 0.3, "learning_rate": 4.984771579891302e-05, "loss": 2.7406, "step": 61500 }, { "epoch": 0.31, "learning_rate": 4.984647968965979e-05, "loss": 2.7167, "step": 62000 }, { "epoch": 0.31, "learning_rate": 4.9845241103233706e-05, "loss": 2.6958, "step": 62500 }, { "epoch": 0.31, "learning_rate": 4.984400499398047e-05, "loss": 2.7223, "step": 63000 }, { "epoch": 0.31, "learning_rate": 4.9842766407554385e-05, "loss": 2.7279, "step": 63500 }, { "epoch": 0.32, "learning_rate": 4.98415278211283e-05, "loss": 2.7186, "step": 64000 }, { "epoch": 0.32, "learning_rate": 4.984029171187508e-05, "loss": 2.7359, "step": 64500 }, { "epoch": 0.32, "learning_rate": 4.9839053125448995e-05, "loss": 2.7288, "step": 65000 }, { "epoch": 0.32, "learning_rate": 4.983781453902291e-05, "loss": 2.7157, "step": 65500 }, { "epoch": 0.33, "learning_rate": 4.983657595259682e-05, "loss": 2.7194, "step": 66000 }, { "epoch": 0.33, "learning_rate": 4.983533736617074e-05, "loss": 2.7106, "step": 66500 }, { "epoch": 0.33, "learning_rate": 4.9834098779744656e-05, "loss": 2.7313, "step": 67000 }, { "epoch": 0.33, "learning_rate": 4.983286019331857e-05, "loss": 2.7072, "step": 67500 }, { "epoch": 0.34, "learning_rate": 4.983162160689249e-05, "loss": 2.7392, "step": 68000 }, { "epoch": 0.34, "learning_rate": 4.9830383020466406e-05, "loss": 2.7083, "step": 68500 }, { "epoch": 0.34, "learning_rate": 4.982914691121317e-05, "loss": 2.7407, "step": 69000 }, { "epoch": 0.34, "learning_rate": 4.9827908324787085e-05, "loss": 2.7374, "step": 69500 }, { "epoch": 0.35, "learning_rate": 4.9826669738361e-05, "loss": 2.7151, "step": 70000 }, { "epoch": 0.35, "learning_rate": 4.982543115193492e-05, "loss": 2.7057, "step": 70500 }, { "epoch": 0.35, "learning_rate": 4.9824192565508836e-05, "loss": 2.7337, "step": 71000 }, { "epoch": 0.35, "learning_rate": 4.982295397908275e-05, "loss": 2.7017, "step": 71500 }, { "epoch": 0.36, "learning_rate": 4.982171786982952e-05, "loss": 2.7105, "step": 72000 }, { "epoch": 0.36, "learning_rate": 4.982047928340344e-05, "loss": 2.7399, "step": 72500 }, { "epoch": 0.36, "learning_rate": 4.981924565132306e-05, "loss": 2.707, "step": 73000 }, { "epoch": 0.36, "learning_rate": 4.981800954206983e-05, "loss": 2.7199, "step": 73500 }, { "epoch": 0.37, "learning_rate": 4.9816770955643745e-05, "loss": 2.7107, "step": 74000 }, { "epoch": 0.37, "learning_rate": 4.981553236921766e-05, "loss": 2.7136, "step": 74500 }, { "epoch": 0.37, "learning_rate": 4.981429378279158e-05, "loss": 2.711, "step": 75000 }, { "epoch": 0.37, "learning_rate": 4.9813055196365496e-05, "loss": 2.705, "step": 75500 }, { "epoch": 0.38, "learning_rate": 4.981181908711226e-05, "loss": 2.672, "step": 76000 }, { "epoch": 0.38, "learning_rate": 4.9810580500686175e-05, "loss": 2.7155, "step": 76500 }, { "epoch": 0.38, "learning_rate": 4.980934191426009e-05, "loss": 2.7097, "step": 77000 }, { "epoch": 0.38, "learning_rate": 4.980810332783401e-05, "loss": 2.7019, "step": 77500 }, { "epoch": 0.39, "learning_rate": 4.9806864741407926e-05, "loss": 2.7046, "step": 78000 }, { "epoch": 0.39, "learning_rate": 4.980562615498184e-05, "loss": 2.7231, "step": 78500 }, { "epoch": 0.39, "learning_rate": 4.980438756855576e-05, "loss": 2.7074, "step": 79000 }, { "epoch": 0.39, "learning_rate": 4.980314898212968e-05, "loss": 2.7098, "step": 79500 }, { "epoch": 0.4, "learning_rate": 4.9801910395703594e-05, "loss": 2.7095, "step": 80000 }, { "epoch": 0.4, "learning_rate": 4.980067180927751e-05, "loss": 2.6981, "step": 80500 }, { "epoch": 0.4, "learning_rate": 4.979943322285143e-05, "loss": 2.7144, "step": 81000 }, { "epoch": 0.4, "learning_rate": 4.9798194636425344e-05, "loss": 2.7018, "step": 81500 }, { "epoch": 0.41, "learning_rate": 4.979695604999926e-05, "loss": 2.7057, "step": 82000 }, { "epoch": 0.41, "learning_rate": 4.979571746357318e-05, "loss": 2.6865, "step": 82500 }, { "epoch": 0.41, "learning_rate": 4.9794478877147095e-05, "loss": 2.7394, "step": 83000 }, { "epoch": 0.41, "learning_rate": 4.979324029072101e-05, "loss": 2.7101, "step": 83500 }, { "epoch": 0.42, "learning_rate": 4.979200170429493e-05, "loss": 2.7016, "step": 84000 }, { "epoch": 0.42, "learning_rate": 4.979076311786884e-05, "loss": 2.6752, "step": 84500 }, { "epoch": 0.42, "learning_rate": 4.9789524531442756e-05, "loss": 2.7344, "step": 85000 }, { "epoch": 0.42, "learning_rate": 4.978828594501667e-05, "loss": 2.7173, "step": 85500 }, { "epoch": 0.43, "learning_rate": 4.978704735859059e-05, "loss": 2.7205, "step": 86000 }, { "epoch": 0.43, "learning_rate": 4.978581124933736e-05, "loss": 2.6739, "step": 86500 }, { "epoch": 0.43, "learning_rate": 4.9784572662911276e-05, "loss": 2.7142, "step": 87000 }, { "epoch": 0.43, "learning_rate": 4.978333407648519e-05, "loss": 2.6928, "step": 87500 }, { "epoch": 0.44, "learning_rate": 4.97820954900591e-05, "loss": 2.722, "step": 88000 }, { "epoch": 0.44, "learning_rate": 4.978085690363302e-05, "loss": 2.7314, "step": 88500 }, { "epoch": 0.44, "learning_rate": 4.9779620794379795e-05, "loss": 2.7161, "step": 89000 }, { "epoch": 0.44, "learning_rate": 4.977838220795371e-05, "loss": 2.7042, "step": 89500 }, { "epoch": 0.45, "learning_rate": 4.977714362152763e-05, "loss": 2.7171, "step": 90000 }, { "epoch": 0.45, "learning_rate": 4.9775905035101546e-05, "loss": 2.7061, "step": 90500 }, { "epoch": 0.45, "learning_rate": 4.977466892584831e-05, "loss": 2.6724, "step": 91000 }, { "epoch": 0.45, "learning_rate": 4.9773430339422225e-05, "loss": 2.6936, "step": 91500 }, { "epoch": 0.46, "learning_rate": 4.977219175299614e-05, "loss": 2.709, "step": 92000 }, { "epoch": 0.46, "learning_rate": 4.977095316657006e-05, "loss": 2.7078, "step": 92500 }, { "epoch": 0.46, "learning_rate": 4.976971705731683e-05, "loss": 2.6722, "step": 93000 }, { "epoch": 0.46, "learning_rate": 4.9768478470890745e-05, "loss": 2.6995, "step": 93500 }, { "epoch": 0.47, "learning_rate": 4.976723988446466e-05, "loss": 2.6995, "step": 94000 }, { "epoch": 0.47, "learning_rate": 4.976600129803858e-05, "loss": 2.6942, "step": 94500 }, { "epoch": 0.47, "learning_rate": 4.9764762711612495e-05, "loss": 2.69, "step": 95000 }, { "epoch": 0.47, "learning_rate": 4.9763526602359264e-05, "loss": 2.7196, "step": 95500 }, { "epoch": 0.48, "learning_rate": 4.976228801593318e-05, "loss": 2.6655, "step": 96000 }, { "epoch": 0.48, "learning_rate": 4.97610494295071e-05, "loss": 2.7035, "step": 96500 }, { "epoch": 0.48, "learning_rate": 4.9759810843081015e-05, "loss": 2.7225, "step": 97000 }, { "epoch": 0.48, "learning_rate": 4.9758572256654925e-05, "loss": 2.6971, "step": 97500 }, { "epoch": 0.49, "learning_rate": 4.975733367022884e-05, "loss": 2.7039, "step": 98000 }, { "epoch": 0.49, "learning_rate": 4.975609508380276e-05, "loss": 2.6814, "step": 98500 }, { "epoch": 0.49, "learning_rate": 4.9754856497376676e-05, "loss": 2.7068, "step": 99000 }, { "epoch": 0.49, "learning_rate": 4.975361791095059e-05, "loss": 2.6989, "step": 99500 }, { "epoch": 0.5, "learning_rate": 4.975237932452451e-05, "loss": 2.6904, "step": 100000 }, { "epoch": 0.5, "learning_rate": 4.975114073809842e-05, "loss": 2.6863, "step": 100500 }, { "epoch": 0.5, "learning_rate": 4.974990215167234e-05, "loss": 2.6785, "step": 101000 }, { "epoch": 0.5, "learning_rate": 4.974866604241911e-05, "loss": 2.6996, "step": 101500 }, { "epoch": 0.51, "learning_rate": 4.974742993316588e-05, "loss": 2.7096, "step": 102000 }, { "epoch": 0.51, "learning_rate": 4.97461913467398e-05, "loss": 2.6979, "step": 102500 }, { "epoch": 0.51, "learning_rate": 4.9744952760313715e-05, "loss": 2.6932, "step": 103000 }, { "epoch": 0.51, "learning_rate": 4.974371417388763e-05, "loss": 2.6986, "step": 103500 }, { "epoch": 0.52, "learning_rate": 4.974247558746155e-05, "loss": 2.6888, "step": 104000 }, { "epoch": 0.52, "learning_rate": 4.974123700103546e-05, "loss": 2.6854, "step": 104500 }, { "epoch": 0.52, "learning_rate": 4.974000089178223e-05, "loss": 2.697, "step": 105000 }, { "epoch": 0.52, "learning_rate": 4.9738762305356145e-05, "loss": 2.6931, "step": 105500 }, { "epoch": 0.53, "learning_rate": 4.973752371893006e-05, "loss": 2.7098, "step": 106000 }, { "epoch": 0.53, "learning_rate": 4.973628513250398e-05, "loss": 2.6989, "step": 106500 }, { "epoch": 0.53, "learning_rate": 4.9735046546077896e-05, "loss": 2.7059, "step": 107000 }, { "epoch": 0.53, "learning_rate": 4.9733810436824664e-05, "loss": 2.6612, "step": 107500 }, { "epoch": 0.54, "learning_rate": 4.973257185039858e-05, "loss": 2.6945, "step": 108000 }, { "epoch": 0.54, "learning_rate": 4.97313332639725e-05, "loss": 2.6951, "step": 108500 }, { "epoch": 0.54, "learning_rate": 4.9730094677546415e-05, "loss": 2.6871, "step": 109000 }, { "epoch": 0.54, "learning_rate": 4.972885609112033e-05, "loss": 2.6909, "step": 109500 }, { "epoch": 0.54, "learning_rate": 4.972761750469425e-05, "loss": 2.6948, "step": 110000 }, { "epoch": 0.55, "learning_rate": 4.972638139544101e-05, "loss": 2.7037, "step": 110500 }, { "epoch": 0.55, "learning_rate": 4.972514280901493e-05, "loss": 2.6804, "step": 111000 }, { "epoch": 0.55, "learning_rate": 4.9723904222588845e-05, "loss": 2.6895, "step": 111500 }, { "epoch": 0.55, "learning_rate": 4.972266563616276e-05, "loss": 2.6878, "step": 112000 }, { "epoch": 0.56, "learning_rate": 4.972142704973668e-05, "loss": 2.6916, "step": 112500 }, { "epoch": 0.56, "learning_rate": 4.9720188463310596e-05, "loss": 2.6843, "step": 113000 }, { "epoch": 0.56, "learning_rate": 4.9718952354057365e-05, "loss": 2.6903, "step": 113500 }, { "epoch": 0.56, "learning_rate": 4.971771376763128e-05, "loss": 2.6777, "step": 114000 }, { "epoch": 0.57, "learning_rate": 4.97164751812052e-05, "loss": 2.7029, "step": 114500 }, { "epoch": 0.57, "learning_rate": 4.9715236594779115e-05, "loss": 2.7026, "step": 115000 }, { "epoch": 0.57, "learning_rate": 4.971399800835303e-05, "loss": 2.6807, "step": 115500 }, { "epoch": 0.57, "learning_rate": 4.971275942192695e-05, "loss": 2.6868, "step": 116000 }, { "epoch": 0.58, "learning_rate": 4.9711520835500866e-05, "loss": 2.6976, "step": 116500 }, { "epoch": 0.58, "learning_rate": 4.971028472624763e-05, "loss": 2.6817, "step": 117000 }, { "epoch": 0.58, "learning_rate": 4.9709046139821545e-05, "loss": 2.6925, "step": 117500 }, { "epoch": 0.58, "learning_rate": 4.970780755339546e-05, "loss": 2.6972, "step": 118000 }, { "epoch": 0.59, "learning_rate": 4.970656896696938e-05, "loss": 2.6853, "step": 118500 }, { "epoch": 0.59, "learning_rate": 4.9705330380543296e-05, "loss": 2.6735, "step": 119000 }, { "epoch": 0.59, "learning_rate": 4.970409179411721e-05, "loss": 2.702, "step": 119500 }, { "epoch": 0.59, "learning_rate": 4.970285320769113e-05, "loss": 2.6876, "step": 120000 }, { "epoch": 0.6, "learning_rate": 4.970161462126505e-05, "loss": 2.6829, "step": 120500 }, { "epoch": 0.6, "learning_rate": 4.9700378512011815e-05, "loss": 2.6596, "step": 121000 }, { "epoch": 0.6, "learning_rate": 4.969913992558573e-05, "loss": 2.6907, "step": 121500 }, { "epoch": 0.6, "learning_rate": 4.969790133915965e-05, "loss": 2.6968, "step": 122000 }, { "epoch": 0.61, "learning_rate": 4.9696662752733566e-05, "loss": 2.6804, "step": 122500 }, { "epoch": 0.61, "learning_rate": 4.969542664348033e-05, "loss": 2.6914, "step": 123000 }, { "epoch": 0.61, "learning_rate": 4.96941905342271e-05, "loss": 2.6812, "step": 123500 }, { "epoch": 0.61, "learning_rate": 4.9692951947801014e-05, "loss": 2.6607, "step": 124000 }, { "epoch": 0.62, "learning_rate": 4.969171336137493e-05, "loss": 2.686, "step": 124500 }, { "epoch": 0.62, "learning_rate": 4.969047477494885e-05, "loss": 2.6591, "step": 125000 }, { "epoch": 0.62, "learning_rate": 4.9689236188522765e-05, "loss": 2.6824, "step": 125500 }, { "epoch": 0.62, "learning_rate": 4.968799760209668e-05, "loss": 2.694, "step": 126000 }, { "epoch": 0.63, "learning_rate": 4.968676149284345e-05, "loss": 2.6881, "step": 126500 }, { "epoch": 0.63, "learning_rate": 4.968552290641737e-05, "loss": 2.7259, "step": 127000 }, { "epoch": 0.63, "learning_rate": 4.9684284319991284e-05, "loss": 2.68, "step": 127500 }, { "epoch": 0.63, "learning_rate": 4.9683045733565195e-05, "loss": 2.6683, "step": 128000 }, { "epoch": 0.64, "learning_rate": 4.968180714713911e-05, "loss": 2.6481, "step": 128500 }, { "epoch": 0.64, "learning_rate": 4.968056856071303e-05, "loss": 2.6635, "step": 129000 }, { "epoch": 0.64, "learning_rate": 4.96793324514598e-05, "loss": 2.6736, "step": 129500 }, { "epoch": 0.64, "learning_rate": 4.9678093865033714e-05, "loss": 2.7035, "step": 130000 }, { "epoch": 0.65, "learning_rate": 4.967685527860763e-05, "loss": 2.6893, "step": 130500 }, { "epoch": 0.65, "learning_rate": 4.967561669218155e-05, "loss": 2.6946, "step": 131000 }, { "epoch": 0.65, "learning_rate": 4.9674378105755465e-05, "loss": 2.6781, "step": 131500 }, { "epoch": 0.65, "learning_rate": 4.967313951932938e-05, "loss": 2.6633, "step": 132000 }, { "epoch": 0.66, "learning_rate": 4.96719009329033e-05, "loss": 2.6894, "step": 132500 }, { "epoch": 0.66, "learning_rate": 4.9670662346477216e-05, "loss": 2.6659, "step": 133000 }, { "epoch": 0.66, "learning_rate": 4.966942376005113e-05, "loss": 2.668, "step": 133500 }, { "epoch": 0.66, "learning_rate": 4.966818517362505e-05, "loss": 2.6745, "step": 134000 }, { "epoch": 0.67, "learning_rate": 4.9666946587198966e-05, "loss": 2.6407, "step": 134500 }, { "epoch": 0.67, "learning_rate": 4.966571047794573e-05, "loss": 2.6832, "step": 135000 }, { "epoch": 0.67, "learning_rate": 4.96644743686925e-05, "loss": 2.6761, "step": 135500 }, { "epoch": 0.67, "learning_rate": 4.9663235782266414e-05, "loss": 2.7023, "step": 136000 }, { "epoch": 0.68, "learning_rate": 4.966199719584033e-05, "loss": 2.6568, "step": 136500 }, { "epoch": 0.68, "learning_rate": 4.966075860941425e-05, "loss": 2.6889, "step": 137000 }, { "epoch": 0.68, "learning_rate": 4.9659520022988165e-05, "loss": 2.6811, "step": 137500 }, { "epoch": 0.68, "learning_rate": 4.965828391373494e-05, "loss": 2.6648, "step": 138000 }, { "epoch": 0.69, "learning_rate": 4.965704532730885e-05, "loss": 2.668, "step": 138500 }, { "epoch": 0.69, "learning_rate": 4.965580674088277e-05, "loss": 2.6594, "step": 139000 }, { "epoch": 0.69, "learning_rate": 4.9654568154456685e-05, "loss": 2.6537, "step": 139500 }, { "epoch": 0.69, "learning_rate": 4.96533295680306e-05, "loss": 2.7002, "step": 140000 }, { "epoch": 0.7, "learning_rate": 4.965209345877737e-05, "loss": 2.679, "step": 140500 }, { "epoch": 0.7, "learning_rate": 4.965085487235129e-05, "loss": 2.6813, "step": 141000 }, { "epoch": 0.7, "learning_rate": 4.9649618763098056e-05, "loss": 2.6977, "step": 141500 }, { "epoch": 0.7, "learning_rate": 4.964838017667197e-05, "loss": 2.6734, "step": 142000 }, { "epoch": 0.71, "learning_rate": 4.964714159024589e-05, "loss": 2.6827, "step": 142500 }, { "epoch": 0.71, "learning_rate": 4.964590300381981e-05, "loss": 2.6551, "step": 143000 }, { "epoch": 0.71, "learning_rate": 4.9644664417393724e-05, "loss": 2.6671, "step": 143500 }, { "epoch": 0.71, "learning_rate": 4.964342583096764e-05, "loss": 2.6576, "step": 144000 }, { "epoch": 0.72, "learning_rate": 4.96421897217144e-05, "loss": 2.6725, "step": 144500 }, { "epoch": 0.72, "learning_rate": 4.964095113528832e-05, "loss": 2.6767, "step": 145000 }, { "epoch": 0.72, "learning_rate": 4.963971254886224e-05, "loss": 2.6836, "step": 145500 }, { "epoch": 0.72, "learning_rate": 4.9638473962436154e-05, "loss": 2.6378, "step": 146000 }, { "epoch": 0.73, "learning_rate": 4.963723537601007e-05, "loss": 2.6721, "step": 146500 }, { "epoch": 0.73, "learning_rate": 4.963599678958399e-05, "loss": 2.7062, "step": 147000 }, { "epoch": 0.73, "learning_rate": 4.9634758203157904e-05, "loss": 2.6502, "step": 147500 }, { "epoch": 0.73, "learning_rate": 4.9633519616731815e-05, "loss": 2.6751, "step": 148000 }, { "epoch": 0.74, "learning_rate": 4.963228103030573e-05, "loss": 2.6675, "step": 148500 }, { "epoch": 0.74, "learning_rate": 4.963104244387965e-05, "loss": 2.6572, "step": 149000 }, { "epoch": 0.74, "learning_rate": 4.9629803857453565e-05, "loss": 2.6735, "step": 149500 }, { "epoch": 0.74, "learning_rate": 4.962856774820034e-05, "loss": 2.6621, "step": 150000 }, { "epoch": 0.75, "learning_rate": 4.962732916177426e-05, "loss": 2.6808, "step": 150500 }, { "epoch": 0.75, "learning_rate": 4.962609057534817e-05, "loss": 2.6627, "step": 151000 }, { "epoch": 0.75, "learning_rate": 4.9624851988922085e-05, "loss": 2.6605, "step": 151500 }, { "epoch": 0.75, "learning_rate": 4.9623613402496e-05, "loss": 2.6853, "step": 152000 }, { "epoch": 0.76, "learning_rate": 4.962237481606992e-05, "loss": 2.6536, "step": 152500 }, { "epoch": 0.76, "learning_rate": 4.9621136229643836e-05, "loss": 2.6741, "step": 153000 }, { "epoch": 0.76, "learning_rate": 4.961989764321775e-05, "loss": 2.6637, "step": 153500 }, { "epoch": 0.76, "learning_rate": 4.9618666488310225e-05, "loss": 2.65, "step": 154000 }, { "epoch": 0.77, "learning_rate": 4.961742790188414e-05, "loss": 2.6598, "step": 154500 }, { "epoch": 0.77, "learning_rate": 4.961618931545806e-05, "loss": 2.6615, "step": 155000 }, { "epoch": 0.77, "learning_rate": 4.9614950729031976e-05, "loss": 2.6592, "step": 155500 }, { "epoch": 0.77, "learning_rate": 4.9613712142605886e-05, "loss": 2.6707, "step": 156000 }, { "epoch": 0.78, "learning_rate": 4.96124735561798e-05, "loss": 2.6291, "step": 156500 }, { "epoch": 0.78, "learning_rate": 4.961123496975372e-05, "loss": 2.6811, "step": 157000 }, { "epoch": 0.78, "learning_rate": 4.960999638332764e-05, "loss": 2.6534, "step": 157500 }, { "epoch": 0.78, "learning_rate": 4.9608757796901554e-05, "loss": 2.6623, "step": 158000 }, { "epoch": 0.79, "learning_rate": 4.960751921047547e-05, "loss": 2.6498, "step": 158500 }, { "epoch": 0.79, "learning_rate": 4.960628062404939e-05, "loss": 2.6617, "step": 159000 }, { "epoch": 0.79, "learning_rate": 4.9605042037623305e-05, "loss": 2.6545, "step": 159500 }, { "epoch": 0.79, "learning_rate": 4.960380345119722e-05, "loss": 2.6516, "step": 160000 }, { "epoch": 0.8, "learning_rate": 4.960256734194399e-05, "loss": 2.6722, "step": 160500 }, { "epoch": 0.8, "learning_rate": 4.960132875551791e-05, "loss": 2.6724, "step": 161000 }, { "epoch": 0.8, "learning_rate": 4.9600090169091824e-05, "loss": 2.6304, "step": 161500 }, { "epoch": 0.8, "learning_rate": 4.959885158266574e-05, "loss": 2.6624, "step": 162000 }, { "epoch": 0.81, "learning_rate": 4.959761299623966e-05, "loss": 2.6667, "step": 162500 }, { "epoch": 0.81, "learning_rate": 4.9596374409813575e-05, "loss": 2.6568, "step": 163000 }, { "epoch": 0.81, "learning_rate": 4.959513830056034e-05, "loss": 2.6541, "step": 163500 }, { "epoch": 0.81, "learning_rate": 4.9593899714134254e-05, "loss": 2.6696, "step": 164000 }, { "epoch": 0.81, "learning_rate": 4.959266112770817e-05, "loss": 2.6907, "step": 164500 }, { "epoch": 0.82, "learning_rate": 4.959142254128209e-05, "loss": 2.656, "step": 165000 }, { "epoch": 0.82, "learning_rate": 4.9590183954856005e-05, "loss": 2.6893, "step": 165500 }, { "epoch": 0.82, "learning_rate": 4.9588950322775625e-05, "loss": 2.6747, "step": 166000 }, { "epoch": 0.82, "learning_rate": 4.958771173634954e-05, "loss": 2.6528, "step": 166500 }, { "epoch": 0.83, "learning_rate": 4.958647314992346e-05, "loss": 2.6685, "step": 167000 }, { "epoch": 0.83, "learning_rate": 4.9585234563497376e-05, "loss": 2.6476, "step": 167500 }, { "epoch": 0.83, "learning_rate": 4.958399597707129e-05, "loss": 2.66, "step": 168000 }, { "epoch": 0.83, "learning_rate": 4.958275739064521e-05, "loss": 2.6539, "step": 168500 }, { "epoch": 0.84, "learning_rate": 4.958151880421913e-05, "loss": 2.6832, "step": 169000 }, { "epoch": 0.84, "learning_rate": 4.958028021779304e-05, "loss": 2.6409, "step": 169500 }, { "epoch": 0.84, "learning_rate": 4.9579044108539806e-05, "loss": 2.6694, "step": 170000 }, { "epoch": 0.84, "learning_rate": 4.957780552211372e-05, "loss": 2.6599, "step": 170500 }, { "epoch": 0.85, "learning_rate": 4.957656693568764e-05, "loss": 2.6693, "step": 171000 }, { "epoch": 0.85, "learning_rate": 4.957532834926156e-05, "loss": 2.6476, "step": 171500 }, { "epoch": 0.85, "learning_rate": 4.9574089762835474e-05, "loss": 2.6592, "step": 172000 }, { "epoch": 0.85, "learning_rate": 4.957285117640939e-05, "loss": 2.6631, "step": 172500 }, { "epoch": 0.86, "learning_rate": 4.957161258998331e-05, "loss": 2.6674, "step": 173000 }, { "epoch": 0.86, "learning_rate": 4.9570374003557224e-05, "loss": 2.6603, "step": 173500 }, { "epoch": 0.86, "learning_rate": 4.956913541713114e-05, "loss": 2.6461, "step": 174000 }, { "epoch": 0.86, "learning_rate": 4.956789930787791e-05, "loss": 2.6766, "step": 174500 }, { "epoch": 0.87, "learning_rate": 4.956666072145183e-05, "loss": 2.6398, "step": 175000 }, { "epoch": 0.87, "learning_rate": 4.956542461219859e-05, "loss": 2.6699, "step": 175500 }, { "epoch": 0.87, "learning_rate": 4.9564186025772506e-05, "loss": 2.6702, "step": 176000 }, { "epoch": 0.87, "learning_rate": 4.956294743934642e-05, "loss": 2.6357, "step": 176500 }, { "epoch": 0.88, "learning_rate": 4.956170885292034e-05, "loss": 2.6474, "step": 177000 }, { "epoch": 0.88, "learning_rate": 4.956047026649426e-05, "loss": 2.6496, "step": 177500 }, { "epoch": 0.88, "learning_rate": 4.9559231680068174e-05, "loss": 2.664, "step": 178000 }, { "epoch": 0.88, "learning_rate": 4.955799557081494e-05, "loss": 2.6964, "step": 178500 }, { "epoch": 0.89, "learning_rate": 4.955675698438886e-05, "loss": 2.6707, "step": 179000 }, { "epoch": 0.89, "learning_rate": 4.9555518397962776e-05, "loss": 2.654, "step": 179500 }, { "epoch": 0.89, "learning_rate": 4.955427981153669e-05, "loss": 2.6789, "step": 180000 }, { "epoch": 0.89, "learning_rate": 4.9553043702283455e-05, "loss": 2.6482, "step": 180500 }, { "epoch": 0.9, "learning_rate": 4.955180511585737e-05, "loss": 2.6471, "step": 181000 }, { "epoch": 0.9, "learning_rate": 4.955056652943129e-05, "loss": 2.6439, "step": 181500 }, { "epoch": 0.9, "learning_rate": 4.9549327943005206e-05, "loss": 2.6462, "step": 182000 }, { "epoch": 0.9, "learning_rate": 4.954808935657912e-05, "loss": 2.6688, "step": 182500 }, { "epoch": 0.91, "learning_rate": 4.954685077015304e-05, "loss": 2.6614, "step": 183000 }, { "epoch": 0.91, "learning_rate": 4.954561218372696e-05, "loss": 2.673, "step": 183500 }, { "epoch": 0.91, "learning_rate": 4.9544373597300874e-05, "loss": 2.6636, "step": 184000 }, { "epoch": 0.91, "learning_rate": 4.954313748804764e-05, "loss": 2.6533, "step": 184500 }, { "epoch": 0.92, "learning_rate": 4.954189890162156e-05, "loss": 2.6585, "step": 185000 }, { "epoch": 0.92, "learning_rate": 4.9540660315195477e-05, "loss": 2.6397, "step": 185500 }, { "epoch": 0.92, "learning_rate": 4.9539421728769394e-05, "loss": 2.6475, "step": 186000 }, { "epoch": 0.92, "learning_rate": 4.953818314234331e-05, "loss": 2.658, "step": 186500 }, { "epoch": 0.93, "learning_rate": 4.953694455591723e-05, "loss": 2.6754, "step": 187000 }, { "epoch": 0.93, "learning_rate": 4.9535705969491144e-05, "loss": 2.6488, "step": 187500 }, { "epoch": 0.93, "learning_rate": 4.953446738306506e-05, "loss": 2.6816, "step": 188000 }, { "epoch": 0.93, "learning_rate": 4.953322879663898e-05, "loss": 2.6558, "step": 188500 }, { "epoch": 0.94, "learning_rate": 4.953199268738574e-05, "loss": 2.6288, "step": 189000 }, { "epoch": 0.94, "learning_rate": 4.953075410095966e-05, "loss": 2.6406, "step": 189500 }, { "epoch": 0.94, "learning_rate": 4.9529517991706426e-05, "loss": 2.6695, "step": 190000 }, { "epoch": 0.94, "learning_rate": 4.952827940528034e-05, "loss": 2.6565, "step": 190500 }, { "epoch": 0.95, "learning_rate": 4.952704081885426e-05, "loss": 2.6577, "step": 191000 }, { "epoch": 0.95, "learning_rate": 4.952580470960103e-05, "loss": 2.6499, "step": 191500 }, { "epoch": 0.95, "learning_rate": 4.95245686003478e-05, "loss": 2.6423, "step": 192000 }, { "epoch": 0.95, "learning_rate": 4.9523330013921714e-05, "loss": 2.6735, "step": 192500 }, { "epoch": 0.96, "learning_rate": 4.952209142749563e-05, "loss": 2.6441, "step": 193000 }, { "epoch": 0.96, "learning_rate": 4.952085284106955e-05, "loss": 2.6782, "step": 193500 }, { "epoch": 0.96, "learning_rate": 4.9519614254643465e-05, "loss": 2.6542, "step": 194000 }, { "epoch": 0.96, "learning_rate": 4.951837566821738e-05, "loss": 2.6564, "step": 194500 }, { "epoch": 0.97, "learning_rate": 4.95171370817913e-05, "loss": 2.6682, "step": 195000 }, { "epoch": 0.97, "learning_rate": 4.951589849536521e-05, "loss": 2.6803, "step": 195500 }, { "epoch": 0.97, "learning_rate": 4.9514659908939126e-05, "loss": 2.6768, "step": 196000 }, { "epoch": 0.97, "learning_rate": 4.951342132251304e-05, "loss": 2.652, "step": 196500 }, { "epoch": 0.98, "learning_rate": 4.951218273608696e-05, "loss": 2.6467, "step": 197000 }, { "epoch": 0.98, "learning_rate": 4.951094414966088e-05, "loss": 2.631, "step": 197500 }, { "epoch": 0.98, "learning_rate": 4.9509705563234794e-05, "loss": 2.6418, "step": 198000 }, { "epoch": 0.98, "learning_rate": 4.950846697680871e-05, "loss": 2.6617, "step": 198500 }, { "epoch": 0.99, "learning_rate": 4.950722839038263e-05, "loss": 2.6553, "step": 199000 }, { "epoch": 0.99, "learning_rate": 4.9505989803956545e-05, "loss": 2.6369, "step": 199500 }, { "epoch": 0.99, "learning_rate": 4.9504753694703307e-05, "loss": 2.6282, "step": 200000 }, { "epoch": 0.99, "learning_rate": 4.9503515108277224e-05, "loss": 2.65, "step": 200500 }, { "epoch": 1.0, "learning_rate": 4.950227652185114e-05, "loss": 2.6651, "step": 201000 }, { "epoch": 1.0, "learning_rate": 4.950103793542506e-05, "loss": 2.6763, "step": 201500 }, { "epoch": 1.0, "eval_accuracy": 0.6236883143982033, "eval_accuracy_mlm": 0.5761070598072291, "eval_accuracy_nsp": 0.8479990900497727, "eval_loss": 2.567748546600342, "eval_runtime": 145.8951, "eval_samples_per_second": 1747.55, "eval_steps_per_second": 72.819, "step": 201843 }, { "epoch": 1.0, "learning_rate": 4.9499801826171826e-05, "loss": 2.6422, "step": 202000 }, { "epoch": 1.0, "learning_rate": 4.949856323974574e-05, "loss": 2.6314, "step": 202500 }, { "epoch": 1.01, "learning_rate": 4.949732465331966e-05, "loss": 2.6069, "step": 203000 }, { "epoch": 1.01, "learning_rate": 4.949608606689358e-05, "loss": 2.621, "step": 203500 }, { "epoch": 1.01, "learning_rate": 4.9494847480467494e-05, "loss": 2.6315, "step": 204000 }, { "epoch": 1.01, "learning_rate": 4.949361137121426e-05, "loss": 2.6192, "step": 204500 }, { "epoch": 1.02, "learning_rate": 4.949237278478818e-05, "loss": 2.6241, "step": 205000 }, { "epoch": 1.02, "learning_rate": 4.949113667553495e-05, "loss": 2.6318, "step": 205500 }, { "epoch": 1.02, "learning_rate": 4.9489898089108865e-05, "loss": 2.6222, "step": 206000 }, { "epoch": 1.02, "learning_rate": 4.948865950268278e-05, "loss": 2.6478, "step": 206500 }, { "epoch": 1.03, "learning_rate": 4.94874209162567e-05, "loss": 2.6204, "step": 207000 }, { "epoch": 1.03, "learning_rate": 4.9486182329830616e-05, "loss": 2.6316, "step": 207500 }, { "epoch": 1.03, "learning_rate": 4.9484946220577385e-05, "loss": 2.6242, "step": 208000 }, { "epoch": 1.03, "learning_rate": 4.94837076341513e-05, "loss": 2.5915, "step": 208500 }, { "epoch": 1.04, "learning_rate": 4.948246904772522e-05, "loss": 2.6172, "step": 209000 }, { "epoch": 1.04, "learning_rate": 4.9481230461299136e-05, "loss": 2.6155, "step": 209500 }, { "epoch": 1.04, "learning_rate": 4.947999187487305e-05, "loss": 2.6064, "step": 210000 }, { "epoch": 1.04, "learning_rate": 4.947875328844697e-05, "loss": 2.6333, "step": 210500 }, { "epoch": 1.05, "learning_rate": 4.947751470202088e-05, "loss": 2.6295, "step": 211000 }, { "epoch": 1.05, "learning_rate": 4.94762761155948e-05, "loss": 2.6262, "step": 211500 }, { "epoch": 1.05, "learning_rate": 4.9475037529168714e-05, "loss": 2.598, "step": 212000 }, { "epoch": 1.05, "learning_rate": 4.947379894274263e-05, "loss": 2.6367, "step": 212500 }, { "epoch": 1.06, "learning_rate": 4.947256035631655e-05, "loss": 2.6183, "step": 213000 }, { "epoch": 1.06, "learning_rate": 4.947132176989046e-05, "loss": 2.6535, "step": 213500 }, { "epoch": 1.06, "learning_rate": 4.9470083183464375e-05, "loss": 2.6075, "step": 214000 }, { "epoch": 1.06, "learning_rate": 4.946884707421114e-05, "loss": 2.5924, "step": 214500 }, { "epoch": 1.07, "learning_rate": 4.946760848778506e-05, "loss": 2.6268, "step": 215000 }, { "epoch": 1.07, "learning_rate": 4.946636990135898e-05, "loss": 2.6207, "step": 215500 }, { "epoch": 1.07, "learning_rate": 4.9465131314932894e-05, "loss": 2.5849, "step": 216000 }, { "epoch": 1.07, "learning_rate": 4.946389520567967e-05, "loss": 2.6141, "step": 216500 }, { "epoch": 1.08, "learning_rate": 4.946265661925359e-05, "loss": 2.6114, "step": 217000 }, { "epoch": 1.08, "learning_rate": 4.94614180328275e-05, "loss": 2.6018, "step": 217500 }, { "epoch": 1.08, "learning_rate": 4.9460179446401414e-05, "loss": 2.6197, "step": 218000 }, { "epoch": 1.08, "learning_rate": 4.945894085997533e-05, "loss": 2.6252, "step": 218500 }, { "epoch": 1.09, "learning_rate": 4.945770227354925e-05, "loss": 2.6027, "step": 219000 }, { "epoch": 1.09, "learning_rate": 4.9456463687123165e-05, "loss": 2.6315, "step": 219500 }, { "epoch": 1.09, "learning_rate": 4.9455225100697075e-05, "loss": 2.6094, "step": 220000 }, { "epoch": 1.09, "learning_rate": 4.9453988991443843e-05, "loss": 2.6246, "step": 220500 }, { "epoch": 1.09, "learning_rate": 4.945275040501776e-05, "loss": 2.6303, "step": 221000 }, { "epoch": 1.1, "learning_rate": 4.945151181859168e-05, "loss": 2.6081, "step": 221500 }, { "epoch": 1.1, "learning_rate": 4.9450273232165594e-05, "loss": 2.6129, "step": 222000 }, { "epoch": 1.1, "learning_rate": 4.944903464573951e-05, "loss": 2.6229, "step": 222500 }, { "epoch": 1.1, "learning_rate": 4.944780101365913e-05, "loss": 2.594, "step": 223000 }, { "epoch": 1.11, "learning_rate": 4.944656242723305e-05, "loss": 2.621, "step": 223500 }, { "epoch": 1.11, "learning_rate": 4.9445323840806966e-05, "loss": 2.6097, "step": 224000 }, { "epoch": 1.11, "learning_rate": 4.944408525438088e-05, "loss": 2.6152, "step": 224500 }, { "epoch": 1.11, "learning_rate": 4.94428466679548e-05, "loss": 2.6206, "step": 225000 }, { "epoch": 1.12, "learning_rate": 4.944161055870157e-05, "loss": 2.6391, "step": 225500 }, { "epoch": 1.12, "learning_rate": 4.9440371972275485e-05, "loss": 2.6487, "step": 226000 }, { "epoch": 1.12, "learning_rate": 4.94391333858494e-05, "loss": 2.5981, "step": 226500 }, { "epoch": 1.12, "learning_rate": 4.943789479942332e-05, "loss": 2.6175, "step": 227000 }, { "epoch": 1.13, "learning_rate": 4.9436656212997236e-05, "loss": 2.6332, "step": 227500 }, { "epoch": 1.13, "learning_rate": 4.9435420103744e-05, "loss": 2.6234, "step": 228000 }, { "epoch": 1.13, "learning_rate": 4.9434181517317915e-05, "loss": 2.6201, "step": 228500 }, { "epoch": 1.13, "learning_rate": 4.943294293089183e-05, "loss": 2.6235, "step": 229000 }, { "epoch": 1.14, "learning_rate": 4.943170434446575e-05, "loss": 2.6339, "step": 229500 }, { "epoch": 1.14, "learning_rate": 4.9430465758039666e-05, "loss": 2.5977, "step": 230000 }, { "epoch": 1.14, "learning_rate": 4.942922717161358e-05, "loss": 2.612, "step": 230500 }, { "epoch": 1.14, "learning_rate": 4.942799106236035e-05, "loss": 2.6263, "step": 231000 }, { "epoch": 1.15, "learning_rate": 4.942675247593427e-05, "loss": 2.6239, "step": 231500 }, { "epoch": 1.15, "learning_rate": 4.9425513889508185e-05, "loss": 2.6085, "step": 232000 }, { "epoch": 1.15, "learning_rate": 4.94242753030821e-05, "loss": 2.6082, "step": 232500 }, { "epoch": 1.15, "learning_rate": 4.942303671665602e-05, "loss": 2.6257, "step": 233000 }, { "epoch": 1.16, "learning_rate": 4.9421798130229936e-05, "loss": 2.6365, "step": 233500 }, { "epoch": 1.16, "learning_rate": 4.942055954380385e-05, "loss": 2.6226, "step": 234000 }, { "epoch": 1.16, "learning_rate": 4.9419323434550615e-05, "loss": 2.6333, "step": 234500 }, { "epoch": 1.16, "learning_rate": 4.941808484812453e-05, "loss": 2.6214, "step": 235000 }, { "epoch": 1.17, "learning_rate": 4.941684626169845e-05, "loss": 2.614, "step": 235500 }, { "epoch": 1.17, "learning_rate": 4.941561015244522e-05, "loss": 2.5987, "step": 236000 }, { "epoch": 1.17, "learning_rate": 4.9414371566019135e-05, "loss": 2.6155, "step": 236500 }, { "epoch": 1.17, "learning_rate": 4.941313297959305e-05, "loss": 2.6424, "step": 237000 }, { "epoch": 1.18, "learning_rate": 4.941189439316697e-05, "loss": 2.6057, "step": 237500 }, { "epoch": 1.18, "learning_rate": 4.9410655806740886e-05, "loss": 2.6237, "step": 238000 }, { "epoch": 1.18, "learning_rate": 4.94094172203148e-05, "loss": 2.6184, "step": 238500 }, { "epoch": 1.18, "learning_rate": 4.940818111106157e-05, "loss": 2.6616, "step": 239000 }, { "epoch": 1.19, "learning_rate": 4.940694252463549e-05, "loss": 2.6191, "step": 239500 }, { "epoch": 1.19, "learning_rate": 4.9405703938209405e-05, "loss": 2.5849, "step": 240000 }, { "epoch": 1.19, "learning_rate": 4.940446535178332e-05, "loss": 2.6178, "step": 240500 }, { "epoch": 1.19, "learning_rate": 4.940322676535723e-05, "loss": 2.6466, "step": 241000 }, { "epoch": 1.2, "learning_rate": 4.940198817893115e-05, "loss": 2.6025, "step": 241500 }, { "epoch": 1.2, "learning_rate": 4.940075206967792e-05, "loss": 2.5923, "step": 242000 }, { "epoch": 1.2, "learning_rate": 4.9399513483251835e-05, "loss": 2.6381, "step": 242500 }, { "epoch": 1.2, "learning_rate": 4.939827489682575e-05, "loss": 2.5819, "step": 243000 }, { "epoch": 1.21, "learning_rate": 4.939703631039967e-05, "loss": 2.6115, "step": 243500 }, { "epoch": 1.21, "learning_rate": 4.9395797723973586e-05, "loss": 2.6289, "step": 244000 }, { "epoch": 1.21, "learning_rate": 4.93945591375475e-05, "loss": 2.5913, "step": 244500 }, { "epoch": 1.21, "learning_rate": 4.939332302829427e-05, "loss": 2.6194, "step": 245000 }, { "epoch": 1.22, "learning_rate": 4.939208444186819e-05, "loss": 2.6232, "step": 245500 }, { "epoch": 1.22, "learning_rate": 4.9390845855442105e-05, "loss": 2.6176, "step": 246000 }, { "epoch": 1.22, "learning_rate": 4.938960726901602e-05, "loss": 2.6158, "step": 246500 }, { "epoch": 1.22, "learning_rate": 4.938836868258994e-05, "loss": 2.6155, "step": 247000 }, { "epoch": 1.23, "learning_rate": 4.9387130096163856e-05, "loss": 2.6407, "step": 247500 }, { "epoch": 1.23, "learning_rate": 4.9385891509737766e-05, "loss": 2.6334, "step": 248000 }, { "epoch": 1.23, "learning_rate": 4.938465292331168e-05, "loss": 2.62, "step": 248500 }, { "epoch": 1.23, "learning_rate": 4.93834143368856e-05, "loss": 2.6266, "step": 249000 }, { "epoch": 1.24, "learning_rate": 4.938217575045952e-05, "loss": 2.612, "step": 249500 }, { "epoch": 1.24, "learning_rate": 4.9380937164033434e-05, "loss": 2.6358, "step": 250000 }, { "epoch": 1.24, "learning_rate": 4.937969857760735e-05, "loss": 2.6289, "step": 250500 }, { "epoch": 1.24, "learning_rate": 4.937846246835412e-05, "loss": 2.6102, "step": 251000 }, { "epoch": 1.25, "learning_rate": 4.9377223881928037e-05, "loss": 2.6004, "step": 251500 }, { "epoch": 1.25, "learning_rate": 4.9375985295501954e-05, "loss": 2.607, "step": 252000 }, { "epoch": 1.25, "learning_rate": 4.937475166342157e-05, "loss": 2.6224, "step": 252500 }, { "epoch": 1.25, "learning_rate": 4.9373513076995484e-05, "loss": 2.6319, "step": 253000 }, { "epoch": 1.26, "learning_rate": 4.93722744905694e-05, "loss": 2.6201, "step": 253500 }, { "epoch": 1.26, "learning_rate": 4.937103590414332e-05, "loss": 2.605, "step": 254000 }, { "epoch": 1.26, "learning_rate": 4.9369797317717235e-05, "loss": 2.6106, "step": 254500 }, { "epoch": 1.26, "learning_rate": 4.936855873129115e-05, "loss": 2.5989, "step": 255000 }, { "epoch": 1.27, "learning_rate": 4.936732014486507e-05, "loss": 2.6324, "step": 255500 }, { "epoch": 1.27, "learning_rate": 4.9366081558438986e-05, "loss": 2.615, "step": 256000 }, { "epoch": 1.27, "learning_rate": 4.9364845449185755e-05, "loss": 2.6136, "step": 256500 }, { "epoch": 1.27, "learning_rate": 4.936360686275967e-05, "loss": 2.6346, "step": 257000 }, { "epoch": 1.28, "learning_rate": 4.936236827633359e-05, "loss": 2.6109, "step": 257500 }, { "epoch": 1.28, "learning_rate": 4.9361129689907506e-05, "loss": 2.629, "step": 258000 }, { "epoch": 1.28, "learning_rate": 4.935989110348142e-05, "loss": 2.6382, "step": 258500 }, { "epoch": 1.28, "learning_rate": 4.935865251705534e-05, "loss": 2.652, "step": 259000 }, { "epoch": 1.29, "learning_rate": 4.9357413930629256e-05, "loss": 2.5949, "step": 259500 }, { "epoch": 1.29, "learning_rate": 4.935617534420317e-05, "loss": 2.6228, "step": 260000 }, { "epoch": 1.29, "learning_rate": 4.935493675777709e-05, "loss": 2.5946, "step": 260500 }, { "epoch": 1.29, "learning_rate": 4.935369817135101e-05, "loss": 2.6195, "step": 261000 }, { "epoch": 1.3, "learning_rate": 4.935245958492492e-05, "loss": 2.6184, "step": 261500 }, { "epoch": 1.3, "learning_rate": 4.9351223475671686e-05, "loss": 2.6138, "step": 262000 }, { "epoch": 1.3, "learning_rate": 4.93499848892456e-05, "loss": 2.5975, "step": 262500 }, { "epoch": 1.3, "learning_rate": 4.934874630281952e-05, "loss": 2.6182, "step": 263000 }, { "epoch": 1.31, "learning_rate": 4.934750771639344e-05, "loss": 2.6013, "step": 263500 }, { "epoch": 1.31, "learning_rate": 4.9346269129967354e-05, "loss": 2.6424, "step": 264000 }, { "epoch": 1.31, "learning_rate": 4.934503302071412e-05, "loss": 2.6397, "step": 264500 }, { "epoch": 1.31, "learning_rate": 4.934379443428804e-05, "loss": 2.6183, "step": 265000 }, { "epoch": 1.32, "learning_rate": 4.9342555847861956e-05, "loss": 2.6236, "step": 265500 }, { "epoch": 1.32, "learning_rate": 4.934131726143587e-05, "loss": 2.6169, "step": 266000 }, { "epoch": 1.32, "learning_rate": 4.934007867500979e-05, "loss": 2.6104, "step": 266500 }, { "epoch": 1.32, "learning_rate": 4.933884008858371e-05, "loss": 2.6056, "step": 267000 }, { "epoch": 1.33, "learning_rate": 4.9337601502157624e-05, "loss": 2.6044, "step": 267500 }, { "epoch": 1.33, "learning_rate": 4.9336362915731534e-05, "loss": 2.5988, "step": 268000 }, { "epoch": 1.33, "learning_rate": 4.933512432930545e-05, "loss": 2.64, "step": 268500 }, { "epoch": 1.33, "learning_rate": 4.933388822005222e-05, "loss": 2.6459, "step": 269000 }, { "epoch": 1.34, "learning_rate": 4.933264963362614e-05, "loss": 2.6069, "step": 269500 }, { "epoch": 1.34, "learning_rate": 4.9331413524372906e-05, "loss": 2.6332, "step": 270000 }, { "epoch": 1.34, "learning_rate": 4.933017493794682e-05, "loss": 2.6211, "step": 270500 }, { "epoch": 1.34, "learning_rate": 4.932893635152074e-05, "loss": 2.637, "step": 271000 }, { "epoch": 1.35, "learning_rate": 4.9327697765094657e-05, "loss": 2.6399, "step": 271500 }, { "epoch": 1.35, "learning_rate": 4.9326459178668573e-05, "loss": 2.6234, "step": 272000 }, { "epoch": 1.35, "learning_rate": 4.9325223069415336e-05, "loss": 2.6221, "step": 272500 }, { "epoch": 1.35, "learning_rate": 4.932398448298925e-05, "loss": 2.6133, "step": 273000 }, { "epoch": 1.36, "learning_rate": 4.932274589656317e-05, "loss": 2.614, "step": 273500 }, { "epoch": 1.36, "learning_rate": 4.9321507310137086e-05, "loss": 2.6026, "step": 274000 }, { "epoch": 1.36, "learning_rate": 4.9320268723711e-05, "loss": 2.5999, "step": 274500 }, { "epoch": 1.36, "learning_rate": 4.931903013728492e-05, "loss": 2.6374, "step": 275000 }, { "epoch": 1.36, "learning_rate": 4.931779155085884e-05, "loss": 2.6182, "step": 275500 }, { "epoch": 1.37, "learning_rate": 4.9316552964432754e-05, "loss": 2.6048, "step": 276000 }, { "epoch": 1.37, "learning_rate": 4.931531685517952e-05, "loss": 2.6308, "step": 276500 }, { "epoch": 1.37, "learning_rate": 4.931408074592629e-05, "loss": 2.6062, "step": 277000 }, { "epoch": 1.37, "learning_rate": 4.931284215950021e-05, "loss": 2.6164, "step": 277500 }, { "epoch": 1.38, "learning_rate": 4.9311603573074126e-05, "loss": 2.6278, "step": 278000 }, { "epoch": 1.38, "learning_rate": 4.9310364986648036e-05, "loss": 2.6067, "step": 278500 }, { "epoch": 1.38, "learning_rate": 4.930912640022195e-05, "loss": 2.6246, "step": 279000 }, { "epoch": 1.38, "learning_rate": 4.930788781379587e-05, "loss": 2.6262, "step": 279500 }, { "epoch": 1.39, "learning_rate": 4.9306649227369786e-05, "loss": 2.6116, "step": 280000 }, { "epoch": 1.39, "learning_rate": 4.93054106409437e-05, "loss": 2.6325, "step": 280500 }, { "epoch": 1.39, "learning_rate": 4.930417453169047e-05, "loss": 2.5979, "step": 281000 }, { "epoch": 1.39, "learning_rate": 4.930293594526439e-05, "loss": 2.6106, "step": 281500 }, { "epoch": 1.4, "learning_rate": 4.9301699836011165e-05, "loss": 2.6094, "step": 282000 }, { "epoch": 1.4, "learning_rate": 4.9300461249585075e-05, "loss": 2.6065, "step": 282500 }, { "epoch": 1.4, "learning_rate": 4.929922266315899e-05, "loss": 2.5977, "step": 283000 }, { "epoch": 1.4, "learning_rate": 4.929798407673291e-05, "loss": 2.6309, "step": 283500 }, { "epoch": 1.41, "learning_rate": 4.9296745490306826e-05, "loss": 2.6134, "step": 284000 }, { "epoch": 1.41, "learning_rate": 4.929550690388074e-05, "loss": 2.6412, "step": 284500 }, { "epoch": 1.41, "learning_rate": 4.929426831745465e-05, "loss": 2.6118, "step": 285000 }, { "epoch": 1.41, "learning_rate": 4.929302973102857e-05, "loss": 2.5835, "step": 285500 }, { "epoch": 1.42, "learning_rate": 4.9291791144602487e-05, "loss": 2.5993, "step": 286000 }, { "epoch": 1.42, "learning_rate": 4.9290555035349255e-05, "loss": 2.614, "step": 286500 }, { "epoch": 1.42, "learning_rate": 4.928931892609603e-05, "loss": 2.5988, "step": 287000 }, { "epoch": 1.42, "learning_rate": 4.928808033966995e-05, "loss": 2.6119, "step": 287500 }, { "epoch": 1.43, "learning_rate": 4.9286841753243865e-05, "loss": 2.6121, "step": 288000 }, { "epoch": 1.43, "learning_rate": 4.928560316681778e-05, "loss": 2.6205, "step": 288500 }, { "epoch": 1.43, "learning_rate": 4.92843645803917e-05, "loss": 2.6136, "step": 289000 }, { "epoch": 1.43, "learning_rate": 4.928312847113846e-05, "loss": 2.6125, "step": 289500 }, { "epoch": 1.44, "learning_rate": 4.928188988471238e-05, "loss": 2.6199, "step": 290000 }, { "epoch": 1.44, "learning_rate": 4.9280651298286295e-05, "loss": 2.612, "step": 290500 }, { "epoch": 1.44, "learning_rate": 4.927941271186021e-05, "loss": 2.5957, "step": 291000 }, { "epoch": 1.44, "learning_rate": 4.927817412543413e-05, "loss": 2.5938, "step": 291500 }, { "epoch": 1.45, "learning_rate": 4.9276935539008045e-05, "loss": 2.6138, "step": 292000 }, { "epoch": 1.45, "learning_rate": 4.9275699429754814e-05, "loss": 2.6162, "step": 292500 }, { "epoch": 1.45, "learning_rate": 4.927446084332873e-05, "loss": 2.6214, "step": 293000 }, { "epoch": 1.45, "learning_rate": 4.927322225690265e-05, "loss": 2.6009, "step": 293500 }, { "epoch": 1.46, "learning_rate": 4.9271983670476565e-05, "loss": 2.6031, "step": 294000 }, { "epoch": 1.46, "learning_rate": 4.927074508405048e-05, "loss": 2.5915, "step": 294500 }, { "epoch": 1.46, "learning_rate": 4.92695064976244e-05, "loss": 2.6289, "step": 295000 }, { "epoch": 1.46, "learning_rate": 4.9268267911198316e-05, "loss": 2.6141, "step": 295500 }, { "epoch": 1.47, "learning_rate": 4.9267029324772226e-05, "loss": 2.6233, "step": 296000 }, { "epoch": 1.47, "learning_rate": 4.926579073834614e-05, "loss": 2.6347, "step": 296500 }, { "epoch": 1.47, "learning_rate": 4.926455215192006e-05, "loss": 2.619, "step": 297000 }, { "epoch": 1.47, "learning_rate": 4.926331604266683e-05, "loss": 2.5973, "step": 297500 }, { "epoch": 1.48, "learning_rate": 4.9262077456240745e-05, "loss": 2.5999, "step": 298000 }, { "epoch": 1.48, "learning_rate": 4.926083886981466e-05, "loss": 2.6199, "step": 298500 }, { "epoch": 1.48, "learning_rate": 4.925960028338857e-05, "loss": 2.6028, "step": 299000 }, { "epoch": 1.48, "learning_rate": 4.925836417413535e-05, "loss": 2.5981, "step": 299500 }, { "epoch": 1.49, "learning_rate": 4.925712806488211e-05, "loss": 2.6159, "step": 300000 }, { "epoch": 1.49, "learning_rate": 4.925588947845603e-05, "loss": 2.6048, "step": 300500 }, { "epoch": 1.49, "learning_rate": 4.9254650892029944e-05, "loss": 2.5982, "step": 301000 }, { "epoch": 1.49, "learning_rate": 4.925341230560386e-05, "loss": 2.6199, "step": 301500 }, { "epoch": 1.5, "learning_rate": 4.925217619635063e-05, "loss": 2.6351, "step": 302000 }, { "epoch": 1.5, "learning_rate": 4.925093760992455e-05, "loss": 2.6151, "step": 302500 }, { "epoch": 1.5, "learning_rate": 4.9249701500671315e-05, "loss": 2.5925, "step": 303000 }, { "epoch": 1.5, "learning_rate": 4.924846291424523e-05, "loss": 2.5977, "step": 303500 }, { "epoch": 1.51, "learning_rate": 4.924722432781915e-05, "loss": 2.6201, "step": 304000 }, { "epoch": 1.51, "learning_rate": 4.9245985741393066e-05, "loss": 2.6132, "step": 304500 }, { "epoch": 1.51, "learning_rate": 4.924474715496698e-05, "loss": 2.6258, "step": 305000 }, { "epoch": 1.51, "learning_rate": 4.92435085685409e-05, "loss": 2.6223, "step": 305500 }, { "epoch": 1.52, "learning_rate": 4.924226998211482e-05, "loss": 2.6127, "step": 306000 }, { "epoch": 1.52, "learning_rate": 4.924103139568873e-05, "loss": 2.6071, "step": 306500 }, { "epoch": 1.52, "learning_rate": 4.9239795286435496e-05, "loss": 2.6147, "step": 307000 }, { "epoch": 1.52, "learning_rate": 4.923855670000941e-05, "loss": 2.6054, "step": 307500 }, { "epoch": 1.53, "learning_rate": 4.923731811358333e-05, "loss": 2.597, "step": 308000 }, { "epoch": 1.53, "learning_rate": 4.9236082004330105e-05, "loss": 2.599, "step": 308500 }, { "epoch": 1.53, "learning_rate": 4.9234843417904016e-05, "loss": 2.5992, "step": 309000 }, { "epoch": 1.53, "learning_rate": 4.923360483147793e-05, "loss": 2.6026, "step": 309500 }, { "epoch": 1.54, "learning_rate": 4.923236624505185e-05, "loss": 2.5938, "step": 310000 }, { "epoch": 1.54, "learning_rate": 4.9231127658625766e-05, "loss": 2.6023, "step": 310500 }, { "epoch": 1.54, "learning_rate": 4.922988907219968e-05, "loss": 2.586, "step": 311000 }, { "epoch": 1.54, "learning_rate": 4.92286504857736e-05, "loss": 2.6111, "step": 311500 }, { "epoch": 1.55, "learning_rate": 4.922741189934752e-05, "loss": 2.6103, "step": 312000 }, { "epoch": 1.55, "learning_rate": 4.922617579009428e-05, "loss": 2.598, "step": 312500 }, { "epoch": 1.55, "learning_rate": 4.9224937203668196e-05, "loss": 2.5982, "step": 313000 }, { "epoch": 1.55, "learning_rate": 4.922369861724211e-05, "loss": 2.6226, "step": 313500 }, { "epoch": 1.56, "learning_rate": 4.922246250798889e-05, "loss": 2.6292, "step": 314000 }, { "epoch": 1.56, "learning_rate": 4.9221223921562806e-05, "loss": 2.6045, "step": 314500 }, { "epoch": 1.56, "learning_rate": 4.921998533513672e-05, "loss": 2.596, "step": 315000 }, { "epoch": 1.56, "learning_rate": 4.921874674871063e-05, "loss": 2.5956, "step": 315500 }, { "epoch": 1.57, "learning_rate": 4.921750816228455e-05, "loss": 2.5923, "step": 316000 }, { "epoch": 1.57, "learning_rate": 4.9216269575858467e-05, "loss": 2.5815, "step": 316500 }, { "epoch": 1.57, "learning_rate": 4.9215030989432383e-05, "loss": 2.6302, "step": 317000 }, { "epoch": 1.57, "learning_rate": 4.92137924030063e-05, "loss": 2.5967, "step": 317500 }, { "epoch": 1.58, "learning_rate": 4.921255381658022e-05, "loss": 2.611, "step": 318000 }, { "epoch": 1.58, "learning_rate": 4.9211315230154134e-05, "loss": 2.5912, "step": 318500 }, { "epoch": 1.58, "learning_rate": 4.921007664372805e-05, "loss": 2.6341, "step": 319000 }, { "epoch": 1.58, "learning_rate": 4.920883805730197e-05, "loss": 2.5869, "step": 319500 }, { "epoch": 1.59, "learning_rate": 4.920759947087588e-05, "loss": 2.648, "step": 320000 }, { "epoch": 1.59, "learning_rate": 4.9206360884449795e-05, "loss": 2.6136, "step": 320500 }, { "epoch": 1.59, "learning_rate": 4.9205124775196564e-05, "loss": 2.5998, "step": 321000 }, { "epoch": 1.59, "learning_rate": 4.920388618877048e-05, "loss": 2.5955, "step": 321500 }, { "epoch": 1.6, "learning_rate": 4.920265007951725e-05, "loss": 2.5852, "step": 322000 }, { "epoch": 1.6, "learning_rate": 4.920141149309117e-05, "loss": 2.6069, "step": 322500 }, { "epoch": 1.6, "learning_rate": 4.9200172906665084e-05, "loss": 2.5923, "step": 323000 }, { "epoch": 1.6, "learning_rate": 4.9198934320239e-05, "loss": 2.6257, "step": 323500 }, { "epoch": 1.61, "learning_rate": 4.919769573381292e-05, "loss": 2.6118, "step": 324000 }, { "epoch": 1.61, "learning_rate": 4.9196459624559686e-05, "loss": 2.6192, "step": 324500 }, { "epoch": 1.61, "learning_rate": 4.9195221038133596e-05, "loss": 2.6028, "step": 325000 }, { "epoch": 1.61, "learning_rate": 4.919398245170751e-05, "loss": 2.6064, "step": 325500 }, { "epoch": 1.62, "learning_rate": 4.919274386528143e-05, "loss": 2.6158, "step": 326000 }, { "epoch": 1.62, "learning_rate": 4.919150527885535e-05, "loss": 2.5847, "step": 326500 }, { "epoch": 1.62, "learning_rate": 4.9190266692429264e-05, "loss": 2.5866, "step": 327000 }, { "epoch": 1.62, "learning_rate": 4.918902810600318e-05, "loss": 2.6172, "step": 327500 }, { "epoch": 1.63, "learning_rate": 4.91877895195771e-05, "loss": 2.6314, "step": 328000 }, { "epoch": 1.63, "learning_rate": 4.918655341032387e-05, "loss": 2.6196, "step": 328500 }, { "epoch": 1.63, "learning_rate": 4.9185317301070636e-05, "loss": 2.6346, "step": 329000 }, { "epoch": 1.63, "learning_rate": 4.918407871464455e-05, "loss": 2.6118, "step": 329500 }, { "epoch": 1.63, "learning_rate": 4.918284012821847e-05, "loss": 2.5716, "step": 330000 }, { "epoch": 1.64, "learning_rate": 4.9181601541792386e-05, "loss": 2.626, "step": 330500 }, { "epoch": 1.64, "learning_rate": 4.9180362955366297e-05, "loss": 2.6039, "step": 331000 }, { "epoch": 1.64, "learning_rate": 4.9179124368940213e-05, "loss": 2.5957, "step": 331500 }, { "epoch": 1.64, "learning_rate": 4.917788825968699e-05, "loss": 2.6115, "step": 332000 }, { "epoch": 1.65, "learning_rate": 4.9176649673260906e-05, "loss": 2.6013, "step": 332500 }, { "epoch": 1.65, "learning_rate": 4.917541108683482e-05, "loss": 2.6131, "step": 333000 }, { "epoch": 1.65, "learning_rate": 4.917417250040874e-05, "loss": 2.5955, "step": 333500 }, { "epoch": 1.65, "learning_rate": 4.917293391398265e-05, "loss": 2.615, "step": 334000 }, { "epoch": 1.66, "learning_rate": 4.917169780472942e-05, "loss": 2.5883, "step": 334500 }, { "epoch": 1.66, "learning_rate": 4.9170459218303336e-05, "loss": 2.6009, "step": 335000 }, { "epoch": 1.66, "learning_rate": 4.916922063187725e-05, "loss": 2.6009, "step": 335500 }, { "epoch": 1.66, "learning_rate": 4.916798204545117e-05, "loss": 2.6285, "step": 336000 }, { "epoch": 1.67, "learning_rate": 4.916674593619794e-05, "loss": 2.6007, "step": 336500 }, { "epoch": 1.67, "learning_rate": 4.9165507349771855e-05, "loss": 2.5952, "step": 337000 }, { "epoch": 1.67, "learning_rate": 4.916426876334577e-05, "loss": 2.5738, "step": 337500 }, { "epoch": 1.67, "learning_rate": 4.916303017691969e-05, "loss": 2.6101, "step": 338000 }, { "epoch": 1.68, "learning_rate": 4.916179406766646e-05, "loss": 2.5811, "step": 338500 }, { "epoch": 1.68, "learning_rate": 4.9160555481240375e-05, "loss": 2.5833, "step": 339000 }, { "epoch": 1.68, "learning_rate": 4.915931689481429e-05, "loss": 2.6013, "step": 339500 }, { "epoch": 1.68, "learning_rate": 4.915807830838821e-05, "loss": 2.6163, "step": 340000 }, { "epoch": 1.69, "learning_rate": 4.9156839721962126e-05, "loss": 2.5969, "step": 340500 }, { "epoch": 1.69, "learning_rate": 4.9155601135536036e-05, "loss": 2.5872, "step": 341000 }, { "epoch": 1.69, "learning_rate": 4.9154365026282805e-05, "loss": 2.5693, "step": 341500 }, { "epoch": 1.69, "learning_rate": 4.915312643985672e-05, "loss": 2.5994, "step": 342000 }, { "epoch": 1.7, "learning_rate": 4.915188785343064e-05, "loss": 2.6187, "step": 342500 }, { "epoch": 1.7, "learning_rate": 4.9150649267004555e-05, "loss": 2.5982, "step": 343000 }, { "epoch": 1.7, "learning_rate": 4.914941068057847e-05, "loss": 2.5968, "step": 343500 }, { "epoch": 1.7, "learning_rate": 4.914817209415239e-05, "loss": 2.5756, "step": 344000 }, { "epoch": 1.71, "learning_rate": 4.9146933507726306e-05, "loss": 2.5962, "step": 344500 }, { "epoch": 1.71, "learning_rate": 4.914569492130022e-05, "loss": 2.6133, "step": 345000 }, { "epoch": 1.71, "learning_rate": 4.914445881204699e-05, "loss": 2.6052, "step": 345500 }, { "epoch": 1.71, "learning_rate": 4.914322022562091e-05, "loss": 2.6143, "step": 346000 }, { "epoch": 1.72, "learning_rate": 4.9141981639194826e-05, "loss": 2.6077, "step": 346500 }, { "epoch": 1.72, "learning_rate": 4.914074552994159e-05, "loss": 2.6198, "step": 347000 }, { "epoch": 1.72, "learning_rate": 4.9139506943515505e-05, "loss": 2.6435, "step": 347500 }, { "epoch": 1.72, "learning_rate": 4.913826835708942e-05, "loss": 2.6145, "step": 348000 }, { "epoch": 1.73, "learning_rate": 4.913702977066334e-05, "loss": 2.5931, "step": 348500 }, { "epoch": 1.73, "learning_rate": 4.9135791184237256e-05, "loss": 2.6031, "step": 349000 }, { "epoch": 1.73, "learning_rate": 4.913455259781117e-05, "loss": 2.599, "step": 349500 }, { "epoch": 1.73, "learning_rate": 4.913331401138509e-05, "loss": 2.6163, "step": 350000 }, { "epoch": 1.74, "learning_rate": 4.9132075424959006e-05, "loss": 2.6104, "step": 350500 }, { "epoch": 1.74, "learning_rate": 4.913083683853292e-05, "loss": 2.6251, "step": 351000 }, { "epoch": 1.74, "learning_rate": 4.912959825210684e-05, "loss": 2.6044, "step": 351500 }, { "epoch": 1.74, "learning_rate": 4.912835966568076e-05, "loss": 2.6131, "step": 352000 }, { "epoch": 1.75, "learning_rate": 4.9127121079254674e-05, "loss": 2.5796, "step": 352500 }, { "epoch": 1.75, "learning_rate": 4.9125882492828584e-05, "loss": 2.6012, "step": 353000 }, { "epoch": 1.75, "learning_rate": 4.912464638357536e-05, "loss": 2.6166, "step": 353500 }, { "epoch": 1.75, "learning_rate": 4.912340779714928e-05, "loss": 2.6058, "step": 354000 }, { "epoch": 1.76, "learning_rate": 4.912216921072319e-05, "loss": 2.6076, "step": 354500 }, { "epoch": 1.76, "learning_rate": 4.9120930624297104e-05, "loss": 2.5742, "step": 355000 }, { "epoch": 1.76, "learning_rate": 4.911969203787102e-05, "loss": 2.5899, "step": 355500 }, { "epoch": 1.76, "learning_rate": 4.911845592861779e-05, "loss": 2.6106, "step": 356000 }, { "epoch": 1.77, "learning_rate": 4.9117217342191706e-05, "loss": 2.6205, "step": 356500 }, { "epoch": 1.77, "learning_rate": 4.9115978755765623e-05, "loss": 2.6254, "step": 357000 }, { "epoch": 1.77, "learning_rate": 4.911474016933954e-05, "loss": 2.6239, "step": 357500 }, { "epoch": 1.77, "learning_rate": 4.911350158291346e-05, "loss": 2.6001, "step": 358000 }, { "epoch": 1.78, "learning_rate": 4.9112262996487374e-05, "loss": 2.5861, "step": 358500 }, { "epoch": 1.78, "learning_rate": 4.911102688723414e-05, "loss": 2.6104, "step": 359000 }, { "epoch": 1.78, "learning_rate": 4.910978830080806e-05, "loss": 2.5978, "step": 359500 }, { "epoch": 1.78, "learning_rate": 4.910854971438198e-05, "loss": 2.5799, "step": 360000 }, { "epoch": 1.79, "learning_rate": 4.910731360512874e-05, "loss": 2.6136, "step": 360500 }, { "epoch": 1.79, "learning_rate": 4.9106075018702656e-05, "loss": 2.6105, "step": 361000 }, { "epoch": 1.79, "learning_rate": 4.910483643227657e-05, "loss": 2.5947, "step": 361500 }, { "epoch": 1.79, "learning_rate": 4.910359784585049e-05, "loss": 2.5731, "step": 362000 }, { "epoch": 1.8, "learning_rate": 4.910236421377011e-05, "loss": 2.6102, "step": 362500 }, { "epoch": 1.8, "learning_rate": 4.910112562734403e-05, "loss": 2.6048, "step": 363000 }, { "epoch": 1.8, "learning_rate": 4.9099887040917944e-05, "loss": 2.6181, "step": 363500 }, { "epoch": 1.8, "learning_rate": 4.909864845449186e-05, "loss": 2.5967, "step": 364000 }, { "epoch": 1.81, "learning_rate": 4.909740986806577e-05, "loss": 2.6098, "step": 364500 }, { "epoch": 1.81, "learning_rate": 4.909617128163969e-05, "loss": 2.6129, "step": 365000 }, { "epoch": 1.81, "learning_rate": 4.9094932695213605e-05, "loss": 2.5773, "step": 365500 }, { "epoch": 1.81, "learning_rate": 4.9093696585960374e-05, "loss": 2.6096, "step": 366000 }, { "epoch": 1.82, "learning_rate": 4.909245799953429e-05, "loss": 2.6181, "step": 366500 }, { "epoch": 1.82, "learning_rate": 4.909121941310821e-05, "loss": 2.5896, "step": 367000 }, { "epoch": 1.82, "learning_rate": 4.9089980826682125e-05, "loss": 2.5851, "step": 367500 }, { "epoch": 1.82, "learning_rate": 4.90887447174289e-05, "loss": 2.6158, "step": 368000 }, { "epoch": 1.83, "learning_rate": 4.908750613100282e-05, "loss": 2.5965, "step": 368500 }, { "epoch": 1.83, "learning_rate": 4.908626754457673e-05, "loss": 2.6232, "step": 369000 }, { "epoch": 1.83, "learning_rate": 4.9085028958150644e-05, "loss": 2.5897, "step": 369500 }, { "epoch": 1.83, "learning_rate": 4.908379037172456e-05, "loss": 2.577, "step": 370000 }, { "epoch": 1.84, "learning_rate": 4.908255178529848e-05, "loss": 2.6109, "step": 370500 }, { "epoch": 1.84, "learning_rate": 4.9081313198872395e-05, "loss": 2.602, "step": 371000 }, { "epoch": 1.84, "learning_rate": 4.9080074612446305e-05, "loss": 2.5674, "step": 371500 }, { "epoch": 1.84, "learning_rate": 4.907883602602022e-05, "loss": 2.5986, "step": 372000 }, { "epoch": 1.85, "learning_rate": 4.907759743959414e-05, "loss": 2.6051, "step": 372500 }, { "epoch": 1.85, "learning_rate": 4.9076358853168056e-05, "loss": 2.5999, "step": 373000 }, { "epoch": 1.85, "learning_rate": 4.907512026674197e-05, "loss": 2.6057, "step": 373500 }, { "epoch": 1.85, "learning_rate": 4.907388168031589e-05, "loss": 2.5942, "step": 374000 }, { "epoch": 1.86, "learning_rate": 4.907264309388981e-05, "loss": 2.6297, "step": 374500 }, { "epoch": 1.86, "learning_rate": 4.9071404507463724e-05, "loss": 2.5977, "step": 375000 }, { "epoch": 1.86, "learning_rate": 4.907016592103764e-05, "loss": 2.5997, "step": 375500 }, { "epoch": 1.86, "learning_rate": 4.906892733461156e-05, "loss": 2.6261, "step": 376000 }, { "epoch": 1.87, "learning_rate": 4.906769370253118e-05, "loss": 2.6139, "step": 376500 }, { "epoch": 1.87, "learning_rate": 4.9066455116105095e-05, "loss": 2.6229, "step": 377000 }, { "epoch": 1.87, "learning_rate": 4.906521652967901e-05, "loss": 2.6, "step": 377500 }, { "epoch": 1.87, "learning_rate": 4.906397794325292e-05, "loss": 2.5966, "step": 378000 }, { "epoch": 1.88, "learning_rate": 4.906273935682684e-05, "loss": 2.5734, "step": 378500 }, { "epoch": 1.88, "learning_rate": 4.9061500770400756e-05, "loss": 2.6094, "step": 379000 }, { "epoch": 1.88, "learning_rate": 4.906026218397467e-05, "loss": 2.5933, "step": 379500 }, { "epoch": 1.88, "learning_rate": 4.905902359754859e-05, "loss": 2.6042, "step": 380000 }, { "epoch": 1.89, "learning_rate": 4.905778501112251e-05, "loss": 2.6221, "step": 380500 }, { "epoch": 1.89, "learning_rate": 4.9056548901869276e-05, "loss": 2.5897, "step": 381000 }, { "epoch": 1.89, "learning_rate": 4.905531279261605e-05, "loss": 2.5985, "step": 381500 }, { "epoch": 1.89, "learning_rate": 4.9054076683362813e-05, "loss": 2.6131, "step": 382000 }, { "epoch": 1.9, "learning_rate": 4.905283809693673e-05, "loss": 2.6016, "step": 382500 }, { "epoch": 1.9, "learning_rate": 4.905159951051065e-05, "loss": 2.6079, "step": 383000 }, { "epoch": 1.9, "learning_rate": 4.9050360924084564e-05, "loss": 2.6093, "step": 383500 }, { "epoch": 1.9, "learning_rate": 4.904912233765848e-05, "loss": 2.6203, "step": 384000 }, { "epoch": 1.9, "learning_rate": 4.90478837512324e-05, "loss": 2.6072, "step": 384500 }, { "epoch": 1.91, "learning_rate": 4.904664516480631e-05, "loss": 2.5929, "step": 385000 }, { "epoch": 1.91, "learning_rate": 4.9045406578380225e-05, "loss": 2.6029, "step": 385500 }, { "epoch": 1.91, "learning_rate": 4.9044170469127e-05, "loss": 2.6103, "step": 386000 }, { "epoch": 1.91, "learning_rate": 4.904293188270092e-05, "loss": 2.5676, "step": 386500 }, { "epoch": 1.92, "learning_rate": 4.9041693296274835e-05, "loss": 2.6016, "step": 387000 }, { "epoch": 1.92, "learning_rate": 4.904045470984875e-05, "loss": 2.5875, "step": 387500 }, { "epoch": 1.92, "learning_rate": 4.903921612342266e-05, "loss": 2.6147, "step": 388000 }, { "epoch": 1.92, "learning_rate": 4.903797753699658e-05, "loss": 2.573, "step": 388500 }, { "epoch": 1.93, "learning_rate": 4.903674142774335e-05, "loss": 2.6102, "step": 389000 }, { "epoch": 1.93, "learning_rate": 4.9035502841317264e-05, "loss": 2.6018, "step": 389500 }, { "epoch": 1.93, "learning_rate": 4.903426425489118e-05, "loss": 2.5944, "step": 390000 }, { "epoch": 1.93, "learning_rate": 4.90330256684651e-05, "loss": 2.6172, "step": 390500 }, { "epoch": 1.94, "learning_rate": 4.903178708203901e-05, "loss": 2.5899, "step": 391000 }, { "epoch": 1.94, "learning_rate": 4.9030548495612925e-05, "loss": 2.5876, "step": 391500 }, { "epoch": 1.94, "learning_rate": 4.902930990918684e-05, "loss": 2.5859, "step": 392000 }, { "epoch": 1.94, "learning_rate": 4.902807132276076e-05, "loss": 2.586, "step": 392500 }, { "epoch": 1.95, "learning_rate": 4.9026832736334676e-05, "loss": 2.6084, "step": 393000 }, { "epoch": 1.95, "learning_rate": 4.902559662708145e-05, "loss": 2.6038, "step": 393500 }, { "epoch": 1.95, "learning_rate": 4.9024360517828214e-05, "loss": 2.5836, "step": 394000 }, { "epoch": 1.95, "learning_rate": 4.902312193140213e-05, "loss": 2.5929, "step": 394500 }, { "epoch": 1.96, "learning_rate": 4.90218858221489e-05, "loss": 2.5957, "step": 395000 }, { "epoch": 1.96, "learning_rate": 4.9020647235722816e-05, "loss": 2.582, "step": 395500 }, { "epoch": 1.96, "learning_rate": 4.901940864929673e-05, "loss": 2.6287, "step": 396000 }, { "epoch": 1.96, "learning_rate": 4.901817006287065e-05, "loss": 2.6059, "step": 396500 }, { "epoch": 1.97, "learning_rate": 4.901693147644457e-05, "loss": 2.5855, "step": 397000 }, { "epoch": 1.97, "learning_rate": 4.9015692890018484e-05, "loss": 2.5951, "step": 397500 }, { "epoch": 1.97, "learning_rate": 4.90144543035924e-05, "loss": 2.5817, "step": 398000 }, { "epoch": 1.97, "learning_rate": 4.901321571716632e-05, "loss": 2.6157, "step": 398500 }, { "epoch": 1.98, "learning_rate": 4.9011977130740235e-05, "loss": 2.596, "step": 399000 }, { "epoch": 1.98, "learning_rate": 4.901073854431415e-05, "loss": 2.5786, "step": 399500 }, { "epoch": 1.98, "learning_rate": 4.900949995788807e-05, "loss": 2.5946, "step": 400000 }, { "epoch": 1.98, "learning_rate": 4.900826384863483e-05, "loss": 2.6155, "step": 400500 }, { "epoch": 1.99, "learning_rate": 4.900702526220875e-05, "loss": 2.5889, "step": 401000 }, { "epoch": 1.99, "learning_rate": 4.9005786675782665e-05, "loss": 2.5796, "step": 401500 }, { "epoch": 1.99, "learning_rate": 4.900454808935658e-05, "loss": 2.5993, "step": 402000 }, { "epoch": 1.99, "learning_rate": 4.900331198010335e-05, "loss": 2.5894, "step": 402500 }, { "epoch": 2.0, "learning_rate": 4.900207339367727e-05, "loss": 2.6215, "step": 403000 }, { "epoch": 2.0, "learning_rate": 4.9000834807251184e-05, "loss": 2.5994, "step": 403500 }, { "epoch": 2.0, "eval_accuracy": 0.6286475772528078, "eval_accuracy_mlm": 0.5813388926339267, "eval_accuracy_nsp": 0.8516075133648939, "eval_loss": 2.524798631668091, "eval_runtime": 146.063, "eval_samples_per_second": 1745.542, "eval_steps_per_second": 72.736, "step": 403686 }, { "epoch": 2.0, "learning_rate": 4.89995962208251e-05, "loss": 2.5887, "step": 404000 }, { "epoch": 2.0, "learning_rate": 4.899835763439902e-05, "loss": 2.5834, "step": 404500 }, { "epoch": 2.01, "learning_rate": 4.8997119047972935e-05, "loss": 2.5753, "step": 405000 }, { "epoch": 2.01, "learning_rate": 4.8995882938719704e-05, "loss": 2.5542, "step": 405500 }, { "epoch": 2.01, "learning_rate": 4.8994644352293614e-05, "loss": 2.5592, "step": 406000 }, { "epoch": 2.01, "learning_rate": 4.899340576586753e-05, "loss": 2.5818, "step": 406500 }, { "epoch": 2.02, "learning_rate": 4.899216717944145e-05, "loss": 2.5571, "step": 407000 }, { "epoch": 2.02, "learning_rate": 4.8990928593015365e-05, "loss": 2.5746, "step": 407500 }, { "epoch": 2.02, "learning_rate": 4.8989692483762133e-05, "loss": 2.5663, "step": 408000 }, { "epoch": 2.02, "learning_rate": 4.898845389733605e-05, "loss": 2.5716, "step": 408500 }, { "epoch": 2.03, "learning_rate": 4.898721778808282e-05, "loss": 2.5595, "step": 409000 }, { "epoch": 2.03, "learning_rate": 4.8985979201656736e-05, "loss": 2.5811, "step": 409500 }, { "epoch": 2.03, "learning_rate": 4.898474061523065e-05, "loss": 2.5488, "step": 410000 }, { "epoch": 2.03, "learning_rate": 4.898350202880457e-05, "loss": 2.5651, "step": 410500 }, { "epoch": 2.04, "learning_rate": 4.898226344237849e-05, "loss": 2.5839, "step": 411000 }, { "epoch": 2.04, "learning_rate": 4.8981024855952404e-05, "loss": 2.577, "step": 411500 }, { "epoch": 2.04, "learning_rate": 4.897978626952632e-05, "loss": 2.5854, "step": 412000 }, { "epoch": 2.04, "learning_rate": 4.897855016027308e-05, "loss": 2.5607, "step": 412500 }, { "epoch": 2.05, "learning_rate": 4.8977311573847e-05, "loss": 2.5672, "step": 413000 }, { "epoch": 2.05, "learning_rate": 4.897607298742092e-05, "loss": 2.5866, "step": 413500 }, { "epoch": 2.05, "learning_rate": 4.8974834400994834e-05, "loss": 2.6007, "step": 414000 }, { "epoch": 2.05, "learning_rate": 4.897359581456875e-05, "loss": 2.5648, "step": 414500 }, { "epoch": 2.06, "learning_rate": 4.897235722814267e-05, "loss": 2.5832, "step": 415000 }, { "epoch": 2.06, "learning_rate": 4.8971118641716584e-05, "loss": 2.5968, "step": 415500 }, { "epoch": 2.06, "learning_rate": 4.896988253246335e-05, "loss": 2.5548, "step": 416000 }, { "epoch": 2.06, "learning_rate": 4.896864394603727e-05, "loss": 2.5688, "step": 416500 }, { "epoch": 2.07, "learning_rate": 4.896740535961119e-05, "loss": 2.5836, "step": 417000 }, { "epoch": 2.07, "learning_rate": 4.8966166773185104e-05, "loss": 2.571, "step": 417500 }, { "epoch": 2.07, "learning_rate": 4.896492818675902e-05, "loss": 2.5721, "step": 418000 }, { "epoch": 2.07, "learning_rate": 4.896368960033294e-05, "loss": 2.561, "step": 418500 }, { "epoch": 2.08, "learning_rate": 4.8962451013906855e-05, "loss": 2.5737, "step": 419000 }, { "epoch": 2.08, "learning_rate": 4.8961212427480765e-05, "loss": 2.559, "step": 419500 }, { "epoch": 2.08, "learning_rate": 4.895997384105468e-05, "loss": 2.5928, "step": 420000 }, { "epoch": 2.08, "learning_rate": 4.89587352546286e-05, "loss": 2.5754, "step": 420500 }, { "epoch": 2.09, "learning_rate": 4.895749914537537e-05, "loss": 2.5631, "step": 421000 }, { "epoch": 2.09, "learning_rate": 4.8956260558949285e-05, "loss": 2.5715, "step": 421500 }, { "epoch": 2.09, "learning_rate": 4.89550219725232e-05, "loss": 2.5942, "step": 422000 }, { "epoch": 2.09, "learning_rate": 4.895378338609712e-05, "loss": 2.5579, "step": 422500 }, { "epoch": 2.1, "learning_rate": 4.8952544799671035e-05, "loss": 2.5612, "step": 423000 }, { "epoch": 2.1, "learning_rate": 4.895130621324495e-05, "loss": 2.5945, "step": 423500 }, { "epoch": 2.1, "learning_rate": 4.895007010399172e-05, "loss": 2.547, "step": 424000 }, { "epoch": 2.1, "learning_rate": 4.894883151756564e-05, "loss": 2.5908, "step": 424500 }, { "epoch": 2.11, "learning_rate": 4.8947592931139555e-05, "loss": 2.5855, "step": 425000 }, { "epoch": 2.11, "learning_rate": 4.894635434471347e-05, "loss": 2.5796, "step": 425500 }, { "epoch": 2.11, "learning_rate": 4.894511575828739e-05, "loss": 2.5654, "step": 426000 }, { "epoch": 2.11, "learning_rate": 4.89438771718613e-05, "loss": 2.5478, "step": 426500 }, { "epoch": 2.12, "learning_rate": 4.8942638585435216e-05, "loss": 2.5889, "step": 427000 }, { "epoch": 2.12, "learning_rate": 4.894139999900913e-05, "loss": 2.5634, "step": 427500 }, { "epoch": 2.12, "learning_rate": 4.894016141258305e-05, "loss": 2.5461, "step": 428000 }, { "epoch": 2.12, "learning_rate": 4.893892530332982e-05, "loss": 2.5366, "step": 428500 }, { "epoch": 2.13, "learning_rate": 4.893768919407659e-05, "loss": 2.5651, "step": 429000 }, { "epoch": 2.13, "learning_rate": 4.8936450607650504e-05, "loss": 2.5911, "step": 429500 }, { "epoch": 2.13, "learning_rate": 4.893521202122442e-05, "loss": 2.5801, "step": 430000 }, { "epoch": 2.13, "learning_rate": 4.893397591197118e-05, "loss": 2.5771, "step": 430500 }, { "epoch": 2.14, "learning_rate": 4.89327373255451e-05, "loss": 2.5864, "step": 431000 }, { "epoch": 2.14, "learning_rate": 4.8931501216291876e-05, "loss": 2.5512, "step": 431500 }, { "epoch": 2.14, "learning_rate": 4.893026262986579e-05, "loss": 2.5651, "step": 432000 }, { "epoch": 2.14, "learning_rate": 4.89290240434397e-05, "loss": 2.5588, "step": 432500 }, { "epoch": 2.15, "learning_rate": 4.892778545701362e-05, "loss": 2.5744, "step": 433000 }, { "epoch": 2.15, "learning_rate": 4.892654687058754e-05, "loss": 2.5913, "step": 433500 }, { "epoch": 2.15, "learning_rate": 4.8925308284161454e-05, "loss": 2.5761, "step": 434000 }, { "epoch": 2.15, "learning_rate": 4.892406969773537e-05, "loss": 2.5819, "step": 434500 }, { "epoch": 2.16, "learning_rate": 4.892283111130929e-05, "loss": 2.5598, "step": 435000 }, { "epoch": 2.16, "learning_rate": 4.8921592524883204e-05, "loss": 2.574, "step": 435500 }, { "epoch": 2.16, "learning_rate": 4.892035393845712e-05, "loss": 2.5842, "step": 436000 }, { "epoch": 2.16, "learning_rate": 4.891911535203104e-05, "loss": 2.5627, "step": 436500 }, { "epoch": 2.17, "learning_rate": 4.8917876765604955e-05, "loss": 2.5587, "step": 437000 }, { "epoch": 2.17, "learning_rate": 4.891663817917887e-05, "loss": 2.5872, "step": 437500 }, { "epoch": 2.17, "learning_rate": 4.891539959275279e-05, "loss": 2.5496, "step": 438000 }, { "epoch": 2.17, "learning_rate": 4.891416348349955e-05, "loss": 2.5884, "step": 438500 }, { "epoch": 2.17, "learning_rate": 4.891292489707347e-05, "loss": 2.5475, "step": 439000 }, { "epoch": 2.18, "learning_rate": 4.8911686310647385e-05, "loss": 2.5432, "step": 439500 }, { "epoch": 2.18, "learning_rate": 4.8910450201394154e-05, "loss": 2.5509, "step": 440000 }, { "epoch": 2.18, "learning_rate": 4.890921161496807e-05, "loss": 2.5689, "step": 440500 }, { "epoch": 2.18, "learning_rate": 4.890797302854199e-05, "loss": 2.5687, "step": 441000 }, { "epoch": 2.19, "learning_rate": 4.8906734442115904e-05, "loss": 2.5651, "step": 441500 }, { "epoch": 2.19, "learning_rate": 4.890549585568982e-05, "loss": 2.5761, "step": 442000 }, { "epoch": 2.19, "learning_rate": 4.8904267177955146e-05, "loss": 2.5642, "step": 442500 }, { "epoch": 2.19, "learning_rate": 4.890302859152906e-05, "loss": 2.5557, "step": 443000 }, { "epoch": 2.2, "learning_rate": 4.890179000510298e-05, "loss": 2.575, "step": 443500 }, { "epoch": 2.2, "learning_rate": 4.890055141867689e-05, "loss": 2.5907, "step": 444000 }, { "epoch": 2.2, "learning_rate": 4.889931283225081e-05, "loss": 2.5731, "step": 444500 }, { "epoch": 2.2, "learning_rate": 4.8898074245824724e-05, "loss": 2.5548, "step": 445000 }, { "epoch": 2.21, "learning_rate": 4.889683565939864e-05, "loss": 2.5467, "step": 445500 }, { "epoch": 2.21, "learning_rate": 4.889559707297256e-05, "loss": 2.592, "step": 446000 }, { "epoch": 2.21, "learning_rate": 4.8894358486546475e-05, "loss": 2.5996, "step": 446500 }, { "epoch": 2.21, "learning_rate": 4.889311990012039e-05, "loss": 2.5768, "step": 447000 }, { "epoch": 2.22, "learning_rate": 4.889188131369431e-05, "loss": 2.5667, "step": 447500 }, { "epoch": 2.22, "learning_rate": 4.8890642727268225e-05, "loss": 2.5699, "step": 448000 }, { "epoch": 2.22, "learning_rate": 4.888940414084214e-05, "loss": 2.5794, "step": 448500 }, { "epoch": 2.22, "learning_rate": 4.888816555441606e-05, "loss": 2.5778, "step": 449000 }, { "epoch": 2.23, "learning_rate": 4.8886926967989976e-05, "loss": 2.5842, "step": 449500 }, { "epoch": 2.23, "learning_rate": 4.888568838156389e-05, "loss": 2.5859, "step": 450000 }, { "epoch": 2.23, "learning_rate": 4.888444979513781e-05, "loss": 2.5658, "step": 450500 }, { "epoch": 2.23, "learning_rate": 4.888321120871172e-05, "loss": 2.5744, "step": 451000 }, { "epoch": 2.24, "learning_rate": 4.888197262228564e-05, "loss": 2.5542, "step": 451500 }, { "epoch": 2.24, "learning_rate": 4.8880734035859554e-05, "loss": 2.5659, "step": 452000 }, { "epoch": 2.24, "learning_rate": 4.887949544943347e-05, "loss": 2.5667, "step": 452500 }, { "epoch": 2.24, "learning_rate": 4.887825686300739e-05, "loss": 2.5916, "step": 453000 }, { "epoch": 2.25, "learning_rate": 4.887702075375416e-05, "loss": 2.5724, "step": 453500 }, { "epoch": 2.25, "learning_rate": 4.8875782167328074e-05, "loss": 2.5604, "step": 454000 }, { "epoch": 2.25, "learning_rate": 4.887454358090199e-05, "loss": 2.556, "step": 454500 }, { "epoch": 2.25, "learning_rate": 4.887330499447591e-05, "loss": 2.569, "step": 455000 }, { "epoch": 2.26, "learning_rate": 4.8872066408049824e-05, "loss": 2.5667, "step": 455500 }, { "epoch": 2.26, "learning_rate": 4.887082782162374e-05, "loss": 2.5849, "step": 456000 }, { "epoch": 2.26, "learning_rate": 4.886958923519765e-05, "loss": 2.5702, "step": 456500 }, { "epoch": 2.26, "learning_rate": 4.886835064877157e-05, "loss": 2.5687, "step": 457000 }, { "epoch": 2.27, "learning_rate": 4.886711453951834e-05, "loss": 2.5787, "step": 457500 }, { "epoch": 2.27, "learning_rate": 4.8865875953092254e-05, "loss": 2.5678, "step": 458000 }, { "epoch": 2.27, "learning_rate": 4.886463736666617e-05, "loss": 2.5795, "step": 458500 }, { "epoch": 2.27, "learning_rate": 4.8863401257412947e-05, "loss": 2.5772, "step": 459000 }, { "epoch": 2.28, "learning_rate": 4.8862162670986864e-05, "loss": 2.5546, "step": 459500 }, { "epoch": 2.28, "learning_rate": 4.886092408456078e-05, "loss": 2.5824, "step": 460000 }, { "epoch": 2.28, "learning_rate": 4.885968549813469e-05, "loss": 2.5854, "step": 460500 }, { "epoch": 2.28, "learning_rate": 4.885844691170861e-05, "loss": 2.5993, "step": 461000 }, { "epoch": 2.29, "learning_rate": 4.8857208325282524e-05, "loss": 2.5536, "step": 461500 }, { "epoch": 2.29, "learning_rate": 4.885596973885644e-05, "loss": 2.5607, "step": 462000 }, { "epoch": 2.29, "learning_rate": 4.885473362960321e-05, "loss": 2.5777, "step": 462500 }, { "epoch": 2.29, "learning_rate": 4.885349504317713e-05, "loss": 2.5489, "step": 463000 }, { "epoch": 2.3, "learning_rate": 4.885225645675104e-05, "loss": 2.5935, "step": 463500 }, { "epoch": 2.3, "learning_rate": 4.8851017870324954e-05, "loss": 2.5599, "step": 464000 }, { "epoch": 2.3, "learning_rate": 4.884978176107173e-05, "loss": 2.5644, "step": 464500 }, { "epoch": 2.3, "learning_rate": 4.884854317464565e-05, "loss": 2.5661, "step": 465000 }, { "epoch": 2.31, "learning_rate": 4.8847304588219564e-05, "loss": 2.5732, "step": 465500 }, { "epoch": 2.31, "learning_rate": 4.884606600179348e-05, "loss": 2.5702, "step": 466000 }, { "epoch": 2.31, "learning_rate": 4.88448274153674e-05, "loss": 2.5586, "step": 466500 }, { "epoch": 2.31, "learning_rate": 4.884359130611416e-05, "loss": 2.574, "step": 467000 }, { "epoch": 2.32, "learning_rate": 4.8842352719688076e-05, "loss": 2.5618, "step": 467500 }, { "epoch": 2.32, "learning_rate": 4.884111413326199e-05, "loss": 2.5526, "step": 468000 }, { "epoch": 2.32, "learning_rate": 4.883987554683591e-05, "loss": 2.5749, "step": 468500 }, { "epoch": 2.32, "learning_rate": 4.883863696040983e-05, "loss": 2.5694, "step": 469000 }, { "epoch": 2.33, "learning_rate": 4.8837398373983744e-05, "loss": 2.5888, "step": 469500 }, { "epoch": 2.33, "learning_rate": 4.8836159787557654e-05, "loss": 2.5595, "step": 470000 }, { "epoch": 2.33, "learning_rate": 4.883492120113157e-05, "loss": 2.5967, "step": 470500 }, { "epoch": 2.33, "learning_rate": 4.883368261470549e-05, "loss": 2.5484, "step": 471000 }, { "epoch": 2.34, "learning_rate": 4.8832444028279405e-05, "loss": 2.5715, "step": 471500 }, { "epoch": 2.34, "learning_rate": 4.883120544185332e-05, "loss": 2.5756, "step": 472000 }, { "epoch": 2.34, "learning_rate": 4.882996685542724e-05, "loss": 2.5513, "step": 472500 }, { "epoch": 2.34, "learning_rate": 4.8828728269001156e-05, "loss": 2.5952, "step": 473000 }, { "epoch": 2.35, "learning_rate": 4.8827492159747925e-05, "loss": 2.5728, "step": 473500 }, { "epoch": 2.35, "learning_rate": 4.882625357332184e-05, "loss": 2.577, "step": 474000 }, { "epoch": 2.35, "learning_rate": 4.882501498689576e-05, "loss": 2.6014, "step": 474500 }, { "epoch": 2.35, "learning_rate": 4.882377887764253e-05, "loss": 2.5732, "step": 475000 }, { "epoch": 2.36, "learning_rate": 4.8822540291216444e-05, "loss": 2.559, "step": 475500 }, { "epoch": 2.36, "learning_rate": 4.8821301704790354e-05, "loss": 2.5574, "step": 476000 }, { "epoch": 2.36, "learning_rate": 4.882006311836427e-05, "loss": 2.5518, "step": 476500 }, { "epoch": 2.36, "learning_rate": 4.881882453193819e-05, "loss": 2.5777, "step": 477000 }, { "epoch": 2.37, "learning_rate": 4.8817585945512105e-05, "loss": 2.5795, "step": 477500 }, { "epoch": 2.37, "learning_rate": 4.881634735908602e-05, "loss": 2.5723, "step": 478000 }, { "epoch": 2.37, "learning_rate": 4.881510877265994e-05, "loss": 2.5972, "step": 478500 }, { "epoch": 2.37, "learning_rate": 4.8813872663406715e-05, "loss": 2.5405, "step": 479000 }, { "epoch": 2.38, "learning_rate": 4.8812634076980625e-05, "loss": 2.6003, "step": 479500 }, { "epoch": 2.38, "learning_rate": 4.881139549055454e-05, "loss": 2.5426, "step": 480000 }, { "epoch": 2.38, "learning_rate": 4.881015690412846e-05, "loss": 2.587, "step": 480500 }, { "epoch": 2.38, "learning_rate": 4.8808918317702376e-05, "loss": 2.5803, "step": 481000 }, { "epoch": 2.39, "learning_rate": 4.880767973127629e-05, "loss": 2.5914, "step": 481500 }, { "epoch": 2.39, "learning_rate": 4.880644114485021e-05, "loss": 2.5622, "step": 482000 }, { "epoch": 2.39, "learning_rate": 4.8805202558424126e-05, "loss": 2.5813, "step": 482500 }, { "epoch": 2.39, "learning_rate": 4.880396397199804e-05, "loss": 2.5929, "step": 483000 }, { "epoch": 2.4, "learning_rate": 4.8802727862744805e-05, "loss": 2.5919, "step": 483500 }, { "epoch": 2.4, "learning_rate": 4.880148927631872e-05, "loss": 2.5565, "step": 484000 }, { "epoch": 2.4, "learning_rate": 4.88002531670655e-05, "loss": 2.5604, "step": 484500 }, { "epoch": 2.4, "learning_rate": 4.8799014580639415e-05, "loss": 2.5575, "step": 485000 }, { "epoch": 2.41, "learning_rate": 4.8797775994213325e-05, "loss": 2.5634, "step": 485500 }, { "epoch": 2.41, "learning_rate": 4.879653740778724e-05, "loss": 2.5762, "step": 486000 }, { "epoch": 2.41, "learning_rate": 4.879529882136116e-05, "loss": 2.5648, "step": 486500 }, { "epoch": 2.41, "learning_rate": 4.8794060234935076e-05, "loss": 2.5592, "step": 487000 }, { "epoch": 2.42, "learning_rate": 4.879282164850899e-05, "loss": 2.563, "step": 487500 }, { "epoch": 2.42, "learning_rate": 4.879158553925576e-05, "loss": 2.6136, "step": 488000 }, { "epoch": 2.42, "learning_rate": 4.879034695282967e-05, "loss": 2.5676, "step": 488500 }, { "epoch": 2.42, "learning_rate": 4.878910836640359e-05, "loss": 2.5787, "step": 489000 }, { "epoch": 2.43, "learning_rate": 4.8787872257150364e-05, "loss": 2.5805, "step": 489500 }, { "epoch": 2.43, "learning_rate": 4.878663367072428e-05, "loss": 2.5883, "step": 490000 }, { "epoch": 2.43, "learning_rate": 4.87853950842982e-05, "loss": 2.5649, "step": 490500 }, { "epoch": 2.43, "learning_rate": 4.878415897504497e-05, "loss": 2.5689, "step": 491000 }, { "epoch": 2.44, "learning_rate": 4.878292038861888e-05, "loss": 2.5621, "step": 491500 }, { "epoch": 2.44, "learning_rate": 4.8781681802192794e-05, "loss": 2.586, "step": 492000 }, { "epoch": 2.44, "learning_rate": 4.878044321576671e-05, "loss": 2.5845, "step": 492500 }, { "epoch": 2.44, "learning_rate": 4.877920462934063e-05, "loss": 2.5749, "step": 493000 }, { "epoch": 2.44, "learning_rate": 4.8777966042914545e-05, "loss": 2.5925, "step": 493500 }, { "epoch": 2.45, "learning_rate": 4.877672745648846e-05, "loss": 2.5829, "step": 494000 }, { "epoch": 2.45, "learning_rate": 4.877549134723523e-05, "loss": 2.5584, "step": 494500 }, { "epoch": 2.45, "learning_rate": 4.877425276080915e-05, "loss": 2.562, "step": 495000 }, { "epoch": 2.45, "learning_rate": 4.8773014174383064e-05, "loss": 2.5678, "step": 495500 }, { "epoch": 2.46, "learning_rate": 4.877177558795698e-05, "loss": 2.5516, "step": 496000 }, { "epoch": 2.46, "learning_rate": 4.87705370015309e-05, "loss": 2.567, "step": 496500 }, { "epoch": 2.46, "learning_rate": 4.8769298415104815e-05, "loss": 2.5885, "step": 497000 }, { "epoch": 2.46, "learning_rate": 4.876805982867873e-05, "loss": 2.5813, "step": 497500 }, { "epoch": 2.47, "learning_rate": 4.876682124225264e-05, "loss": 2.5801, "step": 498000 }, { "epoch": 2.47, "learning_rate": 4.876558265582656e-05, "loss": 2.567, "step": 498500 }, { "epoch": 2.47, "learning_rate": 4.8764344069400476e-05, "loss": 2.5659, "step": 499000 }, { "epoch": 2.47, "learning_rate": 4.876310548297439e-05, "loss": 2.572, "step": 499500 }, { "epoch": 2.48, "learning_rate": 4.876186689654831e-05, "loss": 2.5771, "step": 500000 }, { "epoch": 2.48, "learning_rate": 4.876062831012223e-05, "loss": 2.5942, "step": 500500 }, { "epoch": 2.48, "learning_rate": 4.8759389723696144e-05, "loss": 2.58, "step": 501000 }, { "epoch": 2.48, "learning_rate": 4.875815113727006e-05, "loss": 2.5474, "step": 501500 }, { "epoch": 2.49, "learning_rate": 4.875691502801682e-05, "loss": 2.5899, "step": 502000 }, { "epoch": 2.49, "learning_rate": 4.87556789187636e-05, "loss": 2.581, "step": 502500 }, { "epoch": 2.49, "learning_rate": 4.8754440332337515e-05, "loss": 2.5766, "step": 503000 }, { "epoch": 2.49, "learning_rate": 4.875320174591143e-05, "loss": 2.5461, "step": 503500 }, { "epoch": 2.5, "learning_rate": 4.875196315948535e-05, "loss": 2.5437, "step": 504000 }, { "epoch": 2.5, "learning_rate": 4.875072457305926e-05, "loss": 2.566, "step": 504500 }, { "epoch": 2.5, "learning_rate": 4.8749485986633176e-05, "loss": 2.5592, "step": 505000 }, { "epoch": 2.5, "learning_rate": 4.8748249877379945e-05, "loss": 2.5718, "step": 505500 }, { "epoch": 2.51, "learning_rate": 4.874701129095386e-05, "loss": 2.554, "step": 506000 }, { "epoch": 2.51, "learning_rate": 4.874577270452778e-05, "loss": 2.5343, "step": 506500 }, { "epoch": 2.51, "learning_rate": 4.8744534118101696e-05, "loss": 2.5618, "step": 507000 }, { "epoch": 2.51, "learning_rate": 4.8743295531675606e-05, "loss": 2.5622, "step": 507500 }, { "epoch": 2.52, "learning_rate": 4.874205694524952e-05, "loss": 2.5814, "step": 508000 }, { "epoch": 2.52, "learning_rate": 4.874081835882344e-05, "loss": 2.5951, "step": 508500 }, { "epoch": 2.52, "learning_rate": 4.8739582249570215e-05, "loss": 2.5568, "step": 509000 }, { "epoch": 2.52, "learning_rate": 4.873834366314413e-05, "loss": 2.5576, "step": 509500 }, { "epoch": 2.53, "learning_rate": 4.873710507671805e-05, "loss": 2.5658, "step": 510000 }, { "epoch": 2.53, "learning_rate": 4.873586649029196e-05, "loss": 2.5628, "step": 510500 }, { "epoch": 2.53, "learning_rate": 4.8734630381038735e-05, "loss": 2.5771, "step": 511000 }, { "epoch": 2.53, "learning_rate": 4.8733391794612645e-05, "loss": 2.5623, "step": 511500 }, { "epoch": 2.54, "learning_rate": 4.873215320818656e-05, "loss": 2.552, "step": 512000 }, { "epoch": 2.54, "learning_rate": 4.873091462176048e-05, "loss": 2.5771, "step": 512500 }, { "epoch": 2.54, "learning_rate": 4.8729676035334396e-05, "loss": 2.5683, "step": 513000 }, { "epoch": 2.54, "learning_rate": 4.872843744890831e-05, "loss": 2.5772, "step": 513500 }, { "epoch": 2.55, "learning_rate": 4.872719886248222e-05, "loss": 2.5589, "step": 514000 }, { "epoch": 2.55, "learning_rate": 4.872596027605614e-05, "loss": 2.5793, "step": 514500 }, { "epoch": 2.55, "learning_rate": 4.872472168963006e-05, "loss": 2.5705, "step": 515000 }, { "epoch": 2.55, "learning_rate": 4.8723483103203974e-05, "loss": 2.5768, "step": 515500 }, { "epoch": 2.56, "learning_rate": 4.872224451677789e-05, "loss": 2.5792, "step": 516000 }, { "epoch": 2.56, "learning_rate": 4.872100593035181e-05, "loss": 2.538, "step": 516500 }, { "epoch": 2.56, "learning_rate": 4.8719767343925724e-05, "loss": 2.5786, "step": 517000 }, { "epoch": 2.56, "learning_rate": 4.871852875749964e-05, "loss": 2.5611, "step": 517500 }, { "epoch": 2.57, "learning_rate": 4.871729017107356e-05, "loss": 2.5601, "step": 518000 }, { "epoch": 2.57, "learning_rate": 4.8716051584647475e-05, "loss": 2.5772, "step": 518500 }, { "epoch": 2.57, "learning_rate": 4.8714817952567096e-05, "loss": 2.5865, "step": 519000 }, { "epoch": 2.57, "learning_rate": 4.871357936614101e-05, "loss": 2.5592, "step": 519500 }, { "epoch": 2.58, "learning_rate": 4.871234077971493e-05, "loss": 2.5937, "step": 520000 }, { "epoch": 2.58, "learning_rate": 4.871110219328885e-05, "loss": 2.5601, "step": 520500 }, { "epoch": 2.58, "learning_rate": 4.870986360686276e-05, "loss": 2.5464, "step": 521000 }, { "epoch": 2.58, "learning_rate": 4.8708625020436674e-05, "loss": 2.5565, "step": 521500 }, { "epoch": 2.59, "learning_rate": 4.870738643401059e-05, "loss": 2.5702, "step": 522000 }, { "epoch": 2.59, "learning_rate": 4.870614784758451e-05, "loss": 2.5839, "step": 522500 }, { "epoch": 2.59, "learning_rate": 4.8704911738331276e-05, "loss": 2.5712, "step": 523000 }, { "epoch": 2.59, "learning_rate": 4.8703673151905193e-05, "loss": 2.5777, "step": 523500 }, { "epoch": 2.6, "learning_rate": 4.870243704265197e-05, "loss": 2.56, "step": 524000 }, { "epoch": 2.6, "learning_rate": 4.8701198456225886e-05, "loss": 2.5688, "step": 524500 }, { "epoch": 2.6, "learning_rate": 4.8699959869799796e-05, "loss": 2.5656, "step": 525000 }, { "epoch": 2.6, "learning_rate": 4.8698723760546565e-05, "loss": 2.5649, "step": 525500 }, { "epoch": 2.61, "learning_rate": 4.869748517412048e-05, "loss": 2.5436, "step": 526000 }, { "epoch": 2.61, "learning_rate": 4.86962465876944e-05, "loss": 2.5476, "step": 526500 }, { "epoch": 2.61, "learning_rate": 4.8695008001268316e-05, "loss": 2.5696, "step": 527000 }, { "epoch": 2.61, "learning_rate": 4.869376941484223e-05, "loss": 2.5495, "step": 527500 }, { "epoch": 2.62, "learning_rate": 4.869253082841615e-05, "loss": 2.5808, "step": 528000 }, { "epoch": 2.62, "learning_rate": 4.8691292241990066e-05, "loss": 2.5641, "step": 528500 }, { "epoch": 2.62, "learning_rate": 4.869005365556398e-05, "loss": 2.5714, "step": 529000 }, { "epoch": 2.62, "learning_rate": 4.8688815069137894e-05, "loss": 2.5913, "step": 529500 }, { "epoch": 2.63, "learning_rate": 4.868757648271181e-05, "loss": 2.5783, "step": 530000 }, { "epoch": 2.63, "learning_rate": 4.868633789628573e-05, "loss": 2.57, "step": 530500 }, { "epoch": 2.63, "learning_rate": 4.86851017870325e-05, "loss": 2.5547, "step": 531000 }, { "epoch": 2.63, "learning_rate": 4.868386320060642e-05, "loss": 2.5468, "step": 531500 }, { "epoch": 2.64, "learning_rate": 4.868262461418033e-05, "loss": 2.5864, "step": 532000 }, { "epoch": 2.64, "learning_rate": 4.868138602775425e-05, "loss": 2.5843, "step": 532500 }, { "epoch": 2.64, "learning_rate": 4.8680147441328164e-05, "loss": 2.5719, "step": 533000 }, { "epoch": 2.64, "learning_rate": 4.867891133207493e-05, "loss": 2.5732, "step": 533500 }, { "epoch": 2.65, "learning_rate": 4.867767274564885e-05, "loss": 2.5765, "step": 534000 }, { "epoch": 2.65, "learning_rate": 4.8676434159222767e-05, "loss": 2.5757, "step": 534500 }, { "epoch": 2.65, "learning_rate": 4.8675195572796683e-05, "loss": 2.5774, "step": 535000 }, { "epoch": 2.65, "learning_rate": 4.8673956986370594e-05, "loss": 2.554, "step": 535500 }, { "epoch": 2.66, "learning_rate": 4.867272087711737e-05, "loss": 2.5533, "step": 536000 }, { "epoch": 2.66, "learning_rate": 4.8671482290691286e-05, "loss": 2.5772, "step": 536500 }, { "epoch": 2.66, "learning_rate": 4.86702437042652e-05, "loss": 2.5513, "step": 537000 }, { "epoch": 2.66, "learning_rate": 4.866900511783912e-05, "loss": 2.5708, "step": 537500 }, { "epoch": 2.67, "learning_rate": 4.866776653141304e-05, "loss": 2.5684, "step": 538000 }, { "epoch": 2.67, "learning_rate": 4.866652794498695e-05, "loss": 2.5832, "step": 538500 }, { "epoch": 2.67, "learning_rate": 4.8665291835733716e-05, "loss": 2.5732, "step": 539000 }, { "epoch": 2.67, "learning_rate": 4.866405324930763e-05, "loss": 2.5757, "step": 539500 }, { "epoch": 2.68, "learning_rate": 4.86628171400544e-05, "loss": 2.5808, "step": 540000 }, { "epoch": 2.68, "learning_rate": 4.866157855362832e-05, "loss": 2.5977, "step": 540500 }, { "epoch": 2.68, "learning_rate": 4.8660339967202236e-05, "loss": 2.5599, "step": 541000 }, { "epoch": 2.68, "learning_rate": 4.865910138077615e-05, "loss": 2.5493, "step": 541500 }, { "epoch": 2.69, "learning_rate": 4.865786279435007e-05, "loss": 2.5864, "step": 542000 }, { "epoch": 2.69, "learning_rate": 4.8656624207923986e-05, "loss": 2.5611, "step": 542500 }, { "epoch": 2.69, "learning_rate": 4.86553856214979e-05, "loss": 2.5601, "step": 543000 }, { "epoch": 2.69, "learning_rate": 4.865414703507182e-05, "loss": 2.5715, "step": 543500 }, { "epoch": 2.7, "learning_rate": 4.865291092581858e-05, "loss": 2.5968, "step": 544000 }, { "epoch": 2.7, "learning_rate": 4.86516723393925e-05, "loss": 2.561, "step": 544500 }, { "epoch": 2.7, "learning_rate": 4.8650433752966416e-05, "loss": 2.593, "step": 545000 }, { "epoch": 2.7, "learning_rate": 4.864919516654033e-05, "loss": 2.5692, "step": 545500 }, { "epoch": 2.71, "learning_rate": 4.864795658011425e-05, "loss": 2.5501, "step": 546000 }, { "epoch": 2.71, "learning_rate": 4.864671799368817e-05, "loss": 2.5733, "step": 546500 }, { "epoch": 2.71, "learning_rate": 4.8645481884434936e-05, "loss": 2.5761, "step": 547000 }, { "epoch": 2.71, "learning_rate": 4.864424329800885e-05, "loss": 2.5604, "step": 547500 }, { "epoch": 2.71, "learning_rate": 4.864300471158277e-05, "loss": 2.5876, "step": 548000 }, { "epoch": 2.72, "learning_rate": 4.8641766125156686e-05, "loss": 2.5696, "step": 548500 }, { "epoch": 2.72, "learning_rate": 4.86405275387306e-05, "loss": 2.5717, "step": 549000 }, { "epoch": 2.72, "learning_rate": 4.863928895230452e-05, "loss": 2.5383, "step": 549500 }, { "epoch": 2.72, "learning_rate": 4.863805284305128e-05, "loss": 2.5773, "step": 550000 }, { "epoch": 2.73, "learning_rate": 4.863681673379805e-05, "loss": 2.5818, "step": 550500 }, { "epoch": 2.73, "learning_rate": 4.863557814737197e-05, "loss": 2.5148, "step": 551000 }, { "epoch": 2.73, "learning_rate": 4.8634339560945885e-05, "loss": 2.5935, "step": 551500 }, { "epoch": 2.73, "learning_rate": 4.86331009745198e-05, "loss": 2.5585, "step": 552000 }, { "epoch": 2.74, "learning_rate": 4.863186238809372e-05, "loss": 2.5496, "step": 552500 }, { "epoch": 2.74, "learning_rate": 4.8630623801667636e-05, "loss": 2.5555, "step": 553000 }, { "epoch": 2.74, "learning_rate": 4.862938521524155e-05, "loss": 2.547, "step": 553500 }, { "epoch": 2.74, "learning_rate": 4.862814662881547e-05, "loss": 2.567, "step": 554000 }, { "epoch": 2.75, "learning_rate": 4.8626908042389387e-05, "loss": 2.554, "step": 554500 }, { "epoch": 2.75, "learning_rate": 4.8625669455963303e-05, "loss": 2.5619, "step": 555000 }, { "epoch": 2.75, "learning_rate": 4.862443582388292e-05, "loss": 2.5697, "step": 555500 }, { "epoch": 2.75, "learning_rate": 4.8623197237456834e-05, "loss": 2.5523, "step": 556000 }, { "epoch": 2.76, "learning_rate": 4.862195865103075e-05, "loss": 2.568, "step": 556500 }, { "epoch": 2.76, "learning_rate": 4.862072006460467e-05, "loss": 2.5945, "step": 557000 }, { "epoch": 2.76, "learning_rate": 4.861948643252429e-05, "loss": 2.5638, "step": 557500 }, { "epoch": 2.76, "learning_rate": 4.8618247846098206e-05, "loss": 2.56, "step": 558000 }, { "epoch": 2.77, "learning_rate": 4.861700925967212e-05, "loss": 2.5682, "step": 558500 }, { "epoch": 2.77, "learning_rate": 4.861577067324604e-05, "loss": 2.5404, "step": 559000 }, { "epoch": 2.77, "learning_rate": 4.8614532086819957e-05, "loss": 2.5794, "step": 559500 }, { "epoch": 2.77, "learning_rate": 4.8613293500393873e-05, "loss": 2.5669, "step": 560000 }, { "epoch": 2.78, "learning_rate": 4.861205491396779e-05, "loss": 2.5423, "step": 560500 }, { "epoch": 2.78, "learning_rate": 4.86108163275417e-05, "loss": 2.5711, "step": 561000 }, { "epoch": 2.78, "learning_rate": 4.860957774111562e-05, "loss": 2.5841, "step": 561500 }, { "epoch": 2.78, "learning_rate": 4.8608339154689534e-05, "loss": 2.563, "step": 562000 }, { "epoch": 2.79, "learning_rate": 4.860710056826345e-05, "loss": 2.5802, "step": 562500 }, { "epoch": 2.79, "learning_rate": 4.860586198183737e-05, "loss": 2.5727, "step": 563000 }, { "epoch": 2.79, "learning_rate": 4.8604625872584144e-05, "loss": 2.5798, "step": 563500 }, { "epoch": 2.79, "learning_rate": 4.860338728615806e-05, "loss": 2.5647, "step": 564000 }, { "epoch": 2.8, "learning_rate": 4.860214869973197e-05, "loss": 2.5512, "step": 564500 }, { "epoch": 2.8, "learning_rate": 4.860091011330589e-05, "loss": 2.5654, "step": 565000 }, { "epoch": 2.8, "learning_rate": 4.8599671526879805e-05, "loss": 2.5424, "step": 565500 }, { "epoch": 2.8, "learning_rate": 4.859843294045372e-05, "loss": 2.554, "step": 566000 }, { "epoch": 2.81, "learning_rate": 4.859719435402764e-05, "loss": 2.5894, "step": 566500 }, { "epoch": 2.81, "learning_rate": 4.8595955767601556e-05, "loss": 2.551, "step": 567000 }, { "epoch": 2.81, "learning_rate": 4.859471965834832e-05, "loss": 2.5805, "step": 567500 }, { "epoch": 2.81, "learning_rate": 4.8593481071922235e-05, "loss": 2.5897, "step": 568000 }, { "epoch": 2.82, "learning_rate": 4.859224248549615e-05, "loss": 2.5625, "step": 568500 }, { "epoch": 2.82, "learning_rate": 4.859100389907007e-05, "loss": 2.5616, "step": 569000 }, { "epoch": 2.82, "learning_rate": 4.8589765312643985e-05, "loss": 2.5768, "step": 569500 }, { "epoch": 2.82, "learning_rate": 4.858852920339076e-05, "loss": 2.5554, "step": 570000 }, { "epoch": 2.83, "learning_rate": 4.858729061696467e-05, "loss": 2.5912, "step": 570500 }, { "epoch": 2.83, "learning_rate": 4.858605203053859e-05, "loss": 2.5492, "step": 571000 }, { "epoch": 2.83, "learning_rate": 4.858481592128536e-05, "loss": 2.5554, "step": 571500 }, { "epoch": 2.83, "learning_rate": 4.8583577334859274e-05, "loss": 2.5641, "step": 572000 }, { "epoch": 2.84, "learning_rate": 4.858233874843319e-05, "loss": 2.5709, "step": 572500 }, { "epoch": 2.84, "learning_rate": 4.858110016200711e-05, "loss": 2.5975, "step": 573000 }, { "epoch": 2.84, "learning_rate": 4.857986157558102e-05, "loss": 2.5723, "step": 573500 }, { "epoch": 2.84, "learning_rate": 4.8578622989154935e-05, "loss": 2.5557, "step": 574000 }, { "epoch": 2.85, "learning_rate": 4.857738440272885e-05, "loss": 2.538, "step": 574500 }, { "epoch": 2.85, "learning_rate": 4.857614581630277e-05, "loss": 2.5816, "step": 575000 }, { "epoch": 2.85, "learning_rate": 4.8574907229876685e-05, "loss": 2.5707, "step": 575500 }, { "epoch": 2.85, "learning_rate": 4.85736686434506e-05, "loss": 2.5592, "step": 576000 }, { "epoch": 2.86, "learning_rate": 4.857243005702452e-05, "loss": 2.5493, "step": 576500 }, { "epoch": 2.86, "learning_rate": 4.8571191470598436e-05, "loss": 2.5495, "step": 577000 }, { "epoch": 2.86, "learning_rate": 4.8569955361345205e-05, "loss": 2.5573, "step": 577500 }, { "epoch": 2.86, "learning_rate": 4.8568719252091974e-05, "loss": 2.5596, "step": 578000 }, { "epoch": 2.87, "learning_rate": 4.856748066566589e-05, "loss": 2.5427, "step": 578500 }, { "epoch": 2.87, "learning_rate": 4.856624207923981e-05, "loss": 2.5828, "step": 579000 }, { "epoch": 2.87, "learning_rate": 4.8565003492813725e-05, "loss": 2.5622, "step": 579500 }, { "epoch": 2.87, "learning_rate": 4.8563764906387635e-05, "loss": 2.5681, "step": 580000 }, { "epoch": 2.88, "learning_rate": 4.856252631996155e-05, "loss": 2.5767, "step": 580500 }, { "epoch": 2.88, "learning_rate": 4.856128773353547e-05, "loss": 2.5359, "step": 581000 }, { "epoch": 2.88, "learning_rate": 4.8560049147109386e-05, "loss": 2.5966, "step": 581500 }, { "epoch": 2.88, "learning_rate": 4.85588105606833e-05, "loss": 2.5783, "step": 582000 }, { "epoch": 2.89, "learning_rate": 4.855757445143008e-05, "loss": 2.563, "step": 582500 }, { "epoch": 2.89, "learning_rate": 4.855633586500399e-05, "loss": 2.5594, "step": 583000 }, { "epoch": 2.89, "learning_rate": 4.855509975575076e-05, "loss": 2.5744, "step": 583500 }, { "epoch": 2.89, "learning_rate": 4.8553861169324674e-05, "loss": 2.5556, "step": 584000 }, { "epoch": 2.9, "learning_rate": 4.855262258289859e-05, "loss": 2.5552, "step": 584500 }, { "epoch": 2.9, "learning_rate": 4.855138399647251e-05, "loss": 2.5402, "step": 585000 }, { "epoch": 2.9, "learning_rate": 4.8550145410046425e-05, "loss": 2.553, "step": 585500 }, { "epoch": 2.9, "learning_rate": 4.8548906823620335e-05, "loss": 2.5668, "step": 586000 }, { "epoch": 2.91, "learning_rate": 4.854766823719425e-05, "loss": 2.5855, "step": 586500 }, { "epoch": 2.91, "learning_rate": 4.854643212794103e-05, "loss": 2.5822, "step": 587000 }, { "epoch": 2.91, "learning_rate": 4.8545193541514944e-05, "loss": 2.5603, "step": 587500 }, { "epoch": 2.91, "learning_rate": 4.854395495508886e-05, "loss": 2.5564, "step": 588000 }, { "epoch": 2.92, "learning_rate": 4.854271884583563e-05, "loss": 2.5659, "step": 588500 }, { "epoch": 2.92, "learning_rate": 4.854148025940955e-05, "loss": 2.5704, "step": 589000 }, { "epoch": 2.92, "learning_rate": 4.8540241672983464e-05, "loss": 2.584, "step": 589500 }, { "epoch": 2.92, "learning_rate": 4.8539003086557374e-05, "loss": 2.5611, "step": 590000 }, { "epoch": 2.93, "learning_rate": 4.853776697730414e-05, "loss": 2.5538, "step": 590500 }, { "epoch": 2.93, "learning_rate": 4.853652839087806e-05, "loss": 2.5419, "step": 591000 }, { "epoch": 2.93, "learning_rate": 4.853528980445198e-05, "loss": 2.5637, "step": 591500 }, { "epoch": 2.93, "learning_rate": 4.8534051218025894e-05, "loss": 2.5852, "step": 592000 }, { "epoch": 2.94, "learning_rate": 4.853281263159981e-05, "loss": 2.5701, "step": 592500 }, { "epoch": 2.94, "learning_rate": 4.853157404517373e-05, "loss": 2.5697, "step": 593000 }, { "epoch": 2.94, "learning_rate": 4.8530335458747644e-05, "loss": 2.5454, "step": 593500 }, { "epoch": 2.94, "learning_rate": 4.852909687232156e-05, "loss": 2.5414, "step": 594000 }, { "epoch": 2.95, "learning_rate": 4.852785828589548e-05, "loss": 2.5856, "step": 594500 }, { "epoch": 2.95, "learning_rate": 4.8526619699469395e-05, "loss": 2.5922, "step": 595000 }, { "epoch": 2.95, "learning_rate": 4.8525381113043305e-05, "loss": 2.5762, "step": 595500 }, { "epoch": 2.95, "learning_rate": 4.852414252661722e-05, "loss": 2.5752, "step": 596000 }, { "epoch": 2.96, "learning_rate": 4.852290394019114e-05, "loss": 2.5849, "step": 596500 }, { "epoch": 2.96, "learning_rate": 4.852166783093791e-05, "loss": 2.5798, "step": 597000 }, { "epoch": 2.96, "learning_rate": 4.8520429244511825e-05, "loss": 2.5716, "step": 597500 }, { "epoch": 2.96, "learning_rate": 4.8519193135258594e-05, "loss": 2.559, "step": 598000 }, { "epoch": 2.97, "learning_rate": 4.851795454883251e-05, "loss": 2.5591, "step": 598500 }, { "epoch": 2.97, "learning_rate": 4.851671596240643e-05, "loss": 2.582, "step": 599000 }, { "epoch": 2.97, "learning_rate": 4.8515477375980345e-05, "loss": 2.581, "step": 599500 }, { "epoch": 2.97, "learning_rate": 4.851423878955426e-05, "loss": 2.5779, "step": 600000 }, { "epoch": 2.98, "learning_rate": 4.851300268030103e-05, "loss": 2.5746, "step": 600500 }, { "epoch": 2.98, "learning_rate": 4.851176409387495e-05, "loss": 2.5805, "step": 601000 }, { "epoch": 2.98, "learning_rate": 4.8510525507448864e-05, "loss": 2.5754, "step": 601500 }, { "epoch": 2.98, "learning_rate": 4.850928692102278e-05, "loss": 2.5709, "step": 602000 }, { "epoch": 2.98, "learning_rate": 4.85080483345967e-05, "loss": 2.5627, "step": 602500 }, { "epoch": 2.99, "learning_rate": 4.8506809748170615e-05, "loss": 2.5808, "step": 603000 }, { "epoch": 2.99, "learning_rate": 4.8505571161744525e-05, "loss": 2.5847, "step": 603500 }, { "epoch": 2.99, "learning_rate": 4.850433257531844e-05, "loss": 2.5929, "step": 604000 }, { "epoch": 2.99, "learning_rate": 4.850309398889236e-05, "loss": 2.5613, "step": 604500 }, { "epoch": 3.0, "learning_rate": 4.850185787963913e-05, "loss": 2.5579, "step": 605000 }, { "epoch": 3.0, "learning_rate": 4.8500619293213045e-05, "loss": 2.5798, "step": 605500 }, { "epoch": 3.0, "eval_accuracy": 0.6309935793243545, "eval_accuracy_mlm": 0.583263169880887, "eval_accuracy_nsp": 0.856231786287207, "eval_loss": 2.503361225128174, "eval_runtime": 145.9652, "eval_samples_per_second": 1746.711, "eval_steps_per_second": 72.784, "step": 605529 }, { "epoch": 3.0, "learning_rate": 4.8499383183959814e-05, "loss": 2.5333, "step": 606000 }, { "epoch": 3.0, "learning_rate": 4.849814459753373e-05, "loss": 2.5139, "step": 606500 }, { "epoch": 3.01, "learning_rate": 4.849690601110765e-05, "loss": 2.5247, "step": 607000 }, { "epoch": 3.01, "learning_rate": 4.849566990185441e-05, "loss": 2.5196, "step": 607500 }, { "epoch": 3.01, "learning_rate": 4.8494431315428326e-05, "loss": 2.5171, "step": 608000 }, { "epoch": 3.01, "learning_rate": 4.849319272900224e-05, "loss": 2.5036, "step": 608500 }, { "epoch": 3.02, "learning_rate": 4.849195414257616e-05, "loss": 2.5135, "step": 609000 }, { "epoch": 3.02, "learning_rate": 4.849071555615008e-05, "loss": 2.5322, "step": 609500 }, { "epoch": 3.02, "learning_rate": 4.8489476969723994e-05, "loss": 2.5542, "step": 610000 }, { "epoch": 3.02, "learning_rate": 4.848823838329791e-05, "loss": 2.5499, "step": 610500 }, { "epoch": 3.03, "learning_rate": 4.848699979687183e-05, "loss": 2.5478, "step": 611000 }, { "epoch": 3.03, "learning_rate": 4.84857636876186e-05, "loss": 2.5333, "step": 611500 }, { "epoch": 3.03, "learning_rate": 4.8484525101192514e-05, "loss": 2.552, "step": 612000 }, { "epoch": 3.03, "learning_rate": 4.848328651476643e-05, "loss": 2.5408, "step": 612500 }, { "epoch": 3.04, "learning_rate": 4.848204792834035e-05, "loss": 2.5539, "step": 613000 }, { "epoch": 3.04, "learning_rate": 4.8480809341914264e-05, "loss": 2.5355, "step": 613500 }, { "epoch": 3.04, "learning_rate": 4.847957075548818e-05, "loss": 2.5418, "step": 614000 }, { "epoch": 3.04, "learning_rate": 4.84783321690621e-05, "loss": 2.5373, "step": 614500 }, { "epoch": 3.05, "learning_rate": 4.847709605980886e-05, "loss": 2.539, "step": 615000 }, { "epoch": 3.05, "learning_rate": 4.847585747338278e-05, "loss": 2.5402, "step": 615500 }, { "epoch": 3.05, "learning_rate": 4.8474618886956694e-05, "loss": 2.5299, "step": 616000 }, { "epoch": 3.05, "learning_rate": 4.847338030053061e-05, "loss": 2.5361, "step": 616500 }, { "epoch": 3.06, "learning_rate": 4.847214171410453e-05, "loss": 2.5464, "step": 617000 }, { "epoch": 3.06, "learning_rate": 4.8470903127678445e-05, "loss": 2.5522, "step": 617500 }, { "epoch": 3.06, "learning_rate": 4.846966454125236e-05, "loss": 2.5553, "step": 618000 }, { "epoch": 3.06, "learning_rate": 4.846842595482628e-05, "loss": 2.5591, "step": 618500 }, { "epoch": 3.07, "learning_rate": 4.8467187368400196e-05, "loss": 2.54, "step": 619000 }, { "epoch": 3.07, "learning_rate": 4.846594878197411e-05, "loss": 2.5405, "step": 619500 }, { "epoch": 3.07, "learning_rate": 4.846471267272088e-05, "loss": 2.5232, "step": 620000 }, { "epoch": 3.07, "learning_rate": 4.84634740862948e-05, "loss": 2.5347, "step": 620500 }, { "epoch": 3.08, "learning_rate": 4.8462235499868715e-05, "loss": 2.528, "step": 621000 }, { "epoch": 3.08, "learning_rate": 4.846099939061548e-05, "loss": 2.5322, "step": 621500 }, { "epoch": 3.08, "learning_rate": 4.8459760804189394e-05, "loss": 2.5317, "step": 622000 }, { "epoch": 3.08, "learning_rate": 4.845852221776331e-05, "loss": 2.5383, "step": 622500 }, { "epoch": 3.09, "learning_rate": 4.845728363133723e-05, "loss": 2.5493, "step": 623000 }, { "epoch": 3.09, "learning_rate": 4.8456045044911145e-05, "loss": 2.538, "step": 623500 }, { "epoch": 3.09, "learning_rate": 4.845480645848506e-05, "loss": 2.5492, "step": 624000 }, { "epoch": 3.09, "learning_rate": 4.845356787205898e-05, "loss": 2.5289, "step": 624500 }, { "epoch": 3.1, "learning_rate": 4.8452329285632896e-05, "loss": 2.566, "step": 625000 }, { "epoch": 3.1, "learning_rate": 4.8451093176379665e-05, "loss": 2.5407, "step": 625500 }, { "epoch": 3.1, "learning_rate": 4.844985458995358e-05, "loss": 2.5053, "step": 626000 }, { "epoch": 3.1, "learning_rate": 4.84486160035275e-05, "loss": 2.5606, "step": 626500 }, { "epoch": 3.11, "learning_rate": 4.844737989427427e-05, "loss": 2.5307, "step": 627000 }, { "epoch": 3.11, "learning_rate": 4.844614130784818e-05, "loss": 2.5339, "step": 627500 }, { "epoch": 3.11, "learning_rate": 4.8444902721422094e-05, "loss": 2.5584, "step": 628000 }, { "epoch": 3.11, "learning_rate": 4.844366413499601e-05, "loss": 2.5237, "step": 628500 }, { "epoch": 3.12, "learning_rate": 4.844242554856993e-05, "loss": 2.5553, "step": 629000 }, { "epoch": 3.12, "learning_rate": 4.84411894393167e-05, "loss": 2.5506, "step": 629500 }, { "epoch": 3.12, "learning_rate": 4.8439950852890614e-05, "loss": 2.5469, "step": 630000 }, { "epoch": 3.12, "learning_rate": 4.843871226646453e-05, "loss": 2.551, "step": 630500 }, { "epoch": 3.13, "learning_rate": 4.843747368003845e-05, "loss": 2.5408, "step": 631000 }, { "epoch": 3.13, "learning_rate": 4.8436235093612365e-05, "loss": 2.5641, "step": 631500 }, { "epoch": 3.13, "learning_rate": 4.843499650718628e-05, "loss": 2.5317, "step": 632000 }, { "epoch": 3.13, "learning_rate": 4.84337579207602e-05, "loss": 2.528, "step": 632500 }, { "epoch": 3.14, "learning_rate": 4.8432519334334116e-05, "loss": 2.5114, "step": 633000 }, { "epoch": 3.14, "learning_rate": 4.843128074790803e-05, "loss": 2.5307, "step": 633500 }, { "epoch": 3.14, "learning_rate": 4.843004216148195e-05, "loss": 2.5363, "step": 634000 }, { "epoch": 3.14, "learning_rate": 4.8428803575055866e-05, "loss": 2.5329, "step": 634500 }, { "epoch": 3.15, "learning_rate": 4.842756498862978e-05, "loss": 2.5189, "step": 635000 }, { "epoch": 3.15, "learning_rate": 4.8426328879376545e-05, "loss": 2.5262, "step": 635500 }, { "epoch": 3.15, "learning_rate": 4.842509029295046e-05, "loss": 2.5593, "step": 636000 }, { "epoch": 3.15, "learning_rate": 4.842385418369723e-05, "loss": 2.5337, "step": 636500 }, { "epoch": 3.16, "learning_rate": 4.842261559727115e-05, "loss": 2.5343, "step": 637000 }, { "epoch": 3.16, "learning_rate": 4.8421377010845065e-05, "loss": 2.5407, "step": 637500 }, { "epoch": 3.16, "learning_rate": 4.842013842441898e-05, "loss": 2.5609, "step": 638000 }, { "epoch": 3.16, "learning_rate": 4.84188998379929e-05, "loss": 2.5219, "step": 638500 }, { "epoch": 3.17, "learning_rate": 4.8417661251566816e-05, "loss": 2.5684, "step": 639000 }, { "epoch": 3.17, "learning_rate": 4.841642266514073e-05, "loss": 2.5422, "step": 639500 }, { "epoch": 3.17, "learning_rate": 4.841518407871465e-05, "loss": 2.5736, "step": 640000 }, { "epoch": 3.17, "learning_rate": 4.841394796946142e-05, "loss": 2.5538, "step": 640500 }, { "epoch": 3.18, "learning_rate": 4.841270938303533e-05, "loss": 2.5404, "step": 641000 }, { "epoch": 3.18, "learning_rate": 4.8411470796609245e-05, "loss": 2.5472, "step": 641500 }, { "epoch": 3.18, "learning_rate": 4.841023221018316e-05, "loss": 2.5434, "step": 642000 }, { "epoch": 3.18, "learning_rate": 4.840899610092993e-05, "loss": 2.5179, "step": 642500 }, { "epoch": 3.19, "learning_rate": 4.840775751450385e-05, "loss": 2.5491, "step": 643000 }, { "epoch": 3.19, "learning_rate": 4.8406518928077765e-05, "loss": 2.5372, "step": 643500 }, { "epoch": 3.19, "learning_rate": 4.840528034165168e-05, "loss": 2.544, "step": 644000 }, { "epoch": 3.19, "learning_rate": 4.84040417552256e-05, "loss": 2.5429, "step": 644500 }, { "epoch": 3.2, "learning_rate": 4.8402803168799516e-05, "loss": 2.5307, "step": 645000 }, { "epoch": 3.2, "learning_rate": 4.840156458237343e-05, "loss": 2.553, "step": 645500 }, { "epoch": 3.2, "learning_rate": 4.84003284731202e-05, "loss": 2.5597, "step": 646000 }, { "epoch": 3.2, "learning_rate": 4.839908988669412e-05, "loss": 2.5398, "step": 646500 }, { "epoch": 3.21, "learning_rate": 4.8397851300268035e-05, "loss": 2.5258, "step": 647000 }, { "epoch": 3.21, "learning_rate": 4.8396612713841946e-05, "loss": 2.525, "step": 647500 }, { "epoch": 3.21, "learning_rate": 4.839537412741586e-05, "loss": 2.5487, "step": 648000 }, { "epoch": 3.21, "learning_rate": 4.839413801816263e-05, "loss": 2.5557, "step": 648500 }, { "epoch": 3.22, "learning_rate": 4.839289943173655e-05, "loss": 2.5434, "step": 649000 }, { "epoch": 3.22, "learning_rate": 4.8391660845310465e-05, "loss": 2.5495, "step": 649500 }, { "epoch": 3.22, "learning_rate": 4.839042225888438e-05, "loss": 2.5504, "step": 650000 }, { "epoch": 3.22, "learning_rate": 4.838918614963115e-05, "loss": 2.5101, "step": 650500 }, { "epoch": 3.23, "learning_rate": 4.838794756320507e-05, "loss": 2.5414, "step": 651000 }, { "epoch": 3.23, "learning_rate": 4.8386708976778985e-05, "loss": 2.5399, "step": 651500 }, { "epoch": 3.23, "learning_rate": 4.83854703903529e-05, "loss": 2.5554, "step": 652000 }, { "epoch": 3.23, "learning_rate": 4.8384234281099664e-05, "loss": 2.5515, "step": 652500 }, { "epoch": 3.24, "learning_rate": 4.838299569467358e-05, "loss": 2.5307, "step": 653000 }, { "epoch": 3.24, "learning_rate": 4.83817571082475e-05, "loss": 2.539, "step": 653500 }, { "epoch": 3.24, "learning_rate": 4.8380518521821415e-05, "loss": 2.5284, "step": 654000 }, { "epoch": 3.24, "learning_rate": 4.837928241256819e-05, "loss": 2.5652, "step": 654500 }, { "epoch": 3.25, "learning_rate": 4.837804382614211e-05, "loss": 2.5605, "step": 655000 }, { "epoch": 3.25, "learning_rate": 4.837680523971602e-05, "loss": 2.5348, "step": 655500 }, { "epoch": 3.25, "learning_rate": 4.8375569130462786e-05, "loss": 2.5298, "step": 656000 }, { "epoch": 3.25, "learning_rate": 4.83743305440367e-05, "loss": 2.5437, "step": 656500 }, { "epoch": 3.26, "learning_rate": 4.837309195761062e-05, "loss": 2.5365, "step": 657000 }, { "epoch": 3.26, "learning_rate": 4.837185337118454e-05, "loss": 2.5457, "step": 657500 }, { "epoch": 3.26, "learning_rate": 4.8370614784758454e-05, "loss": 2.5365, "step": 658000 }, { "epoch": 3.26, "learning_rate": 4.836937619833237e-05, "loss": 2.5297, "step": 658500 }, { "epoch": 3.26, "learning_rate": 4.836813761190628e-05, "loss": 2.5214, "step": 659000 }, { "epoch": 3.27, "learning_rate": 4.83668990254802e-05, "loss": 2.5343, "step": 659500 }, { "epoch": 3.27, "learning_rate": 4.8365660439054115e-05, "loss": 2.535, "step": 660000 }, { "epoch": 3.27, "learning_rate": 4.836442432980089e-05, "loss": 2.5523, "step": 660500 }, { "epoch": 3.27, "learning_rate": 4.836318574337481e-05, "loss": 2.5577, "step": 661000 }, { "epoch": 3.28, "learning_rate": 4.8361947156948724e-05, "loss": 2.5796, "step": 661500 }, { "epoch": 3.28, "learning_rate": 4.8360708570522634e-05, "loss": 2.5263, "step": 662000 }, { "epoch": 3.28, "learning_rate": 4.835946998409655e-05, "loss": 2.535, "step": 662500 }, { "epoch": 3.28, "learning_rate": 4.835823139767047e-05, "loss": 2.5555, "step": 663000 }, { "epoch": 3.29, "learning_rate": 4.835699528841724e-05, "loss": 2.5393, "step": 663500 }, { "epoch": 3.29, "learning_rate": 4.8355756701991154e-05, "loss": 2.5416, "step": 664000 }, { "epoch": 3.29, "learning_rate": 4.835451811556507e-05, "loss": 2.5387, "step": 664500 }, { "epoch": 3.29, "learning_rate": 4.835327952913898e-05, "loss": 2.5604, "step": 665000 }, { "epoch": 3.3, "learning_rate": 4.83520409427129e-05, "loss": 2.565, "step": 665500 }, { "epoch": 3.3, "learning_rate": 4.8350802356286815e-05, "loss": 2.5364, "step": 666000 }, { "epoch": 3.3, "learning_rate": 4.834956376986073e-05, "loss": 2.5461, "step": 666500 }, { "epoch": 3.3, "learning_rate": 4.834832518343465e-05, "loss": 2.5692, "step": 667000 }, { "epoch": 3.31, "learning_rate": 4.8347089074181424e-05, "loss": 2.5424, "step": 667500 }, { "epoch": 3.31, "learning_rate": 4.834585296492819e-05, "loss": 2.557, "step": 668000 }, { "epoch": 3.31, "learning_rate": 4.834461437850211e-05, "loss": 2.5454, "step": 668500 }, { "epoch": 3.31, "learning_rate": 4.834337579207602e-05, "loss": 2.5403, "step": 669000 }, { "epoch": 3.32, "learning_rate": 4.834213720564994e-05, "loss": 2.5301, "step": 669500 }, { "epoch": 3.32, "learning_rate": 4.8340898619223854e-05, "loss": 2.5211, "step": 670000 }, { "epoch": 3.32, "learning_rate": 4.833966003279777e-05, "loss": 2.5401, "step": 670500 }, { "epoch": 3.32, "learning_rate": 4.833842144637169e-05, "loss": 2.541, "step": 671000 }, { "epoch": 3.33, "learning_rate": 4.833718533711846e-05, "loss": 2.5565, "step": 671500 }, { "epoch": 3.33, "learning_rate": 4.8335946750692374e-05, "loss": 2.5404, "step": 672000 }, { "epoch": 3.33, "learning_rate": 4.833470816426629e-05, "loss": 2.567, "step": 672500 }, { "epoch": 3.33, "learning_rate": 4.833346957784021e-05, "loss": 2.5351, "step": 673000 }, { "epoch": 3.34, "learning_rate": 4.8332230991414124e-05, "loss": 2.5467, "step": 673500 }, { "epoch": 3.34, "learning_rate": 4.833099240498804e-05, "loss": 2.5485, "step": 674000 }, { "epoch": 3.34, "learning_rate": 4.832975381856195e-05, "loss": 2.5353, "step": 674500 }, { "epoch": 3.34, "learning_rate": 4.832851523213587e-05, "loss": 2.5495, "step": 675000 }, { "epoch": 3.35, "learning_rate": 4.832728160005549e-05, "loss": 2.5532, "step": 675500 }, { "epoch": 3.35, "learning_rate": 4.8326043013629406e-05, "loss": 2.5324, "step": 676000 }, { "epoch": 3.35, "learning_rate": 4.832480442720332e-05, "loss": 2.538, "step": 676500 }, { "epoch": 3.35, "learning_rate": 4.832356584077724e-05, "loss": 2.5226, "step": 677000 }, { "epoch": 3.36, "learning_rate": 4.832232973152401e-05, "loss": 2.5422, "step": 677500 }, { "epoch": 3.36, "learning_rate": 4.8321091145097926e-05, "loss": 2.5271, "step": 678000 }, { "epoch": 3.36, "learning_rate": 4.831985255867184e-05, "loss": 2.5118, "step": 678500 }, { "epoch": 3.36, "learning_rate": 4.831861397224576e-05, "loss": 2.5408, "step": 679000 }, { "epoch": 3.37, "learning_rate": 4.831737786299252e-05, "loss": 2.5397, "step": 679500 }, { "epoch": 3.37, "learning_rate": 4.831613927656644e-05, "loss": 2.5553, "step": 680000 }, { "epoch": 3.37, "learning_rate": 4.8314900690140355e-05, "loss": 2.5429, "step": 680500 }, { "epoch": 3.37, "learning_rate": 4.831366210371427e-05, "loss": 2.5535, "step": 681000 }, { "epoch": 3.38, "learning_rate": 4.831242351728819e-05, "loss": 2.5647, "step": 681500 }, { "epoch": 3.38, "learning_rate": 4.8311184930862106e-05, "loss": 2.5757, "step": 682000 }, { "epoch": 3.38, "learning_rate": 4.830994634443602e-05, "loss": 2.5574, "step": 682500 }, { "epoch": 3.38, "learning_rate": 4.830870775800994e-05, "loss": 2.53, "step": 683000 }, { "epoch": 3.39, "learning_rate": 4.830746917158386e-05, "loss": 2.5279, "step": 683500 }, { "epoch": 3.39, "learning_rate": 4.8306233062330626e-05, "loss": 2.5323, "step": 684000 }, { "epoch": 3.39, "learning_rate": 4.830499447590454e-05, "loss": 2.5433, "step": 684500 }, { "epoch": 3.39, "learning_rate": 4.830375588947846e-05, "loss": 2.5779, "step": 685000 }, { "epoch": 3.4, "learning_rate": 4.8302517303052376e-05, "loss": 2.5304, "step": 685500 }, { "epoch": 3.4, "learning_rate": 4.830128119379914e-05, "loss": 2.5579, "step": 686000 }, { "epoch": 3.4, "learning_rate": 4.8300045084545914e-05, "loss": 2.5429, "step": 686500 }, { "epoch": 3.4, "learning_rate": 4.829880649811983e-05, "loss": 2.5222, "step": 687000 }, { "epoch": 3.41, "learning_rate": 4.829756791169374e-05, "loss": 2.5562, "step": 687500 }, { "epoch": 3.41, "learning_rate": 4.829632932526766e-05, "loss": 2.579, "step": 688000 }, { "epoch": 3.41, "learning_rate": 4.8295090738841575e-05, "loss": 2.5623, "step": 688500 }, { "epoch": 3.41, "learning_rate": 4.829385215241549e-05, "loss": 2.543, "step": 689000 }, { "epoch": 3.42, "learning_rate": 4.829261356598941e-05, "loss": 2.5659, "step": 689500 }, { "epoch": 3.42, "learning_rate": 4.8291374979563326e-05, "loss": 2.5464, "step": 690000 }, { "epoch": 3.42, "learning_rate": 4.829013639313724e-05, "loss": 2.529, "step": 690500 }, { "epoch": 3.42, "learning_rate": 4.828889780671116e-05, "loss": 2.5534, "step": 691000 }, { "epoch": 3.43, "learning_rate": 4.828765922028508e-05, "loss": 2.5394, "step": 691500 }, { "epoch": 3.43, "learning_rate": 4.8286420633858994e-05, "loss": 2.5359, "step": 692000 }, { "epoch": 3.43, "learning_rate": 4.8285184524605756e-05, "loss": 2.5425, "step": 692500 }, { "epoch": 3.43, "learning_rate": 4.828394593817967e-05, "loss": 2.5602, "step": 693000 }, { "epoch": 3.44, "learning_rate": 4.828270735175359e-05, "loss": 2.5164, "step": 693500 }, { "epoch": 3.44, "learning_rate": 4.8281468765327506e-05, "loss": 2.5582, "step": 694000 }, { "epoch": 3.44, "learning_rate": 4.8280232656074275e-05, "loss": 2.5522, "step": 694500 }, { "epoch": 3.44, "learning_rate": 4.827899406964819e-05, "loss": 2.5528, "step": 695000 }, { "epoch": 3.45, "learning_rate": 4.827775548322211e-05, "loss": 2.5459, "step": 695500 }, { "epoch": 3.45, "learning_rate": 4.8276516896796026e-05, "loss": 2.5493, "step": 696000 }, { "epoch": 3.45, "learning_rate": 4.827527831036994e-05, "loss": 2.5516, "step": 696500 }, { "epoch": 3.45, "learning_rate": 4.827403972394386e-05, "loss": 2.5535, "step": 697000 }, { "epoch": 3.46, "learning_rate": 4.827280113751778e-05, "loss": 2.5477, "step": 697500 }, { "epoch": 3.46, "learning_rate": 4.8271562551091694e-05, "loss": 2.5291, "step": 698000 }, { "epoch": 3.46, "learning_rate": 4.827032396466561e-05, "loss": 2.5557, "step": 698500 }, { "epoch": 3.46, "learning_rate": 4.826908537823953e-05, "loss": 2.5559, "step": 699000 }, { "epoch": 3.47, "learning_rate": 4.8267846791813444e-05, "loss": 2.5445, "step": 699500 }, { "epoch": 3.47, "learning_rate": 4.8266610682560206e-05, "loss": 2.5345, "step": 700000 }, { "epoch": 3.47, "learning_rate": 4.8265374573306975e-05, "loss": 2.563, "step": 700500 }, { "epoch": 3.47, "learning_rate": 4.826413598688089e-05, "loss": 2.5561, "step": 701000 }, { "epoch": 3.48, "learning_rate": 4.826289740045481e-05, "loss": 2.5687, "step": 701500 }, { "epoch": 3.48, "learning_rate": 4.8261658814028726e-05, "loss": 2.544, "step": 702000 }, { "epoch": 3.48, "learning_rate": 4.826042022760264e-05, "loss": 2.5362, "step": 702500 }, { "epoch": 3.48, "learning_rate": 4.825918164117656e-05, "loss": 2.5309, "step": 703000 }, { "epoch": 3.49, "learning_rate": 4.825794305475048e-05, "loss": 2.5421, "step": 703500 }, { "epoch": 3.49, "learning_rate": 4.8256704468324394e-05, "loss": 2.5295, "step": 704000 }, { "epoch": 3.49, "learning_rate": 4.825546588189831e-05, "loss": 2.5354, "step": 704500 }, { "epoch": 3.49, "learning_rate": 4.825422977264508e-05, "loss": 2.5441, "step": 705000 }, { "epoch": 3.5, "learning_rate": 4.8252991186218996e-05, "loss": 2.5388, "step": 705500 }, { "epoch": 3.5, "learning_rate": 4.8251752599792907e-05, "loss": 2.5489, "step": 706000 }, { "epoch": 3.5, "learning_rate": 4.8250514013366824e-05, "loss": 2.5435, "step": 706500 }, { "epoch": 3.5, "learning_rate": 4.824927790411359e-05, "loss": 2.5567, "step": 707000 }, { "epoch": 3.51, "learning_rate": 4.824803931768751e-05, "loss": 2.5394, "step": 707500 }, { "epoch": 3.51, "learning_rate": 4.8246800731261426e-05, "loss": 2.5111, "step": 708000 }, { "epoch": 3.51, "learning_rate": 4.824556214483534e-05, "loss": 2.5513, "step": 708500 }, { "epoch": 3.51, "learning_rate": 4.824432355840926e-05, "loss": 2.5387, "step": 709000 }, { "epoch": 3.52, "learning_rate": 4.824308497198318e-05, "loss": 2.5602, "step": 709500 }, { "epoch": 3.52, "learning_rate": 4.8241846385557094e-05, "loss": 2.549, "step": 710000 }, { "epoch": 3.52, "learning_rate": 4.824060779913101e-05, "loss": 2.5252, "step": 710500 }, { "epoch": 3.52, "learning_rate": 4.823937416705063e-05, "loss": 2.5328, "step": 711000 }, { "epoch": 3.53, "learning_rate": 4.82381380577974e-05, "loss": 2.522, "step": 711500 }, { "epoch": 3.53, "learning_rate": 4.823689947137132e-05, "loss": 2.5387, "step": 712000 }, { "epoch": 3.53, "learning_rate": 4.8235660884945234e-05, "loss": 2.5568, "step": 712500 }, { "epoch": 3.53, "learning_rate": 4.823442229851915e-05, "loss": 2.5318, "step": 713000 }, { "epoch": 3.53, "learning_rate": 4.823318371209307e-05, "loss": 2.5472, "step": 713500 }, { "epoch": 3.54, "learning_rate": 4.8231945125666985e-05, "loss": 2.5452, "step": 714000 }, { "epoch": 3.54, "learning_rate": 4.823070901641375e-05, "loss": 2.5537, "step": 714500 }, { "epoch": 3.54, "learning_rate": 4.8229470429987664e-05, "loss": 2.5685, "step": 715000 }, { "epoch": 3.54, "learning_rate": 4.822823184356158e-05, "loss": 2.5505, "step": 715500 }, { "epoch": 3.55, "learning_rate": 4.82269932571355e-05, "loss": 2.5497, "step": 716000 }, { "epoch": 3.55, "learning_rate": 4.8225754670709415e-05, "loss": 2.529, "step": 716500 }, { "epoch": 3.55, "learning_rate": 4.822451608428333e-05, "loss": 2.5528, "step": 717000 }, { "epoch": 3.55, "learning_rate": 4.822327749785725e-05, "loss": 2.5036, "step": 717500 }, { "epoch": 3.56, "learning_rate": 4.8222038911431166e-05, "loss": 2.5104, "step": 718000 }, { "epoch": 3.56, "learning_rate": 4.822080032500508e-05, "loss": 2.5557, "step": 718500 }, { "epoch": 3.56, "learning_rate": 4.821956173857899e-05, "loss": 2.5172, "step": 719000 }, { "epoch": 3.56, "learning_rate": 4.821832562932577e-05, "loss": 2.5314, "step": 719500 }, { "epoch": 3.57, "learning_rate": 4.821708952007254e-05, "loss": 2.5631, "step": 720000 }, { "epoch": 3.57, "learning_rate": 4.821585093364645e-05, "loss": 2.5495, "step": 720500 }, { "epoch": 3.57, "learning_rate": 4.8214612347220364e-05, "loss": 2.5225, "step": 721000 }, { "epoch": 3.57, "learning_rate": 4.821337376079428e-05, "loss": 2.5358, "step": 721500 }, { "epoch": 3.58, "learning_rate": 4.82121351743682e-05, "loss": 2.5573, "step": 722000 }, { "epoch": 3.58, "learning_rate": 4.8210896587942115e-05, "loss": 2.5512, "step": 722500 }, { "epoch": 3.58, "learning_rate": 4.820965800151603e-05, "loss": 2.5373, "step": 723000 }, { "epoch": 3.58, "learning_rate": 4.820841941508995e-05, "loss": 2.5567, "step": 723500 }, { "epoch": 3.59, "learning_rate": 4.8207180828663866e-05, "loss": 2.5367, "step": 724000 }, { "epoch": 3.59, "learning_rate": 4.820594224223778e-05, "loss": 2.558, "step": 724500 }, { "epoch": 3.59, "learning_rate": 4.820470613298455e-05, "loss": 2.5263, "step": 725000 }, { "epoch": 3.59, "learning_rate": 4.820346754655847e-05, "loss": 2.5248, "step": 725500 }, { "epoch": 3.6, "learning_rate": 4.8202228960132385e-05, "loss": 2.5235, "step": 726000 }, { "epoch": 3.6, "learning_rate": 4.82009903737063e-05, "loss": 2.5575, "step": 726500 }, { "epoch": 3.6, "learning_rate": 4.819975178728022e-05, "loss": 2.5402, "step": 727000 }, { "epoch": 3.6, "learning_rate": 4.8198513200854136e-05, "loss": 2.5331, "step": 727500 }, { "epoch": 3.61, "learning_rate": 4.8197274614428046e-05, "loss": 2.5494, "step": 728000 }, { "epoch": 3.61, "learning_rate": 4.819603602800196e-05, "loss": 2.5456, "step": 728500 }, { "epoch": 3.61, "learning_rate": 4.819479991874873e-05, "loss": 2.5465, "step": 729000 }, { "epoch": 3.61, "learning_rate": 4.819356133232265e-05, "loss": 2.5497, "step": 729500 }, { "epoch": 3.62, "learning_rate": 4.8192322745896566e-05, "loss": 2.5535, "step": 730000 }, { "epoch": 3.62, "learning_rate": 4.819108415947048e-05, "loss": 2.5379, "step": 730500 }, { "epoch": 3.62, "learning_rate": 4.81898455730444e-05, "loss": 2.5503, "step": 731000 }, { "epoch": 3.62, "learning_rate": 4.818860698661831e-05, "loss": 2.5508, "step": 731500 }, { "epoch": 3.63, "learning_rate": 4.818736840019223e-05, "loss": 2.5412, "step": 732000 }, { "epoch": 3.63, "learning_rate": 4.8186129813766144e-05, "loss": 2.5646, "step": 732500 }, { "epoch": 3.63, "learning_rate": 4.818489370451292e-05, "loss": 2.554, "step": 733000 }, { "epoch": 3.63, "learning_rate": 4.8183655118086836e-05, "loss": 2.5507, "step": 733500 }, { "epoch": 3.64, "learning_rate": 4.818241653166075e-05, "loss": 2.5684, "step": 734000 }, { "epoch": 3.64, "learning_rate": 4.818117794523466e-05, "loss": 2.5564, "step": 734500 }, { "epoch": 3.64, "learning_rate": 4.817993935880858e-05, "loss": 2.5357, "step": 735000 }, { "epoch": 3.64, "learning_rate": 4.81787057267282e-05, "loss": 2.5358, "step": 735500 }, { "epoch": 3.65, "learning_rate": 4.817746961747497e-05, "loss": 2.5506, "step": 736000 }, { "epoch": 3.65, "learning_rate": 4.8176231031048887e-05, "loss": 2.5443, "step": 736500 }, { "epoch": 3.65, "learning_rate": 4.8174992444622804e-05, "loss": 2.5271, "step": 737000 }, { "epoch": 3.65, "learning_rate": 4.817375633536957e-05, "loss": 2.5738, "step": 737500 }, { "epoch": 3.66, "learning_rate": 4.817251774894348e-05, "loss": 2.5456, "step": 738000 }, { "epoch": 3.66, "learning_rate": 4.81712791625174e-05, "loss": 2.5431, "step": 738500 }, { "epoch": 3.66, "learning_rate": 4.8170040576091316e-05, "loss": 2.5176, "step": 739000 }, { "epoch": 3.66, "learning_rate": 4.816880198966523e-05, "loss": 2.5471, "step": 739500 }, { "epoch": 3.67, "learning_rate": 4.816756340323915e-05, "loss": 2.5422, "step": 740000 }, { "epoch": 3.67, "learning_rate": 4.816632481681307e-05, "loss": 2.5587, "step": 740500 }, { "epoch": 3.67, "learning_rate": 4.8165086230386984e-05, "loss": 2.5371, "step": 741000 }, { "epoch": 3.67, "learning_rate": 4.81638476439609e-05, "loss": 2.5467, "step": 741500 }, { "epoch": 3.68, "learning_rate": 4.816260905753482e-05, "loss": 2.5384, "step": 742000 }, { "epoch": 3.68, "learning_rate": 4.8161370471108735e-05, "loss": 2.5456, "step": 742500 }, { "epoch": 3.68, "learning_rate": 4.816013188468265e-05, "loss": 2.5356, "step": 743000 }, { "epoch": 3.68, "learning_rate": 4.815889329825657e-05, "loss": 2.5309, "step": 743500 }, { "epoch": 3.69, "learning_rate": 4.815765718900334e-05, "loss": 2.5272, "step": 744000 }, { "epoch": 3.69, "learning_rate": 4.8156418602577254e-05, "loss": 2.5665, "step": 744500 }, { "epoch": 3.69, "learning_rate": 4.8155182493324016e-05, "loss": 2.5413, "step": 745000 }, { "epoch": 3.69, "learning_rate": 4.815394390689793e-05, "loss": 2.568, "step": 745500 }, { "epoch": 3.7, "learning_rate": 4.815270532047185e-05, "loss": 2.5461, "step": 746000 }, { "epoch": 3.7, "learning_rate": 4.815146673404577e-05, "loss": 2.5391, "step": 746500 }, { "epoch": 3.7, "learning_rate": 4.8150228147619684e-05, "loss": 2.5244, "step": 747000 }, { "epoch": 3.7, "learning_rate": 4.81489895611936e-05, "loss": 2.544, "step": 747500 }, { "epoch": 3.71, "learning_rate": 4.814775097476752e-05, "loss": 2.5462, "step": 748000 }, { "epoch": 3.71, "learning_rate": 4.8146512388341435e-05, "loss": 2.5465, "step": 748500 }, { "epoch": 3.71, "learning_rate": 4.814527380191535e-05, "loss": 2.5428, "step": 749000 }, { "epoch": 3.71, "learning_rate": 4.814403769266212e-05, "loss": 2.555, "step": 749500 }, { "epoch": 3.72, "learning_rate": 4.814280158340889e-05, "loss": 2.563, "step": 750000 }, { "epoch": 3.72, "learning_rate": 4.8141562996982806e-05, "loss": 2.5506, "step": 750500 }, { "epoch": 3.72, "learning_rate": 4.8140324410556717e-05, "loss": 2.5451, "step": 751000 }, { "epoch": 3.72, "learning_rate": 4.8139085824130633e-05, "loss": 2.544, "step": 751500 }, { "epoch": 3.73, "learning_rate": 4.813784971487741e-05, "loss": 2.5365, "step": 752000 }, { "epoch": 3.73, "learning_rate": 4.8136611128451326e-05, "loss": 2.5606, "step": 752500 }, { "epoch": 3.73, "learning_rate": 4.813537254202524e-05, "loss": 2.5512, "step": 753000 }, { "epoch": 3.73, "learning_rate": 4.813413395559916e-05, "loss": 2.5219, "step": 753500 }, { "epoch": 3.74, "learning_rate": 4.813289784634593e-05, "loss": 2.5419, "step": 754000 }, { "epoch": 3.74, "learning_rate": 4.813166173709269e-05, "loss": 2.5459, "step": 754500 }, { "epoch": 3.74, "learning_rate": 4.813042315066661e-05, "loss": 2.5357, "step": 755000 }, { "epoch": 3.74, "learning_rate": 4.8129184564240525e-05, "loss": 2.5289, "step": 755500 }, { "epoch": 3.75, "learning_rate": 4.812794597781444e-05, "loss": 2.5534, "step": 756000 }, { "epoch": 3.75, "learning_rate": 4.812670739138836e-05, "loss": 2.5353, "step": 756500 }, { "epoch": 3.75, "learning_rate": 4.8125468804962275e-05, "loss": 2.5421, "step": 757000 }, { "epoch": 3.75, "learning_rate": 4.8124232695709044e-05, "loss": 2.5251, "step": 757500 }, { "epoch": 3.76, "learning_rate": 4.812299410928296e-05, "loss": 2.5294, "step": 758000 }, { "epoch": 3.76, "learning_rate": 4.812175552285688e-05, "loss": 2.5252, "step": 758500 }, { "epoch": 3.76, "learning_rate": 4.8120516936430795e-05, "loss": 2.5331, "step": 759000 }, { "epoch": 3.76, "learning_rate": 4.811927835000471e-05, "loss": 2.5705, "step": 759500 }, { "epoch": 3.77, "learning_rate": 4.811803976357863e-05, "loss": 2.5623, "step": 760000 }, { "epoch": 3.77, "learning_rate": 4.8116801177152546e-05, "loss": 2.5405, "step": 760500 }, { "epoch": 3.77, "learning_rate": 4.811556259072646e-05, "loss": 2.525, "step": 761000 }, { "epoch": 3.77, "learning_rate": 4.811432400430038e-05, "loss": 2.5315, "step": 761500 }, { "epoch": 3.78, "learning_rate": 4.811308541787429e-05, "loss": 2.5486, "step": 762000 }, { "epoch": 3.78, "learning_rate": 4.811184683144821e-05, "loss": 2.549, "step": 762500 }, { "epoch": 3.78, "learning_rate": 4.8110608245022124e-05, "loss": 2.5359, "step": 763000 }, { "epoch": 3.78, "learning_rate": 4.810936965859604e-05, "loss": 2.5533, "step": 763500 }, { "epoch": 3.79, "learning_rate": 4.810813354934281e-05, "loss": 2.5451, "step": 764000 }, { "epoch": 3.79, "learning_rate": 4.8106894962916726e-05, "loss": 2.5645, "step": 764500 }, { "epoch": 3.79, "learning_rate": 4.810565637649064e-05, "loss": 2.5328, "step": 765000 }, { "epoch": 3.79, "learning_rate": 4.810441779006456e-05, "loss": 2.538, "step": 765500 }, { "epoch": 3.8, "learning_rate": 4.810318168081133e-05, "loss": 2.5277, "step": 766000 }, { "epoch": 3.8, "learning_rate": 4.8101943094385246e-05, "loss": 2.5509, "step": 766500 }, { "epoch": 3.8, "learning_rate": 4.810070450795916e-05, "loss": 2.558, "step": 767000 }, { "epoch": 3.8, "learning_rate": 4.809946592153308e-05, "loss": 2.5482, "step": 767500 }, { "epoch": 3.8, "learning_rate": 4.8098227335107e-05, "loss": 2.5564, "step": 768000 }, { "epoch": 3.81, "learning_rate": 4.809699122585376e-05, "loss": 2.5368, "step": 768500 }, { "epoch": 3.81, "learning_rate": 4.809575511660053e-05, "loss": 2.5808, "step": 769000 }, { "epoch": 3.81, "learning_rate": 4.8094516530174444e-05, "loss": 2.5585, "step": 769500 }, { "epoch": 3.81, "learning_rate": 4.809327794374836e-05, "loss": 2.5754, "step": 770000 }, { "epoch": 3.82, "learning_rate": 4.809203935732228e-05, "loss": 2.5389, "step": 770500 }, { "epoch": 3.82, "learning_rate": 4.8090800770896195e-05, "loss": 2.5298, "step": 771000 }, { "epoch": 3.82, "learning_rate": 4.808956218447011e-05, "loss": 2.5495, "step": 771500 }, { "epoch": 3.82, "learning_rate": 4.808832359804403e-05, "loss": 2.5579, "step": 772000 }, { "epoch": 3.83, "learning_rate": 4.808708748879079e-05, "loss": 2.5362, "step": 772500 }, { "epoch": 3.83, "learning_rate": 4.808584890236471e-05, "loss": 2.5724, "step": 773000 }, { "epoch": 3.83, "learning_rate": 4.8084610315938625e-05, "loss": 2.5317, "step": 773500 }, { "epoch": 3.83, "learning_rate": 4.808337172951254e-05, "loss": 2.5207, "step": 774000 }, { "epoch": 3.84, "learning_rate": 4.808213314308646e-05, "loss": 2.5329, "step": 774500 }, { "epoch": 3.84, "learning_rate": 4.8080894556660376e-05, "loss": 2.5449, "step": 775000 }, { "epoch": 3.84, "learning_rate": 4.807965597023429e-05, "loss": 2.5359, "step": 775500 }, { "epoch": 3.84, "learning_rate": 4.807841986098106e-05, "loss": 2.5733, "step": 776000 }, { "epoch": 3.85, "learning_rate": 4.807718375172783e-05, "loss": 2.543, "step": 776500 }, { "epoch": 3.85, "learning_rate": 4.807594516530175e-05, "loss": 2.5573, "step": 777000 }, { "epoch": 3.85, "learning_rate": 4.8074706578875664e-05, "loss": 2.5554, "step": 777500 }, { "epoch": 3.85, "learning_rate": 4.807346799244958e-05, "loss": 2.5289, "step": 778000 }, { "epoch": 3.86, "learning_rate": 4.807222940602349e-05, "loss": 2.5436, "step": 778500 }, { "epoch": 3.86, "learning_rate": 4.807099081959741e-05, "loss": 2.5695, "step": 779000 }, { "epoch": 3.86, "learning_rate": 4.8069752233171325e-05, "loss": 2.5339, "step": 779500 }, { "epoch": 3.86, "learning_rate": 4.806851364674524e-05, "loss": 2.5585, "step": 780000 }, { "epoch": 3.87, "learning_rate": 4.806727506031916e-05, "loss": 2.5488, "step": 780500 }, { "epoch": 3.87, "learning_rate": 4.8066036473893076e-05, "loss": 2.55, "step": 781000 }, { "epoch": 3.87, "learning_rate": 4.806479788746699e-05, "loss": 2.524, "step": 781500 }, { "epoch": 3.87, "learning_rate": 4.806355930104091e-05, "loss": 2.5214, "step": 782000 }, { "epoch": 3.88, "learning_rate": 4.806232071461483e-05, "loss": 2.5534, "step": 782500 }, { "epoch": 3.88, "learning_rate": 4.8061082128188744e-05, "loss": 2.5238, "step": 783000 }, { "epoch": 3.88, "learning_rate": 4.805984354176266e-05, "loss": 2.5444, "step": 783500 }, { "epoch": 3.88, "learning_rate": 4.805860495533658e-05, "loss": 2.5237, "step": 784000 }, { "epoch": 3.89, "learning_rate": 4.8057366368910494e-05, "loss": 2.5258, "step": 784500 }, { "epoch": 3.89, "learning_rate": 4.8056127782484404e-05, "loss": 2.529, "step": 785000 }, { "epoch": 3.89, "learning_rate": 4.805488919605832e-05, "loss": 2.5469, "step": 785500 }, { "epoch": 3.89, "learning_rate": 4.805365060963224e-05, "loss": 2.5389, "step": 786000 }, { "epoch": 3.9, "learning_rate": 4.8052414500379014e-05, "loss": 2.5637, "step": 786500 }, { "epoch": 3.9, "learning_rate": 4.805117591395293e-05, "loss": 2.5438, "step": 787000 }, { "epoch": 3.9, "learning_rate": 4.804993732752685e-05, "loss": 2.5465, "step": 787500 }, { "epoch": 3.9, "learning_rate": 4.804869874110076e-05, "loss": 2.5289, "step": 788000 }, { "epoch": 3.91, "learning_rate": 4.804746263184753e-05, "loss": 2.5684, "step": 788500 }, { "epoch": 3.91, "learning_rate": 4.8046226522594296e-05, "loss": 2.5539, "step": 789000 }, { "epoch": 3.91, "learning_rate": 4.804498793616821e-05, "loss": 2.5358, "step": 789500 }, { "epoch": 3.91, "learning_rate": 4.804374934974213e-05, "loss": 2.5412, "step": 790000 }, { "epoch": 3.92, "learning_rate": 4.8042510763316046e-05, "loss": 2.5593, "step": 790500 }, { "epoch": 3.92, "learning_rate": 4.804127217688996e-05, "loss": 2.5245, "step": 791000 }, { "epoch": 3.92, "learning_rate": 4.804003359046388e-05, "loss": 2.5548, "step": 791500 }, { "epoch": 3.92, "learning_rate": 4.80387950040378e-05, "loss": 2.5387, "step": 792000 }, { "epoch": 3.93, "learning_rate": 4.8037556417611714e-05, "loss": 2.529, "step": 792500 }, { "epoch": 3.93, "learning_rate": 4.8036320308358476e-05, "loss": 2.5248, "step": 793000 }, { "epoch": 3.93, "learning_rate": 4.803508172193239e-05, "loss": 2.5407, "step": 793500 }, { "epoch": 3.93, "learning_rate": 4.803384313550631e-05, "loss": 2.5353, "step": 794000 }, { "epoch": 3.94, "learning_rate": 4.803260454908023e-05, "loss": 2.5249, "step": 794500 }, { "epoch": 3.94, "learning_rate": 4.8031368439826996e-05, "loss": 2.5362, "step": 795000 }, { "epoch": 3.94, "learning_rate": 4.803012985340091e-05, "loss": 2.5309, "step": 795500 }, { "epoch": 3.94, "learning_rate": 4.802889126697483e-05, "loss": 2.5317, "step": 796000 }, { "epoch": 3.95, "learning_rate": 4.8027652680548746e-05, "loss": 2.5299, "step": 796500 }, { "epoch": 3.95, "learning_rate": 4.8026416571295515e-05, "loss": 2.5359, "step": 797000 }, { "epoch": 3.95, "learning_rate": 4.8025182939215136e-05, "loss": 2.5481, "step": 797500 }, { "epoch": 3.95, "learning_rate": 4.802394435278905e-05, "loss": 2.5442, "step": 798000 }, { "epoch": 3.96, "learning_rate": 4.802270576636297e-05, "loss": 2.5507, "step": 798500 }, { "epoch": 3.96, "learning_rate": 4.802146717993689e-05, "loss": 2.5488, "step": 799000 }, { "epoch": 3.96, "learning_rate": 4.8020228593510804e-05, "loss": 2.5537, "step": 799500 }, { "epoch": 3.96, "learning_rate": 4.801899000708472e-05, "loss": 2.5368, "step": 800000 }, { "epoch": 3.97, "learning_rate": 4.801775142065864e-05, "loss": 2.5376, "step": 800500 }, { "epoch": 3.97, "learning_rate": 4.8016512834232554e-05, "loss": 2.5168, "step": 801000 }, { "epoch": 3.97, "learning_rate": 4.8015276724979317e-05, "loss": 2.5168, "step": 801500 }, { "epoch": 3.97, "learning_rate": 4.8014038138553233e-05, "loss": 2.5698, "step": 802000 }, { "epoch": 3.98, "learning_rate": 4.801279955212715e-05, "loss": 2.5456, "step": 802500 }, { "epoch": 3.98, "learning_rate": 4.801156096570107e-05, "loss": 2.5533, "step": 803000 }, { "epoch": 3.98, "learning_rate": 4.8010322379274984e-05, "loss": 2.5442, "step": 803500 }, { "epoch": 3.98, "learning_rate": 4.80090837928489e-05, "loss": 2.5533, "step": 804000 }, { "epoch": 3.99, "learning_rate": 4.800784768359567e-05, "loss": 2.5392, "step": 804500 }, { "epoch": 3.99, "learning_rate": 4.800661157434244e-05, "loss": 2.5661, "step": 805000 }, { "epoch": 3.99, "learning_rate": 4.8005372987916356e-05, "loss": 2.5666, "step": 805500 }, { "epoch": 3.99, "learning_rate": 4.800413440149027e-05, "loss": 2.5285, "step": 806000 }, { "epoch": 4.0, "learning_rate": 4.800289581506418e-05, "loss": 2.5249, "step": 806500 }, { "epoch": 4.0, "learning_rate": 4.800165970581095e-05, "loss": 2.5377, "step": 807000 }, { "epoch": 4.0, "eval_accuracy": 0.6340747980022936, "eval_accuracy_mlm": 0.5867373422775914, "eval_accuracy_nsp": 0.8572672468906766, "eval_loss": 2.4791698455810547, "eval_runtime": 145.9611, "eval_samples_per_second": 1746.76, "eval_steps_per_second": 72.787, "step": 807372 }, { "epoch": 4.0, "learning_rate": 4.800042111938487e-05, "loss": 2.5294, "step": 807500 }, { "epoch": 4.0, "learning_rate": 4.7999182532958785e-05, "loss": 2.4976, "step": 808000 }, { "epoch": 4.01, "learning_rate": 4.79979439465327e-05, "loss": 2.5014, "step": 808500 }, { "epoch": 4.01, "learning_rate": 4.799670536010662e-05, "loss": 2.5063, "step": 809000 }, { "epoch": 4.01, "learning_rate": 4.7995466773680536e-05, "loss": 2.5132, "step": 809500 }, { "epoch": 4.01, "learning_rate": 4.799422818725445e-05, "loss": 2.5018, "step": 810000 }, { "epoch": 4.02, "learning_rate": 4.799298960082837e-05, "loss": 2.5117, "step": 810500 }, { "epoch": 4.02, "learning_rate": 4.799175101440229e-05, "loss": 2.5232, "step": 811000 }, { "epoch": 4.02, "learning_rate": 4.7990514905149056e-05, "loss": 2.5043, "step": 811500 }, { "epoch": 4.02, "learning_rate": 4.798927631872297e-05, "loss": 2.5114, "step": 812000 }, { "epoch": 4.03, "learning_rate": 4.798803773229689e-05, "loss": 2.5253, "step": 812500 }, { "epoch": 4.03, "learning_rate": 4.7986799145870807e-05, "loss": 2.5181, "step": 813000 }, { "epoch": 4.03, "learning_rate": 4.798556055944472e-05, "loss": 2.5291, "step": 813500 }, { "epoch": 4.03, "learning_rate": 4.7984321973018634e-05, "loss": 2.499, "step": 814000 }, { "epoch": 4.04, "learning_rate": 4.798308338659255e-05, "loss": 2.5222, "step": 814500 }, { "epoch": 4.04, "learning_rate": 4.798184480016647e-05, "loss": 2.5236, "step": 815000 }, { "epoch": 4.04, "learning_rate": 4.7980606213740384e-05, "loss": 2.5314, "step": 815500 }, { "epoch": 4.04, "learning_rate": 4.797937010448715e-05, "loss": 2.5234, "step": 816000 }, { "epoch": 4.05, "learning_rate": 4.797813151806107e-05, "loss": 2.4951, "step": 816500 }, { "epoch": 4.05, "learning_rate": 4.797689293163499e-05, "loss": 2.529, "step": 817000 }, { "epoch": 4.05, "learning_rate": 4.7975654345208904e-05, "loss": 2.5055, "step": 817500 }, { "epoch": 4.05, "learning_rate": 4.797441575878282e-05, "loss": 2.4981, "step": 818000 }, { "epoch": 4.06, "learning_rate": 4.797317964952959e-05, "loss": 2.512, "step": 818500 }, { "epoch": 4.06, "learning_rate": 4.797194106310351e-05, "loss": 2.515, "step": 819000 }, { "epoch": 4.06, "learning_rate": 4.7970702476677424e-05, "loss": 2.4995, "step": 819500 }, { "epoch": 4.06, "learning_rate": 4.7969463890251334e-05, "loss": 2.5222, "step": 820000 }, { "epoch": 4.07, "learning_rate": 4.79682277809981e-05, "loss": 2.5096, "step": 820500 }, { "epoch": 4.07, "learning_rate": 4.796698919457202e-05, "loss": 2.5198, "step": 821000 }, { "epoch": 4.07, "learning_rate": 4.796575308531879e-05, "loss": 2.5011, "step": 821500 }, { "epoch": 4.07, "learning_rate": 4.7964514498892705e-05, "loss": 2.5134, "step": 822000 }, { "epoch": 4.07, "learning_rate": 4.796327591246662e-05, "loss": 2.486, "step": 822500 }, { "epoch": 4.08, "learning_rate": 4.796203732604054e-05, "loss": 2.5452, "step": 823000 }, { "epoch": 4.08, "learning_rate": 4.7960798739614456e-05, "loss": 2.5249, "step": 823500 }, { "epoch": 4.08, "learning_rate": 4.795956015318837e-05, "loss": 2.5277, "step": 824000 }, { "epoch": 4.08, "learning_rate": 4.795832156676229e-05, "loss": 2.517, "step": 824500 }, { "epoch": 4.09, "learning_rate": 4.795708298033621e-05, "loss": 2.5276, "step": 825000 }, { "epoch": 4.09, "learning_rate": 4.7955844393910124e-05, "loss": 2.5128, "step": 825500 }, { "epoch": 4.09, "learning_rate": 4.7954610761829744e-05, "loss": 2.5389, "step": 826000 }, { "epoch": 4.09, "learning_rate": 4.795337217540366e-05, "loss": 2.5206, "step": 826500 }, { "epoch": 4.1, "learning_rate": 4.795213358897757e-05, "loss": 2.5004, "step": 827000 }, { "epoch": 4.1, "learning_rate": 4.795089747972435e-05, "loss": 2.5165, "step": 827500 }, { "epoch": 4.1, "learning_rate": 4.794965889329826e-05, "loss": 2.5113, "step": 828000 }, { "epoch": 4.1, "learning_rate": 4.7948420306872174e-05, "loss": 2.4997, "step": 828500 }, { "epoch": 4.11, "learning_rate": 4.794718172044609e-05, "loss": 2.5323, "step": 829000 }, { "epoch": 4.11, "learning_rate": 4.794594313402001e-05, "loss": 2.5239, "step": 829500 }, { "epoch": 4.11, "learning_rate": 4.794470454759392e-05, "loss": 2.5154, "step": 830000 }, { "epoch": 4.11, "learning_rate": 4.7943465961167835e-05, "loss": 2.5174, "step": 830500 }, { "epoch": 4.12, "learning_rate": 4.794222737474175e-05, "loss": 2.5047, "step": 831000 }, { "epoch": 4.12, "learning_rate": 4.794098878831567e-05, "loss": 2.5309, "step": 831500 }, { "epoch": 4.12, "learning_rate": 4.7939750201889586e-05, "loss": 2.5058, "step": 832000 }, { "epoch": 4.12, "learning_rate": 4.793851409263636e-05, "loss": 2.5134, "step": 832500 }, { "epoch": 4.13, "learning_rate": 4.793727550621028e-05, "loss": 2.5142, "step": 833000 }, { "epoch": 4.13, "learning_rate": 4.793603691978419e-05, "loss": 2.5138, "step": 833500 }, { "epoch": 4.13, "learning_rate": 4.7934798333358106e-05, "loss": 2.5339, "step": 834000 }, { "epoch": 4.13, "learning_rate": 4.793355974693202e-05, "loss": 2.5123, "step": 834500 }, { "epoch": 4.14, "learning_rate": 4.793232363767879e-05, "loss": 2.5296, "step": 835000 }, { "epoch": 4.14, "learning_rate": 4.793108752842556e-05, "loss": 2.5339, "step": 835500 }, { "epoch": 4.14, "learning_rate": 4.792984894199948e-05, "loss": 2.538, "step": 836000 }, { "epoch": 4.14, "learning_rate": 4.7928610355573394e-05, "loss": 2.5208, "step": 836500 }, { "epoch": 4.15, "learning_rate": 4.792737176914731e-05, "loss": 2.4823, "step": 837000 }, { "epoch": 4.15, "learning_rate": 4.792613318272123e-05, "loss": 2.5293, "step": 837500 }, { "epoch": 4.15, "learning_rate": 4.7924894596295145e-05, "loss": 2.5136, "step": 838000 }, { "epoch": 4.15, "learning_rate": 4.792365600986906e-05, "loss": 2.497, "step": 838500 }, { "epoch": 4.16, "learning_rate": 4.792241742344298e-05, "loss": 2.5301, "step": 839000 }, { "epoch": 4.16, "learning_rate": 4.792117883701689e-05, "loss": 2.5154, "step": 839500 }, { "epoch": 4.16, "learning_rate": 4.7919940250590806e-05, "loss": 2.5086, "step": 840000 }, { "epoch": 4.16, "learning_rate": 4.791870166416472e-05, "loss": 2.506, "step": 840500 }, { "epoch": 4.17, "learning_rate": 4.791746307773864e-05, "loss": 2.5198, "step": 841000 }, { "epoch": 4.17, "learning_rate": 4.7916224491312556e-05, "loss": 2.5042, "step": 841500 }, { "epoch": 4.17, "learning_rate": 4.791499085923218e-05, "loss": 2.5122, "step": 842000 }, { "epoch": 4.17, "learning_rate": 4.7913752272806094e-05, "loss": 2.5176, "step": 842500 }, { "epoch": 4.18, "learning_rate": 4.791251368638001e-05, "loss": 2.5354, "step": 843000 }, { "epoch": 4.18, "learning_rate": 4.791127509995393e-05, "loss": 2.515, "step": 843500 }, { "epoch": 4.18, "learning_rate": 4.7910036513527845e-05, "loss": 2.5099, "step": 844000 }, { "epoch": 4.18, "learning_rate": 4.790879792710176e-05, "loss": 2.5267, "step": 844500 }, { "epoch": 4.19, "learning_rate": 4.790755934067568e-05, "loss": 2.5115, "step": 845000 }, { "epoch": 4.19, "learning_rate": 4.7906320754249596e-05, "loss": 2.5051, "step": 845500 }, { "epoch": 4.19, "learning_rate": 4.7905082167823506e-05, "loss": 2.5294, "step": 846000 }, { "epoch": 4.19, "learning_rate": 4.790384605857028e-05, "loss": 2.5381, "step": 846500 }, { "epoch": 4.2, "learning_rate": 4.79026074721442e-05, "loss": 2.509, "step": 847000 }, { "epoch": 4.2, "learning_rate": 4.7901368885718115e-05, "loss": 2.5301, "step": 847500 }, { "epoch": 4.2, "learning_rate": 4.7900130299292025e-05, "loss": 2.5011, "step": 848000 }, { "epoch": 4.2, "learning_rate": 4.789889171286594e-05, "loss": 2.5179, "step": 848500 }, { "epoch": 4.21, "learning_rate": 4.789765312643986e-05, "loss": 2.5437, "step": 849000 }, { "epoch": 4.21, "learning_rate": 4.789641701718663e-05, "loss": 2.5317, "step": 849500 }, { "epoch": 4.21, "learning_rate": 4.78951809079334e-05, "loss": 2.5221, "step": 850000 }, { "epoch": 4.21, "learning_rate": 4.7893942321507314e-05, "loss": 2.5152, "step": 850500 }, { "epoch": 4.22, "learning_rate": 4.789270621225408e-05, "loss": 2.5534, "step": 851000 }, { "epoch": 4.22, "learning_rate": 4.789146762582799e-05, "loss": 2.5463, "step": 851500 }, { "epoch": 4.22, "learning_rate": 4.789022903940191e-05, "loss": 2.528, "step": 852000 }, { "epoch": 4.22, "learning_rate": 4.7888990452975827e-05, "loss": 2.5066, "step": 852500 }, { "epoch": 4.23, "learning_rate": 4.7887751866549744e-05, "loss": 2.5129, "step": 853000 }, { "epoch": 4.23, "learning_rate": 4.788651328012366e-05, "loss": 2.5264, "step": 853500 }, { "epoch": 4.23, "learning_rate": 4.788527469369758e-05, "loss": 2.5151, "step": 854000 }, { "epoch": 4.23, "learning_rate": 4.7884036107271494e-05, "loss": 2.5214, "step": 854500 }, { "epoch": 4.24, "learning_rate": 4.788279752084541e-05, "loss": 2.5097, "step": 855000 }, { "epoch": 4.24, "learning_rate": 4.788155893441933e-05, "loss": 2.5109, "step": 855500 }, { "epoch": 4.24, "learning_rate": 4.7880320347993245e-05, "loss": 2.522, "step": 856000 }, { "epoch": 4.24, "learning_rate": 4.7879084238740014e-05, "loss": 2.5233, "step": 856500 }, { "epoch": 4.25, "learning_rate": 4.787784565231393e-05, "loss": 2.5345, "step": 857000 }, { "epoch": 4.25, "learning_rate": 4.787660706588785e-05, "loss": 2.5216, "step": 857500 }, { "epoch": 4.25, "learning_rate": 4.7875368479461765e-05, "loss": 2.565, "step": 858000 }, { "epoch": 4.25, "learning_rate": 4.787412989303568e-05, "loss": 2.5278, "step": 858500 }, { "epoch": 4.26, "learning_rate": 4.7872893783782444e-05, "loss": 2.5257, "step": 859000 }, { "epoch": 4.26, "learning_rate": 4.787165519735636e-05, "loss": 2.5161, "step": 859500 }, { "epoch": 4.26, "learning_rate": 4.787041661093028e-05, "loss": 2.5183, "step": 860000 }, { "epoch": 4.26, "learning_rate": 4.7869178024504194e-05, "loss": 2.52, "step": 860500 }, { "epoch": 4.27, "learning_rate": 4.786793943807811e-05, "loss": 2.5174, "step": 861000 }, { "epoch": 4.27, "learning_rate": 4.786670085165203e-05, "loss": 2.5502, "step": 861500 }, { "epoch": 4.27, "learning_rate": 4.78654647423988e-05, "loss": 2.5172, "step": 862000 }, { "epoch": 4.27, "learning_rate": 4.7864226155972714e-05, "loss": 2.5307, "step": 862500 }, { "epoch": 4.28, "learning_rate": 4.786298756954663e-05, "loss": 2.5203, "step": 863000 }, { "epoch": 4.28, "learning_rate": 4.786174898312055e-05, "loss": 2.528, "step": 863500 }, { "epoch": 4.28, "learning_rate": 4.7860510396694465e-05, "loss": 2.5293, "step": 864000 }, { "epoch": 4.28, "learning_rate": 4.785927181026838e-05, "loss": 2.5237, "step": 864500 }, { "epoch": 4.29, "learning_rate": 4.78580332238423e-05, "loss": 2.5579, "step": 865000 }, { "epoch": 4.29, "learning_rate": 4.7856794637416216e-05, "loss": 2.5067, "step": 865500 }, { "epoch": 4.29, "learning_rate": 4.785555605099013e-05, "loss": 2.5152, "step": 866000 }, { "epoch": 4.29, "learning_rate": 4.785431746456405e-05, "loss": 2.5302, "step": 866500 }, { "epoch": 4.3, "learning_rate": 4.785308135531081e-05, "loss": 2.5225, "step": 867000 }, { "epoch": 4.3, "learning_rate": 4.785184524605758e-05, "loss": 2.507, "step": 867500 }, { "epoch": 4.3, "learning_rate": 4.78506066596315e-05, "loss": 2.5427, "step": 868000 }, { "epoch": 4.3, "learning_rate": 4.7849370550378266e-05, "loss": 2.508, "step": 868500 }, { "epoch": 4.31, "learning_rate": 4.784813196395218e-05, "loss": 2.5365, "step": 869000 }, { "epoch": 4.31, "learning_rate": 4.78468933775261e-05, "loss": 2.5121, "step": 869500 }, { "epoch": 4.31, "learning_rate": 4.784565479110002e-05, "loss": 2.519, "step": 870000 }, { "epoch": 4.31, "learning_rate": 4.7844416204673934e-05, "loss": 2.5295, "step": 870500 }, { "epoch": 4.32, "learning_rate": 4.784317761824785e-05, "loss": 2.5398, "step": 871000 }, { "epoch": 4.32, "learning_rate": 4.784193903182176e-05, "loss": 2.5191, "step": 871500 }, { "epoch": 4.32, "learning_rate": 4.784070044539568e-05, "loss": 2.523, "step": 872000 }, { "epoch": 4.32, "learning_rate": 4.7839461858969595e-05, "loss": 2.5589, "step": 872500 }, { "epoch": 4.33, "learning_rate": 4.783822327254351e-05, "loss": 2.5345, "step": 873000 }, { "epoch": 4.33, "learning_rate": 4.783698468611743e-05, "loss": 2.5171, "step": 873500 }, { "epoch": 4.33, "learning_rate": 4.7835746099691345e-05, "loss": 2.5048, "step": 874000 }, { "epoch": 4.33, "learning_rate": 4.783450751326526e-05, "loss": 2.5152, "step": 874500 }, { "epoch": 4.34, "learning_rate": 4.783327140401203e-05, "loss": 2.5189, "step": 875000 }, { "epoch": 4.34, "learning_rate": 4.783203281758595e-05, "loss": 2.506, "step": 875500 }, { "epoch": 4.34, "learning_rate": 4.7830794231159865e-05, "loss": 2.5281, "step": 876000 }, { "epoch": 4.34, "learning_rate": 4.782955564473378e-05, "loss": 2.5282, "step": 876500 }, { "epoch": 4.34, "learning_rate": 4.782831953548055e-05, "loss": 2.4956, "step": 877000 }, { "epoch": 4.35, "learning_rate": 4.782708094905447e-05, "loss": 2.5356, "step": 877500 }, { "epoch": 4.35, "learning_rate": 4.7825842362628385e-05, "loss": 2.5127, "step": 878000 }, { "epoch": 4.35, "learning_rate": 4.7824603776202295e-05, "loss": 2.536, "step": 878500 }, { "epoch": 4.35, "learning_rate": 4.782336518977621e-05, "loss": 2.5432, "step": 879000 }, { "epoch": 4.36, "learning_rate": 4.782212660335013e-05, "loss": 2.5164, "step": 879500 }, { "epoch": 4.36, "learning_rate": 4.78208904940969e-05, "loss": 2.5096, "step": 880000 }, { "epoch": 4.36, "learning_rate": 4.7819651907670814e-05, "loss": 2.5279, "step": 880500 }, { "epoch": 4.36, "learning_rate": 4.781841332124473e-05, "loss": 2.5284, "step": 881000 }, { "epoch": 4.37, "learning_rate": 4.781717473481865e-05, "loss": 2.496, "step": 881500 }, { "epoch": 4.37, "learning_rate": 4.7815936148392565e-05, "loss": 2.5169, "step": 882000 }, { "epoch": 4.37, "learning_rate": 4.781469756196648e-05, "loss": 2.5015, "step": 882500 }, { "epoch": 4.37, "learning_rate": 4.781346145271325e-05, "loss": 2.5389, "step": 883000 }, { "epoch": 4.38, "learning_rate": 4.781222286628717e-05, "loss": 2.5235, "step": 883500 }, { "epoch": 4.38, "learning_rate": 4.7810984279861085e-05, "loss": 2.5245, "step": 884000 }, { "epoch": 4.38, "learning_rate": 4.780974817060785e-05, "loss": 2.5214, "step": 884500 }, { "epoch": 4.38, "learning_rate": 4.780851206135462e-05, "loss": 2.5422, "step": 885000 }, { "epoch": 4.39, "learning_rate": 4.780727347492854e-05, "loss": 2.5327, "step": 885500 }, { "epoch": 4.39, "learning_rate": 4.7806034888502456e-05, "loss": 2.5164, "step": 886000 }, { "epoch": 4.39, "learning_rate": 4.780479630207637e-05, "loss": 2.5537, "step": 886500 }, { "epoch": 4.39, "learning_rate": 4.780355771565028e-05, "loss": 2.513, "step": 887000 }, { "epoch": 4.4, "learning_rate": 4.78023191292242e-05, "loss": 2.5196, "step": 887500 }, { "epoch": 4.4, "learning_rate": 4.780108054279812e-05, "loss": 2.5014, "step": 888000 }, { "epoch": 4.4, "learning_rate": 4.7799841956372034e-05, "loss": 2.5319, "step": 888500 }, { "epoch": 4.4, "learning_rate": 4.779860336994595e-05, "loss": 2.5317, "step": 889000 }, { "epoch": 4.41, "learning_rate": 4.779736478351987e-05, "loss": 2.5447, "step": 889500 }, { "epoch": 4.41, "learning_rate": 4.7796126197093785e-05, "loss": 2.513, "step": 890000 }, { "epoch": 4.41, "learning_rate": 4.77948876106677e-05, "loss": 2.5189, "step": 890500 }, { "epoch": 4.41, "learning_rate": 4.7793651501414464e-05, "loss": 2.5308, "step": 891000 }, { "epoch": 4.42, "learning_rate": 4.779241291498838e-05, "loss": 2.5264, "step": 891500 }, { "epoch": 4.42, "learning_rate": 4.77911743285623e-05, "loss": 2.5147, "step": 892000 }, { "epoch": 4.42, "learning_rate": 4.7789935742136215e-05, "loss": 2.5411, "step": 892500 }, { "epoch": 4.42, "learning_rate": 4.7788702110055835e-05, "loss": 2.496, "step": 893000 }, { "epoch": 4.43, "learning_rate": 4.778746352362975e-05, "loss": 2.5043, "step": 893500 }, { "epoch": 4.43, "learning_rate": 4.778622493720367e-05, "loss": 2.51, "step": 894000 }, { "epoch": 4.43, "learning_rate": 4.7784986350777586e-05, "loss": 2.518, "step": 894500 }, { "epoch": 4.43, "learning_rate": 4.77837477643515e-05, "loss": 2.5077, "step": 895000 }, { "epoch": 4.44, "learning_rate": 4.778250917792542e-05, "loss": 2.512, "step": 895500 }, { "epoch": 4.44, "learning_rate": 4.778127306867219e-05, "loss": 2.5042, "step": 896000 }, { "epoch": 4.44, "learning_rate": 4.7780034482246106e-05, "loss": 2.5105, "step": 896500 }, { "epoch": 4.44, "learning_rate": 4.777879589582002e-05, "loss": 2.5393, "step": 897000 }, { "epoch": 4.45, "learning_rate": 4.777755730939394e-05, "loss": 2.5374, "step": 897500 }, { "epoch": 4.45, "learning_rate": 4.7776318722967856e-05, "loss": 2.4974, "step": 898000 }, { "epoch": 4.45, "learning_rate": 4.7775080136541773e-05, "loss": 2.5217, "step": 898500 }, { "epoch": 4.45, "learning_rate": 4.777384155011569e-05, "loss": 2.5425, "step": 899000 }, { "epoch": 4.46, "learning_rate": 4.77726029636896e-05, "loss": 2.5461, "step": 899500 }, { "epoch": 4.46, "learning_rate": 4.777136933160922e-05, "loss": 2.5432, "step": 900000 }, { "epoch": 4.46, "learning_rate": 4.777013074518314e-05, "loss": 2.5105, "step": 900500 }, { "epoch": 4.46, "learning_rate": 4.7768892158757055e-05, "loss": 2.5198, "step": 901000 }, { "epoch": 4.47, "learning_rate": 4.7767656049503824e-05, "loss": 2.5226, "step": 901500 }, { "epoch": 4.47, "learning_rate": 4.776641746307774e-05, "loss": 2.5397, "step": 902000 }, { "epoch": 4.47, "learning_rate": 4.776517887665166e-05, "loss": 2.5173, "step": 902500 }, { "epoch": 4.47, "learning_rate": 4.7763940290225575e-05, "loss": 2.5123, "step": 903000 }, { "epoch": 4.48, "learning_rate": 4.776270170379949e-05, "loss": 2.5164, "step": 903500 }, { "epoch": 4.48, "learning_rate": 4.776146311737341e-05, "loss": 2.5277, "step": 904000 }, { "epoch": 4.48, "learning_rate": 4.7760224530947325e-05, "loss": 2.5085, "step": 904500 }, { "epoch": 4.48, "learning_rate": 4.775898594452124e-05, "loss": 2.5297, "step": 905000 }, { "epoch": 4.49, "learning_rate": 4.775774735809516e-05, "loss": 2.5393, "step": 905500 }, { "epoch": 4.49, "learning_rate": 4.775651124884192e-05, "loss": 2.5383, "step": 906000 }, { "epoch": 4.49, "learning_rate": 4.775527266241584e-05, "loss": 2.5199, "step": 906500 }, { "epoch": 4.49, "learning_rate": 4.7754034075989755e-05, "loss": 2.5323, "step": 907000 }, { "epoch": 4.5, "learning_rate": 4.775279548956367e-05, "loss": 2.5211, "step": 907500 }, { "epoch": 4.5, "learning_rate": 4.775155690313759e-05, "loss": 2.5245, "step": 908000 }, { "epoch": 4.5, "learning_rate": 4.7750318316711506e-05, "loss": 2.5016, "step": 908500 }, { "epoch": 4.5, "learning_rate": 4.774907973028542e-05, "loss": 2.5191, "step": 909000 }, { "epoch": 4.51, "learning_rate": 4.774784114385934e-05, "loss": 2.5471, "step": 909500 }, { "epoch": 4.51, "learning_rate": 4.774660255743326e-05, "loss": 2.5372, "step": 910000 }, { "epoch": 4.51, "learning_rate": 4.7745363971007174e-05, "loss": 2.52, "step": 910500 }, { "epoch": 4.51, "learning_rate": 4.774412538458109e-05, "loss": 2.512, "step": 911000 }, { "epoch": 4.52, "learning_rate": 4.774288679815501e-05, "loss": 2.5325, "step": 911500 }, { "epoch": 4.52, "learning_rate": 4.774164821172892e-05, "loss": 2.5194, "step": 912000 }, { "epoch": 4.52, "learning_rate": 4.7740409625302835e-05, "loss": 2.5473, "step": 912500 }, { "epoch": 4.52, "learning_rate": 4.773917103887675e-05, "loss": 2.5345, "step": 913000 }, { "epoch": 4.53, "learning_rate": 4.773793245245067e-05, "loss": 2.5285, "step": 913500 }, { "epoch": 4.53, "learning_rate": 4.7736693866024585e-05, "loss": 2.5442, "step": 914000 }, { "epoch": 4.53, "learning_rate": 4.7735457756771354e-05, "loss": 2.5358, "step": 914500 }, { "epoch": 4.53, "learning_rate": 4.773421917034527e-05, "loss": 2.5193, "step": 915000 }, { "epoch": 4.54, "learning_rate": 4.773298306109204e-05, "loss": 2.5213, "step": 915500 }, { "epoch": 4.54, "learning_rate": 4.773174447466596e-05, "loss": 2.5143, "step": 916000 }, { "epoch": 4.54, "learning_rate": 4.7730505888239874e-05, "loss": 2.507, "step": 916500 }, { "epoch": 4.54, "learning_rate": 4.772926977898664e-05, "loss": 2.5088, "step": 917000 }, { "epoch": 4.55, "learning_rate": 4.772803119256056e-05, "loss": 2.538, "step": 917500 }, { "epoch": 4.55, "learning_rate": 4.7726792606134476e-05, "loss": 2.5344, "step": 918000 }, { "epoch": 4.55, "learning_rate": 4.7725554019708393e-05, "loss": 2.5217, "step": 918500 }, { "epoch": 4.55, "learning_rate": 4.772431543328231e-05, "loss": 2.5035, "step": 919000 }, { "epoch": 4.56, "learning_rate": 4.772307684685623e-05, "loss": 2.5022, "step": 919500 }, { "epoch": 4.56, "learning_rate": 4.772183826043014e-05, "loss": 2.535, "step": 920000 }, { "epoch": 4.56, "learning_rate": 4.7720599674004054e-05, "loss": 2.481, "step": 920500 }, { "epoch": 4.56, "learning_rate": 4.771936356475082e-05, "loss": 2.5258, "step": 921000 }, { "epoch": 4.57, "learning_rate": 4.771812497832474e-05, "loss": 2.5533, "step": 921500 }, { "epoch": 4.57, "learning_rate": 4.771688639189866e-05, "loss": 2.5407, "step": 922000 }, { "epoch": 4.57, "learning_rate": 4.7715647805472574e-05, "loss": 2.5222, "step": 922500 }, { "epoch": 4.57, "learning_rate": 4.771440921904649e-05, "loss": 2.5202, "step": 923000 }, { "epoch": 4.58, "learning_rate": 4.7713178064138963e-05, "loss": 2.5326, "step": 923500 }, { "epoch": 4.58, "learning_rate": 4.771193947771288e-05, "loss": 2.5148, "step": 924000 }, { "epoch": 4.58, "learning_rate": 4.77107008912868e-05, "loss": 2.5138, "step": 924500 }, { "epoch": 4.58, "learning_rate": 4.770946230486071e-05, "loss": 2.4793, "step": 925000 }, { "epoch": 4.59, "learning_rate": 4.7708223718434624e-05, "loss": 2.5133, "step": 925500 }, { "epoch": 4.59, "learning_rate": 4.770698513200854e-05, "loss": 2.5307, "step": 926000 }, { "epoch": 4.59, "learning_rate": 4.770574902275532e-05, "loss": 2.5068, "step": 926500 }, { "epoch": 4.59, "learning_rate": 4.7704510436329234e-05, "loss": 2.528, "step": 927000 }, { "epoch": 4.6, "learning_rate": 4.7703271849903144e-05, "loss": 2.5122, "step": 927500 }, { "epoch": 4.6, "learning_rate": 4.770203326347706e-05, "loss": 2.5372, "step": 928000 }, { "epoch": 4.6, "learning_rate": 4.770079467705098e-05, "loss": 2.5282, "step": 928500 }, { "epoch": 4.6, "learning_rate": 4.7699556090624895e-05, "loss": 2.5162, "step": 929000 }, { "epoch": 4.61, "learning_rate": 4.769831750419881e-05, "loss": 2.5279, "step": 929500 }, { "epoch": 4.61, "learning_rate": 4.769707891777272e-05, "loss": 2.5302, "step": 930000 }, { "epoch": 4.61, "learning_rate": 4.769584033134664e-05, "loss": 2.5265, "step": 930500 }, { "epoch": 4.61, "learning_rate": 4.7694601744920556e-05, "loss": 2.5167, "step": 931000 }, { "epoch": 4.61, "learning_rate": 4.769336315849447e-05, "loss": 2.5033, "step": 931500 }, { "epoch": 4.62, "learning_rate": 4.769212457206839e-05, "loss": 2.5239, "step": 932000 }, { "epoch": 4.62, "learning_rate": 4.769088846281516e-05, "loss": 2.5485, "step": 932500 }, { "epoch": 4.62, "learning_rate": 4.7689649876389075e-05, "loss": 2.5165, "step": 933000 }, { "epoch": 4.62, "learning_rate": 4.768841376713585e-05, "loss": 2.5295, "step": 933500 }, { "epoch": 4.63, "learning_rate": 4.768717518070977e-05, "loss": 2.5282, "step": 934000 }, { "epoch": 4.63, "learning_rate": 4.768593659428368e-05, "loss": 2.505, "step": 934500 }, { "epoch": 4.63, "learning_rate": 4.768470048503045e-05, "loss": 2.5145, "step": 935000 }, { "epoch": 4.63, "learning_rate": 4.7683461898604364e-05, "loss": 2.5239, "step": 935500 }, { "epoch": 4.64, "learning_rate": 4.768222331217828e-05, "loss": 2.512, "step": 936000 }, { "epoch": 4.64, "learning_rate": 4.76809847257522e-05, "loss": 2.5265, "step": 936500 }, { "epoch": 4.64, "learning_rate": 4.7679746139326114e-05, "loss": 2.528, "step": 937000 }, { "epoch": 4.64, "learning_rate": 4.7678507552900025e-05, "loss": 2.5295, "step": 937500 }, { "epoch": 4.65, "learning_rate": 4.767726896647394e-05, "loss": 2.4952, "step": 938000 }, { "epoch": 4.65, "learning_rate": 4.767603038004786e-05, "loss": 2.5174, "step": 938500 }, { "epoch": 4.65, "learning_rate": 4.7674791793621775e-05, "loss": 2.5244, "step": 939000 }, { "epoch": 4.65, "learning_rate": 4.767355320719569e-05, "loss": 2.5318, "step": 939500 }, { "epoch": 4.66, "learning_rate": 4.767231462076961e-05, "loss": 2.5375, "step": 940000 }, { "epoch": 4.66, "learning_rate": 4.7671078511516385e-05, "loss": 2.5271, "step": 940500 }, { "epoch": 4.66, "learning_rate": 4.7669839925090295e-05, "loss": 2.5511, "step": 941000 }, { "epoch": 4.66, "learning_rate": 4.766860133866421e-05, "loss": 2.5225, "step": 941500 }, { "epoch": 4.67, "learning_rate": 4.766736275223813e-05, "loss": 2.5055, "step": 942000 }, { "epoch": 4.67, "learning_rate": 4.7666124165812046e-05, "loss": 2.5206, "step": 942500 }, { "epoch": 4.67, "learning_rate": 4.766488557938596e-05, "loss": 2.5182, "step": 943000 }, { "epoch": 4.67, "learning_rate": 4.766364947013273e-05, "loss": 2.5254, "step": 943500 }, { "epoch": 4.68, "learning_rate": 4.766241088370664e-05, "loss": 2.4945, "step": 944000 }, { "epoch": 4.68, "learning_rate": 4.766117229728056e-05, "loss": 2.5185, "step": 944500 }, { "epoch": 4.68, "learning_rate": 4.7659933710854476e-05, "loss": 2.5111, "step": 945000 }, { "epoch": 4.68, "learning_rate": 4.765869512442839e-05, "loss": 2.5186, "step": 945500 }, { "epoch": 4.69, "learning_rate": 4.765745653800231e-05, "loss": 2.5001, "step": 946000 }, { "epoch": 4.69, "learning_rate": 4.7656217951576226e-05, "loss": 2.5132, "step": 946500 }, { "epoch": 4.69, "learning_rate": 4.7654981842322995e-05, "loss": 2.5195, "step": 947000 }, { "epoch": 4.69, "learning_rate": 4.765374325589691e-05, "loss": 2.5179, "step": 947500 }, { "epoch": 4.7, "learning_rate": 4.765250466947083e-05, "loss": 2.5338, "step": 948000 }, { "epoch": 4.7, "learning_rate": 4.7651266083044746e-05, "loss": 2.5614, "step": 948500 }, { "epoch": 4.7, "learning_rate": 4.7650029973791515e-05, "loss": 2.5336, "step": 949000 }, { "epoch": 4.7, "learning_rate": 4.764879138736543e-05, "loss": 2.4901, "step": 949500 }, { "epoch": 4.71, "learning_rate": 4.76475552781122e-05, "loss": 2.5283, "step": 950000 }, { "epoch": 4.71, "learning_rate": 4.764631669168612e-05, "loss": 2.5171, "step": 950500 }, { "epoch": 4.71, "learning_rate": 4.764508058243288e-05, "loss": 2.5353, "step": 951000 }, { "epoch": 4.71, "learning_rate": 4.7643841996006796e-05, "loss": 2.5246, "step": 951500 }, { "epoch": 4.72, "learning_rate": 4.764260340958071e-05, "loss": 2.5173, "step": 952000 }, { "epoch": 4.72, "learning_rate": 4.764136482315463e-05, "loss": 2.5125, "step": 952500 }, { "epoch": 4.72, "learning_rate": 4.764012623672855e-05, "loss": 2.5237, "step": 953000 }, { "epoch": 4.72, "learning_rate": 4.7638887650302464e-05, "loss": 2.5033, "step": 953500 }, { "epoch": 4.73, "learning_rate": 4.763764906387638e-05, "loss": 2.5233, "step": 954000 }, { "epoch": 4.73, "learning_rate": 4.76364104774503e-05, "loss": 2.5408, "step": 954500 }, { "epoch": 4.73, "learning_rate": 4.7635171891024215e-05, "loss": 2.5335, "step": 955000 }, { "epoch": 4.73, "learning_rate": 4.763393330459813e-05, "loss": 2.5481, "step": 955500 }, { "epoch": 4.74, "learning_rate": 4.763269471817205e-05, "loss": 2.5219, "step": 956000 }, { "epoch": 4.74, "learning_rate": 4.763145613174596e-05, "loss": 2.5032, "step": 956500 }, { "epoch": 4.74, "learning_rate": 4.7630217545319876e-05, "loss": 2.5114, "step": 957000 }, { "epoch": 4.74, "learning_rate": 4.762897895889379e-05, "loss": 2.5286, "step": 957500 }, { "epoch": 4.75, "learning_rate": 4.762774284964057e-05, "loss": 2.5163, "step": 958000 }, { "epoch": 4.75, "learning_rate": 4.7626504263214485e-05, "loss": 2.5127, "step": 958500 }, { "epoch": 4.75, "learning_rate": 4.76252656767884e-05, "loss": 2.5466, "step": 959000 }, { "epoch": 4.75, "learning_rate": 4.762402709036231e-05, "loss": 2.5094, "step": 959500 }, { "epoch": 4.76, "learning_rate": 4.762278850393623e-05, "loss": 2.5276, "step": 960000 }, { "epoch": 4.76, "learning_rate": 4.7621549917510146e-05, "loss": 2.5316, "step": 960500 }, { "epoch": 4.76, "learning_rate": 4.762031133108406e-05, "loss": 2.5051, "step": 961000 }, { "epoch": 4.76, "learning_rate": 4.761907522183083e-05, "loss": 2.495, "step": 961500 }, { "epoch": 4.77, "learning_rate": 4.761783663540475e-05, "loss": 2.5125, "step": 962000 }, { "epoch": 4.77, "learning_rate": 4.761659804897866e-05, "loss": 2.5293, "step": 962500 }, { "epoch": 4.77, "learning_rate": 4.7615359462552576e-05, "loss": 2.5661, "step": 963000 }, { "epoch": 4.77, "learning_rate": 4.761412087612649e-05, "loss": 2.5403, "step": 963500 }, { "epoch": 4.78, "learning_rate": 4.761288228970041e-05, "loss": 2.4998, "step": 964000 }, { "epoch": 4.78, "learning_rate": 4.761164370327433e-05, "loss": 2.5193, "step": 964500 }, { "epoch": 4.78, "learning_rate": 4.7610405116848244e-05, "loss": 2.5213, "step": 965000 }, { "epoch": 4.78, "learning_rate": 4.760916653042216e-05, "loss": 2.5163, "step": 965500 }, { "epoch": 4.79, "learning_rate": 4.760792794399608e-05, "loss": 2.5139, "step": 966000 }, { "epoch": 4.79, "learning_rate": 4.7606689357569994e-05, "loss": 2.5008, "step": 966500 }, { "epoch": 4.79, "learning_rate": 4.760545324831676e-05, "loss": 2.5123, "step": 967000 }, { "epoch": 4.79, "learning_rate": 4.760421466189068e-05, "loss": 2.519, "step": 967500 }, { "epoch": 4.8, "learning_rate": 4.76029760754646e-05, "loss": 2.5064, "step": 968000 }, { "epoch": 4.8, "learning_rate": 4.7601739966211366e-05, "loss": 2.5199, "step": 968500 }, { "epoch": 4.8, "learning_rate": 4.7600501379785276e-05, "loss": 2.5431, "step": 969000 }, { "epoch": 4.8, "learning_rate": 4.759926279335919e-05, "loss": 2.5357, "step": 969500 }, { "epoch": 4.81, "learning_rate": 4.759802420693311e-05, "loss": 2.5141, "step": 970000 }, { "epoch": 4.81, "learning_rate": 4.759678562050703e-05, "loss": 2.5247, "step": 970500 }, { "epoch": 4.81, "learning_rate": 4.7595547034080944e-05, "loss": 2.5246, "step": 971000 }, { "epoch": 4.81, "learning_rate": 4.759430844765486e-05, "loss": 2.5307, "step": 971500 }, { "epoch": 4.82, "learning_rate": 4.759307233840163e-05, "loss": 2.5291, "step": 972000 }, { "epoch": 4.82, "learning_rate": 4.7591833751975546e-05, "loss": 2.5249, "step": 972500 }, { "epoch": 4.82, "learning_rate": 4.759059516554946e-05, "loss": 2.5034, "step": 973000 }, { "epoch": 4.82, "learning_rate": 4.758935657912338e-05, "loss": 2.4944, "step": 973500 }, { "epoch": 4.83, "learning_rate": 4.75881179926973e-05, "loss": 2.5173, "step": 974000 }, { "epoch": 4.83, "learning_rate": 4.7586881883444066e-05, "loss": 2.5288, "step": 974500 }, { "epoch": 4.83, "learning_rate": 4.7585643297017976e-05, "loss": 2.5382, "step": 975000 }, { "epoch": 4.83, "learning_rate": 4.758440471059189e-05, "loss": 2.5082, "step": 975500 }, { "epoch": 4.84, "learning_rate": 4.758316612416581e-05, "loss": 2.4987, "step": 976000 }, { "epoch": 4.84, "learning_rate": 4.758192753773973e-05, "loss": 2.5288, "step": 976500 }, { "epoch": 4.84, "learning_rate": 4.7580693905659354e-05, "loss": 2.5066, "step": 977000 }, { "epoch": 4.84, "learning_rate": 4.757945531923327e-05, "loss": 2.5299, "step": 977500 }, { "epoch": 4.85, "learning_rate": 4.757821673280718e-05, "loss": 2.5367, "step": 978000 }, { "epoch": 4.85, "learning_rate": 4.75769781463811e-05, "loss": 2.5034, "step": 978500 }, { "epoch": 4.85, "learning_rate": 4.7575739559955015e-05, "loss": 2.5142, "step": 979000 }, { "epoch": 4.85, "learning_rate": 4.757450097352893e-05, "loss": 2.5349, "step": 979500 }, { "epoch": 4.86, "learning_rate": 4.757326238710285e-05, "loss": 2.5567, "step": 980000 }, { "epoch": 4.86, "learning_rate": 4.7572023800676766e-05, "loss": 2.5057, "step": 980500 }, { "epoch": 4.86, "learning_rate": 4.757078521425068e-05, "loss": 2.5152, "step": 981000 }, { "epoch": 4.86, "learning_rate": 4.756954662782459e-05, "loss": 2.5161, "step": 981500 }, { "epoch": 4.87, "learning_rate": 4.756830804139851e-05, "loss": 2.4941, "step": 982000 }, { "epoch": 4.87, "learning_rate": 4.756706945497243e-05, "loss": 2.5139, "step": 982500 }, { "epoch": 4.87, "learning_rate": 4.7565830868546344e-05, "loss": 2.5407, "step": 983000 }, { "epoch": 4.87, "learning_rate": 4.756459475929312e-05, "loss": 2.5312, "step": 983500 }, { "epoch": 4.88, "learning_rate": 4.7563356172867036e-05, "loss": 2.5106, "step": 984000 }, { "epoch": 4.88, "learning_rate": 4.756211758644095e-05, "loss": 2.5268, "step": 984500 }, { "epoch": 4.88, "learning_rate": 4.7560879000014864e-05, "loss": 2.537, "step": 985000 }, { "epoch": 4.88, "learning_rate": 4.755964041358878e-05, "loss": 2.5343, "step": 985500 }, { "epoch": 4.88, "learning_rate": 4.75584018271627e-05, "loss": 2.5492, "step": 986000 }, { "epoch": 4.89, "learning_rate": 4.7557163240736614e-05, "loss": 2.5359, "step": 986500 }, { "epoch": 4.89, "learning_rate": 4.755592465431053e-05, "loss": 2.5285, "step": 987000 }, { "epoch": 4.89, "learning_rate": 4.755468854505729e-05, "loss": 2.5059, "step": 987500 }, { "epoch": 4.89, "learning_rate": 4.755344995863121e-05, "loss": 2.5226, "step": 988000 }, { "epoch": 4.9, "learning_rate": 4.755221137220513e-05, "loss": 2.5191, "step": 988500 }, { "epoch": 4.9, "learning_rate": 4.75509752629519e-05, "loss": 2.5028, "step": 989000 }, { "epoch": 4.9, "learning_rate": 4.754973915369867e-05, "loss": 2.5224, "step": 989500 }, { "epoch": 4.9, "learning_rate": 4.7548503044445434e-05, "loss": 2.5351, "step": 990000 }, { "epoch": 4.91, "learning_rate": 4.754726445801935e-05, "loss": 2.5295, "step": 990500 }, { "epoch": 4.91, "learning_rate": 4.754602587159327e-05, "loss": 2.5256, "step": 991000 }, { "epoch": 4.91, "learning_rate": 4.7544787285167184e-05, "loss": 2.5175, "step": 991500 }, { "epoch": 4.91, "learning_rate": 4.75435486987411e-05, "loss": 2.5368, "step": 992000 }, { "epoch": 4.92, "learning_rate": 4.754231011231502e-05, "loss": 2.5184, "step": 992500 }, { "epoch": 4.92, "learning_rate": 4.7541071525888935e-05, "loss": 2.5307, "step": 993000 }, { "epoch": 4.92, "learning_rate": 4.753983293946285e-05, "loss": 2.5217, "step": 993500 }, { "epoch": 4.92, "learning_rate": 4.753859435303677e-05, "loss": 2.5294, "step": 994000 }, { "epoch": 4.93, "learning_rate": 4.753735824378354e-05, "loss": 2.505, "step": 994500 }, { "epoch": 4.93, "learning_rate": 4.7536119657357455e-05, "loss": 2.5285, "step": 995000 }, { "epoch": 4.93, "learning_rate": 4.753488107093137e-05, "loss": 2.5311, "step": 995500 }, { "epoch": 4.93, "learning_rate": 4.753364248450529e-05, "loss": 2.5233, "step": 996000 }, { "epoch": 4.94, "learning_rate": 4.7532403898079206e-05, "loss": 2.5115, "step": 996500 }, { "epoch": 4.94, "learning_rate": 4.753116531165312e-05, "loss": 2.5477, "step": 997000 }, { "epoch": 4.94, "learning_rate": 4.7529929202399885e-05, "loss": 2.5459, "step": 997500 }, { "epoch": 4.94, "learning_rate": 4.75286906159738e-05, "loss": 2.5032, "step": 998000 }, { "epoch": 4.95, "learning_rate": 4.752745202954772e-05, "loss": 2.506, "step": 998500 }, { "epoch": 4.95, "learning_rate": 4.7526213443121635e-05, "loss": 2.5271, "step": 999000 }, { "epoch": 4.95, "learning_rate": 4.7524977333868404e-05, "loss": 2.5142, "step": 999500 }, { "epoch": 4.95, "learning_rate": 4.752373874744232e-05, "loss": 2.5061, "step": 1000000 }, { "epoch": 4.96, "learning_rate": 4.752250016101624e-05, "loss": 2.5104, "step": 1000500 }, { "epoch": 4.96, "learning_rate": 4.7521261574590155e-05, "loss": 2.5104, "step": 1001000 }, { "epoch": 4.96, "learning_rate": 4.752002298816407e-05, "loss": 2.5243, "step": 1001500 }, { "epoch": 4.96, "learning_rate": 4.751878440173799e-05, "loss": 2.5186, "step": 1002000 }, { "epoch": 4.97, "learning_rate": 4.7517545815311906e-05, "loss": 2.52, "step": 1002500 }, { "epoch": 4.97, "learning_rate": 4.751630722888582e-05, "loss": 2.5164, "step": 1003000 }, { "epoch": 4.97, "learning_rate": 4.751506864245974e-05, "loss": 2.5276, "step": 1003500 }, { "epoch": 4.97, "learning_rate": 4.7513830056033656e-05, "loss": 2.5109, "step": 1004000 }, { "epoch": 4.98, "learning_rate": 4.751259146960757e-05, "loss": 2.5262, "step": 1004500 }, { "epoch": 4.98, "learning_rate": 4.7511352883181484e-05, "loss": 2.5065, "step": 1005000 }, { "epoch": 4.98, "learning_rate": 4.751011677392825e-05, "loss": 2.5033, "step": 1005500 }, { "epoch": 4.98, "learning_rate": 4.750887818750217e-05, "loss": 2.5218, "step": 1006000 }, { "epoch": 4.99, "learning_rate": 4.7507639601076086e-05, "loss": 2.5046, "step": 1006500 }, { "epoch": 4.99, "learning_rate": 4.750640101465e-05, "loss": 2.5356, "step": 1007000 }, { "epoch": 4.99, "learning_rate": 4.750516490539677e-05, "loss": 2.5308, "step": 1007500 }, { "epoch": 4.99, "learning_rate": 4.750392879614354e-05, "loss": 2.5266, "step": 1008000 }, { "epoch": 5.0, "learning_rate": 4.750269020971745e-05, "loss": 2.5191, "step": 1008500 }, { "epoch": 5.0, "learning_rate": 4.750145162329137e-05, "loss": 2.5071, "step": 1009000 }, { "epoch": 5.0, "eval_accuracy": 0.635423503873417, "eval_accuracy_mlm": 0.588502945831283, "eval_accuracy_nsp": 0.8568240383747975, "eval_loss": 2.4675350189208984, "eval_runtime": 146.0196, "eval_samples_per_second": 1746.06, "eval_steps_per_second": 72.757, "step": 1009215 }, { "epoch": 5.0, "learning_rate": 4.7500213036865285e-05, "loss": 2.5021, "step": 1009500 }, { "epoch": 5.0, "learning_rate": 4.74989744504392e-05, "loss": 2.4848, "step": 1010000 }, { "epoch": 5.01, "learning_rate": 4.749773586401312e-05, "loss": 2.4841, "step": 1010500 }, { "epoch": 5.01, "learning_rate": 4.7496497277587036e-05, "loss": 2.4968, "step": 1011000 }, { "epoch": 5.01, "learning_rate": 4.749525869116095e-05, "loss": 2.4742, "step": 1011500 }, { "epoch": 5.01, "learning_rate": 4.749402010473487e-05, "loss": 2.5161, "step": 1012000 }, { "epoch": 5.02, "learning_rate": 4.7492781518308786e-05, "loss": 2.496, "step": 1012500 }, { "epoch": 5.02, "learning_rate": 4.74915429318827e-05, "loss": 2.4761, "step": 1013000 }, { "epoch": 5.02, "learning_rate": 4.749030434545662e-05, "loss": 2.514, "step": 1013500 }, { "epoch": 5.02, "learning_rate": 4.748906823620339e-05, "loss": 2.4858, "step": 1014000 }, { "epoch": 5.03, "learning_rate": 4.7487829649777306e-05, "loss": 2.5104, "step": 1014500 }, { "epoch": 5.03, "learning_rate": 4.7486593540524075e-05, "loss": 2.4841, "step": 1015000 }, { "epoch": 5.03, "learning_rate": 4.7485354954097985e-05, "loss": 2.4851, "step": 1015500 }, { "epoch": 5.03, "learning_rate": 4.748411884484476e-05, "loss": 2.4796, "step": 1016000 }, { "epoch": 5.04, "learning_rate": 4.748288025841867e-05, "loss": 2.512, "step": 1016500 }, { "epoch": 5.04, "learning_rate": 4.7481644149165446e-05, "loss": 2.4949, "step": 1017000 }, { "epoch": 5.04, "learning_rate": 4.748040556273936e-05, "loss": 2.4787, "step": 1017500 }, { "epoch": 5.04, "learning_rate": 4.747916697631328e-05, "loss": 2.4854, "step": 1018000 }, { "epoch": 5.05, "learning_rate": 4.74779283898872e-05, "loss": 2.505, "step": 1018500 }, { "epoch": 5.05, "learning_rate": 4.7476689803461114e-05, "loss": 2.4824, "step": 1019000 }, { "epoch": 5.05, "learning_rate": 4.7475451217035024e-05, "loss": 2.5006, "step": 1019500 }, { "epoch": 5.05, "learning_rate": 4.747421263060894e-05, "loss": 2.4915, "step": 1020000 }, { "epoch": 5.06, "learning_rate": 4.747297652135571e-05, "loss": 2.5021, "step": 1020500 }, { "epoch": 5.06, "learning_rate": 4.747173793492963e-05, "loss": 2.4845, "step": 1021000 }, { "epoch": 5.06, "learning_rate": 4.7470499348503544e-05, "loss": 2.4846, "step": 1021500 }, { "epoch": 5.06, "learning_rate": 4.746926076207746e-05, "loss": 2.4703, "step": 1022000 }, { "epoch": 5.07, "learning_rate": 4.746802465282423e-05, "loss": 2.5154, "step": 1022500 }, { "epoch": 5.07, "learning_rate": 4.7466786066398146e-05, "loss": 2.4895, "step": 1023000 }, { "epoch": 5.07, "learning_rate": 4.746554747997206e-05, "loss": 2.4909, "step": 1023500 }, { "epoch": 5.07, "learning_rate": 4.746430889354598e-05, "loss": 2.4931, "step": 1024000 }, { "epoch": 5.08, "learning_rate": 4.74630703071199e-05, "loss": 2.4837, "step": 1024500 }, { "epoch": 5.08, "learning_rate": 4.746183667503951e-05, "loss": 2.499, "step": 1025000 }, { "epoch": 5.08, "learning_rate": 4.746059808861343e-05, "loss": 2.5291, "step": 1025500 }, { "epoch": 5.08, "learning_rate": 4.7459359502187345e-05, "loss": 2.4858, "step": 1026000 }, { "epoch": 5.09, "learning_rate": 4.745812091576126e-05, "loss": 2.4969, "step": 1026500 }, { "epoch": 5.09, "learning_rate": 4.745688232933518e-05, "loss": 2.4516, "step": 1027000 }, { "epoch": 5.09, "learning_rate": 4.7455643742909096e-05, "loss": 2.5106, "step": 1027500 }, { "epoch": 5.09, "learning_rate": 4.745440515648301e-05, "loss": 2.4804, "step": 1028000 }, { "epoch": 5.1, "learning_rate": 4.745316657005693e-05, "loss": 2.4969, "step": 1028500 }, { "epoch": 5.1, "learning_rate": 4.7451927983630846e-05, "loss": 2.5142, "step": 1029000 }, { "epoch": 5.1, "learning_rate": 4.745068939720476e-05, "loss": 2.4816, "step": 1029500 }, { "epoch": 5.1, "learning_rate": 4.744945081077868e-05, "loss": 2.4882, "step": 1030000 }, { "epoch": 5.11, "learning_rate": 4.74482122243526e-05, "loss": 2.4912, "step": 1030500 }, { "epoch": 5.11, "learning_rate": 4.7446973637926514e-05, "loss": 2.4795, "step": 1031000 }, { "epoch": 5.11, "learning_rate": 4.744573505150043e-05, "loss": 2.498, "step": 1031500 }, { "epoch": 5.11, "learning_rate": 4.744449646507434e-05, "loss": 2.5025, "step": 1032000 }, { "epoch": 5.12, "learning_rate": 4.744325787864826e-05, "loss": 2.4867, "step": 1032500 }, { "epoch": 5.12, "learning_rate": 4.7442019292222175e-05, "loss": 2.5047, "step": 1033000 }, { "epoch": 5.12, "learning_rate": 4.744078070579609e-05, "loss": 2.493, "step": 1033500 }, { "epoch": 5.12, "learning_rate": 4.743954211937001e-05, "loss": 2.4766, "step": 1034000 }, { "epoch": 5.13, "learning_rate": 4.7438303532943926e-05, "loss": 2.4811, "step": 1034500 }, { "epoch": 5.13, "learning_rate": 4.743706742369069e-05, "loss": 2.4792, "step": 1035000 }, { "epoch": 5.13, "learning_rate": 4.7435828837264605e-05, "loss": 2.4694, "step": 1035500 }, { "epoch": 5.13, "learning_rate": 4.743459272801138e-05, "loss": 2.4567, "step": 1036000 }, { "epoch": 5.14, "learning_rate": 4.7433359095930994e-05, "loss": 2.4914, "step": 1036500 }, { "epoch": 5.14, "learning_rate": 4.743212050950491e-05, "loss": 2.4963, "step": 1037000 }, { "epoch": 5.14, "learning_rate": 4.743088192307883e-05, "loss": 2.5135, "step": 1037500 }, { "epoch": 5.14, "learning_rate": 4.7429643336652745e-05, "loss": 2.5065, "step": 1038000 }, { "epoch": 5.15, "learning_rate": 4.742840475022666e-05, "loss": 2.5155, "step": 1038500 }, { "epoch": 5.15, "learning_rate": 4.742716616380058e-05, "loss": 2.5172, "step": 1039000 }, { "epoch": 5.15, "learning_rate": 4.7425927577374496e-05, "loss": 2.4787, "step": 1039500 }, { "epoch": 5.15, "learning_rate": 4.742468899094841e-05, "loss": 2.4847, "step": 1040000 }, { "epoch": 5.15, "learning_rate": 4.742345040452233e-05, "loss": 2.5204, "step": 1040500 }, { "epoch": 5.16, "learning_rate": 4.742221181809625e-05, "loss": 2.4876, "step": 1041000 }, { "epoch": 5.16, "learning_rate": 4.7420973231670164e-05, "loss": 2.5044, "step": 1041500 }, { "epoch": 5.16, "learning_rate": 4.741973464524408e-05, "loss": 2.4813, "step": 1042000 }, { "epoch": 5.16, "learning_rate": 4.7418496058818e-05, "loss": 2.4927, "step": 1042500 }, { "epoch": 5.17, "learning_rate": 4.7417257472391914e-05, "loss": 2.5097, "step": 1043000 }, { "epoch": 5.17, "learning_rate": 4.741601888596583e-05, "loss": 2.5111, "step": 1043500 }, { "epoch": 5.17, "learning_rate": 4.741478277671259e-05, "loss": 2.5181, "step": 1044000 }, { "epoch": 5.17, "learning_rate": 4.741354419028651e-05, "loss": 2.5167, "step": 1044500 }, { "epoch": 5.18, "learning_rate": 4.741230560386043e-05, "loss": 2.5013, "step": 1045000 }, { "epoch": 5.18, "learning_rate": 4.7411067017434344e-05, "loss": 2.516, "step": 1045500 }, { "epoch": 5.18, "learning_rate": 4.740982843100826e-05, "loss": 2.4898, "step": 1046000 }, { "epoch": 5.18, "learning_rate": 4.740858984458218e-05, "loss": 2.5233, "step": 1046500 }, { "epoch": 5.19, "learning_rate": 4.7407351258156095e-05, "loss": 2.5021, "step": 1047000 }, { "epoch": 5.19, "learning_rate": 4.7406115148902864e-05, "loss": 2.4977, "step": 1047500 }, { "epoch": 5.19, "learning_rate": 4.740487656247678e-05, "loss": 2.5229, "step": 1048000 }, { "epoch": 5.19, "learning_rate": 4.74036379760507e-05, "loss": 2.5149, "step": 1048500 }, { "epoch": 5.2, "learning_rate": 4.7402399389624615e-05, "loss": 2.5227, "step": 1049000 }, { "epoch": 5.2, "learning_rate": 4.740116328037138e-05, "loss": 2.5052, "step": 1049500 }, { "epoch": 5.2, "learning_rate": 4.7399924693945293e-05, "loss": 2.5153, "step": 1050000 }, { "epoch": 5.2, "learning_rate": 4.739868610751921e-05, "loss": 2.5106, "step": 1050500 }, { "epoch": 5.21, "learning_rate": 4.739744752109313e-05, "loss": 2.4979, "step": 1051000 }, { "epoch": 5.21, "learning_rate": 4.7396208934667044e-05, "loss": 2.4952, "step": 1051500 }, { "epoch": 5.21, "learning_rate": 4.739497034824096e-05, "loss": 2.5124, "step": 1052000 }, { "epoch": 5.21, "learning_rate": 4.739373176181488e-05, "loss": 2.4948, "step": 1052500 }, { "epoch": 5.22, "learning_rate": 4.7392493175388795e-05, "loss": 2.5208, "step": 1053000 }, { "epoch": 5.22, "learning_rate": 4.7391257066135564e-05, "loss": 2.4947, "step": 1053500 }, { "epoch": 5.22, "learning_rate": 4.7390023434055185e-05, "loss": 2.505, "step": 1054000 }, { "epoch": 5.22, "learning_rate": 4.7388784847629095e-05, "loss": 2.5084, "step": 1054500 }, { "epoch": 5.23, "learning_rate": 4.738754626120301e-05, "loss": 2.5115, "step": 1055000 }, { "epoch": 5.23, "learning_rate": 4.738630767477693e-05, "loss": 2.5227, "step": 1055500 }, { "epoch": 5.23, "learning_rate": 4.7385069088350846e-05, "loss": 2.5089, "step": 1056000 }, { "epoch": 5.23, "learning_rate": 4.738383050192476e-05, "loss": 2.5291, "step": 1056500 }, { "epoch": 5.24, "learning_rate": 4.738259191549868e-05, "loss": 2.5091, "step": 1057000 }, { "epoch": 5.24, "learning_rate": 4.7381353329072596e-05, "loss": 2.4981, "step": 1057500 }, { "epoch": 5.24, "learning_rate": 4.738011474264651e-05, "loss": 2.4948, "step": 1058000 }, { "epoch": 5.24, "learning_rate": 4.737887615622043e-05, "loss": 2.5134, "step": 1058500 }, { "epoch": 5.25, "learning_rate": 4.737763756979435e-05, "loss": 2.528, "step": 1059000 }, { "epoch": 5.25, "learning_rate": 4.7376398983368264e-05, "loss": 2.5002, "step": 1059500 }, { "epoch": 5.25, "learning_rate": 4.737516287411503e-05, "loss": 2.4989, "step": 1060000 }, { "epoch": 5.25, "learning_rate": 4.737392428768895e-05, "loss": 2.5081, "step": 1060500 }, { "epoch": 5.26, "learning_rate": 4.737268570126287e-05, "loss": 2.4678, "step": 1061000 }, { "epoch": 5.26, "learning_rate": 4.7371447114836784e-05, "loss": 2.5176, "step": 1061500 }, { "epoch": 5.26, "learning_rate": 4.73702085284107e-05, "loss": 2.5324, "step": 1062000 }, { "epoch": 5.26, "learning_rate": 4.736897489633032e-05, "loss": 2.4977, "step": 1062500 }, { "epoch": 5.27, "learning_rate": 4.736773630990424e-05, "loss": 2.4846, "step": 1063000 }, { "epoch": 5.27, "learning_rate": 4.7366497723478155e-05, "loss": 2.4992, "step": 1063500 }, { "epoch": 5.27, "learning_rate": 4.7365259137052065e-05, "loss": 2.5183, "step": 1064000 }, { "epoch": 5.27, "learning_rate": 4.736402055062598e-05, "loss": 2.5053, "step": 1064500 }, { "epoch": 5.28, "learning_rate": 4.73627819641999e-05, "loss": 2.5262, "step": 1065000 }, { "epoch": 5.28, "learning_rate": 4.7361543377773816e-05, "loss": 2.515, "step": 1065500 }, { "epoch": 5.28, "learning_rate": 4.736030479134773e-05, "loss": 2.4938, "step": 1066000 }, { "epoch": 5.28, "learning_rate": 4.735906620492165e-05, "loss": 2.5149, "step": 1066500 }, { "epoch": 5.29, "learning_rate": 4.735782761849557e-05, "loss": 2.5019, "step": 1067000 }, { "epoch": 5.29, "learning_rate": 4.735659150924233e-05, "loss": 2.5112, "step": 1067500 }, { "epoch": 5.29, "learning_rate": 4.7355352922816246e-05, "loss": 2.5042, "step": 1068000 }, { "epoch": 5.29, "learning_rate": 4.735411681356302e-05, "loss": 2.5022, "step": 1068500 }, { "epoch": 5.3, "learning_rate": 4.735287822713694e-05, "loss": 2.5294, "step": 1069000 }, { "epoch": 5.3, "learning_rate": 4.7351639640710855e-05, "loss": 2.4906, "step": 1069500 }, { "epoch": 5.3, "learning_rate": 4.735040105428477e-05, "loss": 2.497, "step": 1070000 }, { "epoch": 5.3, "learning_rate": 4.734916246785868e-05, "loss": 2.5093, "step": 1070500 }, { "epoch": 5.31, "learning_rate": 4.73479238814326e-05, "loss": 2.4948, "step": 1071000 }, { "epoch": 5.31, "learning_rate": 4.7346685295006516e-05, "loss": 2.5087, "step": 1071500 }, { "epoch": 5.31, "learning_rate": 4.734544670858043e-05, "loss": 2.5076, "step": 1072000 }, { "epoch": 5.31, "learning_rate": 4.734420812215435e-05, "loss": 2.4856, "step": 1072500 }, { "epoch": 5.32, "learning_rate": 4.734296953572827e-05, "loss": 2.4979, "step": 1073000 }, { "epoch": 5.32, "learning_rate": 4.7341730949302184e-05, "loss": 2.4767, "step": 1073500 }, { "epoch": 5.32, "learning_rate": 4.7340494840048946e-05, "loss": 2.4792, "step": 1074000 }, { "epoch": 5.32, "learning_rate": 4.733925625362286e-05, "loss": 2.4931, "step": 1074500 }, { "epoch": 5.33, "learning_rate": 4.733801766719678e-05, "loss": 2.4973, "step": 1075000 }, { "epoch": 5.33, "learning_rate": 4.73367790807707e-05, "loss": 2.4988, "step": 1075500 }, { "epoch": 5.33, "learning_rate": 4.7335540494344614e-05, "loss": 2.4931, "step": 1076000 }, { "epoch": 5.33, "learning_rate": 4.733430190791853e-05, "loss": 2.5197, "step": 1076500 }, { "epoch": 5.34, "learning_rate": 4.733306332149245e-05, "loss": 2.4846, "step": 1077000 }, { "epoch": 5.34, "learning_rate": 4.7331824735066364e-05, "loss": 2.5041, "step": 1077500 }, { "epoch": 5.34, "learning_rate": 4.733058862581313e-05, "loss": 2.4894, "step": 1078000 }, { "epoch": 5.34, "learning_rate": 4.732935003938705e-05, "loss": 2.5004, "step": 1078500 }, { "epoch": 5.35, "learning_rate": 4.732811145296097e-05, "loss": 2.5042, "step": 1079000 }, { "epoch": 5.35, "learning_rate": 4.7326875343707736e-05, "loss": 2.4997, "step": 1079500 }, { "epoch": 5.35, "learning_rate": 4.732563675728165e-05, "loss": 2.5063, "step": 1080000 }, { "epoch": 5.35, "learning_rate": 4.732439817085556e-05, "loss": 2.5086, "step": 1080500 }, { "epoch": 5.36, "learning_rate": 4.732315958442948e-05, "loss": 2.4812, "step": 1081000 }, { "epoch": 5.36, "learning_rate": 4.73219209980034e-05, "loss": 2.4943, "step": 1081500 }, { "epoch": 5.36, "learning_rate": 4.7320682411577314e-05, "loss": 2.5031, "step": 1082000 }, { "epoch": 5.36, "learning_rate": 4.731944382515123e-05, "loss": 2.5015, "step": 1082500 }, { "epoch": 5.37, "learning_rate": 4.731820523872515e-05, "loss": 2.5043, "step": 1083000 }, { "epoch": 5.37, "learning_rate": 4.7316966652299064e-05, "loss": 2.5235, "step": 1083500 }, { "epoch": 5.37, "learning_rate": 4.731572806587298e-05, "loss": 2.491, "step": 1084000 }, { "epoch": 5.37, "learning_rate": 4.73144894794469e-05, "loss": 2.4956, "step": 1084500 }, { "epoch": 5.38, "learning_rate": 4.7313250893020815e-05, "loss": 2.5086, "step": 1085000 }, { "epoch": 5.38, "learning_rate": 4.731201230659473e-05, "loss": 2.534, "step": 1085500 }, { "epoch": 5.38, "learning_rate": 4.73107761973415e-05, "loss": 2.5147, "step": 1086000 }, { "epoch": 5.38, "learning_rate": 4.730953761091542e-05, "loss": 2.5247, "step": 1086500 }, { "epoch": 5.39, "learning_rate": 4.7308299024489335e-05, "loss": 2.5079, "step": 1087000 }, { "epoch": 5.39, "learning_rate": 4.730706043806325e-05, "loss": 2.5156, "step": 1087500 }, { "epoch": 5.39, "learning_rate": 4.730582185163717e-05, "loss": 2.4809, "step": 1088000 }, { "epoch": 5.39, "learning_rate": 4.730458574238393e-05, "loss": 2.5121, "step": 1088500 }, { "epoch": 5.4, "learning_rate": 4.730334715595785e-05, "loss": 2.5155, "step": 1089000 }, { "epoch": 5.4, "learning_rate": 4.7302108569531765e-05, "loss": 2.4882, "step": 1089500 }, { "epoch": 5.4, "learning_rate": 4.730086998310568e-05, "loss": 2.4941, "step": 1090000 }, { "epoch": 5.4, "learning_rate": 4.72996313966796e-05, "loss": 2.4765, "step": 1090500 }, { "epoch": 5.41, "learning_rate": 4.7298392810253515e-05, "loss": 2.4786, "step": 1091000 }, { "epoch": 5.41, "learning_rate": 4.729715422382743e-05, "loss": 2.5199, "step": 1091500 }, { "epoch": 5.41, "learning_rate": 4.72959181145742e-05, "loss": 2.4885, "step": 1092000 }, { "epoch": 5.41, "learning_rate": 4.729468200532097e-05, "loss": 2.523, "step": 1092500 }, { "epoch": 5.42, "learning_rate": 4.729344589606774e-05, "loss": 2.5936, "step": 1093000 }, { "epoch": 5.42, "learning_rate": 4.7292207309641656e-05, "loss": 2.5146, "step": 1093500 }, { "epoch": 5.42, "learning_rate": 4.729096872321557e-05, "loss": 2.5291, "step": 1094000 }, { "epoch": 5.42, "learning_rate": 4.728973013678949e-05, "loss": 2.5103, "step": 1094500 }, { "epoch": 5.43, "learning_rate": 4.7288491550363406e-05, "loss": 2.5062, "step": 1095000 }, { "epoch": 5.43, "learning_rate": 4.728725791828302e-05, "loss": 2.5114, "step": 1095500 }, { "epoch": 5.43, "learning_rate": 4.728602180902979e-05, "loss": 2.5239, "step": 1096000 }, { "epoch": 5.43, "learning_rate": 4.7284783222603706e-05, "loss": 2.5374, "step": 1096500 }, { "epoch": 5.43, "learning_rate": 4.728354463617762e-05, "loss": 2.5018, "step": 1097000 }, { "epoch": 5.44, "learning_rate": 4.728230604975154e-05, "loss": 2.4786, "step": 1097500 }, { "epoch": 5.44, "learning_rate": 4.7281069940498316e-05, "loss": 2.5172, "step": 1098000 }, { "epoch": 5.44, "learning_rate": 4.727983135407223e-05, "loss": 2.5365, "step": 1098500 }, { "epoch": 5.44, "learning_rate": 4.727859276764614e-05, "loss": 2.6215, "step": 1099000 }, { "epoch": 5.45, "learning_rate": 4.727735418122006e-05, "loss": 2.5575, "step": 1099500 }, { "epoch": 5.45, "learning_rate": 4.7276115594793977e-05, "loss": 2.549, "step": 1100000 }, { "epoch": 5.45, "learning_rate": 4.7274877008367893e-05, "loss": 2.538, "step": 1100500 }, { "epoch": 5.45, "learning_rate": 4.727364089911466e-05, "loss": 2.698, "step": 1101000 }, { "epoch": 5.46, "learning_rate": 4.727240231268858e-05, "loss": 2.6845, "step": 1101500 }, { "epoch": 5.46, "learning_rate": 4.727116372626249e-05, "loss": 2.6379, "step": 1102000 }, { "epoch": 5.46, "learning_rate": 4.7269925139836406e-05, "loss": 2.5858, "step": 1102500 }, { "epoch": 5.46, "learning_rate": 4.726868655341032e-05, "loss": 2.5711, "step": 1103000 }, { "epoch": 5.47, "learning_rate": 4.7267452921329944e-05, "loss": 2.6267, "step": 1103500 }, { "epoch": 5.47, "learning_rate": 4.726621681207671e-05, "loss": 2.6757, "step": 1104000 }, { "epoch": 5.47, "learning_rate": 4.726497822565063e-05, "loss": 2.6265, "step": 1104500 }, { "epoch": 5.47, "learning_rate": 4.7263739639224547e-05, "loss": 2.6267, "step": 1105000 }, { "epoch": 5.48, "learning_rate": 4.7262501052798463e-05, "loss": 2.6607, "step": 1105500 }, { "epoch": 5.48, "learning_rate": 4.726126246637238e-05, "loss": 2.5848, "step": 1106000 }, { "epoch": 5.48, "learning_rate": 4.72600238799463e-05, "loss": 2.5908, "step": 1106500 }, { "epoch": 5.48, "learning_rate": 4.7258785293520214e-05, "loss": 2.6656, "step": 1107000 }, { "epoch": 5.49, "learning_rate": 4.725754918426698e-05, "loss": 2.5915, "step": 1107500 }, { "epoch": 5.49, "learning_rate": 4.72563105978409e-05, "loss": 2.6518, "step": 1108000 }, { "epoch": 5.49, "learning_rate": 4.725507201141482e-05, "loss": 2.617, "step": 1108500 }, { "epoch": 5.49, "learning_rate": 4.725383342498873e-05, "loss": 2.6337, "step": 1109000 }, { "epoch": 5.5, "learning_rate": 4.7252594838562644e-05, "loss": 2.5885, "step": 1109500 }, { "epoch": 5.5, "learning_rate": 4.725135625213656e-05, "loss": 2.5465, "step": 1110000 }, { "epoch": 5.5, "learning_rate": 4.725011766571048e-05, "loss": 2.5822, "step": 1110500 }, { "epoch": 5.5, "learning_rate": 4.7248879079284395e-05, "loss": 2.6102, "step": 1111000 }, { "epoch": 5.51, "learning_rate": 4.724764049285831e-05, "loss": 2.6242, "step": 1111500 }, { "epoch": 5.51, "learning_rate": 4.724640438360508e-05, "loss": 2.6015, "step": 1112000 }, { "epoch": 5.51, "learning_rate": 4.7245165797179e-05, "loss": 2.5667, "step": 1112500 }, { "epoch": 5.51, "learning_rate": 4.7243927210752914e-05, "loss": 2.5967, "step": 1113000 }, { "epoch": 5.52, "learning_rate": 4.724268862432683e-05, "loss": 2.591, "step": 1113500 }, { "epoch": 5.52, "learning_rate": 4.72414525150736e-05, "loss": 2.6024, "step": 1114000 }, { "epoch": 5.52, "learning_rate": 4.724021392864752e-05, "loss": 2.6239, "step": 1114500 }, { "epoch": 5.52, "learning_rate": 4.7238975342221434e-05, "loss": 2.6354, "step": 1115000 }, { "epoch": 5.53, "learning_rate": 4.723773675579535e-05, "loss": 2.6408, "step": 1115500 }, { "epoch": 5.53, "learning_rate": 4.723649816936926e-05, "loss": 2.6031, "step": 1116000 }, { "epoch": 5.53, "learning_rate": 4.723525958294318e-05, "loss": 2.6564, "step": 1116500 }, { "epoch": 5.53, "learning_rate": 4.7234020996517095e-05, "loss": 2.5826, "step": 1117000 }, { "epoch": 5.54, "learning_rate": 4.723278241009101e-05, "loss": 2.583, "step": 1117500 }, { "epoch": 5.54, "learning_rate": 4.723154382366493e-05, "loss": 2.5424, "step": 1118000 }, { "epoch": 5.54, "learning_rate": 4.7230305237238846e-05, "loss": 2.5366, "step": 1118500 }, { "epoch": 5.54, "learning_rate": 4.722906665081276e-05, "loss": 2.5275, "step": 1119000 }, { "epoch": 5.55, "learning_rate": 4.722782806438668e-05, "loss": 2.546, "step": 1119500 }, { "epoch": 5.55, "learning_rate": 4.7226589477960596e-05, "loss": 2.5238, "step": 1120000 }, { "epoch": 5.55, "learning_rate": 4.7225350891534513e-05, "loss": 2.535, "step": 1120500 }, { "epoch": 5.55, "learning_rate": 4.7224112305108424e-05, "loss": 2.52, "step": 1121000 }, { "epoch": 5.56, "learning_rate": 4.722287371868234e-05, "loss": 2.5252, "step": 1121500 }, { "epoch": 5.56, "learning_rate": 4.722163513225626e-05, "loss": 2.544, "step": 1122000 }, { "epoch": 5.56, "learning_rate": 4.7220396545830174e-05, "loss": 2.5424, "step": 1122500 }, { "epoch": 5.56, "learning_rate": 4.721915795940409e-05, "loss": 2.5489, "step": 1123000 }, { "epoch": 5.57, "learning_rate": 4.721791937297801e-05, "loss": 2.5057, "step": 1123500 }, { "epoch": 5.57, "learning_rate": 4.7216680786551925e-05, "loss": 2.5233, "step": 1124000 }, { "epoch": 5.57, "learning_rate": 4.721544220012584e-05, "loss": 2.53, "step": 1124500 }, { "epoch": 5.57, "learning_rate": 4.721420361369976e-05, "loss": 2.5091, "step": 1125000 }, { "epoch": 5.58, "learning_rate": 4.7212965027273676e-05, "loss": 2.5253, "step": 1125500 }, { "epoch": 5.58, "learning_rate": 4.7211728918020445e-05, "loss": 2.5188, "step": 1126000 }, { "epoch": 5.58, "learning_rate": 4.721049033159436e-05, "loss": 2.522, "step": 1126500 }, { "epoch": 5.58, "learning_rate": 4.720925669951398e-05, "loss": 2.5627, "step": 1127000 }, { "epoch": 5.59, "learning_rate": 4.72080181130879e-05, "loss": 2.526, "step": 1127500 }, { "epoch": 5.59, "learning_rate": 4.7206779526661816e-05, "loss": 2.5394, "step": 1128000 }, { "epoch": 5.59, "learning_rate": 4.720554094023573e-05, "loss": 2.5182, "step": 1128500 }, { "epoch": 5.59, "learning_rate": 4.720430235380965e-05, "loss": 2.568, "step": 1129000 }, { "epoch": 5.6, "learning_rate": 4.720306624455641e-05, "loss": 2.5795, "step": 1129500 }, { "epoch": 5.6, "learning_rate": 4.720182765813033e-05, "loss": 2.6119, "step": 1130000 }, { "epoch": 5.6, "learning_rate": 4.7200589071704246e-05, "loss": 2.5682, "step": 1130500 }, { "epoch": 5.6, "learning_rate": 4.719935048527816e-05, "loss": 2.5436, "step": 1131000 }, { "epoch": 5.61, "learning_rate": 4.719811189885208e-05, "loss": 2.5482, "step": 1131500 }, { "epoch": 5.61, "learning_rate": 4.7196873312426e-05, "loss": 2.5595, "step": 1132000 }, { "epoch": 5.61, "learning_rate": 4.7195634725999914e-05, "loss": 2.5303, "step": 1132500 }, { "epoch": 5.61, "learning_rate": 4.719439861674668e-05, "loss": 2.5571, "step": 1133000 }, { "epoch": 5.62, "learning_rate": 4.71931600303206e-05, "loss": 2.5494, "step": 1133500 }, { "epoch": 5.62, "learning_rate": 4.7191921443894516e-05, "loss": 2.5194, "step": 1134000 }, { "epoch": 5.62, "learning_rate": 4.719068285746843e-05, "loss": 2.5303, "step": 1134500 }, { "epoch": 5.62, "learning_rate": 4.718944427104235e-05, "loss": 2.5406, "step": 1135000 }, { "epoch": 5.63, "learning_rate": 4.718820568461627e-05, "loss": 2.5231, "step": 1135500 }, { "epoch": 5.63, "learning_rate": 4.7186967098190184e-05, "loss": 2.5372, "step": 1136000 }, { "epoch": 5.63, "learning_rate": 4.7185728511764094e-05, "loss": 2.5714, "step": 1136500 }, { "epoch": 5.63, "learning_rate": 4.718448992533801e-05, "loss": 2.5142, "step": 1137000 }, { "epoch": 5.64, "learning_rate": 4.718325381608478e-05, "loss": 2.5527, "step": 1137500 }, { "epoch": 5.64, "learning_rate": 4.71820152296587e-05, "loss": 2.574, "step": 1138000 }, { "epoch": 5.64, "learning_rate": 4.7180776643232614e-05, "loss": 2.5483, "step": 1138500 }, { "epoch": 5.64, "learning_rate": 4.717953805680653e-05, "loss": 2.5275, "step": 1139000 }, { "epoch": 5.65, "learning_rate": 4.717829947038044e-05, "loss": 2.5176, "step": 1139500 }, { "epoch": 5.65, "learning_rate": 4.717706088395436e-05, "loss": 2.5255, "step": 1140000 }, { "epoch": 5.65, "learning_rate": 4.717582477470113e-05, "loss": 2.5277, "step": 1140500 }, { "epoch": 5.65, "learning_rate": 4.717458618827505e-05, "loss": 2.5264, "step": 1141000 }, { "epoch": 5.66, "learning_rate": 4.717334760184897e-05, "loss": 2.5172, "step": 1141500 }, { "epoch": 5.66, "learning_rate": 4.7172109015422884e-05, "loss": 2.5394, "step": 1142000 }, { "epoch": 5.66, "learning_rate": 4.71708704289968e-05, "loss": 2.4941, "step": 1142500 }, { "epoch": 5.66, "learning_rate": 4.716963184257071e-05, "loss": 2.5234, "step": 1143000 }, { "epoch": 5.67, "learning_rate": 4.716839325614463e-05, "loss": 2.5233, "step": 1143500 }, { "epoch": 5.67, "learning_rate": 4.71671571468914e-05, "loss": 2.5376, "step": 1144000 }, { "epoch": 5.67, "learning_rate": 4.7165918560465314e-05, "loss": 2.5352, "step": 1144500 }, { "epoch": 5.67, "learning_rate": 4.716467997403923e-05, "loss": 2.5087, "step": 1145000 }, { "epoch": 5.68, "learning_rate": 4.716344138761315e-05, "loss": 2.5188, "step": 1145500 }, { "epoch": 5.68, "learning_rate": 4.716220280118706e-05, "loss": 2.5193, "step": 1146000 }, { "epoch": 5.68, "learning_rate": 4.7160964214760975e-05, "loss": 2.5462, "step": 1146500 }, { "epoch": 5.68, "learning_rate": 4.71597305826806e-05, "loss": 2.5272, "step": 1147000 }, { "epoch": 5.69, "learning_rate": 4.715849199625452e-05, "loss": 2.5139, "step": 1147500 }, { "epoch": 5.69, "learning_rate": 4.7157253409828436e-05, "loss": 2.5406, "step": 1148000 }, { "epoch": 5.69, "learning_rate": 4.715601482340235e-05, "loss": 2.5199, "step": 1148500 }, { "epoch": 5.69, "learning_rate": 4.715477623697627e-05, "loss": 2.4929, "step": 1149000 }, { "epoch": 5.7, "learning_rate": 4.715354012772303e-05, "loss": 2.5136, "step": 1149500 }, { "epoch": 5.7, "learning_rate": 4.715230154129695e-05, "loss": 2.5388, "step": 1150000 }, { "epoch": 5.7, "learning_rate": 4.7151062954870866e-05, "loss": 2.5158, "step": 1150500 }, { "epoch": 5.7, "learning_rate": 4.714982436844478e-05, "loss": 2.4921, "step": 1151000 }, { "epoch": 5.7, "learning_rate": 4.71485857820187e-05, "loss": 2.5272, "step": 1151500 }, { "epoch": 5.71, "learning_rate": 4.714734719559262e-05, "loss": 2.5284, "step": 1152000 }, { "epoch": 5.71, "learning_rate": 4.7146108609166534e-05, "loss": 2.5297, "step": 1152500 }, { "epoch": 5.71, "learning_rate": 4.714487002274045e-05, "loss": 2.5186, "step": 1153000 }, { "epoch": 5.71, "learning_rate": 4.714363143631437e-05, "loss": 2.4969, "step": 1153500 }, { "epoch": 5.72, "learning_rate": 4.7142392849888284e-05, "loss": 2.5141, "step": 1154000 }, { "epoch": 5.72, "learning_rate": 4.71411542634622e-05, "loss": 2.5465, "step": 1154500 }, { "epoch": 5.72, "learning_rate": 4.713991567703612e-05, "loss": 2.5263, "step": 1155000 }, { "epoch": 5.72, "learning_rate": 4.713867709061003e-05, "loss": 2.5282, "step": 1155500 }, { "epoch": 5.73, "learning_rate": 4.713744345852965e-05, "loss": 2.5718, "step": 1156000 }, { "epoch": 5.73, "learning_rate": 4.7136204872103566e-05, "loss": 2.5614, "step": 1156500 }, { "epoch": 5.73, "learning_rate": 4.7134968762850335e-05, "loss": 2.5299, "step": 1157000 }, { "epoch": 5.73, "learning_rate": 4.713373017642425e-05, "loss": 2.5281, "step": 1157500 }, { "epoch": 5.74, "learning_rate": 4.713249158999817e-05, "loss": 2.503, "step": 1158000 }, { "epoch": 5.74, "learning_rate": 4.7131253003572086e-05, "loss": 2.5163, "step": 1158500 }, { "epoch": 5.74, "learning_rate": 4.7130014417146e-05, "loss": 2.5324, "step": 1159000 }, { "epoch": 5.74, "learning_rate": 4.712877583071992e-05, "loss": 2.522, "step": 1159500 }, { "epoch": 5.75, "learning_rate": 4.7127537244293836e-05, "loss": 2.55, "step": 1160000 }, { "epoch": 5.75, "learning_rate": 4.712629865786775e-05, "loss": 2.5301, "step": 1160500 }, { "epoch": 5.75, "learning_rate": 4.712506007144167e-05, "loss": 2.5052, "step": 1161000 }, { "epoch": 5.75, "learning_rate": 4.712382148501559e-05, "loss": 2.4877, "step": 1161500 }, { "epoch": 5.76, "learning_rate": 4.7122582898589504e-05, "loss": 2.5126, "step": 1162000 }, { "epoch": 5.76, "learning_rate": 4.712134431216342e-05, "loss": 2.5242, "step": 1162500 }, { "epoch": 5.76, "learning_rate": 4.712010572573734e-05, "loss": 2.5206, "step": 1163000 }, { "epoch": 5.76, "learning_rate": 4.711886713931125e-05, "loss": 2.5255, "step": 1163500 }, { "epoch": 5.77, "learning_rate": 4.711763103005802e-05, "loss": 2.5042, "step": 1164000 }, { "epoch": 5.77, "learning_rate": 4.7116392443631934e-05, "loss": 2.5521, "step": 1164500 }, { "epoch": 5.77, "learning_rate": 4.711515385720585e-05, "loss": 2.5334, "step": 1165000 }, { "epoch": 5.77, "learning_rate": 4.711391527077977e-05, "loss": 2.536, "step": 1165500 }, { "epoch": 5.78, "learning_rate": 4.7112676684353685e-05, "loss": 2.5293, "step": 1166000 }, { "epoch": 5.78, "learning_rate": 4.71114380979276e-05, "loss": 2.5166, "step": 1166500 }, { "epoch": 5.78, "learning_rate": 4.711019951150152e-05, "loss": 2.4953, "step": 1167000 }, { "epoch": 5.78, "learning_rate": 4.7108960925075435e-05, "loss": 2.4882, "step": 1167500 }, { "epoch": 5.79, "learning_rate": 4.7107724815822204e-05, "loss": 2.5227, "step": 1168000 }, { "epoch": 5.79, "learning_rate": 4.710648622939612e-05, "loss": 2.514, "step": 1168500 }, { "epoch": 5.79, "learning_rate": 4.710524764297004e-05, "loss": 2.5371, "step": 1169000 }, { "epoch": 5.79, "learning_rate": 4.7104009056543955e-05, "loss": 2.5194, "step": 1169500 }, { "epoch": 5.8, "learning_rate": 4.7102770470117865e-05, "loss": 2.5025, "step": 1170000 }, { "epoch": 5.8, "learning_rate": 4.710153188369178e-05, "loss": 2.5248, "step": 1170500 }, { "epoch": 5.8, "learning_rate": 4.71002932972657e-05, "loss": 2.4913, "step": 1171000 }, { "epoch": 5.8, "learning_rate": 4.7099054710839616e-05, "loss": 2.5103, "step": 1171500 }, { "epoch": 5.81, "learning_rate": 4.709781612441353e-05, "loss": 2.4856, "step": 1172000 }, { "epoch": 5.81, "learning_rate": 4.709657753798744e-05, "loss": 2.536, "step": 1172500 }, { "epoch": 5.81, "learning_rate": 4.709534142873422e-05, "loss": 2.4982, "step": 1173000 }, { "epoch": 5.81, "learning_rate": 4.7094102842308136e-05, "loss": 2.5312, "step": 1173500 }, { "epoch": 5.82, "learning_rate": 4.7092866733054904e-05, "loss": 2.4911, "step": 1174000 }, { "epoch": 5.82, "learning_rate": 4.709162814662882e-05, "loss": 2.5099, "step": 1174500 }, { "epoch": 5.82, "learning_rate": 4.709038956020274e-05, "loss": 2.5238, "step": 1175000 }, { "epoch": 5.82, "learning_rate": 4.7089150973776655e-05, "loss": 2.5279, "step": 1175500 }, { "epoch": 5.83, "learning_rate": 4.708791486452342e-05, "loss": 2.5004, "step": 1176000 }, { "epoch": 5.83, "learning_rate": 4.7086676278097334e-05, "loss": 2.5314, "step": 1176500 }, { "epoch": 5.83, "learning_rate": 4.708543769167125e-05, "loss": 2.5071, "step": 1177000 }, { "epoch": 5.83, "learning_rate": 4.708419910524517e-05, "loss": 2.5197, "step": 1177500 }, { "epoch": 5.84, "learning_rate": 4.7082960518819085e-05, "loss": 2.5249, "step": 1178000 }, { "epoch": 5.84, "learning_rate": 4.7081721932393e-05, "loss": 2.5054, "step": 1178500 }, { "epoch": 5.84, "learning_rate": 4.708048334596692e-05, "loss": 2.5106, "step": 1179000 }, { "epoch": 5.84, "learning_rate": 4.7079244759540836e-05, "loss": 2.5198, "step": 1179500 }, { "epoch": 5.85, "learning_rate": 4.7078008650287604e-05, "loss": 2.5138, "step": 1180000 }, { "epoch": 5.85, "learning_rate": 4.707677006386152e-05, "loss": 2.5156, "step": 1180500 }, { "epoch": 5.85, "learning_rate": 4.707553147743544e-05, "loss": 2.5207, "step": 1181000 }, { "epoch": 5.85, "learning_rate": 4.7074292891009355e-05, "loss": 2.4657, "step": 1181500 }, { "epoch": 5.86, "learning_rate": 4.707305430458327e-05, "loss": 2.4906, "step": 1182000 }, { "epoch": 5.86, "learning_rate": 4.707181571815719e-05, "loss": 2.4839, "step": 1182500 }, { "epoch": 5.86, "learning_rate": 4.7070577131731106e-05, "loss": 2.514, "step": 1183000 }, { "epoch": 5.86, "learning_rate": 4.7069338545305016e-05, "loss": 2.4829, "step": 1183500 }, { "epoch": 5.87, "learning_rate": 4.706809995887893e-05, "loss": 2.5311, "step": 1184000 }, { "epoch": 5.87, "learning_rate": 4.706686137245285e-05, "loss": 2.5153, "step": 1184500 }, { "epoch": 5.87, "learning_rate": 4.706562774037247e-05, "loss": 2.5128, "step": 1185000 }, { "epoch": 5.87, "learning_rate": 4.706438915394639e-05, "loss": 2.5017, "step": 1185500 }, { "epoch": 5.88, "learning_rate": 4.7063153044693156e-05, "loss": 2.5261, "step": 1186000 }, { "epoch": 5.88, "learning_rate": 4.7061914458267073e-05, "loss": 2.5201, "step": 1186500 }, { "epoch": 5.88, "learning_rate": 4.7060675871840984e-05, "loss": 2.5188, "step": 1187000 }, { "epoch": 5.88, "learning_rate": 4.70594372854149e-05, "loss": 2.534, "step": 1187500 }, { "epoch": 5.89, "learning_rate": 4.705819869898882e-05, "loss": 2.5313, "step": 1188000 }, { "epoch": 5.89, "learning_rate": 4.7056960112562734e-05, "loss": 2.5164, "step": 1188500 }, { "epoch": 5.89, "learning_rate": 4.705572152613665e-05, "loss": 2.5065, "step": 1189000 }, { "epoch": 5.89, "learning_rate": 4.705448293971057e-05, "loss": 2.5141, "step": 1189500 }, { "epoch": 5.9, "learning_rate": 4.7053244353284485e-05, "loss": 2.4913, "step": 1190000 }, { "epoch": 5.9, "learning_rate": 4.70520057668584e-05, "loss": 2.5257, "step": 1190500 }, { "epoch": 5.9, "learning_rate": 4.705076965760517e-05, "loss": 2.5149, "step": 1191000 }, { "epoch": 5.9, "learning_rate": 4.704953107117909e-05, "loss": 2.524, "step": 1191500 }, { "epoch": 5.91, "learning_rate": 4.7048292484753005e-05, "loss": 2.5101, "step": 1192000 }, { "epoch": 5.91, "learning_rate": 4.704705389832692e-05, "loss": 2.5176, "step": 1192500 }, { "epoch": 5.91, "learning_rate": 4.704581531190084e-05, "loss": 2.5155, "step": 1193000 }, { "epoch": 5.91, "learning_rate": 4.7044576725474756e-05, "loss": 2.4957, "step": 1193500 }, { "epoch": 5.92, "learning_rate": 4.704333813904867e-05, "loss": 2.5396, "step": 1194000 }, { "epoch": 5.92, "learning_rate": 4.7042102029795434e-05, "loss": 2.5241, "step": 1194500 }, { "epoch": 5.92, "learning_rate": 4.704086344336935e-05, "loss": 2.5134, "step": 1195000 }, { "epoch": 5.92, "learning_rate": 4.703962485694327e-05, "loss": 2.5014, "step": 1195500 }, { "epoch": 5.93, "learning_rate": 4.7038386270517185e-05, "loss": 2.5075, "step": 1196000 }, { "epoch": 5.93, "learning_rate": 4.70371476840911e-05, "loss": 2.5256, "step": 1196500 }, { "epoch": 5.93, "learning_rate": 4.703590909766502e-05, "loss": 2.4918, "step": 1197000 }, { "epoch": 5.93, "learning_rate": 4.7034670511238936e-05, "loss": 2.4843, "step": 1197500 }, { "epoch": 5.94, "learning_rate": 4.703343192481285e-05, "loss": 2.5463, "step": 1198000 }, { "epoch": 5.94, "learning_rate": 4.703219333838677e-05, "loss": 2.4987, "step": 1198500 }, { "epoch": 5.94, "learning_rate": 4.703095475196068e-05, "loss": 2.5182, "step": 1199000 }, { "epoch": 5.94, "learning_rate": 4.702972111988031e-05, "loss": 2.5022, "step": 1199500 }, { "epoch": 5.95, "learning_rate": 4.7028482533454224e-05, "loss": 2.5201, "step": 1200000 }, { "epoch": 5.95, "learning_rate": 4.7027243947028135e-05, "loss": 2.5196, "step": 1200500 }, { "epoch": 5.95, "learning_rate": 4.702600536060205e-05, "loss": 2.5223, "step": 1201000 }, { "epoch": 5.95, "learning_rate": 4.702476677417597e-05, "loss": 2.5153, "step": 1201500 }, { "epoch": 5.96, "learning_rate": 4.7023528187749885e-05, "loss": 2.5415, "step": 1202000 }, { "epoch": 5.96, "learning_rate": 4.70222896013238e-05, "loss": 2.529, "step": 1202500 }, { "epoch": 5.96, "learning_rate": 4.702105349207057e-05, "loss": 2.5326, "step": 1203000 }, { "epoch": 5.96, "learning_rate": 4.701981490564449e-05, "loss": 2.5434, "step": 1203500 }, { "epoch": 5.97, "learning_rate": 4.7018576319218405e-05, "loss": 2.5482, "step": 1204000 }, { "epoch": 5.97, "learning_rate": 4.701733773279232e-05, "loss": 2.5158, "step": 1204500 }, { "epoch": 5.97, "learning_rate": 4.701609914636624e-05, "loss": 2.5229, "step": 1205000 }, { "epoch": 5.97, "learning_rate": 4.7014860559940156e-05, "loss": 2.5232, "step": 1205500 }, { "epoch": 5.97, "learning_rate": 4.7013624450686925e-05, "loss": 2.5345, "step": 1206000 }, { "epoch": 5.98, "learning_rate": 4.701238586426084e-05, "loss": 2.4912, "step": 1206500 }, { "epoch": 5.98, "learning_rate": 4.701114727783475e-05, "loss": 2.5206, "step": 1207000 }, { "epoch": 5.98, "learning_rate": 4.700990869140867e-05, "loss": 2.4784, "step": 1207500 }, { "epoch": 5.98, "learning_rate": 4.7008670104982586e-05, "loss": 2.4862, "step": 1208000 }, { "epoch": 5.99, "learning_rate": 4.70074315185565e-05, "loss": 2.5107, "step": 1208500 }, { "epoch": 5.99, "learning_rate": 4.700619293213042e-05, "loss": 2.5165, "step": 1209000 }, { "epoch": 5.99, "learning_rate": 4.700495682287719e-05, "loss": 2.5188, "step": 1209500 }, { "epoch": 5.99, "learning_rate": 4.7003718236451105e-05, "loss": 2.4935, "step": 1210000 }, { "epoch": 6.0, "learning_rate": 4.700247965002502e-05, "loss": 2.5275, "step": 1210500 }, { "epoch": 6.0, "learning_rate": 4.700124106359894e-05, "loss": 2.5212, "step": 1211000 }, { "epoch": 6.0, "eval_accuracy": 0.6358168518016798, "eval_accuracy_mlm": 0.5887641252682907, "eval_accuracy_nsp": 0.8578281213842226, "eval_loss": 2.467965841293335, "eval_runtime": 145.9334, "eval_samples_per_second": 1747.092, "eval_steps_per_second": 72.8, "step": 1211058 }, { "epoch": 6.0, "learning_rate": 4.7000002477172856e-05, "loss": 2.4902, "step": 1211500 }, { "epoch": 6.0, "learning_rate": 4.699876389074677e-05, "loss": 2.493, "step": 1212000 }, { "epoch": 6.01, "learning_rate": 4.699752530432069e-05, "loss": 2.5038, "step": 1212500 }, { "epoch": 6.01, "learning_rate": 4.699628671789461e-05, "loss": 2.4571, "step": 1213000 }, { "epoch": 6.01, "learning_rate": 4.6995048131468524e-05, "loss": 2.4688, "step": 1213500 }, { "epoch": 6.01, "learning_rate": 4.699380954504244e-05, "loss": 2.5051, "step": 1214000 }, { "epoch": 6.02, "learning_rate": 4.699257095861635e-05, "loss": 2.4776, "step": 1214500 }, { "epoch": 6.02, "learning_rate": 4.699133237219027e-05, "loss": 2.4888, "step": 1215000 }, { "epoch": 6.02, "learning_rate": 4.6990096262937036e-05, "loss": 2.4839, "step": 1215500 }, { "epoch": 6.02, "learning_rate": 4.698885767651095e-05, "loss": 2.4875, "step": 1216000 }, { "epoch": 6.03, "learning_rate": 4.698762156725772e-05, "loss": 2.5075, "step": 1216500 }, { "epoch": 6.03, "learning_rate": 4.698638545800449e-05, "loss": 2.4875, "step": 1217000 }, { "epoch": 6.03, "learning_rate": 4.698514687157841e-05, "loss": 2.4917, "step": 1217500 }, { "epoch": 6.03, "learning_rate": 4.6983908285152325e-05, "loss": 2.4866, "step": 1218000 }, { "epoch": 6.04, "learning_rate": 4.698266969872624e-05, "loss": 2.5019, "step": 1218500 }, { "epoch": 6.04, "learning_rate": 4.698143111230016e-05, "loss": 2.4532, "step": 1219000 }, { "epoch": 6.04, "learning_rate": 4.6980192525874076e-05, "loss": 2.4699, "step": 1219500 }, { "epoch": 6.04, "learning_rate": 4.697895393944799e-05, "loss": 2.5117, "step": 1220000 }, { "epoch": 6.05, "learning_rate": 4.69777153530219e-05, "loss": 2.4897, "step": 1220500 }, { "epoch": 6.05, "learning_rate": 4.697647676659582e-05, "loss": 2.4558, "step": 1221000 }, { "epoch": 6.05, "learning_rate": 4.6975238180169737e-05, "loss": 2.4775, "step": 1221500 }, { "epoch": 6.05, "learning_rate": 4.6973999593743653e-05, "loss": 2.4828, "step": 1222000 }, { "epoch": 6.06, "learning_rate": 4.697276100731757e-05, "loss": 2.487, "step": 1222500 }, { "epoch": 6.06, "learning_rate": 4.697152242089149e-05, "loss": 2.4936, "step": 1223000 }, { "epoch": 6.06, "learning_rate": 4.6970283834465404e-05, "loss": 2.489, "step": 1223500 }, { "epoch": 6.06, "learning_rate": 4.6969045248039314e-05, "loss": 2.4504, "step": 1224000 }, { "epoch": 6.07, "learning_rate": 4.696780666161323e-05, "loss": 2.4844, "step": 1224500 }, { "epoch": 6.07, "learning_rate": 4.696656807518715e-05, "loss": 2.4735, "step": 1225000 }, { "epoch": 6.07, "learning_rate": 4.6965329488761065e-05, "loss": 2.4685, "step": 1225500 }, { "epoch": 6.07, "learning_rate": 4.696409090233498e-05, "loss": 2.4795, "step": 1226000 }, { "epoch": 6.08, "learning_rate": 4.69628523159089e-05, "loss": 2.4918, "step": 1226500 }, { "epoch": 6.08, "learning_rate": 4.696161620665567e-05, "loss": 2.4753, "step": 1227000 }, { "epoch": 6.08, "learning_rate": 4.696038009740244e-05, "loss": 2.4695, "step": 1227500 }, { "epoch": 6.08, "learning_rate": 4.6959141510976354e-05, "loss": 2.4831, "step": 1228000 }, { "epoch": 6.09, "learning_rate": 4.695790292455027e-05, "loss": 2.4936, "step": 1228500 }, { "epoch": 6.09, "learning_rate": 4.695666433812419e-05, "loss": 2.4876, "step": 1229000 }, { "epoch": 6.09, "learning_rate": 4.6955425751698104e-05, "loss": 2.4676, "step": 1229500 }, { "epoch": 6.09, "learning_rate": 4.695418716527202e-05, "loss": 2.4873, "step": 1230000 }, { "epoch": 6.1, "learning_rate": 4.695295105601879e-05, "loss": 2.4812, "step": 1230500 }, { "epoch": 6.1, "learning_rate": 4.695171246959271e-05, "loss": 2.4859, "step": 1231000 }, { "epoch": 6.1, "learning_rate": 4.6950473883166624e-05, "loss": 2.4744, "step": 1231500 }, { "epoch": 6.1, "learning_rate": 4.694923529674054e-05, "loss": 2.4792, "step": 1232000 }, { "epoch": 6.11, "learning_rate": 4.694799918748731e-05, "loss": 2.4847, "step": 1232500 }, { "epoch": 6.11, "learning_rate": 4.694676060106123e-05, "loss": 2.5086, "step": 1233000 }, { "epoch": 6.11, "learning_rate": 4.6945522014635144e-05, "loss": 2.4921, "step": 1233500 }, { "epoch": 6.11, "learning_rate": 4.6944283428209054e-05, "loss": 2.471, "step": 1234000 }, { "epoch": 6.12, "learning_rate": 4.694304484178297e-05, "loss": 2.4858, "step": 1234500 }, { "epoch": 6.12, "learning_rate": 4.694180625535689e-05, "loss": 2.4519, "step": 1235000 }, { "epoch": 6.12, "learning_rate": 4.6940567668930805e-05, "loss": 2.4916, "step": 1235500 }, { "epoch": 6.12, "learning_rate": 4.693932908250472e-05, "loss": 2.4775, "step": 1236000 }, { "epoch": 6.13, "learning_rate": 4.693809049607863e-05, "loss": 2.483, "step": 1236500 }, { "epoch": 6.13, "learning_rate": 4.693685190965255e-05, "loss": 2.4979, "step": 1237000 }, { "epoch": 6.13, "learning_rate": 4.6935613323226465e-05, "loss": 2.4822, "step": 1237500 }, { "epoch": 6.13, "learning_rate": 4.693437473680038e-05, "loss": 2.4846, "step": 1238000 }, { "epoch": 6.14, "learning_rate": 4.69331361503743e-05, "loss": 2.4769, "step": 1238500 }, { "epoch": 6.14, "learning_rate": 4.6931900041121075e-05, "loss": 2.5055, "step": 1239000 }, { "epoch": 6.14, "learning_rate": 4.6930661454694985e-05, "loss": 2.4656, "step": 1239500 }, { "epoch": 6.14, "learning_rate": 4.69294228682689e-05, "loss": 2.5074, "step": 1240000 }, { "epoch": 6.15, "learning_rate": 4.692818428184282e-05, "loss": 2.4729, "step": 1240500 }, { "epoch": 6.15, "learning_rate": 4.692694817258959e-05, "loss": 2.4935, "step": 1241000 }, { "epoch": 6.15, "learning_rate": 4.6925709586163505e-05, "loss": 2.4919, "step": 1241500 }, { "epoch": 6.15, "learning_rate": 4.692447099973742e-05, "loss": 2.4865, "step": 1242000 }, { "epoch": 6.16, "learning_rate": 4.692323241331134e-05, "loss": 2.4812, "step": 1242500 }, { "epoch": 6.16, "learning_rate": 4.6921993826885255e-05, "loss": 2.483, "step": 1243000 }, { "epoch": 6.16, "learning_rate": 4.6920757717632024e-05, "loss": 2.4902, "step": 1243500 }, { "epoch": 6.16, "learning_rate": 4.691951913120594e-05, "loss": 2.47, "step": 1244000 }, { "epoch": 6.17, "learning_rate": 4.691828054477986e-05, "loss": 2.4547, "step": 1244500 }, { "epoch": 6.17, "learning_rate": 4.6917041958353775e-05, "loss": 2.4744, "step": 1245000 }, { "epoch": 6.17, "learning_rate": 4.691580337192769e-05, "loss": 2.4984, "step": 1245500 }, { "epoch": 6.17, "learning_rate": 4.69145647855016e-05, "loss": 2.4811, "step": 1246000 }, { "epoch": 6.18, "learning_rate": 4.691332619907552e-05, "loss": 2.4872, "step": 1246500 }, { "epoch": 6.18, "learning_rate": 4.6912090089822295e-05, "loss": 2.481, "step": 1247000 }, { "epoch": 6.18, "learning_rate": 4.691085150339621e-05, "loss": 2.4718, "step": 1247500 }, { "epoch": 6.18, "learning_rate": 4.690961291697012e-05, "loss": 2.4848, "step": 1248000 }, { "epoch": 6.19, "learning_rate": 4.690837433054404e-05, "loss": 2.483, "step": 1248500 }, { "epoch": 6.19, "learning_rate": 4.6907135744117956e-05, "loss": 2.4704, "step": 1249000 }, { "epoch": 6.19, "learning_rate": 4.690589715769187e-05, "loss": 2.4682, "step": 1249500 }, { "epoch": 6.19, "learning_rate": 4.690466104843864e-05, "loss": 2.4647, "step": 1250000 }, { "epoch": 6.2, "learning_rate": 4.690342493918541e-05, "loss": 2.4835, "step": 1250500 }, { "epoch": 6.2, "learning_rate": 4.690218635275933e-05, "loss": 2.4615, "step": 1251000 }, { "epoch": 6.2, "learning_rate": 4.6900947766333244e-05, "loss": 2.4774, "step": 1251500 }, { "epoch": 6.2, "learning_rate": 4.689970917990716e-05, "loss": 2.4772, "step": 1252000 }, { "epoch": 6.21, "learning_rate": 4.689847059348108e-05, "loss": 2.4977, "step": 1252500 }, { "epoch": 6.21, "learning_rate": 4.6897232007054995e-05, "loss": 2.5088, "step": 1253000 }, { "epoch": 6.21, "learning_rate": 4.689599342062891e-05, "loss": 2.4723, "step": 1253500 }, { "epoch": 6.21, "learning_rate": 4.689475483420283e-05, "loss": 2.4965, "step": 1254000 }, { "epoch": 6.22, "learning_rate": 4.689351624777674e-05, "loss": 2.4822, "step": 1254500 }, { "epoch": 6.22, "learning_rate": 4.689228013852351e-05, "loss": 2.5036, "step": 1255000 }, { "epoch": 6.22, "learning_rate": 4.6891041552097424e-05, "loss": 2.4874, "step": 1255500 }, { "epoch": 6.22, "learning_rate": 4.688980544284419e-05, "loss": 2.4798, "step": 1256000 }, { "epoch": 6.23, "learning_rate": 4.688856685641811e-05, "loss": 2.4956, "step": 1256500 }, { "epoch": 6.23, "learning_rate": 4.688733074716488e-05, "loss": 2.4933, "step": 1257000 }, { "epoch": 6.23, "learning_rate": 4.6886092160738796e-05, "loss": 2.5024, "step": 1257500 }, { "epoch": 6.23, "learning_rate": 4.6884853574312706e-05, "loss": 2.4811, "step": 1258000 }, { "epoch": 6.24, "learning_rate": 4.688361498788662e-05, "loss": 2.4707, "step": 1258500 }, { "epoch": 6.24, "learning_rate": 4.688237887863339e-05, "loss": 2.4857, "step": 1259000 }, { "epoch": 6.24, "learning_rate": 4.688114029220731e-05, "loss": 2.5269, "step": 1259500 }, { "epoch": 6.24, "learning_rate": 4.6879901705781226e-05, "loss": 2.4841, "step": 1260000 }, { "epoch": 6.24, "learning_rate": 4.687866311935514e-05, "loss": 2.5034, "step": 1260500 }, { "epoch": 6.25, "learning_rate": 4.687742453292906e-05, "loss": 2.4715, "step": 1261000 }, { "epoch": 6.25, "learning_rate": 4.6876185946502976e-05, "loss": 2.4924, "step": 1261500 }, { "epoch": 6.25, "learning_rate": 4.6874947360076893e-05, "loss": 2.5025, "step": 1262000 }, { "epoch": 6.25, "learning_rate": 4.687370877365081e-05, "loss": 2.4942, "step": 1262500 }, { "epoch": 6.26, "learning_rate": 4.687247018722473e-05, "loss": 2.4852, "step": 1263000 }, { "epoch": 6.26, "learning_rate": 4.6871231600798644e-05, "loss": 2.4815, "step": 1263500 }, { "epoch": 6.26, "learning_rate": 4.686999301437256e-05, "loss": 2.4833, "step": 1264000 }, { "epoch": 6.26, "learning_rate": 4.686875690511932e-05, "loss": 2.5097, "step": 1264500 }, { "epoch": 6.27, "learning_rate": 4.686751831869324e-05, "loss": 2.5067, "step": 1265000 }, { "epoch": 6.27, "learning_rate": 4.686627973226716e-05, "loss": 2.5029, "step": 1265500 }, { "epoch": 6.27, "learning_rate": 4.6865041145841074e-05, "loss": 2.4842, "step": 1266000 }, { "epoch": 6.27, "learning_rate": 4.686380255941499e-05, "loss": 2.4757, "step": 1266500 }, { "epoch": 6.28, "learning_rate": 4.686256397298891e-05, "loss": 2.4729, "step": 1267000 }, { "epoch": 6.28, "learning_rate": 4.6861325386562825e-05, "loss": 2.4995, "step": 1267500 }, { "epoch": 6.28, "learning_rate": 4.686008680013674e-05, "loss": 2.5044, "step": 1268000 }, { "epoch": 6.28, "learning_rate": 4.685884821371066e-05, "loss": 2.483, "step": 1268500 }, { "epoch": 6.29, "learning_rate": 4.685761458163028e-05, "loss": 2.5019, "step": 1269000 }, { "epoch": 6.29, "learning_rate": 4.6856375995204196e-05, "loss": 2.5024, "step": 1269500 }, { "epoch": 6.29, "learning_rate": 4.685513740877811e-05, "loss": 2.4707, "step": 1270000 }, { "epoch": 6.29, "learning_rate": 4.685389882235203e-05, "loss": 2.4647, "step": 1270500 }, { "epoch": 6.3, "learning_rate": 4.68526627130988e-05, "loss": 2.4922, "step": 1271000 }, { "epoch": 6.3, "learning_rate": 4.685142412667271e-05, "loss": 2.5003, "step": 1271500 }, { "epoch": 6.3, "learning_rate": 4.6850185540246626e-05, "loss": 2.4678, "step": 1272000 }, { "epoch": 6.3, "learning_rate": 4.684894695382054e-05, "loss": 2.4832, "step": 1272500 }, { "epoch": 6.31, "learning_rate": 4.684771084456732e-05, "loss": 2.4824, "step": 1273000 }, { "epoch": 6.31, "learning_rate": 4.6846472258141235e-05, "loss": 2.4748, "step": 1273500 }, { "epoch": 6.31, "learning_rate": 4.684523367171515e-05, "loss": 2.5172, "step": 1274000 }, { "epoch": 6.31, "learning_rate": 4.684399508528906e-05, "loss": 2.4525, "step": 1274500 }, { "epoch": 6.32, "learning_rate": 4.684275649886298e-05, "loss": 2.4783, "step": 1275000 }, { "epoch": 6.32, "learning_rate": 4.6841517912436896e-05, "loss": 2.5077, "step": 1275500 }, { "epoch": 6.32, "learning_rate": 4.6840281803183665e-05, "loss": 2.4907, "step": 1276000 }, { "epoch": 6.32, "learning_rate": 4.683904321675758e-05, "loss": 2.464, "step": 1276500 }, { "epoch": 6.33, "learning_rate": 4.68378046303315e-05, "loss": 2.4991, "step": 1277000 }, { "epoch": 6.33, "learning_rate": 4.6836566043905416e-05, "loss": 2.5031, "step": 1277500 }, { "epoch": 6.33, "learning_rate": 4.6835327457479326e-05, "loss": 2.4927, "step": 1278000 }, { "epoch": 6.33, "learning_rate": 4.683408887105324e-05, "loss": 2.4785, "step": 1278500 }, { "epoch": 6.34, "learning_rate": 4.683285028462716e-05, "loss": 2.465, "step": 1279000 }, { "epoch": 6.34, "learning_rate": 4.683161169820108e-05, "loss": 2.4818, "step": 1279500 }, { "epoch": 6.34, "learning_rate": 4.6830373111774994e-05, "loss": 2.5001, "step": 1280000 }, { "epoch": 6.34, "learning_rate": 4.682913452534891e-05, "loss": 2.4687, "step": 1280500 }, { "epoch": 6.35, "learning_rate": 4.682789841609568e-05, "loss": 2.4916, "step": 1281000 }, { "epoch": 6.35, "learning_rate": 4.6826659829669596e-05, "loss": 2.4898, "step": 1281500 }, { "epoch": 6.35, "learning_rate": 4.682542124324351e-05, "loss": 2.4705, "step": 1282000 }, { "epoch": 6.35, "learning_rate": 4.682418265681743e-05, "loss": 2.4925, "step": 1282500 }, { "epoch": 6.36, "learning_rate": 4.682294407039135e-05, "loss": 2.4848, "step": 1283000 }, { "epoch": 6.36, "learning_rate": 4.6821705483965264e-05, "loss": 2.4765, "step": 1283500 }, { "epoch": 6.36, "learning_rate": 4.6820469374712026e-05, "loss": 2.4905, "step": 1284000 }, { "epoch": 6.36, "learning_rate": 4.681923078828594e-05, "loss": 2.4852, "step": 1284500 }, { "epoch": 6.37, "learning_rate": 4.681799220185986e-05, "loss": 2.4962, "step": 1285000 }, { "epoch": 6.37, "learning_rate": 4.681675361543378e-05, "loss": 2.4493, "step": 1285500 }, { "epoch": 6.37, "learning_rate": 4.6815515029007694e-05, "loss": 2.4891, "step": 1286000 }, { "epoch": 6.37, "learning_rate": 4.681427644258161e-05, "loss": 2.4824, "step": 1286500 }, { "epoch": 6.38, "learning_rate": 4.681303785615553e-05, "loss": 2.4763, "step": 1287000 }, { "epoch": 6.38, "learning_rate": 4.6811799269729445e-05, "loss": 2.5012, "step": 1287500 }, { "epoch": 6.38, "learning_rate": 4.681056068330336e-05, "loss": 2.4999, "step": 1288000 }, { "epoch": 6.38, "learning_rate": 4.680932209687728e-05, "loss": 2.4742, "step": 1288500 }, { "epoch": 6.39, "learning_rate": 4.680808598762405e-05, "loss": 2.4774, "step": 1289000 }, { "epoch": 6.39, "learning_rate": 4.6806847401197964e-05, "loss": 2.4746, "step": 1289500 }, { "epoch": 6.39, "learning_rate": 4.680560881477188e-05, "loss": 2.4768, "step": 1290000 }, { "epoch": 6.39, "learning_rate": 4.68043702283458e-05, "loss": 2.4646, "step": 1290500 }, { "epoch": 6.4, "learning_rate": 4.6803131641919715e-05, "loss": 2.4499, "step": 1291000 }, { "epoch": 6.4, "learning_rate": 4.680189553266648e-05, "loss": 2.4654, "step": 1291500 }, { "epoch": 6.4, "learning_rate": 4.6800656946240394e-05, "loss": 2.4719, "step": 1292000 }, { "epoch": 6.4, "learning_rate": 4.679941835981431e-05, "loss": 2.4862, "step": 1292500 }, { "epoch": 6.41, "learning_rate": 4.6798182250561087e-05, "loss": 2.4977, "step": 1293000 }, { "epoch": 6.41, "learning_rate": 4.679694614130785e-05, "loss": 2.4832, "step": 1293500 }, { "epoch": 6.41, "learning_rate": 4.6795707554881766e-05, "loss": 2.5195, "step": 1294000 }, { "epoch": 6.41, "learning_rate": 4.679446896845568e-05, "loss": 2.4994, "step": 1294500 }, { "epoch": 6.42, "learning_rate": 4.67932303820296e-05, "loss": 2.4854, "step": 1295000 }, { "epoch": 6.42, "learning_rate": 4.6791991795603516e-05, "loss": 2.4757, "step": 1295500 }, { "epoch": 6.42, "learning_rate": 4.679075320917743e-05, "loss": 2.5, "step": 1296000 }, { "epoch": 6.42, "learning_rate": 4.678951462275134e-05, "loss": 2.4907, "step": 1296500 }, { "epoch": 6.43, "learning_rate": 4.678827603632526e-05, "loss": 2.5016, "step": 1297000 }, { "epoch": 6.43, "learning_rate": 4.678703744989918e-05, "loss": 2.5061, "step": 1297500 }, { "epoch": 6.43, "learning_rate": 4.6785798863473094e-05, "loss": 2.4797, "step": 1298000 }, { "epoch": 6.43, "learning_rate": 4.678456027704701e-05, "loss": 2.5274, "step": 1298500 }, { "epoch": 6.44, "learning_rate": 4.678332169062093e-05, "loss": 2.489, "step": 1299000 }, { "epoch": 6.44, "learning_rate": 4.6782083104194845e-05, "loss": 2.4759, "step": 1299500 }, { "epoch": 6.44, "learning_rate": 4.6780846994941614e-05, "loss": 2.4618, "step": 1300000 }, { "epoch": 6.44, "learning_rate": 4.677961088568838e-05, "loss": 2.4962, "step": 1300500 }, { "epoch": 6.45, "learning_rate": 4.67783722992623e-05, "loss": 2.485, "step": 1301000 }, { "epoch": 6.45, "learning_rate": 4.6777133712836216e-05, "loss": 2.4917, "step": 1301500 }, { "epoch": 6.45, "learning_rate": 4.677589512641013e-05, "loss": 2.4872, "step": 1302000 }, { "epoch": 6.45, "learning_rate": 4.677465653998405e-05, "loss": 2.4978, "step": 1302500 }, { "epoch": 6.46, "learning_rate": 4.677342043073082e-05, "loss": 2.486, "step": 1303000 }, { "epoch": 6.46, "learning_rate": 4.6772181844304736e-05, "loss": 2.5053, "step": 1303500 }, { "epoch": 6.46, "learning_rate": 4.6770945735051505e-05, "loss": 2.4912, "step": 1304000 }, { "epoch": 6.46, "learning_rate": 4.676970714862542e-05, "loss": 2.4926, "step": 1304500 }, { "epoch": 6.47, "learning_rate": 4.676846856219934e-05, "loss": 2.4839, "step": 1305000 }, { "epoch": 6.47, "learning_rate": 4.6767229975773256e-05, "loss": 2.4895, "step": 1305500 }, { "epoch": 6.47, "learning_rate": 4.6765991389347166e-05, "loss": 2.5066, "step": 1306000 }, { "epoch": 6.47, "learning_rate": 4.6764755280093935e-05, "loss": 2.4824, "step": 1306500 }, { "epoch": 6.48, "learning_rate": 4.676351669366785e-05, "loss": 2.482, "step": 1307000 }, { "epoch": 6.48, "learning_rate": 4.676227810724177e-05, "loss": 2.4795, "step": 1307500 }, { "epoch": 6.48, "learning_rate": 4.6761039520815685e-05, "loss": 2.5152, "step": 1308000 }, { "epoch": 6.48, "learning_rate": 4.67598009343896e-05, "loss": 2.4962, "step": 1308500 }, { "epoch": 6.49, "learning_rate": 4.675856234796352e-05, "loss": 2.499, "step": 1309000 }, { "epoch": 6.49, "learning_rate": 4.6757323761537436e-05, "loss": 2.5016, "step": 1309500 }, { "epoch": 6.49, "learning_rate": 4.675608517511135e-05, "loss": 2.5232, "step": 1310000 }, { "epoch": 6.49, "learning_rate": 4.675484658868527e-05, "loss": 2.4946, "step": 1310500 }, { "epoch": 6.5, "learning_rate": 4.675361047943204e-05, "loss": 2.4769, "step": 1311000 }, { "epoch": 6.5, "learning_rate": 4.6752371893005956e-05, "loss": 2.4588, "step": 1311500 }, { "epoch": 6.5, "learning_rate": 4.675113578375272e-05, "loss": 2.514, "step": 1312000 }, { "epoch": 6.5, "learning_rate": 4.6749897197326635e-05, "loss": 2.5118, "step": 1312500 }, { "epoch": 6.51, "learning_rate": 4.674865861090055e-05, "loss": 2.4955, "step": 1313000 }, { "epoch": 6.51, "learning_rate": 4.674742002447447e-05, "loss": 2.4849, "step": 1313500 }, { "epoch": 6.51, "learning_rate": 4.6746181438048385e-05, "loss": 2.4894, "step": 1314000 }, { "epoch": 6.51, "learning_rate": 4.67449428516223e-05, "loss": 2.4816, "step": 1314500 }, { "epoch": 6.51, "learning_rate": 4.674370426519622e-05, "loss": 2.4829, "step": 1315000 }, { "epoch": 6.52, "learning_rate": 4.6742465678770136e-05, "loss": 2.4969, "step": 1315500 }, { "epoch": 6.52, "learning_rate": 4.6741229569516905e-05, "loss": 2.4945, "step": 1316000 }, { "epoch": 6.52, "learning_rate": 4.673999346026367e-05, "loss": 2.4751, "step": 1316500 }, { "epoch": 6.52, "learning_rate": 4.6738754873837584e-05, "loss": 2.5224, "step": 1317000 }, { "epoch": 6.53, "learning_rate": 4.67375162874115e-05, "loss": 2.5149, "step": 1317500 }, { "epoch": 6.53, "learning_rate": 4.673627770098542e-05, "loss": 2.5144, "step": 1318000 }, { "epoch": 6.53, "learning_rate": 4.6735039114559335e-05, "loss": 2.4687, "step": 1318500 }, { "epoch": 6.53, "learning_rate": 4.673380052813325e-05, "loss": 2.4861, "step": 1319000 }, { "epoch": 6.54, "learning_rate": 4.673256441888002e-05, "loss": 2.5114, "step": 1319500 }, { "epoch": 6.54, "learning_rate": 4.673132583245394e-05, "loss": 2.482, "step": 1320000 }, { "epoch": 6.54, "learning_rate": 4.6730087246027854e-05, "loss": 2.5231, "step": 1320500 }, { "epoch": 6.54, "learning_rate": 4.672885113677462e-05, "loss": 2.511, "step": 1321000 }, { "epoch": 6.55, "learning_rate": 4.672761502752139e-05, "loss": 2.512, "step": 1321500 }, { "epoch": 6.55, "learning_rate": 4.672637644109531e-05, "loss": 2.5054, "step": 1322000 }, { "epoch": 6.55, "learning_rate": 4.6725137854669226e-05, "loss": 2.502, "step": 1322500 }, { "epoch": 6.55, "learning_rate": 4.6723901745415995e-05, "loss": 2.4838, "step": 1323000 }, { "epoch": 6.56, "learning_rate": 4.672266315898991e-05, "loss": 2.5259, "step": 1323500 }, { "epoch": 6.56, "learning_rate": 4.672142457256383e-05, "loss": 2.5036, "step": 1324000 }, { "epoch": 6.56, "learning_rate": 4.6720185986137745e-05, "loss": 2.5052, "step": 1324500 }, { "epoch": 6.56, "learning_rate": 4.671894739971166e-05, "loss": 2.4959, "step": 1325000 }, { "epoch": 6.57, "learning_rate": 4.671770881328558e-05, "loss": 2.5092, "step": 1325500 }, { "epoch": 6.57, "learning_rate": 4.6716470226859496e-05, "loss": 2.5141, "step": 1326000 }, { "epoch": 6.57, "learning_rate": 4.671523164043341e-05, "loss": 2.5202, "step": 1326500 }, { "epoch": 6.57, "learning_rate": 4.671399305400732e-05, "loss": 2.4919, "step": 1327000 }, { "epoch": 6.58, "learning_rate": 4.671275446758124e-05, "loss": 2.4807, "step": 1327500 }, { "epoch": 6.58, "learning_rate": 4.671151588115516e-05, "loss": 2.4948, "step": 1328000 }, { "epoch": 6.58, "learning_rate": 4.6710277294729074e-05, "loss": 2.51, "step": 1328500 }, { "epoch": 6.58, "learning_rate": 4.670903870830299e-05, "loss": 2.4887, "step": 1329000 }, { "epoch": 6.59, "learning_rate": 4.67078001218769e-05, "loss": 2.4943, "step": 1329500 }, { "epoch": 6.59, "learning_rate": 4.670656153545082e-05, "loss": 2.5102, "step": 1330000 }, { "epoch": 6.59, "learning_rate": 4.6705322949024735e-05, "loss": 2.4674, "step": 1330500 }, { "epoch": 6.59, "learning_rate": 4.670408436259865e-05, "loss": 2.4995, "step": 1331000 }, { "epoch": 6.6, "learning_rate": 4.670285073051828e-05, "loss": 2.5186, "step": 1331500 }, { "epoch": 6.6, "learning_rate": 4.6701612144092196e-05, "loss": 2.485, "step": 1332000 }, { "epoch": 6.6, "learning_rate": 4.670037355766611e-05, "loss": 2.487, "step": 1332500 }, { "epoch": 6.6, "learning_rate": 4.669913497124003e-05, "loss": 2.5017, "step": 1333000 }, { "epoch": 6.61, "learning_rate": 4.669789886198679e-05, "loss": 2.4821, "step": 1333500 }, { "epoch": 6.61, "learning_rate": 4.669666027556071e-05, "loss": 2.4815, "step": 1334000 }, { "epoch": 6.61, "learning_rate": 4.6695421689134626e-05, "loss": 2.4971, "step": 1334500 }, { "epoch": 6.61, "learning_rate": 4.669418310270854e-05, "loss": 2.5461, "step": 1335000 }, { "epoch": 6.62, "learning_rate": 4.669294451628246e-05, "loss": 2.514, "step": 1335500 }, { "epoch": 6.62, "learning_rate": 4.669170592985638e-05, "loss": 2.5117, "step": 1336000 }, { "epoch": 6.62, "learning_rate": 4.6690467343430294e-05, "loss": 2.5189, "step": 1336500 }, { "epoch": 6.62, "learning_rate": 4.668922875700421e-05, "loss": 2.4931, "step": 1337000 }, { "epoch": 6.63, "learning_rate": 4.668799017057813e-05, "loss": 2.5089, "step": 1337500 }, { "epoch": 6.63, "learning_rate": 4.668675158415204e-05, "loss": 2.5183, "step": 1338000 }, { "epoch": 6.63, "learning_rate": 4.6685512997725955e-05, "loss": 2.5247, "step": 1338500 }, { "epoch": 6.63, "learning_rate": 4.668427441129987e-05, "loss": 2.4885, "step": 1339000 }, { "epoch": 6.64, "learning_rate": 4.668303830204665e-05, "loss": 2.5007, "step": 1339500 }, { "epoch": 6.64, "learning_rate": 4.6681799715620564e-05, "loss": 2.483, "step": 1340000 }, { "epoch": 6.64, "learning_rate": 4.668056112919448e-05, "loss": 2.5025, "step": 1340500 }, { "epoch": 6.64, "learning_rate": 4.667932254276839e-05, "loss": 2.4848, "step": 1341000 }, { "epoch": 6.65, "learning_rate": 4.667808395634231e-05, "loss": 2.5055, "step": 1341500 }, { "epoch": 6.65, "learning_rate": 4.6676845369916225e-05, "loss": 2.4936, "step": 1342000 }, { "epoch": 6.65, "learning_rate": 4.6675609260662994e-05, "loss": 2.5113, "step": 1342500 }, { "epoch": 6.65, "learning_rate": 4.667437315140976e-05, "loss": 2.4663, "step": 1343000 }, { "epoch": 6.66, "learning_rate": 4.667313456498368e-05, "loss": 2.4992, "step": 1343500 }, { "epoch": 6.66, "learning_rate": 4.66718959785576e-05, "loss": 2.4944, "step": 1344000 }, { "epoch": 6.66, "learning_rate": 4.6670657392131514e-05, "loss": 2.4973, "step": 1344500 }, { "epoch": 6.66, "learning_rate": 4.666941880570543e-05, "loss": 2.469, "step": 1345000 }, { "epoch": 6.67, "learning_rate": 4.666818021927935e-05, "loss": 2.5108, "step": 1345500 }, { "epoch": 6.67, "learning_rate": 4.6666941632853264e-05, "loss": 2.4903, "step": 1346000 }, { "epoch": 6.67, "learning_rate": 4.666570304642718e-05, "loss": 2.4975, "step": 1346500 }, { "epoch": 6.67, "learning_rate": 4.66644644600011e-05, "loss": 2.5192, "step": 1347000 }, { "epoch": 6.68, "learning_rate": 4.666322587357501e-05, "loss": 2.5035, "step": 1347500 }, { "epoch": 6.68, "learning_rate": 4.666198976432178e-05, "loss": 2.4976, "step": 1348000 }, { "epoch": 6.68, "learning_rate": 4.6660751177895694e-05, "loss": 2.5002, "step": 1348500 }, { "epoch": 6.68, "learning_rate": 4.665951506864246e-05, "loss": 2.5158, "step": 1349000 }, { "epoch": 6.69, "learning_rate": 4.665827648221638e-05, "loss": 2.4809, "step": 1349500 }, { "epoch": 6.69, "learning_rate": 4.66570378957903e-05, "loss": 2.4874, "step": 1350000 }, { "epoch": 6.69, "learning_rate": 4.6655799309364214e-05, "loss": 2.5005, "step": 1350500 }, { "epoch": 6.69, "learning_rate": 4.665456072293813e-05, "loss": 2.4906, "step": 1351000 }, { "epoch": 6.7, "learning_rate": 4.665332213651205e-05, "loss": 2.5107, "step": 1351500 }, { "epoch": 6.7, "learning_rate": 4.6652083550085964e-05, "loss": 2.4649, "step": 1352000 }, { "epoch": 6.7, "learning_rate": 4.665084496365988e-05, "loss": 2.4886, "step": 1352500 }, { "epoch": 6.7, "learning_rate": 4.66496063772338e-05, "loss": 2.5207, "step": 1353000 }, { "epoch": 6.71, "learning_rate": 4.664837026798056e-05, "loss": 2.4898, "step": 1353500 }, { "epoch": 6.71, "learning_rate": 4.664713168155448e-05, "loss": 2.5024, "step": 1354000 }, { "epoch": 6.71, "learning_rate": 4.6645893095128394e-05, "loss": 2.517, "step": 1354500 }, { "epoch": 6.71, "learning_rate": 4.664465450870231e-05, "loss": 2.4937, "step": 1355000 }, { "epoch": 6.72, "learning_rate": 4.664341592227623e-05, "loss": 2.4872, "step": 1355500 }, { "epoch": 6.72, "learning_rate": 4.6642177335850145e-05, "loss": 2.5017, "step": 1356000 }, { "epoch": 6.72, "learning_rate": 4.6640938749424055e-05, "loss": 2.5051, "step": 1356500 }, { "epoch": 6.72, "learning_rate": 4.663970016299797e-05, "loss": 2.477, "step": 1357000 }, { "epoch": 6.73, "learning_rate": 4.663846157657189e-05, "loss": 2.5128, "step": 1357500 }, { "epoch": 6.73, "learning_rate": 4.6637222990145806e-05, "loss": 2.4908, "step": 1358000 }, { "epoch": 6.73, "learning_rate": 4.663598440371972e-05, "loss": 2.4832, "step": 1358500 }, { "epoch": 6.73, "learning_rate": 4.66347482944665e-05, "loss": 2.4844, "step": 1359000 }, { "epoch": 6.74, "learning_rate": 4.6633509708040415e-05, "loss": 2.5207, "step": 1359500 }, { "epoch": 6.74, "learning_rate": 4.663227359878718e-05, "loss": 2.496, "step": 1360000 }, { "epoch": 6.74, "learning_rate": 4.6631035012361094e-05, "loss": 2.5076, "step": 1360500 }, { "epoch": 6.74, "learning_rate": 4.662979642593501e-05, "loss": 2.4935, "step": 1361000 }, { "epoch": 6.75, "learning_rate": 4.662855783950893e-05, "loss": 2.4879, "step": 1361500 }, { "epoch": 6.75, "learning_rate": 4.6627319253082845e-05, "loss": 2.4986, "step": 1362000 }, { "epoch": 6.75, "learning_rate": 4.6626083143829614e-05, "loss": 2.515, "step": 1362500 }, { "epoch": 6.75, "learning_rate": 4.662484455740353e-05, "loss": 2.501, "step": 1363000 }, { "epoch": 6.76, "learning_rate": 4.662360597097745e-05, "loss": 2.4869, "step": 1363500 }, { "epoch": 6.76, "learning_rate": 4.6622367384551365e-05, "loss": 2.4833, "step": 1364000 }, { "epoch": 6.76, "learning_rate": 4.662113127529813e-05, "loss": 2.4934, "step": 1364500 }, { "epoch": 6.76, "learning_rate": 4.6619895166044896e-05, "loss": 2.48, "step": 1365000 }, { "epoch": 6.77, "learning_rate": 4.661865657961881e-05, "loss": 2.4729, "step": 1365500 }, { "epoch": 6.77, "learning_rate": 4.661741799319273e-05, "loss": 2.4902, "step": 1366000 }, { "epoch": 6.77, "learning_rate": 4.6616179406766646e-05, "loss": 2.5141, "step": 1366500 }, { "epoch": 6.77, "learning_rate": 4.661494082034056e-05, "loss": 2.4894, "step": 1367000 }, { "epoch": 6.78, "learning_rate": 4.661370223391448e-05, "loss": 2.4918, "step": 1367500 }, { "epoch": 6.78, "learning_rate": 4.66124636474884e-05, "loss": 2.5045, "step": 1368000 }, { "epoch": 6.78, "learning_rate": 4.6611225061062314e-05, "loss": 2.4838, "step": 1368500 }, { "epoch": 6.78, "learning_rate": 4.660998647463623e-05, "loss": 2.487, "step": 1369000 }, { "epoch": 6.78, "learning_rate": 4.660874788821015e-05, "loss": 2.5039, "step": 1369500 }, { "epoch": 6.79, "learning_rate": 4.6607509301784065e-05, "loss": 2.5063, "step": 1370000 }, { "epoch": 6.79, "learning_rate": 4.660627071535798e-05, "loss": 2.5151, "step": 1370500 }, { "epoch": 6.79, "learning_rate": 4.6605039560450454e-05, "loss": 2.4945, "step": 1371000 }, { "epoch": 6.79, "learning_rate": 4.660380097402437e-05, "loss": 2.4963, "step": 1371500 }, { "epoch": 6.8, "learning_rate": 4.660256238759829e-05, "loss": 2.5327, "step": 1372000 }, { "epoch": 6.8, "learning_rate": 4.660132627834505e-05, "loss": 2.4942, "step": 1372500 }, { "epoch": 6.8, "learning_rate": 4.660008769191897e-05, "loss": 2.512, "step": 1373000 }, { "epoch": 6.8, "learning_rate": 4.6598849105492884e-05, "loss": 2.4764, "step": 1373500 }, { "epoch": 6.81, "learning_rate": 4.65976105190668e-05, "loss": 2.4803, "step": 1374000 }, { "epoch": 6.81, "learning_rate": 4.659637193264072e-05, "loss": 2.4855, "step": 1374500 }, { "epoch": 6.81, "learning_rate": 4.6595133346214635e-05, "loss": 2.4853, "step": 1375000 }, { "epoch": 6.81, "learning_rate": 4.6593897236961404e-05, "loss": 2.4791, "step": 1375500 }, { "epoch": 6.82, "learning_rate": 4.659265865053532e-05, "loss": 2.4887, "step": 1376000 }, { "epoch": 6.82, "learning_rate": 4.659142006410924e-05, "loss": 2.4982, "step": 1376500 }, { "epoch": 6.82, "learning_rate": 4.6590181477683154e-05, "loss": 2.5017, "step": 1377000 }, { "epoch": 6.82, "learning_rate": 4.658894536842992e-05, "loss": 2.5206, "step": 1377500 }, { "epoch": 6.83, "learning_rate": 4.658770678200384e-05, "loss": 2.4919, "step": 1378000 }, { "epoch": 6.83, "learning_rate": 4.658646819557776e-05, "loss": 2.5036, "step": 1378500 }, { "epoch": 6.83, "learning_rate": 4.658522960915167e-05, "loss": 2.4691, "step": 1379000 }, { "epoch": 6.83, "learning_rate": 4.6583991022725584e-05, "loss": 2.4853, "step": 1379500 }, { "epoch": 6.84, "learning_rate": 4.65827524362995e-05, "loss": 2.5148, "step": 1380000 }, { "epoch": 6.84, "learning_rate": 4.658151384987342e-05, "loss": 2.5151, "step": 1380500 }, { "epoch": 6.84, "learning_rate": 4.6580275263447335e-05, "loss": 2.4836, "step": 1381000 }, { "epoch": 6.84, "learning_rate": 4.657903667702125e-05, "loss": 2.4898, "step": 1381500 }, { "epoch": 6.85, "learning_rate": 4.657779809059517e-05, "loss": 2.4917, "step": 1382000 }, { "epoch": 6.85, "learning_rate": 4.657655950416908e-05, "loss": 2.5036, "step": 1382500 }, { "epoch": 6.85, "learning_rate": 4.6575320917742996e-05, "loss": 2.4992, "step": 1383000 }, { "epoch": 6.85, "learning_rate": 4.657408233131691e-05, "loss": 2.502, "step": 1383500 }, { "epoch": 6.86, "learning_rate": 4.657284374489083e-05, "loss": 2.4944, "step": 1384000 }, { "epoch": 6.86, "learning_rate": 4.657160515846475e-05, "loss": 2.4917, "step": 1384500 }, { "epoch": 6.86, "learning_rate": 4.6570366572038664e-05, "loss": 2.4888, "step": 1385000 }, { "epoch": 6.86, "learning_rate": 4.656912798561258e-05, "loss": 2.485, "step": 1385500 }, { "epoch": 6.87, "learning_rate": 4.65678893991865e-05, "loss": 2.4949, "step": 1386000 }, { "epoch": 6.87, "learning_rate": 4.6566653289933266e-05, "loss": 2.4753, "step": 1386500 }, { "epoch": 6.87, "learning_rate": 4.656541470350718e-05, "loss": 2.5044, "step": 1387000 }, { "epoch": 6.87, "learning_rate": 4.65641761170811e-05, "loss": 2.5088, "step": 1387500 }, { "epoch": 6.88, "learning_rate": 4.656293753065502e-05, "loss": 2.4922, "step": 1388000 }, { "epoch": 6.88, "learning_rate": 4.6561698944228934e-05, "loss": 2.5054, "step": 1388500 }, { "epoch": 6.88, "learning_rate": 4.656046035780285e-05, "loss": 2.4948, "step": 1389000 }, { "epoch": 6.88, "learning_rate": 4.655922177137677e-05, "loss": 2.5002, "step": 1389500 }, { "epoch": 6.89, "learning_rate": 4.655798566212353e-05, "loss": 2.4978, "step": 1390000 }, { "epoch": 6.89, "learning_rate": 4.655674707569745e-05, "loss": 2.5017, "step": 1390500 }, { "epoch": 6.89, "learning_rate": 4.6555508489271364e-05, "loss": 2.4925, "step": 1391000 }, { "epoch": 6.89, "learning_rate": 4.655426990284528e-05, "loss": 2.4932, "step": 1391500 }, { "epoch": 6.9, "learning_rate": 4.655303379359205e-05, "loss": 2.5058, "step": 1392000 }, { "epoch": 6.9, "learning_rate": 4.6551795207165966e-05, "loss": 2.4799, "step": 1392500 }, { "epoch": 6.9, "learning_rate": 4.655055662073988e-05, "loss": 2.5225, "step": 1393000 }, { "epoch": 6.9, "learning_rate": 4.65493180343138e-05, "loss": 2.4859, "step": 1393500 }, { "epoch": 6.91, "learning_rate": 4.654807944788772e-05, "loss": 2.521, "step": 1394000 }, { "epoch": 6.91, "learning_rate": 4.6546840861461634e-05, "loss": 2.4728, "step": 1394500 }, { "epoch": 6.91, "learning_rate": 4.6545604752208396e-05, "loss": 2.4708, "step": 1395000 }, { "epoch": 6.91, "learning_rate": 4.654436616578231e-05, "loss": 2.4922, "step": 1395500 }, { "epoch": 6.92, "learning_rate": 4.654312757935623e-05, "loss": 2.4731, "step": 1396000 }, { "epoch": 6.92, "learning_rate": 4.654188899293015e-05, "loss": 2.4955, "step": 1396500 }, { "epoch": 6.92, "learning_rate": 4.6540650406504064e-05, "loss": 2.4996, "step": 1397000 }, { "epoch": 6.92, "learning_rate": 4.653941182007798e-05, "loss": 2.4988, "step": 1397500 }, { "epoch": 6.93, "learning_rate": 4.65381732336519e-05, "loss": 2.476, "step": 1398000 }, { "epoch": 6.93, "learning_rate": 4.6536934647225815e-05, "loss": 2.487, "step": 1398500 }, { "epoch": 6.93, "learning_rate": 4.6535698537972583e-05, "loss": 2.4695, "step": 1399000 }, { "epoch": 6.93, "learning_rate": 4.65344599515465e-05, "loss": 2.4825, "step": 1399500 }, { "epoch": 6.94, "learning_rate": 4.653322136512042e-05, "loss": 2.4803, "step": 1400000 }, { "epoch": 6.94, "learning_rate": 4.6531982778694334e-05, "loss": 2.4772, "step": 1400500 }, { "epoch": 6.94, "learning_rate": 4.653074419226825e-05, "loss": 2.4992, "step": 1401000 }, { "epoch": 6.94, "learning_rate": 4.652950560584217e-05, "loss": 2.4755, "step": 1401500 }, { "epoch": 6.95, "learning_rate": 4.6528267019416085e-05, "loss": 2.486, "step": 1402000 }, { "epoch": 6.95, "learning_rate": 4.652703091016285e-05, "loss": 2.5144, "step": 1402500 }, { "epoch": 6.95, "learning_rate": 4.652579480090962e-05, "loss": 2.4947, "step": 1403000 }, { "epoch": 6.95, "learning_rate": 4.652455621448354e-05, "loss": 2.4977, "step": 1403500 }, { "epoch": 6.96, "learning_rate": 4.652331762805745e-05, "loss": 2.508, "step": 1404000 }, { "epoch": 6.96, "learning_rate": 4.652207904163137e-05, "loss": 2.5197, "step": 1404500 }, { "epoch": 6.96, "learning_rate": 4.6520840455205284e-05, "loss": 2.4885, "step": 1405000 }, { "epoch": 6.96, "learning_rate": 4.65196018687792e-05, "loss": 2.4976, "step": 1405500 }, { "epoch": 6.97, "learning_rate": 4.651836328235312e-05, "loss": 2.4476, "step": 1406000 }, { "epoch": 6.97, "learning_rate": 4.6517124695927034e-05, "loss": 2.5003, "step": 1406500 }, { "epoch": 6.97, "learning_rate": 4.651588610950095e-05, "loss": 2.4807, "step": 1407000 }, { "epoch": 6.97, "learning_rate": 4.651464752307487e-05, "loss": 2.5, "step": 1407500 }, { "epoch": 6.98, "learning_rate": 4.6513408936648785e-05, "loss": 2.5051, "step": 1408000 }, { "epoch": 6.98, "learning_rate": 4.65121703502227e-05, "loss": 2.4952, "step": 1408500 }, { "epoch": 6.98, "learning_rate": 4.651093176379662e-05, "loss": 2.4864, "step": 1409000 }, { "epoch": 6.98, "learning_rate": 4.650969565454338e-05, "loss": 2.4935, "step": 1409500 }, { "epoch": 6.99, "learning_rate": 4.65084570681173e-05, "loss": 2.5068, "step": 1410000 }, { "epoch": 6.99, "learning_rate": 4.650722095886407e-05, "loss": 2.5097, "step": 1410500 }, { "epoch": 6.99, "learning_rate": 4.6505982372437984e-05, "loss": 2.4744, "step": 1411000 }, { "epoch": 6.99, "learning_rate": 4.65047437860119e-05, "loss": 2.4966, "step": 1411500 }, { "epoch": 7.0, "learning_rate": 4.650350519958582e-05, "loss": 2.4788, "step": 1412000 }, { "epoch": 7.0, "learning_rate": 4.6502266613159735e-05, "loss": 2.4876, "step": 1412500 }, { "epoch": 7.0, "eval_accuracy": 0.6390809050019882, "eval_accuracy_mlm": 0.5919321078827258, "eval_accuracy_nsp": 0.8611933683454986, "eval_loss": 2.4429469108581543, "eval_runtime": 145.8536, "eval_samples_per_second": 1748.048, "eval_steps_per_second": 72.84, "step": 1412901 }, { "epoch": 7.0, "learning_rate": 4.650102802673365e-05, "loss": 2.4845, "step": 1413000 }, { "epoch": 7.0, "learning_rate": 4.649978944030757e-05, "loss": 2.4572, "step": 1413500 }, { "epoch": 7.01, "learning_rate": 4.6498550853881485e-05, "loss": 2.4539, "step": 1414000 }, { "epoch": 7.01, "learning_rate": 4.6497314744628254e-05, "loss": 2.4568, "step": 1414500 }, { "epoch": 7.01, "learning_rate": 4.6496076158202164e-05, "loss": 2.4616, "step": 1415000 }, { "epoch": 7.01, "learning_rate": 4.649483757177608e-05, "loss": 2.4551, "step": 1415500 }, { "epoch": 7.02, "learning_rate": 4.649360146252286e-05, "loss": 2.4791, "step": 1416000 }, { "epoch": 7.02, "learning_rate": 4.649236287609677e-05, "loss": 2.481, "step": 1416500 }, { "epoch": 7.02, "learning_rate": 4.6491124289670684e-05, "loss": 2.4784, "step": 1417000 }, { "epoch": 7.02, "learning_rate": 4.64898857032446e-05, "loss": 2.467, "step": 1417500 }, { "epoch": 7.03, "learning_rate": 4.648864711681852e-05, "loss": 2.4805, "step": 1418000 }, { "epoch": 7.03, "learning_rate": 4.6487408530392435e-05, "loss": 2.4275, "step": 1418500 }, { "epoch": 7.03, "learning_rate": 4.648616994396635e-05, "loss": 2.4766, "step": 1419000 }, { "epoch": 7.03, "learning_rate": 4.648493135754027e-05, "loss": 2.4559, "step": 1419500 }, { "epoch": 7.04, "learning_rate": 4.6483692771114185e-05, "loss": 2.5089, "step": 1420000 }, { "epoch": 7.04, "learning_rate": 4.64824541846881e-05, "loss": 2.4865, "step": 1420500 }, { "epoch": 7.04, "learning_rate": 4.648121559826202e-05, "loss": 2.4508, "step": 1421000 }, { "epoch": 7.04, "learning_rate": 4.6479977011835936e-05, "loss": 2.4655, "step": 1421500 }, { "epoch": 7.05, "learning_rate": 4.647873842540985e-05, "loss": 2.4671, "step": 1422000 }, { "epoch": 7.05, "learning_rate": 4.647749983898377e-05, "loss": 2.4695, "step": 1422500 }, { "epoch": 7.05, "learning_rate": 4.647626372973053e-05, "loss": 2.4488, "step": 1423000 }, { "epoch": 7.05, "learning_rate": 4.647502514330445e-05, "loss": 2.4521, "step": 1423500 }, { "epoch": 7.05, "learning_rate": 4.647378903405122e-05, "loss": 2.4709, "step": 1424000 }, { "epoch": 7.06, "learning_rate": 4.647255540197084e-05, "loss": 2.4587, "step": 1424500 }, { "epoch": 7.06, "learning_rate": 4.6471316815544755e-05, "loss": 2.4739, "step": 1425000 }, { "epoch": 7.06, "learning_rate": 4.647007822911867e-05, "loss": 2.4977, "step": 1425500 }, { "epoch": 7.06, "learning_rate": 4.646883964269259e-05, "loss": 2.4725, "step": 1426000 }, { "epoch": 7.07, "learning_rate": 4.6467601056266506e-05, "loss": 2.4716, "step": 1426500 }, { "epoch": 7.07, "learning_rate": 4.646636246984042e-05, "loss": 2.4874, "step": 1427000 }, { "epoch": 7.07, "learning_rate": 4.646512388341434e-05, "loss": 2.4674, "step": 1427500 }, { "epoch": 7.07, "learning_rate": 4.646388529698826e-05, "loss": 2.4617, "step": 1428000 }, { "epoch": 7.08, "learning_rate": 4.6462646710562174e-05, "loss": 2.4703, "step": 1428500 }, { "epoch": 7.08, "learning_rate": 4.6461408124136084e-05, "loss": 2.4612, "step": 1429000 }, { "epoch": 7.08, "learning_rate": 4.646016953771e-05, "loss": 2.4505, "step": 1429500 }, { "epoch": 7.08, "learning_rate": 4.645893095128392e-05, "loss": 2.4645, "step": 1430000 }, { "epoch": 7.09, "learning_rate": 4.6457692364857835e-05, "loss": 2.4425, "step": 1430500 }, { "epoch": 7.09, "learning_rate": 4.645645377843175e-05, "loss": 2.4478, "step": 1431000 }, { "epoch": 7.09, "learning_rate": 4.645521519200567e-05, "loss": 2.4645, "step": 1431500 }, { "epoch": 7.09, "learning_rate": 4.6453979082752444e-05, "loss": 2.4471, "step": 1432000 }, { "epoch": 7.1, "learning_rate": 4.6452740496326354e-05, "loss": 2.4378, "step": 1432500 }, { "epoch": 7.1, "learning_rate": 4.645150190990027e-05, "loss": 2.4872, "step": 1433000 }, { "epoch": 7.1, "learning_rate": 4.645026332347419e-05, "loss": 2.4629, "step": 1433500 }, { "epoch": 7.1, "learning_rate": 4.6449024737048105e-05, "loss": 2.4646, "step": 1434000 }, { "epoch": 7.11, "learning_rate": 4.644778615062202e-05, "loss": 2.4917, "step": 1434500 }, { "epoch": 7.11, "learning_rate": 4.644654756419594e-05, "loss": 2.4704, "step": 1435000 }, { "epoch": 7.11, "learning_rate": 4.644530897776985e-05, "loss": 2.4439, "step": 1435500 }, { "epoch": 7.11, "learning_rate": 4.6444070391343766e-05, "loss": 2.458, "step": 1436000 }, { "epoch": 7.12, "learning_rate": 4.6442834282090535e-05, "loss": 2.4867, "step": 1436500 }, { "epoch": 7.12, "learning_rate": 4.644159569566445e-05, "loss": 2.4849, "step": 1437000 }, { "epoch": 7.12, "learning_rate": 4.644035710923837e-05, "loss": 2.4636, "step": 1437500 }, { "epoch": 7.12, "learning_rate": 4.6439118522812286e-05, "loss": 2.4742, "step": 1438000 }, { "epoch": 7.13, "learning_rate": 4.643788736790476e-05, "loss": 2.4822, "step": 1438500 }, { "epoch": 7.13, "learning_rate": 4.6436648781478675e-05, "loss": 2.4591, "step": 1439000 }, { "epoch": 7.13, "learning_rate": 4.643541019505259e-05, "loss": 2.4727, "step": 1439500 }, { "epoch": 7.13, "learning_rate": 4.643417408579936e-05, "loss": 2.4814, "step": 1440000 }, { "epoch": 7.14, "learning_rate": 4.643293549937328e-05, "loss": 2.4816, "step": 1440500 }, { "epoch": 7.14, "learning_rate": 4.6431696912947195e-05, "loss": 2.4745, "step": 1441000 }, { "epoch": 7.14, "learning_rate": 4.643045832652111e-05, "loss": 2.467, "step": 1441500 }, { "epoch": 7.14, "learning_rate": 4.642921974009503e-05, "loss": 2.4839, "step": 1442000 }, { "epoch": 7.15, "learning_rate": 4.6427981153668946e-05, "loss": 2.4538, "step": 1442500 }, { "epoch": 7.15, "learning_rate": 4.6426742567242856e-05, "loss": 2.4355, "step": 1443000 }, { "epoch": 7.15, "learning_rate": 4.642550398081677e-05, "loss": 2.479, "step": 1443500 }, { "epoch": 7.15, "learning_rate": 4.642426539439069e-05, "loss": 2.4619, "step": 1444000 }, { "epoch": 7.16, "learning_rate": 4.642302680796461e-05, "loss": 2.4853, "step": 1444500 }, { "epoch": 7.16, "learning_rate": 4.6421788221538524e-05, "loss": 2.4502, "step": 1445000 }, { "epoch": 7.16, "learning_rate": 4.642054963511244e-05, "loss": 2.4703, "step": 1445500 }, { "epoch": 7.16, "learning_rate": 4.641931104868636e-05, "loss": 2.4679, "step": 1446000 }, { "epoch": 7.17, "learning_rate": 4.6418072462260274e-05, "loss": 2.4749, "step": 1446500 }, { "epoch": 7.17, "learning_rate": 4.641683387583419e-05, "loss": 2.4873, "step": 1447000 }, { "epoch": 7.17, "learning_rate": 4.641559528940811e-05, "loss": 2.4585, "step": 1447500 }, { "epoch": 7.17, "learning_rate": 4.641435670298202e-05, "loss": 2.4642, "step": 1448000 }, { "epoch": 7.18, "learning_rate": 4.6413120593728794e-05, "loss": 2.4696, "step": 1448500 }, { "epoch": 7.18, "learning_rate": 4.641188200730271e-05, "loss": 2.4597, "step": 1449000 }, { "epoch": 7.18, "learning_rate": 4.641064342087663e-05, "loss": 2.4897, "step": 1449500 }, { "epoch": 7.18, "learning_rate": 4.640940731162339e-05, "loss": 2.4707, "step": 1450000 }, { "epoch": 7.19, "learning_rate": 4.640816872519731e-05, "loss": 2.4917, "step": 1450500 }, { "epoch": 7.19, "learning_rate": 4.6406930138771224e-05, "loss": 2.4877, "step": 1451000 }, { "epoch": 7.19, "learning_rate": 4.640569155234514e-05, "loss": 2.4417, "step": 1451500 }, { "epoch": 7.19, "learning_rate": 4.640445544309191e-05, "loss": 2.4632, "step": 1452000 }, { "epoch": 7.2, "learning_rate": 4.6403216856665826e-05, "loss": 2.5164, "step": 1452500 }, { "epoch": 7.2, "learning_rate": 4.640197827023974e-05, "loss": 2.476, "step": 1453000 }, { "epoch": 7.2, "learning_rate": 4.640073968381366e-05, "loss": 2.4726, "step": 1453500 }, { "epoch": 7.2, "learning_rate": 4.639950357456043e-05, "loss": 2.4642, "step": 1454000 }, { "epoch": 7.21, "learning_rate": 4.6398264988134346e-05, "loss": 2.4863, "step": 1454500 }, { "epoch": 7.21, "learning_rate": 4.639702640170826e-05, "loss": 2.4524, "step": 1455000 }, { "epoch": 7.21, "learning_rate": 4.639578781528218e-05, "loss": 2.4725, "step": 1455500 }, { "epoch": 7.21, "learning_rate": 4.639455170602894e-05, "loss": 2.4505, "step": 1456000 }, { "epoch": 7.22, "learning_rate": 4.639331311960286e-05, "loss": 2.4743, "step": 1456500 }, { "epoch": 7.22, "learning_rate": 4.6392074533176776e-05, "loss": 2.4581, "step": 1457000 }, { "epoch": 7.22, "learning_rate": 4.639083594675069e-05, "loss": 2.4725, "step": 1457500 }, { "epoch": 7.22, "learning_rate": 4.638959736032461e-05, "loss": 2.4704, "step": 1458000 }, { "epoch": 7.23, "learning_rate": 4.6388358773898526e-05, "loss": 2.4718, "step": 1458500 }, { "epoch": 7.23, "learning_rate": 4.638712018747244e-05, "loss": 2.4867, "step": 1459000 }, { "epoch": 7.23, "learning_rate": 4.638588407821921e-05, "loss": 2.4673, "step": 1459500 }, { "epoch": 7.23, "learning_rate": 4.638464549179313e-05, "loss": 2.4954, "step": 1460000 }, { "epoch": 7.24, "learning_rate": 4.6383406905367046e-05, "loss": 2.4766, "step": 1460500 }, { "epoch": 7.24, "learning_rate": 4.638216831894096e-05, "loss": 2.4681, "step": 1461000 }, { "epoch": 7.24, "learning_rate": 4.638092973251488e-05, "loss": 2.4931, "step": 1461500 }, { "epoch": 7.24, "learning_rate": 4.63796911460888e-05, "loss": 2.4671, "step": 1462000 }, { "epoch": 7.25, "learning_rate": 4.6378452559662714e-05, "loss": 2.4901, "step": 1462500 }, { "epoch": 7.25, "learning_rate": 4.637721397323663e-05, "loss": 2.4586, "step": 1463000 }, { "epoch": 7.25, "learning_rate": 4.637597538681054e-05, "loss": 2.4707, "step": 1463500 }, { "epoch": 7.25, "learning_rate": 4.637474175473016e-05, "loss": 2.4735, "step": 1464000 }, { "epoch": 7.26, "learning_rate": 4.637350316830408e-05, "loss": 2.4678, "step": 1464500 }, { "epoch": 7.26, "learning_rate": 4.6372264581877995e-05, "loss": 2.485, "step": 1465000 }, { "epoch": 7.26, "learning_rate": 4.637102599545191e-05, "loss": 2.4831, "step": 1465500 }, { "epoch": 7.26, "learning_rate": 4.636978740902583e-05, "loss": 2.4813, "step": 1466000 }, { "epoch": 7.27, "learning_rate": 4.6368548822599746e-05, "loss": 2.4825, "step": 1466500 }, { "epoch": 7.27, "learning_rate": 4.6367312713346515e-05, "loss": 2.4867, "step": 1467000 }, { "epoch": 7.27, "learning_rate": 4.6366074126920425e-05, "loss": 2.4784, "step": 1467500 }, { "epoch": 7.27, "learning_rate": 4.636483554049434e-05, "loss": 2.4478, "step": 1468000 }, { "epoch": 7.28, "learning_rate": 4.636359695406826e-05, "loss": 2.4608, "step": 1468500 }, { "epoch": 7.28, "learning_rate": 4.6362358367642176e-05, "loss": 2.4759, "step": 1469000 }, { "epoch": 7.28, "learning_rate": 4.636111978121609e-05, "loss": 2.4748, "step": 1469500 }, { "epoch": 7.28, "learning_rate": 4.635988119479001e-05, "loss": 2.4616, "step": 1470000 }, { "epoch": 7.29, "learning_rate": 4.635864260836393e-05, "loss": 2.4663, "step": 1470500 }, { "epoch": 7.29, "learning_rate": 4.6357404021937844e-05, "loss": 2.4643, "step": 1471000 }, { "epoch": 7.29, "learning_rate": 4.635616543551176e-05, "loss": 2.482, "step": 1471500 }, { "epoch": 7.29, "learning_rate": 4.635492932625853e-05, "loss": 2.4848, "step": 1472000 }, { "epoch": 7.3, "learning_rate": 4.6353690739832446e-05, "loss": 2.4771, "step": 1472500 }, { "epoch": 7.3, "learning_rate": 4.635245215340636e-05, "loss": 2.4642, "step": 1473000 }, { "epoch": 7.3, "learning_rate": 4.635121356698028e-05, "loss": 2.4742, "step": 1473500 }, { "epoch": 7.3, "learning_rate": 4.63499749805542e-05, "loss": 2.4884, "step": 1474000 }, { "epoch": 7.31, "learning_rate": 4.6348736394128114e-05, "loss": 2.4658, "step": 1474500 }, { "epoch": 7.31, "learning_rate": 4.634749780770203e-05, "loss": 2.49, "step": 1475000 }, { "epoch": 7.31, "learning_rate": 4.634625922127595e-05, "loss": 2.4885, "step": 1475500 }, { "epoch": 7.31, "learning_rate": 4.634502558919557e-05, "loss": 2.4919, "step": 1476000 }, { "epoch": 7.32, "learning_rate": 4.634378700276948e-05, "loss": 2.4588, "step": 1476500 }, { "epoch": 7.32, "learning_rate": 4.6342548416343396e-05, "loss": 2.4474, "step": 1477000 }, { "epoch": 7.32, "learning_rate": 4.634130982991731e-05, "loss": 2.4867, "step": 1477500 }, { "epoch": 7.32, "learning_rate": 4.634007124349123e-05, "loss": 2.4749, "step": 1478000 }, { "epoch": 7.33, "learning_rate": 4.6338832657065146e-05, "loss": 2.4603, "step": 1478500 }, { "epoch": 7.33, "learning_rate": 4.633759407063906e-05, "loss": 2.4551, "step": 1479000 }, { "epoch": 7.33, "learning_rate": 4.633635548421298e-05, "loss": 2.4716, "step": 1479500 }, { "epoch": 7.33, "learning_rate": 4.633511937495974e-05, "loss": 2.4879, "step": 1480000 }, { "epoch": 7.33, "learning_rate": 4.633388078853366e-05, "loss": 2.4664, "step": 1480500 }, { "epoch": 7.34, "learning_rate": 4.6332642202107576e-05, "loss": 2.4596, "step": 1481000 }, { "epoch": 7.34, "learning_rate": 4.633140361568149e-05, "loss": 2.5133, "step": 1481500 }, { "epoch": 7.34, "learning_rate": 4.633016502925541e-05, "loss": 2.4767, "step": 1482000 }, { "epoch": 7.34, "learning_rate": 4.632892644282933e-05, "loss": 2.4489, "step": 1482500 }, { "epoch": 7.35, "learning_rate": 4.6327687856403244e-05, "loss": 2.4446, "step": 1483000 }, { "epoch": 7.35, "learning_rate": 4.632644926997716e-05, "loss": 2.4838, "step": 1483500 }, { "epoch": 7.35, "learning_rate": 4.632521563789679e-05, "loss": 2.4423, "step": 1484000 }, { "epoch": 7.35, "learning_rate": 4.63239770514707e-05, "loss": 2.4774, "step": 1484500 }, { "epoch": 7.36, "learning_rate": 4.6322738465044615e-05, "loss": 2.4497, "step": 1485000 }, { "epoch": 7.36, "learning_rate": 4.632149987861853e-05, "loss": 2.4882, "step": 1485500 }, { "epoch": 7.36, "learning_rate": 4.63202637693653e-05, "loss": 2.4747, "step": 1486000 }, { "epoch": 7.36, "learning_rate": 4.631902518293922e-05, "loss": 2.4873, "step": 1486500 }, { "epoch": 7.37, "learning_rate": 4.6317786596513135e-05, "loss": 2.4719, "step": 1487000 }, { "epoch": 7.37, "learning_rate": 4.631654801008705e-05, "loss": 2.466, "step": 1487500 }, { "epoch": 7.37, "learning_rate": 4.631530942366097e-05, "loss": 2.5012, "step": 1488000 }, { "epoch": 7.37, "learning_rate": 4.6314070837234886e-05, "loss": 2.4666, "step": 1488500 }, { "epoch": 7.38, "learning_rate": 4.6312832250808796e-05, "loss": 2.476, "step": 1489000 }, { "epoch": 7.38, "learning_rate": 4.631159366438271e-05, "loss": 2.4908, "step": 1489500 }, { "epoch": 7.38, "learning_rate": 4.631035507795663e-05, "loss": 2.4713, "step": 1490000 }, { "epoch": 7.38, "learning_rate": 4.630911649153055e-05, "loss": 2.4714, "step": 1490500 }, { "epoch": 7.39, "learning_rate": 4.6307877905104464e-05, "loss": 2.4822, "step": 1491000 }, { "epoch": 7.39, "learning_rate": 4.630663931867838e-05, "loss": 2.468, "step": 1491500 }, { "epoch": 7.39, "learning_rate": 4.63054007322523e-05, "loss": 2.4597, "step": 1492000 }, { "epoch": 7.39, "learning_rate": 4.6304162145826214e-05, "loss": 2.4886, "step": 1492500 }, { "epoch": 7.4, "learning_rate": 4.630292355940013e-05, "loss": 2.4621, "step": 1493000 }, { "epoch": 7.4, "learning_rate": 4.630168745014689e-05, "loss": 2.4916, "step": 1493500 }, { "epoch": 7.4, "learning_rate": 4.630044886372081e-05, "loss": 2.5105, "step": 1494000 }, { "epoch": 7.4, "learning_rate": 4.629921027729473e-05, "loss": 2.4965, "step": 1494500 }, { "epoch": 7.41, "learning_rate": 4.62979741680415e-05, "loss": 2.4962, "step": 1495000 }, { "epoch": 7.41, "learning_rate": 4.629673558161541e-05, "loss": 2.4789, "step": 1495500 }, { "epoch": 7.41, "learning_rate": 4.629549947236219e-05, "loss": 2.4785, "step": 1496000 }, { "epoch": 7.41, "learning_rate": 4.6294260885936105e-05, "loss": 2.4689, "step": 1496500 }, { "epoch": 7.42, "learning_rate": 4.629302229951002e-05, "loss": 2.5094, "step": 1497000 }, { "epoch": 7.42, "learning_rate": 4.629178371308394e-05, "loss": 2.4664, "step": 1497500 }, { "epoch": 7.42, "learning_rate": 4.629054512665785e-05, "loss": 2.4711, "step": 1498000 }, { "epoch": 7.42, "learning_rate": 4.6289306540231766e-05, "loss": 2.4982, "step": 1498500 }, { "epoch": 7.43, "learning_rate": 4.628806795380568e-05, "loss": 2.4652, "step": 1499000 }, { "epoch": 7.43, "learning_rate": 4.62868293673796e-05, "loss": 2.464, "step": 1499500 }, { "epoch": 7.43, "learning_rate": 4.628559078095352e-05, "loss": 2.4816, "step": 1500000 }, { "epoch": 7.43, "learning_rate": 4.628435219452743e-05, "loss": 2.461, "step": 1500500 }, { "epoch": 7.44, "learning_rate": 4.6283113608101344e-05, "loss": 2.4604, "step": 1501000 }, { "epoch": 7.44, "learning_rate": 4.628187502167526e-05, "loss": 2.4845, "step": 1501500 }, { "epoch": 7.44, "learning_rate": 4.628063643524918e-05, "loss": 2.4844, "step": 1502000 }, { "epoch": 7.44, "learning_rate": 4.6279397848823095e-05, "loss": 2.4942, "step": 1502500 }, { "epoch": 7.45, "learning_rate": 4.627815926239701e-05, "loss": 2.4517, "step": 1503000 }, { "epoch": 7.45, "learning_rate": 4.627692067597093e-05, "loss": 2.4613, "step": 1503500 }, { "epoch": 7.45, "learning_rate": 4.62756845667177e-05, "loss": 2.4785, "step": 1504000 }, { "epoch": 7.45, "learning_rate": 4.6274445980291615e-05, "loss": 2.4746, "step": 1504500 }, { "epoch": 7.46, "learning_rate": 4.627320739386553e-05, "loss": 2.4834, "step": 1505000 }, { "epoch": 7.46, "learning_rate": 4.627196880743945e-05, "loss": 2.5137, "step": 1505500 }, { "epoch": 7.46, "learning_rate": 4.6270730221013365e-05, "loss": 2.4748, "step": 1506000 }, { "epoch": 7.46, "learning_rate": 4.626949163458728e-05, "loss": 2.459, "step": 1506500 }, { "epoch": 7.47, "learning_rate": 4.62682530481612e-05, "loss": 2.4794, "step": 1507000 }, { "epoch": 7.47, "learning_rate": 4.6267014461735116e-05, "loss": 2.4742, "step": 1507500 }, { "epoch": 7.47, "learning_rate": 4.626577587530903e-05, "loss": 2.4774, "step": 1508000 }, { "epoch": 7.47, "learning_rate": 4.626453728888295e-05, "loss": 2.4662, "step": 1508500 }, { "epoch": 7.48, "learning_rate": 4.626330117962971e-05, "loss": 2.4627, "step": 1509000 }, { "epoch": 7.48, "learning_rate": 4.626206259320363e-05, "loss": 2.4786, "step": 1509500 }, { "epoch": 7.48, "learning_rate": 4.62608264839504e-05, "loss": 2.4726, "step": 1510000 }, { "epoch": 7.48, "learning_rate": 4.6259590374697173e-05, "loss": 2.4954, "step": 1510500 }, { "epoch": 7.49, "learning_rate": 4.6258351788271084e-05, "loss": 2.4493, "step": 1511000 }, { "epoch": 7.49, "learning_rate": 4.6257113201845e-05, "loss": 2.4704, "step": 1511500 }, { "epoch": 7.49, "learning_rate": 4.625587461541892e-05, "loss": 2.4773, "step": 1512000 }, { "epoch": 7.49, "learning_rate": 4.6254636028992834e-05, "loss": 2.4571, "step": 1512500 }, { "epoch": 7.5, "learning_rate": 4.62533999197396e-05, "loss": 2.4826, "step": 1513000 }, { "epoch": 7.5, "learning_rate": 4.625216133331352e-05, "loss": 2.4711, "step": 1513500 }, { "epoch": 7.5, "learning_rate": 4.625092274688743e-05, "loss": 2.5154, "step": 1514000 }, { "epoch": 7.5, "learning_rate": 4.624968416046135e-05, "loss": 2.4689, "step": 1514500 }, { "epoch": 7.51, "learning_rate": 4.624844805120812e-05, "loss": 2.4558, "step": 1515000 }, { "epoch": 7.51, "learning_rate": 4.624720946478204e-05, "loss": 2.489, "step": 1515500 }, { "epoch": 7.51, "learning_rate": 4.624597087835596e-05, "loss": 2.4775, "step": 1516000 }, { "epoch": 7.51, "learning_rate": 4.6244732291929874e-05, "loss": 2.5, "step": 1516500 }, { "epoch": 7.52, "learning_rate": 4.6243496182676636e-05, "loss": 2.5144, "step": 1517000 }, { "epoch": 7.52, "learning_rate": 4.624225759625055e-05, "loss": 2.4776, "step": 1517500 }, { "epoch": 7.52, "learning_rate": 4.624101900982447e-05, "loss": 2.5141, "step": 1518000 }, { "epoch": 7.52, "learning_rate": 4.6239780423398386e-05, "loss": 2.4846, "step": 1518500 }, { "epoch": 7.53, "learning_rate": 4.62385418369723e-05, "loss": 2.4617, "step": 1519000 }, { "epoch": 7.53, "learning_rate": 4.623730325054622e-05, "loss": 2.4419, "step": 1519500 }, { "epoch": 7.53, "learning_rate": 4.623606466412014e-05, "loss": 2.4669, "step": 1520000 }, { "epoch": 7.53, "learning_rate": 4.623482607769405e-05, "loss": 2.4905, "step": 1520500 }, { "epoch": 7.54, "learning_rate": 4.6233587491267964e-05, "loss": 2.4972, "step": 1521000 }, { "epoch": 7.54, "learning_rate": 4.623234890484188e-05, "loss": 2.4975, "step": 1521500 }, { "epoch": 7.54, "learning_rate": 4.62311103184158e-05, "loss": 2.4753, "step": 1522000 }, { "epoch": 7.54, "learning_rate": 4.6229871731989715e-05, "loss": 2.4884, "step": 1522500 }, { "epoch": 7.55, "learning_rate": 4.622863314556363e-05, "loss": 2.4908, "step": 1523000 }, { "epoch": 7.55, "learning_rate": 4.62273970363104e-05, "loss": 2.4883, "step": 1523500 }, { "epoch": 7.55, "learning_rate": 4.622615844988432e-05, "loss": 2.48, "step": 1524000 }, { "epoch": 7.55, "learning_rate": 4.6224919863458235e-05, "loss": 2.4724, "step": 1524500 }, { "epoch": 7.56, "learning_rate": 4.622368127703215e-05, "loss": 2.4895, "step": 1525000 }, { "epoch": 7.56, "learning_rate": 4.622244516777892e-05, "loss": 2.4562, "step": 1525500 }, { "epoch": 7.56, "learning_rate": 4.622120658135284e-05, "loss": 2.4682, "step": 1526000 }, { "epoch": 7.56, "learning_rate": 4.621996799492675e-05, "loss": 2.4951, "step": 1526500 }, { "epoch": 7.57, "learning_rate": 4.6218729408500664e-05, "loss": 2.4807, "step": 1527000 }, { "epoch": 7.57, "learning_rate": 4.621749082207458e-05, "loss": 2.51, "step": 1527500 }, { "epoch": 7.57, "learning_rate": 4.621625471282136e-05, "loss": 2.465, "step": 1528000 }, { "epoch": 7.57, "learning_rate": 4.621501860356812e-05, "loss": 2.4833, "step": 1528500 }, { "epoch": 7.58, "learning_rate": 4.6213780017142036e-05, "loss": 2.4647, "step": 1529000 }, { "epoch": 7.58, "learning_rate": 4.621254143071595e-05, "loss": 2.461, "step": 1529500 }, { "epoch": 7.58, "learning_rate": 4.621130284428987e-05, "loss": 2.4943, "step": 1530000 }, { "epoch": 7.58, "learning_rate": 4.6210064257863787e-05, "loss": 2.4836, "step": 1530500 }, { "epoch": 7.59, "learning_rate": 4.6208825671437704e-05, "loss": 2.4815, "step": 1531000 }, { "epoch": 7.59, "learning_rate": 4.620758956218447e-05, "loss": 2.4911, "step": 1531500 }, { "epoch": 7.59, "learning_rate": 4.620635097575839e-05, "loss": 2.5123, "step": 1532000 }, { "epoch": 7.59, "learning_rate": 4.6205112389332306e-05, "loss": 2.4855, "step": 1532500 }, { "epoch": 7.6, "learning_rate": 4.620387380290622e-05, "loss": 2.4682, "step": 1533000 }, { "epoch": 7.6, "learning_rate": 4.620263521648014e-05, "loss": 2.4504, "step": 1533500 }, { "epoch": 7.6, "learning_rate": 4.620139663005406e-05, "loss": 2.4983, "step": 1534000 }, { "epoch": 7.6, "learning_rate": 4.6200158043627974e-05, "loss": 2.444, "step": 1534500 }, { "epoch": 7.6, "learning_rate": 4.619891945720189e-05, "loss": 2.4729, "step": 1535000 }, { "epoch": 7.61, "learning_rate": 4.619768087077581e-05, "loss": 2.4705, "step": 1535500 }, { "epoch": 7.61, "learning_rate": 4.619644476152257e-05, "loss": 2.4669, "step": 1536000 }, { "epoch": 7.61, "learning_rate": 4.619520617509649e-05, "loss": 2.4638, "step": 1536500 }, { "epoch": 7.61, "learning_rate": 4.6193967588670404e-05, "loss": 2.4943, "step": 1537000 }, { "epoch": 7.62, "learning_rate": 4.6192733956590024e-05, "loss": 2.4793, "step": 1537500 }, { "epoch": 7.62, "learning_rate": 4.619149537016394e-05, "loss": 2.4636, "step": 1538000 }, { "epoch": 7.62, "learning_rate": 4.619025678373786e-05, "loss": 2.4792, "step": 1538500 }, { "epoch": 7.62, "learning_rate": 4.6189018197311775e-05, "loss": 2.4809, "step": 1539000 }, { "epoch": 7.63, "learning_rate": 4.618777961088569e-05, "loss": 2.4845, "step": 1539500 }, { "epoch": 7.63, "learning_rate": 4.618654102445961e-05, "loss": 2.4399, "step": 1540000 }, { "epoch": 7.63, "learning_rate": 4.6185302438033526e-05, "loss": 2.4847, "step": 1540500 }, { "epoch": 7.63, "learning_rate": 4.618406385160744e-05, "loss": 2.4646, "step": 1541000 }, { "epoch": 7.64, "learning_rate": 4.618282526518136e-05, "loss": 2.4743, "step": 1541500 }, { "epoch": 7.64, "learning_rate": 4.618158667875527e-05, "loss": 2.4826, "step": 1542000 }, { "epoch": 7.64, "learning_rate": 4.618034809232919e-05, "loss": 2.5099, "step": 1542500 }, { "epoch": 7.64, "learning_rate": 4.6179109505903104e-05, "loss": 2.478, "step": 1543000 }, { "epoch": 7.65, "learning_rate": 4.617787091947702e-05, "loss": 2.4752, "step": 1543500 }, { "epoch": 7.65, "learning_rate": 4.617663481022379e-05, "loss": 2.4662, "step": 1544000 }, { "epoch": 7.65, "learning_rate": 4.6175396223797706e-05, "loss": 2.48, "step": 1544500 }, { "epoch": 7.65, "learning_rate": 4.617415763737162e-05, "loss": 2.498, "step": 1545000 }, { "epoch": 7.66, "learning_rate": 4.617291905094554e-05, "loss": 2.4554, "step": 1545500 }, { "epoch": 7.66, "learning_rate": 4.617168046451946e-05, "loss": 2.481, "step": 1546000 }, { "epoch": 7.66, "learning_rate": 4.6170444355266226e-05, "loss": 2.4825, "step": 1546500 }, { "epoch": 7.66, "learning_rate": 4.616920576884014e-05, "loss": 2.4594, "step": 1547000 }, { "epoch": 7.67, "learning_rate": 4.616796718241406e-05, "loss": 2.4847, "step": 1547500 }, { "epoch": 7.67, "learning_rate": 4.616672859598798e-05, "loss": 2.4623, "step": 1548000 }, { "epoch": 7.67, "learning_rate": 4.616549000956189e-05, "loss": 2.4656, "step": 1548500 }, { "epoch": 7.67, "learning_rate": 4.6164251423135804e-05, "loss": 2.4649, "step": 1549000 }, { "epoch": 7.68, "learning_rate": 4.616301531388257e-05, "loss": 2.474, "step": 1549500 }, { "epoch": 7.68, "learning_rate": 4.616177672745649e-05, "loss": 2.4778, "step": 1550000 }, { "epoch": 7.68, "learning_rate": 4.616054061820326e-05, "loss": 2.4788, "step": 1550500 }, { "epoch": 7.68, "learning_rate": 4.6159302031777175e-05, "loss": 2.4836, "step": 1551000 }, { "epoch": 7.69, "learning_rate": 4.615806344535109e-05, "loss": 2.4977, "step": 1551500 }, { "epoch": 7.69, "learning_rate": 4.615682733609786e-05, "loss": 2.4888, "step": 1552000 }, { "epoch": 7.69, "learning_rate": 4.615558874967177e-05, "loss": 2.4875, "step": 1552500 }, { "epoch": 7.69, "learning_rate": 4.61543551175914e-05, "loss": 2.503, "step": 1553000 }, { "epoch": 7.7, "learning_rate": 4.6153116531165316e-05, "loss": 2.5001, "step": 1553500 }, { "epoch": 7.7, "learning_rate": 4.615187794473923e-05, "loss": 2.4949, "step": 1554000 }, { "epoch": 7.7, "learning_rate": 4.615063935831315e-05, "loss": 2.4934, "step": 1554500 }, { "epoch": 7.7, "learning_rate": 4.6149400771887066e-05, "loss": 2.4785, "step": 1555000 }, { "epoch": 7.71, "learning_rate": 4.614816218546098e-05, "loss": 2.4767, "step": 1555500 }, { "epoch": 7.71, "learning_rate": 4.61469235990349e-05, "loss": 2.4847, "step": 1556000 }, { "epoch": 7.71, "learning_rate": 4.614568501260881e-05, "loss": 2.5089, "step": 1556500 }, { "epoch": 7.71, "learning_rate": 4.614444642618273e-05, "loss": 2.4748, "step": 1557000 }, { "epoch": 7.72, "learning_rate": 4.6143207839756644e-05, "loss": 2.4601, "step": 1557500 }, { "epoch": 7.72, "learning_rate": 4.614196925333056e-05, "loss": 2.5098, "step": 1558000 }, { "epoch": 7.72, "learning_rate": 4.614073066690447e-05, "loss": 2.4963, "step": 1558500 }, { "epoch": 7.72, "learning_rate": 4.613949208047839e-05, "loss": 2.4725, "step": 1559000 }, { "epoch": 7.73, "learning_rate": 4.6138253494052305e-05, "loss": 2.4686, "step": 1559500 }, { "epoch": 7.73, "learning_rate": 4.613701490762622e-05, "loss": 2.4655, "step": 1560000 }, { "epoch": 7.73, "learning_rate": 4.613577632120014e-05, "loss": 2.4731, "step": 1560500 }, { "epoch": 7.73, "learning_rate": 4.6134537734774056e-05, "loss": 2.4864, "step": 1561000 }, { "epoch": 7.74, "learning_rate": 4.613329914834797e-05, "loss": 2.5237, "step": 1561500 }, { "epoch": 7.74, "learning_rate": 4.613206056192189e-05, "loss": 2.4653, "step": 1562000 }, { "epoch": 7.74, "learning_rate": 4.613082197549581e-05, "loss": 2.4584, "step": 1562500 }, { "epoch": 7.74, "learning_rate": 4.6129583389069724e-05, "loss": 2.4773, "step": 1563000 }, { "epoch": 7.75, "learning_rate": 4.612834727981649e-05, "loss": 2.4909, "step": 1563500 }, { "epoch": 7.75, "learning_rate": 4.612711117056326e-05, "loss": 2.4713, "step": 1564000 }, { "epoch": 7.75, "learning_rate": 4.612587258413718e-05, "loss": 2.4771, "step": 1564500 }, { "epoch": 7.75, "learning_rate": 4.6124633997711095e-05, "loss": 2.4854, "step": 1565000 }, { "epoch": 7.76, "learning_rate": 4.6123397888457864e-05, "loss": 2.4965, "step": 1565500 }, { "epoch": 7.76, "learning_rate": 4.612215930203178e-05, "loss": 2.4689, "step": 1566000 }, { "epoch": 7.76, "learning_rate": 4.61209207156057e-05, "loss": 2.4708, "step": 1566500 }, { "epoch": 7.76, "learning_rate": 4.6119682129179615e-05, "loss": 2.4845, "step": 1567000 }, { "epoch": 7.77, "learning_rate": 4.611844354275353e-05, "loss": 2.473, "step": 1567500 }, { "epoch": 7.77, "learning_rate": 4.611720495632744e-05, "loss": 2.4954, "step": 1568000 }, { "epoch": 7.77, "learning_rate": 4.611596636990136e-05, "loss": 2.4764, "step": 1568500 }, { "epoch": 7.77, "learning_rate": 4.6114727783475276e-05, "loss": 2.4517, "step": 1569000 }, { "epoch": 7.78, "learning_rate": 4.611348919704919e-05, "loss": 2.4814, "step": 1569500 }, { "epoch": 7.78, "learning_rate": 4.611225061062311e-05, "loss": 2.4804, "step": 1570000 }, { "epoch": 7.78, "learning_rate": 4.6111012024197027e-05, "loss": 2.4678, "step": 1570500 }, { "epoch": 7.78, "learning_rate": 4.6109773437770943e-05, "loss": 2.4607, "step": 1571000 }, { "epoch": 7.79, "learning_rate": 4.610853485134486e-05, "loss": 2.4835, "step": 1571500 }, { "epoch": 7.79, "learning_rate": 4.610729874209162e-05, "loss": 2.4995, "step": 1572000 }, { "epoch": 7.79, "learning_rate": 4.610606015566554e-05, "loss": 2.4387, "step": 1572500 }, { "epoch": 7.79, "learning_rate": 4.6104824046412315e-05, "loss": 2.451, "step": 1573000 }, { "epoch": 7.8, "learning_rate": 4.610358545998623e-05, "loss": 2.4665, "step": 1573500 }, { "epoch": 7.8, "learning_rate": 4.610234687356015e-05, "loss": 2.4839, "step": 1574000 }, { "epoch": 7.8, "learning_rate": 4.610110828713406e-05, "loss": 2.4874, "step": 1574500 }, { "epoch": 7.8, "learning_rate": 4.6099869700707976e-05, "loss": 2.4841, "step": 1575000 }, { "epoch": 7.81, "learning_rate": 4.609863111428189e-05, "loss": 2.4755, "step": 1575500 }, { "epoch": 7.81, "learning_rate": 4.609739252785581e-05, "loss": 2.4554, "step": 1576000 }, { "epoch": 7.81, "learning_rate": 4.609615394142973e-05, "loss": 2.4956, "step": 1576500 }, { "epoch": 7.81, "learning_rate": 4.6094915355003644e-05, "loss": 2.4772, "step": 1577000 }, { "epoch": 7.82, "learning_rate": 4.609367676857756e-05, "loss": 2.4684, "step": 1577500 }, { "epoch": 7.82, "learning_rate": 4.609244065932433e-05, "loss": 2.4815, "step": 1578000 }, { "epoch": 7.82, "learning_rate": 4.6091202072898246e-05, "loss": 2.4774, "step": 1578500 }, { "epoch": 7.82, "learning_rate": 4.6089963486472156e-05, "loss": 2.4714, "step": 1579000 }, { "epoch": 7.83, "learning_rate": 4.608872490004607e-05, "loss": 2.4645, "step": 1579500 }, { "epoch": 7.83, "learning_rate": 4.608748631361999e-05, "loss": 2.485, "step": 1580000 }, { "epoch": 7.83, "learning_rate": 4.608624772719391e-05, "loss": 2.4805, "step": 1580500 }, { "epoch": 7.83, "learning_rate": 4.6085009140767824e-05, "loss": 2.4852, "step": 1581000 }, { "epoch": 7.84, "learning_rate": 4.608377055434174e-05, "loss": 2.4744, "step": 1581500 }, { "epoch": 7.84, "learning_rate": 4.608253196791566e-05, "loss": 2.4791, "step": 1582000 }, { "epoch": 7.84, "learning_rate": 4.608129585866243e-05, "loss": 2.4555, "step": 1582500 }, { "epoch": 7.84, "learning_rate": 4.60800597494092e-05, "loss": 2.4637, "step": 1583000 }, { "epoch": 7.85, "learning_rate": 4.607882116298311e-05, "loss": 2.4863, "step": 1583500 }, { "epoch": 7.85, "learning_rate": 4.607758257655703e-05, "loss": 2.4752, "step": 1584000 }, { "epoch": 7.85, "learning_rate": 4.6076343990130946e-05, "loss": 2.4594, "step": 1584500 }, { "epoch": 7.85, "learning_rate": 4.6075107880877715e-05, "loss": 2.4782, "step": 1585000 }, { "epoch": 7.86, "learning_rate": 4.607386929445163e-05, "loss": 2.4713, "step": 1585500 }, { "epoch": 7.86, "learning_rate": 4.607263070802555e-05, "loss": 2.4576, "step": 1586000 }, { "epoch": 7.86, "learning_rate": 4.607139459877232e-05, "loss": 2.4664, "step": 1586500 }, { "epoch": 7.86, "learning_rate": 4.6070156012346235e-05, "loss": 2.4834, "step": 1587000 }, { "epoch": 7.87, "learning_rate": 4.606891742592015e-05, "loss": 2.5103, "step": 1587500 }, { "epoch": 7.87, "learning_rate": 4.6067681316666914e-05, "loss": 2.4813, "step": 1588000 }, { "epoch": 7.87, "learning_rate": 4.606644273024083e-05, "loss": 2.4637, "step": 1588500 }, { "epoch": 7.87, "learning_rate": 4.606520414381475e-05, "loss": 2.4753, "step": 1589000 }, { "epoch": 7.87, "learning_rate": 4.6063965557388665e-05, "loss": 2.4819, "step": 1589500 }, { "epoch": 7.88, "learning_rate": 4.606272697096258e-05, "loss": 2.4511, "step": 1590000 }, { "epoch": 7.88, "learning_rate": 4.60614883845365e-05, "loss": 2.4717, "step": 1590500 }, { "epoch": 7.88, "learning_rate": 4.6060249798110415e-05, "loss": 2.489, "step": 1591000 }, { "epoch": 7.88, "learning_rate": 4.605901121168433e-05, "loss": 2.4643, "step": 1591500 }, { "epoch": 7.89, "learning_rate": 4.605777262525825e-05, "loss": 2.4807, "step": 1592000 }, { "epoch": 7.89, "learning_rate": 4.605653651600502e-05, "loss": 2.474, "step": 1592500 }, { "epoch": 7.89, "learning_rate": 4.6055297929578935e-05, "loss": 2.4841, "step": 1593000 }, { "epoch": 7.89, "learning_rate": 4.605405934315285e-05, "loss": 2.4805, "step": 1593500 }, { "epoch": 7.9, "learning_rate": 4.605282075672677e-05, "loss": 2.4747, "step": 1594000 }, { "epoch": 7.9, "learning_rate": 4.6051582170300686e-05, "loss": 2.4757, "step": 1594500 }, { "epoch": 7.9, "learning_rate": 4.60503435838746e-05, "loss": 2.4795, "step": 1595000 }, { "epoch": 7.9, "learning_rate": 4.604910499744852e-05, "loss": 2.4725, "step": 1595500 }, { "epoch": 7.91, "learning_rate": 4.604786641102243e-05, "loss": 2.4809, "step": 1596000 }, { "epoch": 7.91, "learning_rate": 4.604662782459635e-05, "loss": 2.4966, "step": 1596500 }, { "epoch": 7.91, "learning_rate": 4.6045389238170264e-05, "loss": 2.4485, "step": 1597000 }, { "epoch": 7.91, "learning_rate": 4.604415065174418e-05, "loss": 2.4603, "step": 1597500 }, { "epoch": 7.92, "learning_rate": 4.60429120653181e-05, "loss": 2.4999, "step": 1598000 }, { "epoch": 7.92, "learning_rate": 4.6041673478892014e-05, "loss": 2.4641, "step": 1598500 }, { "epoch": 7.92, "learning_rate": 4.604043736963878e-05, "loss": 2.473, "step": 1599000 }, { "epoch": 7.92, "learning_rate": 4.603919878321269e-05, "loss": 2.4771, "step": 1599500 }, { "epoch": 7.93, "learning_rate": 4.603796267395947e-05, "loss": 2.4965, "step": 1600000 }, { "epoch": 7.93, "learning_rate": 4.6036724087533386e-05, "loss": 2.4797, "step": 1600500 }, { "epoch": 7.93, "learning_rate": 4.60354855011073e-05, "loss": 2.4846, "step": 1601000 }, { "epoch": 7.93, "learning_rate": 4.603424691468122e-05, "loss": 2.4528, "step": 1601500 }, { "epoch": 7.94, "learning_rate": 4.6033008328255137e-05, "loss": 2.4811, "step": 1602000 }, { "epoch": 7.94, "learning_rate": 4.603176974182905e-05, "loss": 2.4997, "step": 1602500 }, { "epoch": 7.94, "learning_rate": 4.6030533632575816e-05, "loss": 2.4839, "step": 1603000 }, { "epoch": 7.94, "learning_rate": 4.602929504614973e-05, "loss": 2.4663, "step": 1603500 }, { "epoch": 7.95, "learning_rate": 4.60280589368965e-05, "loss": 2.4979, "step": 1604000 }, { "epoch": 7.95, "learning_rate": 4.602682035047042e-05, "loss": 2.4584, "step": 1604500 }, { "epoch": 7.95, "learning_rate": 4.6025581764044335e-05, "loss": 2.4736, "step": 1605000 }, { "epoch": 7.95, "learning_rate": 4.602434317761825e-05, "loss": 2.4942, "step": 1605500 }, { "epoch": 7.96, "learning_rate": 4.602310459119217e-05, "loss": 2.4717, "step": 1606000 }, { "epoch": 7.96, "learning_rate": 4.6021866004766086e-05, "loss": 2.4614, "step": 1606500 }, { "epoch": 7.96, "learning_rate": 4.602062741834e-05, "loss": 2.4844, "step": 1607000 }, { "epoch": 7.96, "learning_rate": 4.601938883191392e-05, "loss": 2.469, "step": 1607500 }, { "epoch": 7.97, "learning_rate": 4.601815272266068e-05, "loss": 2.48, "step": 1608000 }, { "epoch": 7.97, "learning_rate": 4.60169141362346e-05, "loss": 2.4687, "step": 1608500 }, { "epoch": 7.97, "learning_rate": 4.6015675549808516e-05, "loss": 2.4747, "step": 1609000 }, { "epoch": 7.97, "learning_rate": 4.601443696338243e-05, "loss": 2.4615, "step": 1609500 }, { "epoch": 7.98, "learning_rate": 4.601319837695635e-05, "loss": 2.5101, "step": 1610000 }, { "epoch": 7.98, "learning_rate": 4.6011959790530266e-05, "loss": 2.4775, "step": 1610500 }, { "epoch": 7.98, "learning_rate": 4.6010723681277035e-05, "loss": 2.4492, "step": 1611000 }, { "epoch": 7.98, "learning_rate": 4.600948509485095e-05, "loss": 2.49, "step": 1611500 }, { "epoch": 7.99, "learning_rate": 4.600824650842487e-05, "loss": 2.4681, "step": 1612000 }, { "epoch": 7.99, "learning_rate": 4.6007007921998786e-05, "loss": 2.4747, "step": 1612500 }, { "epoch": 7.99, "learning_rate": 4.60057693355727e-05, "loss": 2.4882, "step": 1613000 }, { "epoch": 7.99, "learning_rate": 4.600453074914662e-05, "loss": 2.4759, "step": 1613500 }, { "epoch": 8.0, "learning_rate": 4.600329216272054e-05, "loss": 2.4905, "step": 1614000 }, { "epoch": 8.0, "learning_rate": 4.6002053576294454e-05, "loss": 2.4694, "step": 1614500 }, { "epoch": 8.0, "eval_accuracy": 0.6397272831364085, "eval_accuracy_mlm": 0.59286278093972, "eval_accuracy_nsp": 0.8609541141909092, "eval_loss": 2.4369935989379883, "eval_runtime": 147.379, "eval_samples_per_second": 1729.955, "eval_steps_per_second": 72.086, "step": 1614744 }, { "epoch": 8.0, "learning_rate": 4.6000814989868364e-05, "loss": 2.4571, "step": 1615000 }, { "epoch": 8.0, "learning_rate": 4.599957640344228e-05, "loss": 2.4342, "step": 1615500 }, { "epoch": 8.01, "learning_rate": 4.599834029418905e-05, "loss": 2.4282, "step": 1616000 }, { "epoch": 8.01, "learning_rate": 4.599710418493582e-05, "loss": 2.4413, "step": 1616500 }, { "epoch": 8.01, "learning_rate": 4.5995865598509735e-05, "loss": 2.4362, "step": 1617000 }, { "epoch": 8.01, "learning_rate": 4.599462701208365e-05, "loss": 2.4393, "step": 1617500 }, { "epoch": 8.02, "learning_rate": 4.599338842565757e-05, "loss": 2.4598, "step": 1618000 }, { "epoch": 8.02, "learning_rate": 4.599215231640434e-05, "loss": 2.4666, "step": 1618500 }, { "epoch": 8.02, "learning_rate": 4.5990913729978255e-05, "loss": 2.4484, "step": 1619000 }, { "epoch": 8.02, "learning_rate": 4.598967514355217e-05, "loss": 2.4482, "step": 1619500 }, { "epoch": 8.03, "learning_rate": 4.598843655712609e-05, "loss": 2.4395, "step": 1620000 }, { "epoch": 8.03, "learning_rate": 4.598720044787285e-05, "loss": 2.4311, "step": 1620500 }, { "epoch": 8.03, "learning_rate": 4.598596186144677e-05, "loss": 2.438, "step": 1621000 }, { "epoch": 8.03, "learning_rate": 4.5984723275020685e-05, "loss": 2.4487, "step": 1621500 }, { "epoch": 8.04, "learning_rate": 4.59834846885946e-05, "loss": 2.4532, "step": 1622000 }, { "epoch": 8.04, "learning_rate": 4.598224610216852e-05, "loss": 2.4735, "step": 1622500 }, { "epoch": 8.04, "learning_rate": 4.5981007515742436e-05, "loss": 2.4447, "step": 1623000 }, { "epoch": 8.04, "learning_rate": 4.597976892931635e-05, "loss": 2.4488, "step": 1623500 }, { "epoch": 8.05, "learning_rate": 4.597853034289027e-05, "loss": 2.437, "step": 1624000 }, { "epoch": 8.05, "learning_rate": 4.597729423363704e-05, "loss": 2.4128, "step": 1624500 }, { "epoch": 8.05, "learning_rate": 4.5976055647210955e-05, "loss": 2.4574, "step": 1625000 }, { "epoch": 8.05, "learning_rate": 4.5974822015130576e-05, "loss": 2.4699, "step": 1625500 }, { "epoch": 8.06, "learning_rate": 4.597358342870449e-05, "loss": 2.4535, "step": 1626000 }, { "epoch": 8.06, "learning_rate": 4.597234484227841e-05, "loss": 2.463, "step": 1626500 }, { "epoch": 8.06, "learning_rate": 4.5971106255852327e-05, "loss": 2.477, "step": 1627000 }, { "epoch": 8.06, "learning_rate": 4.5969867669426244e-05, "loss": 2.4738, "step": 1627500 }, { "epoch": 8.07, "learning_rate": 4.5968629083000154e-05, "loss": 2.4388, "step": 1628000 }, { "epoch": 8.07, "learning_rate": 4.596739049657407e-05, "loss": 2.4269, "step": 1628500 }, { "epoch": 8.07, "learning_rate": 4.596615191014799e-05, "loss": 2.453, "step": 1629000 }, { "epoch": 8.07, "learning_rate": 4.5964913323721904e-05, "loss": 2.4559, "step": 1629500 }, { "epoch": 8.08, "learning_rate": 4.596367473729582e-05, "loss": 2.4375, "step": 1630000 }, { "epoch": 8.08, "learning_rate": 4.596243615086974e-05, "loss": 2.4509, "step": 1630500 }, { "epoch": 8.08, "learning_rate": 4.5961197564443655e-05, "loss": 2.4442, "step": 1631000 }, { "epoch": 8.08, "learning_rate": 4.595995897801757e-05, "loss": 2.4474, "step": 1631500 }, { "epoch": 8.09, "learning_rate": 4.595872039159149e-05, "loss": 2.4552, "step": 1632000 }, { "epoch": 8.09, "learning_rate": 4.5957481805165406e-05, "loss": 2.4695, "step": 1632500 }, { "epoch": 8.09, "learning_rate": 4.595624321873932e-05, "loss": 2.4425, "step": 1633000 }, { "epoch": 8.09, "learning_rate": 4.595500463231324e-05, "loss": 2.4627, "step": 1633500 }, { "epoch": 8.1, "learning_rate": 4.595376852306e-05, "loss": 2.4539, "step": 1634000 }, { "epoch": 8.1, "learning_rate": 4.595252993663392e-05, "loss": 2.4307, "step": 1634500 }, { "epoch": 8.1, "learning_rate": 4.5951291350207836e-05, "loss": 2.446, "step": 1635000 }, { "epoch": 8.1, "learning_rate": 4.595005276378175e-05, "loss": 2.4706, "step": 1635500 }, { "epoch": 8.11, "learning_rate": 4.594881417735567e-05, "loss": 2.4612, "step": 1636000 }, { "epoch": 8.11, "learning_rate": 4.594757806810244e-05, "loss": 2.4764, "step": 1636500 }, { "epoch": 8.11, "learning_rate": 4.5946339481676355e-05, "loss": 2.4155, "step": 1637000 }, { "epoch": 8.11, "learning_rate": 4.594510337242312e-05, "loss": 2.4596, "step": 1637500 }, { "epoch": 8.12, "learning_rate": 4.5943864785997034e-05, "loss": 2.4651, "step": 1638000 }, { "epoch": 8.12, "learning_rate": 4.594262619957095e-05, "loss": 2.4594, "step": 1638500 }, { "epoch": 8.12, "learning_rate": 4.594138761314487e-05, "loss": 2.4223, "step": 1639000 }, { "epoch": 8.12, "learning_rate": 4.5940149026718785e-05, "loss": 2.4587, "step": 1639500 }, { "epoch": 8.13, "learning_rate": 4.59389104402927e-05, "loss": 2.4586, "step": 1640000 }, { "epoch": 8.13, "learning_rate": 4.593767185386662e-05, "loss": 2.4567, "step": 1640500 }, { "epoch": 8.13, "learning_rate": 4.5936433267440536e-05, "loss": 2.4287, "step": 1641000 }, { "epoch": 8.13, "learning_rate": 4.593519468101445e-05, "loss": 2.4658, "step": 1641500 }, { "epoch": 8.14, "learning_rate": 4.593395609458837e-05, "loss": 2.4436, "step": 1642000 }, { "epoch": 8.14, "learning_rate": 4.593271750816229e-05, "loss": 2.4596, "step": 1642500 }, { "epoch": 8.14, "learning_rate": 4.5931478921736204e-05, "loss": 2.4541, "step": 1643000 }, { "epoch": 8.14, "learning_rate": 4.593024281248297e-05, "loss": 2.4404, "step": 1643500 }, { "epoch": 8.14, "learning_rate": 4.592900422605689e-05, "loss": 2.4754, "step": 1644000 }, { "epoch": 8.15, "learning_rate": 4.592776811680365e-05, "loss": 2.4406, "step": 1644500 }, { "epoch": 8.15, "learning_rate": 4.592652953037757e-05, "loss": 2.4571, "step": 1645000 }, { "epoch": 8.15, "learning_rate": 4.5925290943951485e-05, "loss": 2.4375, "step": 1645500 }, { "epoch": 8.15, "learning_rate": 4.59240523575254e-05, "loss": 2.4535, "step": 1646000 }, { "epoch": 8.16, "learning_rate": 4.592281377109932e-05, "loss": 2.4388, "step": 1646500 }, { "epoch": 8.16, "learning_rate": 4.5921575184673236e-05, "loss": 2.4679, "step": 1647000 }, { "epoch": 8.16, "learning_rate": 4.592033659824715e-05, "loss": 2.443, "step": 1647500 }, { "epoch": 8.16, "learning_rate": 4.591909801182107e-05, "loss": 2.449, "step": 1648000 }, { "epoch": 8.17, "learning_rate": 4.591786190256784e-05, "loss": 2.463, "step": 1648500 }, { "epoch": 8.17, "learning_rate": 4.5916623316141756e-05, "loss": 2.4809, "step": 1649000 }, { "epoch": 8.17, "learning_rate": 4.591538472971567e-05, "loss": 2.4569, "step": 1649500 }, { "epoch": 8.17, "learning_rate": 4.591414614328959e-05, "loss": 2.4376, "step": 1650000 }, { "epoch": 8.18, "learning_rate": 4.5912907556863506e-05, "loss": 2.4565, "step": 1650500 }, { "epoch": 8.18, "learning_rate": 4.591166897043742e-05, "loss": 2.448, "step": 1651000 }, { "epoch": 8.18, "learning_rate": 4.591043038401134e-05, "loss": 2.4753, "step": 1651500 }, { "epoch": 8.18, "learning_rate": 4.590919179758526e-05, "loss": 2.4302, "step": 1652000 }, { "epoch": 8.19, "learning_rate": 4.5907953211159174e-05, "loss": 2.4589, "step": 1652500 }, { "epoch": 8.19, "learning_rate": 4.590671462473309e-05, "loss": 2.454, "step": 1653000 }, { "epoch": 8.19, "learning_rate": 4.590547603830701e-05, "loss": 2.442, "step": 1653500 }, { "epoch": 8.19, "learning_rate": 4.590423992905377e-05, "loss": 2.4509, "step": 1654000 }, { "epoch": 8.2, "learning_rate": 4.590300134262769e-05, "loss": 2.4578, "step": 1654500 }, { "epoch": 8.2, "learning_rate": 4.5901765233374456e-05, "loss": 2.4688, "step": 1655000 }, { "epoch": 8.2, "learning_rate": 4.590052664694837e-05, "loss": 2.4823, "step": 1655500 }, { "epoch": 8.2, "learning_rate": 4.589928806052229e-05, "loss": 2.4619, "step": 1656000 }, { "epoch": 8.21, "learning_rate": 4.5898049474096207e-05, "loss": 2.443, "step": 1656500 }, { "epoch": 8.21, "learning_rate": 4.5896810887670123e-05, "loss": 2.4455, "step": 1657000 }, { "epoch": 8.21, "learning_rate": 4.5895574778416885e-05, "loss": 2.4619, "step": 1657500 }, { "epoch": 8.21, "learning_rate": 4.589433866916366e-05, "loss": 2.4513, "step": 1658000 }, { "epoch": 8.22, "learning_rate": 4.589310008273758e-05, "loss": 2.4317, "step": 1658500 }, { "epoch": 8.22, "learning_rate": 4.5891861496311495e-05, "loss": 2.4583, "step": 1659000 }, { "epoch": 8.22, "learning_rate": 4.5890625387058264e-05, "loss": 2.4789, "step": 1659500 }, { "epoch": 8.22, "learning_rate": 4.588938680063218e-05, "loss": 2.4496, "step": 1660000 }, { "epoch": 8.23, "learning_rate": 4.58881482142061e-05, "loss": 2.4006, "step": 1660500 }, { "epoch": 8.23, "learning_rate": 4.5886909627780015e-05, "loss": 2.4678, "step": 1661000 }, { "epoch": 8.23, "learning_rate": 4.588567104135393e-05, "loss": 2.4779, "step": 1661500 }, { "epoch": 8.23, "learning_rate": 4.588443245492784e-05, "loss": 2.4671, "step": 1662000 }, { "epoch": 8.24, "learning_rate": 4.588319386850176e-05, "loss": 2.457, "step": 1662500 }, { "epoch": 8.24, "learning_rate": 4.5881955282075675e-05, "loss": 2.466, "step": 1663000 }, { "epoch": 8.24, "learning_rate": 4.588071669564959e-05, "loss": 2.4845, "step": 1663500 }, { "epoch": 8.24, "learning_rate": 4.587947810922351e-05, "loss": 2.4621, "step": 1664000 }, { "epoch": 8.25, "learning_rate": 4.587823952279742e-05, "loss": 2.455, "step": 1664500 }, { "epoch": 8.25, "learning_rate": 4.5877000936371336e-05, "loss": 2.4329, "step": 1665000 }, { "epoch": 8.25, "learning_rate": 4.587576234994525e-05, "loss": 2.4424, "step": 1665500 }, { "epoch": 8.25, "learning_rate": 4.587452376351917e-05, "loss": 2.4667, "step": 1666000 }, { "epoch": 8.26, "learning_rate": 4.587328765426594e-05, "loss": 2.4531, "step": 1666500 }, { "epoch": 8.26, "learning_rate": 4.5872049067839856e-05, "loss": 2.4527, "step": 1667000 }, { "epoch": 8.26, "learning_rate": 4.587081048141377e-05, "loss": 2.4457, "step": 1667500 }, { "epoch": 8.26, "learning_rate": 4.586957189498769e-05, "loss": 2.4557, "step": 1668000 }, { "epoch": 8.27, "learning_rate": 4.586833330856161e-05, "loss": 2.4627, "step": 1668500 }, { "epoch": 8.27, "learning_rate": 4.5867094722135524e-05, "loss": 2.4349, "step": 1669000 }, { "epoch": 8.27, "learning_rate": 4.586585613570944e-05, "loss": 2.4301, "step": 1669500 }, { "epoch": 8.27, "learning_rate": 4.586461754928336e-05, "loss": 2.4527, "step": 1670000 }, { "epoch": 8.28, "learning_rate": 4.5863378962857274e-05, "loss": 2.4991, "step": 1670500 }, { "epoch": 8.28, "learning_rate": 4.5862142853604037e-05, "loss": 2.4676, "step": 1671000 }, { "epoch": 8.28, "learning_rate": 4.5860904267177953e-05, "loss": 2.4682, "step": 1671500 }, { "epoch": 8.28, "learning_rate": 4.585966568075187e-05, "loss": 2.4547, "step": 1672000 }, { "epoch": 8.29, "learning_rate": 4.585842709432579e-05, "loss": 2.454, "step": 1672500 }, { "epoch": 8.29, "learning_rate": 4.5857190985072556e-05, "loss": 2.4367, "step": 1673000 }, { "epoch": 8.29, "learning_rate": 4.585595239864647e-05, "loss": 2.4829, "step": 1673500 }, { "epoch": 8.29, "learning_rate": 4.585471381222039e-05, "loss": 2.4399, "step": 1674000 }, { "epoch": 8.3, "learning_rate": 4.585347522579431e-05, "loss": 2.4632, "step": 1674500 }, { "epoch": 8.3, "learning_rate": 4.5852236639368224e-05, "loss": 2.4526, "step": 1675000 }, { "epoch": 8.3, "learning_rate": 4.585099805294214e-05, "loss": 2.4805, "step": 1675500 }, { "epoch": 8.3, "learning_rate": 4.584976194368891e-05, "loss": 2.4633, "step": 1676000 }, { "epoch": 8.31, "learning_rate": 4.5848523357262826e-05, "loss": 2.4558, "step": 1676500 }, { "epoch": 8.31, "learning_rate": 4.5847284770836743e-05, "loss": 2.4494, "step": 1677000 }, { "epoch": 8.31, "learning_rate": 4.584604618441066e-05, "loss": 2.4412, "step": 1677500 }, { "epoch": 8.31, "learning_rate": 4.584481255233028e-05, "loss": 2.4841, "step": 1678000 }, { "epoch": 8.32, "learning_rate": 4.58435739659042e-05, "loss": 2.4545, "step": 1678500 }, { "epoch": 8.32, "learning_rate": 4.5842335379478115e-05, "loss": 2.4488, "step": 1679000 }, { "epoch": 8.32, "learning_rate": 4.584109679305203e-05, "loss": 2.454, "step": 1679500 }, { "epoch": 8.32, "learning_rate": 4.583985820662595e-05, "loss": 2.4556, "step": 1680000 }, { "epoch": 8.33, "learning_rate": 4.5838619620199866e-05, "loss": 2.4793, "step": 1680500 }, { "epoch": 8.33, "learning_rate": 4.5837381033773776e-05, "loss": 2.4807, "step": 1681000 }, { "epoch": 8.33, "learning_rate": 4.583614244734769e-05, "loss": 2.4468, "step": 1681500 }, { "epoch": 8.33, "learning_rate": 4.583490386092161e-05, "loss": 2.4587, "step": 1682000 }, { "epoch": 8.34, "learning_rate": 4.583366775166838e-05, "loss": 2.4789, "step": 1682500 }, { "epoch": 8.34, "learning_rate": 4.583243164241515e-05, "loss": 2.4712, "step": 1683000 }, { "epoch": 8.34, "learning_rate": 4.5831193055989064e-05, "loss": 2.4732, "step": 1683500 }, { "epoch": 8.34, "learning_rate": 4.582995446956298e-05, "loss": 2.4527, "step": 1684000 }, { "epoch": 8.35, "learning_rate": 4.582871836030975e-05, "loss": 2.4415, "step": 1684500 }, { "epoch": 8.35, "learning_rate": 4.582747977388367e-05, "loss": 2.4473, "step": 1685000 }, { "epoch": 8.35, "learning_rate": 4.582624118745758e-05, "loss": 2.4513, "step": 1685500 }, { "epoch": 8.35, "learning_rate": 4.5825002601031494e-05, "loss": 2.4484, "step": 1686000 }, { "epoch": 8.36, "learning_rate": 4.582376649177826e-05, "loss": 2.454, "step": 1686500 }, { "epoch": 8.36, "learning_rate": 4.582252790535218e-05, "loss": 2.4445, "step": 1687000 }, { "epoch": 8.36, "learning_rate": 4.58212893189261e-05, "loss": 2.4324, "step": 1687500 }, { "epoch": 8.36, "learning_rate": 4.5820050732500014e-05, "loss": 2.4387, "step": 1688000 }, { "epoch": 8.37, "learning_rate": 4.581881214607393e-05, "loss": 2.4302, "step": 1688500 }, { "epoch": 8.37, "learning_rate": 4.581757355964785e-05, "loss": 2.4632, "step": 1689000 }, { "epoch": 8.37, "learning_rate": 4.5816334973221764e-05, "loss": 2.4692, "step": 1689500 }, { "epoch": 8.37, "learning_rate": 4.581509638679568e-05, "loss": 2.4576, "step": 1690000 }, { "epoch": 8.38, "learning_rate": 4.58138578003696e-05, "loss": 2.454, "step": 1690500 }, { "epoch": 8.38, "learning_rate": 4.5812619213943515e-05, "loss": 2.4575, "step": 1691000 }, { "epoch": 8.38, "learning_rate": 4.581138062751743e-05, "loss": 2.4608, "step": 1691500 }, { "epoch": 8.38, "learning_rate": 4.581014204109135e-05, "loss": 2.4546, "step": 1692000 }, { "epoch": 8.39, "learning_rate": 4.580890593183811e-05, "loss": 2.4915, "step": 1692500 }, { "epoch": 8.39, "learning_rate": 4.580766734541203e-05, "loss": 2.4625, "step": 1693000 }, { "epoch": 8.39, "learning_rate": 4.5806428758985945e-05, "loss": 2.4287, "step": 1693500 }, { "epoch": 8.39, "learning_rate": 4.580519017255986e-05, "loss": 2.45, "step": 1694000 }, { "epoch": 8.4, "learning_rate": 4.580395406330663e-05, "loss": 2.4605, "step": 1694500 }, { "epoch": 8.4, "learning_rate": 4.580271547688055e-05, "loss": 2.4412, "step": 1695000 }, { "epoch": 8.4, "learning_rate": 4.5801476890454464e-05, "loss": 2.4722, "step": 1695500 }, { "epoch": 8.4, "learning_rate": 4.580023830402838e-05, "loss": 2.4585, "step": 1696000 }, { "epoch": 8.41, "learning_rate": 4.57989997176023e-05, "loss": 2.4503, "step": 1696500 }, { "epoch": 8.41, "learning_rate": 4.579776360834907e-05, "loss": 2.4558, "step": 1697000 }, { "epoch": 8.41, "learning_rate": 4.5796525021922984e-05, "loss": 2.4635, "step": 1697500 }, { "epoch": 8.41, "learning_rate": 4.5795288912669746e-05, "loss": 2.4488, "step": 1698000 }, { "epoch": 8.41, "learning_rate": 4.579405032624366e-05, "loss": 2.4235, "step": 1698500 }, { "epoch": 8.42, "learning_rate": 4.579281173981758e-05, "loss": 2.471, "step": 1699000 }, { "epoch": 8.42, "learning_rate": 4.5791575630564356e-05, "loss": 2.4608, "step": 1699500 }, { "epoch": 8.42, "learning_rate": 4.579033704413827e-05, "loss": 2.4558, "step": 1700000 }, { "epoch": 8.42, "learning_rate": 4.578909845771218e-05, "loss": 2.4506, "step": 1700500 }, { "epoch": 8.43, "learning_rate": 4.57878598712861e-05, "loss": 2.4858, "step": 1701000 }, { "epoch": 8.43, "learning_rate": 4.5786621284860016e-05, "loss": 2.4645, "step": 1701500 }, { "epoch": 8.43, "learning_rate": 4.5785385175606785e-05, "loss": 2.4548, "step": 1702000 }, { "epoch": 8.43, "learning_rate": 4.57841465891807e-05, "loss": 2.4721, "step": 1702500 }, { "epoch": 8.44, "learning_rate": 4.578290800275462e-05, "loss": 2.4628, "step": 1703000 }, { "epoch": 8.44, "learning_rate": 4.578166941632853e-05, "loss": 2.4656, "step": 1703500 }, { "epoch": 8.44, "learning_rate": 4.5780430829902446e-05, "loss": 2.4542, "step": 1704000 }, { "epoch": 8.44, "learning_rate": 4.577919224347636e-05, "loss": 2.4838, "step": 1704500 }, { "epoch": 8.45, "learning_rate": 4.577795365705028e-05, "loss": 2.4511, "step": 1705000 }, { "epoch": 8.45, "learning_rate": 4.57767150706242e-05, "loss": 2.4663, "step": 1705500 }, { "epoch": 8.45, "learning_rate": 4.5775476484198114e-05, "loss": 2.4657, "step": 1706000 }, { "epoch": 8.45, "learning_rate": 4.577423789777203e-05, "loss": 2.4396, "step": 1706500 }, { "epoch": 8.46, "learning_rate": 4.577299931134595e-05, "loss": 2.4544, "step": 1707000 }, { "epoch": 8.46, "learning_rate": 4.5771760724919865e-05, "loss": 2.4735, "step": 1707500 }, { "epoch": 8.46, "learning_rate": 4.577052213849378e-05, "loss": 2.4564, "step": 1708000 }, { "epoch": 8.46, "learning_rate": 4.57692835520677e-05, "loss": 2.4455, "step": 1708500 }, { "epoch": 8.47, "learning_rate": 4.576804744281447e-05, "loss": 2.4593, "step": 1709000 }, { "epoch": 8.47, "learning_rate": 4.5766808856388384e-05, "loss": 2.468, "step": 1709500 }, { "epoch": 8.47, "learning_rate": 4.57655702699623e-05, "loss": 2.4607, "step": 1710000 }, { "epoch": 8.47, "learning_rate": 4.576433168353622e-05, "loss": 2.4671, "step": 1710500 }, { "epoch": 8.48, "learning_rate": 4.5763093097110135e-05, "loss": 2.4854, "step": 1711000 }, { "epoch": 8.48, "learning_rate": 4.576185451068405e-05, "loss": 2.4792, "step": 1711500 }, { "epoch": 8.48, "learning_rate": 4.576061592425797e-05, "loss": 2.4591, "step": 1712000 }, { "epoch": 8.48, "learning_rate": 4.575937733783188e-05, "loss": 2.464, "step": 1712500 }, { "epoch": 8.49, "learning_rate": 4.5758138751405796e-05, "loss": 2.4626, "step": 1713000 }, { "epoch": 8.49, "learning_rate": 4.575690016497971e-05, "loss": 2.4861, "step": 1713500 }, { "epoch": 8.49, "learning_rate": 4.5755666532899334e-05, "loss": 2.4691, "step": 1714000 }, { "epoch": 8.49, "learning_rate": 4.575442794647325e-05, "loss": 2.4656, "step": 1714500 }, { "epoch": 8.5, "learning_rate": 4.575318936004717e-05, "loss": 2.4999, "step": 1715000 }, { "epoch": 8.5, "learning_rate": 4.5751950773621084e-05, "loss": 2.4376, "step": 1715500 }, { "epoch": 8.5, "learning_rate": 4.5750712187195e-05, "loss": 2.4519, "step": 1716000 }, { "epoch": 8.5, "learning_rate": 4.5749476077941763e-05, "loss": 2.4505, "step": 1716500 }, { "epoch": 8.51, "learning_rate": 4.574823749151568e-05, "loss": 2.4783, "step": 1717000 }, { "epoch": 8.51, "learning_rate": 4.57469989050896e-05, "loss": 2.437, "step": 1717500 }, { "epoch": 8.51, "learning_rate": 4.5745760318663514e-05, "loss": 2.4739, "step": 1718000 }, { "epoch": 8.51, "learning_rate": 4.574452420941029e-05, "loss": 2.4648, "step": 1718500 }, { "epoch": 8.52, "learning_rate": 4.574328562298421e-05, "loss": 2.4603, "step": 1719000 }, { "epoch": 8.52, "learning_rate": 4.574204703655812e-05, "loss": 2.4343, "step": 1719500 }, { "epoch": 8.52, "learning_rate": 4.5740808450132034e-05, "loss": 2.4705, "step": 1720000 }, { "epoch": 8.52, "learning_rate": 4.573956986370595e-05, "loss": 2.4513, "step": 1720500 }, { "epoch": 8.53, "learning_rate": 4.573833127727987e-05, "loss": 2.4702, "step": 1721000 }, { "epoch": 8.53, "learning_rate": 4.5737092690853785e-05, "loss": 2.4558, "step": 1721500 }, { "epoch": 8.53, "learning_rate": 4.57358541044277e-05, "loss": 2.4708, "step": 1722000 }, { "epoch": 8.53, "learning_rate": 4.573461551800162e-05, "loss": 2.4531, "step": 1722500 }, { "epoch": 8.54, "learning_rate": 4.5733376931575535e-05, "loss": 2.4685, "step": 1723000 }, { "epoch": 8.54, "learning_rate": 4.573213834514945e-05, "loss": 2.4622, "step": 1723500 }, { "epoch": 8.54, "learning_rate": 4.573089975872337e-05, "loss": 2.4663, "step": 1724000 }, { "epoch": 8.54, "learning_rate": 4.572966364947013e-05, "loss": 2.4865, "step": 1724500 }, { "epoch": 8.55, "learning_rate": 4.572842506304405e-05, "loss": 2.4635, "step": 1725000 }, { "epoch": 8.55, "learning_rate": 4.5727186476617965e-05, "loss": 2.4426, "step": 1725500 }, { "epoch": 8.55, "learning_rate": 4.572594789019188e-05, "loss": 2.4789, "step": 1726000 }, { "epoch": 8.55, "learning_rate": 4.57247093037658e-05, "loss": 2.4653, "step": 1726500 }, { "epoch": 8.56, "learning_rate": 4.5723470717339716e-05, "loss": 2.4574, "step": 1727000 }, { "epoch": 8.56, "learning_rate": 4.572223213091363e-05, "loss": 2.447, "step": 1727500 }, { "epoch": 8.56, "learning_rate": 4.572099354448755e-05, "loss": 2.4368, "step": 1728000 }, { "epoch": 8.56, "learning_rate": 4.571975495806147e-05, "loss": 2.4754, "step": 1728500 }, { "epoch": 8.57, "learning_rate": 4.5718516371635384e-05, "loss": 2.4884, "step": 1729000 }, { "epoch": 8.57, "learning_rate": 4.57172777852093e-05, "loss": 2.4793, "step": 1729500 }, { "epoch": 8.57, "learning_rate": 4.571603919878322e-05, "loss": 2.4638, "step": 1730000 }, { "epoch": 8.57, "learning_rate": 4.5714803089529986e-05, "loss": 2.4545, "step": 1730500 }, { "epoch": 8.58, "learning_rate": 4.57135645031039e-05, "loss": 2.4627, "step": 1731000 }, { "epoch": 8.58, "learning_rate": 4.571232591667782e-05, "loss": 2.46, "step": 1731500 }, { "epoch": 8.58, "learning_rate": 4.571108733025174e-05, "loss": 2.4664, "step": 1732000 }, { "epoch": 8.58, "learning_rate": 4.570985369817135e-05, "loss": 2.468, "step": 1732500 }, { "epoch": 8.59, "learning_rate": 4.570861511174527e-05, "loss": 2.4468, "step": 1733000 }, { "epoch": 8.59, "learning_rate": 4.5707376525319185e-05, "loss": 2.4756, "step": 1733500 }, { "epoch": 8.59, "learning_rate": 4.57061379388931e-05, "loss": 2.4659, "step": 1734000 }, { "epoch": 8.59, "learning_rate": 4.570489935246702e-05, "loss": 2.4663, "step": 1734500 }, { "epoch": 8.6, "learning_rate": 4.570366324321379e-05, "loss": 2.4561, "step": 1735000 }, { "epoch": 8.6, "learning_rate": 4.5702424656787704e-05, "loss": 2.4384, "step": 1735500 }, { "epoch": 8.6, "learning_rate": 4.5701186070361615e-05, "loss": 2.4622, "step": 1736000 }, { "epoch": 8.6, "learning_rate": 4.569994748393553e-05, "loss": 2.4654, "step": 1736500 }, { "epoch": 8.61, "learning_rate": 4.569870889750945e-05, "loss": 2.4673, "step": 1737000 }, { "epoch": 8.61, "learning_rate": 4.5697472788256224e-05, "loss": 2.4546, "step": 1737500 }, { "epoch": 8.61, "learning_rate": 4.569623667900299e-05, "loss": 2.4721, "step": 1738000 }, { "epoch": 8.61, "learning_rate": 4.569499809257691e-05, "loss": 2.4357, "step": 1738500 }, { "epoch": 8.62, "learning_rate": 4.569375950615083e-05, "loss": 2.4688, "step": 1739000 }, { "epoch": 8.62, "learning_rate": 4.5692520919724744e-05, "loss": 2.4617, "step": 1739500 }, { "epoch": 8.62, "learning_rate": 4.569128233329866e-05, "loss": 2.4783, "step": 1740000 }, { "epoch": 8.62, "learning_rate": 4.569004374687257e-05, "loss": 2.4474, "step": 1740500 }, { "epoch": 8.63, "learning_rate": 4.568880516044649e-05, "loss": 2.476, "step": 1741000 }, { "epoch": 8.63, "learning_rate": 4.5687566574020405e-05, "loss": 2.4684, "step": 1741500 }, { "epoch": 8.63, "learning_rate": 4.568632798759432e-05, "loss": 2.4567, "step": 1742000 }, { "epoch": 8.63, "learning_rate": 4.568508940116824e-05, "loss": 2.4522, "step": 1742500 }, { "epoch": 8.64, "learning_rate": 4.568385081474215e-05, "loss": 2.4613, "step": 1743000 }, { "epoch": 8.64, "learning_rate": 4.5682612228316065e-05, "loss": 2.481, "step": 1743500 }, { "epoch": 8.64, "learning_rate": 4.568137611906284e-05, "loss": 2.4624, "step": 1744000 }, { "epoch": 8.64, "learning_rate": 4.568013753263675e-05, "loss": 2.4642, "step": 1744500 }, { "epoch": 8.65, "learning_rate": 4.567889894621067e-05, "loss": 2.4412, "step": 1745000 }, { "epoch": 8.65, "learning_rate": 4.5677660359784585e-05, "loss": 2.4577, "step": 1745500 }, { "epoch": 8.65, "learning_rate": 4.56764217733585e-05, "loss": 2.4878, "step": 1746000 }, { "epoch": 8.65, "learning_rate": 4.567518566410528e-05, "loss": 2.4542, "step": 1746500 }, { "epoch": 8.66, "learning_rate": 4.567394707767919e-05, "loss": 2.466, "step": 1747000 }, { "epoch": 8.66, "learning_rate": 4.5672708491253105e-05, "loss": 2.4531, "step": 1747500 }, { "epoch": 8.66, "learning_rate": 4.567146990482702e-05, "loss": 2.4706, "step": 1748000 }, { "epoch": 8.66, "learning_rate": 4.567023131840094e-05, "loss": 2.4771, "step": 1748500 }, { "epoch": 8.67, "learning_rate": 4.5668992731974855e-05, "loss": 2.4667, "step": 1749000 }, { "epoch": 8.67, "learning_rate": 4.5667754145548766e-05, "loss": 2.4672, "step": 1749500 }, { "epoch": 8.67, "learning_rate": 4.566651803629554e-05, "loss": 2.4299, "step": 1750000 }, { "epoch": 8.67, "learning_rate": 4.566527944986945e-05, "loss": 2.4456, "step": 1750500 }, { "epoch": 8.68, "learning_rate": 4.566404334061623e-05, "loss": 2.4839, "step": 1751000 }, { "epoch": 8.68, "learning_rate": 4.5662804754190144e-05, "loss": 2.453, "step": 1751500 }, { "epoch": 8.68, "learning_rate": 4.5661568644936906e-05, "loss": 2.4527, "step": 1752000 }, { "epoch": 8.68, "learning_rate": 4.566033005851082e-05, "loss": 2.4506, "step": 1752500 }, { "epoch": 8.68, "learning_rate": 4.565909147208474e-05, "loss": 2.4345, "step": 1753000 }, { "epoch": 8.69, "learning_rate": 4.565785288565866e-05, "loss": 2.4588, "step": 1753500 }, { "epoch": 8.69, "learning_rate": 4.5656614299232574e-05, "loss": 2.4735, "step": 1754000 }, { "epoch": 8.69, "learning_rate": 4.565537571280649e-05, "loss": 2.4679, "step": 1754500 }, { "epoch": 8.69, "learning_rate": 4.565413712638041e-05, "loss": 2.4841, "step": 1755000 }, { "epoch": 8.7, "learning_rate": 4.5652898539954324e-05, "loss": 2.4871, "step": 1755500 }, { "epoch": 8.7, "learning_rate": 4.565165995352824e-05, "loss": 2.4499, "step": 1756000 }, { "epoch": 8.7, "learning_rate": 4.565042136710216e-05, "loss": 2.4511, "step": 1756500 }, { "epoch": 8.7, "learning_rate": 4.564918278067607e-05, "loss": 2.4915, "step": 1757000 }, { "epoch": 8.71, "learning_rate": 4.5647944194249985e-05, "loss": 2.4732, "step": 1757500 }, { "epoch": 8.71, "learning_rate": 4.56467056078239e-05, "loss": 2.4507, "step": 1758000 }, { "epoch": 8.71, "learning_rate": 4.564546702139782e-05, "loss": 2.4399, "step": 1758500 }, { "epoch": 8.71, "learning_rate": 4.5644228434971736e-05, "loss": 2.4765, "step": 1759000 }, { "epoch": 8.72, "learning_rate": 4.564299480289136e-05, "loss": 2.4684, "step": 1759500 }, { "epoch": 8.72, "learning_rate": 4.5641756216465274e-05, "loss": 2.4655, "step": 1760000 }, { "epoch": 8.72, "learning_rate": 4.564051763003919e-05, "loss": 2.4645, "step": 1760500 }, { "epoch": 8.72, "learning_rate": 4.563927904361311e-05, "loss": 2.4831, "step": 1761000 }, { "epoch": 8.73, "learning_rate": 4.5638040457187025e-05, "loss": 2.4761, "step": 1761500 }, { "epoch": 8.73, "learning_rate": 4.563680187076094e-05, "loss": 2.4824, "step": 1762000 }, { "epoch": 8.73, "learning_rate": 4.563556328433486e-05, "loss": 2.4461, "step": 1762500 }, { "epoch": 8.73, "learning_rate": 4.563432469790877e-05, "loss": 2.4469, "step": 1763000 }, { "epoch": 8.74, "learning_rate": 4.5633086111482685e-05, "loss": 2.4606, "step": 1763500 }, { "epoch": 8.74, "learning_rate": 4.56318475250566e-05, "loss": 2.4706, "step": 1764000 }, { "epoch": 8.74, "learning_rate": 4.563061141580338e-05, "loss": 2.4447, "step": 1764500 }, { "epoch": 8.74, "learning_rate": 4.562937530655014e-05, "loss": 2.4601, "step": 1765000 }, { "epoch": 8.75, "learning_rate": 4.562813672012406e-05, "loss": 2.4485, "step": 1765500 }, { "epoch": 8.75, "learning_rate": 4.5626898133697974e-05, "loss": 2.4624, "step": 1766000 }, { "epoch": 8.75, "learning_rate": 4.562565954727189e-05, "loss": 2.5026, "step": 1766500 }, { "epoch": 8.75, "learning_rate": 4.562442096084581e-05, "loss": 2.4555, "step": 1767000 }, { "epoch": 8.76, "learning_rate": 4.5623182374419725e-05, "loss": 2.4672, "step": 1767500 }, { "epoch": 8.76, "learning_rate": 4.562194378799364e-05, "loss": 2.4704, "step": 1768000 }, { "epoch": 8.76, "learning_rate": 4.5620712633086114e-05, "loss": 2.5025, "step": 1768500 }, { "epoch": 8.76, "learning_rate": 4.561947404666003e-05, "loss": 2.4781, "step": 1769000 }, { "epoch": 8.77, "learning_rate": 4.561823546023395e-05, "loss": 2.4675, "step": 1769500 }, { "epoch": 8.77, "learning_rate": 4.561699687380786e-05, "loss": 2.4574, "step": 1770000 }, { "epoch": 8.77, "learning_rate": 4.5615758287381775e-05, "loss": 2.4715, "step": 1770500 }, { "epoch": 8.77, "learning_rate": 4.561451970095569e-05, "loss": 2.4416, "step": 1771000 }, { "epoch": 8.78, "learning_rate": 4.561328111452961e-05, "loss": 2.4696, "step": 1771500 }, { "epoch": 8.78, "learning_rate": 4.5612042528103526e-05, "loss": 2.4609, "step": 1772000 }, { "epoch": 8.78, "learning_rate": 4.561080394167744e-05, "loss": 2.4613, "step": 1772500 }, { "epoch": 8.78, "learning_rate": 4.560956535525136e-05, "loss": 2.4924, "step": 1773000 }, { "epoch": 8.79, "learning_rate": 4.560832676882528e-05, "loss": 2.4574, "step": 1773500 }, { "epoch": 8.79, "learning_rate": 4.5607088182399194e-05, "loss": 2.479, "step": 1774000 }, { "epoch": 8.79, "learning_rate": 4.560584959597311e-05, "loss": 2.4495, "step": 1774500 }, { "epoch": 8.79, "learning_rate": 4.560461100954703e-05, "loss": 2.4675, "step": 1775000 }, { "epoch": 8.8, "learning_rate": 4.5603372423120944e-05, "loss": 2.472, "step": 1775500 }, { "epoch": 8.8, "learning_rate": 4.560213631386771e-05, "loss": 2.4506, "step": 1776000 }, { "epoch": 8.8, "learning_rate": 4.560089772744163e-05, "loss": 2.453, "step": 1776500 }, { "epoch": 8.8, "learning_rate": 4.559965914101555e-05, "loss": 2.4638, "step": 1777000 }, { "epoch": 8.81, "learning_rate": 4.559842055458946e-05, "loss": 2.4947, "step": 1777500 }, { "epoch": 8.81, "learning_rate": 4.5597181968163374e-05, "loss": 2.4435, "step": 1778000 }, { "epoch": 8.81, "learning_rate": 4.559594338173729e-05, "loss": 2.4739, "step": 1778500 }, { "epoch": 8.81, "learning_rate": 4.559470727248406e-05, "loss": 2.435, "step": 1779000 }, { "epoch": 8.82, "learning_rate": 4.559346868605798e-05, "loss": 2.4844, "step": 1779500 }, { "epoch": 8.82, "learning_rate": 4.5592230099631894e-05, "loss": 2.4432, "step": 1780000 }, { "epoch": 8.82, "learning_rate": 4.559099151320581e-05, "loss": 2.4639, "step": 1780500 }, { "epoch": 8.82, "learning_rate": 4.558975540395258e-05, "loss": 2.4533, "step": 1781000 }, { "epoch": 8.83, "learning_rate": 4.558851929469935e-05, "loss": 2.4545, "step": 1781500 }, { "epoch": 8.83, "learning_rate": 4.5587280708273265e-05, "loss": 2.4481, "step": 1782000 }, { "epoch": 8.83, "learning_rate": 4.5586042121847175e-05, "loss": 2.4613, "step": 1782500 }, { "epoch": 8.83, "learning_rate": 4.558480353542109e-05, "loss": 2.4775, "step": 1783000 }, { "epoch": 8.84, "learning_rate": 4.558356494899501e-05, "loss": 2.4433, "step": 1783500 }, { "epoch": 8.84, "learning_rate": 4.5582326362568926e-05, "loss": 2.4653, "step": 1784000 }, { "epoch": 8.84, "learning_rate": 4.558108777614284e-05, "loss": 2.4877, "step": 1784500 }, { "epoch": 8.84, "learning_rate": 4.557984918971676e-05, "loss": 2.4707, "step": 1785000 }, { "epoch": 8.85, "learning_rate": 4.557861060329068e-05, "loss": 2.4879, "step": 1785500 }, { "epoch": 8.85, "learning_rate": 4.5577372016864594e-05, "loss": 2.4454, "step": 1786000 }, { "epoch": 8.85, "learning_rate": 4.557613343043851e-05, "loss": 2.458, "step": 1786500 }, { "epoch": 8.85, "learning_rate": 4.557489732118528e-05, "loss": 2.4683, "step": 1787000 }, { "epoch": 8.86, "learning_rate": 4.5573658734759196e-05, "loss": 2.4623, "step": 1787500 }, { "epoch": 8.86, "learning_rate": 4.5572420148333113e-05, "loss": 2.4821, "step": 1788000 }, { "epoch": 8.86, "learning_rate": 4.557118156190703e-05, "loss": 2.456, "step": 1788500 }, { "epoch": 8.86, "learning_rate": 4.556994297548095e-05, "loss": 2.4476, "step": 1789000 }, { "epoch": 8.87, "learning_rate": 4.5568704389054864e-05, "loss": 2.4679, "step": 1789500 }, { "epoch": 8.87, "learning_rate": 4.556746580262878e-05, "loss": 2.4734, "step": 1790000 }, { "epoch": 8.87, "learning_rate": 4.55662272162027e-05, "loss": 2.4734, "step": 1790500 }, { "epoch": 8.87, "learning_rate": 4.556498862977661e-05, "loss": 2.4945, "step": 1791000 }, { "epoch": 8.88, "learning_rate": 4.5563750043350525e-05, "loss": 2.4307, "step": 1791500 }, { "epoch": 8.88, "learning_rate": 4.5562513934097294e-05, "loss": 2.4534, "step": 1792000 }, { "epoch": 8.88, "learning_rate": 4.556127534767121e-05, "loss": 2.4577, "step": 1792500 }, { "epoch": 8.88, "learning_rate": 4.556003676124513e-05, "loss": 2.4634, "step": 1793000 }, { "epoch": 8.89, "learning_rate": 4.5558798174819045e-05, "loss": 2.4808, "step": 1793500 }, { "epoch": 8.89, "learning_rate": 4.555755958839296e-05, "loss": 2.4439, "step": 1794000 }, { "epoch": 8.89, "learning_rate": 4.555632100196688e-05, "loss": 2.4453, "step": 1794500 }, { "epoch": 8.89, "learning_rate": 4.555508489271365e-05, "loss": 2.442, "step": 1795000 }, { "epoch": 8.9, "learning_rate": 4.5553846306287564e-05, "loss": 2.4453, "step": 1795500 }, { "epoch": 8.9, "learning_rate": 4.555260771986148e-05, "loss": 2.4428, "step": 1796000 }, { "epoch": 8.9, "learning_rate": 4.555137161060824e-05, "loss": 2.48, "step": 1796500 }, { "epoch": 8.9, "learning_rate": 4.555013302418216e-05, "loss": 2.4658, "step": 1797000 }, { "epoch": 8.91, "learning_rate": 4.554889443775608e-05, "loss": 2.4606, "step": 1797500 }, { "epoch": 8.91, "learning_rate": 4.5547655851329994e-05, "loss": 2.449, "step": 1798000 }, { "epoch": 8.91, "learning_rate": 4.554641726490391e-05, "loss": 2.4647, "step": 1798500 }, { "epoch": 8.91, "learning_rate": 4.554517867847783e-05, "loss": 2.4644, "step": 1799000 }, { "epoch": 8.92, "learning_rate": 4.5543940092051745e-05, "loss": 2.4737, "step": 1799500 }, { "epoch": 8.92, "learning_rate": 4.554270150562566e-05, "loss": 2.491, "step": 1800000 }, { "epoch": 8.92, "learning_rate": 4.554146291919958e-05, "loss": 2.4842, "step": 1800500 }, { "epoch": 8.92, "learning_rate": 4.55402292871192e-05, "loss": 2.4706, "step": 1801000 }, { "epoch": 8.93, "learning_rate": 4.553899070069311e-05, "loss": 2.4853, "step": 1801500 }, { "epoch": 8.93, "learning_rate": 4.5537752114267026e-05, "loss": 2.4699, "step": 1802000 }, { "epoch": 8.93, "learning_rate": 4.5536513527840943e-05, "loss": 2.4815, "step": 1802500 }, { "epoch": 8.93, "learning_rate": 4.553527494141486e-05, "loss": 2.458, "step": 1803000 }, { "epoch": 8.94, "learning_rate": 4.553403635498878e-05, "loss": 2.4418, "step": 1803500 }, { "epoch": 8.94, "learning_rate": 4.5532797768562694e-05, "loss": 2.4691, "step": 1804000 }, { "epoch": 8.94, "learning_rate": 4.553155918213661e-05, "loss": 2.4687, "step": 1804500 }, { "epoch": 8.94, "learning_rate": 4.553032059571053e-05, "loss": 2.4617, "step": 1805000 }, { "epoch": 8.95, "learning_rate": 4.55290844864573e-05, "loss": 2.491, "step": 1805500 }, { "epoch": 8.95, "learning_rate": 4.5527845900031214e-05, "loss": 2.4481, "step": 1806000 }, { "epoch": 8.95, "learning_rate": 4.552660731360513e-05, "loss": 2.447, "step": 1806500 }, { "epoch": 8.95, "learning_rate": 4.552536872717905e-05, "loss": 2.4697, "step": 1807000 }, { "epoch": 8.95, "learning_rate": 4.5524132617925816e-05, "loss": 2.4696, "step": 1807500 }, { "epoch": 8.96, "learning_rate": 4.5522894031499727e-05, "loss": 2.4883, "step": 1808000 }, { "epoch": 8.96, "learning_rate": 4.5521655445073644e-05, "loss": 2.429, "step": 1808500 }, { "epoch": 8.96, "learning_rate": 4.552041685864756e-05, "loss": 2.4636, "step": 1809000 }, { "epoch": 8.96, "learning_rate": 4.5519180749394336e-05, "loss": 2.4663, "step": 1809500 }, { "epoch": 8.97, "learning_rate": 4.5517944640141105e-05, "loss": 2.4526, "step": 1810000 }, { "epoch": 8.97, "learning_rate": 4.551670605371502e-05, "loss": 2.4824, "step": 1810500 }, { "epoch": 8.97, "learning_rate": 4.551546746728894e-05, "loss": 2.4671, "step": 1811000 }, { "epoch": 8.97, "learning_rate": 4.5514228880862856e-05, "loss": 2.4767, "step": 1811500 }, { "epoch": 8.98, "learning_rate": 4.551299029443677e-05, "loss": 2.4661, "step": 1812000 }, { "epoch": 8.98, "learning_rate": 4.551175170801068e-05, "loss": 2.4633, "step": 1812500 }, { "epoch": 8.98, "learning_rate": 4.55105131215846e-05, "loss": 2.4379, "step": 1813000 }, { "epoch": 8.98, "learning_rate": 4.5509274535158517e-05, "loss": 2.4667, "step": 1813500 }, { "epoch": 8.99, "learning_rate": 4.5508035948732434e-05, "loss": 2.4808, "step": 1814000 }, { "epoch": 8.99, "learning_rate": 4.55067998394792e-05, "loss": 2.4708, "step": 1814500 }, { "epoch": 8.99, "learning_rate": 4.550556125305312e-05, "loss": 2.4644, "step": 1815000 }, { "epoch": 8.99, "learning_rate": 4.5504322666627036e-05, "loss": 2.4861, "step": 1815500 }, { "epoch": 9.0, "learning_rate": 4.550308408020095e-05, "loss": 2.4598, "step": 1816000 }, { "epoch": 9.0, "learning_rate": 4.550184549377487e-05, "loss": 2.4818, "step": 1816500 }, { "epoch": 9.0, "eval_accuracy": 0.6403843342409302, "eval_accuracy_mlm": 0.5934790457919371, "eval_accuracy_nsp": 0.8615189108837107, "eval_loss": 2.434657096862793, "eval_runtime": 145.7954, "eval_samples_per_second": 1748.745, "eval_steps_per_second": 72.869, "step": 1816587 }, { "epoch": 9.0, "learning_rate": 4.550060938452164e-05, "loss": 2.424, "step": 1817000 }, { "epoch": 9.0, "learning_rate": 4.5499370798095556e-05, "loss": 2.426, "step": 1817500 }, { "epoch": 9.01, "learning_rate": 4.549813221166947e-05, "loss": 2.4208, "step": 1818000 }, { "epoch": 9.01, "learning_rate": 4.549689362524339e-05, "loss": 2.4265, "step": 1818500 }, { "epoch": 9.01, "learning_rate": 4.549565751599015e-05, "loss": 2.4424, "step": 1819000 }, { "epoch": 9.01, "learning_rate": 4.549441892956407e-05, "loss": 2.4181, "step": 1819500 }, { "epoch": 9.02, "learning_rate": 4.5493180343137986e-05, "loss": 2.4305, "step": 1820000 }, { "epoch": 9.02, "learning_rate": 4.54919417567119e-05, "loss": 2.4542, "step": 1820500 }, { "epoch": 9.02, "learning_rate": 4.549070317028582e-05, "loss": 2.4495, "step": 1821000 }, { "epoch": 9.02, "learning_rate": 4.5489464583859736e-05, "loss": 2.4497, "step": 1821500 }, { "epoch": 9.03, "learning_rate": 4.5488228474606505e-05, "loss": 2.442, "step": 1822000 }, { "epoch": 9.03, "learning_rate": 4.548698988818042e-05, "loss": 2.4593, "step": 1822500 }, { "epoch": 9.03, "learning_rate": 4.548575130175434e-05, "loss": 2.4229, "step": 1823000 }, { "epoch": 9.03, "learning_rate": 4.5484512715328256e-05, "loss": 2.4115, "step": 1823500 }, { "epoch": 9.04, "learning_rate": 4.548327412890217e-05, "loss": 2.4466, "step": 1824000 }, { "epoch": 9.04, "learning_rate": 4.548203554247609e-05, "loss": 2.4517, "step": 1824500 }, { "epoch": 9.04, "learning_rate": 4.548079695605001e-05, "loss": 2.4276, "step": 1825000 }, { "epoch": 9.04, "learning_rate": 4.5479558369623924e-05, "loss": 2.4327, "step": 1825500 }, { "epoch": 9.05, "learning_rate": 4.5478319783197834e-05, "loss": 2.4248, "step": 1826000 }, { "epoch": 9.05, "learning_rate": 4.547708119677175e-05, "loss": 2.4232, "step": 1826500 }, { "epoch": 9.05, "learning_rate": 4.547584508751852e-05, "loss": 2.4336, "step": 1827000 }, { "epoch": 9.05, "learning_rate": 4.547460897826529e-05, "loss": 2.4489, "step": 1827500 }, { "epoch": 9.06, "learning_rate": 4.5473370391839205e-05, "loss": 2.4391, "step": 1828000 }, { "epoch": 9.06, "learning_rate": 4.5472134282585974e-05, "loss": 2.4429, "step": 1828500 }, { "epoch": 9.06, "learning_rate": 4.5470895696159884e-05, "loss": 2.4328, "step": 1829000 }, { "epoch": 9.06, "learning_rate": 4.54696571097338e-05, "loss": 2.4022, "step": 1829500 }, { "epoch": 9.07, "learning_rate": 4.546841852330772e-05, "loss": 2.4167, "step": 1830000 }, { "epoch": 9.07, "learning_rate": 4.5467179936881635e-05, "loss": 2.4667, "step": 1830500 }, { "epoch": 9.07, "learning_rate": 4.546594135045555e-05, "loss": 2.4303, "step": 1831000 }, { "epoch": 9.07, "learning_rate": 4.546470276402947e-05, "loss": 2.4245, "step": 1831500 }, { "epoch": 9.08, "learning_rate": 4.5463464177603386e-05, "loss": 2.4439, "step": 1832000 }, { "epoch": 9.08, "learning_rate": 4.54622255911773e-05, "loss": 2.447, "step": 1832500 }, { "epoch": 9.08, "learning_rate": 4.546098700475122e-05, "loss": 2.4434, "step": 1833000 }, { "epoch": 9.08, "learning_rate": 4.5459748418325137e-05, "loss": 2.4311, "step": 1833500 }, { "epoch": 9.09, "learning_rate": 4.5458509831899053e-05, "loss": 2.4513, "step": 1834000 }, { "epoch": 9.09, "learning_rate": 4.545727372264582e-05, "loss": 2.4607, "step": 1834500 }, { "epoch": 9.09, "learning_rate": 4.545603513621974e-05, "loss": 2.4291, "step": 1835000 }, { "epoch": 9.09, "learning_rate": 4.5454796549793656e-05, "loss": 2.4439, "step": 1835500 }, { "epoch": 9.1, "learning_rate": 4.545355796336757e-05, "loss": 2.4741, "step": 1836000 }, { "epoch": 9.1, "learning_rate": 4.5452321854114335e-05, "loss": 2.4591, "step": 1836500 }, { "epoch": 9.1, "learning_rate": 4.545108326768825e-05, "loss": 2.4535, "step": 1837000 }, { "epoch": 9.1, "learning_rate": 4.544984468126217e-05, "loss": 2.4464, "step": 1837500 }, { "epoch": 9.11, "learning_rate": 4.5448606094836086e-05, "loss": 2.4405, "step": 1838000 }, { "epoch": 9.11, "learning_rate": 4.544736750841e-05, "loss": 2.4423, "step": 1838500 }, { "epoch": 9.11, "learning_rate": 4.544612892198392e-05, "loss": 2.4383, "step": 1839000 }, { "epoch": 9.11, "learning_rate": 4.544489033555784e-05, "loss": 2.4421, "step": 1839500 }, { "epoch": 9.12, "learning_rate": 4.5443651749131754e-05, "loss": 2.4476, "step": 1840000 }, { "epoch": 9.12, "learning_rate": 4.544241316270567e-05, "loss": 2.4121, "step": 1840500 }, { "epoch": 9.12, "learning_rate": 4.544117457627959e-05, "loss": 2.4078, "step": 1841000 }, { "epoch": 9.12, "learning_rate": 4.5439935989853504e-05, "loss": 2.4349, "step": 1841500 }, { "epoch": 9.13, "learning_rate": 4.543869988060027e-05, "loss": 2.4562, "step": 1842000 }, { "epoch": 9.13, "learning_rate": 4.543746129417419e-05, "loss": 2.404, "step": 1842500 }, { "epoch": 9.13, "learning_rate": 4.543622270774811e-05, "loss": 2.4584, "step": 1843000 }, { "epoch": 9.13, "learning_rate": 4.543498659849487e-05, "loss": 2.4404, "step": 1843500 }, { "epoch": 9.14, "learning_rate": 4.5433748012068786e-05, "loss": 2.4358, "step": 1844000 }, { "epoch": 9.14, "learning_rate": 4.54325094256427e-05, "loss": 2.4061, "step": 1844500 }, { "epoch": 9.14, "learning_rate": 4.543127083921662e-05, "loss": 2.4233, "step": 1845000 }, { "epoch": 9.14, "learning_rate": 4.543003472996339e-05, "loss": 2.4524, "step": 1845500 }, { "epoch": 9.15, "learning_rate": 4.542879862071016e-05, "loss": 2.4392, "step": 1846000 }, { "epoch": 9.15, "learning_rate": 4.5427562511456926e-05, "loss": 2.4372, "step": 1846500 }, { "epoch": 9.15, "learning_rate": 4.542632392503084e-05, "loss": 2.4474, "step": 1847000 }, { "epoch": 9.15, "learning_rate": 4.542508533860476e-05, "loss": 2.4468, "step": 1847500 }, { "epoch": 9.16, "learning_rate": 4.542384675217868e-05, "loss": 2.4432, "step": 1848000 }, { "epoch": 9.16, "learning_rate": 4.5422608165752594e-05, "loss": 2.4501, "step": 1848500 }, { "epoch": 9.16, "learning_rate": 4.5421369579326504e-05, "loss": 2.4495, "step": 1849000 }, { "epoch": 9.16, "learning_rate": 4.542013099290042e-05, "loss": 2.4418, "step": 1849500 }, { "epoch": 9.17, "learning_rate": 4.541889240647434e-05, "loss": 2.4484, "step": 1850000 }, { "epoch": 9.17, "learning_rate": 4.5417653820048255e-05, "loss": 2.4385, "step": 1850500 }, { "epoch": 9.17, "learning_rate": 4.541641523362217e-05, "loss": 2.4258, "step": 1851000 }, { "epoch": 9.17, "learning_rate": 4.541517664719609e-05, "loss": 2.4504, "step": 1851500 }, { "epoch": 9.18, "learning_rate": 4.5413938060770006e-05, "loss": 2.458, "step": 1852000 }, { "epoch": 9.18, "learning_rate": 4.541269947434392e-05, "loss": 2.4314, "step": 1852500 }, { "epoch": 9.18, "learning_rate": 4.541146088791784e-05, "loss": 2.436, "step": 1853000 }, { "epoch": 9.18, "learning_rate": 4.5410222301491757e-05, "loss": 2.426, "step": 1853500 }, { "epoch": 9.19, "learning_rate": 4.5408983715065673e-05, "loss": 2.435, "step": 1854000 }, { "epoch": 9.19, "learning_rate": 4.540774512863959e-05, "loss": 2.4389, "step": 1854500 }, { "epoch": 9.19, "learning_rate": 4.540650901938636e-05, "loss": 2.461, "step": 1855000 }, { "epoch": 9.19, "learning_rate": 4.5405270432960276e-05, "loss": 2.4472, "step": 1855500 }, { "epoch": 9.2, "learning_rate": 4.540403432370704e-05, "loss": 2.4388, "step": 1856000 }, { "epoch": 9.2, "learning_rate": 4.5402795737280955e-05, "loss": 2.4427, "step": 1856500 }, { "epoch": 9.2, "learning_rate": 4.540155715085487e-05, "loss": 2.4563, "step": 1857000 }, { "epoch": 9.2, "learning_rate": 4.540032104160165e-05, "loss": 2.4632, "step": 1857500 }, { "epoch": 9.21, "learning_rate": 4.539908245517556e-05, "loss": 2.4482, "step": 1858000 }, { "epoch": 9.21, "learning_rate": 4.5397843868749475e-05, "loss": 2.4332, "step": 1858500 }, { "epoch": 9.21, "learning_rate": 4.539660528232339e-05, "loss": 2.4423, "step": 1859000 }, { "epoch": 9.21, "learning_rate": 4.539536669589731e-05, "loss": 2.4584, "step": 1859500 }, { "epoch": 9.22, "learning_rate": 4.5394128109471225e-05, "loss": 2.4123, "step": 1860000 }, { "epoch": 9.22, "learning_rate": 4.539288952304514e-05, "loss": 2.4395, "step": 1860500 }, { "epoch": 9.22, "learning_rate": 4.539165093661906e-05, "loss": 2.4833, "step": 1861000 }, { "epoch": 9.22, "learning_rate": 4.5390412350192976e-05, "loss": 2.4595, "step": 1861500 }, { "epoch": 9.22, "learning_rate": 4.538917376376689e-05, "loss": 2.4364, "step": 1862000 }, { "epoch": 9.23, "learning_rate": 4.538793517734081e-05, "loss": 2.4205, "step": 1862500 }, { "epoch": 9.23, "learning_rate": 4.538669659091472e-05, "loss": 2.4233, "step": 1863000 }, { "epoch": 9.23, "learning_rate": 4.538545800448864e-05, "loss": 2.4394, "step": 1863500 }, { "epoch": 9.23, "learning_rate": 4.5384219418062554e-05, "loss": 2.4386, "step": 1864000 }, { "epoch": 9.24, "learning_rate": 4.538298330880932e-05, "loss": 2.4466, "step": 1864500 }, { "epoch": 9.24, "learning_rate": 4.538174472238324e-05, "loss": 2.4483, "step": 1865000 }, { "epoch": 9.24, "learning_rate": 4.538050613595716e-05, "loss": 2.454, "step": 1865500 }, { "epoch": 9.24, "learning_rate": 4.5379267549531074e-05, "loss": 2.4675, "step": 1866000 }, { "epoch": 9.25, "learning_rate": 4.537803144027784e-05, "loss": 2.4575, "step": 1866500 }, { "epoch": 9.25, "learning_rate": 4.537679285385176e-05, "loss": 2.4484, "step": 1867000 }, { "epoch": 9.25, "learning_rate": 4.5375554267425676e-05, "loss": 2.4436, "step": 1867500 }, { "epoch": 9.25, "learning_rate": 4.537431568099959e-05, "loss": 2.4271, "step": 1868000 }, { "epoch": 9.26, "learning_rate": 4.537307709457351e-05, "loss": 2.4645, "step": 1868500 }, { "epoch": 9.26, "learning_rate": 4.537183850814743e-05, "loss": 2.4648, "step": 1869000 }, { "epoch": 9.26, "learning_rate": 4.537059992172134e-05, "loss": 2.4415, "step": 1869500 }, { "epoch": 9.26, "learning_rate": 4.5369363812468106e-05, "loss": 2.4357, "step": 1870000 }, { "epoch": 9.27, "learning_rate": 4.536812522604202e-05, "loss": 2.4403, "step": 1870500 }, { "epoch": 9.27, "learning_rate": 4.536688663961594e-05, "loss": 2.4475, "step": 1871000 }, { "epoch": 9.27, "learning_rate": 4.536564805318986e-05, "loss": 2.4376, "step": 1871500 }, { "epoch": 9.27, "learning_rate": 4.5364411943936626e-05, "loss": 2.4186, "step": 1872000 }, { "epoch": 9.28, "learning_rate": 4.536317335751054e-05, "loss": 2.4547, "step": 1872500 }, { "epoch": 9.28, "learning_rate": 4.536193477108446e-05, "loss": 2.4273, "step": 1873000 }, { "epoch": 9.28, "learning_rate": 4.5360696184658376e-05, "loss": 2.453, "step": 1873500 }, { "epoch": 9.28, "learning_rate": 4.5359457598232293e-05, "loss": 2.4315, "step": 1874000 }, { "epoch": 9.29, "learning_rate": 4.535821901180621e-05, "loss": 2.4556, "step": 1874500 }, { "epoch": 9.29, "learning_rate": 4.535698042538013e-05, "loss": 2.4386, "step": 1875000 }, { "epoch": 9.29, "learning_rate": 4.5355741838954044e-05, "loss": 2.4383, "step": 1875500 }, { "epoch": 9.29, "learning_rate": 4.535450325252796e-05, "loss": 2.4127, "step": 1876000 }, { "epoch": 9.3, "learning_rate": 4.535326466610187e-05, "loss": 2.4339, "step": 1876500 }, { "epoch": 9.3, "learning_rate": 4.535202607967579e-05, "loss": 2.4542, "step": 1877000 }, { "epoch": 9.3, "learning_rate": 4.5350787493249705e-05, "loss": 2.4275, "step": 1877500 }, { "epoch": 9.3, "learning_rate": 4.5349551383996474e-05, "loss": 2.4308, "step": 1878000 }, { "epoch": 9.31, "learning_rate": 4.534831279757039e-05, "loss": 2.4147, "step": 1878500 }, { "epoch": 9.31, "learning_rate": 4.534707421114431e-05, "loss": 2.4667, "step": 1879000 }, { "epoch": 9.31, "learning_rate": 4.5345835624718225e-05, "loss": 2.4236, "step": 1879500 }, { "epoch": 9.31, "learning_rate": 4.534459703829214e-05, "loss": 2.4343, "step": 1880000 }, { "epoch": 9.32, "learning_rate": 4.534335845186606e-05, "loss": 2.4451, "step": 1880500 }, { "epoch": 9.32, "learning_rate": 4.5342119865439975e-05, "loss": 2.4541, "step": 1881000 }, { "epoch": 9.32, "learning_rate": 4.534088127901389e-05, "loss": 2.4212, "step": 1881500 }, { "epoch": 9.32, "learning_rate": 4.533964516976066e-05, "loss": 2.4321, "step": 1882000 }, { "epoch": 9.33, "learning_rate": 4.533840658333458e-05, "loss": 2.4481, "step": 1882500 }, { "epoch": 9.33, "learning_rate": 4.533716799690849e-05, "loss": 2.4062, "step": 1883000 }, { "epoch": 9.33, "learning_rate": 4.5335929410482405e-05, "loss": 2.4301, "step": 1883500 }, { "epoch": 9.33, "learning_rate": 4.533469082405632e-05, "loss": 2.4163, "step": 1884000 }, { "epoch": 9.34, "learning_rate": 4.533345471480309e-05, "loss": 2.4449, "step": 1884500 }, { "epoch": 9.34, "learning_rate": 4.533221612837701e-05, "loss": 2.4703, "step": 1885000 }, { "epoch": 9.34, "learning_rate": 4.5330977541950925e-05, "loss": 2.4336, "step": 1885500 }, { "epoch": 9.34, "learning_rate": 4.5329741432697694e-05, "loss": 2.4546, "step": 1886000 }, { "epoch": 9.35, "learning_rate": 4.532850284627161e-05, "loss": 2.4623, "step": 1886500 }, { "epoch": 9.35, "learning_rate": 4.532726425984553e-05, "loss": 2.4571, "step": 1887000 }, { "epoch": 9.35, "learning_rate": 4.5326025673419444e-05, "loss": 2.4461, "step": 1887500 }, { "epoch": 9.35, "learning_rate": 4.532478708699336e-05, "loss": 2.4349, "step": 1888000 }, { "epoch": 9.36, "learning_rate": 4.532354850056728e-05, "loss": 2.4261, "step": 1888500 }, { "epoch": 9.36, "learning_rate": 4.5322309914141195e-05, "loss": 2.4164, "step": 1889000 }, { "epoch": 9.36, "learning_rate": 4.532107132771511e-05, "loss": 2.462, "step": 1889500 }, { "epoch": 9.36, "learning_rate": 4.5319835218461874e-05, "loss": 2.4433, "step": 1890000 }, { "epoch": 9.37, "learning_rate": 4.531859663203579e-05, "loss": 2.4574, "step": 1890500 }, { "epoch": 9.37, "learning_rate": 4.531735804560971e-05, "loss": 2.4477, "step": 1891000 }, { "epoch": 9.37, "learning_rate": 4.5316119459183625e-05, "loss": 2.4444, "step": 1891500 }, { "epoch": 9.37, "learning_rate": 4.531488087275754e-05, "loss": 2.4569, "step": 1892000 }, { "epoch": 9.38, "learning_rate": 4.531364476350431e-05, "loss": 2.4576, "step": 1892500 }, { "epoch": 9.38, "learning_rate": 4.531240617707823e-05, "loss": 2.4366, "step": 1893000 }, { "epoch": 9.38, "learning_rate": 4.5311167590652145e-05, "loss": 2.455, "step": 1893500 }, { "epoch": 9.38, "learning_rate": 4.530992900422606e-05, "loss": 2.4634, "step": 1894000 }, { "epoch": 9.39, "learning_rate": 4.530869041779998e-05, "loss": 2.4578, "step": 1894500 }, { "epoch": 9.39, "learning_rate": 4.530745430854674e-05, "loss": 2.4391, "step": 1895000 }, { "epoch": 9.39, "learning_rate": 4.530621572212066e-05, "loss": 2.4376, "step": 1895500 }, { "epoch": 9.39, "learning_rate": 4.5304977135694574e-05, "loss": 2.4466, "step": 1896000 }, { "epoch": 9.4, "learning_rate": 4.530373854926849e-05, "loss": 2.429, "step": 1896500 }, { "epoch": 9.4, "learning_rate": 4.530249996284241e-05, "loss": 2.4456, "step": 1897000 }, { "epoch": 9.4, "learning_rate": 4.5301261376416325e-05, "loss": 2.4541, "step": 1897500 }, { "epoch": 9.4, "learning_rate": 4.530002278999024e-05, "loss": 2.4651, "step": 1898000 }, { "epoch": 9.41, "learning_rate": 4.529878668073701e-05, "loss": 2.4453, "step": 1898500 }, { "epoch": 9.41, "learning_rate": 4.529754809431093e-05, "loss": 2.4451, "step": 1899000 }, { "epoch": 9.41, "learning_rate": 4.5296309507884845e-05, "loss": 2.4266, "step": 1899500 }, { "epoch": 9.41, "learning_rate": 4.529507092145876e-05, "loss": 2.455, "step": 1900000 }, { "epoch": 9.42, "learning_rate": 4.529383233503268e-05, "loss": 2.4657, "step": 1900500 }, { "epoch": 9.42, "learning_rate": 4.5292593748606595e-05, "loss": 2.4469, "step": 1901000 }, { "epoch": 9.42, "learning_rate": 4.529135516218051e-05, "loss": 2.4411, "step": 1901500 }, { "epoch": 9.42, "learning_rate": 4.5290121530100126e-05, "loss": 2.4473, "step": 1902000 }, { "epoch": 9.43, "learning_rate": 4.528888294367404e-05, "loss": 2.4457, "step": 1902500 }, { "epoch": 9.43, "learning_rate": 4.528764435724796e-05, "loss": 2.4583, "step": 1903000 }, { "epoch": 9.43, "learning_rate": 4.528640577082188e-05, "loss": 2.4524, "step": 1903500 }, { "epoch": 9.43, "learning_rate": 4.5285167184395794e-05, "loss": 2.4532, "step": 1904000 }, { "epoch": 9.44, "learning_rate": 4.528392859796971e-05, "loss": 2.4809, "step": 1904500 }, { "epoch": 9.44, "learning_rate": 4.528269001154363e-05, "loss": 2.4212, "step": 1905000 }, { "epoch": 9.44, "learning_rate": 4.5281451425117545e-05, "loss": 2.4673, "step": 1905500 }, { "epoch": 9.44, "learning_rate": 4.528021283869146e-05, "loss": 2.4051, "step": 1906000 }, { "epoch": 9.45, "learning_rate": 4.527897425226538e-05, "loss": 2.4195, "step": 1906500 }, { "epoch": 9.45, "learning_rate": 4.5277735665839296e-05, "loss": 2.4623, "step": 1907000 }, { "epoch": 9.45, "learning_rate": 4.527649955658606e-05, "loss": 2.4388, "step": 1907500 }, { "epoch": 9.45, "learning_rate": 4.5275260970159975e-05, "loss": 2.4585, "step": 1908000 }, { "epoch": 9.46, "learning_rate": 4.527402238373389e-05, "loss": 2.4361, "step": 1908500 }, { "epoch": 9.46, "learning_rate": 4.527278379730781e-05, "loss": 2.442, "step": 1909000 }, { "epoch": 9.46, "learning_rate": 4.5271545210881725e-05, "loss": 2.4635, "step": 1909500 }, { "epoch": 9.46, "learning_rate": 4.527030662445564e-05, "loss": 2.4437, "step": 1910000 }, { "epoch": 9.47, "learning_rate": 4.526906803802956e-05, "loss": 2.4507, "step": 1910500 }, { "epoch": 9.47, "learning_rate": 4.5267829451603476e-05, "loss": 2.4323, "step": 1911000 }, { "epoch": 9.47, "learning_rate": 4.526659086517739e-05, "loss": 2.45, "step": 1911500 }, { "epoch": 9.47, "learning_rate": 4.526535227875131e-05, "loss": 2.4588, "step": 1912000 }, { "epoch": 9.48, "learning_rate": 4.526411369232523e-05, "loss": 2.4592, "step": 1912500 }, { "epoch": 9.48, "learning_rate": 4.5262875105899144e-05, "loss": 2.4493, "step": 1913000 }, { "epoch": 9.48, "learning_rate": 4.526163899664591e-05, "loss": 2.4565, "step": 1913500 }, { "epoch": 9.48, "learning_rate": 4.5260402887392675e-05, "loss": 2.4562, "step": 1914000 }, { "epoch": 9.49, "learning_rate": 4.525916430096659e-05, "loss": 2.463, "step": 1914500 }, { "epoch": 9.49, "learning_rate": 4.525792819171336e-05, "loss": 2.4384, "step": 1915000 }, { "epoch": 9.49, "learning_rate": 4.525668960528728e-05, "loss": 2.429, "step": 1915500 }, { "epoch": 9.49, "learning_rate": 4.5255451018861194e-05, "loss": 2.4265, "step": 1916000 }, { "epoch": 9.5, "learning_rate": 4.525421243243511e-05, "loss": 2.457, "step": 1916500 }, { "epoch": 9.5, "learning_rate": 4.525297384600903e-05, "loss": 2.4622, "step": 1917000 }, { "epoch": 9.5, "learning_rate": 4.52517377367558e-05, "loss": 2.4689, "step": 1917500 }, { "epoch": 9.5, "learning_rate": 4.5250499150329714e-05, "loss": 2.4288, "step": 1918000 }, { "epoch": 9.5, "learning_rate": 4.524926304107648e-05, "loss": 2.454, "step": 1918500 }, { "epoch": 9.51, "learning_rate": 4.52480244546504e-05, "loss": 2.4701, "step": 1919000 }, { "epoch": 9.51, "learning_rate": 4.5246785868224317e-05, "loss": 2.4518, "step": 1919500 }, { "epoch": 9.51, "learning_rate": 4.5245547281798233e-05, "loss": 2.4624, "step": 1920000 }, { "epoch": 9.51, "learning_rate": 4.5244308695372144e-05, "loss": 2.4445, "step": 1920500 }, { "epoch": 9.52, "learning_rate": 4.524307010894606e-05, "loss": 2.4495, "step": 1921000 }, { "epoch": 9.52, "learning_rate": 4.524183152251998e-05, "loss": 2.4558, "step": 1921500 }, { "epoch": 9.52, "learning_rate": 4.5240592936093894e-05, "loss": 2.4547, "step": 1922000 }, { "epoch": 9.52, "learning_rate": 4.523935682684067e-05, "loss": 2.441, "step": 1922500 }, { "epoch": 9.53, "learning_rate": 4.523811824041459e-05, "loss": 2.4597, "step": 1923000 }, { "epoch": 9.53, "learning_rate": 4.5236879653988504e-05, "loss": 2.444, "step": 1923500 }, { "epoch": 9.53, "learning_rate": 4.523564602190812e-05, "loss": 2.4567, "step": 1924000 }, { "epoch": 9.53, "learning_rate": 4.5234407435482035e-05, "loss": 2.4353, "step": 1924500 }, { "epoch": 9.54, "learning_rate": 4.523316884905595e-05, "loss": 2.4716, "step": 1925000 }, { "epoch": 9.54, "learning_rate": 4.523193026262987e-05, "loss": 2.4343, "step": 1925500 }, { "epoch": 9.54, "learning_rate": 4.5230691676203785e-05, "loss": 2.4463, "step": 1926000 }, { "epoch": 9.54, "learning_rate": 4.52294530897777e-05, "loss": 2.438, "step": 1926500 }, { "epoch": 9.55, "learning_rate": 4.522821450335162e-05, "loss": 2.4469, "step": 1927000 }, { "epoch": 9.55, "learning_rate": 4.5226975916925536e-05, "loss": 2.4329, "step": 1927500 }, { "epoch": 9.55, "learning_rate": 4.522573733049945e-05, "loss": 2.4451, "step": 1928000 }, { "epoch": 9.55, "learning_rate": 4.522449874407337e-05, "loss": 2.4711, "step": 1928500 }, { "epoch": 9.56, "learning_rate": 4.522326015764729e-05, "loss": 2.4351, "step": 1929000 }, { "epoch": 9.56, "learning_rate": 4.5222021571221204e-05, "loss": 2.4218, "step": 1929500 }, { "epoch": 9.56, "learning_rate": 4.5220782984795114e-05, "loss": 2.437, "step": 1930000 }, { "epoch": 9.56, "learning_rate": 4.521954439836903e-05, "loss": 2.4446, "step": 1930500 }, { "epoch": 9.57, "learning_rate": 4.52183082891158e-05, "loss": 2.4568, "step": 1931000 }, { "epoch": 9.57, "learning_rate": 4.521706970268972e-05, "loss": 2.4456, "step": 1931500 }, { "epoch": 9.57, "learning_rate": 4.5215831116263634e-05, "loss": 2.435, "step": 1932000 }, { "epoch": 9.57, "learning_rate": 4.521459252983755e-05, "loss": 2.4542, "step": 1932500 }, { "epoch": 9.58, "learning_rate": 4.521335394341146e-05, "loss": 2.4418, "step": 1933000 }, { "epoch": 9.58, "learning_rate": 4.521211535698538e-05, "loss": 2.4507, "step": 1933500 }, { "epoch": 9.58, "learning_rate": 4.5210876770559295e-05, "loss": 2.4544, "step": 1934000 }, { "epoch": 9.58, "learning_rate": 4.520963818413321e-05, "loss": 2.4652, "step": 1934500 }, { "epoch": 9.59, "learning_rate": 4.520840207487999e-05, "loss": 2.4531, "step": 1935000 }, { "epoch": 9.59, "learning_rate": 4.5207163488453904e-05, "loss": 2.4594, "step": 1935500 }, { "epoch": 9.59, "learning_rate": 4.520592490202782e-05, "loss": 2.453, "step": 1936000 }, { "epoch": 9.59, "learning_rate": 4.5204693747120294e-05, "loss": 2.4411, "step": 1936500 }, { "epoch": 9.6, "learning_rate": 4.5203455160694204e-05, "loss": 2.4588, "step": 1937000 }, { "epoch": 9.6, "learning_rate": 4.520221657426812e-05, "loss": 2.4649, "step": 1937500 }, { "epoch": 9.6, "learning_rate": 4.520097798784204e-05, "loss": 2.4384, "step": 1938000 }, { "epoch": 9.6, "learning_rate": 4.5199739401415955e-05, "loss": 2.4294, "step": 1938500 }, { "epoch": 9.61, "learning_rate": 4.519850081498987e-05, "loss": 2.4468, "step": 1939000 }, { "epoch": 9.61, "learning_rate": 4.519726222856379e-05, "loss": 2.451, "step": 1939500 }, { "epoch": 9.61, "learning_rate": 4.5196023642137705e-05, "loss": 2.4349, "step": 1940000 }, { "epoch": 9.61, "learning_rate": 4.519478505571162e-05, "loss": 2.439, "step": 1940500 }, { "epoch": 9.62, "learning_rate": 4.519354646928554e-05, "loss": 2.4513, "step": 1941000 }, { "epoch": 9.62, "learning_rate": 4.519230788285945e-05, "loss": 2.4451, "step": 1941500 }, { "epoch": 9.62, "learning_rate": 4.5191069296433366e-05, "loss": 2.4526, "step": 1942000 }, { "epoch": 9.62, "learning_rate": 4.518983071000728e-05, "loss": 2.4393, "step": 1942500 }, { "epoch": 9.63, "learning_rate": 4.51885921235812e-05, "loss": 2.4453, "step": 1943000 }, { "epoch": 9.63, "learning_rate": 4.518735353715512e-05, "loss": 2.4257, "step": 1943500 }, { "epoch": 9.63, "learning_rate": 4.5186114950729034e-05, "loss": 2.4565, "step": 1944000 }, { "epoch": 9.63, "learning_rate": 4.51848788414758e-05, "loss": 2.4816, "step": 1944500 }, { "epoch": 9.64, "learning_rate": 4.518364273222257e-05, "loss": 2.4714, "step": 1945000 }, { "epoch": 9.64, "learning_rate": 4.518240662296934e-05, "loss": 2.4386, "step": 1945500 }, { "epoch": 9.64, "learning_rate": 4.518116803654326e-05, "loss": 2.4524, "step": 1946000 }, { "epoch": 9.64, "learning_rate": 4.517992945011717e-05, "loss": 2.4481, "step": 1946500 }, { "epoch": 9.65, "learning_rate": 4.5178690863691084e-05, "loss": 2.4431, "step": 1947000 }, { "epoch": 9.65, "learning_rate": 4.5177452277265e-05, "loss": 2.4608, "step": 1947500 }, { "epoch": 9.65, "learning_rate": 4.517621369083892e-05, "loss": 2.4459, "step": 1948000 }, { "epoch": 9.65, "learning_rate": 4.5174977581585694e-05, "loss": 2.4347, "step": 1948500 }, { "epoch": 9.66, "learning_rate": 4.517373899515961e-05, "loss": 2.4307, "step": 1949000 }, { "epoch": 9.66, "learning_rate": 4.517250040873352e-05, "loss": 2.4474, "step": 1949500 }, { "epoch": 9.66, "learning_rate": 4.517126182230744e-05, "loss": 2.4829, "step": 1950000 }, { "epoch": 9.66, "learning_rate": 4.5170023235881355e-05, "loss": 2.4657, "step": 1950500 }, { "epoch": 9.67, "learning_rate": 4.516878464945527e-05, "loss": 2.4238, "step": 1951000 }, { "epoch": 9.67, "learning_rate": 4.516754854020204e-05, "loss": 2.4664, "step": 1951500 }, { "epoch": 9.67, "learning_rate": 4.516630995377596e-05, "loss": 2.4432, "step": 1952000 }, { "epoch": 9.67, "learning_rate": 4.516507136734987e-05, "loss": 2.4409, "step": 1952500 }, { "epoch": 9.68, "learning_rate": 4.5163832780923785e-05, "loss": 2.4568, "step": 1953000 }, { "epoch": 9.68, "learning_rate": 4.51625941944977e-05, "loss": 2.4436, "step": 1953500 }, { "epoch": 9.68, "learning_rate": 4.516135560807162e-05, "loss": 2.4369, "step": 1954000 }, { "epoch": 9.68, "learning_rate": 4.5160117021645535e-05, "loss": 2.4344, "step": 1954500 }, { "epoch": 9.69, "learning_rate": 4.515887843521945e-05, "loss": 2.4531, "step": 1955000 }, { "epoch": 9.69, "learning_rate": 4.515764232596622e-05, "loss": 2.4491, "step": 1955500 }, { "epoch": 9.69, "learning_rate": 4.515640373954014e-05, "loss": 2.4332, "step": 1956000 }, { "epoch": 9.69, "learning_rate": 4.5155165153114055e-05, "loss": 2.4424, "step": 1956500 }, { "epoch": 9.7, "learning_rate": 4.515392656668797e-05, "loss": 2.4608, "step": 1957000 }, { "epoch": 9.7, "learning_rate": 4.515268798026189e-05, "loss": 2.444, "step": 1957500 }, { "epoch": 9.7, "learning_rate": 4.5151449393835806e-05, "loss": 2.4751, "step": 1958000 }, { "epoch": 9.7, "learning_rate": 4.5150213284582574e-05, "loss": 2.4599, "step": 1958500 }, { "epoch": 9.71, "learning_rate": 4.5148974698156485e-05, "loss": 2.4676, "step": 1959000 }, { "epoch": 9.71, "learning_rate": 4.51477361117304e-05, "loss": 2.4294, "step": 1959500 }, { "epoch": 9.71, "learning_rate": 4.514649752530432e-05, "loss": 2.4494, "step": 1960000 }, { "epoch": 9.71, "learning_rate": 4.5145263893223946e-05, "loss": 2.4253, "step": 1960500 }, { "epoch": 9.72, "learning_rate": 4.514402530679786e-05, "loss": 2.4498, "step": 1961000 }, { "epoch": 9.72, "learning_rate": 4.514278672037178e-05, "loss": 2.4453, "step": 1961500 }, { "epoch": 9.72, "learning_rate": 4.51415481339457e-05, "loss": 2.4619, "step": 1962000 }, { "epoch": 9.72, "learning_rate": 4.514030954751961e-05, "loss": 2.4357, "step": 1962500 }, { "epoch": 9.73, "learning_rate": 4.5139070961093524e-05, "loss": 2.4439, "step": 1963000 }, { "epoch": 9.73, "learning_rate": 4.513783485184029e-05, "loss": 2.4647, "step": 1963500 }, { "epoch": 9.73, "learning_rate": 4.513659626541421e-05, "loss": 2.4701, "step": 1964000 }, { "epoch": 9.73, "learning_rate": 4.5135357678988126e-05, "loss": 2.4486, "step": 1964500 }, { "epoch": 9.74, "learning_rate": 4.5134119092562043e-05, "loss": 2.4749, "step": 1965000 }, { "epoch": 9.74, "learning_rate": 4.513288050613596e-05, "loss": 2.4462, "step": 1965500 }, { "epoch": 9.74, "learning_rate": 4.513164191970988e-05, "loss": 2.452, "step": 1966000 }, { "epoch": 9.74, "learning_rate": 4.5130403333283794e-05, "loss": 2.4414, "step": 1966500 }, { "epoch": 9.75, "learning_rate": 4.512916474685771e-05, "loss": 2.4396, "step": 1967000 }, { "epoch": 9.75, "learning_rate": 4.512792616043163e-05, "loss": 2.4279, "step": 1967500 }, { "epoch": 9.75, "learning_rate": 4.512668757400554e-05, "loss": 2.4615, "step": 1968000 }, { "epoch": 9.75, "learning_rate": 4.5125451464752314e-05, "loss": 2.4715, "step": 1968500 }, { "epoch": 9.76, "learning_rate": 4.512421287832623e-05, "loss": 2.4397, "step": 1969000 }, { "epoch": 9.76, "learning_rate": 4.512297429190014e-05, "loss": 2.4726, "step": 1969500 }, { "epoch": 9.76, "learning_rate": 4.512173570547406e-05, "loss": 2.4296, "step": 1970000 }, { "epoch": 9.76, "learning_rate": 4.5120497119047975e-05, "loss": 2.4543, "step": 1970500 }, { "epoch": 9.77, "learning_rate": 4.511925853262189e-05, "loss": 2.4283, "step": 1971000 }, { "epoch": 9.77, "learning_rate": 4.511801994619581e-05, "loss": 2.4433, "step": 1971500 }, { "epoch": 9.77, "learning_rate": 4.511678135976972e-05, "loss": 2.431, "step": 1972000 }, { "epoch": 9.77, "learning_rate": 4.5115542773343636e-05, "loss": 2.4314, "step": 1972500 }, { "epoch": 9.77, "learning_rate": 4.511430666409041e-05, "loss": 2.4454, "step": 1973000 }, { "epoch": 9.78, "learning_rate": 4.511306807766433e-05, "loss": 2.4631, "step": 1973500 }, { "epoch": 9.78, "learning_rate": 4.5111829491238245e-05, "loss": 2.4612, "step": 1974000 }, { "epoch": 9.78, "learning_rate": 4.5110590904812155e-05, "loss": 2.4322, "step": 1974500 }, { "epoch": 9.78, "learning_rate": 4.510935231838607e-05, "loss": 2.4259, "step": 1975000 }, { "epoch": 9.79, "learning_rate": 4.510811620913285e-05, "loss": 2.4497, "step": 1975500 }, { "epoch": 9.79, "learning_rate": 4.510687762270676e-05, "loss": 2.4183, "step": 1976000 }, { "epoch": 9.79, "learning_rate": 4.5105639036280675e-05, "loss": 2.4897, "step": 1976500 }, { "epoch": 9.79, "learning_rate": 4.510440044985459e-05, "loss": 2.4587, "step": 1977000 }, { "epoch": 9.8, "learning_rate": 4.510316434060136e-05, "loss": 2.4401, "step": 1977500 }, { "epoch": 9.8, "learning_rate": 4.510192575417528e-05, "loss": 2.4357, "step": 1978000 }, { "epoch": 9.8, "learning_rate": 4.5100687167749194e-05, "loss": 2.4482, "step": 1978500 }, { "epoch": 9.8, "learning_rate": 4.509944858132311e-05, "loss": 2.4574, "step": 1979000 }, { "epoch": 9.81, "learning_rate": 4.509821247206988e-05, "loss": 2.4316, "step": 1979500 }, { "epoch": 9.81, "learning_rate": 4.50969738856438e-05, "loss": 2.4638, "step": 1980000 }, { "epoch": 9.81, "learning_rate": 4.5095735299217714e-05, "loss": 2.4208, "step": 1980500 }, { "epoch": 9.81, "learning_rate": 4.509449671279163e-05, "loss": 2.4633, "step": 1981000 }, { "epoch": 9.82, "learning_rate": 4.5093263080711245e-05, "loss": 2.4471, "step": 1981500 }, { "epoch": 9.82, "learning_rate": 4.509202697145802e-05, "loss": 2.4612, "step": 1982000 }, { "epoch": 9.82, "learning_rate": 4.509078838503194e-05, "loss": 2.4536, "step": 1982500 }, { "epoch": 9.82, "learning_rate": 4.50895522757787e-05, "loss": 2.4203, "step": 1983000 }, { "epoch": 9.83, "learning_rate": 4.5088313689352616e-05, "loss": 2.4689, "step": 1983500 }, { "epoch": 9.83, "learning_rate": 4.508707510292653e-05, "loss": 2.4635, "step": 1984000 }, { "epoch": 9.83, "learning_rate": 4.508583651650045e-05, "loss": 2.4427, "step": 1984500 }, { "epoch": 9.83, "learning_rate": 4.508459793007437e-05, "loss": 2.4488, "step": 1985000 }, { "epoch": 9.84, "learning_rate": 4.5083359343648284e-05, "loss": 2.4687, "step": 1985500 }, { "epoch": 9.84, "learning_rate": 4.50821207572222e-05, "loss": 2.4482, "step": 1986000 }, { "epoch": 9.84, "learning_rate": 4.508088217079612e-05, "loss": 2.4457, "step": 1986500 }, { "epoch": 9.84, "learning_rate": 4.5079643584370035e-05, "loss": 2.4492, "step": 1987000 }, { "epoch": 9.85, "learning_rate": 4.5078404997943945e-05, "loss": 2.468, "step": 1987500 }, { "epoch": 9.85, "learning_rate": 4.507716888869072e-05, "loss": 2.4591, "step": 1988000 }, { "epoch": 9.85, "learning_rate": 4.507593030226464e-05, "loss": 2.4664, "step": 1988500 }, { "epoch": 9.85, "learning_rate": 4.5074691715838554e-05, "loss": 2.4511, "step": 1989000 }, { "epoch": 9.86, "learning_rate": 4.507345312941247e-05, "loss": 2.4497, "step": 1989500 }, { "epoch": 9.86, "learning_rate": 4.507221454298639e-05, "loss": 2.4432, "step": 1990000 }, { "epoch": 9.86, "learning_rate": 4.50709759565603e-05, "loss": 2.4519, "step": 1990500 }, { "epoch": 9.86, "learning_rate": 4.5069737370134215e-05, "loss": 2.4516, "step": 1991000 }, { "epoch": 9.87, "learning_rate": 4.506849878370813e-05, "loss": 2.4417, "step": 1991500 }, { "epoch": 9.87, "learning_rate": 4.506726019728205e-05, "loss": 2.464, "step": 1992000 }, { "epoch": 9.87, "learning_rate": 4.5066021610855966e-05, "loss": 2.4332, "step": 1992500 }, { "epoch": 9.87, "learning_rate": 4.5064783024429876e-05, "loss": 2.4693, "step": 1993000 }, { "epoch": 9.88, "learning_rate": 4.506354443800379e-05, "loss": 2.4477, "step": 1993500 }, { "epoch": 9.88, "learning_rate": 4.506230585157771e-05, "loss": 2.4455, "step": 1994000 }, { "epoch": 9.88, "learning_rate": 4.506106974232448e-05, "loss": 2.4597, "step": 1994500 }, { "epoch": 9.88, "learning_rate": 4.5059833633071255e-05, "loss": 2.446, "step": 1995000 }, { "epoch": 9.89, "learning_rate": 4.505859504664517e-05, "loss": 2.4577, "step": 1995500 }, { "epoch": 9.89, "learning_rate": 4.505735646021909e-05, "loss": 2.4678, "step": 1996000 }, { "epoch": 9.89, "learning_rate": 4.5056117873793005e-05, "loss": 2.4562, "step": 1996500 }, { "epoch": 9.89, "learning_rate": 4.5054879287366916e-05, "loss": 2.4657, "step": 1997000 }, { "epoch": 9.9, "learning_rate": 4.505364070094083e-05, "loss": 2.433, "step": 1997500 }, { "epoch": 9.9, "learning_rate": 4.505240211451475e-05, "loss": 2.4243, "step": 1998000 }, { "epoch": 9.9, "learning_rate": 4.5051163528088666e-05, "loss": 2.4324, "step": 1998500 }, { "epoch": 9.9, "learning_rate": 4.504992494166258e-05, "loss": 2.4494, "step": 1999000 }, { "epoch": 9.91, "learning_rate": 4.50486863552365e-05, "loss": 2.4347, "step": 1999500 }, { "epoch": 9.91, "learning_rate": 4.504744776881041e-05, "loss": 2.4345, "step": 2000000 }, { "epoch": 9.91, "learning_rate": 4.504620918238433e-05, "loss": 2.4564, "step": 2000500 }, { "epoch": 9.91, "learning_rate": 4.5044973073131096e-05, "loss": 2.4482, "step": 2001000 }, { "epoch": 9.92, "learning_rate": 4.504373448670501e-05, "loss": 2.4592, "step": 2001500 }, { "epoch": 9.92, "learning_rate": 4.504249590027893e-05, "loss": 2.4584, "step": 2002000 }, { "epoch": 9.92, "learning_rate": 4.504125731385285e-05, "loss": 2.4937, "step": 2002500 }, { "epoch": 9.92, "learning_rate": 4.5040018727426764e-05, "loss": 2.4234, "step": 2003000 }, { "epoch": 9.93, "learning_rate": 4.503878014100068e-05, "loss": 2.4364, "step": 2003500 }, { "epoch": 9.93, "learning_rate": 4.50375415545746e-05, "loss": 2.4408, "step": 2004000 }, { "epoch": 9.93, "learning_rate": 4.5036302968148515e-05, "loss": 2.4403, "step": 2004500 }, { "epoch": 9.93, "learning_rate": 4.503506685889528e-05, "loss": 2.4583, "step": 2005000 }, { "epoch": 9.94, "learning_rate": 4.503383074964205e-05, "loss": 2.4657, "step": 2005500 }, { "epoch": 9.94, "learning_rate": 4.503259216321597e-05, "loss": 2.4634, "step": 2006000 }, { "epoch": 9.94, "learning_rate": 4.503135357678988e-05, "loss": 2.4743, "step": 2006500 }, { "epoch": 9.94, "learning_rate": 4.5030114990363796e-05, "loss": 2.4383, "step": 2007000 }, { "epoch": 9.95, "learning_rate": 4.502887640393771e-05, "loss": 2.4592, "step": 2007500 }, { "epoch": 9.95, "learning_rate": 4.502764029468449e-05, "loss": 2.458, "step": 2008000 }, { "epoch": 9.95, "learning_rate": 4.5026401708258406e-05, "loss": 2.4377, "step": 2008500 }, { "epoch": 9.95, "learning_rate": 4.502516312183232e-05, "loss": 2.4445, "step": 2009000 }, { "epoch": 9.96, "learning_rate": 4.502392453540623e-05, "loss": 2.4318, "step": 2009500 }, { "epoch": 9.96, "learning_rate": 4.502268594898015e-05, "loss": 2.4583, "step": 2010000 }, { "epoch": 9.96, "learning_rate": 4.502144983972692e-05, "loss": 2.4609, "step": 2010500 }, { "epoch": 9.96, "learning_rate": 4.5020211253300835e-05, "loss": 2.4503, "step": 2011000 }, { "epoch": 9.97, "learning_rate": 4.501897266687475e-05, "loss": 2.4607, "step": 2011500 }, { "epoch": 9.97, "learning_rate": 4.501773408044867e-05, "loss": 2.4503, "step": 2012000 }, { "epoch": 9.97, "learning_rate": 4.501649549402258e-05, "loss": 2.4397, "step": 2012500 }, { "epoch": 9.97, "learning_rate": 4.5015256907596496e-05, "loss": 2.4582, "step": 2013000 }, { "epoch": 9.98, "learning_rate": 4.501401832117041e-05, "loss": 2.438, "step": 2013500 }, { "epoch": 9.98, "learning_rate": 4.501277973474433e-05, "loss": 2.462, "step": 2014000 }, { "epoch": 9.98, "learning_rate": 4.5011543625491106e-05, "loss": 2.4676, "step": 2014500 }, { "epoch": 9.98, "learning_rate": 4.501030751623787e-05, "loss": 2.4411, "step": 2015000 }, { "epoch": 9.99, "learning_rate": 4.5009068929811785e-05, "loss": 2.4521, "step": 2015500 }, { "epoch": 9.99, "learning_rate": 4.50078303433857e-05, "loss": 2.4713, "step": 2016000 }, { "epoch": 9.99, "learning_rate": 4.500659175695962e-05, "loss": 2.432, "step": 2016500 }, { "epoch": 9.99, "learning_rate": 4.5005353170533535e-05, "loss": 2.4529, "step": 2017000 }, { "epoch": 10.0, "learning_rate": 4.500411458410745e-05, "loss": 2.4451, "step": 2017500 }, { "epoch": 10.0, "learning_rate": 4.500287847485422e-05, "loss": 2.4712, "step": 2018000 }, { "epoch": 10.0, "eval_accuracy": 0.6423499793751883, "eval_accuracy_mlm": 0.5954648160103292, "eval_accuracy_nsp": 0.8633937221278716, "eval_loss": 2.427269697189331, "eval_runtime": 146.0474, "eval_samples_per_second": 1745.728, "eval_steps_per_second": 72.743, "step": 2018430 }, { "epoch": 10.0, "learning_rate": 4.500164236560099e-05, "loss": 2.4274, "step": 2018500 }, { "epoch": 10.0, "learning_rate": 4.500040377917491e-05, "loss": 2.4295, "step": 2019000 }, { "epoch": 10.01, "learning_rate": 4.4999165192748824e-05, "loss": 2.3972, "step": 2019500 }, { "epoch": 10.01, "learning_rate": 4.499792660632274e-05, "loss": 2.4053, "step": 2020000 }, { "epoch": 10.01, "learning_rate": 4.499668801989666e-05, "loss": 2.4178, "step": 2020500 }, { "epoch": 10.01, "learning_rate": 4.499544943347057e-05, "loss": 2.4151, "step": 2021000 }, { "epoch": 10.02, "learning_rate": 4.4994210847044485e-05, "loss": 2.4157, "step": 2021500 }, { "epoch": 10.02, "learning_rate": 4.49929722606184e-05, "loss": 2.4145, "step": 2022000 }, { "epoch": 10.02, "learning_rate": 4.499173367419232e-05, "loss": 2.4341, "step": 2022500 }, { "epoch": 10.02, "learning_rate": 4.499050004211194e-05, "loss": 2.4384, "step": 2023000 }, { "epoch": 10.03, "learning_rate": 4.4989261455685856e-05, "loss": 2.4209, "step": 2023500 }, { "epoch": 10.03, "learning_rate": 4.498802286925977e-05, "loss": 2.4392, "step": 2024000 }, { "epoch": 10.03, "learning_rate": 4.498678428283369e-05, "loss": 2.4234, "step": 2024500 }, { "epoch": 10.03, "learning_rate": 4.498554569640761e-05, "loss": 2.412, "step": 2025000 }, { "epoch": 10.04, "learning_rate": 4.4984307109981524e-05, "loss": 2.4119, "step": 2025500 }, { "epoch": 10.04, "learning_rate": 4.498306852355544e-05, "loss": 2.4244, "step": 2026000 }, { "epoch": 10.04, "learning_rate": 4.498182993712936e-05, "loss": 2.4498, "step": 2026500 }, { "epoch": 10.04, "learning_rate": 4.4980591350703275e-05, "loss": 2.4358, "step": 2027000 }, { "epoch": 10.04, "learning_rate": 4.497935276427719e-05, "loss": 2.4245, "step": 2027500 }, { "epoch": 10.05, "learning_rate": 4.497811913219681e-05, "loss": 2.4284, "step": 2028000 }, { "epoch": 10.05, "learning_rate": 4.497688054577073e-05, "loss": 2.4131, "step": 2028500 }, { "epoch": 10.05, "learning_rate": 4.497564443651749e-05, "loss": 2.4187, "step": 2029000 }, { "epoch": 10.05, "learning_rate": 4.497440585009141e-05, "loss": 2.4189, "step": 2029500 }, { "epoch": 10.06, "learning_rate": 4.4973167263665325e-05, "loss": 2.4364, "step": 2030000 }, { "epoch": 10.06, "learning_rate": 4.497192867723924e-05, "loss": 2.4455, "step": 2030500 }, { "epoch": 10.06, "learning_rate": 4.497069009081316e-05, "loss": 2.4064, "step": 2031000 }, { "epoch": 10.06, "learning_rate": 4.4969451504387076e-05, "loss": 2.4481, "step": 2031500 }, { "epoch": 10.07, "learning_rate": 4.4968212917960986e-05, "loss": 2.4151, "step": 2032000 }, { "epoch": 10.07, "learning_rate": 4.49669743315349e-05, "loss": 2.4254, "step": 2032500 }, { "epoch": 10.07, "learning_rate": 4.496573574510882e-05, "loss": 2.428, "step": 2033000 }, { "epoch": 10.07, "learning_rate": 4.496449715868274e-05, "loss": 2.4263, "step": 2033500 }, { "epoch": 10.08, "learning_rate": 4.4963258572256654e-05, "loss": 2.4114, "step": 2034000 }, { "epoch": 10.08, "learning_rate": 4.496201998583057e-05, "loss": 2.4332, "step": 2034500 }, { "epoch": 10.08, "learning_rate": 4.496078139940449e-05, "loss": 2.4325, "step": 2035000 }, { "epoch": 10.08, "learning_rate": 4.4959545290151257e-05, "loss": 2.4009, "step": 2035500 }, { "epoch": 10.09, "learning_rate": 4.4958306703725173e-05, "loss": 2.435, "step": 2036000 }, { "epoch": 10.09, "learning_rate": 4.495706811729909e-05, "loss": 2.4103, "step": 2036500 }, { "epoch": 10.09, "learning_rate": 4.495582953087301e-05, "loss": 2.4359, "step": 2037000 }, { "epoch": 10.09, "learning_rate": 4.4954590944446924e-05, "loss": 2.4256, "step": 2037500 }, { "epoch": 10.1, "learning_rate": 4.495335235802084e-05, "loss": 2.4242, "step": 2038000 }, { "epoch": 10.1, "learning_rate": 4.495211377159476e-05, "loss": 2.4267, "step": 2038500 }, { "epoch": 10.1, "learning_rate": 4.4950875185168675e-05, "loss": 2.4368, "step": 2039000 }, { "epoch": 10.1, "learning_rate": 4.4949641553088296e-05, "loss": 2.4211, "step": 2039500 }, { "epoch": 10.11, "learning_rate": 4.494840296666221e-05, "loss": 2.4296, "step": 2040000 }, { "epoch": 10.11, "learning_rate": 4.494716438023613e-05, "loss": 2.4401, "step": 2040500 }, { "epoch": 10.11, "learning_rate": 4.4945925793810047e-05, "loss": 2.4441, "step": 2041000 }, { "epoch": 10.11, "learning_rate": 4.494468720738396e-05, "loss": 2.4434, "step": 2041500 }, { "epoch": 10.12, "learning_rate": 4.4943448620957874e-05, "loss": 2.4475, "step": 2042000 }, { "epoch": 10.12, "learning_rate": 4.494221003453179e-05, "loss": 2.4398, "step": 2042500 }, { "epoch": 10.12, "learning_rate": 4.494097144810571e-05, "loss": 2.4278, "step": 2043000 }, { "epoch": 10.12, "learning_rate": 4.4939732861679624e-05, "loss": 2.4079, "step": 2043500 }, { "epoch": 10.13, "learning_rate": 4.493849675242639e-05, "loss": 2.4734, "step": 2044000 }, { "epoch": 10.13, "learning_rate": 4.49372581660003e-05, "loss": 2.4516, "step": 2044500 }, { "epoch": 10.13, "learning_rate": 4.493601957957422e-05, "loss": 2.4456, "step": 2045000 }, { "epoch": 10.13, "learning_rate": 4.493478099314814e-05, "loss": 2.4319, "step": 2045500 }, { "epoch": 10.14, "learning_rate": 4.4933542406722054e-05, "loss": 2.4348, "step": 2046000 }, { "epoch": 10.14, "learning_rate": 4.493230382029597e-05, "loss": 2.4237, "step": 2046500 }, { "epoch": 10.14, "learning_rate": 4.493106771104275e-05, "loss": 2.4202, "step": 2047000 }, { "epoch": 10.14, "learning_rate": 4.492982912461666e-05, "loss": 2.421, "step": 2047500 }, { "epoch": 10.15, "learning_rate": 4.492859301536343e-05, "loss": 2.4291, "step": 2048000 }, { "epoch": 10.15, "learning_rate": 4.4927356906110194e-05, "loss": 2.422, "step": 2048500 }, { "epoch": 10.15, "learning_rate": 4.492611831968411e-05, "loss": 2.4306, "step": 2049000 }, { "epoch": 10.15, "learning_rate": 4.492487973325803e-05, "loss": 2.4154, "step": 2049500 }, { "epoch": 10.16, "learning_rate": 4.4923641146831945e-05, "loss": 2.4207, "step": 2050000 }, { "epoch": 10.16, "learning_rate": 4.492240256040586e-05, "loss": 2.4277, "step": 2050500 }, { "epoch": 10.16, "learning_rate": 4.492116397397978e-05, "loss": 2.4082, "step": 2051000 }, { "epoch": 10.16, "learning_rate": 4.4919925387553696e-05, "loss": 2.4284, "step": 2051500 }, { "epoch": 10.17, "learning_rate": 4.491868680112761e-05, "loss": 2.4324, "step": 2052000 }, { "epoch": 10.17, "learning_rate": 4.491744821470153e-05, "loss": 2.418, "step": 2052500 }, { "epoch": 10.17, "learning_rate": 4.491620962827545e-05, "loss": 2.4193, "step": 2053000 }, { "epoch": 10.17, "learning_rate": 4.4914971041849364e-05, "loss": 2.4341, "step": 2053500 }, { "epoch": 10.18, "learning_rate": 4.4913732455423274e-05, "loss": 2.4189, "step": 2054000 }, { "epoch": 10.18, "learning_rate": 4.491249634617005e-05, "loss": 2.4323, "step": 2054500 }, { "epoch": 10.18, "learning_rate": 4.4911257759743966e-05, "loss": 2.4587, "step": 2055000 }, { "epoch": 10.18, "learning_rate": 4.491002165049073e-05, "loss": 2.4278, "step": 2055500 }, { "epoch": 10.19, "learning_rate": 4.4908783064064645e-05, "loss": 2.4202, "step": 2056000 }, { "epoch": 10.19, "learning_rate": 4.490754447763856e-05, "loss": 2.4481, "step": 2056500 }, { "epoch": 10.19, "learning_rate": 4.490630589121248e-05, "loss": 2.4114, "step": 2057000 }, { "epoch": 10.19, "learning_rate": 4.490506978195925e-05, "loss": 2.4111, "step": 2057500 }, { "epoch": 10.2, "learning_rate": 4.4903831195533165e-05, "loss": 2.4391, "step": 2058000 }, { "epoch": 10.2, "learning_rate": 4.490259260910708e-05, "loss": 2.4327, "step": 2058500 }, { "epoch": 10.2, "learning_rate": 4.4901354022681e-05, "loss": 2.4279, "step": 2059000 }, { "epoch": 10.2, "learning_rate": 4.4900115436254916e-05, "loss": 2.4355, "step": 2059500 }, { "epoch": 10.21, "learning_rate": 4.4898881804174536e-05, "loss": 2.4252, "step": 2060000 }, { "epoch": 10.21, "learning_rate": 4.489764321774845e-05, "loss": 2.4196, "step": 2060500 }, { "epoch": 10.21, "learning_rate": 4.4896404631322363e-05, "loss": 2.4315, "step": 2061000 }, { "epoch": 10.21, "learning_rate": 4.489516604489628e-05, "loss": 2.4199, "step": 2061500 }, { "epoch": 10.22, "learning_rate": 4.48939274584702e-05, "loss": 2.4235, "step": 2062000 }, { "epoch": 10.22, "learning_rate": 4.4892688872044114e-05, "loss": 2.3984, "step": 2062500 }, { "epoch": 10.22, "learning_rate": 4.489145028561803e-05, "loss": 2.4339, "step": 2063000 }, { "epoch": 10.22, "learning_rate": 4.489021169919195e-05, "loss": 2.3881, "step": 2063500 }, { "epoch": 10.23, "learning_rate": 4.4888973112765865e-05, "loss": 2.4048, "step": 2064000 }, { "epoch": 10.23, "learning_rate": 4.488773452633978e-05, "loss": 2.426, "step": 2064500 }, { "epoch": 10.23, "learning_rate": 4.48864959399137e-05, "loss": 2.4169, "step": 2065000 }, { "epoch": 10.23, "learning_rate": 4.4885257353487616e-05, "loss": 2.4291, "step": 2065500 }, { "epoch": 10.24, "learning_rate": 4.488402124423438e-05, "loss": 2.422, "step": 2066000 }, { "epoch": 10.24, "learning_rate": 4.4882782657808295e-05, "loss": 2.4173, "step": 2066500 }, { "epoch": 10.24, "learning_rate": 4.488154407138221e-05, "loss": 2.415, "step": 2067000 }, { "epoch": 10.24, "learning_rate": 4.488030548495613e-05, "loss": 2.4058, "step": 2067500 }, { "epoch": 10.25, "learning_rate": 4.4879066898530046e-05, "loss": 2.4282, "step": 2068000 }, { "epoch": 10.25, "learning_rate": 4.4877830789276814e-05, "loss": 2.4527, "step": 2068500 }, { "epoch": 10.25, "learning_rate": 4.487659220285073e-05, "loss": 2.4305, "step": 2069000 }, { "epoch": 10.25, "learning_rate": 4.487535361642465e-05, "loss": 2.4422, "step": 2069500 }, { "epoch": 10.26, "learning_rate": 4.4874115029998565e-05, "loss": 2.4138, "step": 2070000 }, { "epoch": 10.26, "learning_rate": 4.4872878920745334e-05, "loss": 2.4326, "step": 2070500 }, { "epoch": 10.26, "learning_rate": 4.487164033431925e-05, "loss": 2.4561, "step": 2071000 }, { "epoch": 10.26, "learning_rate": 4.487040174789317e-05, "loss": 2.4219, "step": 2071500 }, { "epoch": 10.27, "learning_rate": 4.4869163161467085e-05, "loss": 2.4363, "step": 2072000 }, { "epoch": 10.27, "learning_rate": 4.4867924575040995e-05, "loss": 2.4182, "step": 2072500 }, { "epoch": 10.27, "learning_rate": 4.486668598861491e-05, "loss": 2.4498, "step": 2073000 }, { "epoch": 10.27, "learning_rate": 4.486544740218883e-05, "loss": 2.4262, "step": 2073500 }, { "epoch": 10.28, "learning_rate": 4.4864208815762746e-05, "loss": 2.4582, "step": 2074000 }, { "epoch": 10.28, "learning_rate": 4.4862972706509515e-05, "loss": 2.4353, "step": 2074500 }, { "epoch": 10.28, "learning_rate": 4.486173412008343e-05, "loss": 2.4298, "step": 2075000 }, { "epoch": 10.28, "learning_rate": 4.486049553365735e-05, "loss": 2.4233, "step": 2075500 }, { "epoch": 10.29, "learning_rate": 4.4859259424404124e-05, "loss": 2.4328, "step": 2076000 }, { "epoch": 10.29, "learning_rate": 4.4858023315150886e-05, "loss": 2.4591, "step": 2076500 }, { "epoch": 10.29, "learning_rate": 4.48567847287248e-05, "loss": 2.4568, "step": 2077000 }, { "epoch": 10.29, "learning_rate": 4.485554614229872e-05, "loss": 2.4302, "step": 2077500 }, { "epoch": 10.3, "learning_rate": 4.485430755587264e-05, "loss": 2.4473, "step": 2078000 }, { "epoch": 10.3, "learning_rate": 4.4853071446619406e-05, "loss": 2.4297, "step": 2078500 }, { "epoch": 10.3, "learning_rate": 4.4851835337366174e-05, "loss": 2.4359, "step": 2079000 }, { "epoch": 10.3, "learning_rate": 4.485059675094009e-05, "loss": 2.4481, "step": 2079500 }, { "epoch": 10.31, "learning_rate": 4.4849358164514e-05, "loss": 2.4354, "step": 2080000 }, { "epoch": 10.31, "learning_rate": 4.484811957808792e-05, "loss": 2.435, "step": 2080500 }, { "epoch": 10.31, "learning_rate": 4.484688346883469e-05, "loss": 2.4235, "step": 2081000 }, { "epoch": 10.31, "learning_rate": 4.4845644882408604e-05, "loss": 2.419, "step": 2081500 }, { "epoch": 10.31, "learning_rate": 4.484440629598252e-05, "loss": 2.4439, "step": 2082000 }, { "epoch": 10.32, "learning_rate": 4.484316770955644e-05, "loss": 2.423, "step": 2082500 }, { "epoch": 10.32, "learning_rate": 4.4841929123130355e-05, "loss": 2.4478, "step": 2083000 }, { "epoch": 10.32, "learning_rate": 4.484069053670427e-05, "loss": 2.4356, "step": 2083500 }, { "epoch": 10.32, "learning_rate": 4.483945195027819e-05, "loss": 2.4367, "step": 2084000 }, { "epoch": 10.33, "learning_rate": 4.4838213363852106e-05, "loss": 2.4409, "step": 2084500 }, { "epoch": 10.33, "learning_rate": 4.483697477742602e-05, "loss": 2.4238, "step": 2085000 }, { "epoch": 10.33, "learning_rate": 4.483573619099994e-05, "loss": 2.4366, "step": 2085500 }, { "epoch": 10.33, "learning_rate": 4.4834497604573856e-05, "loss": 2.4146, "step": 2086000 }, { "epoch": 10.34, "learning_rate": 4.4833259018147773e-05, "loss": 2.4158, "step": 2086500 }, { "epoch": 10.34, "learning_rate": 4.483202043172169e-05, "loss": 2.4511, "step": 2087000 }, { "epoch": 10.34, "learning_rate": 4.483078184529561e-05, "loss": 2.42, "step": 2087500 }, { "epoch": 10.34, "learning_rate": 4.4829543258869524e-05, "loss": 2.4163, "step": 2088000 }, { "epoch": 10.35, "learning_rate": 4.482830467244344e-05, "loss": 2.4279, "step": 2088500 }, { "epoch": 10.35, "learning_rate": 4.482706608601735e-05, "loss": 2.451, "step": 2089000 }, { "epoch": 10.35, "learning_rate": 4.482582749959127e-05, "loss": 2.4472, "step": 2089500 }, { "epoch": 10.35, "learning_rate": 4.4824588913165185e-05, "loss": 2.4138, "step": 2090000 }, { "epoch": 10.36, "learning_rate": 4.48233503267391e-05, "loss": 2.4263, "step": 2090500 }, { "epoch": 10.36, "learning_rate": 4.482211174031302e-05, "loss": 2.4066, "step": 2091000 }, { "epoch": 10.36, "learning_rate": 4.482087563105979e-05, "loss": 2.4424, "step": 2091500 }, { "epoch": 10.36, "learning_rate": 4.48196370446337e-05, "loss": 2.4485, "step": 2092000 }, { "epoch": 10.37, "learning_rate": 4.4818398458207615e-05, "loss": 2.4437, "step": 2092500 }, { "epoch": 10.37, "learning_rate": 4.481716234895439e-05, "loss": 2.4505, "step": 2093000 }, { "epoch": 10.37, "learning_rate": 4.481592376252831e-05, "loss": 2.4309, "step": 2093500 }, { "epoch": 10.37, "learning_rate": 4.4814685176102224e-05, "loss": 2.4408, "step": 2094000 }, { "epoch": 10.38, "learning_rate": 4.481344658967614e-05, "loss": 2.4208, "step": 2094500 }, { "epoch": 10.38, "learning_rate": 4.481220800325005e-05, "loss": 2.3977, "step": 2095000 }, { "epoch": 10.38, "learning_rate": 4.481097189399682e-05, "loss": 2.4398, "step": 2095500 }, { "epoch": 10.38, "learning_rate": 4.480973330757074e-05, "loss": 2.4252, "step": 2096000 }, { "epoch": 10.39, "learning_rate": 4.4808494721144654e-05, "loss": 2.4061, "step": 2096500 }, { "epoch": 10.39, "learning_rate": 4.480725613471857e-05, "loss": 2.4416, "step": 2097000 }, { "epoch": 10.39, "learning_rate": 4.480601754829249e-05, "loss": 2.4346, "step": 2097500 }, { "epoch": 10.39, "learning_rate": 4.4804778961866405e-05, "loss": 2.4364, "step": 2098000 }, { "epoch": 10.4, "learning_rate": 4.4803540375440315e-05, "loss": 2.4197, "step": 2098500 }, { "epoch": 10.4, "learning_rate": 4.480230178901423e-05, "loss": 2.423, "step": 2099000 }, { "epoch": 10.4, "learning_rate": 4.480106320258815e-05, "loss": 2.4381, "step": 2099500 }, { "epoch": 10.4, "learning_rate": 4.4799824616162066e-05, "loss": 2.4665, "step": 2100000 }, { "epoch": 10.41, "learning_rate": 4.479858602973598e-05, "loss": 2.4273, "step": 2100500 }, { "epoch": 10.41, "learning_rate": 4.479734992048276e-05, "loss": 2.4372, "step": 2101000 }, { "epoch": 10.41, "learning_rate": 4.479611133405667e-05, "loss": 2.429, "step": 2101500 }, { "epoch": 10.41, "learning_rate": 4.4794872747630585e-05, "loss": 2.4399, "step": 2102000 }, { "epoch": 10.42, "learning_rate": 4.47936341612045e-05, "loss": 2.4392, "step": 2102500 }, { "epoch": 10.42, "learning_rate": 4.479239805195127e-05, "loss": 2.4241, "step": 2103000 }, { "epoch": 10.42, "learning_rate": 4.479115946552519e-05, "loss": 2.4679, "step": 2103500 }, { "epoch": 10.42, "learning_rate": 4.4789920879099105e-05, "loss": 2.4386, "step": 2104000 }, { "epoch": 10.43, "learning_rate": 4.4788682292673015e-05, "loss": 2.4394, "step": 2104500 }, { "epoch": 10.43, "learning_rate": 4.478744370624693e-05, "loss": 2.4168, "step": 2105000 }, { "epoch": 10.43, "learning_rate": 4.478620511982085e-05, "loss": 2.4562, "step": 2105500 }, { "epoch": 10.43, "learning_rate": 4.4784969010567625e-05, "loss": 2.4321, "step": 2106000 }, { "epoch": 10.44, "learning_rate": 4.478373042414154e-05, "loss": 2.4339, "step": 2106500 }, { "epoch": 10.44, "learning_rate": 4.478249183771546e-05, "loss": 2.4364, "step": 2107000 }, { "epoch": 10.44, "learning_rate": 4.478125325128937e-05, "loss": 2.3967, "step": 2107500 }, { "epoch": 10.44, "learning_rate": 4.4780014664863286e-05, "loss": 2.4516, "step": 2108000 }, { "epoch": 10.45, "learning_rate": 4.47787760784372e-05, "loss": 2.4217, "step": 2108500 }, { "epoch": 10.45, "learning_rate": 4.477753749201112e-05, "loss": 2.418, "step": 2109000 }, { "epoch": 10.45, "learning_rate": 4.4776298905585036e-05, "loss": 2.4375, "step": 2109500 }, { "epoch": 10.45, "learning_rate": 4.477506031915895e-05, "loss": 2.4179, "step": 2110000 }, { "epoch": 10.46, "learning_rate": 4.477382173273287e-05, "loss": 2.4415, "step": 2110500 }, { "epoch": 10.46, "learning_rate": 4.477258562347963e-05, "loss": 2.4335, "step": 2111000 }, { "epoch": 10.46, "learning_rate": 4.477134703705355e-05, "loss": 2.4394, "step": 2111500 }, { "epoch": 10.46, "learning_rate": 4.4770108450627466e-05, "loss": 2.441, "step": 2112000 }, { "epoch": 10.47, "learning_rate": 4.476886986420138e-05, "loss": 2.4271, "step": 2112500 }, { "epoch": 10.47, "learning_rate": 4.47676312777753e-05, "loss": 2.4607, "step": 2113000 }, { "epoch": 10.47, "learning_rate": 4.476639269134922e-05, "loss": 2.4462, "step": 2113500 }, { "epoch": 10.47, "learning_rate": 4.4765154104923134e-05, "loss": 2.4493, "step": 2114000 }, { "epoch": 10.48, "learning_rate": 4.47639179956699e-05, "loss": 2.4344, "step": 2114500 }, { "epoch": 10.48, "learning_rate": 4.476267940924382e-05, "loss": 2.4534, "step": 2115000 }, { "epoch": 10.48, "learning_rate": 4.4761440822817736e-05, "loss": 2.4273, "step": 2115500 }, { "epoch": 10.48, "learning_rate": 4.476020223639165e-05, "loss": 2.4293, "step": 2116000 }, { "epoch": 10.49, "learning_rate": 4.475896612713842e-05, "loss": 2.4547, "step": 2116500 }, { "epoch": 10.49, "learning_rate": 4.475773001788519e-05, "loss": 2.4524, "step": 2117000 }, { "epoch": 10.49, "learning_rate": 4.475649143145911e-05, "loss": 2.4532, "step": 2117500 }, { "epoch": 10.49, "learning_rate": 4.4755252845033025e-05, "loss": 2.4419, "step": 2118000 }, { "epoch": 10.5, "learning_rate": 4.475401425860694e-05, "loss": 2.4467, "step": 2118500 }, { "epoch": 10.5, "learning_rate": 4.475277567218086e-05, "loss": 2.4355, "step": 2119000 }, { "epoch": 10.5, "learning_rate": 4.475153956292763e-05, "loss": 2.4465, "step": 2119500 }, { "epoch": 10.5, "learning_rate": 4.4750300976501544e-05, "loss": 2.4484, "step": 2120000 }, { "epoch": 10.51, "learning_rate": 4.474906239007546e-05, "loss": 2.4718, "step": 2120500 }, { "epoch": 10.51, "learning_rate": 4.474782380364937e-05, "loss": 2.4429, "step": 2121000 }, { "epoch": 10.51, "learning_rate": 4.474658521722329e-05, "loss": 2.461, "step": 2121500 }, { "epoch": 10.51, "learning_rate": 4.4745346630797205e-05, "loss": 2.4328, "step": 2122000 }, { "epoch": 10.52, "learning_rate": 4.474410804437112e-05, "loss": 2.457, "step": 2122500 }, { "epoch": 10.52, "learning_rate": 4.474286945794504e-05, "loss": 2.4409, "step": 2123000 }, { "epoch": 10.52, "learning_rate": 4.474163087151895e-05, "loss": 2.4528, "step": 2123500 }, { "epoch": 10.52, "learning_rate": 4.4740392285092866e-05, "loss": 2.4398, "step": 2124000 }, { "epoch": 10.53, "learning_rate": 4.473915617583964e-05, "loss": 2.419, "step": 2124500 }, { "epoch": 10.53, "learning_rate": 4.473791758941356e-05, "loss": 2.4351, "step": 2125000 }, { "epoch": 10.53, "learning_rate": 4.4736679002987476e-05, "loss": 2.4138, "step": 2125500 }, { "epoch": 10.53, "learning_rate": 4.473544041656139e-05, "loss": 2.4318, "step": 2126000 }, { "epoch": 10.54, "learning_rate": 4.47342018301353e-05, "loss": 2.4301, "step": 2126500 }, { "epoch": 10.54, "learning_rate": 4.473296324370922e-05, "loss": 2.4201, "step": 2127000 }, { "epoch": 10.54, "learning_rate": 4.473172465728314e-05, "loss": 2.439, "step": 2127500 }, { "epoch": 10.54, "learning_rate": 4.4730488548029905e-05, "loss": 2.4401, "step": 2128000 }, { "epoch": 10.55, "learning_rate": 4.472924996160382e-05, "loss": 2.4265, "step": 2128500 }, { "epoch": 10.55, "learning_rate": 4.472801137517774e-05, "loss": 2.4333, "step": 2129000 }, { "epoch": 10.55, "learning_rate": 4.4726772788751656e-05, "loss": 2.4448, "step": 2129500 }, { "epoch": 10.55, "learning_rate": 4.4725534202325566e-05, "loss": 2.4431, "step": 2130000 }, { "epoch": 10.56, "learning_rate": 4.472429561589948e-05, "loss": 2.4097, "step": 2130500 }, { "epoch": 10.56, "learning_rate": 4.47230570294734e-05, "loss": 2.4432, "step": 2131000 }, { "epoch": 10.56, "learning_rate": 4.472181844304732e-05, "loss": 2.4459, "step": 2131500 }, { "epoch": 10.56, "learning_rate": 4.472058233379409e-05, "loss": 2.4461, "step": 2132000 }, { "epoch": 10.57, "learning_rate": 4.4719343747368e-05, "loss": 2.4396, "step": 2132500 }, { "epoch": 10.57, "learning_rate": 4.471810763811478e-05, "loss": 2.4402, "step": 2133000 }, { "epoch": 10.57, "learning_rate": 4.4716869051688695e-05, "loss": 2.4444, "step": 2133500 }, { "epoch": 10.57, "learning_rate": 4.471563046526261e-05, "loss": 2.4261, "step": 2134000 }, { "epoch": 10.58, "learning_rate": 4.471439187883652e-05, "loss": 2.4395, "step": 2134500 }, { "epoch": 10.58, "learning_rate": 4.471315329241044e-05, "loss": 2.4222, "step": 2135000 }, { "epoch": 10.58, "learning_rate": 4.4711914705984356e-05, "loss": 2.4348, "step": 2135500 }, { "epoch": 10.58, "learning_rate": 4.4710678596731125e-05, "loss": 2.4261, "step": 2136000 }, { "epoch": 10.58, "learning_rate": 4.470944001030504e-05, "loss": 2.4462, "step": 2136500 }, { "epoch": 10.59, "learning_rate": 4.470820142387896e-05, "loss": 2.4554, "step": 2137000 }, { "epoch": 10.59, "learning_rate": 4.4706962837452876e-05, "loss": 2.4547, "step": 2137500 }, { "epoch": 10.59, "learning_rate": 4.470572425102679e-05, "loss": 2.4258, "step": 2138000 }, { "epoch": 10.59, "learning_rate": 4.470448566460071e-05, "loss": 2.4368, "step": 2138500 }, { "epoch": 10.6, "learning_rate": 4.470324707817462e-05, "loss": 2.4417, "step": 2139000 }, { "epoch": 10.6, "learning_rate": 4.470200849174854e-05, "loss": 2.4279, "step": 2139500 }, { "epoch": 10.6, "learning_rate": 4.470077238249531e-05, "loss": 2.4251, "step": 2140000 }, { "epoch": 10.6, "learning_rate": 4.469953379606923e-05, "loss": 2.4378, "step": 2140500 }, { "epoch": 10.61, "learning_rate": 4.469829520964314e-05, "loss": 2.4518, "step": 2141000 }, { "epoch": 10.61, "learning_rate": 4.4697056623217057e-05, "loss": 2.4638, "step": 2141500 }, { "epoch": 10.61, "learning_rate": 4.4695818036790973e-05, "loss": 2.4405, "step": 2142000 }, { "epoch": 10.61, "learning_rate": 4.469457945036489e-05, "loss": 2.444, "step": 2142500 }, { "epoch": 10.62, "learning_rate": 4.469334334111166e-05, "loss": 2.4427, "step": 2143000 }, { "epoch": 10.62, "learning_rate": 4.4692104754685576e-05, "loss": 2.4335, "step": 2143500 }, { "epoch": 10.62, "learning_rate": 4.469086616825949e-05, "loss": 2.4229, "step": 2144000 }, { "epoch": 10.62, "learning_rate": 4.468962758183341e-05, "loss": 2.4291, "step": 2144500 }, { "epoch": 10.63, "learning_rate": 4.468838899540732e-05, "loss": 2.4256, "step": 2145000 }, { "epoch": 10.63, "learning_rate": 4.4687152886154096e-05, "loss": 2.4089, "step": 2145500 }, { "epoch": 10.63, "learning_rate": 4.468591429972801e-05, "loss": 2.4351, "step": 2146000 }, { "epoch": 10.63, "learning_rate": 4.468467571330193e-05, "loss": 2.4628, "step": 2146500 }, { "epoch": 10.64, "learning_rate": 4.468343960404869e-05, "loss": 2.4254, "step": 2147000 }, { "epoch": 10.64, "learning_rate": 4.468220349479546e-05, "loss": 2.4318, "step": 2147500 }, { "epoch": 10.64, "learning_rate": 4.468096490836938e-05, "loss": 2.4339, "step": 2148000 }, { "epoch": 10.64, "learning_rate": 4.4679726321943294e-05, "loss": 2.4517, "step": 2148500 }, { "epoch": 10.65, "learning_rate": 4.467848773551721e-05, "loss": 2.4626, "step": 2149000 }, { "epoch": 10.65, "learning_rate": 4.467725162626398e-05, "loss": 2.4349, "step": 2149500 }, { "epoch": 10.65, "learning_rate": 4.467601551701075e-05, "loss": 2.4428, "step": 2150000 }, { "epoch": 10.65, "learning_rate": 4.4674776930584666e-05, "loss": 2.4328, "step": 2150500 }, { "epoch": 10.66, "learning_rate": 4.467353834415858e-05, "loss": 2.4354, "step": 2151000 }, { "epoch": 10.66, "learning_rate": 4.46722997577325e-05, "loss": 2.4413, "step": 2151500 }, { "epoch": 10.66, "learning_rate": 4.467106117130641e-05, "loss": 2.4063, "step": 2152000 }, { "epoch": 10.66, "learning_rate": 4.466982258488033e-05, "loss": 2.4428, "step": 2152500 }, { "epoch": 10.67, "learning_rate": 4.4668583998454244e-05, "loss": 2.4312, "step": 2153000 }, { "epoch": 10.67, "learning_rate": 4.466734541202816e-05, "loss": 2.4444, "step": 2153500 }, { "epoch": 10.67, "learning_rate": 4.466610682560208e-05, "loss": 2.4416, "step": 2154000 }, { "epoch": 10.67, "learning_rate": 4.4664868239175994e-05, "loss": 2.4138, "step": 2154500 }, { "epoch": 10.68, "learning_rate": 4.466362965274991e-05, "loss": 2.4313, "step": 2155000 }, { "epoch": 10.68, "learning_rate": 4.466239106632383e-05, "loss": 2.419, "step": 2155500 }, { "epoch": 10.68, "learning_rate": 4.4661152479897745e-05, "loss": 2.4566, "step": 2156000 }, { "epoch": 10.68, "learning_rate": 4.4659916370644514e-05, "loss": 2.4375, "step": 2156500 }, { "epoch": 10.69, "learning_rate": 4.465867778421843e-05, "loss": 2.4304, "step": 2157000 }, { "epoch": 10.69, "learning_rate": 4.465743919779235e-05, "loss": 2.4378, "step": 2157500 }, { "epoch": 10.69, "learning_rate": 4.465620061136626e-05, "loss": 2.4383, "step": 2158000 }, { "epoch": 10.69, "learning_rate": 4.4654962024940175e-05, "loss": 2.4265, "step": 2158500 }, { "epoch": 10.7, "learning_rate": 4.4653725915686944e-05, "loss": 2.4481, "step": 2159000 }, { "epoch": 10.7, "learning_rate": 4.465248732926086e-05, "loss": 2.4098, "step": 2159500 }, { "epoch": 10.7, "learning_rate": 4.4651251220007636e-05, "loss": 2.4331, "step": 2160000 }, { "epoch": 10.7, "learning_rate": 4.465001263358155e-05, "loss": 2.419, "step": 2160500 }, { "epoch": 10.71, "learning_rate": 4.464877404715547e-05, "loss": 2.43, "step": 2161000 }, { "epoch": 10.71, "learning_rate": 4.464753546072938e-05, "loss": 2.4379, "step": 2161500 }, { "epoch": 10.71, "learning_rate": 4.46462968743033e-05, "loss": 2.4677, "step": 2162000 }, { "epoch": 10.71, "learning_rate": 4.4645058287877214e-05, "loss": 2.431, "step": 2162500 }, { "epoch": 10.72, "learning_rate": 4.464381970145113e-05, "loss": 2.4455, "step": 2163000 }, { "epoch": 10.72, "learning_rate": 4.464258111502505e-05, "loss": 2.4502, "step": 2163500 }, { "epoch": 10.72, "learning_rate": 4.4641342528598965e-05, "loss": 2.4302, "step": 2164000 }, { "epoch": 10.72, "learning_rate": 4.464010641934573e-05, "loss": 2.4198, "step": 2164500 }, { "epoch": 10.73, "learning_rate": 4.4638867832919644e-05, "loss": 2.4332, "step": 2165000 }, { "epoch": 10.73, "learning_rate": 4.463762924649356e-05, "loss": 2.4356, "step": 2165500 }, { "epoch": 10.73, "learning_rate": 4.463639066006748e-05, "loss": 2.422, "step": 2166000 }, { "epoch": 10.73, "learning_rate": 4.4635152073641395e-05, "loss": 2.4193, "step": 2166500 }, { "epoch": 10.74, "learning_rate": 4.463391348721531e-05, "loss": 2.4501, "step": 2167000 }, { "epoch": 10.74, "learning_rate": 4.463267490078923e-05, "loss": 2.4621, "step": 2167500 }, { "epoch": 10.74, "learning_rate": 4.4631436314363145e-05, "loss": 2.4338, "step": 2168000 }, { "epoch": 10.74, "learning_rate": 4.4630200205109914e-05, "loss": 2.4167, "step": 2168500 }, { "epoch": 10.75, "learning_rate": 4.462896161868383e-05, "loss": 2.4113, "step": 2169000 }, { "epoch": 10.75, "learning_rate": 4.462772303225775e-05, "loss": 2.4667, "step": 2169500 }, { "epoch": 10.75, "learning_rate": 4.462648692300452e-05, "loss": 2.4525, "step": 2170000 }, { "epoch": 10.75, "learning_rate": 4.4625248336578434e-05, "loss": 2.4503, "step": 2170500 }, { "epoch": 10.76, "learning_rate": 4.4624009750152344e-05, "loss": 2.4539, "step": 2171000 }, { "epoch": 10.76, "learning_rate": 4.462277116372626e-05, "loss": 2.4368, "step": 2171500 }, { "epoch": 10.76, "learning_rate": 4.462153257730018e-05, "loss": 2.4427, "step": 2172000 }, { "epoch": 10.76, "learning_rate": 4.4620293990874095e-05, "loss": 2.4173, "step": 2172500 }, { "epoch": 10.77, "learning_rate": 4.461905788162087e-05, "loss": 2.4316, "step": 2173000 }, { "epoch": 10.77, "learning_rate": 4.461781929519479e-05, "loss": 2.4271, "step": 2173500 }, { "epoch": 10.77, "learning_rate": 4.461658318594155e-05, "loss": 2.4018, "step": 2174000 }, { "epoch": 10.77, "learning_rate": 4.4615344599515466e-05, "loss": 2.427, "step": 2174500 }, { "epoch": 10.78, "learning_rate": 4.461410601308938e-05, "loss": 2.4359, "step": 2175000 }, { "epoch": 10.78, "learning_rate": 4.46128674266633e-05, "loss": 2.4292, "step": 2175500 }, { "epoch": 10.78, "learning_rate": 4.461163131741007e-05, "loss": 2.4656, "step": 2176000 }, { "epoch": 10.78, "learning_rate": 4.4610392730983986e-05, "loss": 2.4482, "step": 2176500 }, { "epoch": 10.79, "learning_rate": 4.46091541445579e-05, "loss": 2.4331, "step": 2177000 }, { "epoch": 10.79, "learning_rate": 4.460791555813182e-05, "loss": 2.4638, "step": 2177500 }, { "epoch": 10.79, "learning_rate": 4.4606676971705737e-05, "loss": 2.4594, "step": 2178000 }, { "epoch": 10.79, "learning_rate": 4.4605438385279654e-05, "loss": 2.4464, "step": 2178500 }, { "epoch": 10.8, "learning_rate": 4.460419979885357e-05, "loss": 2.4558, "step": 2179000 }, { "epoch": 10.8, "learning_rate": 4.460296121242749e-05, "loss": 2.4394, "step": 2179500 }, { "epoch": 10.8, "learning_rate": 4.46017226260014e-05, "loss": 2.4307, "step": 2180000 }, { "epoch": 10.8, "learning_rate": 4.4600484039575314e-05, "loss": 2.4527, "step": 2180500 }, { "epoch": 10.81, "learning_rate": 4.459924545314923e-05, "loss": 2.4379, "step": 2181000 }, { "epoch": 10.81, "learning_rate": 4.459800686672315e-05, "loss": 2.4372, "step": 2181500 }, { "epoch": 10.81, "learning_rate": 4.4596768280297065e-05, "loss": 2.4373, "step": 2182000 }, { "epoch": 10.81, "learning_rate": 4.4595532171043834e-05, "loss": 2.4486, "step": 2182500 }, { "epoch": 10.82, "learning_rate": 4.459429358461775e-05, "loss": 2.4385, "step": 2183000 }, { "epoch": 10.82, "learning_rate": 4.459305747536452e-05, "loss": 2.4328, "step": 2183500 }, { "epoch": 10.82, "learning_rate": 4.459181888893844e-05, "loss": 2.4374, "step": 2184000 }, { "epoch": 10.82, "learning_rate": 4.4590580302512354e-05, "loss": 2.4451, "step": 2184500 }, { "epoch": 10.83, "learning_rate": 4.458934171608627e-05, "loss": 2.4589, "step": 2185000 }, { "epoch": 10.83, "learning_rate": 4.458810312966019e-05, "loss": 2.451, "step": 2185500 }, { "epoch": 10.83, "learning_rate": 4.458686702040695e-05, "loss": 2.418, "step": 2186000 }, { "epoch": 10.83, "learning_rate": 4.4585628433980866e-05, "loss": 2.4478, "step": 2186500 }, { "epoch": 10.84, "learning_rate": 4.4584389847554783e-05, "loss": 2.4271, "step": 2187000 }, { "epoch": 10.84, "learning_rate": 4.45831512611287e-05, "loss": 2.4425, "step": 2187500 }, { "epoch": 10.84, "learning_rate": 4.458191267470262e-05, "loss": 2.452, "step": 2188000 }, { "epoch": 10.84, "learning_rate": 4.4580674088276534e-05, "loss": 2.4415, "step": 2188500 }, { "epoch": 10.85, "learning_rate": 4.457943550185045e-05, "loss": 2.4458, "step": 2189000 }, { "epoch": 10.85, "learning_rate": 4.457819691542436e-05, "loss": 2.4406, "step": 2189500 }, { "epoch": 10.85, "learning_rate": 4.457696080617114e-05, "loss": 2.4222, "step": 2190000 }, { "epoch": 10.85, "learning_rate": 4.4575722219745054e-05, "loss": 2.458, "step": 2190500 }, { "epoch": 10.85, "learning_rate": 4.457448363331897e-05, "loss": 2.4399, "step": 2191000 }, { "epoch": 10.86, "learning_rate": 4.457324504689289e-05, "loss": 2.4346, "step": 2191500 }, { "epoch": 10.86, "learning_rate": 4.4572008937639656e-05, "loss": 2.4664, "step": 2192000 }, { "epoch": 10.86, "learning_rate": 4.4570770351213567e-05, "loss": 2.4396, "step": 2192500 }, { "epoch": 10.86, "learning_rate": 4.4569531764787484e-05, "loss": 2.4306, "step": 2193000 }, { "epoch": 10.87, "learning_rate": 4.45682931783614e-05, "loss": 2.4317, "step": 2193500 }, { "epoch": 10.87, "learning_rate": 4.456705459193532e-05, "loss": 2.4156, "step": 2194000 }, { "epoch": 10.87, "learning_rate": 4.4565816005509234e-05, "loss": 2.444, "step": 2194500 }, { "epoch": 10.87, "learning_rate": 4.456457741908315e-05, "loss": 2.4486, "step": 2195000 }, { "epoch": 10.88, "learning_rate": 4.456333883265707e-05, "loss": 2.4602, "step": 2195500 }, { "epoch": 10.88, "learning_rate": 4.456210024623098e-05, "loss": 2.4304, "step": 2196000 }, { "epoch": 10.88, "learning_rate": 4.4560861659804895e-05, "loss": 2.4429, "step": 2196500 }, { "epoch": 10.88, "learning_rate": 4.455962307337881e-05, "loss": 2.4199, "step": 2197000 }, { "epoch": 10.89, "learning_rate": 4.455838448695273e-05, "loss": 2.4348, "step": 2197500 }, { "epoch": 10.89, "learning_rate": 4.4557145900526646e-05, "loss": 2.4346, "step": 2198000 }, { "epoch": 10.89, "learning_rate": 4.455590979127342e-05, "loss": 2.4507, "step": 2198500 }, { "epoch": 10.89, "learning_rate": 4.455467120484733e-05, "loss": 2.443, "step": 2199000 }, { "epoch": 10.9, "learning_rate": 4.455343261842125e-05, "loss": 2.4219, "step": 2199500 }, { "epoch": 10.9, "learning_rate": 4.455219650916802e-05, "loss": 2.4231, "step": 2200000 }, { "epoch": 10.9, "learning_rate": 4.4550957922741934e-05, "loss": 2.4443, "step": 2200500 }, { "epoch": 10.9, "learning_rate": 4.454971933631585e-05, "loss": 2.4357, "step": 2201000 }, { "epoch": 10.91, "learning_rate": 4.454848570423547e-05, "loss": 2.4494, "step": 2201500 }, { "epoch": 10.91, "learning_rate": 4.454724711780939e-05, "loss": 2.4197, "step": 2202000 }, { "epoch": 10.91, "learning_rate": 4.4546008531383306e-05, "loss": 2.4512, "step": 2202500 }, { "epoch": 10.91, "learning_rate": 4.454476994495722e-05, "loss": 2.445, "step": 2203000 }, { "epoch": 10.92, "learning_rate": 4.454353135853114e-05, "loss": 2.449, "step": 2203500 }, { "epoch": 10.92, "learning_rate": 4.45422952492779e-05, "loss": 2.4383, "step": 2204000 }, { "epoch": 10.92, "learning_rate": 4.454105666285182e-05, "loss": 2.4595, "step": 2204500 }, { "epoch": 10.92, "learning_rate": 4.4539818076425736e-05, "loss": 2.4192, "step": 2205000 }, { "epoch": 10.93, "learning_rate": 4.453857948999965e-05, "loss": 2.4381, "step": 2205500 }, { "epoch": 10.93, "learning_rate": 4.453734090357357e-05, "loss": 2.429, "step": 2206000 }, { "epoch": 10.93, "learning_rate": 4.453610479432034e-05, "loss": 2.4534, "step": 2206500 }, { "epoch": 10.93, "learning_rate": 4.4534866207894255e-05, "loss": 2.4423, "step": 2207000 }, { "epoch": 10.94, "learning_rate": 4.453362762146817e-05, "loss": 2.4272, "step": 2207500 }, { "epoch": 10.94, "learning_rate": 4.453239151221494e-05, "loss": 2.4552, "step": 2208000 }, { "epoch": 10.94, "learning_rate": 4.453115292578886e-05, "loss": 2.4172, "step": 2208500 }, { "epoch": 10.94, "learning_rate": 4.4529914339362775e-05, "loss": 2.4602, "step": 2209000 }, { "epoch": 10.95, "learning_rate": 4.4528675752936685e-05, "loss": 2.4529, "step": 2209500 }, { "epoch": 10.95, "learning_rate": 4.45274371665106e-05, "loss": 2.4541, "step": 2210000 }, { "epoch": 10.95, "learning_rate": 4.452619858008452e-05, "loss": 2.4447, "step": 2210500 }, { "epoch": 10.95, "learning_rate": 4.4524959993658436e-05, "loss": 2.4507, "step": 2211000 }, { "epoch": 10.96, "learning_rate": 4.452372140723235e-05, "loss": 2.4288, "step": 2211500 }, { "epoch": 10.96, "learning_rate": 4.452248282080627e-05, "loss": 2.4157, "step": 2212000 }, { "epoch": 10.96, "learning_rate": 4.4521244234380187e-05, "loss": 2.4446, "step": 2212500 }, { "epoch": 10.96, "learning_rate": 4.4520005647954103e-05, "loss": 2.4222, "step": 2213000 }, { "epoch": 10.97, "learning_rate": 4.451876953870087e-05, "loss": 2.4498, "step": 2213500 }, { "epoch": 10.97, "learning_rate": 4.451753095227479e-05, "loss": 2.4253, "step": 2214000 }, { "epoch": 10.97, "learning_rate": 4.4516292365848706e-05, "loss": 2.4469, "step": 2214500 }, { "epoch": 10.97, "learning_rate": 4.451505377942262e-05, "loss": 2.4336, "step": 2215000 }, { "epoch": 10.98, "learning_rate": 4.451381519299654e-05, "loss": 2.4368, "step": 2215500 }, { "epoch": 10.98, "learning_rate": 4.451257908374331e-05, "loss": 2.4251, "step": 2216000 }, { "epoch": 10.98, "learning_rate": 4.451134297449008e-05, "loss": 2.4311, "step": 2216500 }, { "epoch": 10.98, "learning_rate": 4.4510104388063995e-05, "loss": 2.4489, "step": 2217000 }, { "epoch": 10.99, "learning_rate": 4.450886580163791e-05, "loss": 2.4204, "step": 2217500 }, { "epoch": 10.99, "learning_rate": 4.450762721521183e-05, "loss": 2.4315, "step": 2218000 }, { "epoch": 10.99, "learning_rate": 4.450638862878574e-05, "loss": 2.4432, "step": 2218500 }, { "epoch": 10.99, "learning_rate": 4.4505150042359655e-05, "loss": 2.4581, "step": 2219000 }, { "epoch": 11.0, "learning_rate": 4.450391145593357e-05, "loss": 2.4339, "step": 2219500 }, { "epoch": 11.0, "learning_rate": 4.450267286950749e-05, "loss": 2.4147, "step": 2220000 }, { "epoch": 11.0, "eval_accuracy": 0.6435940740781358, "eval_accuracy_mlm": 0.596974884280312, "eval_accuracy_nsp": 0.863742797861617, "eval_loss": 2.406216859817505, "eval_runtime": 146.12, "eval_samples_per_second": 1744.86, "eval_steps_per_second": 72.707, "step": 2220273 }, { "epoch": 11.0, "learning_rate": 4.4501434283081406e-05, "loss": 2.4115, "step": 2220500 }, { "epoch": 11.0, "learning_rate": 4.450019569665532e-05, "loss": 2.3987, "step": 2221000 }, { "epoch": 11.01, "learning_rate": 4.449895958740209e-05, "loss": 2.392, "step": 2221500 }, { "epoch": 11.01, "learning_rate": 4.449772100097601e-05, "loss": 2.3835, "step": 2222000 }, { "epoch": 11.01, "learning_rate": 4.4496482414549926e-05, "loss": 2.4125, "step": 2222500 }, { "epoch": 11.01, "learning_rate": 4.4495243828123836e-05, "loss": 2.4218, "step": 2223000 }, { "epoch": 11.02, "learning_rate": 4.449400524169775e-05, "loss": 2.4222, "step": 2223500 }, { "epoch": 11.02, "learning_rate": 4.449276665527167e-05, "loss": 2.3963, "step": 2224000 }, { "epoch": 11.02, "learning_rate": 4.449152806884559e-05, "loss": 2.4003, "step": 2224500 }, { "epoch": 11.02, "learning_rate": 4.4490289482419504e-05, "loss": 2.4131, "step": 2225000 }, { "epoch": 11.03, "learning_rate": 4.448905089599342e-05, "loss": 2.426, "step": 2225500 }, { "epoch": 11.03, "learning_rate": 4.448781230956734e-05, "loss": 2.4027, "step": 2226000 }, { "epoch": 11.03, "learning_rate": 4.4486573723141255e-05, "loss": 2.4331, "step": 2226500 }, { "epoch": 11.03, "learning_rate": 4.448533761388802e-05, "loss": 2.4009, "step": 2227000 }, { "epoch": 11.04, "learning_rate": 4.448409902746194e-05, "loss": 2.4009, "step": 2227500 }, { "epoch": 11.04, "learning_rate": 4.448286044103586e-05, "loss": 2.4034, "step": 2228000 }, { "epoch": 11.04, "learning_rate": 4.4481621854609774e-05, "loss": 2.4055, "step": 2228500 }, { "epoch": 11.04, "learning_rate": 4.448038326818369e-05, "loss": 2.3944, "step": 2229000 }, { "epoch": 11.05, "learning_rate": 4.447914468175761e-05, "loss": 2.4153, "step": 2229500 }, { "epoch": 11.05, "learning_rate": 4.4477906095331525e-05, "loss": 2.4075, "step": 2230000 }, { "epoch": 11.05, "learning_rate": 4.447666750890544e-05, "loss": 2.4052, "step": 2230500 }, { "epoch": 11.05, "learning_rate": 4.447542892247936e-05, "loss": 2.4021, "step": 2231000 }, { "epoch": 11.06, "learning_rate": 4.447419281322612e-05, "loss": 2.3982, "step": 2231500 }, { "epoch": 11.06, "learning_rate": 4.447295422680004e-05, "loss": 2.4161, "step": 2232000 }, { "epoch": 11.06, "learning_rate": 4.4471715640373955e-05, "loss": 2.4319, "step": 2232500 }, { "epoch": 11.06, "learning_rate": 4.447047705394787e-05, "loss": 2.4021, "step": 2233000 }, { "epoch": 11.07, "learning_rate": 4.446923846752179e-05, "loss": 2.4029, "step": 2233500 }, { "epoch": 11.07, "learning_rate": 4.4467999881095705e-05, "loss": 2.4136, "step": 2234000 }, { "epoch": 11.07, "learning_rate": 4.446676129466962e-05, "loss": 2.4027, "step": 2234500 }, { "epoch": 11.07, "learning_rate": 4.446552518541639e-05, "loss": 2.4079, "step": 2235000 }, { "epoch": 11.08, "learning_rate": 4.446428659899031e-05, "loss": 2.4259, "step": 2235500 }, { "epoch": 11.08, "learning_rate": 4.4463048012564225e-05, "loss": 2.4226, "step": 2236000 }, { "epoch": 11.08, "learning_rate": 4.446180942613814e-05, "loss": 2.4198, "step": 2236500 }, { "epoch": 11.08, "learning_rate": 4.446057083971206e-05, "loss": 2.3826, "step": 2237000 }, { "epoch": 11.09, "learning_rate": 4.4459332253285976e-05, "loss": 2.4163, "step": 2237500 }, { "epoch": 11.09, "learning_rate": 4.445809366685989e-05, "loss": 2.403, "step": 2238000 }, { "epoch": 11.09, "learning_rate": 4.445685508043381e-05, "loss": 2.4073, "step": 2238500 }, { "epoch": 11.09, "learning_rate": 4.445561897118057e-05, "loss": 2.4129, "step": 2239000 }, { "epoch": 11.1, "learning_rate": 4.445438038475449e-05, "loss": 2.4009, "step": 2239500 }, { "epoch": 11.1, "learning_rate": 4.4453141798328406e-05, "loss": 2.4227, "step": 2240000 }, { "epoch": 11.1, "learning_rate": 4.445190321190232e-05, "loss": 2.4006, "step": 2240500 }, { "epoch": 11.1, "learning_rate": 4.445066710264909e-05, "loss": 2.3994, "step": 2241000 }, { "epoch": 11.11, "learning_rate": 4.444942851622301e-05, "loss": 2.4206, "step": 2241500 }, { "epoch": 11.11, "learning_rate": 4.4448189929796925e-05, "loss": 2.4191, "step": 2242000 }, { "epoch": 11.11, "learning_rate": 4.4446953820543694e-05, "loss": 2.3991, "step": 2242500 }, { "epoch": 11.11, "learning_rate": 4.444571523411761e-05, "loss": 2.4203, "step": 2243000 }, { "epoch": 11.12, "learning_rate": 4.444447664769152e-05, "loss": 2.4075, "step": 2243500 }, { "epoch": 11.12, "learning_rate": 4.444323806126544e-05, "loss": 2.4173, "step": 2244000 }, { "epoch": 11.12, "learning_rate": 4.4441999474839355e-05, "loss": 2.4242, "step": 2244500 }, { "epoch": 11.12, "learning_rate": 4.444076088841327e-05, "loss": 2.4012, "step": 2245000 }, { "epoch": 11.12, "learning_rate": 4.443952230198719e-05, "loss": 2.4417, "step": 2245500 }, { "epoch": 11.13, "learning_rate": 4.4438283715561106e-05, "loss": 2.3909, "step": 2246000 }, { "epoch": 11.13, "learning_rate": 4.443704512913502e-05, "loss": 2.4142, "step": 2246500 }, { "epoch": 11.13, "learning_rate": 4.443580901988179e-05, "loss": 2.4145, "step": 2247000 }, { "epoch": 11.13, "learning_rate": 4.443457043345571e-05, "loss": 2.4039, "step": 2247500 }, { "epoch": 11.14, "learning_rate": 4.4433331847029625e-05, "loss": 2.4166, "step": 2248000 }, { "epoch": 11.14, "learning_rate": 4.443209326060354e-05, "loss": 2.4373, "step": 2248500 }, { "epoch": 11.14, "learning_rate": 4.443085467417746e-05, "loss": 2.3937, "step": 2249000 }, { "epoch": 11.14, "learning_rate": 4.4429616087751376e-05, "loss": 2.4121, "step": 2249500 }, { "epoch": 11.15, "learning_rate": 4.442837997849814e-05, "loss": 2.4216, "step": 2250000 }, { "epoch": 11.15, "learning_rate": 4.4427141392072055e-05, "loss": 2.4062, "step": 2250500 }, { "epoch": 11.15, "learning_rate": 4.442590280564597e-05, "loss": 2.3873, "step": 2251000 }, { "epoch": 11.15, "learning_rate": 4.442466421921989e-05, "loss": 2.42, "step": 2251500 }, { "epoch": 11.16, "learning_rate": 4.4423425632793806e-05, "loss": 2.3876, "step": 2252000 }, { "epoch": 11.16, "learning_rate": 4.442218704636772e-05, "loss": 2.4366, "step": 2252500 }, { "epoch": 11.16, "learning_rate": 4.442095093711449e-05, "loss": 2.4273, "step": 2253000 }, { "epoch": 11.16, "learning_rate": 4.441971235068841e-05, "loss": 2.4109, "step": 2253500 }, { "epoch": 11.17, "learning_rate": 4.4418473764262325e-05, "loss": 2.4171, "step": 2254000 }, { "epoch": 11.17, "learning_rate": 4.441723517783624e-05, "loss": 2.4281, "step": 2254500 }, { "epoch": 11.17, "learning_rate": 4.441599659141016e-05, "loss": 2.4044, "step": 2255000 }, { "epoch": 11.17, "learning_rate": 4.441476048215693e-05, "loss": 2.3969, "step": 2255500 }, { "epoch": 11.18, "learning_rate": 4.4413521895730845e-05, "loss": 2.4335, "step": 2256000 }, { "epoch": 11.18, "learning_rate": 4.441228330930476e-05, "loss": 2.4196, "step": 2256500 }, { "epoch": 11.18, "learning_rate": 4.441104472287867e-05, "loss": 2.4018, "step": 2257000 }, { "epoch": 11.18, "learning_rate": 4.440980613645259e-05, "loss": 2.4158, "step": 2257500 }, { "epoch": 11.19, "learning_rate": 4.4408567550026506e-05, "loss": 2.4362, "step": 2258000 }, { "epoch": 11.19, "learning_rate": 4.4407331440773275e-05, "loss": 2.4383, "step": 2258500 }, { "epoch": 11.19, "learning_rate": 4.440609285434719e-05, "loss": 2.4255, "step": 2259000 }, { "epoch": 11.19, "learning_rate": 4.440485426792111e-05, "loss": 2.4434, "step": 2259500 }, { "epoch": 11.2, "learning_rate": 4.4403615681495026e-05, "loss": 2.4004, "step": 2260000 }, { "epoch": 11.2, "learning_rate": 4.440237709506894e-05, "loss": 2.4139, "step": 2260500 }, { "epoch": 11.2, "learning_rate": 4.440113850864286e-05, "loss": 2.4226, "step": 2261000 }, { "epoch": 11.2, "learning_rate": 4.439990239938963e-05, "loss": 2.4301, "step": 2261500 }, { "epoch": 11.21, "learning_rate": 4.4398663812963545e-05, "loss": 2.4222, "step": 2262000 }, { "epoch": 11.21, "learning_rate": 4.439742522653746e-05, "loss": 2.4225, "step": 2262500 }, { "epoch": 11.21, "learning_rate": 4.439618664011138e-05, "loss": 2.4212, "step": 2263000 }, { "epoch": 11.21, "learning_rate": 4.439494805368529e-05, "loss": 2.4104, "step": 2263500 }, { "epoch": 11.22, "learning_rate": 4.439371194443206e-05, "loss": 2.4221, "step": 2264000 }, { "epoch": 11.22, "learning_rate": 4.4392473358005975e-05, "loss": 2.4218, "step": 2264500 }, { "epoch": 11.22, "learning_rate": 4.439123477157989e-05, "loss": 2.4348, "step": 2265000 }, { "epoch": 11.22, "learning_rate": 4.438999618515381e-05, "loss": 2.4062, "step": 2265500 }, { "epoch": 11.23, "learning_rate": 4.4388757598727726e-05, "loss": 2.4061, "step": 2266000 }, { "epoch": 11.23, "learning_rate": 4.438751901230164e-05, "loss": 2.4101, "step": 2266500 }, { "epoch": 11.23, "learning_rate": 4.438628290304841e-05, "loss": 2.4078, "step": 2267000 }, { "epoch": 11.23, "learning_rate": 4.438504431662233e-05, "loss": 2.4107, "step": 2267500 }, { "epoch": 11.24, "learning_rate": 4.4383805730196245e-05, "loss": 2.412, "step": 2268000 }, { "epoch": 11.24, "learning_rate": 4.438256714377016e-05, "loss": 2.4134, "step": 2268500 }, { "epoch": 11.24, "learning_rate": 4.4381331034516924e-05, "loss": 2.4223, "step": 2269000 }, { "epoch": 11.24, "learning_rate": 4.438009244809084e-05, "loss": 2.4328, "step": 2269500 }, { "epoch": 11.25, "learning_rate": 4.437885386166476e-05, "loss": 2.4224, "step": 2270000 }, { "epoch": 11.25, "learning_rate": 4.4377615275238675e-05, "loss": 2.4191, "step": 2270500 }, { "epoch": 11.25, "learning_rate": 4.437637916598545e-05, "loss": 2.4188, "step": 2271000 }, { "epoch": 11.25, "learning_rate": 4.437514057955936e-05, "loss": 2.4173, "step": 2271500 }, { "epoch": 11.26, "learning_rate": 4.437390199313328e-05, "loss": 2.3998, "step": 2272000 }, { "epoch": 11.26, "learning_rate": 4.4372663406707195e-05, "loss": 2.4385, "step": 2272500 }, { "epoch": 11.26, "learning_rate": 4.437142482028111e-05, "loss": 2.3898, "step": 2273000 }, { "epoch": 11.26, "learning_rate": 4.437018623385503e-05, "loss": 2.4457, "step": 2273500 }, { "epoch": 11.27, "learning_rate": 4.4368947647428945e-05, "loss": 2.4537, "step": 2274000 }, { "epoch": 11.27, "learning_rate": 4.436770906100286e-05, "loss": 2.4273, "step": 2274500 }, { "epoch": 11.27, "learning_rate": 4.436647047457678e-05, "loss": 2.4318, "step": 2275000 }, { "epoch": 11.27, "learning_rate": 4.4365231888150696e-05, "loss": 2.4194, "step": 2275500 }, { "epoch": 11.28, "learning_rate": 4.436399330172461e-05, "loss": 2.4104, "step": 2276000 }, { "epoch": 11.28, "learning_rate": 4.4362757192471375e-05, "loss": 2.4375, "step": 2276500 }, { "epoch": 11.28, "learning_rate": 4.436152108321815e-05, "loss": 2.4367, "step": 2277000 }, { "epoch": 11.28, "learning_rate": 4.436028249679207e-05, "loss": 2.4193, "step": 2277500 }, { "epoch": 11.29, "learning_rate": 4.435904638753883e-05, "loss": 2.4272, "step": 2278000 }, { "epoch": 11.29, "learning_rate": 4.4357807801112747e-05, "loss": 2.441, "step": 2278500 }, { "epoch": 11.29, "learning_rate": 4.4356569214686664e-05, "loss": 2.4035, "step": 2279000 }, { "epoch": 11.29, "learning_rate": 4.435533062826058e-05, "loss": 2.4012, "step": 2279500 }, { "epoch": 11.3, "learning_rate": 4.43540920418345e-05, "loss": 2.4267, "step": 2280000 }, { "epoch": 11.3, "learning_rate": 4.4352853455408414e-05, "loss": 2.4075, "step": 2280500 }, { "epoch": 11.3, "learning_rate": 4.4351614868982324e-05, "loss": 2.4112, "step": 2281000 }, { "epoch": 11.3, "learning_rate": 4.435037628255624e-05, "loss": 2.4298, "step": 2281500 }, { "epoch": 11.31, "learning_rate": 4.434913769613016e-05, "loss": 2.4208, "step": 2282000 }, { "epoch": 11.31, "learning_rate": 4.4347899109704075e-05, "loss": 2.4449, "step": 2282500 }, { "epoch": 11.31, "learning_rate": 4.434666052327799e-05, "loss": 2.4218, "step": 2283000 }, { "epoch": 11.31, "learning_rate": 4.434542193685191e-05, "loss": 2.4302, "step": 2283500 }, { "epoch": 11.32, "learning_rate": 4.4344183350425826e-05, "loss": 2.4257, "step": 2284000 }, { "epoch": 11.32, "learning_rate": 4.4342947241172595e-05, "loss": 2.4383, "step": 2284500 }, { "epoch": 11.32, "learning_rate": 4.434170865474651e-05, "loss": 2.3936, "step": 2285000 }, { "epoch": 11.32, "learning_rate": 4.434047254549328e-05, "loss": 2.4184, "step": 2285500 }, { "epoch": 11.33, "learning_rate": 4.43392339590672e-05, "loss": 2.413, "step": 2286000 }, { "epoch": 11.33, "learning_rate": 4.4337995372641114e-05, "loss": 2.4455, "step": 2286500 }, { "epoch": 11.33, "learning_rate": 4.4336756786215025e-05, "loss": 2.4197, "step": 2287000 }, { "epoch": 11.33, "learning_rate": 4.433551819978894e-05, "loss": 2.4045, "step": 2287500 }, { "epoch": 11.34, "learning_rate": 4.433427961336286e-05, "loss": 2.4081, "step": 2288000 }, { "epoch": 11.34, "learning_rate": 4.4333041026936775e-05, "loss": 2.4123, "step": 2288500 }, { "epoch": 11.34, "learning_rate": 4.433180244051069e-05, "loss": 2.4212, "step": 2289000 }, { "epoch": 11.34, "learning_rate": 4.433056633125747e-05, "loss": 2.4364, "step": 2289500 }, { "epoch": 11.35, "learning_rate": 4.4329327744831385e-05, "loss": 2.4244, "step": 2290000 }, { "epoch": 11.35, "learning_rate": 4.4328091635578154e-05, "loss": 2.439, "step": 2290500 }, { "epoch": 11.35, "learning_rate": 4.432685304915207e-05, "loss": 2.4081, "step": 2291000 }, { "epoch": 11.35, "learning_rate": 4.432561446272598e-05, "loss": 2.4084, "step": 2291500 }, { "epoch": 11.36, "learning_rate": 4.43243758762999e-05, "loss": 2.4155, "step": 2292000 }, { "epoch": 11.36, "learning_rate": 4.4323137289873815e-05, "loss": 2.4206, "step": 2292500 }, { "epoch": 11.36, "learning_rate": 4.432189870344773e-05, "loss": 2.4201, "step": 2293000 }, { "epoch": 11.36, "learning_rate": 4.43206625941945e-05, "loss": 2.4092, "step": 2293500 }, { "epoch": 11.37, "learning_rate": 4.431942400776842e-05, "loss": 2.4223, "step": 2294000 }, { "epoch": 11.37, "learning_rate": 4.4318185421342334e-05, "loss": 2.4183, "step": 2294500 }, { "epoch": 11.37, "learning_rate": 4.431694683491625e-05, "loss": 2.424, "step": 2295000 }, { "epoch": 11.37, "learning_rate": 4.431570824849017e-05, "loss": 2.4262, "step": 2295500 }, { "epoch": 11.38, "learning_rate": 4.4314469662064085e-05, "loss": 2.4002, "step": 2296000 }, { "epoch": 11.38, "learning_rate": 4.4313231075637995e-05, "loss": 2.4274, "step": 2296500 }, { "epoch": 11.38, "learning_rate": 4.431199496638477e-05, "loss": 2.4295, "step": 2297000 }, { "epoch": 11.38, "learning_rate": 4.431075637995869e-05, "loss": 2.4161, "step": 2297500 }, { "epoch": 11.39, "learning_rate": 4.4309517793532605e-05, "loss": 2.4199, "step": 2298000 }, { "epoch": 11.39, "learning_rate": 4.4308279207106515e-05, "loss": 2.4241, "step": 2298500 }, { "epoch": 11.39, "learning_rate": 4.430704062068043e-05, "loss": 2.4267, "step": 2299000 }, { "epoch": 11.39, "learning_rate": 4.430580203425435e-05, "loss": 2.4256, "step": 2299500 }, { "epoch": 11.39, "learning_rate": 4.4304563447828265e-05, "loss": 2.4038, "step": 2300000 }, { "epoch": 11.4, "learning_rate": 4.4303324861402176e-05, "loss": 2.419, "step": 2300500 }, { "epoch": 11.4, "learning_rate": 4.430208627497609e-05, "loss": 2.4068, "step": 2301000 }, { "epoch": 11.4, "learning_rate": 4.430085016572287e-05, "loss": 2.4023, "step": 2301500 }, { "epoch": 11.4, "learning_rate": 4.4299611579296785e-05, "loss": 2.4277, "step": 2302000 }, { "epoch": 11.41, "learning_rate": 4.42983729928707e-05, "loss": 2.4221, "step": 2302500 }, { "epoch": 11.41, "learning_rate": 4.429713440644461e-05, "loss": 2.429, "step": 2303000 }, { "epoch": 11.41, "learning_rate": 4.429589582001853e-05, "loss": 2.429, "step": 2303500 }, { "epoch": 11.41, "learning_rate": 4.4294657233592446e-05, "loss": 2.4134, "step": 2304000 }, { "epoch": 11.42, "learning_rate": 4.429341864716636e-05, "loss": 2.4422, "step": 2304500 }, { "epoch": 11.42, "learning_rate": 4.429218253791313e-05, "loss": 2.438, "step": 2305000 }, { "epoch": 11.42, "learning_rate": 4.429094395148705e-05, "loss": 2.4249, "step": 2305500 }, { "epoch": 11.42, "learning_rate": 4.428970784223382e-05, "loss": 2.4145, "step": 2306000 }, { "epoch": 11.43, "learning_rate": 4.4288469255807734e-05, "loss": 2.4202, "step": 2306500 }, { "epoch": 11.43, "learning_rate": 4.428723066938165e-05, "loss": 2.4222, "step": 2307000 }, { "epoch": 11.43, "learning_rate": 4.428599208295557e-05, "loss": 2.4162, "step": 2307500 }, { "epoch": 11.43, "learning_rate": 4.428475597370234e-05, "loss": 2.4277, "step": 2308000 }, { "epoch": 11.44, "learning_rate": 4.4283517387276254e-05, "loss": 2.4503, "step": 2308500 }, { "epoch": 11.44, "learning_rate": 4.4282281278023016e-05, "loss": 2.4174, "step": 2309000 }, { "epoch": 11.44, "learning_rate": 4.428104269159693e-05, "loss": 2.4125, "step": 2309500 }, { "epoch": 11.44, "learning_rate": 4.427980410517085e-05, "loss": 2.4267, "step": 2310000 }, { "epoch": 11.45, "learning_rate": 4.427856551874477e-05, "loss": 2.4294, "step": 2310500 }, { "epoch": 11.45, "learning_rate": 4.4277326932318684e-05, "loss": 2.4163, "step": 2311000 }, { "epoch": 11.45, "learning_rate": 4.42760883458926e-05, "loss": 2.406, "step": 2311500 }, { "epoch": 11.45, "learning_rate": 4.427484975946652e-05, "loss": 2.4035, "step": 2312000 }, { "epoch": 11.46, "learning_rate": 4.4273611173040435e-05, "loss": 2.4194, "step": 2312500 }, { "epoch": 11.46, "learning_rate": 4.42723750637872e-05, "loss": 2.4203, "step": 2313000 }, { "epoch": 11.46, "learning_rate": 4.427113647736112e-05, "loss": 2.4323, "step": 2313500 }, { "epoch": 11.46, "learning_rate": 4.426989789093504e-05, "loss": 2.4078, "step": 2314000 }, { "epoch": 11.47, "learning_rate": 4.4268659304508954e-05, "loss": 2.4321, "step": 2314500 }, { "epoch": 11.47, "learning_rate": 4.426742071808287e-05, "loss": 2.4087, "step": 2315000 }, { "epoch": 11.47, "learning_rate": 4.426618213165679e-05, "loss": 2.4277, "step": 2315500 }, { "epoch": 11.47, "learning_rate": 4.4264943545230705e-05, "loss": 2.41, "step": 2316000 }, { "epoch": 11.48, "learning_rate": 4.426370495880462e-05, "loss": 2.4286, "step": 2316500 }, { "epoch": 11.48, "learning_rate": 4.426246637237854e-05, "loss": 2.4222, "step": 2317000 }, { "epoch": 11.48, "learning_rate": 4.4261227785952456e-05, "loss": 2.4014, "step": 2317500 }, { "epoch": 11.48, "learning_rate": 4.425998919952637e-05, "loss": 2.4204, "step": 2318000 }, { "epoch": 11.49, "learning_rate": 4.425875061310028e-05, "loss": 2.3936, "step": 2318500 }, { "epoch": 11.49, "learning_rate": 4.42575120266742e-05, "loss": 2.4184, "step": 2319000 }, { "epoch": 11.49, "learning_rate": 4.425627591742097e-05, "loss": 2.4393, "step": 2319500 }, { "epoch": 11.49, "learning_rate": 4.4255037330994885e-05, "loss": 2.4238, "step": 2320000 }, { "epoch": 11.5, "learning_rate": 4.4253801221741654e-05, "loss": 2.4157, "step": 2320500 }, { "epoch": 11.5, "learning_rate": 4.425256263531557e-05, "loss": 2.4123, "step": 2321000 }, { "epoch": 11.5, "learning_rate": 4.425132404888949e-05, "loss": 2.4241, "step": 2321500 }, { "epoch": 11.5, "learning_rate": 4.425008793963625e-05, "loss": 2.4004, "step": 2322000 }, { "epoch": 11.51, "learning_rate": 4.424884935321017e-05, "loss": 2.4401, "step": 2322500 }, { "epoch": 11.51, "learning_rate": 4.4247610766784084e-05, "loss": 2.4317, "step": 2323000 }, { "epoch": 11.51, "learning_rate": 4.4246372180358e-05, "loss": 2.4115, "step": 2323500 }, { "epoch": 11.51, "learning_rate": 4.424513359393192e-05, "loss": 2.4012, "step": 2324000 }, { "epoch": 11.52, "learning_rate": 4.424389748467869e-05, "loss": 2.4115, "step": 2324500 }, { "epoch": 11.52, "learning_rate": 4.4242658898252604e-05, "loss": 2.392, "step": 2325000 }, { "epoch": 11.52, "learning_rate": 4.424142031182652e-05, "loss": 2.4307, "step": 2325500 }, { "epoch": 11.52, "learning_rate": 4.424018172540044e-05, "loss": 2.4094, "step": 2326000 }, { "epoch": 11.53, "learning_rate": 4.4238943138974354e-05, "loss": 2.4216, "step": 2326500 }, { "epoch": 11.53, "learning_rate": 4.423770455254827e-05, "loss": 2.4106, "step": 2327000 }, { "epoch": 11.53, "learning_rate": 4.423646596612219e-05, "loss": 2.4049, "step": 2327500 }, { "epoch": 11.53, "learning_rate": 4.4235227379696105e-05, "loss": 2.4113, "step": 2328000 }, { "epoch": 11.54, "learning_rate": 4.423398879327002e-05, "loss": 2.4132, "step": 2328500 }, { "epoch": 11.54, "learning_rate": 4.423275020684394e-05, "loss": 2.4204, "step": 2329000 }, { "epoch": 11.54, "learning_rate": 4.42315140975907e-05, "loss": 2.4299, "step": 2329500 }, { "epoch": 11.54, "learning_rate": 4.423027551116462e-05, "loss": 2.419, "step": 2330000 }, { "epoch": 11.55, "learning_rate": 4.4229036924738535e-05, "loss": 2.4446, "step": 2330500 }, { "epoch": 11.55, "learning_rate": 4.422779833831245e-05, "loss": 2.4427, "step": 2331000 }, { "epoch": 11.55, "learning_rate": 4.422655975188637e-05, "loss": 2.4376, "step": 2331500 }, { "epoch": 11.55, "learning_rate": 4.4225321165460286e-05, "loss": 2.4157, "step": 2332000 }, { "epoch": 11.56, "learning_rate": 4.42240825790342e-05, "loss": 2.4434, "step": 2332500 }, { "epoch": 11.56, "learning_rate": 4.422284399260812e-05, "loss": 2.4163, "step": 2333000 }, { "epoch": 11.56, "learning_rate": 4.4221605406182036e-05, "loss": 2.4218, "step": 2333500 }, { "epoch": 11.56, "learning_rate": 4.4220369296928805e-05, "loss": 2.4099, "step": 2334000 }, { "epoch": 11.57, "learning_rate": 4.421913071050272e-05, "loss": 2.4475, "step": 2334500 }, { "epoch": 11.57, "learning_rate": 4.421789460124949e-05, "loss": 2.4169, "step": 2335000 }, { "epoch": 11.57, "learning_rate": 4.42166560148234e-05, "loss": 2.4357, "step": 2335500 }, { "epoch": 11.57, "learning_rate": 4.421541742839732e-05, "loss": 2.4186, "step": 2336000 }, { "epoch": 11.58, "learning_rate": 4.4214178841971235e-05, "loss": 2.438, "step": 2336500 }, { "epoch": 11.58, "learning_rate": 4.421294025554515e-05, "loss": 2.4189, "step": 2337000 }, { "epoch": 11.58, "learning_rate": 4.421170414629192e-05, "loss": 2.4401, "step": 2337500 }, { "epoch": 11.58, "learning_rate": 4.421046555986584e-05, "loss": 2.4355, "step": 2338000 }, { "epoch": 11.59, "learning_rate": 4.4209226973439755e-05, "loss": 2.4278, "step": 2338500 }, { "epoch": 11.59, "learning_rate": 4.420798838701367e-05, "loss": 2.4345, "step": 2339000 }, { "epoch": 11.59, "learning_rate": 4.420674980058759e-05, "loss": 2.4363, "step": 2339500 }, { "epoch": 11.59, "learning_rate": 4.4205511214161505e-05, "loss": 2.4035, "step": 2340000 }, { "epoch": 11.6, "learning_rate": 4.4204275104908274e-05, "loss": 2.4417, "step": 2340500 }, { "epoch": 11.6, "learning_rate": 4.420303651848219e-05, "loss": 2.4219, "step": 2341000 }, { "epoch": 11.6, "learning_rate": 4.420179793205611e-05, "loss": 2.4465, "step": 2341500 }, { "epoch": 11.6, "learning_rate": 4.420056182280287e-05, "loss": 2.4282, "step": 2342000 }, { "epoch": 11.61, "learning_rate": 4.419932323637679e-05, "loss": 2.4328, "step": 2342500 }, { "epoch": 11.61, "learning_rate": 4.4198084649950704e-05, "loss": 2.4098, "step": 2343000 }, { "epoch": 11.61, "learning_rate": 4.419684606352462e-05, "loss": 2.4214, "step": 2343500 }, { "epoch": 11.61, "learning_rate": 4.419560747709854e-05, "loss": 2.442, "step": 2344000 }, { "epoch": 11.62, "learning_rate": 4.4194368890672455e-05, "loss": 2.4259, "step": 2344500 }, { "epoch": 11.62, "learning_rate": 4.419313030424637e-05, "loss": 2.4322, "step": 2345000 }, { "epoch": 11.62, "learning_rate": 4.419189171782029e-05, "loss": 2.4194, "step": 2345500 }, { "epoch": 11.62, "learning_rate": 4.4190653131394206e-05, "loss": 2.4452, "step": 2346000 }, { "epoch": 11.63, "learning_rate": 4.418941454496812e-05, "loss": 2.4208, "step": 2346500 }, { "epoch": 11.63, "learning_rate": 4.418817595854204e-05, "loss": 2.4285, "step": 2347000 }, { "epoch": 11.63, "learning_rate": 4.418693984928881e-05, "loss": 2.4065, "step": 2347500 }, { "epoch": 11.63, "learning_rate": 4.418570374003557e-05, "loss": 2.42, "step": 2348000 }, { "epoch": 11.64, "learning_rate": 4.418446515360949e-05, "loss": 2.4344, "step": 2348500 }, { "epoch": 11.64, "learning_rate": 4.4183226567183404e-05, "loss": 2.4254, "step": 2349000 }, { "epoch": 11.64, "learning_rate": 4.418198798075732e-05, "loss": 2.4336, "step": 2349500 }, { "epoch": 11.64, "learning_rate": 4.418074939433124e-05, "loss": 2.4124, "step": 2350000 }, { "epoch": 11.65, "learning_rate": 4.4179510807905155e-05, "loss": 2.4268, "step": 2350500 }, { "epoch": 11.65, "learning_rate": 4.417827222147907e-05, "loss": 2.4369, "step": 2351000 }, { "epoch": 11.65, "learning_rate": 4.417703363505299e-05, "loss": 2.4431, "step": 2351500 }, { "epoch": 11.65, "learning_rate": 4.4175795048626906e-05, "loss": 2.4248, "step": 2352000 }, { "epoch": 11.66, "learning_rate": 4.417455646220082e-05, "loss": 2.3927, "step": 2352500 }, { "epoch": 11.66, "learning_rate": 4.417331787577474e-05, "loss": 2.4374, "step": 2353000 }, { "epoch": 11.66, "learning_rate": 4.4172079289348656e-05, "loss": 2.4195, "step": 2353500 }, { "epoch": 11.66, "learning_rate": 4.417084070292257e-05, "loss": 2.4295, "step": 2354000 }, { "epoch": 11.67, "learning_rate": 4.416960459366934e-05, "loss": 2.4114, "step": 2354500 }, { "epoch": 11.67, "learning_rate": 4.416836600724326e-05, "loss": 2.4245, "step": 2355000 }, { "epoch": 11.67, "learning_rate": 4.416712742081717e-05, "loss": 2.4333, "step": 2355500 }, { "epoch": 11.67, "learning_rate": 4.416589131156394e-05, "loss": 2.4191, "step": 2356000 }, { "epoch": 11.67, "learning_rate": 4.4164652725137855e-05, "loss": 2.3961, "step": 2356500 }, { "epoch": 11.68, "learning_rate": 4.4163416615884624e-05, "loss": 2.4087, "step": 2357000 }, { "epoch": 11.68, "learning_rate": 4.416217802945854e-05, "loss": 2.4227, "step": 2357500 }, { "epoch": 11.68, "learning_rate": 4.416093944303246e-05, "loss": 2.4177, "step": 2358000 }, { "epoch": 11.68, "learning_rate": 4.4159700856606375e-05, "loss": 2.4067, "step": 2358500 }, { "epoch": 11.69, "learning_rate": 4.415846227018029e-05, "loss": 2.4215, "step": 2359000 }, { "epoch": 11.69, "learning_rate": 4.4157226160927054e-05, "loss": 2.4338, "step": 2359500 }, { "epoch": 11.69, "learning_rate": 4.415598757450097e-05, "loss": 2.4406, "step": 2360000 }, { "epoch": 11.69, "learning_rate": 4.415474898807489e-05, "loss": 2.4358, "step": 2360500 }, { "epoch": 11.7, "learning_rate": 4.4153510401648804e-05, "loss": 2.4452, "step": 2361000 }, { "epoch": 11.7, "learning_rate": 4.415227181522272e-05, "loss": 2.4173, "step": 2361500 }, { "epoch": 11.7, "learning_rate": 4.415103322879664e-05, "loss": 2.4111, "step": 2362000 }, { "epoch": 11.7, "learning_rate": 4.4149794642370555e-05, "loss": 2.4227, "step": 2362500 }, { "epoch": 11.71, "learning_rate": 4.414855605594447e-05, "loss": 2.4011, "step": 2363000 }, { "epoch": 11.71, "learning_rate": 4.414731994669124e-05, "loss": 2.4455, "step": 2363500 }, { "epoch": 11.71, "learning_rate": 4.414608136026516e-05, "loss": 2.4459, "step": 2364000 }, { "epoch": 11.71, "learning_rate": 4.4144842773839075e-05, "loss": 2.4196, "step": 2364500 }, { "epoch": 11.72, "learning_rate": 4.414360418741299e-05, "loss": 2.4346, "step": 2365000 }, { "epoch": 11.72, "learning_rate": 4.414236560098691e-05, "loss": 2.4307, "step": 2365500 }, { "epoch": 11.72, "learning_rate": 4.414113196890653e-05, "loss": 2.4386, "step": 2366000 }, { "epoch": 11.72, "learning_rate": 4.4139893382480446e-05, "loss": 2.4333, "step": 2366500 }, { "epoch": 11.73, "learning_rate": 4.413865479605436e-05, "loss": 2.4199, "step": 2367000 }, { "epoch": 11.73, "learning_rate": 4.413741620962828e-05, "loss": 2.4048, "step": 2367500 }, { "epoch": 11.73, "learning_rate": 4.41361776232022e-05, "loss": 2.4399, "step": 2368000 }, { "epoch": 11.73, "learning_rate": 4.4134939036776114e-05, "loss": 2.4399, "step": 2368500 }, { "epoch": 11.74, "learning_rate": 4.4133700450350024e-05, "loss": 2.4265, "step": 2369000 }, { "epoch": 11.74, "learning_rate": 4.413246186392394e-05, "loss": 2.4374, "step": 2369500 }, { "epoch": 11.74, "learning_rate": 4.413122327749786e-05, "loss": 2.4099, "step": 2370000 }, { "epoch": 11.74, "learning_rate": 4.4129984691071775e-05, "loss": 2.3965, "step": 2370500 }, { "epoch": 11.75, "learning_rate": 4.4128748581818544e-05, "loss": 2.4322, "step": 2371000 }, { "epoch": 11.75, "learning_rate": 4.412750999539246e-05, "loss": 2.4613, "step": 2371500 }, { "epoch": 11.75, "learning_rate": 4.412627140896638e-05, "loss": 2.4158, "step": 2372000 }, { "epoch": 11.75, "learning_rate": 4.412503282254029e-05, "loss": 2.4332, "step": 2372500 }, { "epoch": 11.76, "learning_rate": 4.4123794236114205e-05, "loss": 2.4335, "step": 2373000 }, { "epoch": 11.76, "learning_rate": 4.412255812686098e-05, "loss": 2.4264, "step": 2373500 }, { "epoch": 11.76, "learning_rate": 4.41213195404349e-05, "loss": 2.3871, "step": 2374000 }, { "epoch": 11.76, "learning_rate": 4.4120080954008814e-05, "loss": 2.4349, "step": 2374500 }, { "epoch": 11.77, "learning_rate": 4.411884236758273e-05, "loss": 2.4322, "step": 2375000 }, { "epoch": 11.77, "learning_rate": 4.411760378115664e-05, "loss": 2.4205, "step": 2375500 }, { "epoch": 11.77, "learning_rate": 4.411637014907626e-05, "loss": 2.427, "step": 2376000 }, { "epoch": 11.77, "learning_rate": 4.411513403982303e-05, "loss": 2.4348, "step": 2376500 }, { "epoch": 11.78, "learning_rate": 4.411389545339695e-05, "loss": 2.4465, "step": 2377000 }, { "epoch": 11.78, "learning_rate": 4.4112656866970864e-05, "loss": 2.4235, "step": 2377500 }, { "epoch": 11.78, "learning_rate": 4.411141828054478e-05, "loss": 2.4328, "step": 2378000 }, { "epoch": 11.78, "learning_rate": 4.41101796941187e-05, "loss": 2.3982, "step": 2378500 }, { "epoch": 11.79, "learning_rate": 4.4108941107692615e-05, "loss": 2.4139, "step": 2379000 }, { "epoch": 11.79, "learning_rate": 4.410770252126653e-05, "loss": 2.4416, "step": 2379500 }, { "epoch": 11.79, "learning_rate": 4.410646393484045e-05, "loss": 2.4149, "step": 2380000 }, { "epoch": 11.79, "learning_rate": 4.4105225348414366e-05, "loss": 2.4093, "step": 2380500 }, { "epoch": 11.8, "learning_rate": 4.410398676198828e-05, "loss": 2.4322, "step": 2381000 }, { "epoch": 11.8, "learning_rate": 4.41027481755622e-05, "loss": 2.4391, "step": 2381500 }, { "epoch": 11.8, "learning_rate": 4.410150958913612e-05, "loss": 2.4264, "step": 2382000 }, { "epoch": 11.8, "learning_rate": 4.4100271002710034e-05, "loss": 2.4302, "step": 2382500 }, { "epoch": 11.81, "learning_rate": 4.409903241628395e-05, "loss": 2.4337, "step": 2383000 }, { "epoch": 11.81, "learning_rate": 4.409779630703071e-05, "loss": 2.404, "step": 2383500 }, { "epoch": 11.81, "learning_rate": 4.409655772060463e-05, "loss": 2.4127, "step": 2384000 }, { "epoch": 11.81, "learning_rate": 4.4095319134178547e-05, "loss": 2.4261, "step": 2384500 }, { "epoch": 11.82, "learning_rate": 4.4094080547752463e-05, "loss": 2.4263, "step": 2385000 }, { "epoch": 11.82, "learning_rate": 4.409284196132638e-05, "loss": 2.4344, "step": 2385500 }, { "epoch": 11.82, "learning_rate": 4.4091608329246e-05, "loss": 2.4243, "step": 2386000 }, { "epoch": 11.82, "learning_rate": 4.409036974281992e-05, "loss": 2.4161, "step": 2386500 }, { "epoch": 11.83, "learning_rate": 4.408913115639383e-05, "loss": 2.4354, "step": 2387000 }, { "epoch": 11.83, "learning_rate": 4.4087895047140604e-05, "loss": 2.398, "step": 2387500 }, { "epoch": 11.83, "learning_rate": 4.408665646071452e-05, "loss": 2.4091, "step": 2388000 }, { "epoch": 11.83, "learning_rate": 4.408541787428843e-05, "loss": 2.4046, "step": 2388500 }, { "epoch": 11.84, "learning_rate": 4.408417928786235e-05, "loss": 2.4116, "step": 2389000 }, { "epoch": 11.84, "learning_rate": 4.4082940701436265e-05, "loss": 2.4345, "step": 2389500 }, { "epoch": 11.84, "learning_rate": 4.408170211501018e-05, "loss": 2.4339, "step": 2390000 }, { "epoch": 11.84, "learning_rate": 4.40804635285841e-05, "loss": 2.4014, "step": 2390500 }, { "epoch": 11.85, "learning_rate": 4.4079224942158015e-05, "loss": 2.4364, "step": 2391000 }, { "epoch": 11.85, "learning_rate": 4.407798635573193e-05, "loss": 2.4408, "step": 2391500 }, { "epoch": 11.85, "learning_rate": 4.407674776930585e-05, "loss": 2.4218, "step": 2392000 }, { "epoch": 11.85, "learning_rate": 4.4075509182879766e-05, "loss": 2.4203, "step": 2392500 }, { "epoch": 11.86, "learning_rate": 4.407427059645368e-05, "loss": 2.4449, "step": 2393000 }, { "epoch": 11.86, "learning_rate": 4.40730320100276e-05, "loss": 2.4359, "step": 2393500 }, { "epoch": 11.86, "learning_rate": 4.407179342360152e-05, "loss": 2.4611, "step": 2394000 }, { "epoch": 11.86, "learning_rate": 4.4070554837175434e-05, "loss": 2.4342, "step": 2394500 }, { "epoch": 11.87, "learning_rate": 4.406931625074935e-05, "loss": 2.4223, "step": 2395000 }, { "epoch": 11.87, "learning_rate": 4.406807766432327e-05, "loss": 2.4426, "step": 2395500 }, { "epoch": 11.87, "learning_rate": 4.4066839077897185e-05, "loss": 2.4305, "step": 2396000 }, { "epoch": 11.87, "learning_rate": 4.406560296864395e-05, "loss": 2.4321, "step": 2396500 }, { "epoch": 11.88, "learning_rate": 4.4064366859390716e-05, "loss": 2.4089, "step": 2397000 }, { "epoch": 11.88, "learning_rate": 4.4063133227310336e-05, "loss": 2.414, "step": 2397500 }, { "epoch": 11.88, "learning_rate": 4.406189464088425e-05, "loss": 2.4312, "step": 2398000 }, { "epoch": 11.88, "learning_rate": 4.406065605445817e-05, "loss": 2.405, "step": 2398500 }, { "epoch": 11.89, "learning_rate": 4.405941746803209e-05, "loss": 2.4159, "step": 2399000 }, { "epoch": 11.89, "learning_rate": 4.4058178881606004e-05, "loss": 2.417, "step": 2399500 }, { "epoch": 11.89, "learning_rate": 4.405694277235277e-05, "loss": 2.3933, "step": 2400000 }, { "epoch": 11.89, "learning_rate": 4.405570418592669e-05, "loss": 2.4296, "step": 2400500 }, { "epoch": 11.9, "learning_rate": 4.405446559950061e-05, "loss": 2.4389, "step": 2401000 }, { "epoch": 11.9, "learning_rate": 4.4053227013074524e-05, "loss": 2.4468, "step": 2401500 }, { "epoch": 11.9, "learning_rate": 4.405198842664844e-05, "loss": 2.4335, "step": 2402000 }, { "epoch": 11.9, "learning_rate": 4.405074984022236e-05, "loss": 2.4179, "step": 2402500 }, { "epoch": 11.91, "learning_rate": 4.4049511253796274e-05, "loss": 2.4189, "step": 2403000 }, { "epoch": 11.91, "learning_rate": 4.404827266737019e-05, "loss": 2.4311, "step": 2403500 }, { "epoch": 11.91, "learning_rate": 4.40470340809441e-05, "loss": 2.4236, "step": 2404000 }, { "epoch": 11.91, "learning_rate": 4.404579549451802e-05, "loss": 2.4249, "step": 2404500 }, { "epoch": 11.92, "learning_rate": 4.4044556908091935e-05, "loss": 2.4524, "step": 2405000 }, { "epoch": 11.92, "learning_rate": 4.404331832166585e-05, "loss": 2.4291, "step": 2405500 }, { "epoch": 11.92, "learning_rate": 4.404207973523977e-05, "loss": 2.4173, "step": 2406000 }, { "epoch": 11.92, "learning_rate": 4.4040841148813686e-05, "loss": 2.448, "step": 2406500 }, { "epoch": 11.93, "learning_rate": 4.4039602562387596e-05, "loss": 2.4346, "step": 2407000 }, { "epoch": 11.93, "learning_rate": 4.403836397596151e-05, "loss": 2.4152, "step": 2407500 }, { "epoch": 11.93, "learning_rate": 4.403712538953543e-05, "loss": 2.4002, "step": 2408000 }, { "epoch": 11.93, "learning_rate": 4.40358892802822e-05, "loss": 2.419, "step": 2408500 }, { "epoch": 11.94, "learning_rate": 4.4034653171028975e-05, "loss": 2.4215, "step": 2409000 }, { "epoch": 11.94, "learning_rate": 4.403341458460289e-05, "loss": 2.4047, "step": 2409500 }, { "epoch": 11.94, "learning_rate": 4.4032178475349653e-05, "loss": 2.4267, "step": 2410000 }, { "epoch": 11.94, "learning_rate": 4.403093988892357e-05, "loss": 2.4454, "step": 2410500 }, { "epoch": 11.94, "learning_rate": 4.402970130249749e-05, "loss": 2.3954, "step": 2411000 }, { "epoch": 11.95, "learning_rate": 4.4028462716071404e-05, "loss": 2.4412, "step": 2411500 }, { "epoch": 11.95, "learning_rate": 4.402722412964532e-05, "loss": 2.4209, "step": 2412000 }, { "epoch": 11.95, "learning_rate": 4.402598554321924e-05, "loss": 2.4415, "step": 2412500 }, { "epoch": 11.95, "learning_rate": 4.4024746956793155e-05, "loss": 2.408, "step": 2413000 }, { "epoch": 11.96, "learning_rate": 4.4023508370367065e-05, "loss": 2.4469, "step": 2413500 }, { "epoch": 11.96, "learning_rate": 4.402226978394098e-05, "loss": 2.4386, "step": 2414000 }, { "epoch": 11.96, "learning_rate": 4.402103367468776e-05, "loss": 2.4394, "step": 2414500 }, { "epoch": 11.96, "learning_rate": 4.401979756543452e-05, "loss": 2.426, "step": 2415000 }, { "epoch": 11.97, "learning_rate": 4.401855897900844e-05, "loss": 2.4471, "step": 2415500 }, { "epoch": 11.97, "learning_rate": 4.4017320392582354e-05, "loss": 2.4293, "step": 2416000 }, { "epoch": 11.97, "learning_rate": 4.401608180615627e-05, "loss": 2.4297, "step": 2416500 }, { "epoch": 11.97, "learning_rate": 4.401484321973019e-05, "loss": 2.4321, "step": 2417000 }, { "epoch": 11.98, "learning_rate": 4.4013604633304104e-05, "loss": 2.4261, "step": 2417500 }, { "epoch": 11.98, "learning_rate": 4.401236604687802e-05, "loss": 2.4338, "step": 2418000 }, { "epoch": 11.98, "learning_rate": 4.401112746045194e-05, "loss": 2.4304, "step": 2418500 }, { "epoch": 11.98, "learning_rate": 4.400989135119871e-05, "loss": 2.3984, "step": 2419000 }, { "epoch": 11.99, "learning_rate": 4.4008652764772624e-05, "loss": 2.437, "step": 2419500 }, { "epoch": 11.99, "learning_rate": 4.400741665551939e-05, "loss": 2.4412, "step": 2420000 }, { "epoch": 11.99, "learning_rate": 4.400617806909331e-05, "loss": 2.4605, "step": 2420500 }, { "epoch": 11.99, "learning_rate": 4.400493948266723e-05, "loss": 2.4154, "step": 2421000 }, { "epoch": 12.0, "learning_rate": 4.400370089624114e-05, "loss": 2.4555, "step": 2421500 }, { "epoch": 12.0, "learning_rate": 4.4002462309815054e-05, "loss": 2.4047, "step": 2422000 }, { "epoch": 12.0, "eval_accuracy": 0.6447791117813911, "eval_accuracy_mlm": 0.5983567225929853, "eval_accuracy_nsp": 0.8637231868653391, "eval_loss": 2.398054361343384, "eval_runtime": 146.0821, "eval_samples_per_second": 1745.314, "eval_steps_per_second": 72.726, "step": 2422116 }, { "epoch": 12.0, "learning_rate": 4.400122372338897e-05, "loss": 2.3898, "step": 2422500 }, { "epoch": 12.0, "learning_rate": 4.399998513696289e-05, "loss": 2.4349, "step": 2423000 }, { "epoch": 12.01, "learning_rate": 4.3998746550536804e-05, "loss": 2.3875, "step": 2423500 }, { "epoch": 12.01, "learning_rate": 4.399750796411072e-05, "loss": 2.3812, "step": 2424000 }, { "epoch": 12.01, "learning_rate": 4.399626937768464e-05, "loss": 2.3915, "step": 2424500 }, { "epoch": 12.01, "learning_rate": 4.3995030791258555e-05, "loss": 2.4071, "step": 2425000 }, { "epoch": 12.02, "learning_rate": 4.399379220483247e-05, "loss": 2.3903, "step": 2425500 }, { "epoch": 12.02, "learning_rate": 4.399255609557924e-05, "loss": 2.3863, "step": 2426000 }, { "epoch": 12.02, "learning_rate": 4.399131998632601e-05, "loss": 2.3889, "step": 2426500 }, { "epoch": 12.02, "learning_rate": 4.399008139989993e-05, "loss": 2.3892, "step": 2427000 }, { "epoch": 12.03, "learning_rate": 4.3988842813473844e-05, "loss": 2.4049, "step": 2427500 }, { "epoch": 12.03, "learning_rate": 4.398760422704776e-05, "loss": 2.3953, "step": 2428000 }, { "epoch": 12.03, "learning_rate": 4.398636564062167e-05, "loss": 2.3994, "step": 2428500 }, { "epoch": 12.03, "learning_rate": 4.398512705419559e-05, "loss": 2.3911, "step": 2429000 }, { "epoch": 12.04, "learning_rate": 4.3983888467769505e-05, "loss": 2.4036, "step": 2429500 }, { "epoch": 12.04, "learning_rate": 4.3982652358516273e-05, "loss": 2.3935, "step": 2430000 }, { "epoch": 12.04, "learning_rate": 4.398141624926304e-05, "loss": 2.4119, "step": 2430500 }, { "epoch": 12.04, "learning_rate": 4.398018014000981e-05, "loss": 2.4128, "step": 2431000 }, { "epoch": 12.05, "learning_rate": 4.397894155358373e-05, "loss": 2.3834, "step": 2431500 }, { "epoch": 12.05, "learning_rate": 4.3977702967157645e-05, "loss": 2.402, "step": 2432000 }, { "epoch": 12.05, "learning_rate": 4.397646438073156e-05, "loss": 2.3786, "step": 2432500 }, { "epoch": 12.05, "learning_rate": 4.397522579430547e-05, "loss": 2.3985, "step": 2433000 }, { "epoch": 12.06, "learning_rate": 4.397398720787939e-05, "loss": 2.3975, "step": 2433500 }, { "epoch": 12.06, "learning_rate": 4.3972748621453306e-05, "loss": 2.395, "step": 2434000 }, { "epoch": 12.06, "learning_rate": 4.397151251220008e-05, "loss": 2.3914, "step": 2434500 }, { "epoch": 12.06, "learning_rate": 4.397027640294685e-05, "loss": 2.4154, "step": 2435000 }, { "epoch": 12.07, "learning_rate": 4.396903781652077e-05, "loss": 2.4228, "step": 2435500 }, { "epoch": 12.07, "learning_rate": 4.396779923009468e-05, "loss": 2.4234, "step": 2436000 }, { "epoch": 12.07, "learning_rate": 4.3966560643668594e-05, "loss": 2.4103, "step": 2436500 }, { "epoch": 12.07, "learning_rate": 4.396532205724251e-05, "loss": 2.4163, "step": 2437000 }, { "epoch": 12.08, "learning_rate": 4.396408347081643e-05, "loss": 2.408, "step": 2437500 }, { "epoch": 12.08, "learning_rate": 4.3962844884390345e-05, "loss": 2.3927, "step": 2438000 }, { "epoch": 12.08, "learning_rate": 4.396160629796426e-05, "loss": 2.3937, "step": 2438500 }, { "epoch": 12.08, "learning_rate": 4.396036771153817e-05, "loss": 2.3874, "step": 2439000 }, { "epoch": 12.09, "learning_rate": 4.395912912511209e-05, "loss": 2.4065, "step": 2439500 }, { "epoch": 12.09, "learning_rate": 4.3957890538686006e-05, "loss": 2.3979, "step": 2440000 }, { "epoch": 12.09, "learning_rate": 4.395665442943278e-05, "loss": 2.3901, "step": 2440500 }, { "epoch": 12.09, "learning_rate": 4.39554158430067e-05, "loss": 2.4009, "step": 2441000 }, { "epoch": 12.1, "learning_rate": 4.3954177256580615e-05, "loss": 2.396, "step": 2441500 }, { "epoch": 12.1, "learning_rate": 4.3952941147327384e-05, "loss": 2.4246, "step": 2442000 }, { "epoch": 12.1, "learning_rate": 4.39517025609013e-05, "loss": 2.4031, "step": 2442500 }, { "epoch": 12.1, "learning_rate": 4.395046397447521e-05, "loss": 2.4032, "step": 2443000 }, { "epoch": 12.11, "learning_rate": 4.394922538804913e-05, "loss": 2.3833, "step": 2443500 }, { "epoch": 12.11, "learning_rate": 4.3947986801623045e-05, "loss": 2.3917, "step": 2444000 }, { "epoch": 12.11, "learning_rate": 4.394674821519696e-05, "loss": 2.4239, "step": 2444500 }, { "epoch": 12.11, "learning_rate": 4.394550962877088e-05, "loss": 2.4095, "step": 2445000 }, { "epoch": 12.12, "learning_rate": 4.394427104234479e-05, "loss": 2.4093, "step": 2445500 }, { "epoch": 12.12, "learning_rate": 4.3943032455918706e-05, "loss": 2.4138, "step": 2446000 }, { "epoch": 12.12, "learning_rate": 4.394179386949262e-05, "loss": 2.3883, "step": 2446500 }, { "epoch": 12.12, "learning_rate": 4.394055528306654e-05, "loss": 2.3971, "step": 2447000 }, { "epoch": 12.13, "learning_rate": 4.393931669664046e-05, "loss": 2.4231, "step": 2447500 }, { "epoch": 12.13, "learning_rate": 4.3938078110214374e-05, "loss": 2.4131, "step": 2448000 }, { "epoch": 12.13, "learning_rate": 4.393683952378829e-05, "loss": 2.4189, "step": 2448500 }, { "epoch": 12.13, "learning_rate": 4.393560093736221e-05, "loss": 2.3864, "step": 2449000 }, { "epoch": 12.14, "learning_rate": 4.3934362350936125e-05, "loss": 2.4134, "step": 2449500 }, { "epoch": 12.14, "learning_rate": 4.393312376451004e-05, "loss": 2.3885, "step": 2450000 }, { "epoch": 12.14, "learning_rate": 4.393188517808396e-05, "loss": 2.4138, "step": 2450500 }, { "epoch": 12.14, "learning_rate": 4.393064906883073e-05, "loss": 2.42, "step": 2451000 }, { "epoch": 12.15, "learning_rate": 4.3929410482404644e-05, "loss": 2.4087, "step": 2451500 }, { "epoch": 12.15, "learning_rate": 4.392817189597856e-05, "loss": 2.4067, "step": 2452000 }, { "epoch": 12.15, "learning_rate": 4.392693330955248e-05, "loss": 2.4229, "step": 2452500 }, { "epoch": 12.15, "learning_rate": 4.3925694723126395e-05, "loss": 2.3739, "step": 2453000 }, { "epoch": 12.16, "learning_rate": 4.392445613670031e-05, "loss": 2.3939, "step": 2453500 }, { "epoch": 12.16, "learning_rate": 4.3923220027447074e-05, "loss": 2.4108, "step": 2454000 }, { "epoch": 12.16, "learning_rate": 4.392198391819384e-05, "loss": 2.3995, "step": 2454500 }, { "epoch": 12.16, "learning_rate": 4.392074533176776e-05, "loss": 2.396, "step": 2455000 }, { "epoch": 12.17, "learning_rate": 4.3919506745341677e-05, "loss": 2.4091, "step": 2455500 }, { "epoch": 12.17, "learning_rate": 4.3918268158915594e-05, "loss": 2.4122, "step": 2456000 }, { "epoch": 12.17, "learning_rate": 4.391702957248951e-05, "loss": 2.3943, "step": 2456500 }, { "epoch": 12.17, "learning_rate": 4.391579098606343e-05, "loss": 2.4066, "step": 2457000 }, { "epoch": 12.18, "learning_rate": 4.3914552399637344e-05, "loss": 2.4244, "step": 2457500 }, { "epoch": 12.18, "learning_rate": 4.391331381321126e-05, "loss": 2.386, "step": 2458000 }, { "epoch": 12.18, "learning_rate": 4.391207522678518e-05, "loss": 2.4081, "step": 2458500 }, { "epoch": 12.18, "learning_rate": 4.3910836640359095e-05, "loss": 2.376, "step": 2459000 }, { "epoch": 12.19, "learning_rate": 4.390959805393301e-05, "loss": 2.4178, "step": 2459500 }, { "epoch": 12.19, "learning_rate": 4.3908361944679774e-05, "loss": 2.4064, "step": 2460000 }, { "epoch": 12.19, "learning_rate": 4.390712335825369e-05, "loss": 2.4019, "step": 2460500 }, { "epoch": 12.19, "learning_rate": 4.390588477182761e-05, "loss": 2.3993, "step": 2461000 }, { "epoch": 12.2, "learning_rate": 4.3904646185401525e-05, "loss": 2.3876, "step": 2461500 }, { "epoch": 12.2, "learning_rate": 4.390340759897544e-05, "loss": 2.388, "step": 2462000 }, { "epoch": 12.2, "learning_rate": 4.390216901254936e-05, "loss": 2.4285, "step": 2462500 }, { "epoch": 12.2, "learning_rate": 4.390093290329613e-05, "loss": 2.4094, "step": 2463000 }, { "epoch": 12.21, "learning_rate": 4.3899696794042896e-05, "loss": 2.3865, "step": 2463500 }, { "epoch": 12.21, "learning_rate": 4.389845820761681e-05, "loss": 2.4204, "step": 2464000 }, { "epoch": 12.21, "learning_rate": 4.389721962119073e-05, "loss": 2.4236, "step": 2464500 }, { "epoch": 12.21, "learning_rate": 4.389598103476465e-05, "loss": 2.3961, "step": 2465000 }, { "epoch": 12.21, "learning_rate": 4.3894744925511416e-05, "loss": 2.397, "step": 2465500 }, { "epoch": 12.22, "learning_rate": 4.389350633908533e-05, "loss": 2.3895, "step": 2466000 }, { "epoch": 12.22, "learning_rate": 4.389226775265925e-05, "loss": 2.4102, "step": 2466500 }, { "epoch": 12.22, "learning_rate": 4.389102916623316e-05, "loss": 2.405, "step": 2467000 }, { "epoch": 12.22, "learning_rate": 4.388979057980708e-05, "loss": 2.4291, "step": 2467500 }, { "epoch": 12.23, "learning_rate": 4.3888551993380994e-05, "loss": 2.4213, "step": 2468000 }, { "epoch": 12.23, "learning_rate": 4.388731340695491e-05, "loss": 2.4009, "step": 2468500 }, { "epoch": 12.23, "learning_rate": 4.388607482052883e-05, "loss": 2.4047, "step": 2469000 }, { "epoch": 12.23, "learning_rate": 4.3884836234102745e-05, "loss": 2.385, "step": 2469500 }, { "epoch": 12.24, "learning_rate": 4.388360012484951e-05, "loss": 2.4111, "step": 2470000 }, { "epoch": 12.24, "learning_rate": 4.388236153842343e-05, "loss": 2.4104, "step": 2470500 }, { "epoch": 12.24, "learning_rate": 4.388112295199735e-05, "loss": 2.4294, "step": 2471000 }, { "epoch": 12.24, "learning_rate": 4.3879884365571264e-05, "loss": 2.4056, "step": 2471500 }, { "epoch": 12.25, "learning_rate": 4.387864577914518e-05, "loss": 2.396, "step": 2472000 }, { "epoch": 12.25, "learning_rate": 4.387740719271909e-05, "loss": 2.3991, "step": 2472500 }, { "epoch": 12.25, "learning_rate": 4.387617108346587e-05, "loss": 2.3729, "step": 2473000 }, { "epoch": 12.25, "learning_rate": 4.387493249703978e-05, "loss": 2.4009, "step": 2473500 }, { "epoch": 12.26, "learning_rate": 4.3873693910613694e-05, "loss": 2.4272, "step": 2474000 }, { "epoch": 12.26, "learning_rate": 4.387245532418761e-05, "loss": 2.4015, "step": 2474500 }, { "epoch": 12.26, "learning_rate": 4.387121673776153e-05, "loss": 2.4204, "step": 2475000 }, { "epoch": 12.26, "learning_rate": 4.3869978151335445e-05, "loss": 2.4145, "step": 2475500 }, { "epoch": 12.27, "learning_rate": 4.386873956490936e-05, "loss": 2.4175, "step": 2476000 }, { "epoch": 12.27, "learning_rate": 4.386750097848328e-05, "loss": 2.3936, "step": 2476500 }, { "epoch": 12.27, "learning_rate": 4.3866262392057195e-05, "loss": 2.382, "step": 2477000 }, { "epoch": 12.27, "learning_rate": 4.3865026282803964e-05, "loss": 2.4075, "step": 2477500 }, { "epoch": 12.28, "learning_rate": 4.386378769637788e-05, "loss": 2.3898, "step": 2478000 }, { "epoch": 12.28, "learning_rate": 4.38625491099518e-05, "loss": 2.4162, "step": 2478500 }, { "epoch": 12.28, "learning_rate": 4.386131052352571e-05, "loss": 2.4163, "step": 2479000 }, { "epoch": 12.28, "learning_rate": 4.3860071937099625e-05, "loss": 2.4165, "step": 2479500 }, { "epoch": 12.29, "learning_rate": 4.385883335067354e-05, "loss": 2.4039, "step": 2480000 }, { "epoch": 12.29, "learning_rate": 4.385759476424746e-05, "loss": 2.4346, "step": 2480500 }, { "epoch": 12.29, "learning_rate": 4.385635865499423e-05, "loss": 2.427, "step": 2481000 }, { "epoch": 12.29, "learning_rate": 4.3855120068568145e-05, "loss": 2.3986, "step": 2481500 }, { "epoch": 12.3, "learning_rate": 4.385388148214206e-05, "loss": 2.4292, "step": 2482000 }, { "epoch": 12.3, "learning_rate": 4.385264537288884e-05, "loss": 2.4181, "step": 2482500 }, { "epoch": 12.3, "learning_rate": 4.385140678646275e-05, "loss": 2.4078, "step": 2483000 }, { "epoch": 12.3, "learning_rate": 4.3850168200036664e-05, "loss": 2.426, "step": 2483500 }, { "epoch": 12.31, "learning_rate": 4.384892961361058e-05, "loss": 2.4333, "step": 2484000 }, { "epoch": 12.31, "learning_rate": 4.38476910271845e-05, "loss": 2.4103, "step": 2484500 }, { "epoch": 12.31, "learning_rate": 4.3846452440758415e-05, "loss": 2.418, "step": 2485000 }, { "epoch": 12.31, "learning_rate": 4.384521385433233e-05, "loss": 2.399, "step": 2485500 }, { "epoch": 12.32, "learning_rate": 4.384397526790624e-05, "loss": 2.408, "step": 2486000 }, { "epoch": 12.32, "learning_rate": 4.384273668148016e-05, "loss": 2.3786, "step": 2486500 }, { "epoch": 12.32, "learning_rate": 4.3841498095054076e-05, "loss": 2.434, "step": 2487000 }, { "epoch": 12.32, "learning_rate": 4.384025950862799e-05, "loss": 2.4179, "step": 2487500 }, { "epoch": 12.33, "learning_rate": 4.383902339937476e-05, "loss": 2.4173, "step": 2488000 }, { "epoch": 12.33, "learning_rate": 4.383778729012154e-05, "loss": 2.4215, "step": 2488500 }, { "epoch": 12.33, "learning_rate": 4.383655365804115e-05, "loss": 2.4273, "step": 2489000 }, { "epoch": 12.33, "learning_rate": 4.383531507161507e-05, "loss": 2.4262, "step": 2489500 }, { "epoch": 12.34, "learning_rate": 4.3834076485188985e-05, "loss": 2.4236, "step": 2490000 }, { "epoch": 12.34, "learning_rate": 4.38328378987629e-05, "loss": 2.3886, "step": 2490500 }, { "epoch": 12.34, "learning_rate": 4.383159931233682e-05, "loss": 2.4172, "step": 2491000 }, { "epoch": 12.34, "learning_rate": 4.3830360725910736e-05, "loss": 2.4084, "step": 2491500 }, { "epoch": 12.35, "learning_rate": 4.382912213948465e-05, "loss": 2.4236, "step": 2492000 }, { "epoch": 12.35, "learning_rate": 4.382788355305857e-05, "loss": 2.4233, "step": 2492500 }, { "epoch": 12.35, "learning_rate": 4.382664496663249e-05, "loss": 2.4178, "step": 2493000 }, { "epoch": 12.35, "learning_rate": 4.3825406380206404e-05, "loss": 2.3815, "step": 2493500 }, { "epoch": 12.36, "learning_rate": 4.382416779378032e-05, "loss": 2.4237, "step": 2494000 }, { "epoch": 12.36, "learning_rate": 4.382292920735424e-05, "loss": 2.397, "step": 2494500 }, { "epoch": 12.36, "learning_rate": 4.3821690620928154e-05, "loss": 2.4008, "step": 2495000 }, { "epoch": 12.36, "learning_rate": 4.3820452034502065e-05, "loss": 2.4192, "step": 2495500 }, { "epoch": 12.37, "learning_rate": 4.3819215925248833e-05, "loss": 2.3788, "step": 2496000 }, { "epoch": 12.37, "learning_rate": 4.381797733882275e-05, "loss": 2.3979, "step": 2496500 }, { "epoch": 12.37, "learning_rate": 4.381673875239667e-05, "loss": 2.3989, "step": 2497000 }, { "epoch": 12.37, "learning_rate": 4.3815500165970584e-05, "loss": 2.4017, "step": 2497500 }, { "epoch": 12.38, "learning_rate": 4.38142615795445e-05, "loss": 2.4174, "step": 2498000 }, { "epoch": 12.38, "learning_rate": 4.381302299311841e-05, "loss": 2.4145, "step": 2498500 }, { "epoch": 12.38, "learning_rate": 4.381178440669233e-05, "loss": 2.4302, "step": 2499000 }, { "epoch": 12.38, "learning_rate": 4.3810545820266245e-05, "loss": 2.4266, "step": 2499500 }, { "epoch": 12.39, "learning_rate": 4.380930971101302e-05, "loss": 2.3984, "step": 2500000 }, { "epoch": 12.39, "learning_rate": 4.380807360175978e-05, "loss": 2.4286, "step": 2500500 }, { "epoch": 12.39, "learning_rate": 4.38068350153337e-05, "loss": 2.3976, "step": 2501000 }, { "epoch": 12.39, "learning_rate": 4.380559890608047e-05, "loss": 2.4343, "step": 2501500 }, { "epoch": 12.4, "learning_rate": 4.3804360319654385e-05, "loss": 2.4084, "step": 2502000 }, { "epoch": 12.4, "learning_rate": 4.38031217332283e-05, "loss": 2.4167, "step": 2502500 }, { "epoch": 12.4, "learning_rate": 4.380188314680222e-05, "loss": 2.4184, "step": 2503000 }, { "epoch": 12.4, "learning_rate": 4.3800644560376136e-05, "loss": 2.424, "step": 2503500 }, { "epoch": 12.41, "learning_rate": 4.3799408451122905e-05, "loss": 2.4085, "step": 2504000 }, { "epoch": 12.41, "learning_rate": 4.379816986469682e-05, "loss": 2.4307, "step": 2504500 }, { "epoch": 12.41, "learning_rate": 4.379693127827074e-05, "loss": 2.4117, "step": 2505000 }, { "epoch": 12.41, "learning_rate": 4.3795692691844656e-05, "loss": 2.3954, "step": 2505500 }, { "epoch": 12.42, "learning_rate": 4.379445658259142e-05, "loss": 2.4061, "step": 2506000 }, { "epoch": 12.42, "learning_rate": 4.3793217996165335e-05, "loss": 2.4096, "step": 2506500 }, { "epoch": 12.42, "learning_rate": 4.379197940973925e-05, "loss": 2.3931, "step": 2507000 }, { "epoch": 12.42, "learning_rate": 4.379074082331317e-05, "loss": 2.4206, "step": 2507500 }, { "epoch": 12.43, "learning_rate": 4.3789502236887086e-05, "loss": 2.4262, "step": 2508000 }, { "epoch": 12.43, "learning_rate": 4.3788263650461e-05, "loss": 2.3758, "step": 2508500 }, { "epoch": 12.43, "learning_rate": 4.378702506403492e-05, "loss": 2.3879, "step": 2509000 }, { "epoch": 12.43, "learning_rate": 4.3785786477608836e-05, "loss": 2.4278, "step": 2509500 }, { "epoch": 12.44, "learning_rate": 4.3784550368355605e-05, "loss": 2.4015, "step": 2510000 }, { "epoch": 12.44, "learning_rate": 4.378331178192952e-05, "loss": 2.4261, "step": 2510500 }, { "epoch": 12.44, "learning_rate": 4.378207319550344e-05, "loss": 2.4078, "step": 2511000 }, { "epoch": 12.44, "learning_rate": 4.3780834609077356e-05, "loss": 2.3968, "step": 2511500 }, { "epoch": 12.45, "learning_rate": 4.377959602265127e-05, "loss": 2.3812, "step": 2512000 }, { "epoch": 12.45, "learning_rate": 4.377835743622519e-05, "loss": 2.3986, "step": 2512500 }, { "epoch": 12.45, "learning_rate": 4.377711884979911e-05, "loss": 2.4222, "step": 2513000 }, { "epoch": 12.45, "learning_rate": 4.377588026337302e-05, "loss": 2.4022, "step": 2513500 }, { "epoch": 12.46, "learning_rate": 4.3774641676946934e-05, "loss": 2.4151, "step": 2514000 }, { "epoch": 12.46, "learning_rate": 4.377340309052085e-05, "loss": 2.4226, "step": 2514500 }, { "epoch": 12.46, "learning_rate": 4.377216450409477e-05, "loss": 2.3848, "step": 2515000 }, { "epoch": 12.46, "learning_rate": 4.3770925917668685e-05, "loss": 2.4285, "step": 2515500 }, { "epoch": 12.47, "learning_rate": 4.37696873312426e-05, "loss": 2.4243, "step": 2516000 }, { "epoch": 12.47, "learning_rate": 4.376845122198937e-05, "loss": 2.4008, "step": 2516500 }, { "epoch": 12.47, "learning_rate": 4.376721263556329e-05, "loss": 2.4064, "step": 2517000 }, { "epoch": 12.47, "learning_rate": 4.3765974049137204e-05, "loss": 2.4022, "step": 2517500 }, { "epoch": 12.48, "learning_rate": 4.376473793988397e-05, "loss": 2.3923, "step": 2518000 }, { "epoch": 12.48, "learning_rate": 4.376349935345789e-05, "loss": 2.4132, "step": 2518500 }, { "epoch": 12.48, "learning_rate": 4.376226076703181e-05, "loss": 2.4178, "step": 2519000 }, { "epoch": 12.48, "learning_rate": 4.3761022180605724e-05, "loss": 2.4013, "step": 2519500 }, { "epoch": 12.48, "learning_rate": 4.375978359417964e-05, "loss": 2.4077, "step": 2520000 }, { "epoch": 12.49, "learning_rate": 4.375854500775355e-05, "loss": 2.4075, "step": 2520500 }, { "epoch": 12.49, "learning_rate": 4.375730642132747e-05, "loss": 2.4111, "step": 2521000 }, { "epoch": 12.49, "learning_rate": 4.375607031207424e-05, "loss": 2.3895, "step": 2521500 }, { "epoch": 12.49, "learning_rate": 4.3754831725648154e-05, "loss": 2.3763, "step": 2522000 }, { "epoch": 12.5, "learning_rate": 4.375359313922207e-05, "loss": 2.4228, "step": 2522500 }, { "epoch": 12.5, "learning_rate": 4.375235455279599e-05, "loss": 2.3938, "step": 2523000 }, { "epoch": 12.5, "learning_rate": 4.3751115966369904e-05, "loss": 2.404, "step": 2523500 }, { "epoch": 12.5, "learning_rate": 4.374987985711667e-05, "loss": 2.4042, "step": 2524000 }, { "epoch": 12.51, "learning_rate": 4.374864127069059e-05, "loss": 2.4155, "step": 2524500 }, { "epoch": 12.51, "learning_rate": 4.374740268426451e-05, "loss": 2.4128, "step": 2525000 }, { "epoch": 12.51, "learning_rate": 4.3746164097838424e-05, "loss": 2.3995, "step": 2525500 }, { "epoch": 12.51, "learning_rate": 4.374492551141234e-05, "loss": 2.3965, "step": 2526000 }, { "epoch": 12.52, "learning_rate": 4.374368692498626e-05, "loss": 2.426, "step": 2526500 }, { "epoch": 12.52, "learning_rate": 4.3742448338560175e-05, "loss": 2.4275, "step": 2527000 }, { "epoch": 12.52, "learning_rate": 4.3741209752134085e-05, "loss": 2.407, "step": 2527500 }, { "epoch": 12.52, "learning_rate": 4.3739973642880854e-05, "loss": 2.376, "step": 2528000 }, { "epoch": 12.53, "learning_rate": 4.373873505645477e-05, "loss": 2.4029, "step": 2528500 }, { "epoch": 12.53, "learning_rate": 4.373749647002869e-05, "loss": 2.4195, "step": 2529000 }, { "epoch": 12.53, "learning_rate": 4.3736257883602604e-05, "loss": 2.3921, "step": 2529500 }, { "epoch": 12.53, "learning_rate": 4.373501929717652e-05, "loss": 2.4134, "step": 2530000 }, { "epoch": 12.54, "learning_rate": 4.3733785665096135e-05, "loss": 2.4015, "step": 2530500 }, { "epoch": 12.54, "learning_rate": 4.373254707867005e-05, "loss": 2.4044, "step": 2531000 }, { "epoch": 12.54, "learning_rate": 4.373130849224397e-05, "loss": 2.4226, "step": 2531500 }, { "epoch": 12.54, "learning_rate": 4.3730069905817886e-05, "loss": 2.4238, "step": 2532000 }, { "epoch": 12.55, "learning_rate": 4.37288313193918e-05, "loss": 2.3989, "step": 2532500 }, { "epoch": 12.55, "learning_rate": 4.372759273296572e-05, "loss": 2.4198, "step": 2533000 }, { "epoch": 12.55, "learning_rate": 4.372635414653964e-05, "loss": 2.3914, "step": 2533500 }, { "epoch": 12.55, "learning_rate": 4.3725115560113554e-05, "loss": 2.4127, "step": 2534000 }, { "epoch": 12.56, "learning_rate": 4.372387697368747e-05, "loss": 2.4067, "step": 2534500 }, { "epoch": 12.56, "learning_rate": 4.372264086443424e-05, "loss": 2.4096, "step": 2535000 }, { "epoch": 12.56, "learning_rate": 4.3721402278008156e-05, "loss": 2.392, "step": 2535500 }, { "epoch": 12.56, "learning_rate": 4.372016369158207e-05, "loss": 2.4025, "step": 2536000 }, { "epoch": 12.57, "learning_rate": 4.371892510515599e-05, "loss": 2.3878, "step": 2536500 }, { "epoch": 12.57, "learning_rate": 4.371768651872991e-05, "loss": 2.428, "step": 2537000 }, { "epoch": 12.57, "learning_rate": 4.3716447932303824e-05, "loss": 2.3988, "step": 2537500 }, { "epoch": 12.57, "learning_rate": 4.371520934587774e-05, "loss": 2.4164, "step": 2538000 }, { "epoch": 12.58, "learning_rate": 4.37139732366245e-05, "loss": 2.4146, "step": 2538500 }, { "epoch": 12.58, "learning_rate": 4.371273465019842e-05, "loss": 2.4205, "step": 2539000 }, { "epoch": 12.58, "learning_rate": 4.3711498540945196e-05, "loss": 2.4266, "step": 2539500 }, { "epoch": 12.58, "learning_rate": 4.3710259954519106e-05, "loss": 2.405, "step": 2540000 }, { "epoch": 12.59, "learning_rate": 4.370902384526588e-05, "loss": 2.4024, "step": 2540500 }, { "epoch": 12.59, "learning_rate": 4.37077852588398e-05, "loss": 2.4214, "step": 2541000 }, { "epoch": 12.59, "learning_rate": 4.370654667241371e-05, "loss": 2.4359, "step": 2541500 }, { "epoch": 12.59, "learning_rate": 4.3705308085987625e-05, "loss": 2.4429, "step": 2542000 }, { "epoch": 12.6, "learning_rate": 4.370406949956154e-05, "loss": 2.4162, "step": 2542500 }, { "epoch": 12.6, "learning_rate": 4.370283339030831e-05, "loss": 2.4065, "step": 2543000 }, { "epoch": 12.6, "learning_rate": 4.370159480388223e-05, "loss": 2.4123, "step": 2543500 }, { "epoch": 12.6, "learning_rate": 4.3700356217456145e-05, "loss": 2.3914, "step": 2544000 }, { "epoch": 12.61, "learning_rate": 4.369911763103006e-05, "loss": 2.4199, "step": 2544500 }, { "epoch": 12.61, "learning_rate": 4.369787904460398e-05, "loss": 2.4165, "step": 2545000 }, { "epoch": 12.61, "learning_rate": 4.3696640458177896e-05, "loss": 2.4082, "step": 2545500 }, { "epoch": 12.61, "learning_rate": 4.3695401871751806e-05, "loss": 2.4234, "step": 2546000 }, { "epoch": 12.62, "learning_rate": 4.369416576249858e-05, "loss": 2.4354, "step": 2546500 }, { "epoch": 12.62, "learning_rate": 4.36929271760725e-05, "loss": 2.434, "step": 2547000 }, { "epoch": 12.62, "learning_rate": 4.3691688589646415e-05, "loss": 2.3977, "step": 2547500 }, { "epoch": 12.62, "learning_rate": 4.369045000322033e-05, "loss": 2.3974, "step": 2548000 }, { "epoch": 12.63, "learning_rate": 4.368921141679424e-05, "loss": 2.4089, "step": 2548500 }, { "epoch": 12.63, "learning_rate": 4.368797283036816e-05, "loss": 2.4315, "step": 2549000 }, { "epoch": 12.63, "learning_rate": 4.3686734243942076e-05, "loss": 2.4032, "step": 2549500 }, { "epoch": 12.63, "learning_rate": 4.368549565751599e-05, "loss": 2.4246, "step": 2550000 }, { "epoch": 12.64, "learning_rate": 4.368425707108991e-05, "loss": 2.3949, "step": 2550500 }, { "epoch": 12.64, "learning_rate": 4.368302096183668e-05, "loss": 2.4226, "step": 2551000 }, { "epoch": 12.64, "learning_rate": 4.3681782375410596e-05, "loss": 2.4192, "step": 2551500 }, { "epoch": 12.64, "learning_rate": 4.368054378898451e-05, "loss": 2.4099, "step": 2552000 }, { "epoch": 12.65, "learning_rate": 4.367930520255842e-05, "loss": 2.4061, "step": 2552500 }, { "epoch": 12.65, "learning_rate": 4.367806661613234e-05, "loss": 2.4307, "step": 2553000 }, { "epoch": 12.65, "learning_rate": 4.367683298405196e-05, "loss": 2.3988, "step": 2553500 }, { "epoch": 12.65, "learning_rate": 4.367559439762588e-05, "loss": 2.3905, "step": 2554000 }, { "epoch": 12.66, "learning_rate": 4.3674355811199794e-05, "loss": 2.4154, "step": 2554500 }, { "epoch": 12.66, "learning_rate": 4.367311722477371e-05, "loss": 2.4097, "step": 2555000 }, { "epoch": 12.66, "learning_rate": 4.367187863834763e-05, "loss": 2.4109, "step": 2555500 }, { "epoch": 12.66, "learning_rate": 4.3670640051921545e-05, "loss": 2.3961, "step": 2556000 }, { "epoch": 12.67, "learning_rate": 4.366940146549546e-05, "loss": 2.4039, "step": 2556500 }, { "epoch": 12.67, "learning_rate": 4.366816287906938e-05, "loss": 2.3979, "step": 2557000 }, { "epoch": 12.67, "learning_rate": 4.366692676981615e-05, "loss": 2.4154, "step": 2557500 }, { "epoch": 12.67, "learning_rate": 4.3665688183390065e-05, "loss": 2.4105, "step": 2558000 }, { "epoch": 12.68, "learning_rate": 4.366444959696398e-05, "loss": 2.4075, "step": 2558500 }, { "epoch": 12.68, "learning_rate": 4.36632110105379e-05, "loss": 2.4176, "step": 2559000 }, { "epoch": 12.68, "learning_rate": 4.3661972424111816e-05, "loss": 2.4234, "step": 2559500 }, { "epoch": 12.68, "learning_rate": 4.366073631485858e-05, "loss": 2.414, "step": 2560000 }, { "epoch": 12.69, "learning_rate": 4.3659497728432495e-05, "loss": 2.4034, "step": 2560500 }, { "epoch": 12.69, "learning_rate": 4.365825914200641e-05, "loss": 2.4229, "step": 2561000 }, { "epoch": 12.69, "learning_rate": 4.365702055558033e-05, "loss": 2.3905, "step": 2561500 }, { "epoch": 12.69, "learning_rate": 4.3655781969154245e-05, "loss": 2.4277, "step": 2562000 }, { "epoch": 12.7, "learning_rate": 4.365454338272816e-05, "loss": 2.4012, "step": 2562500 }, { "epoch": 12.7, "learning_rate": 4.365330479630208e-05, "loss": 2.4179, "step": 2563000 }, { "epoch": 12.7, "learning_rate": 4.365206868704885e-05, "loss": 2.4339, "step": 2563500 }, { "epoch": 12.7, "learning_rate": 4.3650830100622765e-05, "loss": 2.4012, "step": 2564000 }, { "epoch": 12.71, "learning_rate": 4.364959151419668e-05, "loss": 2.4154, "step": 2564500 }, { "epoch": 12.71, "learning_rate": 4.36483529277706e-05, "loss": 2.3908, "step": 2565000 }, { "epoch": 12.71, "learning_rate": 4.3647114341344516e-05, "loss": 2.4239, "step": 2565500 }, { "epoch": 12.71, "learning_rate": 4.364587575491843e-05, "loss": 2.403, "step": 2566000 }, { "epoch": 12.72, "learning_rate": 4.364463716849235e-05, "loss": 2.3735, "step": 2566500 }, { "epoch": 12.72, "learning_rate": 4.3643398582066267e-05, "loss": 2.4058, "step": 2567000 }, { "epoch": 12.72, "learning_rate": 4.364216247281303e-05, "loss": 2.4092, "step": 2567500 }, { "epoch": 12.72, "learning_rate": 4.36409263635598e-05, "loss": 2.4055, "step": 2568000 }, { "epoch": 12.73, "learning_rate": 4.3639687777133714e-05, "loss": 2.4084, "step": 2568500 }, { "epoch": 12.73, "learning_rate": 4.363844919070763e-05, "loss": 2.4078, "step": 2569000 }, { "epoch": 12.73, "learning_rate": 4.363721060428155e-05, "loss": 2.3979, "step": 2569500 }, { "epoch": 12.73, "learning_rate": 4.363597449502832e-05, "loss": 2.4249, "step": 2570000 }, { "epoch": 12.74, "learning_rate": 4.3634735908602234e-05, "loss": 2.4067, "step": 2570500 }, { "epoch": 12.74, "learning_rate": 4.3633499799349e-05, "loss": 2.4111, "step": 2571000 }, { "epoch": 12.74, "learning_rate": 4.363226121292291e-05, "loss": 2.3941, "step": 2571500 }, { "epoch": 12.74, "learning_rate": 4.363102262649683e-05, "loss": 2.4572, "step": 2572000 }, { "epoch": 12.75, "learning_rate": 4.362978404007075e-05, "loss": 2.428, "step": 2572500 }, { "epoch": 12.75, "learning_rate": 4.3628545453644664e-05, "loss": 2.4121, "step": 2573000 }, { "epoch": 12.75, "learning_rate": 4.362730686721858e-05, "loss": 2.4407, "step": 2573500 }, { "epoch": 12.75, "learning_rate": 4.36260682807925e-05, "loss": 2.4202, "step": 2574000 }, { "epoch": 12.75, "learning_rate": 4.3624829694366414e-05, "loss": 2.3971, "step": 2574500 }, { "epoch": 12.76, "learning_rate": 4.362359358511318e-05, "loss": 2.4459, "step": 2575000 }, { "epoch": 12.76, "learning_rate": 4.36223549986871e-05, "loss": 2.4038, "step": 2575500 }, { "epoch": 12.76, "learning_rate": 4.362111641226102e-05, "loss": 2.4283, "step": 2576000 }, { "epoch": 12.76, "learning_rate": 4.3619877825834934e-05, "loss": 2.4301, "step": 2576500 }, { "epoch": 12.77, "learning_rate": 4.361863923940885e-05, "loss": 2.4261, "step": 2577000 }, { "epoch": 12.77, "learning_rate": 4.361740313015562e-05, "loss": 2.4179, "step": 2577500 }, { "epoch": 12.77, "learning_rate": 4.361616702090239e-05, "loss": 2.4112, "step": 2578000 }, { "epoch": 12.77, "learning_rate": 4.3614928434476305e-05, "loss": 2.4111, "step": 2578500 }, { "epoch": 12.78, "learning_rate": 4.361368984805022e-05, "loss": 2.4339, "step": 2579000 }, { "epoch": 12.78, "learning_rate": 4.361245126162414e-05, "loss": 2.4154, "step": 2579500 }, { "epoch": 12.78, "learning_rate": 4.3611212675198056e-05, "loss": 2.4092, "step": 2580000 }, { "epoch": 12.78, "learning_rate": 4.360997408877197e-05, "loss": 2.4017, "step": 2580500 }, { "epoch": 12.79, "learning_rate": 4.3608737979518735e-05, "loss": 2.3993, "step": 2581000 }, { "epoch": 12.79, "learning_rate": 4.360749939309265e-05, "loss": 2.4092, "step": 2581500 }, { "epoch": 12.79, "learning_rate": 4.360626080666657e-05, "loss": 2.4148, "step": 2582000 }, { "epoch": 12.79, "learning_rate": 4.3605022220240486e-05, "loss": 2.4223, "step": 2582500 }, { "epoch": 12.8, "learning_rate": 4.36037836338144e-05, "loss": 2.4065, "step": 2583000 }, { "epoch": 12.8, "learning_rate": 4.360254504738832e-05, "loss": 2.4275, "step": 2583500 }, { "epoch": 12.8, "learning_rate": 4.360130646096223e-05, "loss": 2.3997, "step": 2584000 }, { "epoch": 12.8, "learning_rate": 4.360006787453615e-05, "loss": 2.4098, "step": 2584500 }, { "epoch": 12.81, "learning_rate": 4.3598829288110064e-05, "loss": 2.4142, "step": 2585000 }, { "epoch": 12.81, "learning_rate": 4.359759070168398e-05, "loss": 2.4112, "step": 2585500 }, { "epoch": 12.81, "learning_rate": 4.3596354592430756e-05, "loss": 2.4065, "step": 2586000 }, { "epoch": 12.81, "learning_rate": 4.359511600600467e-05, "loss": 2.4327, "step": 2586500 }, { "epoch": 12.82, "learning_rate": 4.3593879896751435e-05, "loss": 2.4185, "step": 2587000 }, { "epoch": 12.82, "learning_rate": 4.359264131032535e-05, "loss": 2.3929, "step": 2587500 }, { "epoch": 12.82, "learning_rate": 4.359140272389927e-05, "loss": 2.4129, "step": 2588000 }, { "epoch": 12.82, "learning_rate": 4.3590164137473186e-05, "loss": 2.4261, "step": 2588500 }, { "epoch": 12.83, "learning_rate": 4.35889255510471e-05, "loss": 2.4036, "step": 2589000 }, { "epoch": 12.83, "learning_rate": 4.358768696462102e-05, "loss": 2.4252, "step": 2589500 }, { "epoch": 12.83, "learning_rate": 4.358644837819494e-05, "loss": 2.4257, "step": 2590000 }, { "epoch": 12.83, "learning_rate": 4.358520979176885e-05, "loss": 2.3993, "step": 2590500 }, { "epoch": 12.84, "learning_rate": 4.3583971205342764e-05, "loss": 2.4082, "step": 2591000 }, { "epoch": 12.84, "learning_rate": 4.358273261891668e-05, "loss": 2.417, "step": 2591500 }, { "epoch": 12.84, "learning_rate": 4.35814940324906e-05, "loss": 2.4277, "step": 2592000 }, { "epoch": 12.84, "learning_rate": 4.3580257923237373e-05, "loss": 2.4523, "step": 2592500 }, { "epoch": 12.85, "learning_rate": 4.357901933681129e-05, "loss": 2.4193, "step": 2593000 }, { "epoch": 12.85, "learning_rate": 4.35777807503852e-05, "loss": 2.403, "step": 2593500 }, { "epoch": 12.85, "learning_rate": 4.357654216395912e-05, "loss": 2.4337, "step": 2594000 }, { "epoch": 12.85, "learning_rate": 4.3575303577533034e-05, "loss": 2.4078, "step": 2594500 }, { "epoch": 12.86, "learning_rate": 4.35740674682798e-05, "loss": 2.4119, "step": 2595000 }, { "epoch": 12.86, "learning_rate": 4.357282888185372e-05, "loss": 2.4088, "step": 2595500 }, { "epoch": 12.86, "learning_rate": 4.357159029542764e-05, "loss": 2.4246, "step": 2596000 }, { "epoch": 12.86, "learning_rate": 4.357035170900155e-05, "loss": 2.4001, "step": 2596500 }, { "epoch": 12.87, "learning_rate": 4.3569113122575464e-05, "loss": 2.4112, "step": 2597000 }, { "epoch": 12.87, "learning_rate": 4.356787701332224e-05, "loss": 2.4181, "step": 2597500 }, { "epoch": 12.87, "learning_rate": 4.356664090406901e-05, "loss": 2.43, "step": 2598000 }, { "epoch": 12.87, "learning_rate": 4.356540479481577e-05, "loss": 2.4409, "step": 2598500 }, { "epoch": 12.88, "learning_rate": 4.3564168685562546e-05, "loss": 2.4328, "step": 2599000 }, { "epoch": 12.88, "learning_rate": 4.356293009913646e-05, "loss": 2.4351, "step": 2599500 }, { "epoch": 12.88, "learning_rate": 4.356169151271038e-05, "loss": 2.4067, "step": 2600000 }, { "epoch": 12.88, "learning_rate": 4.356045292628429e-05, "loss": 2.4088, "step": 2600500 }, { "epoch": 12.89, "learning_rate": 4.355921433985821e-05, "loss": 2.4367, "step": 2601000 }, { "epoch": 12.89, "learning_rate": 4.3557975753432124e-05, "loss": 2.4095, "step": 2601500 }, { "epoch": 12.89, "learning_rate": 4.355673716700604e-05, "loss": 2.4283, "step": 2602000 }, { "epoch": 12.89, "learning_rate": 4.355549858057996e-05, "loss": 2.4218, "step": 2602500 }, { "epoch": 12.9, "learning_rate": 4.3554259994153875e-05, "loss": 2.4259, "step": 2603000 }, { "epoch": 12.9, "learning_rate": 4.355302140772779e-05, "loss": 2.3895, "step": 2603500 }, { "epoch": 12.9, "learning_rate": 4.355178282130171e-05, "loss": 2.416, "step": 2604000 }, { "epoch": 12.9, "learning_rate": 4.3550544234875626e-05, "loss": 2.3923, "step": 2604500 }, { "epoch": 12.91, "learning_rate": 4.354930812562239e-05, "loss": 2.3966, "step": 2605000 }, { "epoch": 12.91, "learning_rate": 4.3548069539196305e-05, "loss": 2.4156, "step": 2605500 }, { "epoch": 12.91, "learning_rate": 4.354683095277022e-05, "loss": 2.4116, "step": 2606000 }, { "epoch": 12.91, "learning_rate": 4.354559236634414e-05, "loss": 2.4211, "step": 2606500 }, { "epoch": 12.92, "learning_rate": 4.3544353779918055e-05, "loss": 2.4306, "step": 2607000 }, { "epoch": 12.92, "learning_rate": 4.354311519349197e-05, "loss": 2.3948, "step": 2607500 }, { "epoch": 12.92, "learning_rate": 4.354187660706589e-05, "loss": 2.4124, "step": 2608000 }, { "epoch": 12.92, "learning_rate": 4.3540638020639806e-05, "loss": 2.4227, "step": 2608500 }, { "epoch": 12.93, "learning_rate": 4.353939943421372e-05, "loss": 2.4166, "step": 2609000 }, { "epoch": 12.93, "learning_rate": 4.353816084778764e-05, "loss": 2.4322, "step": 2609500 }, { "epoch": 12.93, "learning_rate": 4.353692226136156e-05, "loss": 2.4191, "step": 2610000 }, { "epoch": 12.93, "learning_rate": 4.3535686152108326e-05, "loss": 2.424, "step": 2610500 }, { "epoch": 12.94, "learning_rate": 4.353444756568224e-05, "loss": 2.4131, "step": 2611000 }, { "epoch": 12.94, "learning_rate": 4.353320897925616e-05, "loss": 2.422, "step": 2611500 }, { "epoch": 12.94, "learning_rate": 4.3531970392830076e-05, "loss": 2.3948, "step": 2612000 }, { "epoch": 12.94, "learning_rate": 4.3530731806403993e-05, "loss": 2.4332, "step": 2612500 }, { "epoch": 12.95, "learning_rate": 4.352949321997791e-05, "loss": 2.4331, "step": 2613000 }, { "epoch": 12.95, "learning_rate": 4.352825463355182e-05, "loss": 2.4107, "step": 2613500 }, { "epoch": 12.95, "learning_rate": 4.352701604712574e-05, "loss": 2.4368, "step": 2614000 }, { "epoch": 12.95, "learning_rate": 4.3525779937872506e-05, "loss": 2.4095, "step": 2614500 }, { "epoch": 12.96, "learning_rate": 4.352454135144642e-05, "loss": 2.3953, "step": 2615000 }, { "epoch": 12.96, "learning_rate": 4.352330276502034e-05, "loss": 2.411, "step": 2615500 }, { "epoch": 12.96, "learning_rate": 4.352206417859426e-05, "loss": 2.3921, "step": 2616000 }, { "epoch": 12.96, "learning_rate": 4.3520828069341026e-05, "loss": 2.4145, "step": 2616500 }, { "epoch": 12.97, "learning_rate": 4.351958948291494e-05, "loss": 2.4006, "step": 2617000 }, { "epoch": 12.97, "learning_rate": 4.351835089648886e-05, "loss": 2.4215, "step": 2617500 }, { "epoch": 12.97, "learning_rate": 4.3517112310062777e-05, "loss": 2.4077, "step": 2618000 }, { "epoch": 12.97, "learning_rate": 4.3515873723636694e-05, "loss": 2.4019, "step": 2618500 }, { "epoch": 12.98, "learning_rate": 4.351463513721061e-05, "loss": 2.4322, "step": 2619000 }, { "epoch": 12.98, "learning_rate": 4.351339655078453e-05, "loss": 2.4438, "step": 2619500 }, { "epoch": 12.98, "learning_rate": 4.3512157964358444e-05, "loss": 2.3985, "step": 2620000 }, { "epoch": 12.98, "learning_rate": 4.3510919377932354e-05, "loss": 2.4106, "step": 2620500 }, { "epoch": 12.99, "learning_rate": 4.350968326867912e-05, "loss": 2.4127, "step": 2621000 }, { "epoch": 12.99, "learning_rate": 4.350844468225304e-05, "loss": 2.3968, "step": 2621500 }, { "epoch": 12.99, "learning_rate": 4.350720609582696e-05, "loss": 2.4192, "step": 2622000 }, { "epoch": 12.99, "learning_rate": 4.3505967509400874e-05, "loss": 2.4349, "step": 2622500 }, { "epoch": 13.0, "learning_rate": 4.350472892297479e-05, "loss": 2.403, "step": 2623000 }, { "epoch": 13.0, "learning_rate": 4.350349281372156e-05, "loss": 2.4137, "step": 2623500 }, { "epoch": 13.0, "eval_accuracy": 0.6461218698848585, "eval_accuracy_mlm": 0.5996847093159505, "eval_accuracy_nsp": 0.8652920665675657, "eval_loss": 2.396927833557129, "eval_runtime": 146.13, "eval_samples_per_second": 1744.741, "eval_steps_per_second": 72.702, "step": 2623959 }, { "epoch": 13.0, "learning_rate": 4.350225422729548e-05, "loss": 2.3977, "step": 2624000 }, { "epoch": 13.0, "learning_rate": 4.3501015640869394e-05, "loss": 2.374, "step": 2624500 }, { "epoch": 13.01, "learning_rate": 4.3499779531616156e-05, "loss": 2.3743, "step": 2625000 }, { "epoch": 13.01, "learning_rate": 4.349854094519007e-05, "loss": 2.3774, "step": 2625500 }, { "epoch": 13.01, "learning_rate": 4.349730483593684e-05, "loss": 2.3954, "step": 2626000 }, { "epoch": 13.01, "learning_rate": 4.349606624951076e-05, "loss": 2.3685, "step": 2626500 }, { "epoch": 13.02, "learning_rate": 4.3494827663084675e-05, "loss": 2.3846, "step": 2627000 }, { "epoch": 13.02, "learning_rate": 4.349358907665859e-05, "loss": 2.4169, "step": 2627500 }, { "epoch": 13.02, "learning_rate": 4.349235049023251e-05, "loss": 2.3881, "step": 2628000 }, { "epoch": 13.02, "learning_rate": 4.3491111903806426e-05, "loss": 2.3866, "step": 2628500 }, { "epoch": 13.02, "learning_rate": 4.3489875794553195e-05, "loss": 2.3958, "step": 2629000 }, { "epoch": 13.03, "learning_rate": 4.3488639685299964e-05, "loss": 2.3686, "step": 2629500 }, { "epoch": 13.03, "learning_rate": 4.348740109887388e-05, "loss": 2.3922, "step": 2630000 }, { "epoch": 13.03, "learning_rate": 4.34861625124478e-05, "loss": 2.3662, "step": 2630500 }, { "epoch": 13.03, "learning_rate": 4.3484923926021714e-05, "loss": 2.3793, "step": 2631000 }, { "epoch": 13.04, "learning_rate": 4.3483685339595625e-05, "loss": 2.3712, "step": 2631500 }, { "epoch": 13.04, "learning_rate": 4.348244675316954e-05, "loss": 2.4067, "step": 2632000 }, { "epoch": 13.04, "learning_rate": 4.348120816674346e-05, "loss": 2.3861, "step": 2632500 }, { "epoch": 13.04, "learning_rate": 4.3479972057490234e-05, "loss": 2.4153, "step": 2633000 }, { "epoch": 13.05, "learning_rate": 4.347873347106415e-05, "loss": 2.3681, "step": 2633500 }, { "epoch": 13.05, "learning_rate": 4.347749488463807e-05, "loss": 2.3843, "step": 2634000 }, { "epoch": 13.05, "learning_rate": 4.347625629821198e-05, "loss": 2.3793, "step": 2634500 }, { "epoch": 13.05, "learning_rate": 4.3475017711785895e-05, "loss": 2.3923, "step": 2635000 }, { "epoch": 13.06, "learning_rate": 4.347377912535981e-05, "loss": 2.3799, "step": 2635500 }, { "epoch": 13.06, "learning_rate": 4.347254053893373e-05, "loss": 2.3595, "step": 2636000 }, { "epoch": 13.06, "learning_rate": 4.3471301952507646e-05, "loss": 2.4064, "step": 2636500 }, { "epoch": 13.06, "learning_rate": 4.3470063366081556e-05, "loss": 2.4083, "step": 2637000 }, { "epoch": 13.07, "learning_rate": 4.346882477965547e-05, "loss": 2.3916, "step": 2637500 }, { "epoch": 13.07, "learning_rate": 4.346758619322939e-05, "loss": 2.372, "step": 2638000 }, { "epoch": 13.07, "learning_rate": 4.346634760680331e-05, "loss": 2.3781, "step": 2638500 }, { "epoch": 13.07, "learning_rate": 4.3465109020377224e-05, "loss": 2.408, "step": 2639000 }, { "epoch": 13.08, "learning_rate": 4.346387043395114e-05, "loss": 2.4136, "step": 2639500 }, { "epoch": 13.08, "learning_rate": 4.346263432469791e-05, "loss": 2.3621, "step": 2640000 }, { "epoch": 13.08, "learning_rate": 4.3461398215444685e-05, "loss": 2.3862, "step": 2640500 }, { "epoch": 13.08, "learning_rate": 4.3460159629018595e-05, "loss": 2.3855, "step": 2641000 }, { "epoch": 13.09, "learning_rate": 4.345892104259251e-05, "loss": 2.4046, "step": 2641500 }, { "epoch": 13.09, "learning_rate": 4.345768245616643e-05, "loss": 2.3887, "step": 2642000 }, { "epoch": 13.09, "learning_rate": 4.3456443869740346e-05, "loss": 2.3702, "step": 2642500 }, { "epoch": 13.09, "learning_rate": 4.3455207760487115e-05, "loss": 2.4123, "step": 2643000 }, { "epoch": 13.1, "learning_rate": 4.345396917406103e-05, "loss": 2.3679, "step": 2643500 }, { "epoch": 13.1, "learning_rate": 4.345273058763494e-05, "loss": 2.3806, "step": 2644000 }, { "epoch": 13.1, "learning_rate": 4.345149200120886e-05, "loss": 2.4071, "step": 2644500 }, { "epoch": 13.1, "learning_rate": 4.3450253414782776e-05, "loss": 2.3682, "step": 2645000 }, { "epoch": 13.11, "learning_rate": 4.344901482835669e-05, "loss": 2.375, "step": 2645500 }, { "epoch": 13.11, "learning_rate": 4.344777624193061e-05, "loss": 2.3903, "step": 2646000 }, { "epoch": 13.11, "learning_rate": 4.3446537655504526e-05, "loss": 2.3944, "step": 2646500 }, { "epoch": 13.11, "learning_rate": 4.344529906907844e-05, "loss": 2.3925, "step": 2647000 }, { "epoch": 13.12, "learning_rate": 4.344406048265236e-05, "loss": 2.3984, "step": 2647500 }, { "epoch": 13.12, "learning_rate": 4.344282189622628e-05, "loss": 2.4028, "step": 2648000 }, { "epoch": 13.12, "learning_rate": 4.3441583309800194e-05, "loss": 2.3867, "step": 2648500 }, { "epoch": 13.12, "learning_rate": 4.344034472337411e-05, "loss": 2.4064, "step": 2649000 }, { "epoch": 13.13, "learning_rate": 4.343910613694803e-05, "loss": 2.3914, "step": 2649500 }, { "epoch": 13.13, "learning_rate": 4.34378700276948e-05, "loss": 2.4027, "step": 2650000 }, { "epoch": 13.13, "learning_rate": 4.343663391844156e-05, "loss": 2.4052, "step": 2650500 }, { "epoch": 13.13, "learning_rate": 4.3435395332015476e-05, "loss": 2.3882, "step": 2651000 }, { "epoch": 13.14, "learning_rate": 4.3434161699935096e-05, "loss": 2.3821, "step": 2651500 }, { "epoch": 13.14, "learning_rate": 4.3432923113509013e-05, "loss": 2.4076, "step": 2652000 }, { "epoch": 13.14, "learning_rate": 4.343168452708293e-05, "loss": 2.3935, "step": 2652500 }, { "epoch": 13.14, "learning_rate": 4.34304484178297e-05, "loss": 2.3977, "step": 2653000 }, { "epoch": 13.15, "learning_rate": 4.3429209831403616e-05, "loss": 2.3899, "step": 2653500 }, { "epoch": 13.15, "learning_rate": 4.342797124497753e-05, "loss": 2.3781, "step": 2654000 }, { "epoch": 13.15, "learning_rate": 4.342673265855145e-05, "loss": 2.3914, "step": 2654500 }, { "epoch": 13.15, "learning_rate": 4.342549407212537e-05, "loss": 2.3926, "step": 2655000 }, { "epoch": 13.16, "learning_rate": 4.3424255485699284e-05, "loss": 2.4267, "step": 2655500 }, { "epoch": 13.16, "learning_rate": 4.34230168992732e-05, "loss": 2.3881, "step": 2656000 }, { "epoch": 13.16, "learning_rate": 4.342177831284712e-05, "loss": 2.3844, "step": 2656500 }, { "epoch": 13.16, "learning_rate": 4.3420539726421035e-05, "loss": 2.4005, "step": 2657000 }, { "epoch": 13.17, "learning_rate": 4.341930113999495e-05, "loss": 2.3819, "step": 2657500 }, { "epoch": 13.17, "learning_rate": 4.341806255356887e-05, "loss": 2.395, "step": 2658000 }, { "epoch": 13.17, "learning_rate": 4.341682644431563e-05, "loss": 2.371, "step": 2658500 }, { "epoch": 13.17, "learning_rate": 4.341558785788955e-05, "loss": 2.3832, "step": 2659000 }, { "epoch": 13.18, "learning_rate": 4.3414349271463464e-05, "loss": 2.391, "step": 2659500 }, { "epoch": 13.18, "learning_rate": 4.341311068503738e-05, "loss": 2.3821, "step": 2660000 }, { "epoch": 13.18, "learning_rate": 4.34118720986113e-05, "loss": 2.3738, "step": 2660500 }, { "epoch": 13.18, "learning_rate": 4.3410633512185215e-05, "loss": 2.398, "step": 2661000 }, { "epoch": 13.19, "learning_rate": 4.340939492575913e-05, "loss": 2.395, "step": 2661500 }, { "epoch": 13.19, "learning_rate": 4.340815633933305e-05, "loss": 2.3963, "step": 2662000 }, { "epoch": 13.19, "learning_rate": 4.3406917752906966e-05, "loss": 2.3994, "step": 2662500 }, { "epoch": 13.19, "learning_rate": 4.3405679166480876e-05, "loss": 2.3915, "step": 2663000 }, { "epoch": 13.2, "learning_rate": 4.340444058005479e-05, "loss": 2.4018, "step": 2663500 }, { "epoch": 13.2, "learning_rate": 4.340320199362871e-05, "loss": 2.3685, "step": 2664000 }, { "epoch": 13.2, "learning_rate": 4.340196340720263e-05, "loss": 2.4209, "step": 2664500 }, { "epoch": 13.2, "learning_rate": 4.3400724820776544e-05, "loss": 2.406, "step": 2665000 }, { "epoch": 13.21, "learning_rate": 4.339948623435046e-05, "loss": 2.3968, "step": 2665500 }, { "epoch": 13.21, "learning_rate": 4.339824764792438e-05, "loss": 2.3927, "step": 2666000 }, { "epoch": 13.21, "learning_rate": 4.3397011538671146e-05, "loss": 2.3701, "step": 2666500 }, { "epoch": 13.21, "learning_rate": 4.339577295224506e-05, "loss": 2.364, "step": 2667000 }, { "epoch": 13.22, "learning_rate": 4.339453436581898e-05, "loss": 2.3787, "step": 2667500 }, { "epoch": 13.22, "learning_rate": 4.33932957793929e-05, "loss": 2.3985, "step": 2668000 }, { "epoch": 13.22, "learning_rate": 4.3392057192966814e-05, "loss": 2.379, "step": 2668500 }, { "epoch": 13.22, "learning_rate": 4.339081860654073e-05, "loss": 2.3979, "step": 2669000 }, { "epoch": 13.23, "learning_rate": 4.338958002011465e-05, "loss": 2.3876, "step": 2669500 }, { "epoch": 13.23, "learning_rate": 4.3388341433688565e-05, "loss": 2.4065, "step": 2670000 }, { "epoch": 13.23, "learning_rate": 4.338710284726248e-05, "loss": 2.3983, "step": 2670500 }, { "epoch": 13.23, "learning_rate": 4.338586426083639e-05, "loss": 2.4001, "step": 2671000 }, { "epoch": 13.24, "learning_rate": 4.338462567441031e-05, "loss": 2.4228, "step": 2671500 }, { "epoch": 13.24, "learning_rate": 4.338338956515708e-05, "loss": 2.3995, "step": 2672000 }, { "epoch": 13.24, "learning_rate": 4.3382153455903847e-05, "loss": 2.3867, "step": 2672500 }, { "epoch": 13.24, "learning_rate": 4.3380914869477763e-05, "loss": 2.3989, "step": 2673000 }, { "epoch": 13.25, "learning_rate": 4.337967628305168e-05, "loss": 2.4051, "step": 2673500 }, { "epoch": 13.25, "learning_rate": 4.33784376966256e-05, "loss": 2.4195, "step": 2674000 }, { "epoch": 13.25, "learning_rate": 4.3377199110199514e-05, "loss": 2.4004, "step": 2674500 }, { "epoch": 13.25, "learning_rate": 4.337596052377343e-05, "loss": 2.3939, "step": 2675000 }, { "epoch": 13.26, "learning_rate": 4.337472193734735e-05, "loss": 2.3849, "step": 2675500 }, { "epoch": 13.26, "learning_rate": 4.3373483350921265e-05, "loss": 2.3963, "step": 2676000 }, { "epoch": 13.26, "learning_rate": 4.337224476449518e-05, "loss": 2.4051, "step": 2676500 }, { "epoch": 13.26, "learning_rate": 4.3371008655241944e-05, "loss": 2.3968, "step": 2677000 }, { "epoch": 13.27, "learning_rate": 4.336977006881586e-05, "loss": 2.4036, "step": 2677500 }, { "epoch": 13.27, "learning_rate": 4.336853148238978e-05, "loss": 2.4113, "step": 2678000 }, { "epoch": 13.27, "learning_rate": 4.3367292895963695e-05, "loss": 2.4127, "step": 2678500 }, { "epoch": 13.27, "learning_rate": 4.336605430953761e-05, "loss": 2.3876, "step": 2679000 }, { "epoch": 13.28, "learning_rate": 4.336481572311153e-05, "loss": 2.4134, "step": 2679500 }, { "epoch": 13.28, "learning_rate": 4.33635796138583e-05, "loss": 2.4021, "step": 2680000 }, { "epoch": 13.28, "learning_rate": 4.3362343504605066e-05, "loss": 2.401, "step": 2680500 }, { "epoch": 13.28, "learning_rate": 4.336110491817898e-05, "loss": 2.3873, "step": 2681000 }, { "epoch": 13.29, "learning_rate": 4.335986633175289e-05, "loss": 2.394, "step": 2681500 }, { "epoch": 13.29, "learning_rate": 4.335863022249967e-05, "loss": 2.4049, "step": 2682000 }, { "epoch": 13.29, "learning_rate": 4.3357391636073586e-05, "loss": 2.3955, "step": 2682500 }, { "epoch": 13.29, "learning_rate": 4.33561530496475e-05, "loss": 2.4062, "step": 2683000 }, { "epoch": 13.29, "learning_rate": 4.335491446322142e-05, "loss": 2.427, "step": 2683500 }, { "epoch": 13.3, "learning_rate": 4.335367587679534e-05, "loss": 2.4073, "step": 2684000 }, { "epoch": 13.3, "learning_rate": 4.3352437290369254e-05, "loss": 2.4097, "step": 2684500 }, { "epoch": 13.3, "learning_rate": 4.3351198703943164e-05, "loss": 2.3962, "step": 2685000 }, { "epoch": 13.3, "learning_rate": 4.334996011751708e-05, "loss": 2.3851, "step": 2685500 }, { "epoch": 13.31, "learning_rate": 4.3348721531091e-05, "loss": 2.4041, "step": 2686000 }, { "epoch": 13.31, "learning_rate": 4.3347482944664914e-05, "loss": 2.3944, "step": 2686500 }, { "epoch": 13.31, "learning_rate": 4.334624435823883e-05, "loss": 2.4004, "step": 2687000 }, { "epoch": 13.31, "learning_rate": 4.334501072615845e-05, "loss": 2.3767, "step": 2687500 }, { "epoch": 13.32, "learning_rate": 4.334377213973237e-05, "loss": 2.3833, "step": 2688000 }, { "epoch": 13.32, "learning_rate": 4.3342533553306286e-05, "loss": 2.3789, "step": 2688500 }, { "epoch": 13.32, "learning_rate": 4.3341297444053055e-05, "loss": 2.4131, "step": 2689000 }, { "epoch": 13.32, "learning_rate": 4.334005885762697e-05, "loss": 2.404, "step": 2689500 }, { "epoch": 13.33, "learning_rate": 4.333882027120089e-05, "loss": 2.4251, "step": 2690000 }, { "epoch": 13.33, "learning_rate": 4.3337581684774806e-05, "loss": 2.4047, "step": 2690500 }, { "epoch": 13.33, "learning_rate": 4.333634309834872e-05, "loss": 2.384, "step": 2691000 }, { "epoch": 13.33, "learning_rate": 4.333510451192264e-05, "loss": 2.4168, "step": 2691500 }, { "epoch": 13.34, "learning_rate": 4.333386592549655e-05, "loss": 2.3786, "step": 2692000 }, { "epoch": 13.34, "learning_rate": 4.3332627339070467e-05, "loss": 2.3896, "step": 2692500 }, { "epoch": 13.34, "learning_rate": 4.3331388752644383e-05, "loss": 2.3878, "step": 2693000 }, { "epoch": 13.34, "learning_rate": 4.33301501662183e-05, "loss": 2.3996, "step": 2693500 }, { "epoch": 13.35, "learning_rate": 4.332891157979222e-05, "loss": 2.4324, "step": 2694000 }, { "epoch": 13.35, "learning_rate": 4.3327675470538986e-05, "loss": 2.3555, "step": 2694500 }, { "epoch": 13.35, "learning_rate": 4.33264368841129e-05, "loss": 2.4025, "step": 2695000 }, { "epoch": 13.35, "learning_rate": 4.332520077485967e-05, "loss": 2.4122, "step": 2695500 }, { "epoch": 13.36, "learning_rate": 4.332396218843359e-05, "loss": 2.3975, "step": 2696000 }, { "epoch": 13.36, "learning_rate": 4.3322723602007506e-05, "loss": 2.3986, "step": 2696500 }, { "epoch": 13.36, "learning_rate": 4.332148501558142e-05, "loss": 2.378, "step": 2697000 }, { "epoch": 13.36, "learning_rate": 4.332024642915534e-05, "loss": 2.4197, "step": 2697500 }, { "epoch": 13.37, "learning_rate": 4.3319007842729256e-05, "loss": 2.4088, "step": 2698000 }, { "epoch": 13.37, "learning_rate": 4.3317769256303173e-05, "loss": 2.427, "step": 2698500 }, { "epoch": 13.37, "learning_rate": 4.3316530669877084e-05, "loss": 2.4063, "step": 2699000 }, { "epoch": 13.37, "learning_rate": 4.3315292083451e-05, "loss": 2.4056, "step": 2699500 }, { "epoch": 13.38, "learning_rate": 4.331405349702492e-05, "loss": 2.3971, "step": 2700000 }, { "epoch": 13.38, "learning_rate": 4.3312814910598834e-05, "loss": 2.4082, "step": 2700500 }, { "epoch": 13.38, "learning_rate": 4.331157632417275e-05, "loss": 2.401, "step": 2701000 }, { "epoch": 13.38, "learning_rate": 4.331033773774666e-05, "loss": 2.3885, "step": 2701500 }, { "epoch": 13.39, "learning_rate": 4.330909915132058e-05, "loss": 2.3773, "step": 2702000 }, { "epoch": 13.39, "learning_rate": 4.3307860564894495e-05, "loss": 2.441, "step": 2702500 }, { "epoch": 13.39, "learning_rate": 4.330662197846841e-05, "loss": 2.4094, "step": 2703000 }, { "epoch": 13.39, "learning_rate": 4.330538339204233e-05, "loss": 2.4056, "step": 2703500 }, { "epoch": 13.4, "learning_rate": 4.3304144805616246e-05, "loss": 2.4129, "step": 2704000 }, { "epoch": 13.4, "learning_rate": 4.3302908696363015e-05, "loss": 2.3796, "step": 2704500 }, { "epoch": 13.4, "learning_rate": 4.330167010993693e-05, "loss": 2.3743, "step": 2705000 }, { "epoch": 13.4, "learning_rate": 4.330043152351085e-05, "loss": 2.4091, "step": 2705500 }, { "epoch": 13.41, "learning_rate": 4.329919789143047e-05, "loss": 2.3977, "step": 2706000 }, { "epoch": 13.41, "learning_rate": 4.3297959305004386e-05, "loss": 2.3935, "step": 2706500 }, { "epoch": 13.41, "learning_rate": 4.32967207185783e-05, "loss": 2.4121, "step": 2707000 }, { "epoch": 13.41, "learning_rate": 4.329548213215222e-05, "loss": 2.4118, "step": 2707500 }, { "epoch": 13.42, "learning_rate": 4.329424354572614e-05, "loss": 2.4029, "step": 2708000 }, { "epoch": 13.42, "learning_rate": 4.3293007436472906e-05, "loss": 2.3758, "step": 2708500 }, { "epoch": 13.42, "learning_rate": 4.329176885004682e-05, "loss": 2.3919, "step": 2709000 }, { "epoch": 13.42, "learning_rate": 4.329053026362074e-05, "loss": 2.408, "step": 2709500 }, { "epoch": 13.43, "learning_rate": 4.328929167719466e-05, "loss": 2.3962, "step": 2710000 }, { "epoch": 13.43, "learning_rate": 4.3288053090768574e-05, "loss": 2.3936, "step": 2710500 }, { "epoch": 13.43, "learning_rate": 4.328681450434249e-05, "loss": 2.402, "step": 2711000 }, { "epoch": 13.43, "learning_rate": 4.328557839508925e-05, "loss": 2.3929, "step": 2711500 }, { "epoch": 13.44, "learning_rate": 4.328433980866317e-05, "loss": 2.3941, "step": 2712000 }, { "epoch": 13.44, "learning_rate": 4.3283101222237086e-05, "loss": 2.396, "step": 2712500 }, { "epoch": 13.44, "learning_rate": 4.3281862635811003e-05, "loss": 2.4242, "step": 2713000 }, { "epoch": 13.44, "learning_rate": 4.328062404938492e-05, "loss": 2.4014, "step": 2713500 }, { "epoch": 13.45, "learning_rate": 4.327938546295884e-05, "loss": 2.3839, "step": 2714000 }, { "epoch": 13.45, "learning_rate": 4.3278146876532754e-05, "loss": 2.4211, "step": 2714500 }, { "epoch": 13.45, "learning_rate": 4.327691076727952e-05, "loss": 2.3848, "step": 2715000 }, { "epoch": 13.45, "learning_rate": 4.327567218085344e-05, "loss": 2.4099, "step": 2715500 }, { "epoch": 13.46, "learning_rate": 4.327443359442736e-05, "loss": 2.4064, "step": 2716000 }, { "epoch": 13.46, "learning_rate": 4.3273195008001274e-05, "loss": 2.4105, "step": 2716500 }, { "epoch": 13.46, "learning_rate": 4.327195642157519e-05, "loss": 2.4022, "step": 2717000 }, { "epoch": 13.46, "learning_rate": 4.327071783514911e-05, "loss": 2.3729, "step": 2717500 }, { "epoch": 13.47, "learning_rate": 4.3269479248723025e-05, "loss": 2.4031, "step": 2718000 }, { "epoch": 13.47, "learning_rate": 4.326824066229694e-05, "loss": 2.3839, "step": 2718500 }, { "epoch": 13.47, "learning_rate": 4.3267004553043704e-05, "loss": 2.4294, "step": 2719000 }, { "epoch": 13.47, "learning_rate": 4.326576596661762e-05, "loss": 2.4108, "step": 2719500 }, { "epoch": 13.48, "learning_rate": 4.326452738019154e-05, "loss": 2.4364, "step": 2720000 }, { "epoch": 13.48, "learning_rate": 4.3263288793765454e-05, "loss": 2.4068, "step": 2720500 }, { "epoch": 13.48, "learning_rate": 4.326205020733937e-05, "loss": 2.4131, "step": 2721000 }, { "epoch": 13.48, "learning_rate": 4.326081162091329e-05, "loss": 2.3866, "step": 2721500 }, { "epoch": 13.49, "learning_rate": 4.325957551166006e-05, "loss": 2.388, "step": 2722000 }, { "epoch": 13.49, "learning_rate": 4.3258336925233974e-05, "loss": 2.3977, "step": 2722500 }, { "epoch": 13.49, "learning_rate": 4.3257100815980736e-05, "loss": 2.4153, "step": 2723000 }, { "epoch": 13.49, "learning_rate": 4.325586222955465e-05, "loss": 2.3835, "step": 2723500 }, { "epoch": 13.5, "learning_rate": 4.325462364312857e-05, "loss": 2.4049, "step": 2724000 }, { "epoch": 13.5, "learning_rate": 4.325338505670249e-05, "loss": 2.4043, "step": 2724500 }, { "epoch": 13.5, "learning_rate": 4.3252146470276404e-05, "loss": 2.4012, "step": 2725000 }, { "epoch": 13.5, "learning_rate": 4.325090788385032e-05, "loss": 2.3883, "step": 2725500 }, { "epoch": 13.51, "learning_rate": 4.324966929742424e-05, "loss": 2.4236, "step": 2726000 }, { "epoch": 13.51, "learning_rate": 4.3248430710998154e-05, "loss": 2.3839, "step": 2726500 }, { "epoch": 13.51, "learning_rate": 4.324719212457207e-05, "loss": 2.4157, "step": 2727000 }, { "epoch": 13.51, "learning_rate": 4.324595353814599e-05, "loss": 2.3927, "step": 2727500 }, { "epoch": 13.52, "learning_rate": 4.324471742889276e-05, "loss": 2.388, "step": 2728000 }, { "epoch": 13.52, "learning_rate": 4.3243481319639526e-05, "loss": 2.3996, "step": 2728500 }, { "epoch": 13.52, "learning_rate": 4.3242242733213436e-05, "loss": 2.4144, "step": 2729000 }, { "epoch": 13.52, "learning_rate": 4.324100414678735e-05, "loss": 2.4069, "step": 2729500 }, { "epoch": 13.53, "learning_rate": 4.323976556036127e-05, "loss": 2.4112, "step": 2730000 }, { "epoch": 13.53, "learning_rate": 4.323852697393519e-05, "loss": 2.4076, "step": 2730500 }, { "epoch": 13.53, "learning_rate": 4.3237288387509104e-05, "loss": 2.4015, "step": 2731000 }, { "epoch": 13.53, "learning_rate": 4.323604980108302e-05, "loss": 2.4073, "step": 2731500 }, { "epoch": 13.54, "learning_rate": 4.323481369182979e-05, "loss": 2.428, "step": 2732000 }, { "epoch": 13.54, "learning_rate": 4.3233575105403706e-05, "loss": 2.3869, "step": 2732500 }, { "epoch": 13.54, "learning_rate": 4.323233651897762e-05, "loss": 2.411, "step": 2733000 }, { "epoch": 13.54, "learning_rate": 4.323109793255154e-05, "loss": 2.3869, "step": 2733500 }, { "epoch": 13.55, "learning_rate": 4.322985934612546e-05, "loss": 2.4143, "step": 2734000 }, { "epoch": 13.55, "learning_rate": 4.3228620759699374e-05, "loss": 2.4027, "step": 2734500 }, { "epoch": 13.55, "learning_rate": 4.322738217327329e-05, "loss": 2.3911, "step": 2735000 }, { "epoch": 13.55, "learning_rate": 4.322614606402006e-05, "loss": 2.4308, "step": 2735500 }, { "epoch": 13.56, "learning_rate": 4.322490747759397e-05, "loss": 2.4117, "step": 2736000 }, { "epoch": 13.56, "learning_rate": 4.322366889116789e-05, "loss": 2.4093, "step": 2736500 }, { "epoch": 13.56, "learning_rate": 4.3222430304741804e-05, "loss": 2.372, "step": 2737000 }, { "epoch": 13.56, "learning_rate": 4.322119171831572e-05, "loss": 2.3907, "step": 2737500 }, { "epoch": 13.56, "learning_rate": 4.321995313188964e-05, "loss": 2.3867, "step": 2738000 }, { "epoch": 13.57, "learning_rate": 4.3218714545463555e-05, "loss": 2.3667, "step": 2738500 }, { "epoch": 13.57, "learning_rate": 4.3217478436210323e-05, "loss": 2.3809, "step": 2739000 }, { "epoch": 13.57, "learning_rate": 4.321623984978424e-05, "loss": 2.4062, "step": 2739500 }, { "epoch": 13.57, "learning_rate": 4.321500126335816e-05, "loss": 2.396, "step": 2740000 }, { "epoch": 13.58, "learning_rate": 4.3213762676932074e-05, "loss": 2.4295, "step": 2740500 }, { "epoch": 13.58, "learning_rate": 4.321252409050599e-05, "loss": 2.4223, "step": 2741000 }, { "epoch": 13.58, "learning_rate": 4.321128550407991e-05, "loss": 2.4312, "step": 2741500 }, { "epoch": 13.58, "learning_rate": 4.3210046917653825e-05, "loss": 2.408, "step": 2742000 }, { "epoch": 13.59, "learning_rate": 4.320880833122774e-05, "loss": 2.391, "step": 2742500 }, { "epoch": 13.59, "learning_rate": 4.320756974480166e-05, "loss": 2.4148, "step": 2743000 }, { "epoch": 13.59, "learning_rate": 4.320633363554842e-05, "loss": 2.3847, "step": 2743500 }, { "epoch": 13.59, "learning_rate": 4.320509504912234e-05, "loss": 2.4125, "step": 2744000 }, { "epoch": 13.6, "learning_rate": 4.3203856462696255e-05, "loss": 2.4068, "step": 2744500 }, { "epoch": 13.6, "learning_rate": 4.320261787627017e-05, "loss": 2.3831, "step": 2745000 }, { "epoch": 13.6, "learning_rate": 4.320137928984409e-05, "loss": 2.4067, "step": 2745500 }, { "epoch": 13.6, "learning_rate": 4.3200140703418006e-05, "loss": 2.3968, "step": 2746000 }, { "epoch": 13.61, "learning_rate": 4.319890211699192e-05, "loss": 2.4172, "step": 2746500 }, { "epoch": 13.61, "learning_rate": 4.319766353056584e-05, "loss": 2.4011, "step": 2747000 }, { "epoch": 13.61, "learning_rate": 4.319642494413975e-05, "loss": 2.4083, "step": 2747500 }, { "epoch": 13.61, "learning_rate": 4.3195188834886525e-05, "loss": 2.4248, "step": 2748000 }, { "epoch": 13.62, "learning_rate": 4.319395024846044e-05, "loss": 2.3911, "step": 2748500 }, { "epoch": 13.62, "learning_rate": 4.319271166203436e-05, "loss": 2.3895, "step": 2749000 }, { "epoch": 13.62, "learning_rate": 4.3191473075608276e-05, "loss": 2.4364, "step": 2749500 }, { "epoch": 13.62, "learning_rate": 4.319023448918219e-05, "loss": 2.4193, "step": 2750000 }, { "epoch": 13.63, "learning_rate": 4.3188998379928955e-05, "loss": 2.4033, "step": 2750500 }, { "epoch": 13.63, "learning_rate": 4.318775979350287e-05, "loss": 2.4104, "step": 2751000 }, { "epoch": 13.63, "learning_rate": 4.318652120707679e-05, "loss": 2.4298, "step": 2751500 }, { "epoch": 13.63, "learning_rate": 4.3185282620650706e-05, "loss": 2.3979, "step": 2752000 }, { "epoch": 13.64, "learning_rate": 4.318404403422462e-05, "loss": 2.39, "step": 2752500 }, { "epoch": 13.64, "learning_rate": 4.318280792497139e-05, "loss": 2.4094, "step": 2753000 }, { "epoch": 13.64, "learning_rate": 4.318156933854531e-05, "loss": 2.4306, "step": 2753500 }, { "epoch": 13.64, "learning_rate": 4.3180330752119225e-05, "loss": 2.4091, "step": 2754000 }, { "epoch": 13.65, "learning_rate": 4.317909216569314e-05, "loss": 2.4295, "step": 2754500 }, { "epoch": 13.65, "learning_rate": 4.317785357926706e-05, "loss": 2.4031, "step": 2755000 }, { "epoch": 13.65, "learning_rate": 4.317661747001383e-05, "loss": 2.4377, "step": 2755500 }, { "epoch": 13.65, "learning_rate": 4.3175378883587745e-05, "loss": 2.3773, "step": 2756000 }, { "epoch": 13.66, "learning_rate": 4.3174140297161655e-05, "loss": 2.4163, "step": 2756500 }, { "epoch": 13.66, "learning_rate": 4.317290171073557e-05, "loss": 2.4139, "step": 2757000 }, { "epoch": 13.66, "learning_rate": 4.317166312430949e-05, "loss": 2.4116, "step": 2757500 }, { "epoch": 13.66, "learning_rate": 4.3170424537883406e-05, "loss": 2.391, "step": 2758000 }, { "epoch": 13.67, "learning_rate": 4.316918595145732e-05, "loss": 2.4302, "step": 2758500 }, { "epoch": 13.67, "learning_rate": 4.316794984220409e-05, "loss": 2.4052, "step": 2759000 }, { "epoch": 13.67, "learning_rate": 4.316671373295086e-05, "loss": 2.4154, "step": 2759500 }, { "epoch": 13.67, "learning_rate": 4.316547514652478e-05, "loss": 2.3856, "step": 2760000 }, { "epoch": 13.68, "learning_rate": 4.3164236560098694e-05, "loss": 2.4001, "step": 2760500 }, { "epoch": 13.68, "learning_rate": 4.316299797367261e-05, "loss": 2.4079, "step": 2761000 }, { "epoch": 13.68, "learning_rate": 4.316175938724653e-05, "loss": 2.3876, "step": 2761500 }, { "epoch": 13.68, "learning_rate": 4.3160520800820445e-05, "loss": 2.4033, "step": 2762000 }, { "epoch": 13.69, "learning_rate": 4.315928221439436e-05, "loss": 2.4024, "step": 2762500 }, { "epoch": 13.69, "learning_rate": 4.3158046105141124e-05, "loss": 2.3802, "step": 2763000 }, { "epoch": 13.69, "learning_rate": 4.315680751871504e-05, "loss": 2.4104, "step": 2763500 }, { "epoch": 13.69, "learning_rate": 4.315556893228896e-05, "loss": 2.3841, "step": 2764000 }, { "epoch": 13.7, "learning_rate": 4.315433282303573e-05, "loss": 2.3901, "step": 2764500 }, { "epoch": 13.7, "learning_rate": 4.3153094236609644e-05, "loss": 2.4044, "step": 2765000 }, { "epoch": 13.7, "learning_rate": 4.315185565018356e-05, "loss": 2.4209, "step": 2765500 }, { "epoch": 13.7, "learning_rate": 4.315061706375748e-05, "loss": 2.3937, "step": 2766000 }, { "epoch": 13.71, "learning_rate": 4.3149380954504246e-05, "loss": 2.41, "step": 2766500 }, { "epoch": 13.71, "learning_rate": 4.3148142368078156e-05, "loss": 2.4149, "step": 2767000 }, { "epoch": 13.71, "learning_rate": 4.314690378165207e-05, "loss": 2.4024, "step": 2767500 }, { "epoch": 13.71, "learning_rate": 4.314566519522599e-05, "loss": 2.4355, "step": 2768000 }, { "epoch": 13.72, "learning_rate": 4.314442660879991e-05, "loss": 2.4085, "step": 2768500 }, { "epoch": 13.72, "learning_rate": 4.3143188022373824e-05, "loss": 2.3709, "step": 2769000 }, { "epoch": 13.72, "learning_rate": 4.314194943594774e-05, "loss": 2.397, "step": 2769500 }, { "epoch": 13.72, "learning_rate": 4.314071084952166e-05, "loss": 2.3901, "step": 2770000 }, { "epoch": 13.73, "learning_rate": 4.3139472263095575e-05, "loss": 2.4026, "step": 2770500 }, { "epoch": 13.73, "learning_rate": 4.313823367666949e-05, "loss": 2.4013, "step": 2771000 }, { "epoch": 13.73, "learning_rate": 4.313699756741626e-05, "loss": 2.3871, "step": 2771500 }, { "epoch": 13.73, "learning_rate": 4.313575898099018e-05, "loss": 2.4078, "step": 2772000 }, { "epoch": 13.74, "learning_rate": 4.3134520394564094e-05, "loss": 2.4151, "step": 2772500 }, { "epoch": 13.74, "learning_rate": 4.313328180813801e-05, "loss": 2.4024, "step": 2773000 }, { "epoch": 13.74, "learning_rate": 4.313204322171193e-05, "loss": 2.4182, "step": 2773500 }, { "epoch": 13.74, "learning_rate": 4.3130804635285845e-05, "loss": 2.3841, "step": 2774000 }, { "epoch": 13.75, "learning_rate": 4.312956604885976e-05, "loss": 2.3987, "step": 2774500 }, { "epoch": 13.75, "learning_rate": 4.312832746243368e-05, "loss": 2.3907, "step": 2775000 }, { "epoch": 13.75, "learning_rate": 4.3127088876007596e-05, "loss": 2.4168, "step": 2775500 }, { "epoch": 13.75, "learning_rate": 4.312585028958151e-05, "loss": 2.4074, "step": 2776000 }, { "epoch": 13.76, "learning_rate": 4.312461170315542e-05, "loss": 2.3894, "step": 2776500 }, { "epoch": 13.76, "learning_rate": 4.312337311672934e-05, "loss": 2.3936, "step": 2777000 }, { "epoch": 13.76, "learning_rate": 4.312213453030326e-05, "loss": 2.3887, "step": 2777500 }, { "epoch": 13.76, "learning_rate": 4.3120895943877174e-05, "loss": 2.4014, "step": 2778000 }, { "epoch": 13.77, "learning_rate": 4.311965735745109e-05, "loss": 2.4004, "step": 2778500 }, { "epoch": 13.77, "learning_rate": 4.3118418771025e-05, "loss": 2.4207, "step": 2779000 }, { "epoch": 13.77, "learning_rate": 4.3117182661771777e-05, "loss": 2.4038, "step": 2779500 }, { "epoch": 13.77, "learning_rate": 4.3115944075345694e-05, "loss": 2.417, "step": 2780000 }, { "epoch": 13.78, "learning_rate": 4.311470548891961e-05, "loss": 2.4107, "step": 2780500 }, { "epoch": 13.78, "learning_rate": 4.311346690249353e-05, "loss": 2.4136, "step": 2781000 }, { "epoch": 13.78, "learning_rate": 4.3112228316067444e-05, "loss": 2.3845, "step": 2781500 }, { "epoch": 13.78, "learning_rate": 4.311099220681421e-05, "loss": 2.43, "step": 2782000 }, { "epoch": 13.79, "learning_rate": 4.3109756097560975e-05, "loss": 2.4391, "step": 2782500 }, { "epoch": 13.79, "learning_rate": 4.3108519988307744e-05, "loss": 2.417, "step": 2783000 }, { "epoch": 13.79, "learning_rate": 4.310728387905452e-05, "loss": 2.4119, "step": 2783500 }, { "epoch": 13.79, "learning_rate": 4.310604529262843e-05, "loss": 2.4281, "step": 2784000 }, { "epoch": 13.8, "learning_rate": 4.31048091833752e-05, "loss": 2.4254, "step": 2784500 }, { "epoch": 13.8, "learning_rate": 4.3103570596949115e-05, "loss": 2.3977, "step": 2785000 }, { "epoch": 13.8, "learning_rate": 4.310233201052303e-05, "loss": 2.4255, "step": 2785500 }, { "epoch": 13.8, "learning_rate": 4.310109342409695e-05, "loss": 2.393, "step": 2786000 }, { "epoch": 13.81, "learning_rate": 4.3099854837670866e-05, "loss": 2.4038, "step": 2786500 }, { "epoch": 13.81, "learning_rate": 4.309861625124478e-05, "loss": 2.4045, "step": 2787000 }, { "epoch": 13.81, "learning_rate": 4.30973776648187e-05, "loss": 2.4029, "step": 2787500 }, { "epoch": 13.81, "learning_rate": 4.309613907839262e-05, "loss": 2.4214, "step": 2788000 }, { "epoch": 13.82, "learning_rate": 4.3094902969139386e-05, "loss": 2.3809, "step": 2788500 }, { "epoch": 13.82, "learning_rate": 4.30936643827133e-05, "loss": 2.402, "step": 2789000 }, { "epoch": 13.82, "learning_rate": 4.309242579628722e-05, "loss": 2.3671, "step": 2789500 }, { "epoch": 13.82, "learning_rate": 4.3091187209861137e-05, "loss": 2.391, "step": 2790000 }, { "epoch": 13.83, "learning_rate": 4.30899511006079e-05, "loss": 2.4029, "step": 2790500 }, { "epoch": 13.83, "learning_rate": 4.3088712514181816e-05, "loss": 2.3973, "step": 2791000 }, { "epoch": 13.83, "learning_rate": 4.308747392775573e-05, "loss": 2.3823, "step": 2791500 }, { "epoch": 13.83, "learning_rate": 4.308623534132965e-05, "loss": 2.4324, "step": 2792000 }, { "epoch": 13.84, "learning_rate": 4.3084996754903566e-05, "loss": 2.4068, "step": 2792500 }, { "epoch": 13.84, "learning_rate": 4.308375816847748e-05, "loss": 2.4183, "step": 2793000 }, { "epoch": 13.84, "learning_rate": 4.30825195820514e-05, "loss": 2.4176, "step": 2793500 }, { "epoch": 13.84, "learning_rate": 4.308128099562532e-05, "loss": 2.41, "step": 2794000 }, { "epoch": 13.84, "learning_rate": 4.3080042409199234e-05, "loss": 2.3977, "step": 2794500 }, { "epoch": 13.85, "learning_rate": 4.3078806299946e-05, "loss": 2.3854, "step": 2795000 }, { "epoch": 13.85, "learning_rate": 4.3077570190692765e-05, "loss": 2.4014, "step": 2795500 }, { "epoch": 13.85, "learning_rate": 4.307633160426668e-05, "loss": 2.432, "step": 2796000 }, { "epoch": 13.85, "learning_rate": 4.30750930178406e-05, "loss": 2.4016, "step": 2796500 }, { "epoch": 13.86, "learning_rate": 4.3073854431414516e-05, "loss": 2.4061, "step": 2797000 }, { "epoch": 13.86, "learning_rate": 4.307261584498843e-05, "loss": 2.3972, "step": 2797500 }, { "epoch": 13.86, "learning_rate": 4.307137725856235e-05, "loss": 2.3841, "step": 2798000 }, { "epoch": 13.86, "learning_rate": 4.307014114930912e-05, "loss": 2.3905, "step": 2798500 }, { "epoch": 13.87, "learning_rate": 4.3068902562883035e-05, "loss": 2.412, "step": 2799000 }, { "epoch": 13.87, "learning_rate": 4.306766397645695e-05, "loss": 2.3943, "step": 2799500 }, { "epoch": 13.87, "learning_rate": 4.306642539003087e-05, "loss": 2.3898, "step": 2800000 }, { "epoch": 13.87, "learning_rate": 4.3065186803604786e-05, "loss": 2.3945, "step": 2800500 }, { "epoch": 13.88, "learning_rate": 4.30639482171787e-05, "loss": 2.4051, "step": 2801000 }, { "epoch": 13.88, "learning_rate": 4.306270963075262e-05, "loss": 2.3813, "step": 2801500 }, { "epoch": 13.88, "learning_rate": 4.306147104432654e-05, "loss": 2.3979, "step": 2802000 }, { "epoch": 13.88, "learning_rate": 4.3060232457900454e-05, "loss": 2.4034, "step": 2802500 }, { "epoch": 13.89, "learning_rate": 4.305899387147437e-05, "loss": 2.399, "step": 2803000 }, { "epoch": 13.89, "learning_rate": 4.305775528504829e-05, "loss": 2.3926, "step": 2803500 }, { "epoch": 13.89, "learning_rate": 4.3056516698622205e-05, "loss": 2.3909, "step": 2804000 }, { "epoch": 13.89, "learning_rate": 4.3055278112196115e-05, "loss": 2.4082, "step": 2804500 }, { "epoch": 13.9, "learning_rate": 4.305403952577003e-05, "loss": 2.3697, "step": 2805000 }, { "epoch": 13.9, "learning_rate": 4.30528034165168e-05, "loss": 2.4134, "step": 2805500 }, { "epoch": 13.9, "learning_rate": 4.305156483009072e-05, "loss": 2.3691, "step": 2806000 }, { "epoch": 13.9, "learning_rate": 4.3050326243664634e-05, "loss": 2.388, "step": 2806500 }, { "epoch": 13.91, "learning_rate": 4.304908765723855e-05, "loss": 2.4274, "step": 2807000 }, { "epoch": 13.91, "learning_rate": 4.304784907081246e-05, "loss": 2.4141, "step": 2807500 }, { "epoch": 13.91, "learning_rate": 4.304661048438638e-05, "loss": 2.3835, "step": 2808000 }, { "epoch": 13.91, "learning_rate": 4.3045371897960295e-05, "loss": 2.4, "step": 2808500 }, { "epoch": 13.92, "learning_rate": 4.304413331153421e-05, "loss": 2.4225, "step": 2809000 }, { "epoch": 13.92, "learning_rate": 4.304289472510813e-05, "loss": 2.3942, "step": 2809500 }, { "epoch": 13.92, "learning_rate": 4.3041658615854905e-05, "loss": 2.3977, "step": 2810000 }, { "epoch": 13.92, "learning_rate": 4.304042250660167e-05, "loss": 2.4204, "step": 2810500 }, { "epoch": 13.93, "learning_rate": 4.3039183920175584e-05, "loss": 2.4132, "step": 2811000 }, { "epoch": 13.93, "learning_rate": 4.30379453337495e-05, "loss": 2.4255, "step": 2811500 }, { "epoch": 13.93, "learning_rate": 4.303670674732342e-05, "loss": 2.4183, "step": 2812000 }, { "epoch": 13.93, "learning_rate": 4.3035468160897334e-05, "loss": 2.3954, "step": 2812500 }, { "epoch": 13.94, "learning_rate": 4.30342320516441e-05, "loss": 2.4053, "step": 2813000 }, { "epoch": 13.94, "learning_rate": 4.303299594239087e-05, "loss": 2.3931, "step": 2813500 }, { "epoch": 13.94, "learning_rate": 4.303175735596479e-05, "loss": 2.4367, "step": 2814000 }, { "epoch": 13.94, "learning_rate": 4.30305187695387e-05, "loss": 2.4039, "step": 2814500 }, { "epoch": 13.95, "learning_rate": 4.3029280183112616e-05, "loss": 2.3991, "step": 2815000 }, { "epoch": 13.95, "learning_rate": 4.302804159668653e-05, "loss": 2.4093, "step": 2815500 }, { "epoch": 13.95, "learning_rate": 4.302680301026045e-05, "loss": 2.3957, "step": 2816000 }, { "epoch": 13.95, "learning_rate": 4.302556442383437e-05, "loss": 2.4145, "step": 2816500 }, { "epoch": 13.96, "learning_rate": 4.3024325837408284e-05, "loss": 2.4256, "step": 2817000 }, { "epoch": 13.96, "learning_rate": 4.30230872509822e-05, "loss": 2.412, "step": 2817500 }, { "epoch": 13.96, "learning_rate": 4.302184866455612e-05, "loss": 2.412, "step": 2818000 }, { "epoch": 13.96, "learning_rate": 4.3020612555302886e-05, "loss": 2.4011, "step": 2818500 }, { "epoch": 13.97, "learning_rate": 4.3019376446049655e-05, "loss": 2.4118, "step": 2819000 }, { "epoch": 13.97, "learning_rate": 4.301813785962357e-05, "loss": 2.4149, "step": 2819500 }, { "epoch": 13.97, "learning_rate": 4.301689927319749e-05, "loss": 2.4283, "step": 2820000 }, { "epoch": 13.97, "learning_rate": 4.3015660686771406e-05, "loss": 2.3776, "step": 2820500 }, { "epoch": 13.98, "learning_rate": 4.301442457751817e-05, "loss": 2.4164, "step": 2821000 }, { "epoch": 13.98, "learning_rate": 4.3013185991092085e-05, "loss": 2.3833, "step": 2821500 }, { "epoch": 13.98, "learning_rate": 4.3011947404666e-05, "loss": 2.384, "step": 2822000 }, { "epoch": 13.98, "learning_rate": 4.301070881823992e-05, "loss": 2.4177, "step": 2822500 }, { "epoch": 13.99, "learning_rate": 4.3009470231813836e-05, "loss": 2.4214, "step": 2823000 }, { "epoch": 13.99, "learning_rate": 4.300823164538775e-05, "loss": 2.4005, "step": 2823500 }, { "epoch": 13.99, "learning_rate": 4.300699305896167e-05, "loss": 2.3818, "step": 2824000 }, { "epoch": 13.99, "learning_rate": 4.3005754472535587e-05, "loss": 2.3987, "step": 2824500 }, { "epoch": 14.0, "learning_rate": 4.3004515886109503e-05, "loss": 2.4102, "step": 2825000 }, { "epoch": 14.0, "learning_rate": 4.300327729968342e-05, "loss": 2.4001, "step": 2825500 }, { "epoch": 14.0, "eval_accuracy": 0.6461582624983472, "eval_accuracy_mlm": 0.5998228536990704, "eval_accuracy_nsp": 0.8650881122062763, "eval_loss": 2.389559268951416, "eval_runtime": 145.7903, "eval_samples_per_second": 1748.807, "eval_steps_per_second": 72.872, "step": 2825802 }, { "epoch": 14.0, "learning_rate": 4.300204119043019e-05, "loss": 2.3643, "step": 2826000 }, { "epoch": 14.0, "learning_rate": 4.3000802604004106e-05, "loss": 2.3761, "step": 2826500 }, { "epoch": 14.01, "learning_rate": 4.299956401757802e-05, "loss": 2.3749, "step": 2827000 }, { "epoch": 14.01, "learning_rate": 4.299832543115194e-05, "loss": 2.3892, "step": 2827500 }, { "epoch": 14.01, "learning_rate": 4.299708684472585e-05, "loss": 2.3667, "step": 2828000 }, { "epoch": 14.01, "learning_rate": 4.299585073547262e-05, "loss": 2.364, "step": 2828500 }, { "epoch": 14.02, "learning_rate": 4.2994612149046536e-05, "loss": 2.3782, "step": 2829000 }, { "epoch": 14.02, "learning_rate": 4.299337356262045e-05, "loss": 2.371, "step": 2829500 }, { "epoch": 14.02, "learning_rate": 4.299213497619437e-05, "loss": 2.349, "step": 2830000 }, { "epoch": 14.02, "learning_rate": 4.299089638976829e-05, "loss": 2.3903, "step": 2830500 }, { "epoch": 14.03, "learning_rate": 4.2989657803342204e-05, "loss": 2.374, "step": 2831000 }, { "epoch": 14.03, "learning_rate": 4.298842169408897e-05, "loss": 2.378, "step": 2831500 }, { "epoch": 14.03, "learning_rate": 4.298718310766289e-05, "loss": 2.3867, "step": 2832000 }, { "epoch": 14.03, "learning_rate": 4.2985944521236806e-05, "loss": 2.366, "step": 2832500 }, { "epoch": 14.04, "learning_rate": 4.298470593481072e-05, "loss": 2.3584, "step": 2833000 }, { "epoch": 14.04, "learning_rate": 4.298346734838464e-05, "loss": 2.3585, "step": 2833500 }, { "epoch": 14.04, "learning_rate": 4.298222876195856e-05, "loss": 2.3845, "step": 2834000 }, { "epoch": 14.04, "learning_rate": 4.2980990175532474e-05, "loss": 2.3867, "step": 2834500 }, { "epoch": 14.05, "learning_rate": 4.2979751589106384e-05, "loss": 2.3844, "step": 2835000 }, { "epoch": 14.05, "learning_rate": 4.29785130026803e-05, "loss": 2.3945, "step": 2835500 }, { "epoch": 14.05, "learning_rate": 4.297727689342707e-05, "loss": 2.3819, "step": 2836000 }, { "epoch": 14.05, "learning_rate": 4.297603830700099e-05, "loss": 2.3963, "step": 2836500 }, { "epoch": 14.06, "learning_rate": 4.2974799720574904e-05, "loss": 2.3649, "step": 2837000 }, { "epoch": 14.06, "learning_rate": 4.297356113414882e-05, "loss": 2.3759, "step": 2837500 }, { "epoch": 14.06, "learning_rate": 4.297232254772274e-05, "loss": 2.3773, "step": 2838000 }, { "epoch": 14.06, "learning_rate": 4.2971083961296655e-05, "loss": 2.3649, "step": 2838500 }, { "epoch": 14.07, "learning_rate": 4.296984785204342e-05, "loss": 2.3764, "step": 2839000 }, { "epoch": 14.07, "learning_rate": 4.296860926561734e-05, "loss": 2.3793, "step": 2839500 }, { "epoch": 14.07, "learning_rate": 4.29673731563641e-05, "loss": 2.4066, "step": 2840000 }, { "epoch": 14.07, "learning_rate": 4.296613704711088e-05, "loss": 2.377, "step": 2840500 }, { "epoch": 14.08, "learning_rate": 4.2964898460684795e-05, "loss": 2.3808, "step": 2841000 }, { "epoch": 14.08, "learning_rate": 4.296365987425871e-05, "loss": 2.4138, "step": 2841500 }, { "epoch": 14.08, "learning_rate": 4.296242128783263e-05, "loss": 2.3763, "step": 2842000 }, { "epoch": 14.08, "learning_rate": 4.296118270140654e-05, "loss": 2.3747, "step": 2842500 }, { "epoch": 14.09, "learning_rate": 4.2959944114980456e-05, "loss": 2.3829, "step": 2843000 }, { "epoch": 14.09, "learning_rate": 4.295870552855437e-05, "loss": 2.4047, "step": 2843500 }, { "epoch": 14.09, "learning_rate": 4.295746694212829e-05, "loss": 2.3814, "step": 2844000 }, { "epoch": 14.09, "learning_rate": 4.2956228355702207e-05, "loss": 2.387, "step": 2844500 }, { "epoch": 14.1, "learning_rate": 4.2954989769276123e-05, "loss": 2.3661, "step": 2845000 }, { "epoch": 14.1, "learning_rate": 4.295375118285004e-05, "loss": 2.4036, "step": 2845500 }, { "epoch": 14.1, "learning_rate": 4.29525150735968e-05, "loss": 2.3828, "step": 2846000 }, { "epoch": 14.1, "learning_rate": 4.295127648717072e-05, "loss": 2.3651, "step": 2846500 }, { "epoch": 14.11, "learning_rate": 4.2950037900744636e-05, "loss": 2.3889, "step": 2847000 }, { "epoch": 14.11, "learning_rate": 4.294879931431855e-05, "loss": 2.3733, "step": 2847500 }, { "epoch": 14.11, "learning_rate": 4.294756072789247e-05, "loss": 2.3593, "step": 2848000 }, { "epoch": 14.11, "learning_rate": 4.294632214146639e-05, "loss": 2.3742, "step": 2848500 }, { "epoch": 14.11, "learning_rate": 4.2945083555040304e-05, "loss": 2.3745, "step": 2849000 }, { "epoch": 14.12, "learning_rate": 4.294384496861422e-05, "loss": 2.3674, "step": 2849500 }, { "epoch": 14.12, "learning_rate": 4.294260638218814e-05, "loss": 2.3607, "step": 2850000 }, { "epoch": 14.12, "learning_rate": 4.294137027293491e-05, "loss": 2.383, "step": 2850500 }, { "epoch": 14.12, "learning_rate": 4.2940131686508824e-05, "loss": 2.3761, "step": 2851000 }, { "epoch": 14.13, "learning_rate": 4.293889310008274e-05, "loss": 2.3966, "step": 2851500 }, { "epoch": 14.13, "learning_rate": 4.293765451365666e-05, "loss": 2.3912, "step": 2852000 }, { "epoch": 14.13, "learning_rate": 4.293641840440342e-05, "loss": 2.3719, "step": 2852500 }, { "epoch": 14.13, "learning_rate": 4.2935179817977336e-05, "loss": 2.3608, "step": 2853000 }, { "epoch": 14.14, "learning_rate": 4.293394123155125e-05, "loss": 2.3762, "step": 2853500 }, { "epoch": 14.14, "learning_rate": 4.293270264512517e-05, "loss": 2.3916, "step": 2854000 }, { "epoch": 14.14, "learning_rate": 4.293146405869909e-05, "loss": 2.3773, "step": 2854500 }, { "epoch": 14.14, "learning_rate": 4.2930225472273004e-05, "loss": 2.3872, "step": 2855000 }, { "epoch": 14.15, "learning_rate": 4.292898688584692e-05, "loss": 2.3776, "step": 2855500 }, { "epoch": 14.15, "learning_rate": 4.292775077659369e-05, "loss": 2.4074, "step": 2856000 }, { "epoch": 14.15, "learning_rate": 4.292651219016761e-05, "loss": 2.379, "step": 2856500 }, { "epoch": 14.15, "learning_rate": 4.2925273603741524e-05, "loss": 2.3803, "step": 2857000 }, { "epoch": 14.16, "learning_rate": 4.292403501731544e-05, "loss": 2.3807, "step": 2857500 }, { "epoch": 14.16, "learning_rate": 4.292279643088936e-05, "loss": 2.3591, "step": 2858000 }, { "epoch": 14.16, "learning_rate": 4.2921557844463274e-05, "loss": 2.3725, "step": 2858500 }, { "epoch": 14.16, "learning_rate": 4.292031925803719e-05, "loss": 2.3891, "step": 2859000 }, { "epoch": 14.17, "learning_rate": 4.291908067161111e-05, "loss": 2.3872, "step": 2859500 }, { "epoch": 14.17, "learning_rate": 4.291784456235787e-05, "loss": 2.3672, "step": 2860000 }, { "epoch": 14.17, "learning_rate": 4.2916608453104646e-05, "loss": 2.3655, "step": 2860500 }, { "epoch": 14.17, "learning_rate": 4.2915369866678556e-05, "loss": 2.403, "step": 2861000 }, { "epoch": 14.18, "learning_rate": 4.291413375742533e-05, "loss": 2.4014, "step": 2861500 }, { "epoch": 14.18, "learning_rate": 4.291289517099925e-05, "loss": 2.3719, "step": 2862000 }, { "epoch": 14.18, "learning_rate": 4.2911656584573166e-05, "loss": 2.377, "step": 2862500 }, { "epoch": 14.18, "learning_rate": 4.2910417998147076e-05, "loss": 2.3801, "step": 2863000 }, { "epoch": 14.19, "learning_rate": 4.290917941172099e-05, "loss": 2.3656, "step": 2863500 }, { "epoch": 14.19, "learning_rate": 4.290794082529491e-05, "loss": 2.4083, "step": 2864000 }, { "epoch": 14.19, "learning_rate": 4.2906702238868826e-05, "loss": 2.3794, "step": 2864500 }, { "epoch": 14.19, "learning_rate": 4.2905463652442743e-05, "loss": 2.3929, "step": 2865000 }, { "epoch": 14.2, "learning_rate": 4.2904225066016654e-05, "loss": 2.3909, "step": 2865500 }, { "epoch": 14.2, "learning_rate": 4.290298647959057e-05, "loss": 2.4093, "step": 2866000 }, { "epoch": 14.2, "learning_rate": 4.290174789316449e-05, "loss": 2.3792, "step": 2866500 }, { "epoch": 14.2, "learning_rate": 4.2900509306738404e-05, "loss": 2.3973, "step": 2867000 }, { "epoch": 14.21, "learning_rate": 4.289927072031232e-05, "loss": 2.3682, "step": 2867500 }, { "epoch": 14.21, "learning_rate": 4.289803213388624e-05, "loss": 2.3672, "step": 2868000 }, { "epoch": 14.21, "learning_rate": 4.2896793547460155e-05, "loss": 2.3825, "step": 2868500 }, { "epoch": 14.21, "learning_rate": 4.289555496103407e-05, "loss": 2.3766, "step": 2869000 }, { "epoch": 14.22, "learning_rate": 4.289431885178084e-05, "loss": 2.3758, "step": 2869500 }, { "epoch": 14.22, "learning_rate": 4.289308026535476e-05, "loss": 2.37, "step": 2870000 }, { "epoch": 14.22, "learning_rate": 4.2891844156101527e-05, "loss": 2.3608, "step": 2870500 }, { "epoch": 14.22, "learning_rate": 4.2890605569675444e-05, "loss": 2.3819, "step": 2871000 }, { "epoch": 14.23, "learning_rate": 4.288936698324936e-05, "loss": 2.3681, "step": 2871500 }, { "epoch": 14.23, "learning_rate": 4.288813087399613e-05, "loss": 2.3852, "step": 2872000 }, { "epoch": 14.23, "learning_rate": 4.2886892287570046e-05, "loss": 2.3943, "step": 2872500 }, { "epoch": 14.23, "learning_rate": 4.288565370114396e-05, "loss": 2.3918, "step": 2873000 }, { "epoch": 14.24, "learning_rate": 4.288441511471787e-05, "loss": 2.363, "step": 2873500 }, { "epoch": 14.24, "learning_rate": 4.288317652829179e-05, "loss": 2.3689, "step": 2874000 }, { "epoch": 14.24, "learning_rate": 4.2881940419038566e-05, "loss": 2.3561, "step": 2874500 }, { "epoch": 14.24, "learning_rate": 4.288070183261248e-05, "loss": 2.3556, "step": 2875000 }, { "epoch": 14.25, "learning_rate": 4.28794632461864e-05, "loss": 2.3698, "step": 2875500 }, { "epoch": 14.25, "learning_rate": 4.2878224659760317e-05, "loss": 2.3819, "step": 2876000 }, { "epoch": 14.25, "learning_rate": 4.287698607333423e-05, "loss": 2.3529, "step": 2876500 }, { "epoch": 14.25, "learning_rate": 4.2875747486908144e-05, "loss": 2.3704, "step": 2877000 }, { "epoch": 14.26, "learning_rate": 4.287450890048206e-05, "loss": 2.399, "step": 2877500 }, { "epoch": 14.26, "learning_rate": 4.287327031405598e-05, "loss": 2.3887, "step": 2878000 }, { "epoch": 14.26, "learning_rate": 4.2872031727629894e-05, "loss": 2.3905, "step": 2878500 }, { "epoch": 14.26, "learning_rate": 4.2870793141203805e-05, "loss": 2.3566, "step": 2879000 }, { "epoch": 14.27, "learning_rate": 4.286955703195058e-05, "loss": 2.4, "step": 2879500 }, { "epoch": 14.27, "learning_rate": 4.286831844552449e-05, "loss": 2.3926, "step": 2880000 }, { "epoch": 14.27, "learning_rate": 4.286707985909841e-05, "loss": 2.3815, "step": 2880500 }, { "epoch": 14.27, "learning_rate": 4.2865841272672324e-05, "loss": 2.369, "step": 2881000 }, { "epoch": 14.28, "learning_rate": 4.286460268624624e-05, "loss": 2.3836, "step": 2881500 }, { "epoch": 14.28, "learning_rate": 4.286336409982016e-05, "loss": 2.4035, "step": 2882000 }, { "epoch": 14.28, "learning_rate": 4.2862127990566934e-05, "loss": 2.3799, "step": 2882500 }, { "epoch": 14.28, "learning_rate": 4.2860889404140844e-05, "loss": 2.4322, "step": 2883000 }, { "epoch": 14.29, "learning_rate": 4.285965081771476e-05, "loss": 2.4124, "step": 2883500 }, { "epoch": 14.29, "learning_rate": 4.285841223128868e-05, "loss": 2.3728, "step": 2884000 }, { "epoch": 14.29, "learning_rate": 4.2857173644862595e-05, "loss": 2.3791, "step": 2884500 }, { "epoch": 14.29, "learning_rate": 4.285593505843651e-05, "loss": 2.3921, "step": 2885000 }, { "epoch": 14.3, "learning_rate": 4.285469647201042e-05, "loss": 2.3687, "step": 2885500 }, { "epoch": 14.3, "learning_rate": 4.285346036275719e-05, "loss": 2.3897, "step": 2886000 }, { "epoch": 14.3, "learning_rate": 4.285222177633111e-05, "loss": 2.3735, "step": 2886500 }, { "epoch": 14.3, "learning_rate": 4.2850983189905024e-05, "loss": 2.3695, "step": 2887000 }, { "epoch": 14.31, "learning_rate": 4.284974460347894e-05, "loss": 2.3833, "step": 2887500 }, { "epoch": 14.31, "learning_rate": 4.284850601705286e-05, "loss": 2.3874, "step": 2888000 }, { "epoch": 14.31, "learning_rate": 4.2847267430626775e-05, "loss": 2.4175, "step": 2888500 }, { "epoch": 14.31, "learning_rate": 4.284603132137355e-05, "loss": 2.3706, "step": 2889000 }, { "epoch": 14.32, "learning_rate": 4.2844797689293165e-05, "loss": 2.3967, "step": 2889500 }, { "epoch": 14.32, "learning_rate": 4.2843561580039933e-05, "loss": 2.3934, "step": 2890000 }, { "epoch": 14.32, "learning_rate": 4.284232299361385e-05, "loss": 2.3733, "step": 2890500 }, { "epoch": 14.32, "learning_rate": 4.284108440718777e-05, "loss": 2.3728, "step": 2891000 }, { "epoch": 14.33, "learning_rate": 4.2839845820761684e-05, "loss": 2.3741, "step": 2891500 }, { "epoch": 14.33, "learning_rate": 4.28386072343356e-05, "loss": 2.406, "step": 2892000 }, { "epoch": 14.33, "learning_rate": 4.283736864790952e-05, "loss": 2.3878, "step": 2892500 }, { "epoch": 14.33, "learning_rate": 4.283613006148343e-05, "loss": 2.3857, "step": 2893000 }, { "epoch": 14.34, "learning_rate": 4.2834891475057345e-05, "loss": 2.3929, "step": 2893500 }, { "epoch": 14.34, "learning_rate": 4.2833655365804114e-05, "loss": 2.3873, "step": 2894000 }, { "epoch": 14.34, "learning_rate": 4.283241925655089e-05, "loss": 2.3801, "step": 2894500 }, { "epoch": 14.34, "learning_rate": 4.2831180670124806e-05, "loss": 2.3904, "step": 2895000 }, { "epoch": 14.35, "learning_rate": 4.282994208369872e-05, "loss": 2.4079, "step": 2895500 }, { "epoch": 14.35, "learning_rate": 4.2828703497272634e-05, "loss": 2.418, "step": 2896000 }, { "epoch": 14.35, "learning_rate": 4.282746491084655e-05, "loss": 2.3692, "step": 2896500 }, { "epoch": 14.35, "learning_rate": 4.282622632442047e-05, "loss": 2.377, "step": 2897000 }, { "epoch": 14.36, "learning_rate": 4.2824987737994384e-05, "loss": 2.3925, "step": 2897500 }, { "epoch": 14.36, "learning_rate": 4.28237491515683e-05, "loss": 2.3754, "step": 2898000 }, { "epoch": 14.36, "learning_rate": 4.282251056514222e-05, "loss": 2.3814, "step": 2898500 }, { "epoch": 14.36, "learning_rate": 4.2821271978716135e-05, "loss": 2.3872, "step": 2899000 }, { "epoch": 14.37, "learning_rate": 4.282003339229005e-05, "loss": 2.3753, "step": 2899500 }, { "epoch": 14.37, "learning_rate": 4.281879480586396e-05, "loss": 2.4065, "step": 2900000 }, { "epoch": 14.37, "learning_rate": 4.281755621943788e-05, "loss": 2.3957, "step": 2900500 }, { "epoch": 14.37, "learning_rate": 4.2816317633011796e-05, "loss": 2.4159, "step": 2901000 }, { "epoch": 14.38, "learning_rate": 4.281507904658571e-05, "loss": 2.4126, "step": 2901500 }, { "epoch": 14.38, "learning_rate": 4.281384046015963e-05, "loss": 2.3595, "step": 2902000 }, { "epoch": 14.38, "learning_rate": 4.281260187373355e-05, "loss": 2.3574, "step": 2902500 }, { "epoch": 14.38, "learning_rate": 4.281136824165317e-05, "loss": 2.3769, "step": 2903000 }, { "epoch": 14.38, "learning_rate": 4.2810129655227084e-05, "loss": 2.395, "step": 2903500 }, { "epoch": 14.39, "learning_rate": 4.2808891068801e-05, "loss": 2.4074, "step": 2904000 }, { "epoch": 14.39, "learning_rate": 4.280765495954777e-05, "loss": 2.3847, "step": 2904500 }, { "epoch": 14.39, "learning_rate": 4.280641637312169e-05, "loss": 2.4074, "step": 2905000 }, { "epoch": 14.39, "learning_rate": 4.28051777866956e-05, "loss": 2.3715, "step": 2905500 }, { "epoch": 14.4, "learning_rate": 4.2803939200269514e-05, "loss": 2.3871, "step": 2906000 }, { "epoch": 14.4, "learning_rate": 4.280270061384343e-05, "loss": 2.4013, "step": 2906500 }, { "epoch": 14.4, "learning_rate": 4.280146202741735e-05, "loss": 2.3838, "step": 2907000 }, { "epoch": 14.4, "learning_rate": 4.2800223440991265e-05, "loss": 2.4154, "step": 2907500 }, { "epoch": 14.41, "learning_rate": 4.279898485456518e-05, "loss": 2.3687, "step": 2908000 }, { "epoch": 14.41, "learning_rate": 4.27977462681391e-05, "loss": 2.396, "step": 2908500 }, { "epoch": 14.41, "learning_rate": 4.2796507681713016e-05, "loss": 2.3786, "step": 2909000 }, { "epoch": 14.41, "learning_rate": 4.2795271572459785e-05, "loss": 2.3984, "step": 2909500 }, { "epoch": 14.42, "learning_rate": 4.279403546320655e-05, "loss": 2.3943, "step": 2910000 }, { "epoch": 14.42, "learning_rate": 4.279279687678047e-05, "loss": 2.3614, "step": 2910500 }, { "epoch": 14.42, "learning_rate": 4.279155829035439e-05, "loss": 2.3771, "step": 2911000 }, { "epoch": 14.42, "learning_rate": 4.2790319703928304e-05, "loss": 2.3691, "step": 2911500 }, { "epoch": 14.43, "learning_rate": 4.2789081117502214e-05, "loss": 2.392, "step": 2912000 }, { "epoch": 14.43, "learning_rate": 4.278784253107613e-05, "loss": 2.4161, "step": 2912500 }, { "epoch": 14.43, "learning_rate": 4.278660394465005e-05, "loss": 2.3881, "step": 2913000 }, { "epoch": 14.43, "learning_rate": 4.2785365358223965e-05, "loss": 2.3568, "step": 2913500 }, { "epoch": 14.44, "learning_rate": 4.278412677179788e-05, "loss": 2.4166, "step": 2914000 }, { "epoch": 14.44, "learning_rate": 4.278289066254466e-05, "loss": 2.4008, "step": 2914500 }, { "epoch": 14.44, "learning_rate": 4.278165207611857e-05, "loss": 2.3719, "step": 2915000 }, { "epoch": 14.44, "learning_rate": 4.2780413489692485e-05, "loss": 2.4054, "step": 2915500 }, { "epoch": 14.45, "learning_rate": 4.27791749032664e-05, "loss": 2.3981, "step": 2916000 }, { "epoch": 14.45, "learning_rate": 4.277793631684032e-05, "loss": 2.4088, "step": 2916500 }, { "epoch": 14.45, "learning_rate": 4.2776697730414235e-05, "loss": 2.4193, "step": 2917000 }, { "epoch": 14.45, "learning_rate": 4.277545914398815e-05, "loss": 2.3723, "step": 2917500 }, { "epoch": 14.46, "learning_rate": 4.277422055756207e-05, "loss": 2.3896, "step": 2918000 }, { "epoch": 14.46, "learning_rate": 4.2772981971135986e-05, "loss": 2.3889, "step": 2918500 }, { "epoch": 14.46, "learning_rate": 4.27717433847099e-05, "loss": 2.3773, "step": 2919000 }, { "epoch": 14.46, "learning_rate": 4.277050479828382e-05, "loss": 2.4043, "step": 2919500 }, { "epoch": 14.47, "learning_rate": 4.276927116620344e-05, "loss": 2.3883, "step": 2920000 }, { "epoch": 14.47, "learning_rate": 4.276803257977736e-05, "loss": 2.3812, "step": 2920500 }, { "epoch": 14.47, "learning_rate": 4.276679894769697e-05, "loss": 2.4031, "step": 2921000 }, { "epoch": 14.47, "learning_rate": 4.276556036127089e-05, "loss": 2.3853, "step": 2921500 }, { "epoch": 14.48, "learning_rate": 4.2764321774844806e-05, "loss": 2.4171, "step": 2922000 }, { "epoch": 14.48, "learning_rate": 4.276308318841872e-05, "loss": 2.3994, "step": 2922500 }, { "epoch": 14.48, "learning_rate": 4.276184460199264e-05, "loss": 2.3933, "step": 2923000 }, { "epoch": 14.48, "learning_rate": 4.276060849273941e-05, "loss": 2.3946, "step": 2923500 }, { "epoch": 14.49, "learning_rate": 4.2759369906313325e-05, "loss": 2.4314, "step": 2924000 }, { "epoch": 14.49, "learning_rate": 4.275813131988724e-05, "loss": 2.3782, "step": 2924500 }, { "epoch": 14.49, "learning_rate": 4.275689273346116e-05, "loss": 2.3963, "step": 2925000 }, { "epoch": 14.49, "learning_rate": 4.2755654147035076e-05, "loss": 2.4168, "step": 2925500 }, { "epoch": 14.5, "learning_rate": 4.275441556060899e-05, "loss": 2.3955, "step": 2926000 }, { "epoch": 14.5, "learning_rate": 4.275317697418291e-05, "loss": 2.4109, "step": 2926500 }, { "epoch": 14.5, "learning_rate": 4.275193838775683e-05, "loss": 2.3643, "step": 2927000 }, { "epoch": 14.5, "learning_rate": 4.2750699801330744e-05, "loss": 2.3692, "step": 2927500 }, { "epoch": 14.51, "learning_rate": 4.2749461214904654e-05, "loss": 2.4234, "step": 2928000 }, { "epoch": 14.51, "learning_rate": 4.274822262847857e-05, "loss": 2.4136, "step": 2928500 }, { "epoch": 14.51, "learning_rate": 4.274698404205249e-05, "loss": 2.3882, "step": 2929000 }, { "epoch": 14.51, "learning_rate": 4.2745745455626405e-05, "loss": 2.3713, "step": 2929500 }, { "epoch": 14.52, "learning_rate": 4.274450686920032e-05, "loss": 2.3898, "step": 2930000 }, { "epoch": 14.52, "learning_rate": 4.274326828277423e-05, "loss": 2.4151, "step": 2930500 }, { "epoch": 14.52, "learning_rate": 4.274202969634815e-05, "loss": 2.3902, "step": 2931000 }, { "epoch": 14.52, "learning_rate": 4.2740793587094924e-05, "loss": 2.3814, "step": 2931500 }, { "epoch": 14.53, "learning_rate": 4.273955500066884e-05, "loss": 2.3855, "step": 2932000 }, { "epoch": 14.53, "learning_rate": 4.273831641424276e-05, "loss": 2.4006, "step": 2932500 }, { "epoch": 14.53, "learning_rate": 4.2737077827816675e-05, "loss": 2.3651, "step": 2933000 }, { "epoch": 14.53, "learning_rate": 4.2735839241390585e-05, "loss": 2.3846, "step": 2933500 }, { "epoch": 14.54, "learning_rate": 4.273460313213736e-05, "loss": 2.3763, "step": 2934000 }, { "epoch": 14.54, "learning_rate": 4.273336454571127e-05, "loss": 2.382, "step": 2934500 }, { "epoch": 14.54, "learning_rate": 4.273212595928519e-05, "loss": 2.3847, "step": 2935000 }, { "epoch": 14.54, "learning_rate": 4.2730887372859105e-05, "loss": 2.3934, "step": 2935500 }, { "epoch": 14.55, "learning_rate": 4.2729651263605873e-05, "loss": 2.3807, "step": 2936000 }, { "epoch": 14.55, "learning_rate": 4.272841515435264e-05, "loss": 2.3737, "step": 2936500 }, { "epoch": 14.55, "learning_rate": 4.272717656792656e-05, "loss": 2.396, "step": 2937000 }, { "epoch": 14.55, "learning_rate": 4.2725937981500476e-05, "loss": 2.397, "step": 2937500 }, { "epoch": 14.56, "learning_rate": 4.272469939507439e-05, "loss": 2.3834, "step": 2938000 }, { "epoch": 14.56, "learning_rate": 4.272346080864831e-05, "loss": 2.383, "step": 2938500 }, { "epoch": 14.56, "learning_rate": 4.272222222222223e-05, "loss": 2.3652, "step": 2939000 }, { "epoch": 14.56, "learning_rate": 4.2720983635796144e-05, "loss": 2.3763, "step": 2939500 }, { "epoch": 14.57, "learning_rate": 4.271974504937006e-05, "loss": 2.4087, "step": 2940000 }, { "epoch": 14.57, "learning_rate": 4.271850646294398e-05, "loss": 2.3843, "step": 2940500 }, { "epoch": 14.57, "learning_rate": 4.2717267876517895e-05, "loss": 2.3971, "step": 2941000 }, { "epoch": 14.57, "learning_rate": 4.2716029290091805e-05, "loss": 2.3815, "step": 2941500 }, { "epoch": 14.58, "learning_rate": 4.271479070366572e-05, "loss": 2.4035, "step": 2942000 }, { "epoch": 14.58, "learning_rate": 4.271355211723964e-05, "loss": 2.3709, "step": 2942500 }, { "epoch": 14.58, "learning_rate": 4.2712313530813556e-05, "loss": 2.3951, "step": 2943000 }, { "epoch": 14.58, "learning_rate": 4.271107494438747e-05, "loss": 2.3758, "step": 2943500 }, { "epoch": 14.59, "learning_rate": 4.270983635796138e-05, "loss": 2.4007, "step": 2944000 }, { "epoch": 14.59, "learning_rate": 4.270860024870816e-05, "loss": 2.4097, "step": 2944500 }, { "epoch": 14.59, "learning_rate": 4.2707361662282075e-05, "loss": 2.4025, "step": 2945000 }, { "epoch": 14.59, "learning_rate": 4.2706125553028844e-05, "loss": 2.3816, "step": 2945500 }, { "epoch": 14.6, "learning_rate": 4.270488696660276e-05, "loss": 2.3982, "step": 2946000 }, { "epoch": 14.6, "learning_rate": 4.270364838017668e-05, "loss": 2.3793, "step": 2946500 }, { "epoch": 14.6, "learning_rate": 4.2702409793750595e-05, "loss": 2.389, "step": 2947000 }, { "epoch": 14.6, "learning_rate": 4.270117120732451e-05, "loss": 2.362, "step": 2947500 }, { "epoch": 14.61, "learning_rate": 4.2699935098071274e-05, "loss": 2.3914, "step": 2948000 }, { "epoch": 14.61, "learning_rate": 4.269869651164519e-05, "loss": 2.3902, "step": 2948500 }, { "epoch": 14.61, "learning_rate": 4.269745792521911e-05, "loss": 2.3968, "step": 2949000 }, { "epoch": 14.61, "learning_rate": 4.2696219338793024e-05, "loss": 2.3966, "step": 2949500 }, { "epoch": 14.62, "learning_rate": 4.269498075236694e-05, "loss": 2.395, "step": 2950000 }, { "epoch": 14.62, "learning_rate": 4.269374216594086e-05, "loss": 2.3799, "step": 2950500 }, { "epoch": 14.62, "learning_rate": 4.2692503579514775e-05, "loss": 2.3837, "step": 2951000 }, { "epoch": 14.62, "learning_rate": 4.2691267470261544e-05, "loss": 2.3736, "step": 2951500 }, { "epoch": 14.63, "learning_rate": 4.269002888383546e-05, "loss": 2.3967, "step": 2952000 }, { "epoch": 14.63, "learning_rate": 4.268879029740938e-05, "loss": 2.4221, "step": 2952500 }, { "epoch": 14.63, "learning_rate": 4.2687551710983295e-05, "loss": 2.3803, "step": 2953000 }, { "epoch": 14.63, "learning_rate": 4.268631560173006e-05, "loss": 2.3863, "step": 2953500 }, { "epoch": 14.64, "learning_rate": 4.2685077015303974e-05, "loss": 2.391, "step": 2954000 }, { "epoch": 14.64, "learning_rate": 4.268383842887789e-05, "loss": 2.374, "step": 2954500 }, { "epoch": 14.64, "learning_rate": 4.268259984245181e-05, "loss": 2.3949, "step": 2955000 }, { "epoch": 14.64, "learning_rate": 4.2681363733198577e-05, "loss": 2.3759, "step": 2955500 }, { "epoch": 14.65, "learning_rate": 4.2680125146772493e-05, "loss": 2.387, "step": 2956000 }, { "epoch": 14.65, "learning_rate": 4.267888656034641e-05, "loss": 2.3696, "step": 2956500 }, { "epoch": 14.65, "learning_rate": 4.267764797392033e-05, "loss": 2.4066, "step": 2957000 }, { "epoch": 14.65, "learning_rate": 4.2676409387494244e-05, "loss": 2.4101, "step": 2957500 }, { "epoch": 14.65, "learning_rate": 4.267517327824101e-05, "loss": 2.3882, "step": 2958000 }, { "epoch": 14.66, "learning_rate": 4.267393716898778e-05, "loss": 2.4137, "step": 2958500 }, { "epoch": 14.66, "learning_rate": 4.26726985825617e-05, "loss": 2.3827, "step": 2959000 }, { "epoch": 14.66, "learning_rate": 4.267145999613561e-05, "loss": 2.4004, "step": 2959500 }, { "epoch": 14.66, "learning_rate": 4.2670221409709526e-05, "loss": 2.3833, "step": 2960000 }, { "epoch": 14.67, "learning_rate": 4.26689853004563e-05, "loss": 2.402, "step": 2960500 }, { "epoch": 14.67, "learning_rate": 4.266774671403022e-05, "loss": 2.4208, "step": 2961000 }, { "epoch": 14.67, "learning_rate": 4.2666508127604135e-05, "loss": 2.4217, "step": 2961500 }, { "epoch": 14.67, "learning_rate": 4.266526954117805e-05, "loss": 2.3977, "step": 2962000 }, { "epoch": 14.68, "learning_rate": 4.266403095475196e-05, "loss": 2.398, "step": 2962500 }, { "epoch": 14.68, "learning_rate": 4.266279236832588e-05, "loss": 2.3978, "step": 2963000 }, { "epoch": 14.68, "learning_rate": 4.2661553781899796e-05, "loss": 2.4067, "step": 2963500 }, { "epoch": 14.68, "learning_rate": 4.266031519547371e-05, "loss": 2.4094, "step": 2964000 }, { "epoch": 14.69, "learning_rate": 4.265907660904763e-05, "loss": 2.3881, "step": 2964500 }, { "epoch": 14.69, "learning_rate": 4.265783802262154e-05, "loss": 2.3953, "step": 2965000 }, { "epoch": 14.69, "learning_rate": 4.265659943619546e-05, "loss": 2.3919, "step": 2965500 }, { "epoch": 14.69, "learning_rate": 4.2655363326942226e-05, "loss": 2.3778, "step": 2966000 }, { "epoch": 14.7, "learning_rate": 4.265412474051614e-05, "loss": 2.3833, "step": 2966500 }, { "epoch": 14.7, "learning_rate": 4.265288615409006e-05, "loss": 2.3947, "step": 2967000 }, { "epoch": 14.7, "learning_rate": 4.265164756766398e-05, "loss": 2.3884, "step": 2967500 }, { "epoch": 14.7, "learning_rate": 4.2650408981237894e-05, "loss": 2.3881, "step": 2968000 }, { "epoch": 14.71, "learning_rate": 4.264917039481181e-05, "loss": 2.4038, "step": 2968500 }, { "epoch": 14.71, "learning_rate": 4.264793180838573e-05, "loss": 2.412, "step": 2969000 }, { "epoch": 14.71, "learning_rate": 4.2646693221959644e-05, "loss": 2.3893, "step": 2969500 }, { "epoch": 14.71, "learning_rate": 4.264545463553356e-05, "loss": 2.38, "step": 2970000 }, { "epoch": 14.72, "learning_rate": 4.264421604910748e-05, "loss": 2.4093, "step": 2970500 }, { "epoch": 14.72, "learning_rate": 4.264297993985425e-05, "loss": 2.3932, "step": 2971000 }, { "epoch": 14.72, "learning_rate": 4.2641741353428164e-05, "loss": 2.4133, "step": 2971500 }, { "epoch": 14.72, "learning_rate": 4.2640502767002074e-05, "loss": 2.3936, "step": 2972000 }, { "epoch": 14.73, "learning_rate": 4.263926418057599e-05, "loss": 2.405, "step": 2972500 }, { "epoch": 14.73, "learning_rate": 4.263802559414991e-05, "loss": 2.3882, "step": 2973000 }, { "epoch": 14.73, "learning_rate": 4.2636787007723825e-05, "loss": 2.3862, "step": 2973500 }, { "epoch": 14.73, "learning_rate": 4.263554842129774e-05, "loss": 2.3741, "step": 2974000 }, { "epoch": 14.74, "learning_rate": 4.263430983487166e-05, "loss": 2.4191, "step": 2974500 }, { "epoch": 14.74, "learning_rate": 4.2633071248445576e-05, "loss": 2.4142, "step": 2975000 }, { "epoch": 14.74, "learning_rate": 4.263183266201949e-05, "loss": 2.3749, "step": 2975500 }, { "epoch": 14.74, "learning_rate": 4.263059407559341e-05, "loss": 2.3808, "step": 2976000 }, { "epoch": 14.75, "learning_rate": 4.262935796634018e-05, "loss": 2.3935, "step": 2976500 }, { "epoch": 14.75, "learning_rate": 4.2628119379914095e-05, "loss": 2.3867, "step": 2977000 }, { "epoch": 14.75, "learning_rate": 4.2626883270660864e-05, "loss": 2.3977, "step": 2977500 }, { "epoch": 14.75, "learning_rate": 4.262564468423478e-05, "loss": 2.3814, "step": 2978000 }, { "epoch": 14.76, "learning_rate": 4.262440609780869e-05, "loss": 2.3813, "step": 2978500 }, { "epoch": 14.76, "learning_rate": 4.262316751138261e-05, "loss": 2.3986, "step": 2979000 }, { "epoch": 14.76, "learning_rate": 4.262193140212938e-05, "loss": 2.3936, "step": 2979500 }, { "epoch": 14.76, "learning_rate": 4.2620692815703294e-05, "loss": 2.403, "step": 2980000 }, { "epoch": 14.77, "learning_rate": 4.261945422927721e-05, "loss": 2.391, "step": 2980500 }, { "epoch": 14.77, "learning_rate": 4.261821564285113e-05, "loss": 2.4022, "step": 2981000 }, { "epoch": 14.77, "learning_rate": 4.2616977056425045e-05, "loss": 2.3994, "step": 2981500 }, { "epoch": 14.77, "learning_rate": 4.261573846999896e-05, "loss": 2.4071, "step": 2982000 }, { "epoch": 14.78, "learning_rate": 4.261449988357288e-05, "loss": 2.402, "step": 2982500 }, { "epoch": 14.78, "learning_rate": 4.2613261297146795e-05, "loss": 2.377, "step": 2983000 }, { "epoch": 14.78, "learning_rate": 4.261202271072071e-05, "loss": 2.3869, "step": 2983500 }, { "epoch": 14.78, "learning_rate": 4.261078412429463e-05, "loss": 2.3832, "step": 2984000 }, { "epoch": 14.79, "learning_rate": 4.26095480150414e-05, "loss": 2.4119, "step": 2984500 }, { "epoch": 14.79, "learning_rate": 4.2608309428615315e-05, "loss": 2.4104, "step": 2985000 }, { "epoch": 14.79, "learning_rate": 4.2607070842189225e-05, "loss": 2.3959, "step": 2985500 }, { "epoch": 14.79, "learning_rate": 4.260583225576314e-05, "loss": 2.4221, "step": 2986000 }, { "epoch": 14.8, "learning_rate": 4.260459366933706e-05, "loss": 2.4011, "step": 2986500 }, { "epoch": 14.8, "learning_rate": 4.2603355082910976e-05, "loss": 2.3937, "step": 2987000 }, { "epoch": 14.8, "learning_rate": 4.260211649648489e-05, "loss": 2.3794, "step": 2987500 }, { "epoch": 14.8, "learning_rate": 4.260088038723166e-05, "loss": 2.4179, "step": 2988000 }, { "epoch": 14.81, "learning_rate": 4.259964180080558e-05, "loss": 2.4193, "step": 2988500 }, { "epoch": 14.81, "learning_rate": 4.2598403214379496e-05, "loss": 2.3833, "step": 2989000 }, { "epoch": 14.81, "learning_rate": 4.2597167105126264e-05, "loss": 2.3923, "step": 2989500 }, { "epoch": 14.81, "learning_rate": 4.259592851870018e-05, "loss": 2.394, "step": 2990000 }, { "epoch": 14.82, "learning_rate": 4.25946899322741e-05, "loss": 2.3941, "step": 2990500 }, { "epoch": 14.82, "learning_rate": 4.2593451345848015e-05, "loss": 2.4062, "step": 2991000 }, { "epoch": 14.82, "learning_rate": 4.259221275942193e-05, "loss": 2.3848, "step": 2991500 }, { "epoch": 14.82, "learning_rate": 4.259097417299584e-05, "loss": 2.3924, "step": 2992000 }, { "epoch": 14.83, "learning_rate": 4.258973558656976e-05, "loss": 2.4166, "step": 2992500 }, { "epoch": 14.83, "learning_rate": 4.2588497000143676e-05, "loss": 2.3901, "step": 2993000 }, { "epoch": 14.83, "learning_rate": 4.258725841371759e-05, "loss": 2.4053, "step": 2993500 }, { "epoch": 14.83, "learning_rate": 4.258601982729151e-05, "loss": 2.3861, "step": 2994000 }, { "epoch": 14.84, "learning_rate": 4.258478124086543e-05, "loss": 2.3877, "step": 2994500 }, { "epoch": 14.84, "learning_rate": 4.2583542654439344e-05, "loss": 2.4219, "step": 2995000 }, { "epoch": 14.84, "learning_rate": 4.258230654518611e-05, "loss": 2.4288, "step": 2995500 }, { "epoch": 14.84, "learning_rate": 4.258106795876003e-05, "loss": 2.388, "step": 2996000 }, { "epoch": 14.85, "learning_rate": 4.2579829372333947e-05, "loss": 2.391, "step": 2996500 }, { "epoch": 14.85, "learning_rate": 4.2578590785907863e-05, "loss": 2.4086, "step": 2997000 }, { "epoch": 14.85, "learning_rate": 4.257735219948178e-05, "loss": 2.3844, "step": 2997500 }, { "epoch": 14.85, "learning_rate": 4.25761136130557e-05, "loss": 2.3956, "step": 2998000 }, { "epoch": 14.86, "learning_rate": 4.2574875026629614e-05, "loss": 2.3777, "step": 2998500 }, { "epoch": 14.86, "learning_rate": 4.257363644020353e-05, "loss": 2.3943, "step": 2999000 }, { "epoch": 14.86, "learning_rate": 4.2572402808123145e-05, "loss": 2.3853, "step": 2999500 }, { "epoch": 14.86, "learning_rate": 4.257116422169706e-05, "loss": 2.388, "step": 3000000 }, { "epoch": 14.87, "learning_rate": 4.256992563527098e-05, "loss": 2.4066, "step": 3000500 }, { "epoch": 14.87, "learning_rate": 4.2568687048844896e-05, "loss": 2.3979, "step": 3001000 }, { "epoch": 14.87, "learning_rate": 4.256744846241881e-05, "loss": 2.392, "step": 3001500 }, { "epoch": 14.87, "learning_rate": 4.256620987599273e-05, "loss": 2.4233, "step": 3002000 }, { "epoch": 14.88, "learning_rate": 4.256497128956665e-05, "loss": 2.3834, "step": 3002500 }, { "epoch": 14.88, "learning_rate": 4.2563732703140564e-05, "loss": 2.3635, "step": 3003000 }, { "epoch": 14.88, "learning_rate": 4.256249659388733e-05, "loss": 2.4039, "step": 3003500 }, { "epoch": 14.88, "learning_rate": 4.256125800746125e-05, "loss": 2.3884, "step": 3004000 }, { "epoch": 14.89, "learning_rate": 4.2560019421035166e-05, "loss": 2.4051, "step": 3004500 }, { "epoch": 14.89, "learning_rate": 4.255878083460908e-05, "loss": 2.3943, "step": 3005000 }, { "epoch": 14.89, "learning_rate": 4.255754224818299e-05, "loss": 2.4152, "step": 3005500 }, { "epoch": 14.89, "learning_rate": 4.255630613892976e-05, "loss": 2.3811, "step": 3006000 }, { "epoch": 14.9, "learning_rate": 4.255506755250368e-05, "loss": 2.393, "step": 3006500 }, { "epoch": 14.9, "learning_rate": 4.2553828966077596e-05, "loss": 2.4022, "step": 3007000 }, { "epoch": 14.9, "learning_rate": 4.255259037965151e-05, "loss": 2.3587, "step": 3007500 }, { "epoch": 14.9, "learning_rate": 4.255135179322543e-05, "loss": 2.4104, "step": 3008000 }, { "epoch": 14.91, "learning_rate": 4.255011320679935e-05, "loss": 2.3866, "step": 3008500 }, { "epoch": 14.91, "learning_rate": 4.2548874620373264e-05, "loss": 2.4138, "step": 3009000 }, { "epoch": 14.91, "learning_rate": 4.254763603394718e-05, "loss": 2.3889, "step": 3009500 }, { "epoch": 14.91, "learning_rate": 4.254639992469395e-05, "loss": 2.4053, "step": 3010000 }, { "epoch": 14.92, "learning_rate": 4.2545161338267866e-05, "loss": 2.3929, "step": 3010500 }, { "epoch": 14.92, "learning_rate": 4.254392275184178e-05, "loss": 2.401, "step": 3011000 }, { "epoch": 14.92, "learning_rate": 4.25426841654157e-05, "loss": 2.3921, "step": 3011500 }, { "epoch": 14.92, "learning_rate": 4.254144557898962e-05, "loss": 2.4238, "step": 3012000 }, { "epoch": 14.92, "learning_rate": 4.254020699256353e-05, "loss": 2.3746, "step": 3012500 }, { "epoch": 14.93, "learning_rate": 4.2538968406137444e-05, "loss": 2.3839, "step": 3013000 }, { "epoch": 14.93, "learning_rate": 4.253773229688421e-05, "loss": 2.3947, "step": 3013500 }, { "epoch": 14.93, "learning_rate": 4.253649371045813e-05, "loss": 2.402, "step": 3014000 }, { "epoch": 14.93, "learning_rate": 4.253525512403205e-05, "loss": 2.3758, "step": 3014500 }, { "epoch": 14.94, "learning_rate": 4.2534016537605964e-05, "loss": 2.3933, "step": 3015000 }, { "epoch": 14.94, "learning_rate": 4.253278290552558e-05, "loss": 2.3876, "step": 3015500 }, { "epoch": 14.94, "learning_rate": 4.2531544319099495e-05, "loss": 2.3971, "step": 3016000 }, { "epoch": 14.94, "learning_rate": 4.253030573267341e-05, "loss": 2.4107, "step": 3016500 }, { "epoch": 14.95, "learning_rate": 4.252906714624733e-05, "loss": 2.4113, "step": 3017000 }, { "epoch": 14.95, "learning_rate": 4.2527831036994104e-05, "loss": 2.3868, "step": 3017500 }, { "epoch": 14.95, "learning_rate": 4.252659245056802e-05, "loss": 2.3966, "step": 3018000 }, { "epoch": 14.95, "learning_rate": 4.252535386414194e-05, "loss": 2.3974, "step": 3018500 }, { "epoch": 14.96, "learning_rate": 4.252411527771585e-05, "loss": 2.3664, "step": 3019000 }, { "epoch": 14.96, "learning_rate": 4.2522876691289765e-05, "loss": 2.3885, "step": 3019500 }, { "epoch": 14.96, "learning_rate": 4.252163810486368e-05, "loss": 2.3988, "step": 3020000 }, { "epoch": 14.96, "learning_rate": 4.252040199561045e-05, "loss": 2.3894, "step": 3020500 }, { "epoch": 14.97, "learning_rate": 4.251916340918437e-05, "loss": 2.3897, "step": 3021000 }, { "epoch": 14.97, "learning_rate": 4.2517924822758285e-05, "loss": 2.3855, "step": 3021500 }, { "epoch": 14.97, "learning_rate": 4.2516688713505053e-05, "loss": 2.3951, "step": 3022000 }, { "epoch": 14.97, "learning_rate": 4.251545260425182e-05, "loss": 2.438, "step": 3022500 }, { "epoch": 14.98, "learning_rate": 4.251421401782574e-05, "loss": 2.3995, "step": 3023000 }, { "epoch": 14.98, "learning_rate": 4.2512975431399656e-05, "loss": 2.4106, "step": 3023500 }, { "epoch": 14.98, "learning_rate": 4.251173684497357e-05, "loss": 2.3756, "step": 3024000 }, { "epoch": 14.98, "learning_rate": 4.2510500735720335e-05, "loss": 2.3789, "step": 3024500 }, { "epoch": 14.99, "learning_rate": 4.250926214929425e-05, "loss": 2.4049, "step": 3025000 }, { "epoch": 14.99, "learning_rate": 4.250802356286817e-05, "loss": 2.363, "step": 3025500 }, { "epoch": 14.99, "learning_rate": 4.2506784976442086e-05, "loss": 2.4208, "step": 3026000 }, { "epoch": 14.99, "learning_rate": 4.2505546390016e-05, "loss": 2.3791, "step": 3026500 }, { "epoch": 15.0, "learning_rate": 4.250430780358992e-05, "loss": 2.3832, "step": 3027000 }, { "epoch": 15.0, "learning_rate": 4.250306921716384e-05, "loss": 2.3903, "step": 3027500 }, { "epoch": 15.0, "eval_accuracy": 0.6470159402014201, "eval_accuracy_mlm": 0.6010111516244826, "eval_accuracy_nsp": 0.8638683082377951, "eval_loss": 2.399909496307373, "eval_runtime": 145.758, "eval_samples_per_second": 1749.194, "eval_steps_per_second": 72.888, "step": 3027645 }, { "epoch": 15.0, "learning_rate": 4.2501830630737754e-05, "loss": 2.3627, "step": 3028000 }, { "epoch": 15.0, "learning_rate": 4.250059204431167e-05, "loss": 2.3456, "step": 3028500 }, { "epoch": 15.01, "learning_rate": 4.249935593505844e-05, "loss": 2.3451, "step": 3029000 }, { "epoch": 15.01, "learning_rate": 4.2498117348632356e-05, "loss": 2.3651, "step": 3029500 }, { "epoch": 15.01, "learning_rate": 4.249687876220627e-05, "loss": 2.3518, "step": 3030000 }, { "epoch": 15.01, "learning_rate": 4.249564017578019e-05, "loss": 2.3489, "step": 3030500 }, { "epoch": 15.02, "learning_rate": 4.249440158935411e-05, "loss": 2.3564, "step": 3031000 }, { "epoch": 15.02, "learning_rate": 4.2493163002928024e-05, "loss": 2.3755, "step": 3031500 }, { "epoch": 15.02, "learning_rate": 4.249192441650194e-05, "loss": 2.3875, "step": 3032000 }, { "epoch": 15.02, "learning_rate": 4.249068583007586e-05, "loss": 2.3706, "step": 3032500 }, { "epoch": 15.03, "learning_rate": 4.2489447243649775e-05, "loss": 2.3564, "step": 3033000 }, { "epoch": 15.03, "learning_rate": 4.2488208657223685e-05, "loss": 2.3672, "step": 3033500 }, { "epoch": 15.03, "learning_rate": 4.24869700707976e-05, "loss": 2.3532, "step": 3034000 }, { "epoch": 15.03, "learning_rate": 4.248573148437152e-05, "loss": 2.3531, "step": 3034500 }, { "epoch": 15.04, "learning_rate": 4.2484492897945436e-05, "loss": 2.3578, "step": 3035000 }, { "epoch": 15.04, "learning_rate": 4.2483256788692204e-05, "loss": 2.4104, "step": 3035500 }, { "epoch": 15.04, "learning_rate": 4.248201820226612e-05, "loss": 2.3724, "step": 3036000 }, { "epoch": 15.04, "learning_rate": 4.248077961584004e-05, "loss": 2.3303, "step": 3036500 }, { "epoch": 15.05, "learning_rate": 4.2479541029413955e-05, "loss": 2.3409, "step": 3037000 }, { "epoch": 15.05, "learning_rate": 4.2478302442987865e-05, "loss": 2.3438, "step": 3037500 }, { "epoch": 15.05, "learning_rate": 4.247706633373464e-05, "loss": 2.3988, "step": 3038000 }, { "epoch": 15.05, "learning_rate": 4.247582774730856e-05, "loss": 2.3621, "step": 3038500 }, { "epoch": 15.06, "learning_rate": 4.247459163805532e-05, "loss": 2.4002, "step": 3039000 }, { "epoch": 15.06, "learning_rate": 4.247335305162924e-05, "loss": 2.3571, "step": 3039500 }, { "epoch": 15.06, "learning_rate": 4.2472114465203154e-05, "loss": 2.3597, "step": 3040000 }, { "epoch": 15.06, "learning_rate": 4.247087587877707e-05, "loss": 2.3572, "step": 3040500 }, { "epoch": 15.07, "learning_rate": 4.246963729235099e-05, "loss": 2.391, "step": 3041000 }, { "epoch": 15.07, "learning_rate": 4.2468398705924905e-05, "loss": 2.3717, "step": 3041500 }, { "epoch": 15.07, "learning_rate": 4.246716011949882e-05, "loss": 2.3924, "step": 3042000 }, { "epoch": 15.07, "learning_rate": 4.246592153307274e-05, "loss": 2.3618, "step": 3042500 }, { "epoch": 15.08, "learning_rate": 4.2464682946646655e-05, "loss": 2.3624, "step": 3043000 }, { "epoch": 15.08, "learning_rate": 4.246344436022057e-05, "loss": 2.368, "step": 3043500 }, { "epoch": 15.08, "learning_rate": 4.246220577379448e-05, "loss": 2.3958, "step": 3044000 }, { "epoch": 15.08, "learning_rate": 4.24609671873684e-05, "loss": 2.3759, "step": 3044500 }, { "epoch": 15.09, "learning_rate": 4.2459728600942316e-05, "loss": 2.3554, "step": 3045000 }, { "epoch": 15.09, "learning_rate": 4.245849496886194e-05, "loss": 2.3552, "step": 3045500 }, { "epoch": 15.09, "learning_rate": 4.2457256382435854e-05, "loss": 2.3811, "step": 3046000 }, { "epoch": 15.09, "learning_rate": 4.245601779600977e-05, "loss": 2.381, "step": 3046500 }, { "epoch": 15.1, "learning_rate": 4.245477920958369e-05, "loss": 2.3725, "step": 3047000 }, { "epoch": 15.1, "learning_rate": 4.245354310033046e-05, "loss": 2.3783, "step": 3047500 }, { "epoch": 15.1, "learning_rate": 4.2452304513904374e-05, "loss": 2.3869, "step": 3048000 }, { "epoch": 15.1, "learning_rate": 4.245106592747829e-05, "loss": 2.3689, "step": 3048500 }, { "epoch": 15.11, "learning_rate": 4.244982734105221e-05, "loss": 2.3686, "step": 3049000 }, { "epoch": 15.11, "learning_rate": 4.2448588754626124e-05, "loss": 2.3877, "step": 3049500 }, { "epoch": 15.11, "learning_rate": 4.244735016820004e-05, "loss": 2.3803, "step": 3050000 }, { "epoch": 15.11, "learning_rate": 4.244611158177396e-05, "loss": 2.3927, "step": 3050500 }, { "epoch": 15.12, "learning_rate": 4.2444872995347875e-05, "loss": 2.3387, "step": 3051000 }, { "epoch": 15.12, "learning_rate": 4.244363688609464e-05, "loss": 2.3727, "step": 3051500 }, { "epoch": 15.12, "learning_rate": 4.2442398299668554e-05, "loss": 2.3679, "step": 3052000 }, { "epoch": 15.12, "learning_rate": 4.244115971324247e-05, "loss": 2.3728, "step": 3052500 }, { "epoch": 15.13, "learning_rate": 4.243992112681639e-05, "loss": 2.3888, "step": 3053000 }, { "epoch": 15.13, "learning_rate": 4.2438682540390305e-05, "loss": 2.3701, "step": 3053500 }, { "epoch": 15.13, "learning_rate": 4.243744395396422e-05, "loss": 2.363, "step": 3054000 }, { "epoch": 15.13, "learning_rate": 4.243620784471099e-05, "loss": 2.3638, "step": 3054500 }, { "epoch": 15.14, "learning_rate": 4.243496925828491e-05, "loss": 2.3703, "step": 3055000 }, { "epoch": 15.14, "learning_rate": 4.2433730671858824e-05, "loss": 2.3799, "step": 3055500 }, { "epoch": 15.14, "learning_rate": 4.243249208543274e-05, "loss": 2.3817, "step": 3056000 }, { "epoch": 15.14, "learning_rate": 4.243125349900666e-05, "loss": 2.3796, "step": 3056500 }, { "epoch": 15.15, "learning_rate": 4.243001738975342e-05, "loss": 2.3622, "step": 3057000 }, { "epoch": 15.15, "learning_rate": 4.242877880332734e-05, "loss": 2.3801, "step": 3057500 }, { "epoch": 15.15, "learning_rate": 4.2427542694074106e-05, "loss": 2.3878, "step": 3058000 }, { "epoch": 15.15, "learning_rate": 4.242630410764802e-05, "loss": 2.3574, "step": 3058500 }, { "epoch": 15.16, "learning_rate": 4.242506552122194e-05, "loss": 2.3479, "step": 3059000 }, { "epoch": 15.16, "learning_rate": 4.242382693479586e-05, "loss": 2.3529, "step": 3059500 }, { "epoch": 15.16, "learning_rate": 4.2422588348369774e-05, "loss": 2.3862, "step": 3060000 }, { "epoch": 15.16, "learning_rate": 4.242134976194369e-05, "loss": 2.3654, "step": 3060500 }, { "epoch": 15.17, "learning_rate": 4.242011117551761e-05, "loss": 2.3518, "step": 3061000 }, { "epoch": 15.17, "learning_rate": 4.2418875066264376e-05, "loss": 2.374, "step": 3061500 }, { "epoch": 15.17, "learning_rate": 4.241763647983829e-05, "loss": 2.3655, "step": 3062000 }, { "epoch": 15.17, "learning_rate": 4.241639789341221e-05, "loss": 2.3658, "step": 3062500 }, { "epoch": 15.18, "learning_rate": 4.241515930698613e-05, "loss": 2.3602, "step": 3063000 }, { "epoch": 15.18, "learning_rate": 4.2413920720560044e-05, "loss": 2.3609, "step": 3063500 }, { "epoch": 15.18, "learning_rate": 4.2412682134133954e-05, "loss": 2.367, "step": 3064000 }, { "epoch": 15.18, "learning_rate": 4.241144354770787e-05, "loss": 2.3744, "step": 3064500 }, { "epoch": 15.19, "learning_rate": 4.241020496128179e-05, "loss": 2.3696, "step": 3065000 }, { "epoch": 15.19, "learning_rate": 4.2408966374855705e-05, "loss": 2.3808, "step": 3065500 }, { "epoch": 15.19, "learning_rate": 4.2407730265602474e-05, "loss": 2.3886, "step": 3066000 }, { "epoch": 15.19, "learning_rate": 4.240649167917639e-05, "loss": 2.3662, "step": 3066500 }, { "epoch": 15.19, "learning_rate": 4.240525309275031e-05, "loss": 2.3931, "step": 3067000 }, { "epoch": 15.2, "learning_rate": 4.2404014506324225e-05, "loss": 2.3764, "step": 3067500 }, { "epoch": 15.2, "learning_rate": 4.240277591989814e-05, "loss": 2.3745, "step": 3068000 }, { "epoch": 15.2, "learning_rate": 4.240153733347206e-05, "loss": 2.3827, "step": 3068500 }, { "epoch": 15.2, "learning_rate": 4.240030122421883e-05, "loss": 2.3736, "step": 3069000 }, { "epoch": 15.21, "learning_rate": 4.2399062637792744e-05, "loss": 2.3926, "step": 3069500 }, { "epoch": 15.21, "learning_rate": 4.239782405136666e-05, "loss": 2.3844, "step": 3070000 }, { "epoch": 15.21, "learning_rate": 4.239658546494057e-05, "loss": 2.3614, "step": 3070500 }, { "epoch": 15.21, "learning_rate": 4.239534687851449e-05, "loss": 2.3672, "step": 3071000 }, { "epoch": 15.22, "learning_rate": 4.2394108292088405e-05, "loss": 2.3647, "step": 3071500 }, { "epoch": 15.22, "learning_rate": 4.239286970566232e-05, "loss": 2.3832, "step": 3072000 }, { "epoch": 15.22, "learning_rate": 4.239163111923624e-05, "loss": 2.3962, "step": 3072500 }, { "epoch": 15.22, "learning_rate": 4.239039500998301e-05, "loss": 2.361, "step": 3073000 }, { "epoch": 15.23, "learning_rate": 4.238915890072978e-05, "loss": 2.3841, "step": 3073500 }, { "epoch": 15.23, "learning_rate": 4.2387920314303694e-05, "loss": 2.3729, "step": 3074000 }, { "epoch": 15.23, "learning_rate": 4.238668172787761e-05, "loss": 2.3962, "step": 3074500 }, { "epoch": 15.23, "learning_rate": 4.238544314145153e-05, "loss": 2.3965, "step": 3075000 }, { "epoch": 15.24, "learning_rate": 4.2384204555025444e-05, "loss": 2.3928, "step": 3075500 }, { "epoch": 15.24, "learning_rate": 4.238296596859936e-05, "loss": 2.3878, "step": 3076000 }, { "epoch": 15.24, "learning_rate": 4.238172985934612e-05, "loss": 2.3677, "step": 3076500 }, { "epoch": 15.24, "learning_rate": 4.238049127292004e-05, "loss": 2.3718, "step": 3077000 }, { "epoch": 15.25, "learning_rate": 4.237925268649396e-05, "loss": 2.3981, "step": 3077500 }, { "epoch": 15.25, "learning_rate": 4.2378014100067874e-05, "loss": 2.3709, "step": 3078000 }, { "epoch": 15.25, "learning_rate": 4.237677551364179e-05, "loss": 2.4063, "step": 3078500 }, { "epoch": 15.25, "learning_rate": 4.237553940438856e-05, "loss": 2.3766, "step": 3079000 }, { "epoch": 15.26, "learning_rate": 4.237430081796248e-05, "loss": 2.3836, "step": 3079500 }, { "epoch": 15.26, "learning_rate": 4.2373064708709246e-05, "loss": 2.3741, "step": 3080000 }, { "epoch": 15.26, "learning_rate": 4.237182612228316e-05, "loss": 2.3866, "step": 3080500 }, { "epoch": 15.26, "learning_rate": 4.237058753585708e-05, "loss": 2.3761, "step": 3081000 }, { "epoch": 15.27, "learning_rate": 4.2369348949430996e-05, "loss": 2.3606, "step": 3081500 }, { "epoch": 15.27, "learning_rate": 4.2368110363004907e-05, "loss": 2.3544, "step": 3082000 }, { "epoch": 15.27, "learning_rate": 4.2366871776578824e-05, "loss": 2.3828, "step": 3082500 }, { "epoch": 15.27, "learning_rate": 4.236563319015274e-05, "loss": 2.3494, "step": 3083000 }, { "epoch": 15.28, "learning_rate": 4.236439460372666e-05, "loss": 2.3749, "step": 3083500 }, { "epoch": 15.28, "learning_rate": 4.2363156017300574e-05, "loss": 2.384, "step": 3084000 }, { "epoch": 15.28, "learning_rate": 4.236191990804735e-05, "loss": 2.3721, "step": 3084500 }, { "epoch": 15.28, "learning_rate": 4.236068132162126e-05, "loss": 2.367, "step": 3085000 }, { "epoch": 15.29, "learning_rate": 4.235944273519518e-05, "loss": 2.3508, "step": 3085500 }, { "epoch": 15.29, "learning_rate": 4.2358204148769094e-05, "loss": 2.3788, "step": 3086000 }, { "epoch": 15.29, "learning_rate": 4.235696556234301e-05, "loss": 2.3785, "step": 3086500 }, { "epoch": 15.29, "learning_rate": 4.235572697591693e-05, "loss": 2.3908, "step": 3087000 }, { "epoch": 15.3, "learning_rate": 4.2354488389490845e-05, "loss": 2.3881, "step": 3087500 }, { "epoch": 15.3, "learning_rate": 4.235324980306476e-05, "loss": 2.3729, "step": 3088000 }, { "epoch": 15.3, "learning_rate": 4.2352013693811524e-05, "loss": 2.3611, "step": 3088500 }, { "epoch": 15.3, "learning_rate": 4.235077510738544e-05, "loss": 2.3854, "step": 3089000 }, { "epoch": 15.31, "learning_rate": 4.2349538998132216e-05, "loss": 2.3698, "step": 3089500 }, { "epoch": 15.31, "learning_rate": 4.234830041170613e-05, "loss": 2.3766, "step": 3090000 }, { "epoch": 15.31, "learning_rate": 4.234706182528005e-05, "loss": 2.366, "step": 3090500 }, { "epoch": 15.31, "learning_rate": 4.234582323885397e-05, "loss": 2.3723, "step": 3091000 }, { "epoch": 15.32, "learning_rate": 4.234458465242788e-05, "loss": 2.378, "step": 3091500 }, { "epoch": 15.32, "learning_rate": 4.2343346066001794e-05, "loss": 2.3886, "step": 3092000 }, { "epoch": 15.32, "learning_rate": 4.234210747957571e-05, "loss": 2.3767, "step": 3092500 }, { "epoch": 15.32, "learning_rate": 4.234087137032248e-05, "loss": 2.3718, "step": 3093000 }, { "epoch": 15.33, "learning_rate": 4.23396327838964e-05, "loss": 2.3711, "step": 3093500 }, { "epoch": 15.33, "learning_rate": 4.2338394197470314e-05, "loss": 2.3911, "step": 3094000 }, { "epoch": 15.33, "learning_rate": 4.2337155611044224e-05, "loss": 2.3506, "step": 3094500 }, { "epoch": 15.33, "learning_rate": 4.2335919501791e-05, "loss": 2.3763, "step": 3095000 }, { "epoch": 15.34, "learning_rate": 4.2334680915364916e-05, "loss": 2.3951, "step": 3095500 }, { "epoch": 15.34, "learning_rate": 4.233344232893883e-05, "loss": 2.3904, "step": 3096000 }, { "epoch": 15.34, "learning_rate": 4.233220374251275e-05, "loss": 2.3658, "step": 3096500 }, { "epoch": 15.34, "learning_rate": 4.233096515608667e-05, "loss": 2.3671, "step": 3097000 }, { "epoch": 15.35, "learning_rate": 4.232972656966058e-05, "loss": 2.3921, "step": 3097500 }, { "epoch": 15.35, "learning_rate": 4.2328487983234494e-05, "loss": 2.3858, "step": 3098000 }, { "epoch": 15.35, "learning_rate": 4.232724939680841e-05, "loss": 2.3708, "step": 3098500 }, { "epoch": 15.35, "learning_rate": 4.232601081038233e-05, "loss": 2.3545, "step": 3099000 }, { "epoch": 15.36, "learning_rate": 4.2324772223956245e-05, "loss": 2.3699, "step": 3099500 }, { "epoch": 15.36, "learning_rate": 4.232353363753016e-05, "loss": 2.3685, "step": 3100000 }, { "epoch": 15.36, "learning_rate": 4.232229505110408e-05, "loss": 2.3597, "step": 3100500 }, { "epoch": 15.36, "learning_rate": 4.2321056464677996e-05, "loss": 2.4052, "step": 3101000 }, { "epoch": 15.37, "learning_rate": 4.231981787825191e-05, "loss": 2.3693, "step": 3101500 }, { "epoch": 15.37, "learning_rate": 4.231857929182583e-05, "loss": 2.3889, "step": 3102000 }, { "epoch": 15.37, "learning_rate": 4.2317340705399746e-05, "loss": 2.3852, "step": 3102500 }, { "epoch": 15.37, "learning_rate": 4.2316102118973663e-05, "loss": 2.3749, "step": 3103000 }, { "epoch": 15.38, "learning_rate": 4.2314866009720425e-05, "loss": 2.3614, "step": 3103500 }, { "epoch": 15.38, "learning_rate": 4.231362742329434e-05, "loss": 2.3763, "step": 3104000 }, { "epoch": 15.38, "learning_rate": 4.231238883686826e-05, "loss": 2.3818, "step": 3104500 }, { "epoch": 15.38, "learning_rate": 4.2311150250442176e-05, "loss": 2.3593, "step": 3105000 }, { "epoch": 15.39, "learning_rate": 4.2309914141188945e-05, "loss": 2.3944, "step": 3105500 }, { "epoch": 15.39, "learning_rate": 4.2308678031935714e-05, "loss": 2.3794, "step": 3106000 }, { "epoch": 15.39, "learning_rate": 4.230743944550963e-05, "loss": 2.3482, "step": 3106500 }, { "epoch": 15.39, "learning_rate": 4.230620085908355e-05, "loss": 2.3852, "step": 3107000 }, { "epoch": 15.4, "learning_rate": 4.2304962272657465e-05, "loss": 2.3996, "step": 3107500 }, { "epoch": 15.4, "learning_rate": 4.2303723686231375e-05, "loss": 2.3387, "step": 3108000 }, { "epoch": 15.4, "learning_rate": 4.230248509980529e-05, "loss": 2.3473, "step": 3108500 }, { "epoch": 15.4, "learning_rate": 4.230124899055207e-05, "loss": 2.3551, "step": 3109000 }, { "epoch": 15.41, "learning_rate": 4.2300012881298836e-05, "loss": 2.3676, "step": 3109500 }, { "epoch": 15.41, "learning_rate": 4.229877429487275e-05, "loss": 2.3606, "step": 3110000 }, { "epoch": 15.41, "learning_rate": 4.229753570844667e-05, "loss": 2.3807, "step": 3110500 }, { "epoch": 15.41, "learning_rate": 4.229629712202059e-05, "loss": 2.3688, "step": 3111000 }, { "epoch": 15.42, "learning_rate": 4.2295058535594504e-05, "loss": 2.4027, "step": 3111500 }, { "epoch": 15.42, "learning_rate": 4.2293819949168414e-05, "loss": 2.3676, "step": 3112000 }, { "epoch": 15.42, "learning_rate": 4.229258136274233e-05, "loss": 2.3845, "step": 3112500 }, { "epoch": 15.42, "learning_rate": 4.229134277631625e-05, "loss": 2.3844, "step": 3113000 }, { "epoch": 15.43, "learning_rate": 4.2290104189890165e-05, "loss": 2.3842, "step": 3113500 }, { "epoch": 15.43, "learning_rate": 4.2288868080636934e-05, "loss": 2.3777, "step": 3114000 }, { "epoch": 15.43, "learning_rate": 4.228762949421085e-05, "loss": 2.3698, "step": 3114500 }, { "epoch": 15.43, "learning_rate": 4.228639090778477e-05, "loss": 2.3828, "step": 3115000 }, { "epoch": 15.44, "learning_rate": 4.2285154798531536e-05, "loss": 2.3369, "step": 3115500 }, { "epoch": 15.44, "learning_rate": 4.228391621210545e-05, "loss": 2.3914, "step": 3116000 }, { "epoch": 15.44, "learning_rate": 4.228267762567937e-05, "loss": 2.3863, "step": 3116500 }, { "epoch": 15.44, "learning_rate": 4.228144151642613e-05, "loss": 2.3908, "step": 3117000 }, { "epoch": 15.45, "learning_rate": 4.228020293000005e-05, "loss": 2.3652, "step": 3117500 }, { "epoch": 15.45, "learning_rate": 4.2278964343573966e-05, "loss": 2.3764, "step": 3118000 }, { "epoch": 15.45, "learning_rate": 4.227772575714788e-05, "loss": 2.4005, "step": 3118500 }, { "epoch": 15.45, "learning_rate": 4.22764871707218e-05, "loss": 2.3834, "step": 3119000 }, { "epoch": 15.46, "learning_rate": 4.227525106146857e-05, "loss": 2.3881, "step": 3119500 }, { "epoch": 15.46, "learning_rate": 4.2274012475042486e-05, "loss": 2.3676, "step": 3120000 }, { "epoch": 15.46, "learning_rate": 4.22727738886164e-05, "loss": 2.3671, "step": 3120500 }, { "epoch": 15.46, "learning_rate": 4.227153530219032e-05, "loss": 2.3702, "step": 3121000 }, { "epoch": 15.46, "learning_rate": 4.2270296715764236e-05, "loss": 2.3961, "step": 3121500 }, { "epoch": 15.47, "learning_rate": 4.226905812933815e-05, "loss": 2.3922, "step": 3122000 }, { "epoch": 15.47, "learning_rate": 4.226781954291207e-05, "loss": 2.3649, "step": 3122500 }, { "epoch": 15.47, "learning_rate": 4.226658095648599e-05, "loss": 2.3649, "step": 3123000 }, { "epoch": 15.47, "learning_rate": 4.2265342370059904e-05, "loss": 2.393, "step": 3123500 }, { "epoch": 15.48, "learning_rate": 4.226410378363382e-05, "loss": 2.3866, "step": 3124000 }, { "epoch": 15.48, "learning_rate": 4.226286519720774e-05, "loss": 2.3817, "step": 3124500 }, { "epoch": 15.48, "learning_rate": 4.2261626610781655e-05, "loss": 2.3549, "step": 3125000 }, { "epoch": 15.48, "learning_rate": 4.2260388024355565e-05, "loss": 2.3664, "step": 3125500 }, { "epoch": 15.49, "learning_rate": 4.225914943792948e-05, "loss": 2.3663, "step": 3126000 }, { "epoch": 15.49, "learning_rate": 4.22579158058491e-05, "loss": 2.3709, "step": 3126500 }, { "epoch": 15.49, "learning_rate": 4.225667721942302e-05, "loss": 2.368, "step": 3127000 }, { "epoch": 15.49, "learning_rate": 4.2255438632996936e-05, "loss": 2.3716, "step": 3127500 }, { "epoch": 15.5, "learning_rate": 4.2254200046570853e-05, "loss": 2.4181, "step": 3128000 }, { "epoch": 15.5, "learning_rate": 4.225296146014477e-05, "loss": 2.3836, "step": 3128500 }, { "epoch": 15.5, "learning_rate": 4.225172287371869e-05, "loss": 2.3661, "step": 3129000 }, { "epoch": 15.5, "learning_rate": 4.225048676446545e-05, "loss": 2.3753, "step": 3129500 }, { "epoch": 15.51, "learning_rate": 4.2249248178039366e-05, "loss": 2.3983, "step": 3130000 }, { "epoch": 15.51, "learning_rate": 4.224800959161328e-05, "loss": 2.3943, "step": 3130500 }, { "epoch": 15.51, "learning_rate": 4.22467710051872e-05, "loss": 2.3889, "step": 3131000 }, { "epoch": 15.51, "learning_rate": 4.224553241876112e-05, "loss": 2.3742, "step": 3131500 }, { "epoch": 15.52, "learning_rate": 4.2244293832335034e-05, "loss": 2.3783, "step": 3132000 }, { "epoch": 15.52, "learning_rate": 4.224305524590895e-05, "loss": 2.4068, "step": 3132500 }, { "epoch": 15.52, "learning_rate": 4.224181665948287e-05, "loss": 2.3924, "step": 3133000 }, { "epoch": 15.52, "learning_rate": 4.2240580550229637e-05, "loss": 2.386, "step": 3133500 }, { "epoch": 15.53, "learning_rate": 4.2239341963803554e-05, "loss": 2.3976, "step": 3134000 }, { "epoch": 15.53, "learning_rate": 4.223810337737747e-05, "loss": 2.369, "step": 3134500 }, { "epoch": 15.53, "learning_rate": 4.223686479095139e-05, "loss": 2.3854, "step": 3135000 }, { "epoch": 15.53, "learning_rate": 4.2235626204525304e-05, "loss": 2.392, "step": 3135500 }, { "epoch": 15.54, "learning_rate": 4.223438761809922e-05, "loss": 2.3792, "step": 3136000 }, { "epoch": 15.54, "learning_rate": 4.223314903167314e-05, "loss": 2.357, "step": 3136500 }, { "epoch": 15.54, "learning_rate": 4.2231910445247055e-05, "loss": 2.3669, "step": 3137000 }, { "epoch": 15.54, "learning_rate": 4.223067185882097e-05, "loss": 2.3775, "step": 3137500 }, { "epoch": 15.55, "learning_rate": 4.2229435749567734e-05, "loss": 2.3768, "step": 3138000 }, { "epoch": 15.55, "learning_rate": 4.222819716314165e-05, "loss": 2.3868, "step": 3138500 }, { "epoch": 15.55, "learning_rate": 4.222695857671557e-05, "loss": 2.3805, "step": 3139000 }, { "epoch": 15.55, "learning_rate": 4.2225719990289485e-05, "loss": 2.3855, "step": 3139500 }, { "epoch": 15.56, "learning_rate": 4.22244814038634e-05, "loss": 2.364, "step": 3140000 }, { "epoch": 15.56, "learning_rate": 4.222324281743732e-05, "loss": 2.3673, "step": 3140500 }, { "epoch": 15.56, "learning_rate": 4.222200670818409e-05, "loss": 2.3848, "step": 3141000 }, { "epoch": 15.56, "learning_rate": 4.2220768121758004e-05, "loss": 2.3857, "step": 3141500 }, { "epoch": 15.57, "learning_rate": 4.221953201250477e-05, "loss": 2.3872, "step": 3142000 }, { "epoch": 15.57, "learning_rate": 4.2218293426078683e-05, "loss": 2.3764, "step": 3142500 }, { "epoch": 15.57, "learning_rate": 4.22170548396526e-05, "loss": 2.3927, "step": 3143000 }, { "epoch": 15.57, "learning_rate": 4.221581625322652e-05, "loss": 2.3675, "step": 3143500 }, { "epoch": 15.58, "learning_rate": 4.2214577666800434e-05, "loss": 2.3898, "step": 3144000 }, { "epoch": 15.58, "learning_rate": 4.221333908037435e-05, "loss": 2.3677, "step": 3144500 }, { "epoch": 15.58, "learning_rate": 4.221210049394827e-05, "loss": 2.4042, "step": 3145000 }, { "epoch": 15.58, "learning_rate": 4.2210861907522185e-05, "loss": 2.3829, "step": 3145500 }, { "epoch": 15.59, "learning_rate": 4.2209625798268954e-05, "loss": 2.3929, "step": 3146000 }, { "epoch": 15.59, "learning_rate": 4.220838721184287e-05, "loss": 2.3803, "step": 3146500 }, { "epoch": 15.59, "learning_rate": 4.220714862541679e-05, "loss": 2.3651, "step": 3147000 }, { "epoch": 15.59, "learning_rate": 4.2205910038990705e-05, "loss": 2.4064, "step": 3147500 }, { "epoch": 15.6, "learning_rate": 4.220467392973747e-05, "loss": 2.3791, "step": 3148000 }, { "epoch": 15.6, "learning_rate": 4.220343534331139e-05, "loss": 2.402, "step": 3148500 }, { "epoch": 15.6, "learning_rate": 4.22021967568853e-05, "loss": 2.3714, "step": 3149000 }, { "epoch": 15.6, "learning_rate": 4.220095817045922e-05, "loss": 2.3842, "step": 3149500 }, { "epoch": 15.61, "learning_rate": 4.2199719584033134e-05, "loss": 2.3855, "step": 3150000 }, { "epoch": 15.61, "learning_rate": 4.21984834747799e-05, "loss": 2.4002, "step": 3150500 }, { "epoch": 15.61, "learning_rate": 4.219724488835382e-05, "loss": 2.3798, "step": 3151000 }, { "epoch": 15.61, "learning_rate": 4.219600630192774e-05, "loss": 2.3679, "step": 3151500 }, { "epoch": 15.62, "learning_rate": 4.2194770192674506e-05, "loss": 2.3694, "step": 3152000 }, { "epoch": 15.62, "learning_rate": 4.219353160624842e-05, "loss": 2.3997, "step": 3152500 }, { "epoch": 15.62, "learning_rate": 4.219229301982234e-05, "loss": 2.3808, "step": 3153000 }, { "epoch": 15.62, "learning_rate": 4.2191054433396257e-05, "loss": 2.3822, "step": 3153500 }, { "epoch": 15.63, "learning_rate": 4.2189815846970173e-05, "loss": 2.3799, "step": 3154000 }, { "epoch": 15.63, "learning_rate": 4.218857726054409e-05, "loss": 2.3874, "step": 3154500 }, { "epoch": 15.63, "learning_rate": 4.218733867411801e-05, "loss": 2.387, "step": 3155000 }, { "epoch": 15.63, "learning_rate": 4.2186100087691924e-05, "loss": 2.3953, "step": 3155500 }, { "epoch": 15.64, "learning_rate": 4.2184861501265834e-05, "loss": 2.3654, "step": 3156000 }, { "epoch": 15.64, "learning_rate": 4.218362291483975e-05, "loss": 2.3823, "step": 3156500 }, { "epoch": 15.64, "learning_rate": 4.218238432841367e-05, "loss": 2.3889, "step": 3157000 }, { "epoch": 15.64, "learning_rate": 4.218114821916044e-05, "loss": 2.3922, "step": 3157500 }, { "epoch": 15.65, "learning_rate": 4.2179909632734354e-05, "loss": 2.395, "step": 3158000 }, { "epoch": 15.65, "learning_rate": 4.217867352348112e-05, "loss": 2.3761, "step": 3158500 }, { "epoch": 15.65, "learning_rate": 4.217743493705504e-05, "loss": 2.4031, "step": 3159000 }, { "epoch": 15.65, "learning_rate": 4.217619635062896e-05, "loss": 2.3753, "step": 3159500 }, { "epoch": 15.66, "learning_rate": 4.2174957764202874e-05, "loss": 2.3735, "step": 3160000 }, { "epoch": 15.66, "learning_rate": 4.217371917777679e-05, "loss": 2.3785, "step": 3160500 }, { "epoch": 15.66, "learning_rate": 4.217248059135071e-05, "loss": 2.3862, "step": 3161000 }, { "epoch": 15.66, "learning_rate": 4.217124448209747e-05, "loss": 2.3566, "step": 3161500 }, { "epoch": 15.67, "learning_rate": 4.2170008372844245e-05, "loss": 2.3831, "step": 3162000 }, { "epoch": 15.67, "learning_rate": 4.216876978641816e-05, "loss": 2.3836, "step": 3162500 }, { "epoch": 15.67, "learning_rate": 4.216753119999208e-05, "loss": 2.3862, "step": 3163000 }, { "epoch": 15.67, "learning_rate": 4.2166292613565996e-05, "loss": 2.3876, "step": 3163500 }, { "epoch": 15.68, "learning_rate": 4.2165054027139906e-05, "loss": 2.3782, "step": 3164000 }, { "epoch": 15.68, "learning_rate": 4.216381544071382e-05, "loss": 2.3743, "step": 3164500 }, { "epoch": 15.68, "learning_rate": 4.216257685428774e-05, "loss": 2.3789, "step": 3165000 }, { "epoch": 15.68, "learning_rate": 4.216133826786166e-05, "loss": 2.3919, "step": 3165500 }, { "epoch": 15.69, "learning_rate": 4.2160099681435574e-05, "loss": 2.3701, "step": 3166000 }, { "epoch": 15.69, "learning_rate": 4.215886109500949e-05, "loss": 2.3719, "step": 3166500 }, { "epoch": 15.69, "learning_rate": 4.215762250858341e-05, "loss": 2.3928, "step": 3167000 }, { "epoch": 15.69, "learning_rate": 4.2156383922157325e-05, "loss": 2.3984, "step": 3167500 }, { "epoch": 15.7, "learning_rate": 4.215514533573124e-05, "loss": 2.3644, "step": 3168000 }, { "epoch": 15.7, "learning_rate": 4.215390674930516e-05, "loss": 2.3855, "step": 3168500 }, { "epoch": 15.7, "learning_rate": 4.2152668162879075e-05, "loss": 2.3751, "step": 3169000 }, { "epoch": 15.7, "learning_rate": 4.2151429576452985e-05, "loss": 2.3593, "step": 3169500 }, { "epoch": 15.71, "learning_rate": 4.2150193467199754e-05, "loss": 2.3574, "step": 3170000 }, { "epoch": 15.71, "learning_rate": 4.214895488077367e-05, "loss": 2.3706, "step": 3170500 }, { "epoch": 15.71, "learning_rate": 4.214771629434759e-05, "loss": 2.3809, "step": 3171000 }, { "epoch": 15.71, "learning_rate": 4.214648018509436e-05, "loss": 2.3843, "step": 3171500 }, { "epoch": 15.72, "learning_rate": 4.2145241598668274e-05, "loss": 2.3904, "step": 3172000 }, { "epoch": 15.72, "learning_rate": 4.214400301224219e-05, "loss": 2.38, "step": 3172500 }, { "epoch": 15.72, "learning_rate": 4.214276442581611e-05, "loss": 2.3769, "step": 3173000 }, { "epoch": 15.72, "learning_rate": 4.2141525839390025e-05, "loss": 2.3672, "step": 3173500 }, { "epoch": 15.73, "learning_rate": 4.214028725296394e-05, "loss": 2.4197, "step": 3174000 }, { "epoch": 15.73, "learning_rate": 4.213904866653786e-05, "loss": 2.3741, "step": 3174500 }, { "epoch": 15.73, "learning_rate": 4.2137810080111775e-05, "loss": 2.3853, "step": 3175000 }, { "epoch": 15.73, "learning_rate": 4.213657149368569e-05, "loss": 2.3921, "step": 3175500 }, { "epoch": 15.74, "learning_rate": 4.213533290725961e-05, "loss": 2.3486, "step": 3176000 }, { "epoch": 15.74, "learning_rate": 4.213409679800637e-05, "loss": 2.3594, "step": 3176500 }, { "epoch": 15.74, "learning_rate": 4.213285821158029e-05, "loss": 2.3819, "step": 3177000 }, { "epoch": 15.74, "learning_rate": 4.2131619625154205e-05, "loss": 2.3789, "step": 3177500 }, { "epoch": 15.74, "learning_rate": 4.213038103872812e-05, "loss": 2.3747, "step": 3178000 }, { "epoch": 15.75, "learning_rate": 4.212914245230204e-05, "loss": 2.3711, "step": 3178500 }, { "epoch": 15.75, "learning_rate": 4.2127903865875956e-05, "loss": 2.3928, "step": 3179000 }, { "epoch": 15.75, "learning_rate": 4.2126667756622725e-05, "loss": 2.3843, "step": 3179500 }, { "epoch": 15.75, "learning_rate": 4.212543164736949e-05, "loss": 2.4232, "step": 3180000 }, { "epoch": 15.76, "learning_rate": 4.2124193060943404e-05, "loss": 2.3803, "step": 3180500 }, { "epoch": 15.76, "learning_rate": 4.212295447451732e-05, "loss": 2.3913, "step": 3181000 }, { "epoch": 15.76, "learning_rate": 4.212171588809124e-05, "loss": 2.3512, "step": 3181500 }, { "epoch": 15.76, "learning_rate": 4.2120477301665155e-05, "loss": 2.3715, "step": 3182000 }, { "epoch": 15.77, "learning_rate": 4.211923871523907e-05, "loss": 2.3803, "step": 3182500 }, { "epoch": 15.77, "learning_rate": 4.211800012881299e-05, "loss": 2.3765, "step": 3183000 }, { "epoch": 15.77, "learning_rate": 4.211676401955976e-05, "loss": 2.3654, "step": 3183500 }, { "epoch": 15.77, "learning_rate": 4.2115525433133674e-05, "loss": 2.3893, "step": 3184000 }, { "epoch": 15.78, "learning_rate": 4.211428684670759e-05, "loss": 2.3841, "step": 3184500 }, { "epoch": 15.78, "learning_rate": 4.211304826028151e-05, "loss": 2.3901, "step": 3185000 }, { "epoch": 15.78, "learning_rate": 4.2111809673855425e-05, "loss": 2.3614, "step": 3185500 }, { "epoch": 15.78, "learning_rate": 4.211057108742934e-05, "loss": 2.3858, "step": 3186000 }, { "epoch": 15.79, "learning_rate": 4.210933250100326e-05, "loss": 2.3817, "step": 3186500 }, { "epoch": 15.79, "learning_rate": 4.210809886892288e-05, "loss": 2.3566, "step": 3187000 }, { "epoch": 15.79, "learning_rate": 4.2106860282496796e-05, "loss": 2.3557, "step": 3187500 }, { "epoch": 15.79, "learning_rate": 4.210562169607071e-05, "loss": 2.3717, "step": 3188000 }, { "epoch": 15.8, "learning_rate": 4.210438310964463e-05, "loss": 2.3864, "step": 3188500 }, { "epoch": 15.8, "learning_rate": 4.210314452321854e-05, "loss": 2.3699, "step": 3189000 }, { "epoch": 15.8, "learning_rate": 4.210190593679246e-05, "loss": 2.4, "step": 3189500 }, { "epoch": 15.8, "learning_rate": 4.2100667350366374e-05, "loss": 2.3853, "step": 3190000 }, { "epoch": 15.81, "learning_rate": 4.209942876394029e-05, "loss": 2.3952, "step": 3190500 }, { "epoch": 15.81, "learning_rate": 4.209819017751421e-05, "loss": 2.3948, "step": 3191000 }, { "epoch": 15.81, "learning_rate": 4.2096951591088125e-05, "loss": 2.3522, "step": 3191500 }, { "epoch": 15.81, "learning_rate": 4.209571300466204e-05, "loss": 2.3915, "step": 3192000 }, { "epoch": 15.82, "learning_rate": 4.209447441823596e-05, "loss": 2.3876, "step": 3192500 }, { "epoch": 15.82, "learning_rate": 4.2093235831809876e-05, "loss": 2.4019, "step": 3193000 }, { "epoch": 15.82, "learning_rate": 4.209199724538379e-05, "loss": 2.3881, "step": 3193500 }, { "epoch": 15.82, "learning_rate": 4.209075865895771e-05, "loss": 2.4025, "step": 3194000 }, { "epoch": 15.83, "learning_rate": 4.208952254970447e-05, "loss": 2.3753, "step": 3194500 }, { "epoch": 15.83, "learning_rate": 4.208828644045124e-05, "loss": 2.4118, "step": 3195000 }, { "epoch": 15.83, "learning_rate": 4.208704785402516e-05, "loss": 2.3851, "step": 3195500 }, { "epoch": 15.83, "learning_rate": 4.2085809267599074e-05, "loss": 2.3801, "step": 3196000 }, { "epoch": 15.84, "learning_rate": 4.208457315834585e-05, "loss": 2.394, "step": 3196500 }, { "epoch": 15.84, "learning_rate": 4.208333457191977e-05, "loss": 2.4099, "step": 3197000 }, { "epoch": 15.84, "learning_rate": 4.208209598549368e-05, "loss": 2.3875, "step": 3197500 }, { "epoch": 15.84, "learning_rate": 4.2080857399067594e-05, "loss": 2.3896, "step": 3198000 }, { "epoch": 15.85, "learning_rate": 4.207961881264151e-05, "loss": 2.3944, "step": 3198500 }, { "epoch": 15.85, "learning_rate": 4.207838022621543e-05, "loss": 2.3874, "step": 3199000 }, { "epoch": 15.85, "learning_rate": 4.2077141639789345e-05, "loss": 2.3929, "step": 3199500 }, { "epoch": 15.85, "learning_rate": 4.2075903053363255e-05, "loss": 2.3987, "step": 3200000 }, { "epoch": 15.86, "learning_rate": 4.207466694411003e-05, "loss": 2.3844, "step": 3200500 }, { "epoch": 15.86, "learning_rate": 4.207342835768395e-05, "loss": 2.3909, "step": 3201000 }, { "epoch": 15.86, "learning_rate": 4.207218977125786e-05, "loss": 2.3669, "step": 3201500 }, { "epoch": 15.86, "learning_rate": 4.2070951184831774e-05, "loss": 2.3722, "step": 3202000 }, { "epoch": 15.87, "learning_rate": 4.206971259840569e-05, "loss": 2.3922, "step": 3202500 }, { "epoch": 15.87, "learning_rate": 4.206847401197961e-05, "loss": 2.3647, "step": 3203000 }, { "epoch": 15.87, "learning_rate": 4.2067235425553525e-05, "loss": 2.3798, "step": 3203500 }, { "epoch": 15.87, "learning_rate": 4.206599683912744e-05, "loss": 2.3789, "step": 3204000 }, { "epoch": 15.88, "learning_rate": 4.206475825270136e-05, "loss": 2.3835, "step": 3204500 }, { "epoch": 15.88, "learning_rate": 4.2063519666275276e-05, "loss": 2.4027, "step": 3205000 }, { "epoch": 15.88, "learning_rate": 4.206228107984919e-05, "loss": 2.3981, "step": 3205500 }, { "epoch": 15.88, "learning_rate": 4.206104249342311e-05, "loss": 2.3786, "step": 3206000 }, { "epoch": 15.89, "learning_rate": 4.205980390699703e-05, "loss": 2.3917, "step": 3206500 }, { "epoch": 15.89, "learning_rate": 4.2058565320570944e-05, "loss": 2.381, "step": 3207000 }, { "epoch": 15.89, "learning_rate": 4.2057329211317706e-05, "loss": 2.4097, "step": 3207500 }, { "epoch": 15.89, "learning_rate": 4.205609062489162e-05, "loss": 2.37, "step": 3208000 }, { "epoch": 15.9, "learning_rate": 4.205485203846554e-05, "loss": 2.3886, "step": 3208500 }, { "epoch": 15.9, "learning_rate": 4.2053613452039457e-05, "loss": 2.3526, "step": 3209000 }, { "epoch": 15.9, "learning_rate": 4.2052374865613374e-05, "loss": 2.3746, "step": 3209500 }, { "epoch": 15.9, "learning_rate": 4.205113875636014e-05, "loss": 2.397, "step": 3210000 }, { "epoch": 15.91, "learning_rate": 4.204990016993406e-05, "loss": 2.3853, "step": 3210500 }, { "epoch": 15.91, "learning_rate": 4.2048661583507976e-05, "loss": 2.4041, "step": 3211000 }, { "epoch": 15.91, "learning_rate": 4.204742299708189e-05, "loss": 2.3686, "step": 3211500 }, { "epoch": 15.91, "learning_rate": 4.204618441065581e-05, "loss": 2.3785, "step": 3212000 }, { "epoch": 15.92, "learning_rate": 4.204494582422973e-05, "loss": 2.4122, "step": 3212500 }, { "epoch": 15.92, "learning_rate": 4.2043707237803644e-05, "loss": 2.3736, "step": 3213000 }, { "epoch": 15.92, "learning_rate": 4.204246865137756e-05, "loss": 2.3955, "step": 3213500 }, { "epoch": 15.92, "learning_rate": 4.204123006495148e-05, "loss": 2.4139, "step": 3214000 }, { "epoch": 15.93, "learning_rate": 4.2039991478525395e-05, "loss": 2.385, "step": 3214500 }, { "epoch": 15.93, "learning_rate": 4.203875536927216e-05, "loss": 2.3506, "step": 3215000 }, { "epoch": 15.93, "learning_rate": 4.2037516782846074e-05, "loss": 2.3841, "step": 3215500 }, { "epoch": 15.93, "learning_rate": 4.203627819641999e-05, "loss": 2.3781, "step": 3216000 }, { "epoch": 15.94, "learning_rate": 4.203503960999391e-05, "loss": 2.3828, "step": 3216500 }, { "epoch": 15.94, "learning_rate": 4.2033801023567824e-05, "loss": 2.402, "step": 3217000 }, { "epoch": 15.94, "learning_rate": 4.203256243714174e-05, "loss": 2.3728, "step": 3217500 }, { "epoch": 15.94, "learning_rate": 4.203132632788851e-05, "loss": 2.3832, "step": 3218000 }, { "epoch": 15.95, "learning_rate": 4.203008774146243e-05, "loss": 2.3773, "step": 3218500 }, { "epoch": 15.95, "learning_rate": 4.2028849155036344e-05, "loss": 2.3783, "step": 3219000 }, { "epoch": 15.95, "learning_rate": 4.202761056861026e-05, "loss": 2.401, "step": 3219500 }, { "epoch": 15.95, "learning_rate": 4.202637445935702e-05, "loss": 2.4113, "step": 3220000 }, { "epoch": 15.96, "learning_rate": 4.202513587293094e-05, "loss": 2.3905, "step": 3220500 }, { "epoch": 15.96, "learning_rate": 4.202389728650486e-05, "loss": 2.3798, "step": 3221000 }, { "epoch": 15.96, "learning_rate": 4.2022658700078774e-05, "loss": 2.3802, "step": 3221500 }, { "epoch": 15.96, "learning_rate": 4.202142011365269e-05, "loss": 2.3822, "step": 3222000 }, { "epoch": 15.97, "learning_rate": 4.202018152722661e-05, "loss": 2.3768, "step": 3222500 }, { "epoch": 15.97, "learning_rate": 4.2018942940800525e-05, "loss": 2.373, "step": 3223000 }, { "epoch": 15.97, "learning_rate": 4.201770683154729e-05, "loss": 2.3979, "step": 3223500 }, { "epoch": 15.97, "learning_rate": 4.201646824512121e-05, "loss": 2.4065, "step": 3224000 }, { "epoch": 15.98, "learning_rate": 4.201522965869513e-05, "loss": 2.3761, "step": 3224500 }, { "epoch": 15.98, "learning_rate": 4.2013991072269044e-05, "loss": 2.3741, "step": 3225000 }, { "epoch": 15.98, "learning_rate": 4.201275248584296e-05, "loss": 2.354, "step": 3225500 }, { "epoch": 15.98, "learning_rate": 4.201151637658973e-05, "loss": 2.3871, "step": 3226000 }, { "epoch": 15.99, "learning_rate": 4.201027779016365e-05, "loss": 2.3685, "step": 3226500 }, { "epoch": 15.99, "learning_rate": 4.200903920373756e-05, "loss": 2.3788, "step": 3227000 }, { "epoch": 15.99, "learning_rate": 4.2007805571657184e-05, "loss": 2.365, "step": 3227500 }, { "epoch": 15.99, "learning_rate": 4.20065669852311e-05, "loss": 2.3865, "step": 3228000 }, { "epoch": 16.0, "learning_rate": 4.200532839880502e-05, "loss": 2.3693, "step": 3228500 }, { "epoch": 16.0, "learning_rate": 4.200409228955178e-05, "loss": 2.3664, "step": 3229000 }, { "epoch": 16.0, "eval_accuracy": 0.6492868439902124, "eval_accuracy_mlm": 0.6034055016175841, "eval_accuracy_nsp": 0.8654450323385329, "eval_loss": 2.379127025604248, "eval_runtime": 145.9487, "eval_samples_per_second": 1746.908, "eval_steps_per_second": 72.793, "step": 3229488 }, { "epoch": 16.0, "learning_rate": 4.20028537031257e-05, "loss": 2.3991, "step": 3229500 }, { "epoch": 16.0, "learning_rate": 4.2001615116699614e-05, "loss": 2.3311, "step": 3230000 }, { "epoch": 16.01, "learning_rate": 4.200037653027353e-05, "loss": 2.379, "step": 3230500 }, { "epoch": 16.01, "learning_rate": 4.199913794384745e-05, "loss": 2.3483, "step": 3231000 }, { "epoch": 16.01, "learning_rate": 4.1997899357421365e-05, "loss": 2.3625, "step": 3231500 }, { "epoch": 16.01, "learning_rate": 4.199666077099528e-05, "loss": 2.3739, "step": 3232000 }, { "epoch": 16.01, "learning_rate": 4.199542218456919e-05, "loss": 2.3595, "step": 3232500 }, { "epoch": 16.02, "learning_rate": 4.199418359814311e-05, "loss": 2.3672, "step": 3233000 }, { "epoch": 16.02, "learning_rate": 4.1992947488889885e-05, "loss": 2.3369, "step": 3233500 }, { "epoch": 16.02, "learning_rate": 4.19917089024638e-05, "loss": 2.3675, "step": 3234000 }, { "epoch": 16.02, "learning_rate": 4.199047031603772e-05, "loss": 2.3456, "step": 3234500 }, { "epoch": 16.03, "learning_rate": 4.1989231729611635e-05, "loss": 2.345, "step": 3235000 }, { "epoch": 16.03, "learning_rate": 4.1987993143185545e-05, "loss": 2.3621, "step": 3235500 }, { "epoch": 16.03, "learning_rate": 4.198675455675946e-05, "loss": 2.3692, "step": 3236000 }, { "epoch": 16.03, "learning_rate": 4.198551597033338e-05, "loss": 2.3583, "step": 3236500 }, { "epoch": 16.04, "learning_rate": 4.1984277383907296e-05, "loss": 2.3604, "step": 3237000 }, { "epoch": 16.04, "learning_rate": 4.198303879748121e-05, "loss": 2.3367, "step": 3237500 }, { "epoch": 16.04, "learning_rate": 4.198180268822798e-05, "loss": 2.3517, "step": 3238000 }, { "epoch": 16.04, "learning_rate": 4.19805641018019e-05, "loss": 2.3562, "step": 3238500 }, { "epoch": 16.05, "learning_rate": 4.197932551537581e-05, "loss": 2.3516, "step": 3239000 }, { "epoch": 16.05, "learning_rate": 4.1978086928949726e-05, "loss": 2.3629, "step": 3239500 }, { "epoch": 16.05, "learning_rate": 4.197684834252364e-05, "loss": 2.3508, "step": 3240000 }, { "epoch": 16.05, "learning_rate": 4.197560975609756e-05, "loss": 2.3546, "step": 3240500 }, { "epoch": 16.06, "learning_rate": 4.1974373646844335e-05, "loss": 2.3317, "step": 3241000 }, { "epoch": 16.06, "learning_rate": 4.19731375375911e-05, "loss": 2.3723, "step": 3241500 }, { "epoch": 16.06, "learning_rate": 4.1971898951165014e-05, "loss": 2.3662, "step": 3242000 }, { "epoch": 16.06, "learning_rate": 4.197066036473893e-05, "loss": 2.3793, "step": 3242500 }, { "epoch": 16.07, "learning_rate": 4.196942177831285e-05, "loss": 2.3432, "step": 3243000 }, { "epoch": 16.07, "learning_rate": 4.196818566905962e-05, "loss": 2.357, "step": 3243500 }, { "epoch": 16.07, "learning_rate": 4.1966947082633534e-05, "loss": 2.3268, "step": 3244000 }, { "epoch": 16.07, "learning_rate": 4.196570849620745e-05, "loss": 2.3552, "step": 3244500 }, { "epoch": 16.08, "learning_rate": 4.196446990978137e-05, "loss": 2.3761, "step": 3245000 }, { "epoch": 16.08, "learning_rate": 4.1963231323355285e-05, "loss": 2.3717, "step": 3245500 }, { "epoch": 16.08, "learning_rate": 4.19619927369292e-05, "loss": 2.3475, "step": 3246000 }, { "epoch": 16.08, "learning_rate": 4.196075415050312e-05, "loss": 2.3493, "step": 3246500 }, { "epoch": 16.09, "learning_rate": 4.1959515564077036e-05, "loss": 2.3632, "step": 3247000 }, { "epoch": 16.09, "learning_rate": 4.1958279454823804e-05, "loss": 2.3624, "step": 3247500 }, { "epoch": 16.09, "learning_rate": 4.1957040868397715e-05, "loss": 2.3572, "step": 3248000 }, { "epoch": 16.09, "learning_rate": 4.195580228197163e-05, "loss": 2.3751, "step": 3248500 }, { "epoch": 16.1, "learning_rate": 4.195456369554555e-05, "loss": 2.3589, "step": 3249000 }, { "epoch": 16.1, "learning_rate": 4.195332758629232e-05, "loss": 2.3459, "step": 3249500 }, { "epoch": 16.1, "learning_rate": 4.1952088999866234e-05, "loss": 2.3791, "step": 3250000 }, { "epoch": 16.1, "learning_rate": 4.195085041344015e-05, "loss": 2.3528, "step": 3250500 }, { "epoch": 16.11, "learning_rate": 4.194961182701407e-05, "loss": 2.3381, "step": 3251000 }, { "epoch": 16.11, "learning_rate": 4.1948373240587985e-05, "loss": 2.3431, "step": 3251500 }, { "epoch": 16.11, "learning_rate": 4.19471346541619e-05, "loss": 2.3478, "step": 3252000 }, { "epoch": 16.11, "learning_rate": 4.194589606773582e-05, "loss": 2.3681, "step": 3252500 }, { "epoch": 16.12, "learning_rate": 4.1944657481309736e-05, "loss": 2.3455, "step": 3253000 }, { "epoch": 16.12, "learning_rate": 4.194341889488365e-05, "loss": 2.3379, "step": 3253500 }, { "epoch": 16.12, "learning_rate": 4.194218030845757e-05, "loss": 2.3439, "step": 3254000 }, { "epoch": 16.12, "learning_rate": 4.194094172203148e-05, "loss": 2.3919, "step": 3254500 }, { "epoch": 16.13, "learning_rate": 4.193970561277825e-05, "loss": 2.3615, "step": 3255000 }, { "epoch": 16.13, "learning_rate": 4.1938467026352165e-05, "loss": 2.3455, "step": 3255500 }, { "epoch": 16.13, "learning_rate": 4.193722843992608e-05, "loss": 2.3646, "step": 3256000 }, { "epoch": 16.13, "learning_rate": 4.19359898535e-05, "loss": 2.3457, "step": 3256500 }, { "epoch": 16.14, "learning_rate": 4.1934751267073916e-05, "loss": 2.364, "step": 3257000 }, { "epoch": 16.14, "learning_rate": 4.1933512680647826e-05, "loss": 2.3615, "step": 3257500 }, { "epoch": 16.14, "learning_rate": 4.193227409422174e-05, "loss": 2.3757, "step": 3258000 }, { "epoch": 16.14, "learning_rate": 4.193104046214137e-05, "loss": 2.3399, "step": 3258500 }, { "epoch": 16.15, "learning_rate": 4.192980187571529e-05, "loss": 2.3593, "step": 3259000 }, { "epoch": 16.15, "learning_rate": 4.192856576646205e-05, "loss": 2.378, "step": 3259500 }, { "epoch": 16.15, "learning_rate": 4.192732718003597e-05, "loss": 2.362, "step": 3260000 }, { "epoch": 16.15, "learning_rate": 4.1926088593609884e-05, "loss": 2.381, "step": 3260500 }, { "epoch": 16.16, "learning_rate": 4.19248500071838e-05, "loss": 2.3708, "step": 3261000 }, { "epoch": 16.16, "learning_rate": 4.192361142075772e-05, "loss": 2.3763, "step": 3261500 }, { "epoch": 16.16, "learning_rate": 4.1922372834331634e-05, "loss": 2.3587, "step": 3262000 }, { "epoch": 16.16, "learning_rate": 4.192113424790555e-05, "loss": 2.3468, "step": 3262500 }, { "epoch": 16.17, "learning_rate": 4.191989566147947e-05, "loss": 2.3629, "step": 3263000 }, { "epoch": 16.17, "learning_rate": 4.1918657075053385e-05, "loss": 2.3905, "step": 3263500 }, { "epoch": 16.17, "learning_rate": 4.1917423442973006e-05, "loss": 2.3469, "step": 3264000 }, { "epoch": 16.17, "learning_rate": 4.191618485654692e-05, "loss": 2.3685, "step": 3264500 }, { "epoch": 16.18, "learning_rate": 4.191494627012083e-05, "loss": 2.3798, "step": 3265000 }, { "epoch": 16.18, "learning_rate": 4.191370768369475e-05, "loss": 2.368, "step": 3265500 }, { "epoch": 16.18, "learning_rate": 4.191246909726867e-05, "loss": 2.3569, "step": 3266000 }, { "epoch": 16.18, "learning_rate": 4.1911230510842584e-05, "loss": 2.3642, "step": 3266500 }, { "epoch": 16.19, "learning_rate": 4.19099919244165e-05, "loss": 2.339, "step": 3267000 }, { "epoch": 16.19, "learning_rate": 4.190875333799042e-05, "loss": 2.392, "step": 3267500 }, { "epoch": 16.19, "learning_rate": 4.1907514751564335e-05, "loss": 2.3535, "step": 3268000 }, { "epoch": 16.19, "learning_rate": 4.190627616513825e-05, "loss": 2.3608, "step": 3268500 }, { "epoch": 16.2, "learning_rate": 4.190504005588502e-05, "loss": 2.3881, "step": 3269000 }, { "epoch": 16.2, "learning_rate": 4.190380146945894e-05, "loss": 2.354, "step": 3269500 }, { "epoch": 16.2, "learning_rate": 4.1902562883032854e-05, "loss": 2.3587, "step": 3270000 }, { "epoch": 16.2, "learning_rate": 4.190132429660677e-05, "loss": 2.375, "step": 3270500 }, { "epoch": 16.21, "learning_rate": 4.190008571018069e-05, "loss": 2.3826, "step": 3271000 }, { "epoch": 16.21, "learning_rate": 4.1898847123754605e-05, "loss": 2.3526, "step": 3271500 }, { "epoch": 16.21, "learning_rate": 4.189760853732852e-05, "loss": 2.3849, "step": 3272000 }, { "epoch": 16.21, "learning_rate": 4.189636995090244e-05, "loss": 2.3827, "step": 3272500 }, { "epoch": 16.22, "learning_rate": 4.1895131364476356e-05, "loss": 2.3505, "step": 3273000 }, { "epoch": 16.22, "learning_rate": 4.189389277805027e-05, "loss": 2.3436, "step": 3273500 }, { "epoch": 16.22, "learning_rate": 4.189265419162419e-05, "loss": 2.3404, "step": 3274000 }, { "epoch": 16.22, "learning_rate": 4.1891415605198106e-05, "loss": 2.3904, "step": 3274500 }, { "epoch": 16.23, "learning_rate": 4.189017701877202e-05, "loss": 2.3878, "step": 3275000 }, { "epoch": 16.23, "learning_rate": 4.188894338669164e-05, "loss": 2.372, "step": 3275500 }, { "epoch": 16.23, "learning_rate": 4.1887704800265554e-05, "loss": 2.3541, "step": 3276000 }, { "epoch": 16.23, "learning_rate": 4.188646621383947e-05, "loss": 2.3633, "step": 3276500 }, { "epoch": 16.24, "learning_rate": 4.188522762741339e-05, "loss": 2.3915, "step": 3277000 }, { "epoch": 16.24, "learning_rate": 4.1883989040987305e-05, "loss": 2.3363, "step": 3277500 }, { "epoch": 16.24, "learning_rate": 4.188275045456122e-05, "loss": 2.3568, "step": 3278000 }, { "epoch": 16.24, "learning_rate": 4.188151186813514e-05, "loss": 2.3588, "step": 3278500 }, { "epoch": 16.25, "learning_rate": 4.18802757588819e-05, "loss": 2.3645, "step": 3279000 }, { "epoch": 16.25, "learning_rate": 4.187903717245582e-05, "loss": 2.3591, "step": 3279500 }, { "epoch": 16.25, "learning_rate": 4.1877798586029735e-05, "loss": 2.3448, "step": 3280000 }, { "epoch": 16.25, "learning_rate": 4.187655999960365e-05, "loss": 2.3665, "step": 3280500 }, { "epoch": 16.26, "learning_rate": 4.187532389035042e-05, "loss": 2.3366, "step": 3281000 }, { "epoch": 16.26, "learning_rate": 4.187408530392434e-05, "loss": 2.3512, "step": 3281500 }, { "epoch": 16.26, "learning_rate": 4.1872846717498254e-05, "loss": 2.3426, "step": 3282000 }, { "epoch": 16.26, "learning_rate": 4.187160813107217e-05, "loss": 2.3714, "step": 3282500 }, { "epoch": 16.27, "learning_rate": 4.187036954464609e-05, "loss": 2.3823, "step": 3283000 }, { "epoch": 16.27, "learning_rate": 4.186913343539286e-05, "loss": 2.3549, "step": 3283500 }, { "epoch": 16.27, "learning_rate": 4.1867894848966774e-05, "loss": 2.3734, "step": 3284000 }, { "epoch": 16.27, "learning_rate": 4.186665626254069e-05, "loss": 2.372, "step": 3284500 }, { "epoch": 16.28, "learning_rate": 4.18654176761146e-05, "loss": 2.3525, "step": 3285000 }, { "epoch": 16.28, "learning_rate": 4.186417908968852e-05, "loss": 2.3635, "step": 3285500 }, { "epoch": 16.28, "learning_rate": 4.1862940503262435e-05, "loss": 2.3709, "step": 3286000 }, { "epoch": 16.28, "learning_rate": 4.186170191683635e-05, "loss": 2.364, "step": 3286500 }, { "epoch": 16.28, "learning_rate": 4.186046333041027e-05, "loss": 2.3777, "step": 3287000 }, { "epoch": 16.29, "learning_rate": 4.185922722115704e-05, "loss": 2.3548, "step": 3287500 }, { "epoch": 16.29, "learning_rate": 4.1857988634730954e-05, "loss": 2.3503, "step": 3288000 }, { "epoch": 16.29, "learning_rate": 4.185675004830487e-05, "loss": 2.3702, "step": 3288500 }, { "epoch": 16.29, "learning_rate": 4.185551146187879e-05, "loss": 2.3839, "step": 3289000 }, { "epoch": 16.3, "learning_rate": 4.185427535262556e-05, "loss": 2.3612, "step": 3289500 }, { "epoch": 16.3, "learning_rate": 4.1853039243372326e-05, "loss": 2.3803, "step": 3290000 }, { "epoch": 16.3, "learning_rate": 4.185180065694624e-05, "loss": 2.3574, "step": 3290500 }, { "epoch": 16.3, "learning_rate": 4.185056207052016e-05, "loss": 2.3609, "step": 3291000 }, { "epoch": 16.31, "learning_rate": 4.184932348409408e-05, "loss": 2.3628, "step": 3291500 }, { "epoch": 16.31, "learning_rate": 4.1848087374840846e-05, "loss": 2.3595, "step": 3292000 }, { "epoch": 16.31, "learning_rate": 4.1846851265587614e-05, "loss": 2.3629, "step": 3292500 }, { "epoch": 16.31, "learning_rate": 4.1845612679161525e-05, "loss": 2.3337, "step": 3293000 }, { "epoch": 16.32, "learning_rate": 4.184437409273544e-05, "loss": 2.3881, "step": 3293500 }, { "epoch": 16.32, "learning_rate": 4.184313550630936e-05, "loss": 2.3534, "step": 3294000 }, { "epoch": 16.32, "learning_rate": 4.184189939705613e-05, "loss": 2.4006, "step": 3294500 }, { "epoch": 16.32, "learning_rate": 4.1840660810630044e-05, "loss": 2.351, "step": 3295000 }, { "epoch": 16.33, "learning_rate": 4.183942222420396e-05, "loss": 2.3661, "step": 3295500 }, { "epoch": 16.33, "learning_rate": 4.183818363777788e-05, "loss": 2.3705, "step": 3296000 }, { "epoch": 16.33, "learning_rate": 4.1836945051351795e-05, "loss": 2.3444, "step": 3296500 }, { "epoch": 16.33, "learning_rate": 4.183570646492571e-05, "loss": 2.3706, "step": 3297000 }, { "epoch": 16.34, "learning_rate": 4.183446787849963e-05, "loss": 2.3554, "step": 3297500 }, { "epoch": 16.34, "learning_rate": 4.1833229292073546e-05, "loss": 2.372, "step": 3298000 }, { "epoch": 16.34, "learning_rate": 4.183199070564746e-05, "loss": 2.3748, "step": 3298500 }, { "epoch": 16.34, "learning_rate": 4.183075211922138e-05, "loss": 2.3779, "step": 3299000 }, { "epoch": 16.35, "learning_rate": 4.1829513532795296e-05, "loss": 2.3628, "step": 3299500 }, { "epoch": 16.35, "learning_rate": 4.182827494636921e-05, "loss": 2.3664, "step": 3300000 }, { "epoch": 16.35, "learning_rate": 4.182703635994313e-05, "loss": 2.3823, "step": 3300500 }, { "epoch": 16.35, "learning_rate": 4.182579777351705e-05, "loss": 2.3607, "step": 3301000 }, { "epoch": 16.36, "learning_rate": 4.1824559187090964e-05, "loss": 2.3748, "step": 3301500 }, { "epoch": 16.36, "learning_rate": 4.1823320600664874e-05, "loss": 2.3491, "step": 3302000 }, { "epoch": 16.36, "learning_rate": 4.182208201423879e-05, "loss": 2.3577, "step": 3302500 }, { "epoch": 16.36, "learning_rate": 4.182084342781271e-05, "loss": 2.3337, "step": 3303000 }, { "epoch": 16.37, "learning_rate": 4.1819604841386625e-05, "loss": 2.3717, "step": 3303500 }, { "epoch": 16.37, "learning_rate": 4.181836625496054e-05, "loss": 2.3637, "step": 3304000 }, { "epoch": 16.37, "learning_rate": 4.181712766853446e-05, "loss": 2.3689, "step": 3304500 }, { "epoch": 16.37, "learning_rate": 4.181589155928122e-05, "loss": 2.3704, "step": 3305000 }, { "epoch": 16.38, "learning_rate": 4.181465297285514e-05, "loss": 2.3826, "step": 3305500 }, { "epoch": 16.38, "learning_rate": 4.1813414386429055e-05, "loss": 2.3344, "step": 3306000 }, { "epoch": 16.38, "learning_rate": 4.181217580000297e-05, "loss": 2.3534, "step": 3306500 }, { "epoch": 16.38, "learning_rate": 4.181093721357689e-05, "loss": 2.3628, "step": 3307000 }, { "epoch": 16.39, "learning_rate": 4.1809698627150806e-05, "loss": 2.3458, "step": 3307500 }, { "epoch": 16.39, "learning_rate": 4.180846251789758e-05, "loss": 2.3597, "step": 3308000 }, { "epoch": 16.39, "learning_rate": 4.180722640864434e-05, "loss": 2.3612, "step": 3308500 }, { "epoch": 16.39, "learning_rate": 4.180598782221826e-05, "loss": 2.348, "step": 3309000 }, { "epoch": 16.4, "learning_rate": 4.180474923579218e-05, "loss": 2.3849, "step": 3309500 }, { "epoch": 16.4, "learning_rate": 4.1803510649366094e-05, "loss": 2.3843, "step": 3310000 }, { "epoch": 16.4, "learning_rate": 4.180227206294001e-05, "loss": 2.3771, "step": 3310500 }, { "epoch": 16.4, "learning_rate": 4.180103347651393e-05, "loss": 2.3771, "step": 3311000 }, { "epoch": 16.41, "learning_rate": 4.179979489008784e-05, "loss": 2.3497, "step": 3311500 }, { "epoch": 16.41, "learning_rate": 4.1798556303661755e-05, "loss": 2.348, "step": 3312000 }, { "epoch": 16.41, "learning_rate": 4.179732019440853e-05, "loss": 2.3576, "step": 3312500 }, { "epoch": 16.41, "learning_rate": 4.179608160798245e-05, "loss": 2.3733, "step": 3313000 }, { "epoch": 16.42, "learning_rate": 4.1794843021556364e-05, "loss": 2.3712, "step": 3313500 }, { "epoch": 16.42, "learning_rate": 4.179360443513028e-05, "loss": 2.3685, "step": 3314000 }, { "epoch": 16.42, "learning_rate": 4.179236832587704e-05, "loss": 2.3851, "step": 3314500 }, { "epoch": 16.42, "learning_rate": 4.179112973945096e-05, "loss": 2.3961, "step": 3315000 }, { "epoch": 16.43, "learning_rate": 4.178989115302488e-05, "loss": 2.4049, "step": 3315500 }, { "epoch": 16.43, "learning_rate": 4.1788652566598794e-05, "loss": 2.3543, "step": 3316000 }, { "epoch": 16.43, "learning_rate": 4.178741398017271e-05, "loss": 2.3707, "step": 3316500 }, { "epoch": 16.43, "learning_rate": 4.178617539374663e-05, "loss": 2.3482, "step": 3317000 }, { "epoch": 16.44, "learning_rate": 4.178493680732054e-05, "loss": 2.3832, "step": 3317500 }, { "epoch": 16.44, "learning_rate": 4.1783700698067314e-05, "loss": 2.4093, "step": 3318000 }, { "epoch": 16.44, "learning_rate": 4.178246458881408e-05, "loss": 2.3686, "step": 3318500 }, { "epoch": 16.44, "learning_rate": 4.1781226002388e-05, "loss": 2.3364, "step": 3319000 }, { "epoch": 16.45, "learning_rate": 4.177998989313476e-05, "loss": 2.3651, "step": 3319500 }, { "epoch": 16.45, "learning_rate": 4.177875130670868e-05, "loss": 2.3806, "step": 3320000 }, { "epoch": 16.45, "learning_rate": 4.1777512720282595e-05, "loss": 2.3776, "step": 3320500 }, { "epoch": 16.45, "learning_rate": 4.177627413385651e-05, "loss": 2.3673, "step": 3321000 }, { "epoch": 16.46, "learning_rate": 4.177503554743043e-05, "loss": 2.3572, "step": 3321500 }, { "epoch": 16.46, "learning_rate": 4.1773796961004346e-05, "loss": 2.3464, "step": 3322000 }, { "epoch": 16.46, "learning_rate": 4.177255837457826e-05, "loss": 2.3861, "step": 3322500 }, { "epoch": 16.46, "learning_rate": 4.177131978815218e-05, "loss": 2.3693, "step": 3323000 }, { "epoch": 16.47, "learning_rate": 4.177008367889895e-05, "loss": 2.362, "step": 3323500 }, { "epoch": 16.47, "learning_rate": 4.1768845092472866e-05, "loss": 2.3583, "step": 3324000 }, { "epoch": 16.47, "learning_rate": 4.176760650604678e-05, "loss": 2.4084, "step": 3324500 }, { "epoch": 16.47, "learning_rate": 4.17663679196207e-05, "loss": 2.3874, "step": 3325000 }, { "epoch": 16.48, "learning_rate": 4.1765129333194617e-05, "loss": 2.4014, "step": 3325500 }, { "epoch": 16.48, "learning_rate": 4.1763890746768533e-05, "loss": 2.3739, "step": 3326000 }, { "epoch": 16.48, "learning_rate": 4.1762652160342444e-05, "loss": 2.362, "step": 3326500 }, { "epoch": 16.48, "learning_rate": 4.176141357391636e-05, "loss": 2.3732, "step": 3327000 }, { "epoch": 16.49, "learning_rate": 4.176017498749028e-05, "loss": 2.3626, "step": 3327500 }, { "epoch": 16.49, "learning_rate": 4.1758936401064194e-05, "loss": 2.3443, "step": 3328000 }, { "epoch": 16.49, "learning_rate": 4.175770029181096e-05, "loss": 2.3804, "step": 3328500 }, { "epoch": 16.49, "learning_rate": 4.175646170538488e-05, "loss": 2.3758, "step": 3329000 }, { "epoch": 16.5, "learning_rate": 4.17552231189588e-05, "loss": 2.3673, "step": 3329500 }, { "epoch": 16.5, "learning_rate": 4.1753984532532714e-05, "loss": 2.3549, "step": 3330000 }, { "epoch": 16.5, "learning_rate": 4.175274594610663e-05, "loss": 2.362, "step": 3330500 }, { "epoch": 16.5, "learning_rate": 4.175150735968055e-05, "loss": 2.3912, "step": 3331000 }, { "epoch": 16.51, "learning_rate": 4.1750268773254465e-05, "loss": 2.3859, "step": 3331500 }, { "epoch": 16.51, "learning_rate": 4.174903018682838e-05, "loss": 2.3581, "step": 3332000 }, { "epoch": 16.51, "learning_rate": 4.17477916004023e-05, "loss": 2.3842, "step": 3332500 }, { "epoch": 16.51, "learning_rate": 4.1746553013976216e-05, "loss": 2.3812, "step": 3333000 }, { "epoch": 16.52, "learning_rate": 4.1745314427550126e-05, "loss": 2.3868, "step": 3333500 }, { "epoch": 16.52, "learning_rate": 4.174407584112404e-05, "loss": 2.3852, "step": 3334000 }, { "epoch": 16.52, "learning_rate": 4.174283725469796e-05, "loss": 2.3561, "step": 3334500 }, { "epoch": 16.52, "learning_rate": 4.1741598668271877e-05, "loss": 2.3751, "step": 3335000 }, { "epoch": 16.53, "learning_rate": 4.1740362559018645e-05, "loss": 2.3717, "step": 3335500 }, { "epoch": 16.53, "learning_rate": 4.173912397259256e-05, "loss": 2.3852, "step": 3336000 }, { "epoch": 16.53, "learning_rate": 4.173788786333933e-05, "loss": 2.4165, "step": 3336500 }, { "epoch": 16.53, "learning_rate": 4.173664927691325e-05, "loss": 2.3534, "step": 3337000 }, { "epoch": 16.54, "learning_rate": 4.1735410690487165e-05, "loss": 2.3673, "step": 3337500 }, { "epoch": 16.54, "learning_rate": 4.173417210406108e-05, "loss": 2.3468, "step": 3338000 }, { "epoch": 16.54, "learning_rate": 4.1732933517635e-05, "loss": 2.3575, "step": 3338500 }, { "epoch": 16.54, "learning_rate": 4.1731694931208916e-05, "loss": 2.4043, "step": 3339000 }, { "epoch": 16.55, "learning_rate": 4.1730456344782826e-05, "loss": 2.3744, "step": 3339500 }, { "epoch": 16.55, "learning_rate": 4.172921775835674e-05, "loss": 2.3968, "step": 3340000 }, { "epoch": 16.55, "learning_rate": 4.172797917193066e-05, "loss": 2.389, "step": 3340500 }, { "epoch": 16.55, "learning_rate": 4.172674306267743e-05, "loss": 2.3549, "step": 3341000 }, { "epoch": 16.55, "learning_rate": 4.17255069534242e-05, "loss": 2.3684, "step": 3341500 }, { "epoch": 16.56, "learning_rate": 4.1724268366998114e-05, "loss": 2.379, "step": 3342000 }, { "epoch": 16.56, "learning_rate": 4.172302978057203e-05, "loss": 2.3729, "step": 3342500 }, { "epoch": 16.56, "learning_rate": 4.172179119414595e-05, "loss": 2.3568, "step": 3343000 }, { "epoch": 16.56, "learning_rate": 4.172055508489272e-05, "loss": 2.3773, "step": 3343500 }, { "epoch": 16.57, "learning_rate": 4.1719316498466634e-05, "loss": 2.3581, "step": 3344000 }, { "epoch": 16.57, "learning_rate": 4.171807791204055e-05, "loss": 2.368, "step": 3344500 }, { "epoch": 16.57, "learning_rate": 4.171683932561447e-05, "loss": 2.3668, "step": 3345000 }, { "epoch": 16.57, "learning_rate": 4.1715600739188385e-05, "loss": 2.3714, "step": 3345500 }, { "epoch": 16.58, "learning_rate": 4.17143621527623e-05, "loss": 2.3788, "step": 3346000 }, { "epoch": 16.58, "learning_rate": 4.171312356633622e-05, "loss": 2.3737, "step": 3346500 }, { "epoch": 16.58, "learning_rate": 4.171188497991013e-05, "loss": 2.3877, "step": 3347000 }, { "epoch": 16.58, "learning_rate": 4.1710646393484046e-05, "loss": 2.359, "step": 3347500 }, { "epoch": 16.59, "learning_rate": 4.170940780705796e-05, "loss": 2.3579, "step": 3348000 }, { "epoch": 16.59, "learning_rate": 4.170817169780473e-05, "loss": 2.3688, "step": 3348500 }, { "epoch": 16.59, "learning_rate": 4.170693311137865e-05, "loss": 2.3799, "step": 3349000 }, { "epoch": 16.59, "learning_rate": 4.170569700212542e-05, "loss": 2.3597, "step": 3349500 }, { "epoch": 16.6, "learning_rate": 4.1704458415699334e-05, "loss": 2.3678, "step": 3350000 }, { "epoch": 16.6, "learning_rate": 4.170321982927325e-05, "loss": 2.3693, "step": 3350500 }, { "epoch": 16.6, "learning_rate": 4.170198124284717e-05, "loss": 2.3535, "step": 3351000 }, { "epoch": 16.6, "learning_rate": 4.1700742656421085e-05, "loss": 2.3655, "step": 3351500 }, { "epoch": 16.61, "learning_rate": 4.169950654716785e-05, "loss": 2.3376, "step": 3352000 }, { "epoch": 16.61, "learning_rate": 4.1698267960741764e-05, "loss": 2.3532, "step": 3352500 }, { "epoch": 16.61, "learning_rate": 4.169702937431568e-05, "loss": 2.3747, "step": 3353000 }, { "epoch": 16.61, "learning_rate": 4.16957907878896e-05, "loss": 2.3603, "step": 3353500 }, { "epoch": 16.62, "learning_rate": 4.1694552201463515e-05, "loss": 2.377, "step": 3354000 }, { "epoch": 16.62, "learning_rate": 4.169331361503743e-05, "loss": 2.3917, "step": 3354500 }, { "epoch": 16.62, "learning_rate": 4.16920775057842e-05, "loss": 2.3773, "step": 3355000 }, { "epoch": 16.62, "learning_rate": 4.169083891935812e-05, "loss": 2.3885, "step": 3355500 }, { "epoch": 16.63, "learning_rate": 4.1689600332932034e-05, "loss": 2.3602, "step": 3356000 }, { "epoch": 16.63, "learning_rate": 4.16883642236788e-05, "loss": 2.3732, "step": 3356500 }, { "epoch": 16.63, "learning_rate": 4.168712563725271e-05, "loss": 2.3674, "step": 3357000 }, { "epoch": 16.63, "learning_rate": 4.168588705082663e-05, "loss": 2.3854, "step": 3357500 }, { "epoch": 16.64, "learning_rate": 4.168464846440055e-05, "loss": 2.3712, "step": 3358000 }, { "epoch": 16.64, "learning_rate": 4.1683409877974464e-05, "loss": 2.3552, "step": 3358500 }, { "epoch": 16.64, "learning_rate": 4.168217129154838e-05, "loss": 2.3662, "step": 3359000 }, { "epoch": 16.64, "learning_rate": 4.168093518229515e-05, "loss": 2.3816, "step": 3359500 }, { "epoch": 16.65, "learning_rate": 4.1679696595869067e-05, "loss": 2.3567, "step": 3360000 }, { "epoch": 16.65, "learning_rate": 4.1678458009442983e-05, "loss": 2.3709, "step": 3360500 }, { "epoch": 16.65, "learning_rate": 4.16772194230169e-05, "loss": 2.3764, "step": 3361000 }, { "epoch": 16.65, "learning_rate": 4.167598083659082e-05, "loss": 2.375, "step": 3361500 }, { "epoch": 16.66, "learning_rate": 4.1674742250164734e-05, "loss": 2.3625, "step": 3362000 }, { "epoch": 16.66, "learning_rate": 4.167350366373865e-05, "loss": 2.3537, "step": 3362500 }, { "epoch": 16.66, "learning_rate": 4.167226507731257e-05, "loss": 2.3476, "step": 3363000 }, { "epoch": 16.66, "learning_rate": 4.1671026490886485e-05, "loss": 2.4053, "step": 3363500 }, { "epoch": 16.67, "learning_rate": 4.16697879044604e-05, "loss": 2.3621, "step": 3364000 }, { "epoch": 16.67, "learning_rate": 4.166854931803432e-05, "loss": 2.3565, "step": 3364500 }, { "epoch": 16.67, "learning_rate": 4.1667310731608236e-05, "loss": 2.4182, "step": 3365000 }, { "epoch": 16.67, "learning_rate": 4.1666074622355e-05, "loss": 2.378, "step": 3365500 }, { "epoch": 16.68, "learning_rate": 4.1664836035928915e-05, "loss": 2.3553, "step": 3366000 }, { "epoch": 16.68, "learning_rate": 4.166359744950283e-05, "loss": 2.3641, "step": 3366500 }, { "epoch": 16.68, "learning_rate": 4.166235886307675e-05, "loss": 2.3704, "step": 3367000 }, { "epoch": 16.68, "learning_rate": 4.1661120276650666e-05, "loss": 2.3547, "step": 3367500 }, { "epoch": 16.69, "learning_rate": 4.165988169022458e-05, "loss": 2.3821, "step": 3368000 }, { "epoch": 16.69, "learning_rate": 4.16586431037985e-05, "loss": 2.3792, "step": 3368500 }, { "epoch": 16.69, "learning_rate": 4.165740699454527e-05, "loss": 2.3716, "step": 3369000 }, { "epoch": 16.69, "learning_rate": 4.1656168408119185e-05, "loss": 2.3796, "step": 3369500 }, { "epoch": 16.7, "learning_rate": 4.16549298216931e-05, "loss": 2.3907, "step": 3370000 }, { "epoch": 16.7, "learning_rate": 4.165369123526702e-05, "loss": 2.3926, "step": 3370500 }, { "epoch": 16.7, "learning_rate": 4.165245512601378e-05, "loss": 2.3875, "step": 3371000 }, { "epoch": 16.7, "learning_rate": 4.16512165395877e-05, "loss": 2.3632, "step": 3371500 }, { "epoch": 16.71, "learning_rate": 4.1649977953161615e-05, "loss": 2.3904, "step": 3372000 }, { "epoch": 16.71, "learning_rate": 4.1648741843908384e-05, "loss": 2.3739, "step": 3372500 }, { "epoch": 16.71, "learning_rate": 4.16475032574823e-05, "loss": 2.3697, "step": 3373000 }, { "epoch": 16.71, "learning_rate": 4.164626467105622e-05, "loss": 2.3728, "step": 3373500 }, { "epoch": 16.72, "learning_rate": 4.164502856180299e-05, "loss": 2.3865, "step": 3374000 }, { "epoch": 16.72, "learning_rate": 4.16437899753769e-05, "loss": 2.3485, "step": 3374500 }, { "epoch": 16.72, "learning_rate": 4.164255138895082e-05, "loss": 2.3721, "step": 3375000 }, { "epoch": 16.72, "learning_rate": 4.164131280252474e-05, "loss": 2.3634, "step": 3375500 }, { "epoch": 16.73, "learning_rate": 4.1640076693271506e-05, "loss": 2.3643, "step": 3376000 }, { "epoch": 16.73, "learning_rate": 4.163883810684542e-05, "loss": 2.3706, "step": 3376500 }, { "epoch": 16.73, "learning_rate": 4.163759952041934e-05, "loss": 2.3708, "step": 3377000 }, { "epoch": 16.73, "learning_rate": 4.163636093399325e-05, "loss": 2.3637, "step": 3377500 }, { "epoch": 16.74, "learning_rate": 4.163512234756717e-05, "loss": 2.378, "step": 3378000 }, { "epoch": 16.74, "learning_rate": 4.1633883761141084e-05, "loss": 2.3886, "step": 3378500 }, { "epoch": 16.74, "learning_rate": 4.1632645174715e-05, "loss": 2.3935, "step": 3379000 }, { "epoch": 16.74, "learning_rate": 4.163140658828892e-05, "loss": 2.3653, "step": 3379500 }, { "epoch": 16.75, "learning_rate": 4.1630168001862835e-05, "loss": 2.3607, "step": 3380000 }, { "epoch": 16.75, "learning_rate": 4.162892941543675e-05, "loss": 2.3704, "step": 3380500 }, { "epoch": 16.75, "learning_rate": 4.162769082901067e-05, "loss": 2.3722, "step": 3381000 }, { "epoch": 16.75, "learning_rate": 4.1626452242584585e-05, "loss": 2.3535, "step": 3381500 }, { "epoch": 16.76, "learning_rate": 4.16252136561585e-05, "loss": 2.3634, "step": 3382000 }, { "epoch": 16.76, "learning_rate": 4.162397754690527e-05, "loss": 2.3759, "step": 3382500 }, { "epoch": 16.76, "learning_rate": 4.162273896047919e-05, "loss": 2.3764, "step": 3383000 }, { "epoch": 16.76, "learning_rate": 4.1621500374053105e-05, "loss": 2.3739, "step": 3383500 }, { "epoch": 16.77, "learning_rate": 4.1620261787627015e-05, "loss": 2.3762, "step": 3384000 }, { "epoch": 16.77, "learning_rate": 4.161902320120093e-05, "loss": 2.3656, "step": 3384500 }, { "epoch": 16.77, "learning_rate": 4.16177870919477e-05, "loss": 2.3756, "step": 3385000 }, { "epoch": 16.77, "learning_rate": 4.161654850552162e-05, "loss": 2.3657, "step": 3385500 }, { "epoch": 16.78, "learning_rate": 4.1615309919095535e-05, "loss": 2.3836, "step": 3386000 }, { "epoch": 16.78, "learning_rate": 4.161407133266945e-05, "loss": 2.3893, "step": 3386500 }, { "epoch": 16.78, "learning_rate": 4.161283274624337e-05, "loss": 2.3778, "step": 3387000 }, { "epoch": 16.78, "learning_rate": 4.1611594159817286e-05, "loss": 2.3606, "step": 3387500 }, { "epoch": 16.79, "learning_rate": 4.16103555733912e-05, "loss": 2.3618, "step": 3388000 }, { "epoch": 16.79, "learning_rate": 4.160911698696512e-05, "loss": 2.3565, "step": 3388500 }, { "epoch": 16.79, "learning_rate": 4.160788087771189e-05, "loss": 2.3636, "step": 3389000 }, { "epoch": 16.79, "learning_rate": 4.1606642291285805e-05, "loss": 2.3672, "step": 3389500 }, { "epoch": 16.8, "learning_rate": 4.160540370485972e-05, "loss": 2.3587, "step": 3390000 }, { "epoch": 16.8, "learning_rate": 4.160416511843364e-05, "loss": 2.3782, "step": 3390500 }, { "epoch": 16.8, "learning_rate": 4.160292653200755e-05, "loss": 2.3666, "step": 3391000 }, { "epoch": 16.8, "learning_rate": 4.160169042275432e-05, "loss": 2.3514, "step": 3391500 }, { "epoch": 16.81, "learning_rate": 4.1600451836328235e-05, "loss": 2.3646, "step": 3392000 }, { "epoch": 16.81, "learning_rate": 4.159921324990215e-05, "loss": 2.3601, "step": 3392500 }, { "epoch": 16.81, "learning_rate": 4.159797466347607e-05, "loss": 2.3792, "step": 3393000 }, { "epoch": 16.81, "learning_rate": 4.1596736077049986e-05, "loss": 2.355, "step": 3393500 }, { "epoch": 16.82, "learning_rate": 4.1595499967796754e-05, "loss": 2.3727, "step": 3394000 }, { "epoch": 16.82, "learning_rate": 4.159426138137067e-05, "loss": 2.3774, "step": 3394500 }, { "epoch": 16.82, "learning_rate": 4.159302279494459e-05, "loss": 2.3607, "step": 3395000 }, { "epoch": 16.82, "learning_rate": 4.1591784208518505e-05, "loss": 2.3917, "step": 3395500 }, { "epoch": 16.82, "learning_rate": 4.159054562209242e-05, "loss": 2.3518, "step": 3396000 }, { "epoch": 16.83, "learning_rate": 4.158930703566634e-05, "loss": 2.3856, "step": 3396500 }, { "epoch": 16.83, "learning_rate": 4.1588068449240256e-05, "loss": 2.387, "step": 3397000 }, { "epoch": 16.83, "learning_rate": 4.1586829862814166e-05, "loss": 2.3707, "step": 3397500 }, { "epoch": 16.83, "learning_rate": 4.158559127638808e-05, "loss": 2.3603, "step": 3398000 }, { "epoch": 16.84, "learning_rate": 4.158435764430771e-05, "loss": 2.3707, "step": 3398500 }, { "epoch": 16.84, "learning_rate": 4.158311905788163e-05, "loss": 2.3861, "step": 3399000 }, { "epoch": 16.84, "learning_rate": 4.158188047145554e-05, "loss": 2.3695, "step": 3399500 }, { "epoch": 16.84, "learning_rate": 4.1580641885029455e-05, "loss": 2.3408, "step": 3400000 }, { "epoch": 16.85, "learning_rate": 4.157940329860337e-05, "loss": 2.3676, "step": 3400500 }, { "epoch": 16.85, "learning_rate": 4.157816718935014e-05, "loss": 2.3638, "step": 3401000 }, { "epoch": 16.85, "learning_rate": 4.157692860292406e-05, "loss": 2.3828, "step": 3401500 }, { "epoch": 16.85, "learning_rate": 4.1575690016497974e-05, "loss": 2.3853, "step": 3402000 }, { "epoch": 16.86, "learning_rate": 4.1574451430071884e-05, "loss": 2.3722, "step": 3402500 }, { "epoch": 16.86, "learning_rate": 4.15732128436458e-05, "loss": 2.3818, "step": 3403000 }, { "epoch": 16.86, "learning_rate": 4.157197425721972e-05, "loss": 2.3493, "step": 3403500 }, { "epoch": 16.86, "learning_rate": 4.1570735670793635e-05, "loss": 2.3795, "step": 3404000 }, { "epoch": 16.87, "learning_rate": 4.156949708436755e-05, "loss": 2.3624, "step": 3404500 }, { "epoch": 16.87, "learning_rate": 4.156826097511433e-05, "loss": 2.3783, "step": 3405000 }, { "epoch": 16.87, "learning_rate": 4.1567022388688245e-05, "loss": 2.379, "step": 3405500 }, { "epoch": 16.87, "learning_rate": 4.1565783802262155e-05, "loss": 2.3758, "step": 3406000 }, { "epoch": 16.88, "learning_rate": 4.156454521583607e-05, "loss": 2.3549, "step": 3406500 }, { "epoch": 16.88, "learning_rate": 4.156330662940999e-05, "loss": 2.3986, "step": 3407000 }, { "epoch": 16.88, "learning_rate": 4.1562068042983905e-05, "loss": 2.3615, "step": 3407500 }, { "epoch": 16.88, "learning_rate": 4.156082945655782e-05, "loss": 2.3583, "step": 3408000 }, { "epoch": 16.89, "learning_rate": 4.155959334730459e-05, "loss": 2.3854, "step": 3408500 }, { "epoch": 16.89, "learning_rate": 4.15583547608785e-05, "loss": 2.3673, "step": 3409000 }, { "epoch": 16.89, "learning_rate": 4.155711617445242e-05, "loss": 2.3642, "step": 3409500 }, { "epoch": 16.89, "learning_rate": 4.1555877588026335e-05, "loss": 2.3299, "step": 3410000 }, { "epoch": 16.9, "learning_rate": 4.155463900160025e-05, "loss": 2.3807, "step": 3410500 }, { "epoch": 16.9, "learning_rate": 4.155340041517417e-05, "loss": 2.3767, "step": 3411000 }, { "epoch": 16.9, "learning_rate": 4.1552161828748086e-05, "loss": 2.3789, "step": 3411500 }, { "epoch": 16.9, "learning_rate": 4.1550923242322e-05, "loss": 2.3876, "step": 3412000 }, { "epoch": 16.91, "learning_rate": 4.154968713306877e-05, "loss": 2.3517, "step": 3412500 }, { "epoch": 16.91, "learning_rate": 4.154844854664269e-05, "loss": 2.4007, "step": 3413000 }, { "epoch": 16.91, "learning_rate": 4.1547209960216606e-05, "loss": 2.3944, "step": 3413500 }, { "epoch": 16.91, "learning_rate": 4.154597137379052e-05, "loss": 2.3647, "step": 3414000 }, { "epoch": 16.92, "learning_rate": 4.154473278736444e-05, "loss": 2.3501, "step": 3414500 }, { "epoch": 16.92, "learning_rate": 4.1543494200938356e-05, "loss": 2.3941, "step": 3415000 }, { "epoch": 16.92, "learning_rate": 4.154225561451227e-05, "loss": 2.3809, "step": 3415500 }, { "epoch": 16.92, "learning_rate": 4.154101702808619e-05, "loss": 2.3663, "step": 3416000 }, { "epoch": 16.93, "learning_rate": 4.153978091883295e-05, "loss": 2.3529, "step": 3416500 }, { "epoch": 16.93, "learning_rate": 4.153854233240687e-05, "loss": 2.3712, "step": 3417000 }, { "epoch": 16.93, "learning_rate": 4.1537306223153645e-05, "loss": 2.3917, "step": 3417500 }, { "epoch": 16.93, "learning_rate": 4.153606763672756e-05, "loss": 2.3671, "step": 3418000 }, { "epoch": 16.94, "learning_rate": 4.153482905030147e-05, "loss": 2.3691, "step": 3418500 }, { "epoch": 16.94, "learning_rate": 4.153359046387539e-05, "loss": 2.3647, "step": 3419000 }, { "epoch": 16.94, "learning_rate": 4.1532351877449306e-05, "loss": 2.3684, "step": 3419500 }, { "epoch": 16.94, "learning_rate": 4.153111329102322e-05, "loss": 2.3566, "step": 3420000 }, { "epoch": 16.95, "learning_rate": 4.152987718176999e-05, "loss": 2.3672, "step": 3420500 }, { "epoch": 16.95, "learning_rate": 4.152863859534391e-05, "loss": 2.383, "step": 3421000 }, { "epoch": 16.95, "learning_rate": 4.152740248609068e-05, "loss": 2.3671, "step": 3421500 }, { "epoch": 16.95, "learning_rate": 4.1526163899664594e-05, "loss": 2.3578, "step": 3422000 }, { "epoch": 16.96, "learning_rate": 4.152492531323851e-05, "loss": 2.3837, "step": 3422500 }, { "epoch": 16.96, "learning_rate": 4.152368672681243e-05, "loss": 2.3824, "step": 3423000 }, { "epoch": 16.96, "learning_rate": 4.1522448140386345e-05, "loss": 2.3624, "step": 3423500 }, { "epoch": 16.96, "learning_rate": 4.152120955396026e-05, "loss": 2.3848, "step": 3424000 }, { "epoch": 16.97, "learning_rate": 4.151997096753417e-05, "loss": 2.379, "step": 3424500 }, { "epoch": 16.97, "learning_rate": 4.151873238110809e-05, "loss": 2.3548, "step": 3425000 }, { "epoch": 16.97, "learning_rate": 4.1517493794682006e-05, "loss": 2.363, "step": 3425500 }, { "epoch": 16.97, "learning_rate": 4.151625520825592e-05, "loss": 2.3791, "step": 3426000 }, { "epoch": 16.98, "learning_rate": 4.151501662182984e-05, "loss": 2.3776, "step": 3426500 }, { "epoch": 16.98, "learning_rate": 4.151378051257661e-05, "loss": 2.3708, "step": 3427000 }, { "epoch": 16.98, "learning_rate": 4.1512541926150525e-05, "loss": 2.3777, "step": 3427500 }, { "epoch": 16.98, "learning_rate": 4.1511303339724436e-05, "loss": 2.3577, "step": 3428000 }, { "epoch": 16.99, "learning_rate": 4.151006475329835e-05, "loss": 2.3726, "step": 3428500 }, { "epoch": 16.99, "learning_rate": 4.150882616687227e-05, "loss": 2.393, "step": 3429000 }, { "epoch": 16.99, "learning_rate": 4.1507590057619045e-05, "loss": 2.356, "step": 3429500 }, { "epoch": 16.99, "learning_rate": 4.150635147119296e-05, "loss": 2.3563, "step": 3430000 }, { "epoch": 17.0, "learning_rate": 4.150511288476688e-05, "loss": 2.3842, "step": 3430500 }, { "epoch": 17.0, "learning_rate": 4.150387429834079e-05, "loss": 2.3678, "step": 3431000 }, { "epoch": 17.0, "eval_accuracy": 0.6498075948776978, "eval_accuracy_mlm": 0.6043656048690601, "eval_accuracy_nsp": 0.8643428943477186, "eval_loss": 2.376680374145508, "eval_runtime": 145.8417, "eval_samples_per_second": 1748.189, "eval_steps_per_second": 72.846, "step": 3431331 }, { "epoch": 17.0, "learning_rate": 4.1502638189087565e-05, "loss": 2.3765, "step": 3431500 }, { "epoch": 17.0, "learning_rate": 4.150139960266148e-05, "loss": 2.3414, "step": 3432000 }, { "epoch": 17.01, "learning_rate": 4.150016101623539e-05, "loss": 2.3531, "step": 3432500 }, { "epoch": 17.01, "learning_rate": 4.149892242980931e-05, "loss": 2.3628, "step": 3433000 }, { "epoch": 17.01, "learning_rate": 4.1497683843383226e-05, "loss": 2.3441, "step": 3433500 }, { "epoch": 17.01, "learning_rate": 4.149644525695714e-05, "loss": 2.3688, "step": 3434000 }, { "epoch": 17.02, "learning_rate": 4.149520667053106e-05, "loss": 2.3296, "step": 3434500 }, { "epoch": 17.02, "learning_rate": 4.149396808410497e-05, "loss": 2.3295, "step": 3435000 }, { "epoch": 17.02, "learning_rate": 4.1492729497678886e-05, "loss": 2.3542, "step": 3435500 }, { "epoch": 17.02, "learning_rate": 4.1491490911252803e-05, "loss": 2.3233, "step": 3436000 }, { "epoch": 17.03, "learning_rate": 4.149025232482672e-05, "loss": 2.3492, "step": 3436500 }, { "epoch": 17.03, "learning_rate": 4.148901373840064e-05, "loss": 2.341, "step": 3437000 }, { "epoch": 17.03, "learning_rate": 4.1487775151974554e-05, "loss": 2.356, "step": 3437500 }, { "epoch": 17.03, "learning_rate": 4.148653656554847e-05, "loss": 2.3461, "step": 3438000 }, { "epoch": 17.04, "learning_rate": 4.148529797912239e-05, "loss": 2.3431, "step": 3438500 }, { "epoch": 17.04, "learning_rate": 4.148406186986916e-05, "loss": 2.3379, "step": 3439000 }, { "epoch": 17.04, "learning_rate": 4.1482823283443074e-05, "loss": 2.3413, "step": 3439500 }, { "epoch": 17.04, "learning_rate": 4.148158469701699e-05, "loss": 2.3506, "step": 3440000 }, { "epoch": 17.05, "learning_rate": 4.148034858776376e-05, "loss": 2.3299, "step": 3440500 }, { "epoch": 17.05, "learning_rate": 4.1479110001337676e-05, "loss": 2.3431, "step": 3441000 }, { "epoch": 17.05, "learning_rate": 4.147787141491159e-05, "loss": 2.3623, "step": 3441500 }, { "epoch": 17.05, "learning_rate": 4.1476632828485504e-05, "loss": 2.3722, "step": 3442000 }, { "epoch": 17.06, "learning_rate": 4.147539424205942e-05, "loss": 2.3552, "step": 3442500 }, { "epoch": 17.06, "learning_rate": 4.147415565563334e-05, "loss": 2.3468, "step": 3443000 }, { "epoch": 17.06, "learning_rate": 4.1472917069207254e-05, "loss": 2.3322, "step": 3443500 }, { "epoch": 17.06, "learning_rate": 4.147167848278117e-05, "loss": 2.3492, "step": 3444000 }, { "epoch": 17.07, "learning_rate": 4.147044237352794e-05, "loss": 2.3549, "step": 3444500 }, { "epoch": 17.07, "learning_rate": 4.146920378710186e-05, "loss": 2.373, "step": 3445000 }, { "epoch": 17.07, "learning_rate": 4.1467965200675774e-05, "loss": 2.3629, "step": 3445500 }, { "epoch": 17.07, "learning_rate": 4.146672909142254e-05, "loss": 2.3718, "step": 3446000 }, { "epoch": 17.08, "learning_rate": 4.146549050499646e-05, "loss": 2.3301, "step": 3446500 }, { "epoch": 17.08, "learning_rate": 4.1464251918570377e-05, "loss": 2.3385, "step": 3447000 }, { "epoch": 17.08, "learning_rate": 4.1463013332144294e-05, "loss": 2.3374, "step": 3447500 }, { "epoch": 17.08, "learning_rate": 4.146177474571821e-05, "loss": 2.3562, "step": 3448000 }, { "epoch": 17.09, "learning_rate": 4.146053615929212e-05, "loss": 2.3514, "step": 3448500 }, { "epoch": 17.09, "learning_rate": 4.145929757286604e-05, "loss": 2.3568, "step": 3449000 }, { "epoch": 17.09, "learning_rate": 4.1458058986439954e-05, "loss": 2.3525, "step": 3449500 }, { "epoch": 17.09, "learning_rate": 4.145682287718672e-05, "loss": 2.3269, "step": 3450000 }, { "epoch": 17.09, "learning_rate": 4.145558429076064e-05, "loss": 2.3618, "step": 3450500 }, { "epoch": 17.1, "learning_rate": 4.145434570433456e-05, "loss": 2.3438, "step": 3451000 }, { "epoch": 17.1, "learning_rate": 4.1453107117908474e-05, "loss": 2.341, "step": 3451500 }, { "epoch": 17.1, "learning_rate": 4.145186853148239e-05, "loss": 2.3567, "step": 3452000 }, { "epoch": 17.1, "learning_rate": 4.145063489940201e-05, "loss": 2.3433, "step": 3452500 }, { "epoch": 17.11, "learning_rate": 4.144939631297593e-05, "loss": 2.3524, "step": 3453000 }, { "epoch": 17.11, "learning_rate": 4.1448157726549846e-05, "loss": 2.3444, "step": 3453500 }, { "epoch": 17.11, "learning_rate": 4.144691914012376e-05, "loss": 2.3531, "step": 3454000 }, { "epoch": 17.11, "learning_rate": 4.144568055369768e-05, "loss": 2.3459, "step": 3454500 }, { "epoch": 17.12, "learning_rate": 4.1444441967271596e-05, "loss": 2.35, "step": 3455000 }, { "epoch": 17.12, "learning_rate": 4.144320338084551e-05, "loss": 2.338, "step": 3455500 }, { "epoch": 17.12, "learning_rate": 4.1441964794419423e-05, "loss": 2.3489, "step": 3456000 }, { "epoch": 17.12, "learning_rate": 4.14407286851662e-05, "loss": 2.3561, "step": 3456500 }, { "epoch": 17.13, "learning_rate": 4.1439490098740116e-05, "loss": 2.3618, "step": 3457000 }, { "epoch": 17.13, "learning_rate": 4.143825151231403e-05, "loss": 2.3614, "step": 3457500 }, { "epoch": 17.13, "learning_rate": 4.143701292588795e-05, "loss": 2.3375, "step": 3458000 }, { "epoch": 17.13, "learning_rate": 4.143577433946187e-05, "loss": 2.3632, "step": 3458500 }, { "epoch": 17.14, "learning_rate": 4.143453823020863e-05, "loss": 2.341, "step": 3459000 }, { "epoch": 17.14, "learning_rate": 4.1433299643782546e-05, "loss": 2.3918, "step": 3459500 }, { "epoch": 17.14, "learning_rate": 4.143206105735646e-05, "loss": 2.3394, "step": 3460000 }, { "epoch": 17.14, "learning_rate": 4.143082247093038e-05, "loss": 2.3564, "step": 3460500 }, { "epoch": 17.15, "learning_rate": 4.1429583884504296e-05, "loss": 2.351, "step": 3461000 }, { "epoch": 17.15, "learning_rate": 4.142834529807821e-05, "loss": 2.3515, "step": 3461500 }, { "epoch": 17.15, "learning_rate": 4.142710918882498e-05, "loss": 2.343, "step": 3462000 }, { "epoch": 17.15, "learning_rate": 4.14258706023989e-05, "loss": 2.3226, "step": 3462500 }, { "epoch": 17.16, "learning_rate": 4.1424632015972816e-05, "loss": 2.35, "step": 3463000 }, { "epoch": 17.16, "learning_rate": 4.142339342954673e-05, "loss": 2.3612, "step": 3463500 }, { "epoch": 17.16, "learning_rate": 4.142215484312065e-05, "loss": 2.3746, "step": 3464000 }, { "epoch": 17.16, "learning_rate": 4.142091625669457e-05, "loss": 2.3619, "step": 3464500 }, { "epoch": 17.17, "learning_rate": 4.141967767026848e-05, "loss": 2.3721, "step": 3465000 }, { "epoch": 17.17, "learning_rate": 4.1418439083842394e-05, "loss": 2.3639, "step": 3465500 }, { "epoch": 17.17, "learning_rate": 4.1417205451762015e-05, "loss": 2.343, "step": 3466000 }, { "epoch": 17.17, "learning_rate": 4.141596686533593e-05, "loss": 2.3616, "step": 3466500 }, { "epoch": 17.18, "learning_rate": 4.141472827890985e-05, "loss": 2.3572, "step": 3467000 }, { "epoch": 17.18, "learning_rate": 4.1413489692483765e-05, "loss": 2.374, "step": 3467500 }, { "epoch": 17.18, "learning_rate": 4.1412253583230534e-05, "loss": 2.3437, "step": 3468000 }, { "epoch": 17.18, "learning_rate": 4.141101499680445e-05, "loss": 2.3703, "step": 3468500 }, { "epoch": 17.19, "learning_rate": 4.140977641037837e-05, "loss": 2.3546, "step": 3469000 }, { "epoch": 17.19, "learning_rate": 4.140853782395228e-05, "loss": 2.3631, "step": 3469500 }, { "epoch": 17.19, "learning_rate": 4.140730171469905e-05, "loss": 2.3685, "step": 3470000 }, { "epoch": 17.19, "learning_rate": 4.1406063128272964e-05, "loss": 2.3637, "step": 3470500 }, { "epoch": 17.2, "learning_rate": 4.140482454184688e-05, "loss": 2.3558, "step": 3471000 }, { "epoch": 17.2, "learning_rate": 4.14035859554208e-05, "loss": 2.3688, "step": 3471500 }, { "epoch": 17.2, "learning_rate": 4.1402347368994715e-05, "loss": 2.343, "step": 3472000 }, { "epoch": 17.2, "learning_rate": 4.140110878256863e-05, "loss": 2.364, "step": 3472500 }, { "epoch": 17.21, "learning_rate": 4.139987019614255e-05, "loss": 2.3704, "step": 3473000 }, { "epoch": 17.21, "learning_rate": 4.1398631609716465e-05, "loss": 2.3474, "step": 3473500 }, { "epoch": 17.21, "learning_rate": 4.1397395500463234e-05, "loss": 2.3439, "step": 3474000 }, { "epoch": 17.21, "learning_rate": 4.139615691403715e-05, "loss": 2.3453, "step": 3474500 }, { "epoch": 17.22, "learning_rate": 4.139491832761107e-05, "loss": 2.3463, "step": 3475000 }, { "epoch": 17.22, "learning_rate": 4.1393679741184985e-05, "loss": 2.3682, "step": 3475500 }, { "epoch": 17.22, "learning_rate": 4.1392441154758895e-05, "loss": 2.3424, "step": 3476000 }, { "epoch": 17.22, "learning_rate": 4.139120256833281e-05, "loss": 2.3605, "step": 3476500 }, { "epoch": 17.23, "learning_rate": 4.138996398190673e-05, "loss": 2.3513, "step": 3477000 }, { "epoch": 17.23, "learning_rate": 4.13887278726535e-05, "loss": 2.3756, "step": 3477500 }, { "epoch": 17.23, "learning_rate": 4.1387489286227415e-05, "loss": 2.3382, "step": 3478000 }, { "epoch": 17.23, "learning_rate": 4.138625069980133e-05, "loss": 2.3326, "step": 3478500 }, { "epoch": 17.24, "learning_rate": 4.138501211337525e-05, "loss": 2.3489, "step": 3479000 }, { "epoch": 17.24, "learning_rate": 4.1383773526949166e-05, "loss": 2.3513, "step": 3479500 }, { "epoch": 17.24, "learning_rate": 4.1382537417695934e-05, "loss": 2.3649, "step": 3480000 }, { "epoch": 17.24, "learning_rate": 4.13813013084427e-05, "loss": 2.3805, "step": 3480500 }, { "epoch": 17.25, "learning_rate": 4.138006272201662e-05, "loss": 2.3487, "step": 3481000 }, { "epoch": 17.25, "learning_rate": 4.137882413559053e-05, "loss": 2.3675, "step": 3481500 }, { "epoch": 17.25, "learning_rate": 4.137758554916445e-05, "loss": 2.3361, "step": 3482000 }, { "epoch": 17.25, "learning_rate": 4.1376346962738364e-05, "loss": 2.3631, "step": 3482500 }, { "epoch": 17.26, "learning_rate": 4.137510837631228e-05, "loss": 2.3593, "step": 3483000 }, { "epoch": 17.26, "learning_rate": 4.13738697898862e-05, "loss": 2.3565, "step": 3483500 }, { "epoch": 17.26, "learning_rate": 4.1372631203460115e-05, "loss": 2.353, "step": 3484000 }, { "epoch": 17.26, "learning_rate": 4.137139261703403e-05, "loss": 2.3434, "step": 3484500 }, { "epoch": 17.27, "learning_rate": 4.137015403060795e-05, "loss": 2.3584, "step": 3485000 }, { "epoch": 17.27, "learning_rate": 4.1368915444181866e-05, "loss": 2.3592, "step": 3485500 }, { "epoch": 17.27, "learning_rate": 4.136767685775578e-05, "loss": 2.3616, "step": 3486000 }, { "epoch": 17.27, "learning_rate": 4.13664382713297e-05, "loss": 2.361, "step": 3486500 }, { "epoch": 17.28, "learning_rate": 4.1365199684903617e-05, "loss": 2.3554, "step": 3487000 }, { "epoch": 17.28, "learning_rate": 4.1363961098477533e-05, "loss": 2.3607, "step": 3487500 }, { "epoch": 17.28, "learning_rate": 4.136272251205145e-05, "loss": 2.3504, "step": 3488000 }, { "epoch": 17.28, "learning_rate": 4.136148640279822e-05, "loss": 2.3489, "step": 3488500 }, { "epoch": 17.29, "learning_rate": 4.1360247816372136e-05, "loss": 2.3515, "step": 3489000 }, { "epoch": 17.29, "learning_rate": 4.1359009229946046e-05, "loss": 2.3622, "step": 3489500 }, { "epoch": 17.29, "learning_rate": 4.1357773120692815e-05, "loss": 2.366, "step": 3490000 }, { "epoch": 17.29, "learning_rate": 4.135653453426673e-05, "loss": 2.3397, "step": 3490500 }, { "epoch": 17.3, "learning_rate": 4.135529594784065e-05, "loss": 2.3427, "step": 3491000 }, { "epoch": 17.3, "learning_rate": 4.1354057361414566e-05, "loss": 2.3803, "step": 3491500 }, { "epoch": 17.3, "learning_rate": 4.135281877498848e-05, "loss": 2.3359, "step": 3492000 }, { "epoch": 17.3, "learning_rate": 4.13515801885624e-05, "loss": 2.3294, "step": 3492500 }, { "epoch": 17.31, "learning_rate": 4.135034160213632e-05, "loss": 2.3517, "step": 3493000 }, { "epoch": 17.31, "learning_rate": 4.1349103015710234e-05, "loss": 2.3769, "step": 3493500 }, { "epoch": 17.31, "learning_rate": 4.134786442928415e-05, "loss": 2.388, "step": 3494000 }, { "epoch": 17.31, "learning_rate": 4.134662584285807e-05, "loss": 2.3651, "step": 3494500 }, { "epoch": 17.32, "learning_rate": 4.1345387256431984e-05, "loss": 2.3778, "step": 3495000 }, { "epoch": 17.32, "learning_rate": 4.13441486700059e-05, "loss": 2.3529, "step": 3495500 }, { "epoch": 17.32, "learning_rate": 4.134291008357982e-05, "loss": 2.3574, "step": 3496000 }, { "epoch": 17.32, "learning_rate": 4.134167149715373e-05, "loss": 2.3382, "step": 3496500 }, { "epoch": 17.33, "learning_rate": 4.13404353879005e-05, "loss": 2.3456, "step": 3497000 }, { "epoch": 17.33, "learning_rate": 4.1339196801474414e-05, "loss": 2.3635, "step": 3497500 }, { "epoch": 17.33, "learning_rate": 4.133795821504833e-05, "loss": 2.3514, "step": 3498000 }, { "epoch": 17.33, "learning_rate": 4.133671962862225e-05, "loss": 2.352, "step": 3498500 }, { "epoch": 17.34, "learning_rate": 4.1335481042196165e-05, "loss": 2.3373, "step": 3499000 }, { "epoch": 17.34, "learning_rate": 4.1334242455770075e-05, "loss": 2.3498, "step": 3499500 }, { "epoch": 17.34, "learning_rate": 4.133300386934399e-05, "loss": 2.3483, "step": 3500000 }, { "epoch": 17.34, "learning_rate": 4.133176528291791e-05, "loss": 2.3468, "step": 3500500 }, { "epoch": 17.35, "learning_rate": 4.1330526696491826e-05, "loss": 2.3388, "step": 3501000 }, { "epoch": 17.35, "learning_rate": 4.132928811006574e-05, "loss": 2.3492, "step": 3501500 }, { "epoch": 17.35, "learning_rate": 4.132805200081252e-05, "loss": 2.3273, "step": 3502000 }, { "epoch": 17.35, "learning_rate": 4.132681341438643e-05, "loss": 2.3493, "step": 3502500 }, { "epoch": 17.36, "learning_rate": 4.132557978230605e-05, "loss": 2.3476, "step": 3503000 }, { "epoch": 17.36, "learning_rate": 4.1324341195879966e-05, "loss": 2.3567, "step": 3503500 }, { "epoch": 17.36, "learning_rate": 4.1323105086626735e-05, "loss": 2.3562, "step": 3504000 }, { "epoch": 17.36, "learning_rate": 4.132186650020065e-05, "loss": 2.3567, "step": 3504500 }, { "epoch": 17.36, "learning_rate": 4.132062791377457e-05, "loss": 2.3373, "step": 3505000 }, { "epoch": 17.37, "learning_rate": 4.1319389327348486e-05, "loss": 2.3705, "step": 3505500 }, { "epoch": 17.37, "learning_rate": 4.13181507409224e-05, "loss": 2.36, "step": 3506000 }, { "epoch": 17.37, "learning_rate": 4.131691215449632e-05, "loss": 2.3511, "step": 3506500 }, { "epoch": 17.37, "learning_rate": 4.1315673568070236e-05, "loss": 2.3651, "step": 3507000 }, { "epoch": 17.38, "learning_rate": 4.1314434981644153e-05, "loss": 2.358, "step": 3507500 }, { "epoch": 17.38, "learning_rate": 4.1313198872390915e-05, "loss": 2.3668, "step": 3508000 }, { "epoch": 17.38, "learning_rate": 4.131196028596483e-05, "loss": 2.3245, "step": 3508500 }, { "epoch": 17.38, "learning_rate": 4.131072169953875e-05, "loss": 2.3623, "step": 3509000 }, { "epoch": 17.39, "learning_rate": 4.1309483113112666e-05, "loss": 2.3565, "step": 3509500 }, { "epoch": 17.39, "learning_rate": 4.130824452668658e-05, "loss": 2.3437, "step": 3510000 }, { "epoch": 17.39, "learning_rate": 4.13070059402605e-05, "loss": 2.3466, "step": 3510500 }, { "epoch": 17.39, "learning_rate": 4.130576735383442e-05, "loss": 2.383, "step": 3511000 }, { "epoch": 17.4, "learning_rate": 4.1304528767408334e-05, "loss": 2.3689, "step": 3511500 }, { "epoch": 17.4, "learning_rate": 4.130329018098225e-05, "loss": 2.3632, "step": 3512000 }, { "epoch": 17.4, "learning_rate": 4.130205159455617e-05, "loss": 2.3213, "step": 3512500 }, { "epoch": 17.4, "learning_rate": 4.1300813008130085e-05, "loss": 2.3577, "step": 3513000 }, { "epoch": 17.41, "learning_rate": 4.1299576898876854e-05, "loss": 2.3611, "step": 3513500 }, { "epoch": 17.41, "learning_rate": 4.129833831245077e-05, "loss": 2.3616, "step": 3514000 }, { "epoch": 17.41, "learning_rate": 4.129709972602469e-05, "loss": 2.3532, "step": 3514500 }, { "epoch": 17.41, "learning_rate": 4.1295861139598604e-05, "loss": 2.3241, "step": 3515000 }, { "epoch": 17.42, "learning_rate": 4.129462255317252e-05, "loss": 2.3725, "step": 3515500 }, { "epoch": 17.42, "learning_rate": 4.129338396674644e-05, "loss": 2.3333, "step": 3516000 }, { "epoch": 17.42, "learning_rate": 4.1292145380320355e-05, "loss": 2.3379, "step": 3516500 }, { "epoch": 17.42, "learning_rate": 4.129090927106712e-05, "loss": 2.3909, "step": 3517000 }, { "epoch": 17.43, "learning_rate": 4.1289670684641034e-05, "loss": 2.3664, "step": 3517500 }, { "epoch": 17.43, "learning_rate": 4.128843209821495e-05, "loss": 2.3661, "step": 3518000 }, { "epoch": 17.43, "learning_rate": 4.128719351178887e-05, "loss": 2.3586, "step": 3518500 }, { "epoch": 17.43, "learning_rate": 4.1285954925362785e-05, "loss": 2.3723, "step": 3519000 }, { "epoch": 17.44, "learning_rate": 4.12847163389367e-05, "loss": 2.3486, "step": 3519500 }, { "epoch": 17.44, "learning_rate": 4.128347775251062e-05, "loss": 2.3571, "step": 3520000 }, { "epoch": 17.44, "learning_rate": 4.128224164325739e-05, "loss": 2.3583, "step": 3520500 }, { "epoch": 17.44, "learning_rate": 4.1281003056831304e-05, "loss": 2.4005, "step": 3521000 }, { "epoch": 17.45, "learning_rate": 4.127976447040522e-05, "loss": 2.3775, "step": 3521500 }, { "epoch": 17.45, "learning_rate": 4.127852588397914e-05, "loss": 2.3687, "step": 3522000 }, { "epoch": 17.45, "learning_rate": 4.1277287297553055e-05, "loss": 2.3569, "step": 3522500 }, { "epoch": 17.45, "learning_rate": 4.127605118829982e-05, "loss": 2.3676, "step": 3523000 }, { "epoch": 17.46, "learning_rate": 4.1274815079046586e-05, "loss": 2.3535, "step": 3523500 }, { "epoch": 17.46, "learning_rate": 4.12735764926205e-05, "loss": 2.368, "step": 3524000 }, { "epoch": 17.46, "learning_rate": 4.127233790619442e-05, "loss": 2.3668, "step": 3524500 }, { "epoch": 17.46, "learning_rate": 4.127109931976834e-05, "loss": 2.3525, "step": 3525000 }, { "epoch": 17.47, "learning_rate": 4.1269860733342254e-05, "loss": 2.3639, "step": 3525500 }, { "epoch": 17.47, "learning_rate": 4.126862214691617e-05, "loss": 2.3634, "step": 3526000 }, { "epoch": 17.47, "learning_rate": 4.126738603766294e-05, "loss": 2.3497, "step": 3526500 }, { "epoch": 17.47, "learning_rate": 4.126614992840971e-05, "loss": 2.3586, "step": 3527000 }, { "epoch": 17.48, "learning_rate": 4.1264911341983625e-05, "loss": 2.3447, "step": 3527500 }, { "epoch": 17.48, "learning_rate": 4.126367275555754e-05, "loss": 2.3595, "step": 3528000 }, { "epoch": 17.48, "learning_rate": 4.126243416913145e-05, "loss": 2.3406, "step": 3528500 }, { "epoch": 17.48, "learning_rate": 4.126119805987823e-05, "loss": 2.3242, "step": 3529000 }, { "epoch": 17.49, "learning_rate": 4.1259959473452145e-05, "loss": 2.3484, "step": 3529500 }, { "epoch": 17.49, "learning_rate": 4.125872336419891e-05, "loss": 2.3547, "step": 3530000 }, { "epoch": 17.49, "learning_rate": 4.1257484777772824e-05, "loss": 2.3682, "step": 3530500 }, { "epoch": 17.49, "learning_rate": 4.125624619134674e-05, "loss": 2.352, "step": 3531000 }, { "epoch": 17.5, "learning_rate": 4.125500760492066e-05, "loss": 2.3719, "step": 3531500 }, { "epoch": 17.5, "learning_rate": 4.1253769018494575e-05, "loss": 2.3395, "step": 3532000 }, { "epoch": 17.5, "learning_rate": 4.125253043206849e-05, "loss": 2.3595, "step": 3532500 }, { "epoch": 17.5, "learning_rate": 4.125129184564241e-05, "loss": 2.3707, "step": 3533000 }, { "epoch": 17.51, "learning_rate": 4.1250053259216325e-05, "loss": 2.3691, "step": 3533500 }, { "epoch": 17.51, "learning_rate": 4.124881467279024e-05, "loss": 2.3668, "step": 3534000 }, { "epoch": 17.51, "learning_rate": 4.124757608636415e-05, "loss": 2.3482, "step": 3534500 }, { "epoch": 17.51, "learning_rate": 4.124633749993807e-05, "loss": 2.3422, "step": 3535000 }, { "epoch": 17.52, "learning_rate": 4.1245098913511986e-05, "loss": 2.3515, "step": 3535500 }, { "epoch": 17.52, "learning_rate": 4.12438603270859e-05, "loss": 2.3495, "step": 3536000 }, { "epoch": 17.52, "learning_rate": 4.124262174065982e-05, "loss": 2.3579, "step": 3536500 }, { "epoch": 17.52, "learning_rate": 4.124138315423374e-05, "loss": 2.3655, "step": 3537000 }, { "epoch": 17.53, "learning_rate": 4.1240144567807654e-05, "loss": 2.3543, "step": 3537500 }, { "epoch": 17.53, "learning_rate": 4.123890598138157e-05, "loss": 2.3707, "step": 3538000 }, { "epoch": 17.53, "learning_rate": 4.123766987212834e-05, "loss": 2.3427, "step": 3538500 }, { "epoch": 17.53, "learning_rate": 4.123643128570226e-05, "loss": 2.3651, "step": 3539000 }, { "epoch": 17.54, "learning_rate": 4.1235192699276174e-05, "loss": 2.3747, "step": 3539500 }, { "epoch": 17.54, "learning_rate": 4.123395411285009e-05, "loss": 2.3532, "step": 3540000 }, { "epoch": 17.54, "learning_rate": 4.1232715526424e-05, "loss": 2.3638, "step": 3540500 }, { "epoch": 17.54, "learning_rate": 4.123147941717077e-05, "loss": 2.3649, "step": 3541000 }, { "epoch": 17.55, "learning_rate": 4.1230240830744686e-05, "loss": 2.3551, "step": 3541500 }, { "epoch": 17.55, "learning_rate": 4.12290022443186e-05, "loss": 2.3676, "step": 3542000 }, { "epoch": 17.55, "learning_rate": 4.122776365789252e-05, "loss": 2.3658, "step": 3542500 }, { "epoch": 17.55, "learning_rate": 4.122652507146644e-05, "loss": 2.3553, "step": 3543000 }, { "epoch": 17.56, "learning_rate": 4.1225286485040354e-05, "loss": 2.3745, "step": 3543500 }, { "epoch": 17.56, "learning_rate": 4.122405037578712e-05, "loss": 2.3211, "step": 3544000 }, { "epoch": 17.56, "learning_rate": 4.122281178936104e-05, "loss": 2.3367, "step": 3544500 }, { "epoch": 17.56, "learning_rate": 4.122157320293496e-05, "loss": 2.3815, "step": 3545000 }, { "epoch": 17.57, "learning_rate": 4.1220334616508874e-05, "loss": 2.3569, "step": 3545500 }, { "epoch": 17.57, "learning_rate": 4.121909603008279e-05, "loss": 2.3516, "step": 3546000 }, { "epoch": 17.57, "learning_rate": 4.121785992082956e-05, "loss": 2.3911, "step": 3546500 }, { "epoch": 17.57, "learning_rate": 4.121662133440347e-05, "loss": 2.3477, "step": 3547000 }, { "epoch": 17.58, "learning_rate": 4.1215382747977387e-05, "loss": 2.3665, "step": 3547500 }, { "epoch": 17.58, "learning_rate": 4.1214144161551304e-05, "loss": 2.3727, "step": 3548000 }, { "epoch": 17.58, "learning_rate": 4.121290557512522e-05, "loss": 2.3339, "step": 3548500 }, { "epoch": 17.58, "learning_rate": 4.121166698869914e-05, "loss": 2.391, "step": 3549000 }, { "epoch": 17.59, "learning_rate": 4.1210428402273054e-05, "loss": 2.3653, "step": 3549500 }, { "epoch": 17.59, "learning_rate": 4.120918981584697e-05, "loss": 2.3585, "step": 3550000 }, { "epoch": 17.59, "learning_rate": 4.120795122942089e-05, "loss": 2.3839, "step": 3550500 }, { "epoch": 17.59, "learning_rate": 4.1206712642994805e-05, "loss": 2.3457, "step": 3551000 }, { "epoch": 17.6, "learning_rate": 4.120547405656872e-05, "loss": 2.3383, "step": 3551500 }, { "epoch": 17.6, "learning_rate": 4.120423547014264e-05, "loss": 2.3665, "step": 3552000 }, { "epoch": 17.6, "learning_rate": 4.1202996883716556e-05, "loss": 2.3762, "step": 3552500 }, { "epoch": 17.6, "learning_rate": 4.120175829729047e-05, "loss": 2.364, "step": 3553000 }, { "epoch": 17.61, "learning_rate": 4.120051971086439e-05, "loss": 2.3739, "step": 3553500 }, { "epoch": 17.61, "learning_rate": 4.119928360161115e-05, "loss": 2.3595, "step": 3554000 }, { "epoch": 17.61, "learning_rate": 4.119804501518507e-05, "loss": 2.3607, "step": 3554500 }, { "epoch": 17.61, "learning_rate": 4.1196806428758986e-05, "loss": 2.3675, "step": 3555000 }, { "epoch": 17.62, "learning_rate": 4.11955678423329e-05, "loss": 2.3651, "step": 3555500 }, { "epoch": 17.62, "learning_rate": 4.119432925590682e-05, "loss": 2.3478, "step": 3556000 }, { "epoch": 17.62, "learning_rate": 4.1193090669480736e-05, "loss": 2.3354, "step": 3556500 }, { "epoch": 17.62, "learning_rate": 4.119185208305465e-05, "loss": 2.3537, "step": 3557000 }, { "epoch": 17.63, "learning_rate": 4.1190618450974274e-05, "loss": 2.3613, "step": 3557500 }, { "epoch": 17.63, "learning_rate": 4.118937986454819e-05, "loss": 2.3725, "step": 3558000 }, { "epoch": 17.63, "learning_rate": 4.118814127812211e-05, "loss": 2.3538, "step": 3558500 }, { "epoch": 17.63, "learning_rate": 4.1186902691696025e-05, "loss": 2.3833, "step": 3559000 }, { "epoch": 17.63, "learning_rate": 4.118566410526994e-05, "loss": 2.3447, "step": 3559500 }, { "epoch": 17.64, "learning_rate": 4.118442551884386e-05, "loss": 2.3642, "step": 3560000 }, { "epoch": 17.64, "learning_rate": 4.118318693241777e-05, "loss": 2.3562, "step": 3560500 }, { "epoch": 17.64, "learning_rate": 4.118195082316454e-05, "loss": 2.3684, "step": 3561000 }, { "epoch": 17.64, "learning_rate": 4.118071471391131e-05, "loss": 2.376, "step": 3561500 }, { "epoch": 17.65, "learning_rate": 4.117947612748523e-05, "loss": 2.3549, "step": 3562000 }, { "epoch": 17.65, "learning_rate": 4.117823754105914e-05, "loss": 2.3716, "step": 3562500 }, { "epoch": 17.65, "learning_rate": 4.117699895463306e-05, "loss": 2.3731, "step": 3563000 }, { "epoch": 17.65, "learning_rate": 4.1175760368206974e-05, "loss": 2.3784, "step": 3563500 }, { "epoch": 17.66, "learning_rate": 4.117452425895374e-05, "loss": 2.375, "step": 3564000 }, { "epoch": 17.66, "learning_rate": 4.117328567252766e-05, "loss": 2.3604, "step": 3564500 }, { "epoch": 17.66, "learning_rate": 4.117204708610158e-05, "loss": 2.3685, "step": 3565000 }, { "epoch": 17.66, "learning_rate": 4.1170808499675494e-05, "loss": 2.365, "step": 3565500 }, { "epoch": 17.67, "learning_rate": 4.1169569913249404e-05, "loss": 2.3495, "step": 3566000 }, { "epoch": 17.67, "learning_rate": 4.116833132682332e-05, "loss": 2.3564, "step": 3566500 }, { "epoch": 17.67, "learning_rate": 4.116709274039724e-05, "loss": 2.3713, "step": 3567000 }, { "epoch": 17.67, "learning_rate": 4.1165854153971155e-05, "loss": 2.3543, "step": 3567500 }, { "epoch": 17.68, "learning_rate": 4.116461556754507e-05, "loss": 2.3739, "step": 3568000 }, { "epoch": 17.68, "learning_rate": 4.116337945829185e-05, "loss": 2.3615, "step": 3568500 }, { "epoch": 17.68, "learning_rate": 4.116214087186576e-05, "loss": 2.3605, "step": 3569000 }, { "epoch": 17.68, "learning_rate": 4.1160902285439674e-05, "loss": 2.3517, "step": 3569500 }, { "epoch": 17.69, "learning_rate": 4.115966369901359e-05, "loss": 2.3571, "step": 3570000 }, { "epoch": 17.69, "learning_rate": 4.115842511258751e-05, "loss": 2.3669, "step": 3570500 }, { "epoch": 17.69, "learning_rate": 4.115718900333428e-05, "loss": 2.377, "step": 3571000 }, { "epoch": 17.69, "learning_rate": 4.1155950416908194e-05, "loss": 2.3674, "step": 3571500 }, { "epoch": 17.7, "learning_rate": 4.1154711830482104e-05, "loss": 2.3401, "step": 3572000 }, { "epoch": 17.7, "learning_rate": 4.115347572122888e-05, "loss": 2.3685, "step": 3572500 }, { "epoch": 17.7, "learning_rate": 4.1152237134802797e-05, "loss": 2.3623, "step": 3573000 }, { "epoch": 17.7, "learning_rate": 4.1150998548376713e-05, "loss": 2.3706, "step": 3573500 }, { "epoch": 17.71, "learning_rate": 4.114975996195063e-05, "loss": 2.3436, "step": 3574000 }, { "epoch": 17.71, "learning_rate": 4.114852137552455e-05, "loss": 2.386, "step": 3574500 }, { "epoch": 17.71, "learning_rate": 4.1147282789098464e-05, "loss": 2.3572, "step": 3575000 }, { "epoch": 17.71, "learning_rate": 4.1146044202672374e-05, "loss": 2.3548, "step": 3575500 }, { "epoch": 17.72, "learning_rate": 4.114480561624629e-05, "loss": 2.3364, "step": 3576000 }, { "epoch": 17.72, "learning_rate": 4.114356702982021e-05, "loss": 2.3401, "step": 3576500 }, { "epoch": 17.72, "learning_rate": 4.1142328443394125e-05, "loss": 2.3559, "step": 3577000 }, { "epoch": 17.72, "learning_rate": 4.1141094811313746e-05, "loss": 2.3307, "step": 3577500 }, { "epoch": 17.73, "learning_rate": 4.113985622488766e-05, "loss": 2.3549, "step": 3578000 }, { "epoch": 17.73, "learning_rate": 4.113861763846158e-05, "loss": 2.3539, "step": 3578500 }, { "epoch": 17.73, "learning_rate": 4.11373790520355e-05, "loss": 2.3734, "step": 3579000 }, { "epoch": 17.73, "learning_rate": 4.1136140465609414e-05, "loss": 2.3802, "step": 3579500 }, { "epoch": 17.74, "learning_rate": 4.113490187918333e-05, "loss": 2.3796, "step": 3580000 }, { "epoch": 17.74, "learning_rate": 4.113366329275725e-05, "loss": 2.3417, "step": 3580500 }, { "epoch": 17.74, "learning_rate": 4.1132424706331164e-05, "loss": 2.3862, "step": 3581000 }, { "epoch": 17.74, "learning_rate": 4.1131186119905075e-05, "loss": 2.3848, "step": 3581500 }, { "epoch": 17.75, "learning_rate": 4.112994753347899e-05, "loss": 2.3474, "step": 3582000 }, { "epoch": 17.75, "learning_rate": 4.112870894705291e-05, "loss": 2.3563, "step": 3582500 }, { "epoch": 17.75, "learning_rate": 4.1127470360626825e-05, "loss": 2.3501, "step": 3583000 }, { "epoch": 17.75, "learning_rate": 4.112623177420074e-05, "loss": 2.3553, "step": 3583500 }, { "epoch": 17.76, "learning_rate": 4.112499566494751e-05, "loss": 2.3721, "step": 3584000 }, { "epoch": 17.76, "learning_rate": 4.112375707852142e-05, "loss": 2.3584, "step": 3584500 }, { "epoch": 17.76, "learning_rate": 4.112251849209534e-05, "loss": 2.3551, "step": 3585000 }, { "epoch": 17.76, "learning_rate": 4.1121279905669255e-05, "loss": 2.3418, "step": 3585500 }, { "epoch": 17.77, "learning_rate": 4.112004379641603e-05, "loss": 2.3532, "step": 3586000 }, { "epoch": 17.77, "learning_rate": 4.111880520998995e-05, "loss": 2.3619, "step": 3586500 }, { "epoch": 17.77, "learning_rate": 4.1117566623563864e-05, "loss": 2.3691, "step": 3587000 }, { "epoch": 17.77, "learning_rate": 4.111632803713778e-05, "loss": 2.3613, "step": 3587500 }, { "epoch": 17.78, "learning_rate": 4.111508945071169e-05, "loss": 2.3628, "step": 3588000 }, { "epoch": 17.78, "learning_rate": 4.111385581863131e-05, "loss": 2.3733, "step": 3588500 }, { "epoch": 17.78, "learning_rate": 4.111261723220523e-05, "loss": 2.35, "step": 3589000 }, { "epoch": 17.78, "learning_rate": 4.1111378645779146e-05, "loss": 2.3595, "step": 3589500 }, { "epoch": 17.79, "learning_rate": 4.111014005935306e-05, "loss": 2.3707, "step": 3590000 }, { "epoch": 17.79, "learning_rate": 4.110890147292698e-05, "loss": 2.3446, "step": 3590500 }, { "epoch": 17.79, "learning_rate": 4.11076628865009e-05, "loss": 2.3831, "step": 3591000 }, { "epoch": 17.79, "learning_rate": 4.1106424300074814e-05, "loss": 2.3779, "step": 3591500 }, { "epoch": 17.8, "learning_rate": 4.110518571364873e-05, "loss": 2.3586, "step": 3592000 }, { "epoch": 17.8, "learning_rate": 4.11039496043955e-05, "loss": 2.3604, "step": 3592500 }, { "epoch": 17.8, "learning_rate": 4.1102711017969416e-05, "loss": 2.3887, "step": 3593000 }, { "epoch": 17.8, "learning_rate": 4.1101472431543333e-05, "loss": 2.3616, "step": 3593500 }, { "epoch": 17.81, "learning_rate": 4.110023384511725e-05, "loss": 2.3608, "step": 3594000 }, { "epoch": 17.81, "learning_rate": 4.109899773586401e-05, "loss": 2.3625, "step": 3594500 }, { "epoch": 17.81, "learning_rate": 4.109775914943793e-05, "loss": 2.3792, "step": 3595000 }, { "epoch": 17.81, "learning_rate": 4.1096520563011846e-05, "loss": 2.3811, "step": 3595500 }, { "epoch": 17.82, "learning_rate": 4.109528197658576e-05, "loss": 2.3449, "step": 3596000 }, { "epoch": 17.82, "learning_rate": 4.109404339015968e-05, "loss": 2.3525, "step": 3596500 }, { "epoch": 17.82, "learning_rate": 4.10928048037336e-05, "loss": 2.365, "step": 3597000 }, { "epoch": 17.82, "learning_rate": 4.1091568694480366e-05, "loss": 2.3526, "step": 3597500 }, { "epoch": 17.83, "learning_rate": 4.109033010805428e-05, "loss": 2.3738, "step": 3598000 }, { "epoch": 17.83, "learning_rate": 4.10890915216282e-05, "loss": 2.3432, "step": 3598500 }, { "epoch": 17.83, "learning_rate": 4.1087852935202117e-05, "loss": 2.3489, "step": 3599000 }, { "epoch": 17.83, "learning_rate": 4.1086614348776034e-05, "loss": 2.3774, "step": 3599500 }, { "epoch": 17.84, "learning_rate": 4.108537576234995e-05, "loss": 2.3676, "step": 3600000 }, { "epoch": 17.84, "learning_rate": 4.108413717592387e-05, "loss": 2.3776, "step": 3600500 }, { "epoch": 17.84, "learning_rate": 4.1082898589497784e-05, "loss": 2.3718, "step": 3601000 }, { "epoch": 17.84, "learning_rate": 4.10816649574174e-05, "loss": 2.3682, "step": 3601500 }, { "epoch": 17.85, "learning_rate": 4.1080426370991315e-05, "loss": 2.376, "step": 3602000 }, { "epoch": 17.85, "learning_rate": 4.107919026173809e-05, "loss": 2.3763, "step": 3602500 }, { "epoch": 17.85, "learning_rate": 4.1077951675312e-05, "loss": 2.3482, "step": 3603000 }, { "epoch": 17.85, "learning_rate": 4.107671308888592e-05, "loss": 2.3469, "step": 3603500 }, { "epoch": 17.86, "learning_rate": 4.1075474502459835e-05, "loss": 2.3561, "step": 3604000 }, { "epoch": 17.86, "learning_rate": 4.107423591603375e-05, "loss": 2.3922, "step": 3604500 }, { "epoch": 17.86, "learning_rate": 4.107299732960767e-05, "loss": 2.3394, "step": 3605000 }, { "epoch": 17.86, "learning_rate": 4.107175874318158e-05, "loss": 2.3683, "step": 3605500 }, { "epoch": 17.87, "learning_rate": 4.1070520156755496e-05, "loss": 2.3565, "step": 3606000 }, { "epoch": 17.87, "learning_rate": 4.106928157032941e-05, "loss": 2.3693, "step": 3606500 }, { "epoch": 17.87, "learning_rate": 4.106804298390333e-05, "loss": 2.3589, "step": 3607000 }, { "epoch": 17.87, "learning_rate": 4.1066804397477246e-05, "loss": 2.3629, "step": 3607500 }, { "epoch": 17.88, "learning_rate": 4.1065565811051163e-05, "loss": 2.3639, "step": 3608000 }, { "epoch": 17.88, "learning_rate": 4.106432970179793e-05, "loss": 2.3378, "step": 3608500 }, { "epoch": 17.88, "learning_rate": 4.106309111537185e-05, "loss": 2.3463, "step": 3609000 }, { "epoch": 17.88, "learning_rate": 4.1061855006118625e-05, "loss": 2.3587, "step": 3609500 }, { "epoch": 17.89, "learning_rate": 4.1060616419692535e-05, "loss": 2.3465, "step": 3610000 }, { "epoch": 17.89, "learning_rate": 4.105937783326645e-05, "loss": 2.3632, "step": 3610500 }, { "epoch": 17.89, "learning_rate": 4.105813924684037e-05, "loss": 2.3575, "step": 3611000 }, { "epoch": 17.89, "learning_rate": 4.1056900660414286e-05, "loss": 2.3853, "step": 3611500 }, { "epoch": 17.9, "learning_rate": 4.1055662073988196e-05, "loss": 2.3734, "step": 3612000 }, { "epoch": 17.9, "learning_rate": 4.105442348756211e-05, "loss": 2.3292, "step": 3612500 }, { "epoch": 17.9, "learning_rate": 4.105318490113603e-05, "loss": 2.3488, "step": 3613000 }, { "epoch": 17.9, "learning_rate": 4.10519487918828e-05, "loss": 2.3648, "step": 3613500 }, { "epoch": 17.91, "learning_rate": 4.1050710205456715e-05, "loss": 2.3435, "step": 3614000 }, { "epoch": 17.91, "learning_rate": 4.104947161903063e-05, "loss": 2.3709, "step": 3614500 }, { "epoch": 17.91, "learning_rate": 4.104823303260455e-05, "loss": 2.375, "step": 3615000 }, { "epoch": 17.91, "learning_rate": 4.1046994446178466e-05, "loss": 2.3707, "step": 3615500 }, { "epoch": 17.91, "learning_rate": 4.104575585975238e-05, "loss": 2.3493, "step": 3616000 }, { "epoch": 17.92, "learning_rate": 4.10445172733263e-05, "loss": 2.3454, "step": 3616500 }, { "epoch": 17.92, "learning_rate": 4.104327868690022e-05, "loss": 2.34, "step": 3617000 }, { "epoch": 17.92, "learning_rate": 4.1042042577646986e-05, "loss": 2.3497, "step": 3617500 }, { "epoch": 17.92, "learning_rate": 4.1040806468393755e-05, "loss": 2.3711, "step": 3618000 }, { "epoch": 17.93, "learning_rate": 4.103956788196767e-05, "loss": 2.3608, "step": 3618500 }, { "epoch": 17.93, "learning_rate": 4.103832929554159e-05, "loss": 2.3884, "step": 3619000 }, { "epoch": 17.93, "learning_rate": 4.10370907091155e-05, "loss": 2.3432, "step": 3619500 }, { "epoch": 17.93, "learning_rate": 4.1035852122689416e-05, "loss": 2.3545, "step": 3620000 }, { "epoch": 17.94, "learning_rate": 4.103462096778189e-05, "loss": 2.3426, "step": 3620500 }, { "epoch": 17.94, "learning_rate": 4.1033382381355805e-05, "loss": 2.3625, "step": 3621000 }, { "epoch": 17.94, "learning_rate": 4.103214379492972e-05, "loss": 2.3656, "step": 3621500 }, { "epoch": 17.94, "learning_rate": 4.103090520850364e-05, "loss": 2.3802, "step": 3622000 }, { "epoch": 17.95, "learning_rate": 4.1029666622077556e-05, "loss": 2.3537, "step": 3622500 }, { "epoch": 17.95, "learning_rate": 4.102842803565147e-05, "loss": 2.3691, "step": 3623000 }, { "epoch": 17.95, "learning_rate": 4.102718944922539e-05, "loss": 2.3754, "step": 3623500 }, { "epoch": 17.95, "learning_rate": 4.1025950862799307e-05, "loss": 2.3664, "step": 3624000 }, { "epoch": 17.96, "learning_rate": 4.1024712276373224e-05, "loss": 2.3663, "step": 3624500 }, { "epoch": 17.96, "learning_rate": 4.102347368994714e-05, "loss": 2.3661, "step": 3625000 }, { "epoch": 17.96, "learning_rate": 4.102223510352106e-05, "loss": 2.3567, "step": 3625500 }, { "epoch": 17.96, "learning_rate": 4.1020998994267826e-05, "loss": 2.3632, "step": 3626000 }, { "epoch": 17.97, "learning_rate": 4.1019760407841736e-05, "loss": 2.3703, "step": 3626500 }, { "epoch": 17.97, "learning_rate": 4.101852182141565e-05, "loss": 2.3692, "step": 3627000 }, { "epoch": 17.97, "learning_rate": 4.101728323498957e-05, "loss": 2.3708, "step": 3627500 }, { "epoch": 17.97, "learning_rate": 4.101604464856349e-05, "loss": 2.3665, "step": 3628000 }, { "epoch": 17.98, "learning_rate": 4.1014806062137404e-05, "loss": 2.3794, "step": 3628500 }, { "epoch": 17.98, "learning_rate": 4.101356747571132e-05, "loss": 2.3282, "step": 3629000 }, { "epoch": 17.98, "learning_rate": 4.101232888928524e-05, "loss": 2.3399, "step": 3629500 }, { "epoch": 17.98, "learning_rate": 4.1011090302859155e-05, "loss": 2.3481, "step": 3630000 }, { "epoch": 17.99, "learning_rate": 4.1009854193605924e-05, "loss": 2.3503, "step": 3630500 }, { "epoch": 17.99, "learning_rate": 4.100861560717984e-05, "loss": 2.3472, "step": 3631000 }, { "epoch": 17.99, "learning_rate": 4.100737702075376e-05, "loss": 2.3613, "step": 3631500 }, { "epoch": 17.99, "learning_rate": 4.1006140911500526e-05, "loss": 2.3675, "step": 3632000 }, { "epoch": 18.0, "learning_rate": 4.100490232507444e-05, "loss": 2.3705, "step": 3632500 }, { "epoch": 18.0, "learning_rate": 4.100366373864836e-05, "loss": 2.3473, "step": 3633000 }, { "epoch": 18.0, "eval_accuracy": 0.650584751161539, "eval_accuracy_mlm": 0.6049741663885951, "eval_accuracy_nsp": 0.8655666205154554, "eval_loss": 2.37389874458313, "eval_runtime": 145.8488, "eval_samples_per_second": 1748.105, "eval_steps_per_second": 72.843, "step": 3633174 }, { "epoch": 18.0, "learning_rate": 4.100242515222227e-05, "loss": 2.3426, "step": 3633500 }, { "epoch": 18.0, "learning_rate": 4.100118904296904e-05, "loss": 2.3235, "step": 3634000 }, { "epoch": 18.01, "learning_rate": 4.0999950456542956e-05, "loss": 2.3318, "step": 3634500 }, { "epoch": 18.01, "learning_rate": 4.099871187011687e-05, "loss": 2.3237, "step": 3635000 }, { "epoch": 18.01, "learning_rate": 4.099747328369079e-05, "loss": 2.3424, "step": 3635500 }, { "epoch": 18.01, "learning_rate": 4.099623469726471e-05, "loss": 2.3453, "step": 3636000 }, { "epoch": 18.02, "learning_rate": 4.0994996110838624e-05, "loss": 2.3344, "step": 3636500 }, { "epoch": 18.02, "learning_rate": 4.099375752441254e-05, "loss": 2.3098, "step": 3637000 }, { "epoch": 18.02, "learning_rate": 4.099251893798646e-05, "loss": 2.3288, "step": 3637500 }, { "epoch": 18.02, "learning_rate": 4.0991282828733226e-05, "loss": 2.3565, "step": 3638000 }, { "epoch": 18.03, "learning_rate": 4.099004424230714e-05, "loss": 2.3382, "step": 3638500 }, { "epoch": 18.03, "learning_rate": 4.098880565588106e-05, "loss": 2.3385, "step": 3639000 }, { "epoch": 18.03, "learning_rate": 4.098756954662782e-05, "loss": 2.3369, "step": 3639500 }, { "epoch": 18.03, "learning_rate": 4.098633096020174e-05, "loss": 2.3324, "step": 3640000 }, { "epoch": 18.04, "learning_rate": 4.0985092373775656e-05, "loss": 2.3385, "step": 3640500 }, { "epoch": 18.04, "learning_rate": 4.098385378734957e-05, "loss": 2.341, "step": 3641000 }, { "epoch": 18.04, "learning_rate": 4.098261520092349e-05, "loss": 2.3253, "step": 3641500 }, { "epoch": 18.04, "learning_rate": 4.098137661449741e-05, "loss": 2.3427, "step": 3642000 }, { "epoch": 18.05, "learning_rate": 4.0980138028071324e-05, "loss": 2.3467, "step": 3642500 }, { "epoch": 18.05, "learning_rate": 4.097889944164524e-05, "loss": 2.3307, "step": 3643000 }, { "epoch": 18.05, "learning_rate": 4.097766333239201e-05, "loss": 2.3384, "step": 3643500 }, { "epoch": 18.05, "learning_rate": 4.0976424745965927e-05, "loss": 2.3376, "step": 3644000 }, { "epoch": 18.06, "learning_rate": 4.0975186159539843e-05, "loss": 2.3253, "step": 3644500 }, { "epoch": 18.06, "learning_rate": 4.097394757311376e-05, "loss": 2.347, "step": 3645000 }, { "epoch": 18.06, "learning_rate": 4.097270898668768e-05, "loss": 2.3095, "step": 3645500 }, { "epoch": 18.06, "learning_rate": 4.0971470400261594e-05, "loss": 2.3029, "step": 3646000 }, { "epoch": 18.07, "learning_rate": 4.097023181383551e-05, "loss": 2.3188, "step": 3646500 }, { "epoch": 18.07, "learning_rate": 4.096899322740942e-05, "loss": 2.3294, "step": 3647000 }, { "epoch": 18.07, "learning_rate": 4.096775464098334e-05, "loss": 2.3405, "step": 3647500 }, { "epoch": 18.07, "learning_rate": 4.0966516054557255e-05, "loss": 2.3273, "step": 3648000 }, { "epoch": 18.08, "learning_rate": 4.096527746813117e-05, "loss": 2.3155, "step": 3648500 }, { "epoch": 18.08, "learning_rate": 4.096403888170509e-05, "loss": 2.3269, "step": 3649000 }, { "epoch": 18.08, "learning_rate": 4.0962800295279006e-05, "loss": 2.3336, "step": 3649500 }, { "epoch": 18.08, "learning_rate": 4.0961564186025775e-05, "loss": 2.3234, "step": 3650000 }, { "epoch": 18.09, "learning_rate": 4.096032559959969e-05, "loss": 2.3282, "step": 3650500 }, { "epoch": 18.09, "learning_rate": 4.095908701317361e-05, "loss": 2.3405, "step": 3651000 }, { "epoch": 18.09, "learning_rate": 4.0957848426747526e-05, "loss": 2.317, "step": 3651500 }, { "epoch": 18.09, "learning_rate": 4.0956612317494294e-05, "loss": 2.3369, "step": 3652000 }, { "epoch": 18.1, "learning_rate": 4.095537373106821e-05, "loss": 2.3423, "step": 3652500 }, { "epoch": 18.1, "learning_rate": 4.095413762181497e-05, "loss": 2.3363, "step": 3653000 }, { "epoch": 18.1, "learning_rate": 4.095289903538889e-05, "loss": 2.3515, "step": 3653500 }, { "epoch": 18.1, "learning_rate": 4.095166044896281e-05, "loss": 2.3327, "step": 3654000 }, { "epoch": 18.11, "learning_rate": 4.0950421862536724e-05, "loss": 2.3208, "step": 3654500 }, { "epoch": 18.11, "learning_rate": 4.094918327611064e-05, "loss": 2.3527, "step": 3655000 }, { "epoch": 18.11, "learning_rate": 4.094794468968456e-05, "loss": 2.352, "step": 3655500 }, { "epoch": 18.11, "learning_rate": 4.0946706103258475e-05, "loss": 2.3502, "step": 3656000 }, { "epoch": 18.12, "learning_rate": 4.094546751683239e-05, "loss": 2.3427, "step": 3656500 }, { "epoch": 18.12, "learning_rate": 4.094423140757916e-05, "loss": 2.3514, "step": 3657000 }, { "epoch": 18.12, "learning_rate": 4.094299282115308e-05, "loss": 2.3399, "step": 3657500 }, { "epoch": 18.12, "learning_rate": 4.0941754234726995e-05, "loss": 2.3461, "step": 3658000 }, { "epoch": 18.13, "learning_rate": 4.094051564830091e-05, "loss": 2.3321, "step": 3658500 }, { "epoch": 18.13, "learning_rate": 4.093927706187483e-05, "loss": 2.3296, "step": 3659000 }, { "epoch": 18.13, "learning_rate": 4.093804095262159e-05, "loss": 2.338, "step": 3659500 }, { "epoch": 18.13, "learning_rate": 4.093680236619551e-05, "loss": 2.3325, "step": 3660000 }, { "epoch": 18.14, "learning_rate": 4.093556625694228e-05, "loss": 2.3297, "step": 3660500 }, { "epoch": 18.14, "learning_rate": 4.093432767051619e-05, "loss": 2.3328, "step": 3661000 }, { "epoch": 18.14, "learning_rate": 4.093308908409011e-05, "loss": 2.354, "step": 3661500 }, { "epoch": 18.14, "learning_rate": 4.093185049766403e-05, "loss": 2.3395, "step": 3662000 }, { "epoch": 18.15, "learning_rate": 4.0930611911237944e-05, "loss": 2.3472, "step": 3662500 }, { "epoch": 18.15, "learning_rate": 4.092937332481186e-05, "loss": 2.3441, "step": 3663000 }, { "epoch": 18.15, "learning_rate": 4.092813473838578e-05, "loss": 2.3015, "step": 3663500 }, { "epoch": 18.15, "learning_rate": 4.092689862913254e-05, "loss": 2.3355, "step": 3664000 }, { "epoch": 18.16, "learning_rate": 4.092566004270646e-05, "loss": 2.3456, "step": 3664500 }, { "epoch": 18.16, "learning_rate": 4.0924421456280374e-05, "loss": 2.3623, "step": 3665000 }, { "epoch": 18.16, "learning_rate": 4.092318286985429e-05, "loss": 2.3409, "step": 3665500 }, { "epoch": 18.16, "learning_rate": 4.092194428342821e-05, "loss": 2.3148, "step": 3666000 }, { "epoch": 18.17, "learning_rate": 4.0920705697002124e-05, "loss": 2.3218, "step": 3666500 }, { "epoch": 18.17, "learning_rate": 4.091946711057604e-05, "loss": 2.3479, "step": 3667000 }, { "epoch": 18.17, "learning_rate": 4.091822852414996e-05, "loss": 2.3304, "step": 3667500 }, { "epoch": 18.17, "learning_rate": 4.0916989937723875e-05, "loss": 2.3497, "step": 3668000 }, { "epoch": 18.18, "learning_rate": 4.0915753828470644e-05, "loss": 2.3362, "step": 3668500 }, { "epoch": 18.18, "learning_rate": 4.091451771921741e-05, "loss": 2.3458, "step": 3669000 }, { "epoch": 18.18, "learning_rate": 4.091327913279133e-05, "loss": 2.3631, "step": 3669500 }, { "epoch": 18.18, "learning_rate": 4.091204054636525e-05, "loss": 2.3432, "step": 3670000 }, { "epoch": 18.18, "learning_rate": 4.091080195993916e-05, "loss": 2.3421, "step": 3670500 }, { "epoch": 18.19, "learning_rate": 4.0909563373513074e-05, "loss": 2.3361, "step": 3671000 }, { "epoch": 18.19, "learning_rate": 4.090832478708699e-05, "loss": 2.3355, "step": 3671500 }, { "epoch": 18.19, "learning_rate": 4.090708620066091e-05, "loss": 2.3339, "step": 3672000 }, { "epoch": 18.19, "learning_rate": 4.0905847614234825e-05, "loss": 2.3367, "step": 3672500 }, { "epoch": 18.2, "learning_rate": 4.090460902780874e-05, "loss": 2.3447, "step": 3673000 }, { "epoch": 18.2, "learning_rate": 4.090337291855551e-05, "loss": 2.3549, "step": 3673500 }, { "epoch": 18.2, "learning_rate": 4.090213433212943e-05, "loss": 2.3378, "step": 3674000 }, { "epoch": 18.2, "learning_rate": 4.0900895745703344e-05, "loss": 2.3436, "step": 3674500 }, { "epoch": 18.21, "learning_rate": 4.089965715927726e-05, "loss": 2.3406, "step": 3675000 }, { "epoch": 18.21, "learning_rate": 4.089841857285118e-05, "loss": 2.3393, "step": 3675500 }, { "epoch": 18.21, "learning_rate": 4.089718246359795e-05, "loss": 2.3336, "step": 3676000 }, { "epoch": 18.21, "learning_rate": 4.0895943877171864e-05, "loss": 2.3367, "step": 3676500 }, { "epoch": 18.22, "learning_rate": 4.089470529074578e-05, "loss": 2.3672, "step": 3677000 }, { "epoch": 18.22, "learning_rate": 4.089346670431969e-05, "loss": 2.3312, "step": 3677500 }, { "epoch": 18.22, "learning_rate": 4.089222811789361e-05, "loss": 2.35, "step": 3678000 }, { "epoch": 18.22, "learning_rate": 4.0890989531467525e-05, "loss": 2.3514, "step": 3678500 }, { "epoch": 18.23, "learning_rate": 4.08897534222143e-05, "loss": 2.369, "step": 3679000 }, { "epoch": 18.23, "learning_rate": 4.088851483578821e-05, "loss": 2.3395, "step": 3679500 }, { "epoch": 18.23, "learning_rate": 4.088727624936213e-05, "loss": 2.3569, "step": 3680000 }, { "epoch": 18.23, "learning_rate": 4.0886037662936044e-05, "loss": 2.3456, "step": 3680500 }, { "epoch": 18.24, "learning_rate": 4.088480155368282e-05, "loss": 2.3367, "step": 3681000 }, { "epoch": 18.24, "learning_rate": 4.088356544442958e-05, "loss": 2.3175, "step": 3681500 }, { "epoch": 18.24, "learning_rate": 4.08823268580035e-05, "loss": 2.3266, "step": 3682000 }, { "epoch": 18.24, "learning_rate": 4.0881088271577416e-05, "loss": 2.3512, "step": 3682500 }, { "epoch": 18.25, "learning_rate": 4.087984968515133e-05, "loss": 2.3464, "step": 3683000 }, { "epoch": 18.25, "learning_rate": 4.08786135758981e-05, "loss": 2.3319, "step": 3683500 }, { "epoch": 18.25, "learning_rate": 4.087737498947202e-05, "loss": 2.3545, "step": 3684000 }, { "epoch": 18.25, "learning_rate": 4.0876136403045935e-05, "loss": 2.3483, "step": 3684500 }, { "epoch": 18.26, "learning_rate": 4.087489781661985e-05, "loss": 2.3413, "step": 3685000 }, { "epoch": 18.26, "learning_rate": 4.087365923019377e-05, "loss": 2.3222, "step": 3685500 }, { "epoch": 18.26, "learning_rate": 4.0872420643767686e-05, "loss": 2.3522, "step": 3686000 }, { "epoch": 18.26, "learning_rate": 4.08711820573416e-05, "loss": 2.3184, "step": 3686500 }, { "epoch": 18.27, "learning_rate": 4.086994347091552e-05, "loss": 2.3553, "step": 3687000 }, { "epoch": 18.27, "learning_rate": 4.086870736166228e-05, "loss": 2.3474, "step": 3687500 }, { "epoch": 18.27, "learning_rate": 4.08674687752362e-05, "loss": 2.3626, "step": 3688000 }, { "epoch": 18.27, "learning_rate": 4.086623266598297e-05, "loss": 2.3262, "step": 3688500 }, { "epoch": 18.28, "learning_rate": 4.0864994079556885e-05, "loss": 2.3548, "step": 3689000 }, { "epoch": 18.28, "learning_rate": 4.08637554931308e-05, "loss": 2.3467, "step": 3689500 }, { "epoch": 18.28, "learning_rate": 4.086251690670472e-05, "loss": 2.3482, "step": 3690000 }, { "epoch": 18.28, "learning_rate": 4.086128079745149e-05, "loss": 2.3704, "step": 3690500 }, { "epoch": 18.29, "learning_rate": 4.0860042211025404e-05, "loss": 2.3454, "step": 3691000 }, { "epoch": 18.29, "learning_rate": 4.085880610177217e-05, "loss": 2.3512, "step": 3691500 }, { "epoch": 18.29, "learning_rate": 4.085756751534609e-05, "loss": 2.3291, "step": 3692000 }, { "epoch": 18.29, "learning_rate": 4.085632892892001e-05, "loss": 2.3378, "step": 3692500 }, { "epoch": 18.3, "learning_rate": 4.085509034249392e-05, "loss": 2.3274, "step": 3693000 }, { "epoch": 18.3, "learning_rate": 4.0853851756067834e-05, "loss": 2.3324, "step": 3693500 }, { "epoch": 18.3, "learning_rate": 4.085261316964175e-05, "loss": 2.3558, "step": 3694000 }, { "epoch": 18.3, "learning_rate": 4.085137458321567e-05, "loss": 2.3529, "step": 3694500 }, { "epoch": 18.31, "learning_rate": 4.0850135996789585e-05, "loss": 2.3572, "step": 3695000 }, { "epoch": 18.31, "learning_rate": 4.08488974103635e-05, "loss": 2.3294, "step": 3695500 }, { "epoch": 18.31, "learning_rate": 4.084765882393742e-05, "loss": 2.3592, "step": 3696000 }, { "epoch": 18.31, "learning_rate": 4.084642271468419e-05, "loss": 2.3392, "step": 3696500 }, { "epoch": 18.32, "learning_rate": 4.0845184128258104e-05, "loss": 2.3154, "step": 3697000 }, { "epoch": 18.32, "learning_rate": 4.084394801900487e-05, "loss": 2.3509, "step": 3697500 }, { "epoch": 18.32, "learning_rate": 4.084270943257879e-05, "loss": 2.3477, "step": 3698000 }, { "epoch": 18.32, "learning_rate": 4.084147084615271e-05, "loss": 2.3218, "step": 3698500 }, { "epoch": 18.33, "learning_rate": 4.084023225972662e-05, "loss": 2.3387, "step": 3699000 }, { "epoch": 18.33, "learning_rate": 4.0838993673300534e-05, "loss": 2.3438, "step": 3699500 }, { "epoch": 18.33, "learning_rate": 4.083775508687445e-05, "loss": 2.341, "step": 3700000 }, { "epoch": 18.33, "learning_rate": 4.083651650044837e-05, "loss": 2.3482, "step": 3700500 }, { "epoch": 18.34, "learning_rate": 4.0835277914022285e-05, "loss": 2.3661, "step": 3701000 }, { "epoch": 18.34, "learning_rate": 4.08340393275962e-05, "loss": 2.3362, "step": 3701500 }, { "epoch": 18.34, "learning_rate": 4.083280074117012e-05, "loss": 2.3077, "step": 3702000 }, { "epoch": 18.34, "learning_rate": 4.083156463191689e-05, "loss": 2.362, "step": 3702500 }, { "epoch": 18.35, "learning_rate": 4.0830326045490804e-05, "loss": 2.3652, "step": 3703000 }, { "epoch": 18.35, "learning_rate": 4.082908745906472e-05, "loss": 2.3664, "step": 3703500 }, { "epoch": 18.35, "learning_rate": 4.082784887263864e-05, "loss": 2.3331, "step": 3704000 }, { "epoch": 18.35, "learning_rate": 4.0826610286212555e-05, "loss": 2.3267, "step": 3704500 }, { "epoch": 18.36, "learning_rate": 4.0825371699786465e-05, "loss": 2.3312, "step": 3705000 }, { "epoch": 18.36, "learning_rate": 4.082413311336038e-05, "loss": 2.362, "step": 3705500 }, { "epoch": 18.36, "learning_rate": 4.082289700410715e-05, "loss": 2.3326, "step": 3706000 }, { "epoch": 18.36, "learning_rate": 4.082165841768107e-05, "loss": 2.3602, "step": 3706500 }, { "epoch": 18.37, "learning_rate": 4.0820419831254985e-05, "loss": 2.3623, "step": 3707000 }, { "epoch": 18.37, "learning_rate": 4.08191812448289e-05, "loss": 2.3526, "step": 3707500 }, { "epoch": 18.37, "learning_rate": 4.081794265840282e-05, "loss": 2.3439, "step": 3708000 }, { "epoch": 18.37, "learning_rate": 4.081670654914959e-05, "loss": 2.3465, "step": 3708500 }, { "epoch": 18.38, "learning_rate": 4.0815467962723505e-05, "loss": 2.3447, "step": 3709000 }, { "epoch": 18.38, "learning_rate": 4.081422937629742e-05, "loss": 2.3459, "step": 3709500 }, { "epoch": 18.38, "learning_rate": 4.081299078987134e-05, "loss": 2.3666, "step": 3710000 }, { "epoch": 18.38, "learning_rate": 4.0811752203445255e-05, "loss": 2.3043, "step": 3710500 }, { "epoch": 18.39, "learning_rate": 4.0810516094192024e-05, "loss": 2.3246, "step": 3711000 }, { "epoch": 18.39, "learning_rate": 4.0809277507765934e-05, "loss": 2.3471, "step": 3711500 }, { "epoch": 18.39, "learning_rate": 4.080803892133985e-05, "loss": 2.3432, "step": 3712000 }, { "epoch": 18.39, "learning_rate": 4.080680033491377e-05, "loss": 2.3355, "step": 3712500 }, { "epoch": 18.4, "learning_rate": 4.0805564225660544e-05, "loss": 2.3684, "step": 3713000 }, { "epoch": 18.4, "learning_rate": 4.080432563923446e-05, "loss": 2.366, "step": 3713500 }, { "epoch": 18.4, "learning_rate": 4.080308705280838e-05, "loss": 2.3494, "step": 3714000 }, { "epoch": 18.4, "learning_rate": 4.080184846638229e-05, "loss": 2.3516, "step": 3714500 }, { "epoch": 18.41, "learning_rate": 4.0800609879956205e-05, "loss": 2.3581, "step": 3715000 }, { "epoch": 18.41, "learning_rate": 4.079937129353012e-05, "loss": 2.3587, "step": 3715500 }, { "epoch": 18.41, "learning_rate": 4.079813270710404e-05, "loss": 2.3318, "step": 3716000 }, { "epoch": 18.41, "learning_rate": 4.0796894120677956e-05, "loss": 2.3423, "step": 3716500 }, { "epoch": 18.42, "learning_rate": 4.079565553425187e-05, "loss": 2.338, "step": 3717000 }, { "epoch": 18.42, "learning_rate": 4.079441942499864e-05, "loss": 2.3467, "step": 3717500 }, { "epoch": 18.42, "learning_rate": 4.079318331574541e-05, "loss": 2.3822, "step": 3718000 }, { "epoch": 18.42, "learning_rate": 4.079194472931933e-05, "loss": 2.3626, "step": 3718500 }, { "epoch": 18.43, "learning_rate": 4.0790706142893244e-05, "loss": 2.3683, "step": 3719000 }, { "epoch": 18.43, "learning_rate": 4.078946755646716e-05, "loss": 2.3831, "step": 3719500 }, { "epoch": 18.43, "learning_rate": 4.078822897004108e-05, "loss": 2.3206, "step": 3720000 }, { "epoch": 18.43, "learning_rate": 4.0786990383614995e-05, "loss": 2.3446, "step": 3720500 }, { "epoch": 18.44, "learning_rate": 4.0785751797188905e-05, "loss": 2.3532, "step": 3721000 }, { "epoch": 18.44, "learning_rate": 4.078451321076282e-05, "loss": 2.3127, "step": 3721500 }, { "epoch": 18.44, "learning_rate": 4.078327462433674e-05, "loss": 2.3387, "step": 3722000 }, { "epoch": 18.44, "learning_rate": 4.0782036037910656e-05, "loss": 2.3392, "step": 3722500 }, { "epoch": 18.45, "learning_rate": 4.0780799928657424e-05, "loss": 2.3358, "step": 3723000 }, { "epoch": 18.45, "learning_rate": 4.077956134223134e-05, "loss": 2.3367, "step": 3723500 }, { "epoch": 18.45, "learning_rate": 4.077832275580525e-05, "loss": 2.375, "step": 3724000 }, { "epoch": 18.45, "learning_rate": 4.077708416937917e-05, "loss": 2.3369, "step": 3724500 }, { "epoch": 18.45, "learning_rate": 4.0775845582953085e-05, "loss": 2.3509, "step": 3725000 }, { "epoch": 18.46, "learning_rate": 4.0774606996527e-05, "loss": 2.3558, "step": 3725500 }, { "epoch": 18.46, "learning_rate": 4.077336841010092e-05, "loss": 2.338, "step": 3726000 }, { "epoch": 18.46, "learning_rate": 4.0772129823674836e-05, "loss": 2.351, "step": 3726500 }, { "epoch": 18.46, "learning_rate": 4.0770893714421605e-05, "loss": 2.3565, "step": 3727000 }, { "epoch": 18.47, "learning_rate": 4.076965512799552e-05, "loss": 2.3638, "step": 3727500 }, { "epoch": 18.47, "learning_rate": 4.076841654156944e-05, "loss": 2.3696, "step": 3728000 }, { "epoch": 18.47, "learning_rate": 4.0767177955143356e-05, "loss": 2.3566, "step": 3728500 }, { "epoch": 18.47, "learning_rate": 4.076593936871727e-05, "loss": 2.3472, "step": 3729000 }, { "epoch": 18.48, "learning_rate": 4.076470325946404e-05, "loss": 2.3396, "step": 3729500 }, { "epoch": 18.48, "learning_rate": 4.076346467303796e-05, "loss": 2.3794, "step": 3730000 }, { "epoch": 18.48, "learning_rate": 4.076222608661187e-05, "loss": 2.3788, "step": 3730500 }, { "epoch": 18.48, "learning_rate": 4.0760987500185786e-05, "loss": 2.3517, "step": 3731000 }, { "epoch": 18.49, "learning_rate": 4.075975139093256e-05, "loss": 2.3382, "step": 3731500 }, { "epoch": 18.49, "learning_rate": 4.075851280450648e-05, "loss": 2.3403, "step": 3732000 }, { "epoch": 18.49, "learning_rate": 4.0757274218080395e-05, "loss": 2.3588, "step": 3732500 }, { "epoch": 18.49, "learning_rate": 4.075603563165431e-05, "loss": 2.3352, "step": 3733000 }, { "epoch": 18.5, "learning_rate": 4.075479704522822e-05, "loss": 2.3494, "step": 3733500 }, { "epoch": 18.5, "learning_rate": 4.075355845880214e-05, "loss": 2.3463, "step": 3734000 }, { "epoch": 18.5, "learning_rate": 4.0752319872376056e-05, "loss": 2.3804, "step": 3734500 }, { "epoch": 18.5, "learning_rate": 4.075108128594997e-05, "loss": 2.3203, "step": 3735000 }, { "epoch": 18.51, "learning_rate": 4.074984269952389e-05, "loss": 2.3402, "step": 3735500 }, { "epoch": 18.51, "learning_rate": 4.074860659027066e-05, "loss": 2.3319, "step": 3736000 }, { "epoch": 18.51, "learning_rate": 4.074737048101743e-05, "loss": 2.3261, "step": 3736500 }, { "epoch": 18.51, "learning_rate": 4.0746131894591344e-05, "loss": 2.3539, "step": 3737000 }, { "epoch": 18.52, "learning_rate": 4.074489330816526e-05, "loss": 2.3372, "step": 3737500 }, { "epoch": 18.52, "learning_rate": 4.074365472173918e-05, "loss": 2.3349, "step": 3738000 }, { "epoch": 18.52, "learning_rate": 4.0742416135313095e-05, "loss": 2.3208, "step": 3738500 }, { "epoch": 18.52, "learning_rate": 4.0741180026059864e-05, "loss": 2.3495, "step": 3739000 }, { "epoch": 18.53, "learning_rate": 4.073994143963378e-05, "loss": 2.3535, "step": 3739500 }, { "epoch": 18.53, "learning_rate": 4.073870285320769e-05, "loss": 2.3686, "step": 3740000 }, { "epoch": 18.53, "learning_rate": 4.073746426678161e-05, "loss": 2.3508, "step": 3740500 }, { "epoch": 18.53, "learning_rate": 4.0736225680355525e-05, "loss": 2.3398, "step": 3741000 }, { "epoch": 18.54, "learning_rate": 4.073498709392944e-05, "loss": 2.3602, "step": 3741500 }, { "epoch": 18.54, "learning_rate": 4.073375098467621e-05, "loss": 2.344, "step": 3742000 }, { "epoch": 18.54, "learning_rate": 4.073251239825013e-05, "loss": 2.3464, "step": 3742500 }, { "epoch": 18.54, "learning_rate": 4.0731273811824044e-05, "loss": 2.3433, "step": 3743000 }, { "epoch": 18.55, "learning_rate": 4.073003522539796e-05, "loss": 2.3592, "step": 3743500 }, { "epoch": 18.55, "learning_rate": 4.072879663897188e-05, "loss": 2.318, "step": 3744000 }, { "epoch": 18.55, "learning_rate": 4.072756052971865e-05, "loss": 2.3456, "step": 3744500 }, { "epoch": 18.55, "learning_rate": 4.0726321943292564e-05, "loss": 2.3568, "step": 3745000 }, { "epoch": 18.56, "learning_rate": 4.072508335686648e-05, "loss": 2.3698, "step": 3745500 }, { "epoch": 18.56, "learning_rate": 4.07238447704404e-05, "loss": 2.3393, "step": 3746000 }, { "epoch": 18.56, "learning_rate": 4.072260618401431e-05, "loss": 2.3585, "step": 3746500 }, { "epoch": 18.56, "learning_rate": 4.0721367597588225e-05, "loss": 2.3521, "step": 3747000 }, { "epoch": 18.57, "learning_rate": 4.072012901116214e-05, "loss": 2.3465, "step": 3747500 }, { "epoch": 18.57, "learning_rate": 4.071889042473606e-05, "loss": 2.3389, "step": 3748000 }, { "epoch": 18.57, "learning_rate": 4.071765431548283e-05, "loss": 2.3552, "step": 3748500 }, { "epoch": 18.57, "learning_rate": 4.0716415729056745e-05, "loss": 2.3512, "step": 3749000 }, { "epoch": 18.58, "learning_rate": 4.071517714263066e-05, "loss": 2.3437, "step": 3749500 }, { "epoch": 18.58, "learning_rate": 4.071393855620458e-05, "loss": 2.3615, "step": 3750000 }, { "epoch": 18.58, "learning_rate": 4.0712699969778495e-05, "loss": 2.3265, "step": 3750500 }, { "epoch": 18.58, "learning_rate": 4.071146138335241e-05, "loss": 2.3665, "step": 3751000 }, { "epoch": 18.59, "learning_rate": 4.071022527409918e-05, "loss": 2.345, "step": 3751500 }, { "epoch": 18.59, "learning_rate": 4.07089866876731e-05, "loss": 2.3294, "step": 3752000 }, { "epoch": 18.59, "learning_rate": 4.0707748101247015e-05, "loss": 2.3589, "step": 3752500 }, { "epoch": 18.59, "learning_rate": 4.070650951482093e-05, "loss": 2.3329, "step": 3753000 }, { "epoch": 18.6, "learning_rate": 4.070527092839484e-05, "loss": 2.3413, "step": 3753500 }, { "epoch": 18.6, "learning_rate": 4.070403729631446e-05, "loss": 2.3506, "step": 3754000 }, { "epoch": 18.6, "learning_rate": 4.070279870988838e-05, "loss": 2.3563, "step": 3754500 }, { "epoch": 18.6, "learning_rate": 4.0701560123462297e-05, "loss": 2.3641, "step": 3755000 }, { "epoch": 18.61, "learning_rate": 4.0700321537036213e-05, "loss": 2.3566, "step": 3755500 }, { "epoch": 18.61, "learning_rate": 4.069908295061013e-05, "loss": 2.3409, "step": 3756000 }, { "epoch": 18.61, "learning_rate": 4.06978468413569e-05, "loss": 2.3472, "step": 3756500 }, { "epoch": 18.61, "learning_rate": 4.069660825493081e-05, "loss": 2.3627, "step": 3757000 }, { "epoch": 18.62, "learning_rate": 4.0695369668504726e-05, "loss": 2.3429, "step": 3757500 }, { "epoch": 18.62, "learning_rate": 4.069413108207864e-05, "loss": 2.3491, "step": 3758000 }, { "epoch": 18.62, "learning_rate": 4.069289497282542e-05, "loss": 2.3293, "step": 3758500 }, { "epoch": 18.62, "learning_rate": 4.069165886357219e-05, "loss": 2.365, "step": 3759000 }, { "epoch": 18.63, "learning_rate": 4.0690420277146105e-05, "loss": 2.3383, "step": 3759500 }, { "epoch": 18.63, "learning_rate": 4.068918169072002e-05, "loss": 2.3502, "step": 3760000 }, { "epoch": 18.63, "learning_rate": 4.068794310429394e-05, "loss": 2.3331, "step": 3760500 }, { "epoch": 18.63, "learning_rate": 4.068670451786785e-05, "loss": 2.3463, "step": 3761000 }, { "epoch": 18.64, "learning_rate": 4.0685465931441765e-05, "loss": 2.3493, "step": 3761500 }, { "epoch": 18.64, "learning_rate": 4.068422734501568e-05, "loss": 2.3455, "step": 3762000 }, { "epoch": 18.64, "learning_rate": 4.06829887585896e-05, "loss": 2.3398, "step": 3762500 }, { "epoch": 18.64, "learning_rate": 4.0681750172163516e-05, "loss": 2.3684, "step": 3763000 }, { "epoch": 18.65, "learning_rate": 4.0680514062910285e-05, "loss": 2.3276, "step": 3763500 }, { "epoch": 18.65, "learning_rate": 4.06792754764842e-05, "loss": 2.3632, "step": 3764000 }, { "epoch": 18.65, "learning_rate": 4.067803689005812e-05, "loss": 2.3577, "step": 3764500 }, { "epoch": 18.65, "learning_rate": 4.0676798303632036e-05, "loss": 2.3654, "step": 3765000 }, { "epoch": 18.66, "learning_rate": 4.0675562194378805e-05, "loss": 2.3418, "step": 3765500 }, { "epoch": 18.66, "learning_rate": 4.067432360795272e-05, "loss": 2.3504, "step": 3766000 }, { "epoch": 18.66, "learning_rate": 4.067308502152664e-05, "loss": 2.3258, "step": 3766500 }, { "epoch": 18.66, "learning_rate": 4.0671846435100555e-05, "loss": 2.3525, "step": 3767000 }, { "epoch": 18.67, "learning_rate": 4.067060784867447e-05, "loss": 2.3618, "step": 3767500 }, { "epoch": 18.67, "learning_rate": 4.066936926224838e-05, "loss": 2.3604, "step": 3768000 }, { "epoch": 18.67, "learning_rate": 4.06681306758223e-05, "loss": 2.365, "step": 3768500 }, { "epoch": 18.67, "learning_rate": 4.0666892089396216e-05, "loss": 2.3799, "step": 3769000 }, { "epoch": 18.68, "learning_rate": 4.066565350297013e-05, "loss": 2.3418, "step": 3769500 }, { "epoch": 18.68, "learning_rate": 4.066441491654405e-05, "loss": 2.3537, "step": 3770000 }, { "epoch": 18.68, "learning_rate": 4.066317633011796e-05, "loss": 2.363, "step": 3770500 }, { "epoch": 18.68, "learning_rate": 4.066193774369188e-05, "loss": 2.3617, "step": 3771000 }, { "epoch": 18.69, "learning_rate": 4.0660699157265794e-05, "loss": 2.3339, "step": 3771500 }, { "epoch": 18.69, "learning_rate": 4.065946057083971e-05, "loss": 2.3676, "step": 3772000 }, { "epoch": 18.69, "learning_rate": 4.065822446158648e-05, "loss": 2.3482, "step": 3772500 }, { "epoch": 18.69, "learning_rate": 4.06569858751604e-05, "loss": 2.333, "step": 3773000 }, { "epoch": 18.7, "learning_rate": 4.0655747288734314e-05, "loss": 2.3455, "step": 3773500 }, { "epoch": 18.7, "learning_rate": 4.065450870230823e-05, "loss": 2.3702, "step": 3774000 }, { "epoch": 18.7, "learning_rate": 4.065327011588215e-05, "loss": 2.3753, "step": 3774500 }, { "epoch": 18.7, "learning_rate": 4.0652034006628917e-05, "loss": 2.3434, "step": 3775000 }, { "epoch": 18.71, "learning_rate": 4.0650795420202833e-05, "loss": 2.3595, "step": 3775500 }, { "epoch": 18.71, "learning_rate": 4.064955683377675e-05, "loss": 2.3349, "step": 3776000 }, { "epoch": 18.71, "learning_rate": 4.064831824735067e-05, "loss": 2.3468, "step": 3776500 }, { "epoch": 18.71, "learning_rate": 4.064707966092458e-05, "loss": 2.35, "step": 3777000 }, { "epoch": 18.72, "learning_rate": 4.0645841074498494e-05, "loss": 2.3771, "step": 3777500 }, { "epoch": 18.72, "learning_rate": 4.064460496524526e-05, "loss": 2.3921, "step": 3778000 }, { "epoch": 18.72, "learning_rate": 4.064336637881918e-05, "loss": 2.3522, "step": 3778500 }, { "epoch": 18.72, "learning_rate": 4.06421277923931e-05, "loss": 2.3284, "step": 3779000 }, { "epoch": 18.72, "learning_rate": 4.0640889205967014e-05, "loss": 2.3193, "step": 3779500 }, { "epoch": 18.73, "learning_rate": 4.063965061954093e-05, "loss": 2.3292, "step": 3780000 }, { "epoch": 18.73, "learning_rate": 4.063841203311485e-05, "loss": 2.3412, "step": 3780500 }, { "epoch": 18.73, "learning_rate": 4.0637173446688765e-05, "loss": 2.3477, "step": 3781000 }, { "epoch": 18.73, "learning_rate": 4.063593486026268e-05, "loss": 2.3427, "step": 3781500 }, { "epoch": 18.74, "learning_rate": 4.063469875100945e-05, "loss": 2.359, "step": 3782000 }, { "epoch": 18.74, "learning_rate": 4.063346264175622e-05, "loss": 2.3708, "step": 3782500 }, { "epoch": 18.74, "learning_rate": 4.0632224055330136e-05, "loss": 2.3456, "step": 3783000 }, { "epoch": 18.74, "learning_rate": 4.063098546890405e-05, "loss": 2.3478, "step": 3783500 }, { "epoch": 18.75, "learning_rate": 4.062974935965082e-05, "loss": 2.3417, "step": 3784000 }, { "epoch": 18.75, "learning_rate": 4.062851077322474e-05, "loss": 2.3459, "step": 3784500 }, { "epoch": 18.75, "learning_rate": 4.0627272186798656e-05, "loss": 2.3443, "step": 3785000 }, { "epoch": 18.75, "learning_rate": 4.062603360037257e-05, "loss": 2.3511, "step": 3785500 }, { "epoch": 18.76, "learning_rate": 4.062479501394649e-05, "loss": 2.3571, "step": 3786000 }, { "epoch": 18.76, "learning_rate": 4.062355890469325e-05, "loss": 2.3711, "step": 3786500 }, { "epoch": 18.76, "learning_rate": 4.062232031826717e-05, "loss": 2.3704, "step": 3787000 }, { "epoch": 18.76, "learning_rate": 4.0621081731841086e-05, "loss": 2.3424, "step": 3787500 }, { "epoch": 18.77, "learning_rate": 4.0619843145415e-05, "loss": 2.3654, "step": 3788000 }, { "epoch": 18.77, "learning_rate": 4.061860455898892e-05, "loss": 2.3397, "step": 3788500 }, { "epoch": 18.77, "learning_rate": 4.0617365972562836e-05, "loss": 2.3416, "step": 3789000 }, { "epoch": 18.77, "learning_rate": 4.061612738613675e-05, "loss": 2.3722, "step": 3789500 }, { "epoch": 18.78, "learning_rate": 4.061488879971067e-05, "loss": 2.3566, "step": 3790000 }, { "epoch": 18.78, "learning_rate": 4.061365269045744e-05, "loss": 2.3691, "step": 3790500 }, { "epoch": 18.78, "learning_rate": 4.0612414104031356e-05, "loss": 2.3407, "step": 3791000 }, { "epoch": 18.78, "learning_rate": 4.061117551760527e-05, "loss": 2.3298, "step": 3791500 }, { "epoch": 18.79, "learning_rate": 4.060993693117919e-05, "loss": 2.3593, "step": 3792000 }, { "epoch": 18.79, "learning_rate": 4.060869834475311e-05, "loss": 2.3477, "step": 3792500 }, { "epoch": 18.79, "learning_rate": 4.0607459758327024e-05, "loss": 2.3582, "step": 3793000 }, { "epoch": 18.79, "learning_rate": 4.0606223649073786e-05, "loss": 2.3396, "step": 3793500 }, { "epoch": 18.8, "learning_rate": 4.06049850626477e-05, "loss": 2.3546, "step": 3794000 }, { "epoch": 18.8, "learning_rate": 4.060374647622162e-05, "loss": 2.3364, "step": 3794500 }, { "epoch": 18.8, "learning_rate": 4.0602507889795536e-05, "loss": 2.3689, "step": 3795000 }, { "epoch": 18.8, "learning_rate": 4.0601269303369453e-05, "loss": 2.3572, "step": 3795500 }, { "epoch": 18.81, "learning_rate": 4.060003071694337e-05, "loss": 2.3567, "step": 3796000 }, { "epoch": 18.81, "learning_rate": 4.059879213051728e-05, "loss": 2.3234, "step": 3796500 }, { "epoch": 18.81, "learning_rate": 4.05975535440912e-05, "loss": 2.3548, "step": 3797000 }, { "epoch": 18.81, "learning_rate": 4.0596314957665114e-05, "loss": 2.3628, "step": 3797500 }, { "epoch": 18.82, "learning_rate": 4.059507884841189e-05, "loss": 2.3432, "step": 3798000 }, { "epoch": 18.82, "learning_rate": 4.059384026198581e-05, "loss": 2.3482, "step": 3798500 }, { "epoch": 18.82, "learning_rate": 4.0592601675559724e-05, "loss": 2.3262, "step": 3799000 }, { "epoch": 18.82, "learning_rate": 4.059136308913364e-05, "loss": 2.354, "step": 3799500 }, { "epoch": 18.83, "learning_rate": 4.05901269798804e-05, "loss": 2.3366, "step": 3800000 }, { "epoch": 18.83, "learning_rate": 4.058889087062717e-05, "loss": 2.3351, "step": 3800500 }, { "epoch": 18.83, "learning_rate": 4.058765228420109e-05, "loss": 2.3698, "step": 3801000 }, { "epoch": 18.83, "learning_rate": 4.0586413697775005e-05, "loss": 2.3652, "step": 3801500 }, { "epoch": 18.84, "learning_rate": 4.058517511134892e-05, "loss": 2.3572, "step": 3802000 }, { "epoch": 18.84, "learning_rate": 4.058393900209569e-05, "loss": 2.3613, "step": 3802500 }, { "epoch": 18.84, "learning_rate": 4.058270041566961e-05, "loss": 2.3577, "step": 3803000 }, { "epoch": 18.84, "learning_rate": 4.0581461829243525e-05, "loss": 2.3717, "step": 3803500 }, { "epoch": 18.85, "learning_rate": 4.058022324281744e-05, "loss": 2.3315, "step": 3804000 }, { "epoch": 18.85, "learning_rate": 4.057898465639136e-05, "loss": 2.375, "step": 3804500 }, { "epoch": 18.85, "learning_rate": 4.057774606996527e-05, "loss": 2.3772, "step": 3805000 }, { "epoch": 18.85, "learning_rate": 4.0576507483539186e-05, "loss": 2.3574, "step": 3805500 }, { "epoch": 18.86, "learning_rate": 4.05752688971131e-05, "loss": 2.3731, "step": 3806000 }, { "epoch": 18.86, "learning_rate": 4.057403031068702e-05, "loss": 2.3567, "step": 3806500 }, { "epoch": 18.86, "learning_rate": 4.057279172426094e-05, "loss": 2.3155, "step": 3807000 }, { "epoch": 18.86, "learning_rate": 4.0571553137834854e-05, "loss": 2.362, "step": 3807500 }, { "epoch": 18.87, "learning_rate": 4.057031455140877e-05, "loss": 2.3639, "step": 3808000 }, { "epoch": 18.87, "learning_rate": 4.056907596498269e-05, "loss": 2.3458, "step": 3808500 }, { "epoch": 18.87, "learning_rate": 4.05678373785566e-05, "loss": 2.3485, "step": 3809000 }, { "epoch": 18.87, "learning_rate": 4.0566598792130515e-05, "loss": 2.3583, "step": 3809500 }, { "epoch": 18.88, "learning_rate": 4.056536516005014e-05, "loss": 2.3601, "step": 3810000 }, { "epoch": 18.88, "learning_rate": 4.056412657362406e-05, "loss": 2.3376, "step": 3810500 }, { "epoch": 18.88, "learning_rate": 4.0562887987197976e-05, "loss": 2.3372, "step": 3811000 }, { "epoch": 18.88, "learning_rate": 4.056165187794474e-05, "loss": 2.3443, "step": 3811500 }, { "epoch": 18.89, "learning_rate": 4.0560413291518655e-05, "loss": 2.349, "step": 3812000 }, { "epoch": 18.89, "learning_rate": 4.055917470509257e-05, "loss": 2.3483, "step": 3812500 }, { "epoch": 18.89, "learning_rate": 4.055793611866649e-05, "loss": 2.3419, "step": 3813000 }, { "epoch": 18.89, "learning_rate": 4.0556697532240406e-05, "loss": 2.3571, "step": 3813500 }, { "epoch": 18.9, "learning_rate": 4.055545894581432e-05, "loss": 2.3455, "step": 3814000 }, { "epoch": 18.9, "learning_rate": 4.055422035938824e-05, "loss": 2.3596, "step": 3814500 }, { "epoch": 18.9, "learning_rate": 4.0552981772962156e-05, "loss": 2.3755, "step": 3815000 }, { "epoch": 18.9, "learning_rate": 4.055174318653607e-05, "loss": 2.3312, "step": 3815500 }, { "epoch": 18.91, "learning_rate": 4.055050460010999e-05, "loss": 2.356, "step": 3816000 }, { "epoch": 18.91, "learning_rate": 4.054926601368391e-05, "loss": 2.3591, "step": 3816500 }, { "epoch": 18.91, "learning_rate": 4.0548027427257824e-05, "loss": 2.3418, "step": 3817000 }, { "epoch": 18.91, "learning_rate": 4.054678884083174e-05, "loss": 2.3178, "step": 3817500 }, { "epoch": 18.92, "learning_rate": 4.054555025440566e-05, "loss": 2.3461, "step": 3818000 }, { "epoch": 18.92, "learning_rate": 4.054431166797957e-05, "loss": 2.3436, "step": 3818500 }, { "epoch": 18.92, "learning_rate": 4.0543073081553485e-05, "loss": 2.3313, "step": 3819000 }, { "epoch": 18.92, "learning_rate": 4.05418344951274e-05, "loss": 2.3429, "step": 3819500 }, { "epoch": 18.93, "learning_rate": 4.054059838587417e-05, "loss": 2.3593, "step": 3820000 }, { "epoch": 18.93, "learning_rate": 4.053936227662094e-05, "loss": 2.3424, "step": 3820500 }, { "epoch": 18.93, "learning_rate": 4.0538123690194857e-05, "loss": 2.3253, "step": 3821000 }, { "epoch": 18.93, "learning_rate": 4.0536885103768774e-05, "loss": 2.3515, "step": 3821500 }, { "epoch": 18.94, "learning_rate": 4.053564651734269e-05, "loss": 2.3587, "step": 3822000 }, { "epoch": 18.94, "learning_rate": 4.053440793091661e-05, "loss": 2.3128, "step": 3822500 }, { "epoch": 18.94, "learning_rate": 4.0533169344490524e-05, "loss": 2.3619, "step": 3823000 }, { "epoch": 18.94, "learning_rate": 4.053193075806444e-05, "loss": 2.3424, "step": 3823500 }, { "epoch": 18.95, "learning_rate": 4.053069217163836e-05, "loss": 2.3527, "step": 3824000 }, { "epoch": 18.95, "learning_rate": 4.0529453585212275e-05, "loss": 2.3656, "step": 3824500 }, { "epoch": 18.95, "learning_rate": 4.0528217475959044e-05, "loss": 2.3551, "step": 3825000 }, { "epoch": 18.95, "learning_rate": 4.0526978889532954e-05, "loss": 2.3346, "step": 3825500 }, { "epoch": 18.96, "learning_rate": 4.052574278027972e-05, "loss": 2.3258, "step": 3826000 }, { "epoch": 18.96, "learning_rate": 4.052450419385364e-05, "loss": 2.3451, "step": 3826500 }, { "epoch": 18.96, "learning_rate": 4.052326560742756e-05, "loss": 2.3521, "step": 3827000 }, { "epoch": 18.96, "learning_rate": 4.0522027021001474e-05, "loss": 2.3527, "step": 3827500 }, { "epoch": 18.97, "learning_rate": 4.052078843457539e-05, "loss": 2.3459, "step": 3828000 }, { "epoch": 18.97, "learning_rate": 4.051954984814931e-05, "loss": 2.3684, "step": 3828500 }, { "epoch": 18.97, "learning_rate": 4.0518311261723224e-05, "loss": 2.3569, "step": 3829000 }, { "epoch": 18.97, "learning_rate": 4.051707267529714e-05, "loss": 2.368, "step": 3829500 }, { "epoch": 18.98, "learning_rate": 4.051583408887106e-05, "loss": 2.3412, "step": 3830000 }, { "epoch": 18.98, "learning_rate": 4.0514595502444975e-05, "loss": 2.3685, "step": 3830500 }, { "epoch": 18.98, "learning_rate": 4.0513356916018885e-05, "loss": 2.3667, "step": 3831000 }, { "epoch": 18.98, "learning_rate": 4.05121183295928e-05, "loss": 2.333, "step": 3831500 }, { "epoch": 18.99, "learning_rate": 4.051088469751242e-05, "loss": 2.3416, "step": 3832000 }, { "epoch": 18.99, "learning_rate": 4.050964611108634e-05, "loss": 2.344, "step": 3832500 }, { "epoch": 18.99, "learning_rate": 4.050840752466026e-05, "loss": 2.3712, "step": 3833000 }, { "epoch": 18.99, "learning_rate": 4.0507168938234174e-05, "loss": 2.3576, "step": 3833500 }, { "epoch": 18.99, "learning_rate": 4.050593282898094e-05, "loss": 2.3196, "step": 3834000 }, { "epoch": 19.0, "learning_rate": 4.050469671972771e-05, "loss": 2.3713, "step": 3834500 }, { "epoch": 19.0, "learning_rate": 4.050345813330163e-05, "loss": 2.3427, "step": 3835000 }, { "epoch": 19.0, "eval_accuracy": 0.6510998864965143, "eval_accuracy_mlm": 0.6054676656033432, "eval_accuracy_nsp": 0.8664373487501912, "eval_loss": 2.3667972087860107, "eval_runtime": 145.68, "eval_samples_per_second": 1750.131, "eval_steps_per_second": 72.927, "step": 3835017 }, { "epoch": 19.0, "learning_rate": 4.050221954687554e-05, "loss": 2.3276, "step": 3835500 }, { "epoch": 19.0, "learning_rate": 4.0500983437622314e-05, "loss": 2.3316, "step": 3836000 }, { "epoch": 19.01, "learning_rate": 4.049974485119623e-05, "loss": 2.3063, "step": 3836500 }, { "epoch": 19.01, "learning_rate": 4.049850626477015e-05, "loss": 2.3161, "step": 3837000 }, { "epoch": 19.01, "learning_rate": 4.0497267678344065e-05, "loss": 2.3319, "step": 3837500 }, { "epoch": 19.01, "learning_rate": 4.0496031569090834e-05, "loss": 2.3158, "step": 3838000 }, { "epoch": 19.02, "learning_rate": 4.049479298266475e-05, "loss": 2.3323, "step": 3838500 }, { "epoch": 19.02, "learning_rate": 4.049355439623867e-05, "loss": 2.3132, "step": 3839000 }, { "epoch": 19.02, "learning_rate": 4.049231580981258e-05, "loss": 2.3162, "step": 3839500 }, { "epoch": 19.02, "learning_rate": 4.0491077223386495e-05, "loss": 2.3272, "step": 3840000 }, { "epoch": 19.03, "learning_rate": 4.048983863696041e-05, "loss": 2.3237, "step": 3840500 }, { "epoch": 19.03, "learning_rate": 4.048860005053433e-05, "loss": 2.3282, "step": 3841000 }, { "epoch": 19.03, "learning_rate": 4.0487361464108245e-05, "loss": 2.3251, "step": 3841500 }, { "epoch": 19.03, "learning_rate": 4.0486122877682156e-05, "loss": 2.3182, "step": 3842000 }, { "epoch": 19.04, "learning_rate": 4.048488429125607e-05, "loss": 2.3366, "step": 3842500 }, { "epoch": 19.04, "learning_rate": 4.048364570482999e-05, "loss": 2.312, "step": 3843000 }, { "epoch": 19.04, "learning_rate": 4.0482407118403906e-05, "loss": 2.3254, "step": 3843500 }, { "epoch": 19.04, "learning_rate": 4.048116853197782e-05, "loss": 2.3187, "step": 3844000 }, { "epoch": 19.05, "learning_rate": 4.047992994555174e-05, "loss": 2.3117, "step": 3844500 }, { "epoch": 19.05, "learning_rate": 4.047869135912566e-05, "loss": 2.3228, "step": 3845000 }, { "epoch": 19.05, "learning_rate": 4.0477452772699574e-05, "loss": 2.301, "step": 3845500 }, { "epoch": 19.05, "learning_rate": 4.047621666344634e-05, "loss": 2.3024, "step": 3846000 }, { "epoch": 19.06, "learning_rate": 4.047497807702026e-05, "loss": 2.3125, "step": 3846500 }, { "epoch": 19.06, "learning_rate": 4.047373949059418e-05, "loss": 2.3355, "step": 3847000 }, { "epoch": 19.06, "learning_rate": 4.0472500904168094e-05, "loss": 2.3184, "step": 3847500 }, { "epoch": 19.06, "learning_rate": 4.047126231774201e-05, "loss": 2.2946, "step": 3848000 }, { "epoch": 19.07, "learning_rate": 4.047002620848878e-05, "loss": 2.3503, "step": 3848500 }, { "epoch": 19.07, "learning_rate": 4.046878762206269e-05, "loss": 2.3138, "step": 3849000 }, { "epoch": 19.07, "learning_rate": 4.0467549035636606e-05, "loss": 2.3092, "step": 3849500 }, { "epoch": 19.07, "learning_rate": 4.046631044921052e-05, "loss": 2.3133, "step": 3850000 }, { "epoch": 19.08, "learning_rate": 4.046507186278444e-05, "loss": 2.337, "step": 3850500 }, { "epoch": 19.08, "learning_rate": 4.046383327635836e-05, "loss": 2.3352, "step": 3851000 }, { "epoch": 19.08, "learning_rate": 4.0462594689932274e-05, "loss": 2.3084, "step": 3851500 }, { "epoch": 19.08, "learning_rate": 4.046135858067904e-05, "loss": 2.3219, "step": 3852000 }, { "epoch": 19.09, "learning_rate": 4.046011999425296e-05, "loss": 2.3261, "step": 3852500 }, { "epoch": 19.09, "learning_rate": 4.045888140782688e-05, "loss": 2.3286, "step": 3853000 }, { "epoch": 19.09, "learning_rate": 4.0457642821400794e-05, "loss": 2.2986, "step": 3853500 }, { "epoch": 19.09, "learning_rate": 4.045640423497471e-05, "loss": 2.3174, "step": 3854000 }, { "epoch": 19.1, "learning_rate": 4.045516564854863e-05, "loss": 2.3252, "step": 3854500 }, { "epoch": 19.1, "learning_rate": 4.0453927062122545e-05, "loss": 2.3386, "step": 3855000 }, { "epoch": 19.1, "learning_rate": 4.0452690952869307e-05, "loss": 2.3323, "step": 3855500 }, { "epoch": 19.1, "learning_rate": 4.0451452366443223e-05, "loss": 2.3168, "step": 3856000 }, { "epoch": 19.11, "learning_rate": 4.045021378001714e-05, "loss": 2.3312, "step": 3856500 }, { "epoch": 19.11, "learning_rate": 4.044897519359106e-05, "loss": 2.3278, "step": 3857000 }, { "epoch": 19.11, "learning_rate": 4.0447736607164974e-05, "loss": 2.3539, "step": 3857500 }, { "epoch": 19.11, "learning_rate": 4.044649802073889e-05, "loss": 2.3532, "step": 3858000 }, { "epoch": 19.12, "learning_rate": 4.044525943431281e-05, "loss": 2.3201, "step": 3858500 }, { "epoch": 19.12, "learning_rate": 4.0444020847886725e-05, "loss": 2.2934, "step": 3859000 }, { "epoch": 19.12, "learning_rate": 4.044278226146064e-05, "loss": 2.3019, "step": 3859500 }, { "epoch": 19.12, "learning_rate": 4.044154367503456e-05, "loss": 2.3039, "step": 3860000 }, { "epoch": 19.13, "learning_rate": 4.0440305088608476e-05, "loss": 2.3172, "step": 3860500 }, { "epoch": 19.13, "learning_rate": 4.0439071456528097e-05, "loss": 2.3581, "step": 3861000 }, { "epoch": 19.13, "learning_rate": 4.0437832870102013e-05, "loss": 2.3188, "step": 3861500 }, { "epoch": 19.13, "learning_rate": 4.043659428367593e-05, "loss": 2.3124, "step": 3862000 }, { "epoch": 19.14, "learning_rate": 4.043535569724984e-05, "loss": 2.3261, "step": 3862500 }, { "epoch": 19.14, "learning_rate": 4.043411958799661e-05, "loss": 2.3299, "step": 3863000 }, { "epoch": 19.14, "learning_rate": 4.0432881001570526e-05, "loss": 2.3548, "step": 3863500 }, { "epoch": 19.14, "learning_rate": 4.043164241514444e-05, "loss": 2.3506, "step": 3864000 }, { "epoch": 19.15, "learning_rate": 4.043040382871836e-05, "loss": 2.3457, "step": 3864500 }, { "epoch": 19.15, "learning_rate": 4.0429167719465136e-05, "loss": 2.3462, "step": 3865000 }, { "epoch": 19.15, "learning_rate": 4.042792913303905e-05, "loss": 2.3082, "step": 3865500 }, { "epoch": 19.15, "learning_rate": 4.042669054661296e-05, "loss": 2.3479, "step": 3866000 }, { "epoch": 19.16, "learning_rate": 4.042545196018688e-05, "loss": 2.3207, "step": 3866500 }, { "epoch": 19.16, "learning_rate": 4.042421585093365e-05, "loss": 2.332, "step": 3867000 }, { "epoch": 19.16, "learning_rate": 4.0422977264507565e-05, "loss": 2.3244, "step": 3867500 }, { "epoch": 19.16, "learning_rate": 4.042173867808148e-05, "loss": 2.3418, "step": 3868000 }, { "epoch": 19.17, "learning_rate": 4.04205000916554e-05, "loss": 2.3337, "step": 3868500 }, { "epoch": 19.17, "learning_rate": 4.041926150522931e-05, "loss": 2.3363, "step": 3869000 }, { "epoch": 19.17, "learning_rate": 4.0418022918803226e-05, "loss": 2.3161, "step": 3869500 }, { "epoch": 19.17, "learning_rate": 4.041678433237714e-05, "loss": 2.3293, "step": 3870000 }, { "epoch": 19.18, "learning_rate": 4.041554574595106e-05, "loss": 2.3562, "step": 3870500 }, { "epoch": 19.18, "learning_rate": 4.041430715952498e-05, "loss": 2.3249, "step": 3871000 }, { "epoch": 19.18, "learning_rate": 4.0413068573098894e-05, "loss": 2.3386, "step": 3871500 }, { "epoch": 19.18, "learning_rate": 4.041182998667281e-05, "loss": 2.3363, "step": 3872000 }, { "epoch": 19.19, "learning_rate": 4.041059140024673e-05, "loss": 2.3261, "step": 3872500 }, { "epoch": 19.19, "learning_rate": 4.04093552909935e-05, "loss": 2.3164, "step": 3873000 }, { "epoch": 19.19, "learning_rate": 4.0408119181740266e-05, "loss": 2.3363, "step": 3873500 }, { "epoch": 19.19, "learning_rate": 4.040688059531418e-05, "loss": 2.3191, "step": 3874000 }, { "epoch": 19.2, "learning_rate": 4.04056420088881e-05, "loss": 2.3325, "step": 3874500 }, { "epoch": 19.2, "learning_rate": 4.0404403422462016e-05, "loss": 2.3294, "step": 3875000 }, { "epoch": 19.2, "learning_rate": 4.0403164836035927e-05, "loss": 2.348, "step": 3875500 }, { "epoch": 19.2, "learning_rate": 4.0401926249609843e-05, "loss": 2.3279, "step": 3876000 }, { "epoch": 19.21, "learning_rate": 4.040068766318376e-05, "loss": 2.3494, "step": 3876500 }, { "epoch": 19.21, "learning_rate": 4.0399451553930536e-05, "loss": 2.3361, "step": 3877000 }, { "epoch": 19.21, "learning_rate": 4.039821296750445e-05, "loss": 2.329, "step": 3877500 }, { "epoch": 19.21, "learning_rate": 4.039697438107837e-05, "loss": 2.3137, "step": 3878000 }, { "epoch": 19.22, "learning_rate": 4.039573579465228e-05, "loss": 2.3535, "step": 3878500 }, { "epoch": 19.22, "learning_rate": 4.03944972082262e-05, "loss": 2.3296, "step": 3879000 }, { "epoch": 19.22, "learning_rate": 4.0393258621800114e-05, "loss": 2.3558, "step": 3879500 }, { "epoch": 19.22, "learning_rate": 4.039202251254688e-05, "loss": 2.3215, "step": 3880000 }, { "epoch": 19.23, "learning_rate": 4.03907839261208e-05, "loss": 2.3023, "step": 3880500 }, { "epoch": 19.23, "learning_rate": 4.038954781686757e-05, "loss": 2.3208, "step": 3881000 }, { "epoch": 19.23, "learning_rate": 4.0388309230441485e-05, "loss": 2.3491, "step": 3881500 }, { "epoch": 19.23, "learning_rate": 4.0387073121188254e-05, "loss": 2.3284, "step": 3882000 }, { "epoch": 19.24, "learning_rate": 4.038583453476217e-05, "loss": 2.3285, "step": 3882500 }, { "epoch": 19.24, "learning_rate": 4.038459594833609e-05, "loss": 2.3254, "step": 3883000 }, { "epoch": 19.24, "learning_rate": 4.038335736191e-05, "loss": 2.3343, "step": 3883500 }, { "epoch": 19.24, "learning_rate": 4.0382118775483915e-05, "loss": 2.3346, "step": 3884000 }, { "epoch": 19.25, "learning_rate": 4.038088018905783e-05, "loss": 2.3331, "step": 3884500 }, { "epoch": 19.25, "learning_rate": 4.037964160263175e-05, "loss": 2.3303, "step": 3885000 }, { "epoch": 19.25, "learning_rate": 4.0378403016205666e-05, "loss": 2.3329, "step": 3885500 }, { "epoch": 19.25, "learning_rate": 4.037716442977958e-05, "loss": 2.3502, "step": 3886000 }, { "epoch": 19.26, "learning_rate": 4.03759258433535e-05, "loss": 2.3128, "step": 3886500 }, { "epoch": 19.26, "learning_rate": 4.0374687256927417e-05, "loss": 2.3384, "step": 3887000 }, { "epoch": 19.26, "learning_rate": 4.0373448670501334e-05, "loss": 2.3581, "step": 3887500 }, { "epoch": 19.26, "learning_rate": 4.0372210084075244e-05, "loss": 2.3329, "step": 3888000 }, { "epoch": 19.26, "learning_rate": 4.037097397482202e-05, "loss": 2.3289, "step": 3888500 }, { "epoch": 19.27, "learning_rate": 4.0369735388395936e-05, "loss": 2.3199, "step": 3889000 }, { "epoch": 19.27, "learning_rate": 4.036849680196985e-05, "loss": 2.3655, "step": 3889500 }, { "epoch": 19.27, "learning_rate": 4.036725821554377e-05, "loss": 2.3308, "step": 3890000 }, { "epoch": 19.27, "learning_rate": 4.036601962911769e-05, "loss": 2.3335, "step": 3890500 }, { "epoch": 19.28, "learning_rate": 4.03647810426916e-05, "loss": 2.3548, "step": 3891000 }, { "epoch": 19.28, "learning_rate": 4.0363542456265514e-05, "loss": 2.3258, "step": 3891500 }, { "epoch": 19.28, "learning_rate": 4.036230386983943e-05, "loss": 2.3499, "step": 3892000 }, { "epoch": 19.28, "learning_rate": 4.036106528341335e-05, "loss": 2.3421, "step": 3892500 }, { "epoch": 19.29, "learning_rate": 4.035982917416012e-05, "loss": 2.3172, "step": 3893000 }, { "epoch": 19.29, "learning_rate": 4.0358593064906886e-05, "loss": 2.3416, "step": 3893500 }, { "epoch": 19.29, "learning_rate": 4.03573544784808e-05, "loss": 2.3245, "step": 3894000 }, { "epoch": 19.29, "learning_rate": 4.035611589205472e-05, "loss": 2.3291, "step": 3894500 }, { "epoch": 19.3, "learning_rate": 4.035487978280149e-05, "loss": 2.3485, "step": 3895000 }, { "epoch": 19.3, "learning_rate": 4.0353641196375405e-05, "loss": 2.3216, "step": 3895500 }, { "epoch": 19.3, "learning_rate": 4.035240260994932e-05, "loss": 2.352, "step": 3896000 }, { "epoch": 19.3, "learning_rate": 4.035116402352324e-05, "loss": 2.3244, "step": 3896500 }, { "epoch": 19.31, "learning_rate": 4.034992543709715e-05, "loss": 2.347, "step": 3897000 }, { "epoch": 19.31, "learning_rate": 4.0348686850671066e-05, "loss": 2.3369, "step": 3897500 }, { "epoch": 19.31, "learning_rate": 4.034744826424498e-05, "loss": 2.31, "step": 3898000 }, { "epoch": 19.31, "learning_rate": 4.03462096778189e-05, "loss": 2.3318, "step": 3898500 }, { "epoch": 19.32, "learning_rate": 4.034497109139282e-05, "loss": 2.3256, "step": 3899000 }, { "epoch": 19.32, "learning_rate": 4.0343732504966734e-05, "loss": 2.3221, "step": 3899500 }, { "epoch": 19.32, "learning_rate": 4.03424963957135e-05, "loss": 2.3195, "step": 3900000 }, { "epoch": 19.32, "learning_rate": 4.034125780928742e-05, "loss": 2.3262, "step": 3900500 }, { "epoch": 19.33, "learning_rate": 4.0340019222861336e-05, "loss": 2.3251, "step": 3901000 }, { "epoch": 19.33, "learning_rate": 4.033878063643525e-05, "loss": 2.3431, "step": 3901500 }, { "epoch": 19.33, "learning_rate": 4.033754452718202e-05, "loss": 2.3286, "step": 3902000 }, { "epoch": 19.33, "learning_rate": 4.033630594075594e-05, "loss": 2.3164, "step": 3902500 }, { "epoch": 19.34, "learning_rate": 4.03350698315027e-05, "loss": 2.3303, "step": 3903000 }, { "epoch": 19.34, "learning_rate": 4.033383124507662e-05, "loss": 2.3216, "step": 3903500 }, { "epoch": 19.34, "learning_rate": 4.033259513582339e-05, "loss": 2.3386, "step": 3904000 }, { "epoch": 19.34, "learning_rate": 4.0331356549397304e-05, "loss": 2.3393, "step": 3904500 }, { "epoch": 19.35, "learning_rate": 4.033011796297122e-05, "loss": 2.342, "step": 3905000 }, { "epoch": 19.35, "learning_rate": 4.032887937654514e-05, "loss": 2.3289, "step": 3905500 }, { "epoch": 19.35, "learning_rate": 4.0327640790119055e-05, "loss": 2.3325, "step": 3906000 }, { "epoch": 19.35, "learning_rate": 4.032640220369297e-05, "loss": 2.3331, "step": 3906500 }, { "epoch": 19.36, "learning_rate": 4.032516361726689e-05, "loss": 2.3308, "step": 3907000 }, { "epoch": 19.36, "learning_rate": 4.0323925030840805e-05, "loss": 2.3238, "step": 3907500 }, { "epoch": 19.36, "learning_rate": 4.032268644441472e-05, "loss": 2.3196, "step": 3908000 }, { "epoch": 19.36, "learning_rate": 4.032144785798864e-05, "loss": 2.3406, "step": 3908500 }, { "epoch": 19.37, "learning_rate": 4.0320209271562556e-05, "loss": 2.3241, "step": 3909000 }, { "epoch": 19.37, "learning_rate": 4.031897068513647e-05, "loss": 2.3429, "step": 3909500 }, { "epoch": 19.37, "learning_rate": 4.031773209871039e-05, "loss": 2.3428, "step": 3910000 }, { "epoch": 19.37, "learning_rate": 4.03164935122843e-05, "loss": 2.3308, "step": 3910500 }, { "epoch": 19.38, "learning_rate": 4.031525492585822e-05, "loss": 2.3214, "step": 3911000 }, { "epoch": 19.38, "learning_rate": 4.0314016339432134e-05, "loss": 2.3418, "step": 3911500 }, { "epoch": 19.38, "learning_rate": 4.031277775300605e-05, "loss": 2.3363, "step": 3912000 }, { "epoch": 19.38, "learning_rate": 4.031153916657997e-05, "loss": 2.3595, "step": 3912500 }, { "epoch": 19.39, "learning_rate": 4.031030058015388e-05, "loss": 2.3229, "step": 3913000 }, { "epoch": 19.39, "learning_rate": 4.0309064470900654e-05, "loss": 2.323, "step": 3913500 }, { "epoch": 19.39, "learning_rate": 4.030782836164742e-05, "loss": 2.3263, "step": 3914000 }, { "epoch": 19.39, "learning_rate": 4.030658977522134e-05, "loss": 2.3366, "step": 3914500 }, { "epoch": 19.4, "learning_rate": 4.0305351188795256e-05, "loss": 2.3512, "step": 3915000 }, { "epoch": 19.4, "learning_rate": 4.030411260236917e-05, "loss": 2.3201, "step": 3915500 }, { "epoch": 19.4, "learning_rate": 4.030287401594309e-05, "loss": 2.338, "step": 3916000 }, { "epoch": 19.4, "learning_rate": 4.030163542951701e-05, "loss": 2.3361, "step": 3916500 }, { "epoch": 19.41, "learning_rate": 4.0300396843090924e-05, "loss": 2.3406, "step": 3917000 }, { "epoch": 19.41, "learning_rate": 4.0299158256664834e-05, "loss": 2.3373, "step": 3917500 }, { "epoch": 19.41, "learning_rate": 4.029791967023875e-05, "loss": 2.3003, "step": 3918000 }, { "epoch": 19.41, "learning_rate": 4.029668108381267e-05, "loss": 2.3411, "step": 3918500 }, { "epoch": 19.42, "learning_rate": 4.029544497455944e-05, "loss": 2.3372, "step": 3919000 }, { "epoch": 19.42, "learning_rate": 4.0294206388133354e-05, "loss": 2.3438, "step": 3919500 }, { "epoch": 19.42, "learning_rate": 4.029296780170727e-05, "loss": 2.3411, "step": 3920000 }, { "epoch": 19.42, "learning_rate": 4.029172921528119e-05, "loss": 2.3428, "step": 3920500 }, { "epoch": 19.43, "learning_rate": 4.0290493106027956e-05, "loss": 2.3244, "step": 3921000 }, { "epoch": 19.43, "learning_rate": 4.028925451960187e-05, "loss": 2.339, "step": 3921500 }, { "epoch": 19.43, "learning_rate": 4.028801593317579e-05, "loss": 2.3394, "step": 3922000 }, { "epoch": 19.43, "learning_rate": 4.028677734674971e-05, "loss": 2.3725, "step": 3922500 }, { "epoch": 19.44, "learning_rate": 4.028554123749647e-05, "loss": 2.3452, "step": 3923000 }, { "epoch": 19.44, "learning_rate": 4.0284302651070386e-05, "loss": 2.307, "step": 3923500 }, { "epoch": 19.44, "learning_rate": 4.0283066541817155e-05, "loss": 2.3612, "step": 3924000 }, { "epoch": 19.44, "learning_rate": 4.028182795539107e-05, "loss": 2.3514, "step": 3924500 }, { "epoch": 19.45, "learning_rate": 4.028058936896499e-05, "loss": 2.3375, "step": 3925000 }, { "epoch": 19.45, "learning_rate": 4.0279350782538906e-05, "loss": 2.342, "step": 3925500 }, { "epoch": 19.45, "learning_rate": 4.027811219611282e-05, "loss": 2.3437, "step": 3926000 }, { "epoch": 19.45, "learning_rate": 4.027687608685959e-05, "loss": 2.3342, "step": 3926500 }, { "epoch": 19.46, "learning_rate": 4.027563750043351e-05, "loss": 2.3393, "step": 3927000 }, { "epoch": 19.46, "learning_rate": 4.027439891400742e-05, "loss": 2.3505, "step": 3927500 }, { "epoch": 19.46, "learning_rate": 4.0273160327581335e-05, "loss": 2.3358, "step": 3928000 }, { "epoch": 19.46, "learning_rate": 4.027192174115525e-05, "loss": 2.3349, "step": 3928500 }, { "epoch": 19.47, "learning_rate": 4.027068315472917e-05, "loss": 2.3537, "step": 3929000 }, { "epoch": 19.47, "learning_rate": 4.026944704547594e-05, "loss": 2.323, "step": 3929500 }, { "epoch": 19.47, "learning_rate": 4.0268208459049855e-05, "loss": 2.3303, "step": 3930000 }, { "epoch": 19.47, "learning_rate": 4.026696987262377e-05, "loss": 2.3301, "step": 3930500 }, { "epoch": 19.48, "learning_rate": 4.026573128619769e-05, "loss": 2.3492, "step": 3931000 }, { "epoch": 19.48, "learning_rate": 4.0264492699771606e-05, "loss": 2.3522, "step": 3931500 }, { "epoch": 19.48, "learning_rate": 4.026325411334552e-05, "loss": 2.3272, "step": 3932000 }, { "epoch": 19.48, "learning_rate": 4.026201552691944e-05, "loss": 2.3068, "step": 3932500 }, { "epoch": 19.49, "learning_rate": 4.026077694049336e-05, "loss": 2.3614, "step": 3933000 }, { "epoch": 19.49, "learning_rate": 4.0259540831240125e-05, "loss": 2.3525, "step": 3933500 }, { "epoch": 19.49, "learning_rate": 4.0258304721986894e-05, "loss": 2.3418, "step": 3934000 }, { "epoch": 19.49, "learning_rate": 4.025706613556081e-05, "loss": 2.3367, "step": 3934500 }, { "epoch": 19.5, "learning_rate": 4.025582754913473e-05, "loss": 2.3377, "step": 3935000 }, { "epoch": 19.5, "learning_rate": 4.025458896270864e-05, "loss": 2.3424, "step": 3935500 }, { "epoch": 19.5, "learning_rate": 4.0253350376282555e-05, "loss": 2.3364, "step": 3936000 }, { "epoch": 19.5, "learning_rate": 4.025211178985647e-05, "loss": 2.3356, "step": 3936500 }, { "epoch": 19.51, "learning_rate": 4.025087320343039e-05, "loss": 2.3515, "step": 3937000 }, { "epoch": 19.51, "learning_rate": 4.0249637094177165e-05, "loss": 2.3304, "step": 3937500 }, { "epoch": 19.51, "learning_rate": 4.024839850775108e-05, "loss": 2.3372, "step": 3938000 }, { "epoch": 19.51, "learning_rate": 4.024715992132499e-05, "loss": 2.3153, "step": 3938500 }, { "epoch": 19.52, "learning_rate": 4.024592133489891e-05, "loss": 2.3227, "step": 3939000 }, { "epoch": 19.52, "learning_rate": 4.024468522564568e-05, "loss": 2.3313, "step": 3939500 }, { "epoch": 19.52, "learning_rate": 4.0243446639219594e-05, "loss": 2.3482, "step": 3940000 }, { "epoch": 19.52, "learning_rate": 4.024221052996636e-05, "loss": 2.305, "step": 3940500 }, { "epoch": 19.53, "learning_rate": 4.024097194354028e-05, "loss": 2.3352, "step": 3941000 }, { "epoch": 19.53, "learning_rate": 4.02397333571142e-05, "loss": 2.3291, "step": 3941500 }, { "epoch": 19.53, "learning_rate": 4.0238494770688114e-05, "loss": 2.333, "step": 3942000 }, { "epoch": 19.53, "learning_rate": 4.0237258661434876e-05, "loss": 2.3466, "step": 3942500 }, { "epoch": 19.53, "learning_rate": 4.023602007500879e-05, "loss": 2.3585, "step": 3943000 }, { "epoch": 19.54, "learning_rate": 4.023478148858271e-05, "loss": 2.3453, "step": 3943500 }, { "epoch": 19.54, "learning_rate": 4.023354290215663e-05, "loss": 2.3602, "step": 3944000 }, { "epoch": 19.54, "learning_rate": 4.0232304315730544e-05, "loss": 2.3127, "step": 3944500 }, { "epoch": 19.54, "learning_rate": 4.023106572930446e-05, "loss": 2.331, "step": 3945000 }, { "epoch": 19.55, "learning_rate": 4.022982714287838e-05, "loss": 2.3243, "step": 3945500 }, { "epoch": 19.55, "learning_rate": 4.0228588556452295e-05, "loss": 2.3414, "step": 3946000 }, { "epoch": 19.55, "learning_rate": 4.022734997002621e-05, "loss": 2.3321, "step": 3946500 }, { "epoch": 19.55, "learning_rate": 4.022611138360013e-05, "loss": 2.3295, "step": 3947000 }, { "epoch": 19.56, "learning_rate": 4.0224872797174045e-05, "loss": 2.3272, "step": 3947500 }, { "epoch": 19.56, "learning_rate": 4.0223634210747955e-05, "loss": 2.3229, "step": 3948000 }, { "epoch": 19.56, "learning_rate": 4.022239810149473e-05, "loss": 2.3459, "step": 3948500 }, { "epoch": 19.56, "learning_rate": 4.022115951506865e-05, "loss": 2.3256, "step": 3949000 }, { "epoch": 19.57, "learning_rate": 4.0219920928642565e-05, "loss": 2.3478, "step": 3949500 }, { "epoch": 19.57, "learning_rate": 4.021868234221648e-05, "loss": 2.3261, "step": 3950000 }, { "epoch": 19.57, "learning_rate": 4.02174437557904e-05, "loss": 2.3297, "step": 3950500 }, { "epoch": 19.57, "learning_rate": 4.021620764653716e-05, "loss": 2.326, "step": 3951000 }, { "epoch": 19.58, "learning_rate": 4.021496906011108e-05, "loss": 2.33, "step": 3951500 }, { "epoch": 19.58, "learning_rate": 4.0213730473684995e-05, "loss": 2.3451, "step": 3952000 }, { "epoch": 19.58, "learning_rate": 4.021249188725891e-05, "loss": 2.3306, "step": 3952500 }, { "epoch": 19.58, "learning_rate": 4.021125330083283e-05, "loss": 2.3403, "step": 3953000 }, { "epoch": 19.59, "learning_rate": 4.0210014714406745e-05, "loss": 2.3302, "step": 3953500 }, { "epoch": 19.59, "learning_rate": 4.0208776127980656e-05, "loss": 2.3446, "step": 3954000 }, { "epoch": 19.59, "learning_rate": 4.020754001872743e-05, "loss": 2.3467, "step": 3954500 }, { "epoch": 19.59, "learning_rate": 4.020630143230135e-05, "loss": 2.3475, "step": 3955000 }, { "epoch": 19.6, "learning_rate": 4.0205062845875265e-05, "loss": 2.3296, "step": 3955500 }, { "epoch": 19.6, "learning_rate": 4.020382425944918e-05, "loss": 2.3456, "step": 3956000 }, { "epoch": 19.6, "learning_rate": 4.02025856730231e-05, "loss": 2.3194, "step": 3956500 }, { "epoch": 19.6, "learning_rate": 4.020134956376986e-05, "loss": 2.3377, "step": 3957000 }, { "epoch": 19.61, "learning_rate": 4.020011097734378e-05, "loss": 2.3476, "step": 3957500 }, { "epoch": 19.61, "learning_rate": 4.0198872390917695e-05, "loss": 2.3331, "step": 3958000 }, { "epoch": 19.61, "learning_rate": 4.0197636281664464e-05, "loss": 2.3387, "step": 3958500 }, { "epoch": 19.61, "learning_rate": 4.019639769523838e-05, "loss": 2.3505, "step": 3959000 }, { "epoch": 19.62, "learning_rate": 4.01951591088123e-05, "loss": 2.3324, "step": 3959500 }, { "epoch": 19.62, "learning_rate": 4.0193920522386214e-05, "loss": 2.32, "step": 3960000 }, { "epoch": 19.62, "learning_rate": 4.019268193596013e-05, "loss": 2.3419, "step": 3960500 }, { "epoch": 19.62, "learning_rate": 4.019144334953405e-05, "loss": 2.3614, "step": 3961000 }, { "epoch": 19.63, "learning_rate": 4.0190204763107965e-05, "loss": 2.338, "step": 3961500 }, { "epoch": 19.63, "learning_rate": 4.018896865385473e-05, "loss": 2.3367, "step": 3962000 }, { "epoch": 19.63, "learning_rate": 4.0187730067428644e-05, "loss": 2.3342, "step": 3962500 }, { "epoch": 19.63, "learning_rate": 4.018649148100256e-05, "loss": 2.3435, "step": 3963000 }, { "epoch": 19.64, "learning_rate": 4.018525537174933e-05, "loss": 2.3708, "step": 3963500 }, { "epoch": 19.64, "learning_rate": 4.018401678532325e-05, "loss": 2.3477, "step": 3964000 }, { "epoch": 19.64, "learning_rate": 4.0182780676070016e-05, "loss": 2.3401, "step": 3964500 }, { "epoch": 19.64, "learning_rate": 4.018154208964393e-05, "loss": 2.2965, "step": 3965000 }, { "epoch": 19.65, "learning_rate": 4.018030350321785e-05, "loss": 2.3713, "step": 3965500 }, { "epoch": 19.65, "learning_rate": 4.0179064916791766e-05, "loss": 2.3456, "step": 3966000 }, { "epoch": 19.65, "learning_rate": 4.017782633036568e-05, "loss": 2.3476, "step": 3966500 }, { "epoch": 19.65, "learning_rate": 4.01765877439396e-05, "loss": 2.3343, "step": 3967000 }, { "epoch": 19.66, "learning_rate": 4.017534915751352e-05, "loss": 2.3137, "step": 3967500 }, { "epoch": 19.66, "learning_rate": 4.0174110571087434e-05, "loss": 2.3458, "step": 3968000 }, { "epoch": 19.66, "learning_rate": 4.017287198466135e-05, "loss": 2.3405, "step": 3968500 }, { "epoch": 19.66, "learning_rate": 4.017163339823526e-05, "loss": 2.3362, "step": 3969000 }, { "epoch": 19.67, "learning_rate": 4.017039481180918e-05, "loss": 2.3175, "step": 3969500 }, { "epoch": 19.67, "learning_rate": 4.0169156225383095e-05, "loss": 2.3659, "step": 3970000 }, { "epoch": 19.67, "learning_rate": 4.016791763895701e-05, "loss": 2.3572, "step": 3970500 }, { "epoch": 19.67, "learning_rate": 4.016668400687663e-05, "loss": 2.3597, "step": 3971000 }, { "epoch": 19.68, "learning_rate": 4.016544542045055e-05, "loss": 2.3242, "step": 3971500 }, { "epoch": 19.68, "learning_rate": 4.0164206834024466e-05, "loss": 2.3366, "step": 3972000 }, { "epoch": 19.68, "learning_rate": 4.0162968247598383e-05, "loss": 2.3619, "step": 3972500 }, { "epoch": 19.68, "learning_rate": 4.01617296611723e-05, "loss": 2.3342, "step": 3973000 }, { "epoch": 19.69, "learning_rate": 4.016049107474622e-05, "loss": 2.3261, "step": 3973500 }, { "epoch": 19.69, "learning_rate": 4.0159252488320134e-05, "loss": 2.3447, "step": 3974000 }, { "epoch": 19.69, "learning_rate": 4.015801390189405e-05, "loss": 2.3384, "step": 3974500 }, { "epoch": 19.69, "learning_rate": 4.015677531546797e-05, "loss": 2.3568, "step": 3975000 }, { "epoch": 19.7, "learning_rate": 4.015553672904188e-05, "loss": 2.3522, "step": 3975500 }, { "epoch": 19.7, "learning_rate": 4.0154298142615795e-05, "loss": 2.3391, "step": 3976000 }, { "epoch": 19.7, "learning_rate": 4.015305955618971e-05, "loss": 2.3405, "step": 3976500 }, { "epoch": 19.7, "learning_rate": 4.015182096976363e-05, "loss": 2.3432, "step": 3977000 }, { "epoch": 19.71, "learning_rate": 4.0150582383337546e-05, "loss": 2.3234, "step": 3977500 }, { "epoch": 19.71, "learning_rate": 4.0149346274084315e-05, "loss": 2.3282, "step": 3978000 }, { "epoch": 19.71, "learning_rate": 4.014810768765823e-05, "loss": 2.3459, "step": 3978500 }, { "epoch": 19.71, "learning_rate": 4.014686910123215e-05, "loss": 2.3324, "step": 3979000 }, { "epoch": 19.72, "learning_rate": 4.0145630514806066e-05, "loss": 2.3331, "step": 3979500 }, { "epoch": 19.72, "learning_rate": 4.014439192837998e-05, "loss": 2.3306, "step": 3980000 }, { "epoch": 19.72, "learning_rate": 4.014315581912675e-05, "loss": 2.3346, "step": 3980500 }, { "epoch": 19.72, "learning_rate": 4.014191723270067e-05, "loss": 2.3382, "step": 3981000 }, { "epoch": 19.73, "learning_rate": 4.0140678646274585e-05, "loss": 2.3328, "step": 3981500 }, { "epoch": 19.73, "learning_rate": 4.01394400598485e-05, "loss": 2.3343, "step": 3982000 }, { "epoch": 19.73, "learning_rate": 4.013820147342241e-05, "loss": 2.3313, "step": 3982500 }, { "epoch": 19.73, "learning_rate": 4.013696288699633e-05, "loss": 2.3678, "step": 3983000 }, { "epoch": 19.74, "learning_rate": 4.0135724300570246e-05, "loss": 2.3607, "step": 3983500 }, { "epoch": 19.74, "learning_rate": 4.013448571414416e-05, "loss": 2.3278, "step": 3984000 }, { "epoch": 19.74, "learning_rate": 4.013324712771808e-05, "loss": 2.3487, "step": 3984500 }, { "epoch": 19.74, "learning_rate": 4.0132008541292e-05, "loss": 2.3366, "step": 3985000 }, { "epoch": 19.75, "learning_rate": 4.013076995486591e-05, "loss": 2.364, "step": 3985500 }, { "epoch": 19.75, "learning_rate": 4.012953384561268e-05, "loss": 2.3186, "step": 3986000 }, { "epoch": 19.75, "learning_rate": 4.012829773635945e-05, "loss": 2.3758, "step": 3986500 }, { "epoch": 19.75, "learning_rate": 4.012705914993337e-05, "loss": 2.3873, "step": 3987000 }, { "epoch": 19.76, "learning_rate": 4.0125820563507285e-05, "loss": 2.3456, "step": 3987500 }, { "epoch": 19.76, "learning_rate": 4.01245819770812e-05, "loss": 2.342, "step": 3988000 }, { "epoch": 19.76, "learning_rate": 4.012334339065512e-05, "loss": 2.3393, "step": 3988500 }, { "epoch": 19.76, "learning_rate": 4.012210480422903e-05, "loss": 2.3263, "step": 3989000 }, { "epoch": 19.77, "learning_rate": 4.0120866217802946e-05, "loss": 2.3438, "step": 3989500 }, { "epoch": 19.77, "learning_rate": 4.011962763137686e-05, "loss": 2.348, "step": 3990000 }, { "epoch": 19.77, "learning_rate": 4.011838904495078e-05, "loss": 2.3395, "step": 3990500 }, { "epoch": 19.77, "learning_rate": 4.011715293569755e-05, "loss": 2.3636, "step": 3991000 }, { "epoch": 19.78, "learning_rate": 4.0115914349271466e-05, "loss": 2.3862, "step": 3991500 }, { "epoch": 19.78, "learning_rate": 4.0114678240018235e-05, "loss": 2.3051, "step": 3992000 }, { "epoch": 19.78, "learning_rate": 4.011343965359215e-05, "loss": 2.3611, "step": 3992500 }, { "epoch": 19.78, "learning_rate": 4.011220106716607e-05, "loss": 2.3512, "step": 3993000 }, { "epoch": 19.79, "learning_rate": 4.0110962480739985e-05, "loss": 2.3359, "step": 3993500 }, { "epoch": 19.79, "learning_rate": 4.01097238943139e-05, "loss": 2.3261, "step": 3994000 }, { "epoch": 19.79, "learning_rate": 4.010848530788782e-05, "loss": 2.369, "step": 3994500 }, { "epoch": 19.79, "learning_rate": 4.010724919863458e-05, "loss": 2.3553, "step": 3995000 }, { "epoch": 19.8, "learning_rate": 4.01060106122085e-05, "loss": 2.3294, "step": 3995500 }, { "epoch": 19.8, "learning_rate": 4.0104772025782415e-05, "loss": 2.3247, "step": 3996000 }, { "epoch": 19.8, "learning_rate": 4.010353343935633e-05, "loss": 2.3215, "step": 3996500 }, { "epoch": 19.8, "learning_rate": 4.010229485293025e-05, "loss": 2.3778, "step": 3997000 }, { "epoch": 19.8, "learning_rate": 4.0101056266504166e-05, "loss": 2.3558, "step": 3997500 }, { "epoch": 19.81, "learning_rate": 4.009981768007808e-05, "loss": 2.3269, "step": 3998000 }, { "epoch": 19.81, "learning_rate": 4.0098579093652e-05, "loss": 2.3752, "step": 3998500 }, { "epoch": 19.81, "learning_rate": 4.009734050722592e-05, "loss": 2.3359, "step": 3999000 }, { "epoch": 19.81, "learning_rate": 4.0096101920799834e-05, "loss": 2.3454, "step": 3999500 }, { "epoch": 19.82, "learning_rate": 4.009486333437375e-05, "loss": 2.3231, "step": 4000000 }, { "epoch": 19.82, "learning_rate": 4.009362474794767e-05, "loss": 2.3238, "step": 4000500 }, { "epoch": 19.82, "learning_rate": 4.009238616152158e-05, "loss": 2.3168, "step": 4001000 }, { "epoch": 19.82, "learning_rate": 4.00911525294412e-05, "loss": 2.3225, "step": 4001500 }, { "epoch": 19.83, "learning_rate": 4.0089913943015115e-05, "loss": 2.3095, "step": 4002000 }, { "epoch": 19.83, "learning_rate": 4.008867535658903e-05, "loss": 2.3578, "step": 4002500 }, { "epoch": 19.83, "learning_rate": 4.008743677016295e-05, "loss": 2.3386, "step": 4003000 }, { "epoch": 19.83, "learning_rate": 4.0086198183736866e-05, "loss": 2.3406, "step": 4003500 }, { "epoch": 19.84, "learning_rate": 4.008496455165649e-05, "loss": 2.3178, "step": 4004000 }, { "epoch": 19.84, "learning_rate": 4.0083725965230404e-05, "loss": 2.3481, "step": 4004500 }, { "epoch": 19.84, "learning_rate": 4.008248737880432e-05, "loss": 2.3606, "step": 4005000 }, { "epoch": 19.84, "learning_rate": 4.008124879237824e-05, "loss": 2.3519, "step": 4005500 }, { "epoch": 19.85, "learning_rate": 4.008001020595215e-05, "loss": 2.3332, "step": 4006000 }, { "epoch": 19.85, "learning_rate": 4.0078771619526065e-05, "loss": 2.3432, "step": 4006500 }, { "epoch": 19.85, "learning_rate": 4.007753551027284e-05, "loss": 2.3641, "step": 4007000 }, { "epoch": 19.85, "learning_rate": 4.007629692384676e-05, "loss": 2.3344, "step": 4007500 }, { "epoch": 19.86, "learning_rate": 4.007505833742067e-05, "loss": 2.3358, "step": 4008000 }, { "epoch": 19.86, "learning_rate": 4.0073819750994584e-05, "loss": 2.3221, "step": 4008500 }, { "epoch": 19.86, "learning_rate": 4.00725811645685e-05, "loss": 2.3158, "step": 4009000 }, { "epoch": 19.86, "learning_rate": 4.007134505531528e-05, "loss": 2.3348, "step": 4009500 }, { "epoch": 19.87, "learning_rate": 4.0070106468889194e-05, "loss": 2.3408, "step": 4010000 }, { "epoch": 19.87, "learning_rate": 4.0068870359635956e-05, "loss": 2.339, "step": 4010500 }, { "epoch": 19.87, "learning_rate": 4.0067634250382724e-05, "loss": 2.3199, "step": 4011000 }, { "epoch": 19.87, "learning_rate": 4.006639566395664e-05, "loss": 2.3297, "step": 4011500 }, { "epoch": 19.88, "learning_rate": 4.006515707753056e-05, "loss": 2.3532, "step": 4012000 }, { "epoch": 19.88, "learning_rate": 4.0063918491104475e-05, "loss": 2.3398, "step": 4012500 }, { "epoch": 19.88, "learning_rate": 4.006267990467839e-05, "loss": 2.328, "step": 4013000 }, { "epoch": 19.88, "learning_rate": 4.006144131825231e-05, "loss": 2.3538, "step": 4013500 }, { "epoch": 19.89, "learning_rate": 4.0060202731826226e-05, "loss": 2.3285, "step": 4014000 }, { "epoch": 19.89, "learning_rate": 4.005896414540014e-05, "loss": 2.3619, "step": 4014500 }, { "epoch": 19.89, "learning_rate": 4.005772555897406e-05, "loss": 2.3521, "step": 4015000 }, { "epoch": 19.89, "learning_rate": 4.005648697254798e-05, "loss": 2.3502, "step": 4015500 }, { "epoch": 19.9, "learning_rate": 4.0055248386121894e-05, "loss": 2.3306, "step": 4016000 }, { "epoch": 19.9, "learning_rate": 4.0054012276868656e-05, "loss": 2.3506, "step": 4016500 }, { "epoch": 19.9, "learning_rate": 4.005277369044257e-05, "loss": 2.3259, "step": 4017000 }, { "epoch": 19.9, "learning_rate": 4.005153758118934e-05, "loss": 2.3518, "step": 4017500 }, { "epoch": 19.91, "learning_rate": 4.005029899476326e-05, "loss": 2.3261, "step": 4018000 }, { "epoch": 19.91, "learning_rate": 4.0049060408337175e-05, "loss": 2.3528, "step": 4018500 }, { "epoch": 19.91, "learning_rate": 4.004782182191109e-05, "loss": 2.3506, "step": 4019000 }, { "epoch": 19.91, "learning_rate": 4.004658323548501e-05, "loss": 2.3426, "step": 4019500 }, { "epoch": 19.92, "learning_rate": 4.0045344649058926e-05, "loss": 2.3404, "step": 4020000 }, { "epoch": 19.92, "learning_rate": 4.004410606263284e-05, "loss": 2.373, "step": 4020500 }, { "epoch": 19.92, "learning_rate": 4.004286747620676e-05, "loss": 2.3435, "step": 4021000 }, { "epoch": 19.92, "learning_rate": 4.004163136695352e-05, "loss": 2.329, "step": 4021500 }, { "epoch": 19.93, "learning_rate": 4.004039278052744e-05, "loss": 2.336, "step": 4022000 }, { "epoch": 19.93, "learning_rate": 4.0039154194101356e-05, "loss": 2.3432, "step": 4022500 }, { "epoch": 19.93, "learning_rate": 4.003791560767527e-05, "loss": 2.3376, "step": 4023000 }, { "epoch": 19.93, "learning_rate": 4.003667702124919e-05, "loss": 2.3447, "step": 4023500 }, { "epoch": 19.94, "learning_rate": 4.003543843482311e-05, "loss": 2.3564, "step": 4024000 }, { "epoch": 19.94, "learning_rate": 4.0034202325569875e-05, "loss": 2.3494, "step": 4024500 }, { "epoch": 19.94, "learning_rate": 4.003296373914379e-05, "loss": 2.3439, "step": 4025000 }, { "epoch": 19.94, "learning_rate": 4.003172515271771e-05, "loss": 2.3499, "step": 4025500 }, { "epoch": 19.95, "learning_rate": 4.003048904346448e-05, "loss": 2.311, "step": 4026000 }, { "epoch": 19.95, "learning_rate": 4.0029250457038395e-05, "loss": 2.3349, "step": 4026500 }, { "epoch": 19.95, "learning_rate": 4.0028011870612305e-05, "loss": 2.3582, "step": 4027000 }, { "epoch": 19.95, "learning_rate": 4.002677328418622e-05, "loss": 2.3338, "step": 4027500 }, { "epoch": 19.96, "learning_rate": 4.002553469776014e-05, "loss": 2.3483, "step": 4028000 }, { "epoch": 19.96, "learning_rate": 4.0024296111334056e-05, "loss": 2.3628, "step": 4028500 }, { "epoch": 19.96, "learning_rate": 4.002305752490797e-05, "loss": 2.3416, "step": 4029000 }, { "epoch": 19.96, "learning_rate": 4.002182141565474e-05, "loss": 2.3288, "step": 4029500 }, { "epoch": 19.97, "learning_rate": 4.002058282922866e-05, "loss": 2.3303, "step": 4030000 }, { "epoch": 19.97, "learning_rate": 4.0019344242802576e-05, "loss": 2.3498, "step": 4030500 }, { "epoch": 19.97, "learning_rate": 4.001810565637649e-05, "loss": 2.3482, "step": 4031000 }, { "epoch": 19.97, "learning_rate": 4.001686706995041e-05, "loss": 2.3286, "step": 4031500 }, { "epoch": 19.98, "learning_rate": 4.0015628483524326e-05, "loss": 2.3441, "step": 4032000 }, { "epoch": 19.98, "learning_rate": 4.001438989709824e-05, "loss": 2.3522, "step": 4032500 }, { "epoch": 19.98, "learning_rate": 4.001315131067216e-05, "loss": 2.3592, "step": 4033000 }, { "epoch": 19.98, "learning_rate": 4.001191272424608e-05, "loss": 2.3551, "step": 4033500 }, { "epoch": 19.99, "learning_rate": 4.0010674137819994e-05, "loss": 2.3237, "step": 4034000 }, { "epoch": 19.99, "learning_rate": 4.000943555139391e-05, "loss": 2.3385, "step": 4034500 }, { "epoch": 19.99, "learning_rate": 4.000819696496783e-05, "loss": 2.3485, "step": 4035000 }, { "epoch": 19.99, "learning_rate": 4.0006958378541745e-05, "loss": 2.3399, "step": 4035500 }, { "epoch": 20.0, "learning_rate": 4.000572226928851e-05, "loss": 2.3312, "step": 4036000 }, { "epoch": 20.0, "learning_rate": 4.0004483682862424e-05, "loss": 2.3215, "step": 4036500 }, { "epoch": 20.0, "eval_accuracy": 0.6514623229212978, "eval_accuracy_mlm": 0.6059239055867153, "eval_accuracy_nsp": 0.8662569275844352, "eval_loss": 2.360621213912964, "eval_runtime": 145.9157, "eval_samples_per_second": 1747.304, "eval_steps_per_second": 72.809, "step": 4036860 }, { "epoch": 20.0, "learning_rate": 4.000324509643634e-05, "loss": 2.3361, "step": 4037000 }, { "epoch": 20.0, "learning_rate": 4.000200651001026e-05, "loss": 2.331, "step": 4037500 }, { "epoch": 20.01, "learning_rate": 4.0000767923584175e-05, "loss": 2.3296, "step": 4038000 }, { "epoch": 20.01, "learning_rate": 3.999952933715809e-05, "loss": 2.324, "step": 4038500 }, { "epoch": 20.01, "learning_rate": 3.999829075073201e-05, "loss": 2.274, "step": 4039000 }, { "epoch": 20.01, "learning_rate": 3.999705216430592e-05, "loss": 2.3324, "step": 4039500 }, { "epoch": 20.02, "learning_rate": 3.9995813577879836e-05, "loss": 2.3256, "step": 4040000 }, { "epoch": 20.02, "learning_rate": 3.999457499145375e-05, "loss": 2.2992, "step": 4040500 }, { "epoch": 20.02, "learning_rate": 3.999333640502767e-05, "loss": 2.2902, "step": 4041000 }, { "epoch": 20.02, "learning_rate": 3.9992097818601586e-05, "loss": 2.2879, "step": 4041500 }, { "epoch": 20.03, "learning_rate": 3.999086170934836e-05, "loss": 2.3138, "step": 4042000 }, { "epoch": 20.03, "learning_rate": 3.998962312292227e-05, "loss": 2.3257, "step": 4042500 }, { "epoch": 20.03, "learning_rate": 3.998838453649619e-05, "loss": 2.3098, "step": 4043000 }, { "epoch": 20.03, "learning_rate": 3.9987145950070106e-05, "loss": 2.2984, "step": 4043500 }, { "epoch": 20.04, "learning_rate": 3.998590736364402e-05, "loss": 2.2918, "step": 4044000 }, { "epoch": 20.04, "learning_rate": 3.998466877721794e-05, "loss": 2.2826, "step": 4044500 }, { "epoch": 20.04, "learning_rate": 3.998343019079186e-05, "loss": 2.3165, "step": 4045000 }, { "epoch": 20.04, "learning_rate": 3.998219408153862e-05, "loss": 2.3079, "step": 4045500 }, { "epoch": 20.05, "learning_rate": 3.9980955495112536e-05, "loss": 2.3265, "step": 4046000 }, { "epoch": 20.05, "learning_rate": 3.997971690868645e-05, "loss": 2.3006, "step": 4046500 }, { "epoch": 20.05, "learning_rate": 3.997848079943323e-05, "loss": 2.3074, "step": 4047000 }, { "epoch": 20.05, "learning_rate": 3.9977242213007145e-05, "loss": 2.3117, "step": 4047500 }, { "epoch": 20.06, "learning_rate": 3.997600362658106e-05, "loss": 2.3119, "step": 4048000 }, { "epoch": 20.06, "learning_rate": 3.997476504015497e-05, "loss": 2.3299, "step": 4048500 }, { "epoch": 20.06, "learning_rate": 3.997352645372889e-05, "loss": 2.3168, "step": 4049000 }, { "epoch": 20.06, "learning_rate": 3.9972287867302806e-05, "loss": 2.3114, "step": 4049500 }, { "epoch": 20.07, "learning_rate": 3.997104928087672e-05, "loss": 2.3004, "step": 4050000 }, { "epoch": 20.07, "learning_rate": 3.996981069445064e-05, "loss": 2.3204, "step": 4050500 }, { "epoch": 20.07, "learning_rate": 3.996857458519741e-05, "loss": 2.3282, "step": 4051000 }, { "epoch": 20.07, "learning_rate": 3.9967335998771326e-05, "loss": 2.3293, "step": 4051500 }, { "epoch": 20.08, "learning_rate": 3.9966097412345236e-05, "loss": 2.3379, "step": 4052000 }, { "epoch": 20.08, "learning_rate": 3.996485882591915e-05, "loss": 2.3334, "step": 4052500 }, { "epoch": 20.08, "learning_rate": 3.996362271666593e-05, "loss": 2.3042, "step": 4053000 }, { "epoch": 20.08, "learning_rate": 3.9962384130239845e-05, "loss": 2.3357, "step": 4053500 }, { "epoch": 20.08, "learning_rate": 3.9961148020986614e-05, "loss": 2.3007, "step": 4054000 }, { "epoch": 20.09, "learning_rate": 3.9959909434560524e-05, "loss": 2.3134, "step": 4054500 }, { "epoch": 20.09, "learning_rate": 3.995867084813444e-05, "loss": 2.3141, "step": 4055000 }, { "epoch": 20.09, "learning_rate": 3.995743226170836e-05, "loss": 2.3416, "step": 4055500 }, { "epoch": 20.09, "learning_rate": 3.9956193675282275e-05, "loss": 2.3416, "step": 4056000 }, { "epoch": 20.1, "learning_rate": 3.995495508885619e-05, "loss": 2.3061, "step": 4056500 }, { "epoch": 20.1, "learning_rate": 3.995371650243011e-05, "loss": 2.3172, "step": 4057000 }, { "epoch": 20.1, "learning_rate": 3.9952477916004026e-05, "loss": 2.3176, "step": 4057500 }, { "epoch": 20.1, "learning_rate": 3.9951241806750795e-05, "loss": 2.3158, "step": 4058000 }, { "epoch": 20.11, "learning_rate": 3.995000322032471e-05, "loss": 2.3287, "step": 4058500 }, { "epoch": 20.11, "learning_rate": 3.994876711107148e-05, "loss": 2.3434, "step": 4059000 }, { "epoch": 20.11, "learning_rate": 3.99475285246454e-05, "loss": 2.3069, "step": 4059500 }, { "epoch": 20.11, "learning_rate": 3.9946289938219314e-05, "loss": 2.3125, "step": 4060000 }, { "epoch": 20.12, "learning_rate": 3.994505135179323e-05, "loss": 2.3109, "step": 4060500 }, { "epoch": 20.12, "learning_rate": 3.994381276536714e-05, "loss": 2.3453, "step": 4061000 }, { "epoch": 20.12, "learning_rate": 3.994257417894106e-05, "loss": 2.339, "step": 4061500 }, { "epoch": 20.12, "learning_rate": 3.9941335592514975e-05, "loss": 2.3199, "step": 4062000 }, { "epoch": 20.13, "learning_rate": 3.994009700608889e-05, "loss": 2.3294, "step": 4062500 }, { "epoch": 20.13, "learning_rate": 3.993885841966281e-05, "loss": 2.3076, "step": 4063000 }, { "epoch": 20.13, "learning_rate": 3.9937619833236726e-05, "loss": 2.3158, "step": 4063500 }, { "epoch": 20.13, "learning_rate": 3.993638124681064e-05, "loss": 2.3061, "step": 4064000 }, { "epoch": 20.14, "learning_rate": 3.993514266038455e-05, "loss": 2.343, "step": 4064500 }, { "epoch": 20.14, "learning_rate": 3.993390655113133e-05, "loss": 2.3145, "step": 4065000 }, { "epoch": 20.14, "learning_rate": 3.9932667964705246e-05, "loss": 2.2968, "step": 4065500 }, { "epoch": 20.14, "learning_rate": 3.993142937827916e-05, "loss": 2.3266, "step": 4066000 }, { "epoch": 20.15, "learning_rate": 3.993019079185308e-05, "loss": 2.3402, "step": 4066500 }, { "epoch": 20.15, "learning_rate": 3.9928952205426996e-05, "loss": 2.3157, "step": 4067000 }, { "epoch": 20.15, "learning_rate": 3.992771857334661e-05, "loss": 2.3301, "step": 4067500 }, { "epoch": 20.15, "learning_rate": 3.992647998692053e-05, "loss": 2.3513, "step": 4068000 }, { "epoch": 20.16, "learning_rate": 3.9925241400494444e-05, "loss": 2.3077, "step": 4068500 }, { "epoch": 20.16, "learning_rate": 3.992400281406836e-05, "loss": 2.3166, "step": 4069000 }, { "epoch": 20.16, "learning_rate": 3.992276422764228e-05, "loss": 2.3251, "step": 4069500 }, { "epoch": 20.16, "learning_rate": 3.992152811838905e-05, "loss": 2.3054, "step": 4070000 }, { "epoch": 20.17, "learning_rate": 3.9920289531962964e-05, "loss": 2.322, "step": 4070500 }, { "epoch": 20.17, "learning_rate": 3.9919053422709726e-05, "loss": 2.3153, "step": 4071000 }, { "epoch": 20.17, "learning_rate": 3.991781483628364e-05, "loss": 2.2919, "step": 4071500 }, { "epoch": 20.17, "learning_rate": 3.991657624985756e-05, "loss": 2.3338, "step": 4072000 }, { "epoch": 20.18, "learning_rate": 3.9915337663431476e-05, "loss": 2.3325, "step": 4072500 }, { "epoch": 20.18, "learning_rate": 3.9914099077005393e-05, "loss": 2.3226, "step": 4073000 }, { "epoch": 20.18, "learning_rate": 3.991286049057931e-05, "loss": 2.3341, "step": 4073500 }, { "epoch": 20.18, "learning_rate": 3.991162190415323e-05, "loss": 2.2985, "step": 4074000 }, { "epoch": 20.19, "learning_rate": 3.9910383317727144e-05, "loss": 2.3138, "step": 4074500 }, { "epoch": 20.19, "learning_rate": 3.990914473130106e-05, "loss": 2.3167, "step": 4075000 }, { "epoch": 20.19, "learning_rate": 3.990790614487498e-05, "loss": 2.3082, "step": 4075500 }, { "epoch": 20.19, "learning_rate": 3.9906667558448895e-05, "loss": 2.3136, "step": 4076000 }, { "epoch": 20.2, "learning_rate": 3.990542897202281e-05, "loss": 2.3414, "step": 4076500 }, { "epoch": 20.2, "learning_rate": 3.990419038559673e-05, "loss": 2.298, "step": 4077000 }, { "epoch": 20.2, "learning_rate": 3.99029542763435e-05, "loss": 2.317, "step": 4077500 }, { "epoch": 20.2, "learning_rate": 3.990171816709026e-05, "loss": 2.3172, "step": 4078000 }, { "epoch": 20.21, "learning_rate": 3.9900479580664177e-05, "loss": 2.3035, "step": 4078500 }, { "epoch": 20.21, "learning_rate": 3.9899240994238094e-05, "loss": 2.3391, "step": 4079000 }, { "epoch": 20.21, "learning_rate": 3.989800240781201e-05, "loss": 2.3139, "step": 4079500 }, { "epoch": 20.21, "learning_rate": 3.989676382138593e-05, "loss": 2.297, "step": 4080000 }, { "epoch": 20.22, "learning_rate": 3.9895527712132696e-05, "loss": 2.326, "step": 4080500 }, { "epoch": 20.22, "learning_rate": 3.989428912570661e-05, "loss": 2.3263, "step": 4081000 }, { "epoch": 20.22, "learning_rate": 3.989305053928053e-05, "loss": 2.3248, "step": 4081500 }, { "epoch": 20.22, "learning_rate": 3.98918144300273e-05, "loss": 2.3094, "step": 4082000 }, { "epoch": 20.23, "learning_rate": 3.9890575843601216e-05, "loss": 2.3216, "step": 4082500 }, { "epoch": 20.23, "learning_rate": 3.988933725717513e-05, "loss": 2.3138, "step": 4083000 }, { "epoch": 20.23, "learning_rate": 3.988809867074905e-05, "loss": 2.3215, "step": 4083500 }, { "epoch": 20.23, "learning_rate": 3.9886860084322967e-05, "loss": 2.3122, "step": 4084000 }, { "epoch": 20.24, "learning_rate": 3.988562149789688e-05, "loss": 2.3316, "step": 4084500 }, { "epoch": 20.24, "learning_rate": 3.988438538864365e-05, "loss": 2.324, "step": 4085000 }, { "epoch": 20.24, "learning_rate": 3.988314680221757e-05, "loss": 2.3268, "step": 4085500 }, { "epoch": 20.24, "learning_rate": 3.9881908215791486e-05, "loss": 2.2985, "step": 4086000 }, { "epoch": 20.25, "learning_rate": 3.98806696293654e-05, "loss": 2.3345, "step": 4086500 }, { "epoch": 20.25, "learning_rate": 3.987943104293931e-05, "loss": 2.3098, "step": 4087000 }, { "epoch": 20.25, "learning_rate": 3.987819245651323e-05, "loss": 2.3195, "step": 4087500 }, { "epoch": 20.25, "learning_rate": 3.987695387008715e-05, "loss": 2.373, "step": 4088000 }, { "epoch": 20.26, "learning_rate": 3.9875715283661064e-05, "loss": 2.3466, "step": 4088500 }, { "epoch": 20.26, "learning_rate": 3.987447669723498e-05, "loss": 2.3485, "step": 4089000 }, { "epoch": 20.26, "learning_rate": 3.98732381108089e-05, "loss": 2.3326, "step": 4089500 }, { "epoch": 20.26, "learning_rate": 3.9871999524382815e-05, "loss": 2.2901, "step": 4090000 }, { "epoch": 20.27, "learning_rate": 3.987076093795673e-05, "loss": 2.3226, "step": 4090500 }, { "epoch": 20.27, "learning_rate": 3.98695248287035e-05, "loss": 2.3075, "step": 4091000 }, { "epoch": 20.27, "learning_rate": 3.986828624227741e-05, "loss": 2.2915, "step": 4091500 }, { "epoch": 20.27, "learning_rate": 3.986704765585133e-05, "loss": 2.3465, "step": 4092000 }, { "epoch": 20.28, "learning_rate": 3.9865809069425245e-05, "loss": 2.3153, "step": 4092500 }, { "epoch": 20.28, "learning_rate": 3.986457048299916e-05, "loss": 2.3102, "step": 4093000 }, { "epoch": 20.28, "learning_rate": 3.986333189657308e-05, "loss": 2.3314, "step": 4093500 }, { "epoch": 20.28, "learning_rate": 3.9862093310146995e-05, "loss": 2.3403, "step": 4094000 }, { "epoch": 20.29, "learning_rate": 3.9860857200893764e-05, "loss": 2.3266, "step": 4094500 }, { "epoch": 20.29, "learning_rate": 3.985962109164054e-05, "loss": 2.3155, "step": 4095000 }, { "epoch": 20.29, "learning_rate": 3.985838250521445e-05, "loss": 2.3508, "step": 4095500 }, { "epoch": 20.29, "learning_rate": 3.985714391878837e-05, "loss": 2.32, "step": 4096000 }, { "epoch": 20.3, "learning_rate": 3.9855905332362284e-05, "loss": 2.328, "step": 4096500 }, { "epoch": 20.3, "learning_rate": 3.98546667459362e-05, "loss": 2.3088, "step": 4097000 }, { "epoch": 20.3, "learning_rate": 3.985343063668297e-05, "loss": 2.3212, "step": 4097500 }, { "epoch": 20.3, "learning_rate": 3.9852192050256886e-05, "loss": 2.3098, "step": 4098000 }, { "epoch": 20.31, "learning_rate": 3.98509534638308e-05, "loss": 2.32, "step": 4098500 }, { "epoch": 20.31, "learning_rate": 3.984971487740472e-05, "loss": 2.3258, "step": 4099000 }, { "epoch": 20.31, "learning_rate": 3.984847629097863e-05, "loss": 2.3186, "step": 4099500 }, { "epoch": 20.31, "learning_rate": 3.984723770455255e-05, "loss": 2.3401, "step": 4100000 }, { "epoch": 20.32, "learning_rate": 3.984600159529932e-05, "loss": 2.3358, "step": 4100500 }, { "epoch": 20.32, "learning_rate": 3.984476300887324e-05, "loss": 2.323, "step": 4101000 }, { "epoch": 20.32, "learning_rate": 3.984352442244716e-05, "loss": 2.3239, "step": 4101500 }, { "epoch": 20.32, "learning_rate": 3.9842285836021074e-05, "loss": 2.2897, "step": 4102000 }, { "epoch": 20.33, "learning_rate": 3.9841049726767836e-05, "loss": 2.3372, "step": 4102500 }, { "epoch": 20.33, "learning_rate": 3.983981114034175e-05, "loss": 2.3183, "step": 4103000 }, { "epoch": 20.33, "learning_rate": 3.983857255391567e-05, "loss": 2.3244, "step": 4103500 }, { "epoch": 20.33, "learning_rate": 3.9837333967489587e-05, "loss": 2.341, "step": 4104000 }, { "epoch": 20.34, "learning_rate": 3.9836095381063503e-05, "loss": 2.3251, "step": 4104500 }, { "epoch": 20.34, "learning_rate": 3.983485679463742e-05, "loss": 2.3282, "step": 4105000 }, { "epoch": 20.34, "learning_rate": 3.983361820821133e-05, "loss": 2.3602, "step": 4105500 }, { "epoch": 20.34, "learning_rate": 3.983237962178525e-05, "loss": 2.3238, "step": 4106000 }, { "epoch": 20.35, "learning_rate": 3.9831141035359164e-05, "loss": 2.3484, "step": 4106500 }, { "epoch": 20.35, "learning_rate": 3.982990244893308e-05, "loss": 2.3466, "step": 4107000 }, { "epoch": 20.35, "learning_rate": 3.9828663862507e-05, "loss": 2.3151, "step": 4107500 }, { "epoch": 20.35, "learning_rate": 3.9827427753253774e-05, "loss": 2.3078, "step": 4108000 }, { "epoch": 20.35, "learning_rate": 3.9826189166827684e-05, "loss": 2.3361, "step": 4108500 }, { "epoch": 20.36, "learning_rate": 3.98249505804016e-05, "loss": 2.3302, "step": 4109000 }, { "epoch": 20.36, "learning_rate": 3.982371199397552e-05, "loss": 2.3368, "step": 4109500 }, { "epoch": 20.36, "learning_rate": 3.9822473407549435e-05, "loss": 2.3268, "step": 4110000 }, { "epoch": 20.36, "learning_rate": 3.982123482112335e-05, "loss": 2.337, "step": 4110500 }, { "epoch": 20.37, "learning_rate": 3.981999623469727e-05, "loss": 2.3263, "step": 4111000 }, { "epoch": 20.37, "learning_rate": 3.981875764827118e-05, "loss": 2.3309, "step": 4111500 }, { "epoch": 20.37, "learning_rate": 3.9817519061845096e-05, "loss": 2.3406, "step": 4112000 }, { "epoch": 20.37, "learning_rate": 3.981628047541901e-05, "loss": 2.304, "step": 4112500 }, { "epoch": 20.38, "learning_rate": 3.981504188899293e-05, "loss": 2.3435, "step": 4113000 }, { "epoch": 20.38, "learning_rate": 3.9813803302566847e-05, "loss": 2.3374, "step": 4113500 }, { "epoch": 20.38, "learning_rate": 3.9812567193313615e-05, "loss": 2.334, "step": 4114000 }, { "epoch": 20.38, "learning_rate": 3.981132860688753e-05, "loss": 2.3147, "step": 4114500 }, { "epoch": 20.39, "learning_rate": 3.981009002046145e-05, "loss": 2.33, "step": 4115000 }, { "epoch": 20.39, "learning_rate": 3.9808851434035366e-05, "loss": 2.3088, "step": 4115500 }, { "epoch": 20.39, "learning_rate": 3.980761284760928e-05, "loss": 2.3281, "step": 4116000 }, { "epoch": 20.39, "learning_rate": 3.98063742611832e-05, "loss": 2.3257, "step": 4116500 }, { "epoch": 20.4, "learning_rate": 3.980513815192997e-05, "loss": 2.2962, "step": 4117000 }, { "epoch": 20.4, "learning_rate": 3.9803899565503886e-05, "loss": 2.3362, "step": 4117500 }, { "epoch": 20.4, "learning_rate": 3.98026609790778e-05, "loss": 2.3423, "step": 4118000 }, { "epoch": 20.4, "learning_rate": 3.980142239265171e-05, "loss": 2.2976, "step": 4118500 }, { "epoch": 20.41, "learning_rate": 3.980018380622563e-05, "loss": 2.3139, "step": 4119000 }, { "epoch": 20.41, "learning_rate": 3.979894521979955e-05, "loss": 2.3334, "step": 4119500 }, { "epoch": 20.41, "learning_rate": 3.9797709110546315e-05, "loss": 2.3412, "step": 4120000 }, { "epoch": 20.41, "learning_rate": 3.979647052412023e-05, "loss": 2.3465, "step": 4120500 }, { "epoch": 20.42, "learning_rate": 3.979523193769415e-05, "loss": 2.3413, "step": 4121000 }, { "epoch": 20.42, "learning_rate": 3.9793993351268066e-05, "loss": 2.3351, "step": 4121500 }, { "epoch": 20.42, "learning_rate": 3.979275476484198e-05, "loss": 2.3251, "step": 4122000 }, { "epoch": 20.42, "learning_rate": 3.979151865558875e-05, "loss": 2.3504, "step": 4122500 }, { "epoch": 20.43, "learning_rate": 3.979028006916267e-05, "loss": 2.3297, "step": 4123000 }, { "epoch": 20.43, "learning_rate": 3.9789041482736586e-05, "loss": 2.3087, "step": 4123500 }, { "epoch": 20.43, "learning_rate": 3.97878028963105e-05, "loss": 2.3201, "step": 4124000 }, { "epoch": 20.43, "learning_rate": 3.978656430988442e-05, "loss": 2.2992, "step": 4124500 }, { "epoch": 20.44, "learning_rate": 3.978532572345833e-05, "loss": 2.3279, "step": 4125000 }, { "epoch": 20.44, "learning_rate": 3.97840896142051e-05, "loss": 2.3199, "step": 4125500 }, { "epoch": 20.44, "learning_rate": 3.9782851027779016e-05, "loss": 2.3297, "step": 4126000 }, { "epoch": 20.44, "learning_rate": 3.978161244135293e-05, "loss": 2.3423, "step": 4126500 }, { "epoch": 20.45, "learning_rate": 3.978037385492685e-05, "loss": 2.3141, "step": 4127000 }, { "epoch": 20.45, "learning_rate": 3.9779135268500766e-05, "loss": 2.336, "step": 4127500 }, { "epoch": 20.45, "learning_rate": 3.977789668207468e-05, "loss": 2.3278, "step": 4128000 }, { "epoch": 20.45, "learning_rate": 3.97766580956486e-05, "loss": 2.3016, "step": 4128500 }, { "epoch": 20.46, "learning_rate": 3.977541950922252e-05, "loss": 2.3267, "step": 4129000 }, { "epoch": 20.46, "learning_rate": 3.9774183399969286e-05, "loss": 2.3293, "step": 4129500 }, { "epoch": 20.46, "learning_rate": 3.97729448135432e-05, "loss": 2.3389, "step": 4130000 }, { "epoch": 20.46, "learning_rate": 3.977170622711712e-05, "loss": 2.3131, "step": 4130500 }, { "epoch": 20.47, "learning_rate": 3.977046764069104e-05, "loss": 2.3192, "step": 4131000 }, { "epoch": 20.47, "learning_rate": 3.9769229054264954e-05, "loss": 2.3528, "step": 4131500 }, { "epoch": 20.47, "learning_rate": 3.9767992945011716e-05, "loss": 2.3322, "step": 4132000 }, { "epoch": 20.47, "learning_rate": 3.976675435858563e-05, "loss": 2.3396, "step": 4132500 }, { "epoch": 20.48, "learning_rate": 3.976551824933241e-05, "loss": 2.3243, "step": 4133000 }, { "epoch": 20.48, "learning_rate": 3.976427966290632e-05, "loss": 2.3496, "step": 4133500 }, { "epoch": 20.48, "learning_rate": 3.9763041076480235e-05, "loss": 2.3338, "step": 4134000 }, { "epoch": 20.48, "learning_rate": 3.976180249005415e-05, "loss": 2.2961, "step": 4134500 }, { "epoch": 20.49, "learning_rate": 3.976056390362807e-05, "loss": 2.3068, "step": 4135000 }, { "epoch": 20.49, "learning_rate": 3.9759325317201986e-05, "loss": 2.358, "step": 4135500 }, { "epoch": 20.49, "learning_rate": 3.97580867307759e-05, "loss": 2.2942, "step": 4136000 }, { "epoch": 20.49, "learning_rate": 3.975684814434982e-05, "loss": 2.3191, "step": 4136500 }, { "epoch": 20.5, "learning_rate": 3.975561203509658e-05, "loss": 2.3382, "step": 4137000 }, { "epoch": 20.5, "learning_rate": 3.97543734486705e-05, "loss": 2.3275, "step": 4137500 }, { "epoch": 20.5, "learning_rate": 3.9753134862244416e-05, "loss": 2.352, "step": 4138000 }, { "epoch": 20.5, "learning_rate": 3.975189627581833e-05, "loss": 2.3277, "step": 4138500 }, { "epoch": 20.51, "learning_rate": 3.975066016656511e-05, "loss": 2.3214, "step": 4139000 }, { "epoch": 20.51, "learning_rate": 3.974942405731187e-05, "loss": 2.3368, "step": 4139500 }, { "epoch": 20.51, "learning_rate": 3.974818547088579e-05, "loss": 2.3367, "step": 4140000 }, { "epoch": 20.51, "learning_rate": 3.9746946884459704e-05, "loss": 2.3376, "step": 4140500 }, { "epoch": 20.52, "learning_rate": 3.974570829803362e-05, "loss": 2.3505, "step": 4141000 }, { "epoch": 20.52, "learning_rate": 3.974447218878039e-05, "loss": 2.3357, "step": 4141500 }, { "epoch": 20.52, "learning_rate": 3.974323607952716e-05, "loss": 2.326, "step": 4142000 }, { "epoch": 20.52, "learning_rate": 3.9741997493101076e-05, "loss": 2.3177, "step": 4142500 }, { "epoch": 20.53, "learning_rate": 3.974075890667499e-05, "loss": 2.3331, "step": 4143000 }, { "epoch": 20.53, "learning_rate": 3.973952032024891e-05, "loss": 2.3333, "step": 4143500 }, { "epoch": 20.53, "learning_rate": 3.9738281733822826e-05, "loss": 2.3378, "step": 4144000 }, { "epoch": 20.53, "learning_rate": 3.9737043147396743e-05, "loss": 2.3116, "step": 4144500 }, { "epoch": 20.54, "learning_rate": 3.973580456097066e-05, "loss": 2.3071, "step": 4145000 }, { "epoch": 20.54, "learning_rate": 3.973456597454458e-05, "loss": 2.302, "step": 4145500 }, { "epoch": 20.54, "learning_rate": 3.9733327388118494e-05, "loss": 2.3401, "step": 4146000 }, { "epoch": 20.54, "learning_rate": 3.9732088801692404e-05, "loss": 2.3107, "step": 4146500 }, { "epoch": 20.55, "learning_rate": 3.973085021526632e-05, "loss": 2.3115, "step": 4147000 }, { "epoch": 20.55, "learning_rate": 3.972961162884024e-05, "loss": 2.3198, "step": 4147500 }, { "epoch": 20.55, "learning_rate": 3.9728373042414155e-05, "loss": 2.3458, "step": 4148000 }, { "epoch": 20.55, "learning_rate": 3.972713445598807e-05, "loss": 2.3251, "step": 4148500 }, { "epoch": 20.56, "learning_rate": 3.972589586956199e-05, "loss": 2.3449, "step": 4149000 }, { "epoch": 20.56, "learning_rate": 3.97246572831359e-05, "loss": 2.3381, "step": 4149500 }, { "epoch": 20.56, "learning_rate": 3.9723421173882675e-05, "loss": 2.3345, "step": 4150000 }, { "epoch": 20.56, "learning_rate": 3.972218258745659e-05, "loss": 2.2837, "step": 4150500 }, { "epoch": 20.57, "learning_rate": 3.972094400103051e-05, "loss": 2.3166, "step": 4151000 }, { "epoch": 20.57, "learning_rate": 3.9719705414604426e-05, "loss": 2.3352, "step": 4151500 }, { "epoch": 20.57, "learning_rate": 3.971846682817834e-05, "loss": 2.3251, "step": 4152000 }, { "epoch": 20.57, "learning_rate": 3.971723071892511e-05, "loss": 2.3379, "step": 4152500 }, { "epoch": 20.58, "learning_rate": 3.971599213249902e-05, "loss": 2.3292, "step": 4153000 }, { "epoch": 20.58, "learning_rate": 3.971475354607294e-05, "loss": 2.3387, "step": 4153500 }, { "epoch": 20.58, "learning_rate": 3.9713514959646855e-05, "loss": 2.3423, "step": 4154000 }, { "epoch": 20.58, "learning_rate": 3.971227637322077e-05, "loss": 2.3238, "step": 4154500 }, { "epoch": 20.59, "learning_rate": 3.971104026396754e-05, "loss": 2.3356, "step": 4155000 }, { "epoch": 20.59, "learning_rate": 3.970980167754146e-05, "loss": 2.3428, "step": 4155500 }, { "epoch": 20.59, "learning_rate": 3.9708563091115375e-05, "loss": 2.3381, "step": 4156000 }, { "epoch": 20.59, "learning_rate": 3.970732450468929e-05, "loss": 2.335, "step": 4156500 }, { "epoch": 20.6, "learning_rate": 3.970608591826321e-05, "loss": 2.3374, "step": 4157000 }, { "epoch": 20.6, "learning_rate": 3.9704847331837126e-05, "loss": 2.3173, "step": 4157500 }, { "epoch": 20.6, "learning_rate": 3.9703611222583894e-05, "loss": 2.3357, "step": 4158000 }, { "epoch": 20.6, "learning_rate": 3.970237263615781e-05, "loss": 2.3303, "step": 4158500 }, { "epoch": 20.61, "learning_rate": 3.9701136526904573e-05, "loss": 2.3437, "step": 4159000 }, { "epoch": 20.61, "learning_rate": 3.969989794047849e-05, "loss": 2.2999, "step": 4159500 }, { "epoch": 20.61, "learning_rate": 3.969865935405241e-05, "loss": 2.3308, "step": 4160000 }, { "epoch": 20.61, "learning_rate": 3.9697420767626324e-05, "loss": 2.3444, "step": 4160500 }, { "epoch": 20.62, "learning_rate": 3.969618218120024e-05, "loss": 2.3279, "step": 4161000 }, { "epoch": 20.62, "learning_rate": 3.969494359477416e-05, "loss": 2.3423, "step": 4161500 }, { "epoch": 20.62, "learning_rate": 3.9693705008348075e-05, "loss": 2.352, "step": 4162000 }, { "epoch": 20.62, "learning_rate": 3.969246642192199e-05, "loss": 2.3336, "step": 4162500 }, { "epoch": 20.62, "learning_rate": 3.969122783549591e-05, "loss": 2.3079, "step": 4163000 }, { "epoch": 20.63, "learning_rate": 3.9689989249069826e-05, "loss": 2.3315, "step": 4163500 }, { "epoch": 20.63, "learning_rate": 3.9688753139816595e-05, "loss": 2.3208, "step": 4164000 }, { "epoch": 20.63, "learning_rate": 3.968751455339051e-05, "loss": 2.3149, "step": 4164500 }, { "epoch": 20.63, "learning_rate": 3.968627596696443e-05, "loss": 2.3026, "step": 4165000 }, { "epoch": 20.64, "learning_rate": 3.9685037380538345e-05, "loss": 2.3623, "step": 4165500 }, { "epoch": 20.64, "learning_rate": 3.968379879411226e-05, "loss": 2.3316, "step": 4166000 }, { "epoch": 20.64, "learning_rate": 3.968256020768617e-05, "loss": 2.3202, "step": 4166500 }, { "epoch": 20.64, "learning_rate": 3.968132162126009e-05, "loss": 2.3328, "step": 4167000 }, { "epoch": 20.65, "learning_rate": 3.9680083034834006e-05, "loss": 2.3363, "step": 4167500 }, { "epoch": 20.65, "learning_rate": 3.9678846925580775e-05, "loss": 2.339, "step": 4168000 }, { "epoch": 20.65, "learning_rate": 3.967760833915469e-05, "loss": 2.3511, "step": 4168500 }, { "epoch": 20.65, "learning_rate": 3.967636975272861e-05, "loss": 2.3296, "step": 4169000 }, { "epoch": 20.66, "learning_rate": 3.967513364347538e-05, "loss": 2.3214, "step": 4169500 }, { "epoch": 20.66, "learning_rate": 3.9673895057049295e-05, "loss": 2.3289, "step": 4170000 }, { "epoch": 20.66, "learning_rate": 3.967265647062321e-05, "loss": 2.333, "step": 4170500 }, { "epoch": 20.66, "learning_rate": 3.967141788419713e-05, "loss": 2.3358, "step": 4171000 }, { "epoch": 20.67, "learning_rate": 3.9670179297771045e-05, "loss": 2.3285, "step": 4171500 }, { "epoch": 20.67, "learning_rate": 3.966894318851781e-05, "loss": 2.3058, "step": 4172000 }, { "epoch": 20.67, "learning_rate": 3.9667704602091724e-05, "loss": 2.3333, "step": 4172500 }, { "epoch": 20.67, "learning_rate": 3.966646601566564e-05, "loss": 2.3302, "step": 4173000 }, { "epoch": 20.68, "learning_rate": 3.966522742923956e-05, "loss": 2.3255, "step": 4173500 }, { "epoch": 20.68, "learning_rate": 3.9663988842813475e-05, "loss": 2.3237, "step": 4174000 }, { "epoch": 20.68, "learning_rate": 3.966275025638739e-05, "loss": 2.3398, "step": 4174500 }, { "epoch": 20.68, "learning_rate": 3.966151166996131e-05, "loss": 2.3193, "step": 4175000 }, { "epoch": 20.69, "learning_rate": 3.9660273083535226e-05, "loss": 2.3686, "step": 4175500 }, { "epoch": 20.69, "learning_rate": 3.9659036974281995e-05, "loss": 2.3719, "step": 4176000 }, { "epoch": 20.69, "learning_rate": 3.965779838785591e-05, "loss": 2.3404, "step": 4176500 }, { "epoch": 20.69, "learning_rate": 3.965655980142983e-05, "loss": 2.333, "step": 4177000 }, { "epoch": 20.7, "learning_rate": 3.965532616934945e-05, "loss": 2.339, "step": 4177500 }, { "epoch": 20.7, "learning_rate": 3.965408758292336e-05, "loss": 2.3373, "step": 4178000 }, { "epoch": 20.7, "learning_rate": 3.9652851473670135e-05, "loss": 2.3554, "step": 4178500 }, { "epoch": 20.7, "learning_rate": 3.965161288724405e-05, "loss": 2.3353, "step": 4179000 }, { "epoch": 20.71, "learning_rate": 3.965037430081797e-05, "loss": 2.3286, "step": 4179500 }, { "epoch": 20.71, "learning_rate": 3.9649135714391886e-05, "loss": 2.3024, "step": 4180000 }, { "epoch": 20.71, "learning_rate": 3.964789960513865e-05, "loss": 2.3233, "step": 4180500 }, { "epoch": 20.71, "learning_rate": 3.9646661018712565e-05, "loss": 2.328, "step": 4181000 }, { "epoch": 20.72, "learning_rate": 3.964542243228648e-05, "loss": 2.301, "step": 4181500 }, { "epoch": 20.72, "learning_rate": 3.96441838458604e-05, "loss": 2.3744, "step": 4182000 }, { "epoch": 20.72, "learning_rate": 3.9642945259434316e-05, "loss": 2.3801, "step": 4182500 }, { "epoch": 20.72, "learning_rate": 3.964170667300823e-05, "loss": 2.3168, "step": 4183000 }, { "epoch": 20.73, "learning_rate": 3.964046808658215e-05, "loss": 2.3182, "step": 4183500 }, { "epoch": 20.73, "learning_rate": 3.9639229500156066e-05, "loss": 2.3429, "step": 4184000 }, { "epoch": 20.73, "learning_rate": 3.9637990913729977e-05, "loss": 2.3526, "step": 4184500 }, { "epoch": 20.73, "learning_rate": 3.9636752327303893e-05, "loss": 2.3447, "step": 4185000 }, { "epoch": 20.74, "learning_rate": 3.963551374087781e-05, "loss": 2.3361, "step": 4185500 }, { "epoch": 20.74, "learning_rate": 3.963427515445173e-05, "loss": 2.3486, "step": 4186000 }, { "epoch": 20.74, "learning_rate": 3.9633036568025644e-05, "loss": 2.3517, "step": 4186500 }, { "epoch": 20.74, "learning_rate": 3.963179798159956e-05, "loss": 2.3206, "step": 4187000 }, { "epoch": 20.75, "learning_rate": 3.963055939517348e-05, "loss": 2.3178, "step": 4187500 }, { "epoch": 20.75, "learning_rate": 3.9629320808747395e-05, "loss": 2.3548, "step": 4188000 }, { "epoch": 20.75, "learning_rate": 3.962808222232131e-05, "loss": 2.3043, "step": 4188500 }, { "epoch": 20.75, "learning_rate": 3.962684859024093e-05, "loss": 2.3362, "step": 4189000 }, { "epoch": 20.76, "learning_rate": 3.962561000381485e-05, "loss": 2.3417, "step": 4189500 }, { "epoch": 20.76, "learning_rate": 3.9624371417388767e-05, "loss": 2.3329, "step": 4190000 }, { "epoch": 20.76, "learning_rate": 3.962313283096268e-05, "loss": 2.3078, "step": 4190500 }, { "epoch": 20.76, "learning_rate": 3.9621894244536594e-05, "loss": 2.3241, "step": 4191000 }, { "epoch": 20.77, "learning_rate": 3.962065565811051e-05, "loss": 2.3591, "step": 4191500 }, { "epoch": 20.77, "learning_rate": 3.961941707168443e-05, "loss": 2.3285, "step": 4192000 }, { "epoch": 20.77, "learning_rate": 3.9618178485258344e-05, "loss": 2.3298, "step": 4192500 }, { "epoch": 20.77, "learning_rate": 3.961694237600512e-05, "loss": 2.3402, "step": 4193000 }, { "epoch": 20.78, "learning_rate": 3.961570378957903e-05, "loss": 2.3157, "step": 4193500 }, { "epoch": 20.78, "learning_rate": 3.961446520315295e-05, "loss": 2.3364, "step": 4194000 }, { "epoch": 20.78, "learning_rate": 3.9613226616726864e-05, "loss": 2.339, "step": 4194500 }, { "epoch": 20.78, "learning_rate": 3.961198803030078e-05, "loss": 2.3329, "step": 4195000 }, { "epoch": 20.79, "learning_rate": 3.961075192104755e-05, "loss": 2.3085, "step": 4195500 }, { "epoch": 20.79, "learning_rate": 3.960951333462147e-05, "loss": 2.3486, "step": 4196000 }, { "epoch": 20.79, "learning_rate": 3.9608277225368235e-05, "loss": 2.3269, "step": 4196500 }, { "epoch": 20.79, "learning_rate": 3.960703863894215e-05, "loss": 2.3502, "step": 4197000 }, { "epoch": 20.8, "learning_rate": 3.960580005251607e-05, "loss": 2.32, "step": 4197500 }, { "epoch": 20.8, "learning_rate": 3.960456642043568e-05, "loss": 2.3226, "step": 4198000 }, { "epoch": 20.8, "learning_rate": 3.96033278340096e-05, "loss": 2.3389, "step": 4198500 }, { "epoch": 20.8, "learning_rate": 3.960208924758352e-05, "loss": 2.321, "step": 4199000 }, { "epoch": 20.81, "learning_rate": 3.9600850661157434e-05, "loss": 2.3624, "step": 4199500 }, { "epoch": 20.81, "learning_rate": 3.959961207473135e-05, "loss": 2.3265, "step": 4200000 }, { "epoch": 20.81, "learning_rate": 3.959837596547812e-05, "loss": 2.3382, "step": 4200500 }, { "epoch": 20.81, "learning_rate": 3.959713737905204e-05, "loss": 2.3298, "step": 4201000 }, { "epoch": 20.82, "learning_rate": 3.9595898792625954e-05, "loss": 2.3306, "step": 4201500 }, { "epoch": 20.82, "learning_rate": 3.959466020619987e-05, "loss": 2.3273, "step": 4202000 }, { "epoch": 20.82, "learning_rate": 3.959342409694664e-05, "loss": 2.3362, "step": 4202500 }, { "epoch": 20.82, "learning_rate": 3.9592185510520556e-05, "loss": 2.3331, "step": 4203000 }, { "epoch": 20.83, "learning_rate": 3.9590946924094466e-05, "loss": 2.3285, "step": 4203500 }, { "epoch": 20.83, "learning_rate": 3.958970833766838e-05, "loss": 2.3242, "step": 4204000 }, { "epoch": 20.83, "learning_rate": 3.95884697512423e-05, "loss": 2.3425, "step": 4204500 }, { "epoch": 20.83, "learning_rate": 3.958723116481622e-05, "loss": 2.3424, "step": 4205000 }, { "epoch": 20.84, "learning_rate": 3.9585992578390134e-05, "loss": 2.3153, "step": 4205500 }, { "epoch": 20.84, "learning_rate": 3.958475399196405e-05, "loss": 2.3106, "step": 4206000 }, { "epoch": 20.84, "learning_rate": 3.958351540553797e-05, "loss": 2.3452, "step": 4206500 }, { "epoch": 20.84, "learning_rate": 3.9582276819111885e-05, "loss": 2.3583, "step": 4207000 }, { "epoch": 20.85, "learning_rate": 3.95810382326858e-05, "loss": 2.339, "step": 4207500 }, { "epoch": 20.85, "learning_rate": 3.957979964625972e-05, "loss": 2.3194, "step": 4208000 }, { "epoch": 20.85, "learning_rate": 3.957856353700649e-05, "loss": 2.3322, "step": 4208500 }, { "epoch": 20.85, "learning_rate": 3.9577324950580405e-05, "loss": 2.3334, "step": 4209000 }, { "epoch": 20.86, "learning_rate": 3.957608636415432e-05, "loss": 2.3398, "step": 4209500 }, { "epoch": 20.86, "learning_rate": 3.957484777772824e-05, "loss": 2.3255, "step": 4210000 }, { "epoch": 20.86, "learning_rate": 3.9573611668475e-05, "loss": 2.3186, "step": 4210500 }, { "epoch": 20.86, "learning_rate": 3.957237308204892e-05, "loss": 2.3099, "step": 4211000 }, { "epoch": 20.87, "learning_rate": 3.9571134495622834e-05, "loss": 2.3274, "step": 4211500 }, { "epoch": 20.87, "learning_rate": 3.956989590919675e-05, "loss": 2.3277, "step": 4212000 }, { "epoch": 20.87, "learning_rate": 3.956865732277067e-05, "loss": 2.3225, "step": 4212500 }, { "epoch": 20.87, "learning_rate": 3.9567418736344585e-05, "loss": 2.3221, "step": 4213000 }, { "epoch": 20.88, "learning_rate": 3.95661801499185e-05, "loss": 2.337, "step": 4213500 }, { "epoch": 20.88, "learning_rate": 3.956494156349242e-05, "loss": 2.3301, "step": 4214000 }, { "epoch": 20.88, "learning_rate": 3.9563702977066336e-05, "loss": 2.3094, "step": 4214500 }, { "epoch": 20.88, "learning_rate": 3.956246439064025e-05, "loss": 2.353, "step": 4215000 }, { "epoch": 20.89, "learning_rate": 3.956122580421417e-05, "loss": 2.3262, "step": 4215500 }, { "epoch": 20.89, "learning_rate": 3.955998969496094e-05, "loss": 2.3429, "step": 4216000 }, { "epoch": 20.89, "learning_rate": 3.95587535857077e-05, "loss": 2.3003, "step": 4216500 }, { "epoch": 20.89, "learning_rate": 3.955751499928162e-05, "loss": 2.3653, "step": 4217000 }, { "epoch": 20.89, "learning_rate": 3.9556276412855534e-05, "loss": 2.3238, "step": 4217500 }, { "epoch": 20.9, "learning_rate": 3.955503782642945e-05, "loss": 2.3313, "step": 4218000 }, { "epoch": 20.9, "learning_rate": 3.955379924000337e-05, "loss": 2.3488, "step": 4218500 }, { "epoch": 20.9, "learning_rate": 3.9552560653577285e-05, "loss": 2.3553, "step": 4219000 }, { "epoch": 20.9, "learning_rate": 3.95513220671512e-05, "loss": 2.3272, "step": 4219500 }, { "epoch": 20.91, "learning_rate": 3.955008348072512e-05, "loss": 2.3104, "step": 4220000 }, { "epoch": 20.91, "learning_rate": 3.9548844894299036e-05, "loss": 2.3523, "step": 4220500 }, { "epoch": 20.91, "learning_rate": 3.954760630787295e-05, "loss": 2.3315, "step": 4221000 }, { "epoch": 20.91, "learning_rate": 3.954636772144687e-05, "loss": 2.3421, "step": 4221500 }, { "epoch": 20.92, "learning_rate": 3.954512913502079e-05, "loss": 2.3485, "step": 4222000 }, { "epoch": 20.92, "learning_rate": 3.9543893025767556e-05, "loss": 2.3143, "step": 4222500 }, { "epoch": 20.92, "learning_rate": 3.954265443934147e-05, "loss": 2.3271, "step": 4223000 }, { "epoch": 20.92, "learning_rate": 3.954141585291539e-05, "loss": 2.3282, "step": 4223500 }, { "epoch": 20.93, "learning_rate": 3.9540177266489306e-05, "loss": 2.3492, "step": 4224000 }, { "epoch": 20.93, "learning_rate": 3.953894115723607e-05, "loss": 2.3315, "step": 4224500 }, { "epoch": 20.93, "learning_rate": 3.9537702570809985e-05, "loss": 2.3257, "step": 4225000 }, { "epoch": 20.93, "learning_rate": 3.95364639843839e-05, "loss": 2.351, "step": 4225500 }, { "epoch": 20.94, "learning_rate": 3.953522539795782e-05, "loss": 2.3266, "step": 4226000 }, { "epoch": 20.94, "learning_rate": 3.9533986811531736e-05, "loss": 2.3206, "step": 4226500 }, { "epoch": 20.94, "learning_rate": 3.953274822510565e-05, "loss": 2.343, "step": 4227000 }, { "epoch": 20.94, "learning_rate": 3.953150963867957e-05, "loss": 2.3361, "step": 4227500 }, { "epoch": 20.95, "learning_rate": 3.953027105225349e-05, "loss": 2.2889, "step": 4228000 }, { "epoch": 20.95, "learning_rate": 3.95290374201731e-05, "loss": 2.3352, "step": 4228500 }, { "epoch": 20.95, "learning_rate": 3.952779883374702e-05, "loss": 2.3433, "step": 4229000 }, { "epoch": 20.95, "learning_rate": 3.9526560247320935e-05, "loss": 2.326, "step": 4229500 }, { "epoch": 20.96, "learning_rate": 3.952532413806771e-05, "loss": 2.3601, "step": 4230000 }, { "epoch": 20.96, "learning_rate": 3.952408555164163e-05, "loss": 2.314, "step": 4230500 }, { "epoch": 20.96, "learning_rate": 3.9522846965215544e-05, "loss": 2.3232, "step": 4231000 }, { "epoch": 20.96, "learning_rate": 3.952160837878946e-05, "loss": 2.3366, "step": 4231500 }, { "epoch": 20.97, "learning_rate": 3.952036979236337e-05, "loss": 2.3331, "step": 4232000 }, { "epoch": 20.97, "learning_rate": 3.951913120593729e-05, "loss": 2.3387, "step": 4232500 }, { "epoch": 20.97, "learning_rate": 3.951789509668406e-05, "loss": 2.3342, "step": 4233000 }, { "epoch": 20.97, "learning_rate": 3.9516656510257974e-05, "loss": 2.3227, "step": 4233500 }, { "epoch": 20.98, "learning_rate": 3.951541792383189e-05, "loss": 2.3227, "step": 4234000 }, { "epoch": 20.98, "learning_rate": 3.951417933740581e-05, "loss": 2.3304, "step": 4234500 }, { "epoch": 20.98, "learning_rate": 3.951294075097972e-05, "loss": 2.3277, "step": 4235000 }, { "epoch": 20.98, "learning_rate": 3.9511702164553635e-05, "loss": 2.3255, "step": 4235500 }, { "epoch": 20.99, "learning_rate": 3.951046357812755e-05, "loss": 2.3444, "step": 4236000 }, { "epoch": 20.99, "learning_rate": 3.950922499170147e-05, "loss": 2.3233, "step": 4236500 }, { "epoch": 20.99, "learning_rate": 3.9507986405275386e-05, "loss": 2.3227, "step": 4237000 }, { "epoch": 20.99, "learning_rate": 3.95067478188493e-05, "loss": 2.3412, "step": 4237500 }, { "epoch": 21.0, "learning_rate": 3.950550923242322e-05, "loss": 2.336, "step": 4238000 }, { "epoch": 21.0, "learning_rate": 3.9504270645997136e-05, "loss": 2.3633, "step": 4238500 }, { "epoch": 21.0, "eval_accuracy": 0.6523621169343081, "eval_accuracy_mlm": 0.6068608911574469, "eval_accuracy_nsp": 0.8669707678489482, "eval_loss": 2.365056276321411, "eval_runtime": 145.9769, "eval_samples_per_second": 1746.57, "eval_steps_per_second": 72.779, "step": 4238703 }, { "epoch": 21.0, "learning_rate": 3.9503034536743905e-05, "loss": 2.2921, "step": 4239000 }, { "epoch": 21.0, "learning_rate": 3.950179595031782e-05, "loss": 2.3045, "step": 4239500 }, { "epoch": 21.01, "learning_rate": 3.950055736389174e-05, "loss": 2.2858, "step": 4240000 }, { "epoch": 21.01, "learning_rate": 3.9499318777465656e-05, "loss": 2.2861, "step": 4240500 }, { "epoch": 21.01, "learning_rate": 3.949808019103957e-05, "loss": 2.2879, "step": 4241000 }, { "epoch": 21.01, "learning_rate": 3.949684160461349e-05, "loss": 2.2669, "step": 4241500 }, { "epoch": 21.02, "learning_rate": 3.949560301818741e-05, "loss": 2.3146, "step": 4242000 }, { "epoch": 21.02, "learning_rate": 3.949436690893417e-05, "loss": 2.2891, "step": 4242500 }, { "epoch": 21.02, "learning_rate": 3.9493128322508086e-05, "loss": 2.3136, "step": 4243000 }, { "epoch": 21.02, "learning_rate": 3.9491889736082e-05, "loss": 2.2931, "step": 4243500 }, { "epoch": 21.03, "learning_rate": 3.949065114965592e-05, "loss": 2.3204, "step": 4244000 }, { "epoch": 21.03, "learning_rate": 3.9489412563229836e-05, "loss": 2.2862, "step": 4244500 }, { "epoch": 21.03, "learning_rate": 3.9488176453976605e-05, "loss": 2.2945, "step": 4245000 }, { "epoch": 21.03, "learning_rate": 3.948693786755052e-05, "loss": 2.3142, "step": 4245500 }, { "epoch": 21.04, "learning_rate": 3.948569928112444e-05, "loss": 2.305, "step": 4246000 }, { "epoch": 21.04, "learning_rate": 3.9484460694698356e-05, "loss": 2.2793, "step": 4246500 }, { "epoch": 21.04, "learning_rate": 3.948322210827227e-05, "loss": 2.3251, "step": 4247000 }, { "epoch": 21.04, "learning_rate": 3.948198352184619e-05, "loss": 2.3238, "step": 4247500 }, { "epoch": 21.05, "learning_rate": 3.948074493542011e-05, "loss": 2.2985, "step": 4248000 }, { "epoch": 21.05, "learning_rate": 3.9479506348994024e-05, "loss": 2.3153, "step": 4248500 }, { "epoch": 21.05, "learning_rate": 3.947826776256794e-05, "loss": 2.3251, "step": 4249000 }, { "epoch": 21.05, "learning_rate": 3.947702917614186e-05, "loss": 2.2942, "step": 4249500 }, { "epoch": 21.06, "learning_rate": 3.9475790589715775e-05, "loss": 2.2967, "step": 4250000 }, { "epoch": 21.06, "learning_rate": 3.947455200328969e-05, "loss": 2.3266, "step": 4250500 }, { "epoch": 21.06, "learning_rate": 3.947331341686361e-05, "loss": 2.3041, "step": 4251000 }, { "epoch": 21.06, "learning_rate": 3.947207730761037e-05, "loss": 2.3075, "step": 4251500 }, { "epoch": 21.07, "learning_rate": 3.947083872118429e-05, "loss": 2.3057, "step": 4252000 }, { "epoch": 21.07, "learning_rate": 3.9469600134758204e-05, "loss": 2.3371, "step": 4252500 }, { "epoch": 21.07, "learning_rate": 3.946836154833212e-05, "loss": 2.2894, "step": 4253000 }, { "epoch": 21.07, "learning_rate": 3.946712296190604e-05, "loss": 2.3192, "step": 4253500 }, { "epoch": 21.08, "learning_rate": 3.946588932982566e-05, "loss": 2.2943, "step": 4254000 }, { "epoch": 21.08, "learning_rate": 3.946465322057243e-05, "loss": 2.3263, "step": 4254500 }, { "epoch": 21.08, "learning_rate": 3.9463414634146345e-05, "loss": 2.292, "step": 4255000 }, { "epoch": 21.08, "learning_rate": 3.946217604772026e-05, "loss": 2.324, "step": 4255500 }, { "epoch": 21.09, "learning_rate": 3.946093746129418e-05, "loss": 2.3155, "step": 4256000 }, { "epoch": 21.09, "learning_rate": 3.9459698874868095e-05, "loss": 2.3074, "step": 4256500 }, { "epoch": 21.09, "learning_rate": 3.9458460288442006e-05, "loss": 2.2982, "step": 4257000 }, { "epoch": 21.09, "learning_rate": 3.945722170201592e-05, "loss": 2.3279, "step": 4257500 }, { "epoch": 21.1, "learning_rate": 3.945598311558984e-05, "loss": 2.3079, "step": 4258000 }, { "epoch": 21.1, "learning_rate": 3.9454744529163756e-05, "loss": 2.2971, "step": 4258500 }, { "epoch": 21.1, "learning_rate": 3.945350594273767e-05, "loss": 2.2837, "step": 4259000 }, { "epoch": 21.1, "learning_rate": 3.945226735631159e-05, "loss": 2.3128, "step": 4259500 }, { "epoch": 21.11, "learning_rate": 3.945103124705836e-05, "loss": 2.2928, "step": 4260000 }, { "epoch": 21.11, "learning_rate": 3.9449792660632276e-05, "loss": 2.3077, "step": 4260500 }, { "epoch": 21.11, "learning_rate": 3.944855407420619e-05, "loss": 2.2944, "step": 4261000 }, { "epoch": 21.11, "learning_rate": 3.944731548778011e-05, "loss": 2.3236, "step": 4261500 }, { "epoch": 21.12, "learning_rate": 3.944607690135402e-05, "loss": 2.3145, "step": 4262000 }, { "epoch": 21.12, "learning_rate": 3.944483831492794e-05, "loss": 2.2849, "step": 4262500 }, { "epoch": 21.12, "learning_rate": 3.9443602205674706e-05, "loss": 2.3208, "step": 4263000 }, { "epoch": 21.12, "learning_rate": 3.944236361924862e-05, "loss": 2.3103, "step": 4263500 }, { "epoch": 21.13, "learning_rate": 3.944112503282254e-05, "loss": 2.3213, "step": 4264000 }, { "epoch": 21.13, "learning_rate": 3.9439886446396456e-05, "loss": 2.3028, "step": 4264500 }, { "epoch": 21.13, "learning_rate": 3.943864785997037e-05, "loss": 2.3208, "step": 4265000 }, { "epoch": 21.13, "learning_rate": 3.943740927354429e-05, "loss": 2.2868, "step": 4265500 }, { "epoch": 21.14, "learning_rate": 3.943617316429106e-05, "loss": 2.3265, "step": 4266000 }, { "epoch": 21.14, "learning_rate": 3.9434934577864976e-05, "loss": 2.3104, "step": 4266500 }, { "epoch": 21.14, "learning_rate": 3.943369599143889e-05, "loss": 2.325, "step": 4267000 }, { "epoch": 21.14, "learning_rate": 3.943245740501281e-05, "loss": 2.3422, "step": 4267500 }, { "epoch": 21.15, "learning_rate": 3.943121881858673e-05, "loss": 2.3074, "step": 4268000 }, { "epoch": 21.15, "learning_rate": 3.9429980232160644e-05, "loss": 2.3229, "step": 4268500 }, { "epoch": 21.15, "learning_rate": 3.942874412290741e-05, "loss": 2.2827, "step": 4269000 }, { "epoch": 21.15, "learning_rate": 3.942750553648132e-05, "loss": 2.3152, "step": 4269500 }, { "epoch": 21.16, "learning_rate": 3.942626695005524e-05, "loss": 2.3225, "step": 4270000 }, { "epoch": 21.16, "learning_rate": 3.9425028363629157e-05, "loss": 2.3046, "step": 4270500 }, { "epoch": 21.16, "learning_rate": 3.9423789777203073e-05, "loss": 2.3032, "step": 4271000 }, { "epoch": 21.16, "learning_rate": 3.942255119077699e-05, "loss": 2.2956, "step": 4271500 }, { "epoch": 21.16, "learning_rate": 3.942131260435091e-05, "loss": 2.3496, "step": 4272000 }, { "epoch": 21.17, "learning_rate": 3.9420074017924824e-05, "loss": 2.3057, "step": 4272500 }, { "epoch": 21.17, "learning_rate": 3.941883790867159e-05, "loss": 2.2873, "step": 4273000 }, { "epoch": 21.17, "learning_rate": 3.941759932224551e-05, "loss": 2.2936, "step": 4273500 }, { "epoch": 21.17, "learning_rate": 3.941636073581943e-05, "loss": 2.3081, "step": 4274000 }, { "epoch": 21.18, "learning_rate": 3.9415122149393344e-05, "loss": 2.2982, "step": 4274500 }, { "epoch": 21.18, "learning_rate": 3.941388604014011e-05, "loss": 2.3144, "step": 4275000 }, { "epoch": 21.18, "learning_rate": 3.941264745371402e-05, "loss": 2.3072, "step": 4275500 }, { "epoch": 21.18, "learning_rate": 3.941140886728794e-05, "loss": 2.3279, "step": 4276000 }, { "epoch": 21.19, "learning_rate": 3.941017028086186e-05, "loss": 2.3102, "step": 4276500 }, { "epoch": 21.19, "learning_rate": 3.9408931694435774e-05, "loss": 2.298, "step": 4277000 }, { "epoch": 21.19, "learning_rate": 3.940769558518255e-05, "loss": 2.3229, "step": 4277500 }, { "epoch": 21.19, "learning_rate": 3.9406456998756466e-05, "loss": 2.3222, "step": 4278000 }, { "epoch": 21.2, "learning_rate": 3.940521841233038e-05, "loss": 2.2881, "step": 4278500 }, { "epoch": 21.2, "learning_rate": 3.940397982590429e-05, "loss": 2.3254, "step": 4279000 }, { "epoch": 21.2, "learning_rate": 3.940274123947821e-05, "loss": 2.3011, "step": 4279500 }, { "epoch": 21.2, "learning_rate": 3.940150265305213e-05, "loss": 2.3101, "step": 4280000 }, { "epoch": 21.21, "learning_rate": 3.9400264066626044e-05, "loss": 2.3173, "step": 4280500 }, { "epoch": 21.21, "learning_rate": 3.939902548019996e-05, "loss": 2.3111, "step": 4281000 }, { "epoch": 21.21, "learning_rate": 3.939778937094673e-05, "loss": 2.3002, "step": 4281500 }, { "epoch": 21.21, "learning_rate": 3.93965532616935e-05, "loss": 2.2919, "step": 4282000 }, { "epoch": 21.22, "learning_rate": 3.9395314675267415e-05, "loss": 2.2976, "step": 4282500 }, { "epoch": 21.22, "learning_rate": 3.9394078566014184e-05, "loss": 2.3086, "step": 4283000 }, { "epoch": 21.22, "learning_rate": 3.9392839979588094e-05, "loss": 2.3153, "step": 4283500 }, { "epoch": 21.22, "learning_rate": 3.939160387033486e-05, "loss": 2.3233, "step": 4284000 }, { "epoch": 21.23, "learning_rate": 3.939036528390878e-05, "loss": 2.3166, "step": 4284500 }, { "epoch": 21.23, "learning_rate": 3.9389129174655556e-05, "loss": 2.2879, "step": 4285000 }, { "epoch": 21.23, "learning_rate": 3.9387890588229466e-05, "loss": 2.2924, "step": 4285500 }, { "epoch": 21.23, "learning_rate": 3.938665200180338e-05, "loss": 2.3315, "step": 4286000 }, { "epoch": 21.24, "learning_rate": 3.93854134153773e-05, "loss": 2.2909, "step": 4286500 }, { "epoch": 21.24, "learning_rate": 3.938417482895122e-05, "loss": 2.3155, "step": 4287000 }, { "epoch": 21.24, "learning_rate": 3.9382936242525134e-05, "loss": 2.3186, "step": 4287500 }, { "epoch": 21.24, "learning_rate": 3.938169765609905e-05, "loss": 2.3072, "step": 4288000 }, { "epoch": 21.25, "learning_rate": 3.938046154684582e-05, "loss": 2.3222, "step": 4288500 }, { "epoch": 21.25, "learning_rate": 3.937922296041973e-05, "loss": 2.3089, "step": 4289000 }, { "epoch": 21.25, "learning_rate": 3.9377984373993646e-05, "loss": 2.3375, "step": 4289500 }, { "epoch": 21.25, "learning_rate": 3.937674578756756e-05, "loss": 2.3187, "step": 4290000 }, { "epoch": 21.26, "learning_rate": 3.937550720114148e-05, "loss": 2.3231, "step": 4290500 }, { "epoch": 21.26, "learning_rate": 3.93742686147154e-05, "loss": 2.2876, "step": 4291000 }, { "epoch": 21.26, "learning_rate": 3.9373030028289314e-05, "loss": 2.3264, "step": 4291500 }, { "epoch": 21.26, "learning_rate": 3.937179144186323e-05, "loss": 2.3282, "step": 4292000 }, { "epoch": 21.27, "learning_rate": 3.937055285543715e-05, "loss": 2.3319, "step": 4292500 }, { "epoch": 21.27, "learning_rate": 3.9369314269011065e-05, "loss": 2.3423, "step": 4293000 }, { "epoch": 21.27, "learning_rate": 3.936807568258498e-05, "loss": 2.286, "step": 4293500 }, { "epoch": 21.27, "learning_rate": 3.93668370961589e-05, "loss": 2.3192, "step": 4294000 }, { "epoch": 21.28, "learning_rate": 3.9365598509732816e-05, "loss": 2.3145, "step": 4294500 }, { "epoch": 21.28, "learning_rate": 3.936435992330673e-05, "loss": 2.3265, "step": 4295000 }, { "epoch": 21.28, "learning_rate": 3.936312133688065e-05, "loss": 2.334, "step": 4295500 }, { "epoch": 21.28, "learning_rate": 3.9361882750454566e-05, "loss": 2.3322, "step": 4296000 }, { "epoch": 21.29, "learning_rate": 3.9360646641201335e-05, "loss": 2.2928, "step": 4296500 }, { "epoch": 21.29, "learning_rate": 3.9359408054775245e-05, "loss": 2.314, "step": 4297000 }, { "epoch": 21.29, "learning_rate": 3.935816946834916e-05, "loss": 2.3296, "step": 4297500 }, { "epoch": 21.29, "learning_rate": 3.935693335909593e-05, "loss": 2.3279, "step": 4298000 }, { "epoch": 21.3, "learning_rate": 3.935569477266985e-05, "loss": 2.2969, "step": 4298500 }, { "epoch": 21.3, "learning_rate": 3.9354456186243765e-05, "loss": 2.3151, "step": 4299000 }, { "epoch": 21.3, "learning_rate": 3.935321759981768e-05, "loss": 2.3044, "step": 4299500 }, { "epoch": 21.3, "learning_rate": 3.935198149056445e-05, "loss": 2.3075, "step": 4300000 }, { "epoch": 21.31, "learning_rate": 3.935074290413837e-05, "loss": 2.2896, "step": 4300500 }, { "epoch": 21.31, "learning_rate": 3.9349504317712285e-05, "loss": 2.3119, "step": 4301000 }, { "epoch": 21.31, "learning_rate": 3.93482657312862e-05, "loss": 2.3079, "step": 4301500 }, { "epoch": 21.31, "learning_rate": 3.934702714486012e-05, "loss": 2.3173, "step": 4302000 }, { "epoch": 21.32, "learning_rate": 3.9345788558434035e-05, "loss": 2.3188, "step": 4302500 }, { "epoch": 21.32, "learning_rate": 3.934454997200795e-05, "loss": 2.2956, "step": 4303000 }, { "epoch": 21.32, "learning_rate": 3.934331138558186e-05, "loss": 2.3025, "step": 4303500 }, { "epoch": 21.32, "learning_rate": 3.934207279915578e-05, "loss": 2.3133, "step": 4304000 }, { "epoch": 21.33, "learning_rate": 3.9340834212729696e-05, "loss": 2.2965, "step": 4304500 }, { "epoch": 21.33, "learning_rate": 3.933959562630361e-05, "loss": 2.3176, "step": 4305000 }, { "epoch": 21.33, "learning_rate": 3.933835703987753e-05, "loss": 2.3177, "step": 4305500 }, { "epoch": 21.33, "learning_rate": 3.933711845345145e-05, "loss": 2.3032, "step": 4306000 }, { "epoch": 21.34, "learning_rate": 3.9335879867025364e-05, "loss": 2.2993, "step": 4306500 }, { "epoch": 21.34, "learning_rate": 3.9334641280599274e-05, "loss": 2.3499, "step": 4307000 }, { "epoch": 21.34, "learning_rate": 3.933340269417319e-05, "loss": 2.3083, "step": 4307500 }, { "epoch": 21.34, "learning_rate": 3.933216658491997e-05, "loss": 2.3264, "step": 4308000 }, { "epoch": 21.35, "learning_rate": 3.9330927998493884e-05, "loss": 2.3329, "step": 4308500 }, { "epoch": 21.35, "learning_rate": 3.93296894120678e-05, "loss": 2.2855, "step": 4309000 }, { "epoch": 21.35, "learning_rate": 3.932845082564172e-05, "loss": 2.3285, "step": 4309500 }, { "epoch": 21.35, "learning_rate": 3.932721223921563e-05, "loss": 2.2971, "step": 4310000 }, { "epoch": 21.36, "learning_rate": 3.9325976129962396e-05, "loss": 2.3157, "step": 4310500 }, { "epoch": 21.36, "learning_rate": 3.9324740020709165e-05, "loss": 2.3484, "step": 4311000 }, { "epoch": 21.36, "learning_rate": 3.932350143428308e-05, "loss": 2.3032, "step": 4311500 }, { "epoch": 21.36, "learning_rate": 3.932226532502985e-05, "loss": 2.3432, "step": 4312000 }, { "epoch": 21.37, "learning_rate": 3.932102673860377e-05, "loss": 2.3068, "step": 4312500 }, { "epoch": 21.37, "learning_rate": 3.9319788152177685e-05, "loss": 2.3444, "step": 4313000 }, { "epoch": 21.37, "learning_rate": 3.93185495657516e-05, "loss": 2.3294, "step": 4313500 }, { "epoch": 21.37, "learning_rate": 3.931731097932552e-05, "loss": 2.3024, "step": 4314000 }, { "epoch": 21.38, "learning_rate": 3.9316072392899436e-05, "loss": 2.3364, "step": 4314500 }, { "epoch": 21.38, "learning_rate": 3.931483380647335e-05, "loss": 2.3322, "step": 4315000 }, { "epoch": 21.38, "learning_rate": 3.931359522004727e-05, "loss": 2.3135, "step": 4315500 }, { "epoch": 21.38, "learning_rate": 3.9312356633621186e-05, "loss": 2.3176, "step": 4316000 }, { "epoch": 21.39, "learning_rate": 3.93111180471951e-05, "loss": 2.34, "step": 4316500 }, { "epoch": 21.39, "learning_rate": 3.9309879460769014e-05, "loss": 2.3245, "step": 4317000 }, { "epoch": 21.39, "learning_rate": 3.930864087434293e-05, "loss": 2.3008, "step": 4317500 }, { "epoch": 21.39, "learning_rate": 3.93074047650897e-05, "loss": 2.2976, "step": 4318000 }, { "epoch": 21.4, "learning_rate": 3.9306166178663616e-05, "loss": 2.3099, "step": 4318500 }, { "epoch": 21.4, "learning_rate": 3.9304930069410385e-05, "loss": 2.3081, "step": 4319000 }, { "epoch": 21.4, "learning_rate": 3.93036914829843e-05, "loss": 2.3236, "step": 4319500 }, { "epoch": 21.4, "learning_rate": 3.930245289655822e-05, "loss": 2.3435, "step": 4320000 }, { "epoch": 21.41, "learning_rate": 3.9301214310132136e-05, "loss": 2.3184, "step": 4320500 }, { "epoch": 21.41, "learning_rate": 3.929997572370605e-05, "loss": 2.3174, "step": 4321000 }, { "epoch": 21.41, "learning_rate": 3.929873713727997e-05, "loss": 2.3308, "step": 4321500 }, { "epoch": 21.41, "learning_rate": 3.9297498550853887e-05, "loss": 2.2802, "step": 4322000 }, { "epoch": 21.42, "learning_rate": 3.9296259964427804e-05, "loss": 2.2863, "step": 4322500 }, { "epoch": 21.42, "learning_rate": 3.929502137800172e-05, "loss": 2.3012, "step": 4323000 }, { "epoch": 21.42, "learning_rate": 3.929378526874848e-05, "loss": 2.3041, "step": 4323500 }, { "epoch": 21.42, "learning_rate": 3.929254915949525e-05, "loss": 2.343, "step": 4324000 }, { "epoch": 21.43, "learning_rate": 3.929131057306917e-05, "loss": 2.3007, "step": 4324500 }, { "epoch": 21.43, "learning_rate": 3.9290071986643085e-05, "loss": 2.3265, "step": 4325000 }, { "epoch": 21.43, "learning_rate": 3.9288833400217e-05, "loss": 2.3181, "step": 4325500 }, { "epoch": 21.43, "learning_rate": 3.928759481379092e-05, "loss": 2.3068, "step": 4326000 }, { "epoch": 21.43, "learning_rate": 3.9286356227364836e-05, "loss": 2.3173, "step": 4326500 }, { "epoch": 21.44, "learning_rate": 3.928511764093875e-05, "loss": 2.3145, "step": 4327000 }, { "epoch": 21.44, "learning_rate": 3.9283881531685515e-05, "loss": 2.3157, "step": 4327500 }, { "epoch": 21.44, "learning_rate": 3.928264294525943e-05, "loss": 2.3428, "step": 4328000 }, { "epoch": 21.44, "learning_rate": 3.928140435883335e-05, "loss": 2.3414, "step": 4328500 }, { "epoch": 21.45, "learning_rate": 3.9280165772407266e-05, "loss": 2.3233, "step": 4329000 }, { "epoch": 21.45, "learning_rate": 3.9278929663154034e-05, "loss": 2.3465, "step": 4329500 }, { "epoch": 21.45, "learning_rate": 3.927769107672795e-05, "loss": 2.3259, "step": 4330000 }, { "epoch": 21.45, "learning_rate": 3.927645249030187e-05, "loss": 2.3362, "step": 4330500 }, { "epoch": 21.46, "learning_rate": 3.9275213903875785e-05, "loss": 2.3229, "step": 4331000 }, { "epoch": 21.46, "learning_rate": 3.92739753174497e-05, "loss": 2.3119, "step": 4331500 }, { "epoch": 21.46, "learning_rate": 3.927273673102362e-05, "loss": 2.3136, "step": 4332000 }, { "epoch": 21.46, "learning_rate": 3.9271498144597536e-05, "loss": 2.332, "step": 4332500 }, { "epoch": 21.47, "learning_rate": 3.927025955817145e-05, "loss": 2.3427, "step": 4333000 }, { "epoch": 21.47, "learning_rate": 3.926902097174537e-05, "loss": 2.348, "step": 4333500 }, { "epoch": 21.47, "learning_rate": 3.926778486249213e-05, "loss": 2.3072, "step": 4334000 }, { "epoch": 21.47, "learning_rate": 3.926654627606605e-05, "loss": 2.2956, "step": 4334500 }, { "epoch": 21.48, "learning_rate": 3.9265310166812824e-05, "loss": 2.3138, "step": 4335000 }, { "epoch": 21.48, "learning_rate": 3.9264071580386735e-05, "loss": 2.296, "step": 4335500 }, { "epoch": 21.48, "learning_rate": 3.926283299396065e-05, "loss": 2.3195, "step": 4336000 }, { "epoch": 21.48, "learning_rate": 3.926159440753457e-05, "loss": 2.3196, "step": 4336500 }, { "epoch": 21.49, "learning_rate": 3.9260358298281344e-05, "loss": 2.2992, "step": 4337000 }, { "epoch": 21.49, "learning_rate": 3.925911971185526e-05, "loss": 2.3306, "step": 4337500 }, { "epoch": 21.49, "learning_rate": 3.925788112542918e-05, "loss": 2.3089, "step": 4338000 }, { "epoch": 21.49, "learning_rate": 3.925664253900309e-05, "loss": 2.3162, "step": 4338500 }, { "epoch": 21.5, "learning_rate": 3.9255403952577005e-05, "loss": 2.3234, "step": 4339000 }, { "epoch": 21.5, "learning_rate": 3.925416536615092e-05, "loss": 2.3258, "step": 4339500 }, { "epoch": 21.5, "learning_rate": 3.925292677972484e-05, "loss": 2.3317, "step": 4340000 }, { "epoch": 21.5, "learning_rate": 3.925168819329875e-05, "loss": 2.3034, "step": 4340500 }, { "epoch": 21.51, "learning_rate": 3.9250449606872666e-05, "loss": 2.3462, "step": 4341000 }, { "epoch": 21.51, "learning_rate": 3.924921102044658e-05, "loss": 2.3138, "step": 4341500 }, { "epoch": 21.51, "learning_rate": 3.924797491119335e-05, "loss": 2.3311, "step": 4342000 }, { "epoch": 21.51, "learning_rate": 3.924673632476727e-05, "loss": 2.3276, "step": 4342500 }, { "epoch": 21.52, "learning_rate": 3.9245497738341186e-05, "loss": 2.3321, "step": 4343000 }, { "epoch": 21.52, "learning_rate": 3.924426162908796e-05, "loss": 2.2764, "step": 4343500 }, { "epoch": 21.52, "learning_rate": 3.924302304266188e-05, "loss": 2.3127, "step": 4344000 }, { "epoch": 21.52, "learning_rate": 3.9241784456235795e-05, "loss": 2.3023, "step": 4344500 }, { "epoch": 21.53, "learning_rate": 3.9240545869809705e-05, "loss": 2.2963, "step": 4345000 }, { "epoch": 21.53, "learning_rate": 3.923930728338362e-05, "loss": 2.3223, "step": 4345500 }, { "epoch": 21.53, "learning_rate": 3.923806869695754e-05, "loss": 2.3039, "step": 4346000 }, { "epoch": 21.53, "learning_rate": 3.9236830110531456e-05, "loss": 2.3454, "step": 4346500 }, { "epoch": 21.54, "learning_rate": 3.923559152410537e-05, "loss": 2.3503, "step": 4347000 }, { "epoch": 21.54, "learning_rate": 3.923435293767928e-05, "loss": 2.3224, "step": 4347500 }, { "epoch": 21.54, "learning_rate": 3.923311682842605e-05, "loss": 2.3291, "step": 4348000 }, { "epoch": 21.54, "learning_rate": 3.923187824199997e-05, "loss": 2.3457, "step": 4348500 }, { "epoch": 21.55, "learning_rate": 3.9230639655573886e-05, "loss": 2.3133, "step": 4349000 }, { "epoch": 21.55, "learning_rate": 3.92294010691478e-05, "loss": 2.3439, "step": 4349500 }, { "epoch": 21.55, "learning_rate": 3.922816248272172e-05, "loss": 2.3106, "step": 4350000 }, { "epoch": 21.55, "learning_rate": 3.9226926373468495e-05, "loss": 2.3065, "step": 4350500 }, { "epoch": 21.56, "learning_rate": 3.922569026421526e-05, "loss": 2.3188, "step": 4351000 }, { "epoch": 21.56, "learning_rate": 3.9224451677789174e-05, "loss": 2.3561, "step": 4351500 }, { "epoch": 21.56, "learning_rate": 3.922321309136309e-05, "loss": 2.2994, "step": 4352000 }, { "epoch": 21.56, "learning_rate": 3.922197450493701e-05, "loss": 2.2983, "step": 4352500 }, { "epoch": 21.57, "learning_rate": 3.9220735918510925e-05, "loss": 2.2753, "step": 4353000 }, { "epoch": 21.57, "learning_rate": 3.921949733208484e-05, "loss": 2.3172, "step": 4353500 }, { "epoch": 21.57, "learning_rate": 3.921825874565876e-05, "loss": 2.3432, "step": 4354000 }, { "epoch": 21.57, "learning_rate": 3.921702015923267e-05, "loss": 2.3251, "step": 4354500 }, { "epoch": 21.58, "learning_rate": 3.9215781572806586e-05, "loss": 2.3145, "step": 4355000 }, { "epoch": 21.58, "learning_rate": 3.92145429863805e-05, "loss": 2.3169, "step": 4355500 }, { "epoch": 21.58, "learning_rate": 3.921330687712728e-05, "loss": 2.3224, "step": 4356000 }, { "epoch": 21.58, "learning_rate": 3.9212068290701195e-05, "loss": 2.2893, "step": 4356500 }, { "epoch": 21.59, "learning_rate": 3.921082970427511e-05, "loss": 2.3134, "step": 4357000 }, { "epoch": 21.59, "learning_rate": 3.920959111784902e-05, "loss": 2.3234, "step": 4357500 }, { "epoch": 21.59, "learning_rate": 3.920835500859579e-05, "loss": 2.2992, "step": 4358000 }, { "epoch": 21.59, "learning_rate": 3.920711642216971e-05, "loss": 2.3117, "step": 4358500 }, { "epoch": 21.6, "learning_rate": 3.9205877835743625e-05, "loss": 2.321, "step": 4359000 }, { "epoch": 21.6, "learning_rate": 3.920463924931754e-05, "loss": 2.2993, "step": 4359500 }, { "epoch": 21.6, "learning_rate": 3.920340314006431e-05, "loss": 2.319, "step": 4360000 }, { "epoch": 21.6, "learning_rate": 3.920216455363823e-05, "loss": 2.3492, "step": 4360500 }, { "epoch": 21.61, "learning_rate": 3.9200928444384996e-05, "loss": 2.3343, "step": 4361000 }, { "epoch": 21.61, "learning_rate": 3.919968985795891e-05, "loss": 2.3362, "step": 4361500 }, { "epoch": 21.61, "learning_rate": 3.9198451271532824e-05, "loss": 2.3155, "step": 4362000 }, { "epoch": 21.61, "learning_rate": 3.919721516227959e-05, "loss": 2.3279, "step": 4362500 }, { "epoch": 21.62, "learning_rate": 3.919597657585351e-05, "loss": 2.3272, "step": 4363000 }, { "epoch": 21.62, "learning_rate": 3.9194737989427426e-05, "loss": 2.3127, "step": 4363500 }, { "epoch": 21.62, "learning_rate": 3.919349940300134e-05, "loss": 2.3002, "step": 4364000 }, { "epoch": 21.62, "learning_rate": 3.919226081657526e-05, "loss": 2.3042, "step": 4364500 }, { "epoch": 21.63, "learning_rate": 3.919102223014918e-05, "loss": 2.302, "step": 4365000 }, { "epoch": 21.63, "learning_rate": 3.9189786120895946e-05, "loss": 2.3265, "step": 4365500 }, { "epoch": 21.63, "learning_rate": 3.918854753446986e-05, "loss": 2.3245, "step": 4366000 }, { "epoch": 21.63, "learning_rate": 3.918730894804378e-05, "loss": 2.3159, "step": 4366500 }, { "epoch": 21.64, "learning_rate": 3.918607283879055e-05, "loss": 2.3157, "step": 4367000 }, { "epoch": 21.64, "learning_rate": 3.918483425236446e-05, "loss": 2.3319, "step": 4367500 }, { "epoch": 21.64, "learning_rate": 3.9183595665938376e-05, "loss": 2.3131, "step": 4368000 }, { "epoch": 21.64, "learning_rate": 3.918235707951229e-05, "loss": 2.3323, "step": 4368500 }, { "epoch": 21.65, "learning_rate": 3.918111849308621e-05, "loss": 2.3466, "step": 4369000 }, { "epoch": 21.65, "learning_rate": 3.9179879906660126e-05, "loss": 2.3123, "step": 4369500 }, { "epoch": 21.65, "learning_rate": 3.917864132023404e-05, "loss": 2.296, "step": 4370000 }, { "epoch": 21.65, "learning_rate": 3.917740273380796e-05, "loss": 2.313, "step": 4370500 }, { "epoch": 21.66, "learning_rate": 3.917616414738188e-05, "loss": 2.3314, "step": 4371000 }, { "epoch": 21.66, "learning_rate": 3.9174928038128646e-05, "loss": 2.2958, "step": 4371500 }, { "epoch": 21.66, "learning_rate": 3.917368945170256e-05, "loss": 2.3307, "step": 4372000 }, { "epoch": 21.66, "learning_rate": 3.917245086527648e-05, "loss": 2.3187, "step": 4372500 }, { "epoch": 21.67, "learning_rate": 3.91712122788504e-05, "loss": 2.3269, "step": 4373000 }, { "epoch": 21.67, "learning_rate": 3.9169973692424314e-05, "loss": 2.3228, "step": 4373500 }, { "epoch": 21.67, "learning_rate": 3.916873510599823e-05, "loss": 2.3128, "step": 4374000 }, { "epoch": 21.67, "learning_rate": 3.916749651957215e-05, "loss": 2.3191, "step": 4374500 }, { "epoch": 21.68, "learning_rate": 3.9166257933146064e-05, "loss": 2.3091, "step": 4375000 }, { "epoch": 21.68, "learning_rate": 3.9165019346719975e-05, "loss": 2.3338, "step": 4375500 }, { "epoch": 21.68, "learning_rate": 3.916378076029389e-05, "loss": 2.3332, "step": 4376000 }, { "epoch": 21.68, "learning_rate": 3.916254465104066e-05, "loss": 2.3182, "step": 4376500 }, { "epoch": 21.69, "learning_rate": 3.916130606461458e-05, "loss": 2.3053, "step": 4377000 }, { "epoch": 21.69, "learning_rate": 3.9160067478188494e-05, "loss": 2.3355, "step": 4377500 }, { "epoch": 21.69, "learning_rate": 3.915882889176241e-05, "loss": 2.3166, "step": 4378000 }, { "epoch": 21.69, "learning_rate": 3.915759278250918e-05, "loss": 2.3133, "step": 4378500 }, { "epoch": 21.7, "learning_rate": 3.91563541960831e-05, "loss": 2.3149, "step": 4379000 }, { "epoch": 21.7, "learning_rate": 3.9155115609657014e-05, "loss": 2.3224, "step": 4379500 }, { "epoch": 21.7, "learning_rate": 3.915387702323093e-05, "loss": 2.3327, "step": 4380000 }, { "epoch": 21.7, "learning_rate": 3.915263843680485e-05, "loss": 2.3332, "step": 4380500 }, { "epoch": 21.7, "learning_rate": 3.9151399850378765e-05, "loss": 2.3271, "step": 4381000 }, { "epoch": 21.71, "learning_rate": 3.915016126395268e-05, "loss": 2.286, "step": 4381500 }, { "epoch": 21.71, "learning_rate": 3.914892267752659e-05, "loss": 2.3318, "step": 4382000 }, { "epoch": 21.71, "learning_rate": 3.914768409110051e-05, "loss": 2.3186, "step": 4382500 }, { "epoch": 21.71, "learning_rate": 3.9146445504674425e-05, "loss": 2.3394, "step": 4383000 }, { "epoch": 21.72, "learning_rate": 3.914520691824834e-05, "loss": 2.3411, "step": 4383500 }, { "epoch": 21.72, "learning_rate": 3.914396833182226e-05, "loss": 2.312, "step": 4384000 }, { "epoch": 21.72, "learning_rate": 3.914273222256903e-05, "loss": 2.3266, "step": 4384500 }, { "epoch": 21.72, "learning_rate": 3.9141493636142945e-05, "loss": 2.3351, "step": 4385000 }, { "epoch": 21.73, "learning_rate": 3.914025504971686e-05, "loss": 2.3116, "step": 4385500 }, { "epoch": 21.73, "learning_rate": 3.913901646329078e-05, "loss": 2.3258, "step": 4386000 }, { "epoch": 21.73, "learning_rate": 3.9137777876864696e-05, "loss": 2.3249, "step": 4386500 }, { "epoch": 21.73, "learning_rate": 3.9136541767611465e-05, "loss": 2.3161, "step": 4387000 }, { "epoch": 21.74, "learning_rate": 3.913530318118538e-05, "loss": 2.3343, "step": 4387500 }, { "epoch": 21.74, "learning_rate": 3.91340645947593e-05, "loss": 2.2996, "step": 4388000 }, { "epoch": 21.74, "learning_rate": 3.913282848550606e-05, "loss": 2.307, "step": 4388500 }, { "epoch": 21.74, "learning_rate": 3.913159485342569e-05, "loss": 2.308, "step": 4389000 }, { "epoch": 21.75, "learning_rate": 3.9130356266999605e-05, "loss": 2.3072, "step": 4389500 }, { "epoch": 21.75, "learning_rate": 3.9129117680573515e-05, "loss": 2.3254, "step": 4390000 }, { "epoch": 21.75, "learning_rate": 3.912787909414743e-05, "loss": 2.3295, "step": 4390500 }, { "epoch": 21.75, "learning_rate": 3.912664050772135e-05, "loss": 2.3067, "step": 4391000 }, { "epoch": 21.76, "learning_rate": 3.9125401921295266e-05, "loss": 2.3233, "step": 4391500 }, { "epoch": 21.76, "learning_rate": 3.912416333486918e-05, "loss": 2.3388, "step": 4392000 }, { "epoch": 21.76, "learning_rate": 3.912292474844309e-05, "loss": 2.2997, "step": 4392500 }, { "epoch": 21.76, "learning_rate": 3.912168616201701e-05, "loss": 2.3244, "step": 4393000 }, { "epoch": 21.77, "learning_rate": 3.912044757559093e-05, "loss": 2.3057, "step": 4393500 }, { "epoch": 21.77, "learning_rate": 3.9119208989164844e-05, "loss": 2.291, "step": 4394000 }, { "epoch": 21.77, "learning_rate": 3.911797040273876e-05, "loss": 2.3322, "step": 4394500 }, { "epoch": 21.77, "learning_rate": 3.911673181631268e-05, "loss": 2.3331, "step": 4395000 }, { "epoch": 21.78, "learning_rate": 3.9115493229886595e-05, "loss": 2.3059, "step": 4395500 }, { "epoch": 21.78, "learning_rate": 3.911425464346051e-05, "loss": 2.3113, "step": 4396000 }, { "epoch": 21.78, "learning_rate": 3.911301605703443e-05, "loss": 2.3046, "step": 4396500 }, { "epoch": 21.78, "learning_rate": 3.91117799477812e-05, "loss": 2.3182, "step": 4397000 }, { "epoch": 21.79, "learning_rate": 3.9110541361355114e-05, "loss": 2.3106, "step": 4397500 }, { "epoch": 21.79, "learning_rate": 3.910930277492903e-05, "loss": 2.3463, "step": 4398000 }, { "epoch": 21.79, "learning_rate": 3.910806418850295e-05, "loss": 2.3151, "step": 4398500 }, { "epoch": 21.79, "learning_rate": 3.9106825602076865e-05, "loss": 2.3151, "step": 4399000 }, { "epoch": 21.8, "learning_rate": 3.910558701565078e-05, "loss": 2.332, "step": 4399500 }, { "epoch": 21.8, "learning_rate": 3.91043484292247e-05, "loss": 2.3448, "step": 4400000 }, { "epoch": 21.8, "learning_rate": 3.9103109842798616e-05, "loss": 2.3485, "step": 4400500 }, { "epoch": 21.8, "learning_rate": 3.910187373354538e-05, "loss": 2.342, "step": 4401000 }, { "epoch": 21.81, "learning_rate": 3.9100635147119295e-05, "loss": 2.3263, "step": 4401500 }, { "epoch": 21.81, "learning_rate": 3.9099399037866063e-05, "loss": 2.3294, "step": 4402000 }, { "epoch": 21.81, "learning_rate": 3.909816292861284e-05, "loss": 2.3133, "step": 4402500 }, { "epoch": 21.81, "learning_rate": 3.9096924342186756e-05, "loss": 2.3127, "step": 4403000 }, { "epoch": 21.82, "learning_rate": 3.9095685755760666e-05, "loss": 2.3094, "step": 4403500 }, { "epoch": 21.82, "learning_rate": 3.909444716933458e-05, "loss": 2.2822, "step": 4404000 }, { "epoch": 21.82, "learning_rate": 3.90932085829085e-05, "loss": 2.3257, "step": 4404500 }, { "epoch": 21.82, "learning_rate": 3.909196999648242e-05, "loss": 2.2952, "step": 4405000 }, { "epoch": 21.83, "learning_rate": 3.9090731410056334e-05, "loss": 2.3526, "step": 4405500 }, { "epoch": 21.83, "learning_rate": 3.9089492823630244e-05, "loss": 2.3249, "step": 4406000 }, { "epoch": 21.83, "learning_rate": 3.908825423720416e-05, "loss": 2.3282, "step": 4406500 }, { "epoch": 21.83, "learning_rate": 3.9087018127950936e-05, "loss": 2.3322, "step": 4407000 }, { "epoch": 21.84, "learning_rate": 3.9085779541524853e-05, "loss": 2.3248, "step": 4407500 }, { "epoch": 21.84, "learning_rate": 3.9084540955098764e-05, "loss": 2.325, "step": 4408000 }, { "epoch": 21.84, "learning_rate": 3.908330236867268e-05, "loss": 2.3289, "step": 4408500 }, { "epoch": 21.84, "learning_rate": 3.90820637822466e-05, "loss": 2.3138, "step": 4409000 }, { "epoch": 21.85, "learning_rate": 3.908082767299337e-05, "loss": 2.3137, "step": 4409500 }, { "epoch": 21.85, "learning_rate": 3.907958908656728e-05, "loss": 2.3257, "step": 4410000 }, { "epoch": 21.85, "learning_rate": 3.90783505001412e-05, "loss": 2.322, "step": 4410500 }, { "epoch": 21.85, "learning_rate": 3.907711191371512e-05, "loss": 2.3273, "step": 4411000 }, { "epoch": 21.86, "learning_rate": 3.9075873327289034e-05, "loss": 2.3332, "step": 4411500 }, { "epoch": 21.86, "learning_rate": 3.907463474086295e-05, "loss": 2.337, "step": 4412000 }, { "epoch": 21.86, "learning_rate": 3.907339863160972e-05, "loss": 2.3351, "step": 4412500 }, { "epoch": 21.86, "learning_rate": 3.9072160045183637e-05, "loss": 2.3349, "step": 4413000 }, { "epoch": 21.87, "learning_rate": 3.9070921458757554e-05, "loss": 2.3297, "step": 4413500 }, { "epoch": 21.87, "learning_rate": 3.906968287233147e-05, "loss": 2.3183, "step": 4414000 }, { "epoch": 21.87, "learning_rate": 3.906844428590538e-05, "loss": 2.3166, "step": 4414500 }, { "epoch": 21.87, "learning_rate": 3.90672056994793e-05, "loss": 2.317, "step": 4415000 }, { "epoch": 21.88, "learning_rate": 3.9065967113053214e-05, "loss": 2.3059, "step": 4415500 }, { "epoch": 21.88, "learning_rate": 3.906472852662713e-05, "loss": 2.3185, "step": 4416000 }, { "epoch": 21.88, "learning_rate": 3.906349241737391e-05, "loss": 2.3452, "step": 4416500 }, { "epoch": 21.88, "learning_rate": 3.906225383094782e-05, "loss": 2.32, "step": 4417000 }, { "epoch": 21.89, "learning_rate": 3.9061015244521734e-05, "loss": 2.3385, "step": 4417500 }, { "epoch": 21.89, "learning_rate": 3.905977665809565e-05, "loss": 2.3351, "step": 4418000 }, { "epoch": 21.89, "learning_rate": 3.905853807166957e-05, "loss": 2.3168, "step": 4418500 }, { "epoch": 21.89, "learning_rate": 3.905730196241634e-05, "loss": 2.3263, "step": 4419000 }, { "epoch": 21.9, "learning_rate": 3.9056063375990254e-05, "loss": 2.331, "step": 4419500 }, { "epoch": 21.9, "learning_rate": 3.905482478956417e-05, "loss": 2.3449, "step": 4420000 }, { "epoch": 21.9, "learning_rate": 3.905358868031094e-05, "loss": 2.327, "step": 4420500 }, { "epoch": 21.9, "learning_rate": 3.9052350093884856e-05, "loss": 2.3243, "step": 4421000 }, { "epoch": 21.91, "learning_rate": 3.905111150745877e-05, "loss": 2.3226, "step": 4421500 }, { "epoch": 21.91, "learning_rate": 3.904987292103269e-05, "loss": 2.3242, "step": 4422000 }, { "epoch": 21.91, "learning_rate": 3.904863433460661e-05, "loss": 2.3421, "step": 4422500 }, { "epoch": 21.91, "learning_rate": 3.9047395748180524e-05, "loss": 2.3393, "step": 4423000 }, { "epoch": 21.92, "learning_rate": 3.9046157161754434e-05, "loss": 2.3457, "step": 4423500 }, { "epoch": 21.92, "learning_rate": 3.904491857532835e-05, "loss": 2.3174, "step": 4424000 }, { "epoch": 21.92, "learning_rate": 3.904367998890227e-05, "loss": 2.3136, "step": 4424500 }, { "epoch": 21.92, "learning_rate": 3.904244387964904e-05, "loss": 2.311, "step": 4425000 }, { "epoch": 21.93, "learning_rate": 3.9041205293222954e-05, "loss": 2.3206, "step": 4425500 }, { "epoch": 21.93, "learning_rate": 3.903996670679687e-05, "loss": 2.3157, "step": 4426000 }, { "epoch": 21.93, "learning_rate": 3.903872812037079e-05, "loss": 2.3178, "step": 4426500 }, { "epoch": 21.93, "learning_rate": 3.90374895339447e-05, "loss": 2.3013, "step": 4427000 }, { "epoch": 21.94, "learning_rate": 3.9036250947518615e-05, "loss": 2.3279, "step": 4427500 }, { "epoch": 21.94, "learning_rate": 3.903501236109253e-05, "loss": 2.3246, "step": 4428000 }, { "epoch": 21.94, "learning_rate": 3.903377377466645e-05, "loss": 2.3211, "step": 4428500 }, { "epoch": 21.94, "learning_rate": 3.9032535188240365e-05, "loss": 2.3386, "step": 4429000 }, { "epoch": 21.95, "learning_rate": 3.903129907898714e-05, "loss": 2.3297, "step": 4429500 }, { "epoch": 21.95, "learning_rate": 3.903006049256105e-05, "loss": 2.3189, "step": 4430000 }, { "epoch": 21.95, "learning_rate": 3.902882438330782e-05, "loss": 2.3271, "step": 4430500 }, { "epoch": 21.95, "learning_rate": 3.902758579688174e-05, "loss": 2.2818, "step": 4431000 }, { "epoch": 21.96, "learning_rate": 3.9026347210455654e-05, "loss": 2.2994, "step": 4431500 }, { "epoch": 21.96, "learning_rate": 3.902510862402957e-05, "loss": 2.3206, "step": 4432000 }, { "epoch": 21.96, "learning_rate": 3.902387003760349e-05, "loss": 2.3146, "step": 4432500 }, { "epoch": 21.96, "learning_rate": 3.90226314511774e-05, "loss": 2.325, "step": 4433000 }, { "epoch": 21.97, "learning_rate": 3.9021392864751315e-05, "loss": 2.3225, "step": 4433500 }, { "epoch": 21.97, "learning_rate": 3.902015675549809e-05, "loss": 2.3511, "step": 4434000 }, { "epoch": 21.97, "learning_rate": 3.901891816907201e-05, "loss": 2.307, "step": 4434500 }, { "epoch": 21.97, "learning_rate": 3.9017679582645924e-05, "loss": 2.3254, "step": 4435000 }, { "epoch": 21.98, "learning_rate": 3.901644099621984e-05, "loss": 2.3352, "step": 4435500 }, { "epoch": 21.98, "learning_rate": 3.901520240979376e-05, "loss": 2.337, "step": 4436000 }, { "epoch": 21.98, "learning_rate": 3.901396630054052e-05, "loss": 2.3028, "step": 4436500 }, { "epoch": 21.98, "learning_rate": 3.901272771411444e-05, "loss": 2.3097, "step": 4437000 }, { "epoch": 21.98, "learning_rate": 3.9011489127688354e-05, "loss": 2.3093, "step": 4437500 }, { "epoch": 21.99, "learning_rate": 3.901025054126227e-05, "loss": 2.348, "step": 4438000 }, { "epoch": 21.99, "learning_rate": 3.900901195483619e-05, "loss": 2.3076, "step": 4438500 }, { "epoch": 21.99, "learning_rate": 3.900777584558296e-05, "loss": 2.3348, "step": 4439000 }, { "epoch": 21.99, "learning_rate": 3.9006537259156874e-05, "loss": 2.3648, "step": 4439500 }, { "epoch": 22.0, "learning_rate": 3.900529867273079e-05, "loss": 2.3346, "step": 4440000 }, { "epoch": 22.0, "learning_rate": 3.900406008630471e-05, "loss": 2.3289, "step": 4440500 }, { "epoch": 22.0, "eval_accuracy": 0.6535946905725298, "eval_accuracy_mlm": 0.6084227722805187, "eval_accuracy_nsp": 0.86680995767947, "eval_loss": 2.3525571823120117, "eval_runtime": 145.9518, "eval_samples_per_second": 1746.871, "eval_steps_per_second": 72.791, "step": 4440546 }, { "epoch": 22.0, "learning_rate": 3.900282397705147e-05, "loss": 2.2772, "step": 4441000 }, { "epoch": 22.0, "learning_rate": 3.9001585390625386e-05, "loss": 2.2912, "step": 4441500 }, { "epoch": 22.01, "learning_rate": 3.90003468041993e-05, "loss": 2.282, "step": 4442000 }, { "epoch": 22.01, "learning_rate": 3.899911069494607e-05, "loss": 2.2739, "step": 4442500 }, { "epoch": 22.01, "learning_rate": 3.899787210851999e-05, "loss": 2.2682, "step": 4443000 }, { "epoch": 22.01, "learning_rate": 3.8996633522093906e-05, "loss": 2.2959, "step": 4443500 }, { "epoch": 22.02, "learning_rate": 3.899539493566782e-05, "loss": 2.2891, "step": 4444000 }, { "epoch": 22.02, "learning_rate": 3.899415634924174e-05, "loss": 2.2862, "step": 4444500 }, { "epoch": 22.02, "learning_rate": 3.899291776281566e-05, "loss": 2.3008, "step": 4445000 }, { "epoch": 22.02, "learning_rate": 3.8991679176389574e-05, "loss": 2.2792, "step": 4445500 }, { "epoch": 22.03, "learning_rate": 3.899044058996349e-05, "loss": 2.2976, "step": 4446000 }, { "epoch": 22.03, "learning_rate": 3.898920448071026e-05, "loss": 2.303, "step": 4446500 }, { "epoch": 22.03, "learning_rate": 3.898796589428417e-05, "loss": 2.2843, "step": 4447000 }, { "epoch": 22.03, "learning_rate": 3.8986727307858087e-05, "loss": 2.2869, "step": 4447500 }, { "epoch": 22.04, "learning_rate": 3.8985488721432003e-05, "loss": 2.2573, "step": 4448000 }, { "epoch": 22.04, "learning_rate": 3.898425013500592e-05, "loss": 2.2712, "step": 4448500 }, { "epoch": 22.04, "learning_rate": 3.898301154857984e-05, "loss": 2.275, "step": 4449000 }, { "epoch": 22.04, "learning_rate": 3.8981772962153754e-05, "loss": 2.2868, "step": 4449500 }, { "epoch": 22.05, "learning_rate": 3.898053437572767e-05, "loss": 2.3145, "step": 4450000 }, { "epoch": 22.05, "learning_rate": 3.897929578930159e-05, "loss": 2.3197, "step": 4450500 }, { "epoch": 22.05, "learning_rate": 3.8978057202875505e-05, "loss": 2.2986, "step": 4451000 }, { "epoch": 22.05, "learning_rate": 3.8976821093622274e-05, "loss": 2.2807, "step": 4451500 }, { "epoch": 22.06, "learning_rate": 3.897558498436904e-05, "loss": 2.3027, "step": 4452000 }, { "epoch": 22.06, "learning_rate": 3.897434639794296e-05, "loss": 2.2952, "step": 4452500 }, { "epoch": 22.06, "learning_rate": 3.8973107811516877e-05, "loss": 2.2791, "step": 4453000 }, { "epoch": 22.06, "learning_rate": 3.8971869225090793e-05, "loss": 2.2952, "step": 4453500 }, { "epoch": 22.07, "learning_rate": 3.8970630638664704e-05, "loss": 2.3056, "step": 4454000 }, { "epoch": 22.07, "learning_rate": 3.896939205223862e-05, "loss": 2.3098, "step": 4454500 }, { "epoch": 22.07, "learning_rate": 3.896815346581254e-05, "loss": 2.2978, "step": 4455000 }, { "epoch": 22.07, "learning_rate": 3.8966914879386454e-05, "loss": 2.306, "step": 4455500 }, { "epoch": 22.08, "learning_rate": 3.896567629296037e-05, "loss": 2.2921, "step": 4456000 }, { "epoch": 22.08, "learning_rate": 3.896443770653429e-05, "loss": 2.3061, "step": 4456500 }, { "epoch": 22.08, "learning_rate": 3.896320159728106e-05, "loss": 2.2947, "step": 4457000 }, { "epoch": 22.08, "learning_rate": 3.8961963010854974e-05, "loss": 2.2962, "step": 4457500 }, { "epoch": 22.09, "learning_rate": 3.896072442442889e-05, "loss": 2.2948, "step": 4458000 }, { "epoch": 22.09, "learning_rate": 3.895948831517566e-05, "loss": 2.3185, "step": 4458500 }, { "epoch": 22.09, "learning_rate": 3.895824972874958e-05, "loss": 2.3078, "step": 4459000 }, { "epoch": 22.09, "learning_rate": 3.8957011142323494e-05, "loss": 2.303, "step": 4459500 }, { "epoch": 22.1, "learning_rate": 3.8955775033070256e-05, "loss": 2.2979, "step": 4460000 }, { "epoch": 22.1, "learning_rate": 3.895453644664417e-05, "loss": 2.2898, "step": 4460500 }, { "epoch": 22.1, "learning_rate": 3.895330033739095e-05, "loss": 2.3058, "step": 4461000 }, { "epoch": 22.1, "learning_rate": 3.8952061750964865e-05, "loss": 2.2828, "step": 4461500 }, { "epoch": 22.11, "learning_rate": 3.8950823164538775e-05, "loss": 2.3222, "step": 4462000 }, { "epoch": 22.11, "learning_rate": 3.894958457811269e-05, "loss": 2.3003, "step": 4462500 }, { "epoch": 22.11, "learning_rate": 3.894834599168661e-05, "loss": 2.2981, "step": 4463000 }, { "epoch": 22.11, "learning_rate": 3.8947107405260526e-05, "loss": 2.3091, "step": 4463500 }, { "epoch": 22.12, "learning_rate": 3.894586881883444e-05, "loss": 2.271, "step": 4464000 }, { "epoch": 22.12, "learning_rate": 3.894463023240836e-05, "loss": 2.334, "step": 4464500 }, { "epoch": 22.12, "learning_rate": 3.894339164598228e-05, "loss": 2.2926, "step": 4465000 }, { "epoch": 22.12, "learning_rate": 3.8942153059556194e-05, "loss": 2.3064, "step": 4465500 }, { "epoch": 22.13, "learning_rate": 3.894091447313011e-05, "loss": 2.2886, "step": 4466000 }, { "epoch": 22.13, "learning_rate": 3.893967588670403e-05, "loss": 2.2827, "step": 4466500 }, { "epoch": 22.13, "learning_rate": 3.8938437300277944e-05, "loss": 2.2968, "step": 4467000 }, { "epoch": 22.13, "learning_rate": 3.8937201191024707e-05, "loss": 2.298, "step": 4467500 }, { "epoch": 22.14, "learning_rate": 3.8935962604598623e-05, "loss": 2.2935, "step": 4468000 }, { "epoch": 22.14, "learning_rate": 3.893472401817254e-05, "loss": 2.2881, "step": 4468500 }, { "epoch": 22.14, "learning_rate": 3.893348543174646e-05, "loss": 2.2915, "step": 4469000 }, { "epoch": 22.14, "learning_rate": 3.8932246845320374e-05, "loss": 2.3102, "step": 4469500 }, { "epoch": 22.15, "learning_rate": 3.893100825889429e-05, "loss": 2.3013, "step": 4470000 }, { "epoch": 22.15, "learning_rate": 3.892977214964106e-05, "loss": 2.285, "step": 4470500 }, { "epoch": 22.15, "learning_rate": 3.892853356321498e-05, "loss": 2.2956, "step": 4471000 }, { "epoch": 22.15, "learning_rate": 3.892729745396174e-05, "loss": 2.3063, "step": 4471500 }, { "epoch": 22.16, "learning_rate": 3.8926058867535656e-05, "loss": 2.3184, "step": 4472000 }, { "epoch": 22.16, "learning_rate": 3.892482028110957e-05, "loss": 2.2951, "step": 4472500 }, { "epoch": 22.16, "learning_rate": 3.892358169468349e-05, "loss": 2.2968, "step": 4473000 }, { "epoch": 22.16, "learning_rate": 3.892234310825741e-05, "loss": 2.2868, "step": 4473500 }, { "epoch": 22.17, "learning_rate": 3.892110699900418e-05, "loss": 2.2937, "step": 4474000 }, { "epoch": 22.17, "learning_rate": 3.891986841257809e-05, "loss": 2.2953, "step": 4474500 }, { "epoch": 22.17, "learning_rate": 3.891862982615201e-05, "loss": 2.3181, "step": 4475000 }, { "epoch": 22.17, "learning_rate": 3.8917391239725926e-05, "loss": 2.2859, "step": 4475500 }, { "epoch": 22.18, "learning_rate": 3.891615265329984e-05, "loss": 2.3111, "step": 4476000 }, { "epoch": 22.18, "learning_rate": 3.891491406687376e-05, "loss": 2.3094, "step": 4476500 }, { "epoch": 22.18, "learning_rate": 3.891367548044768e-05, "loss": 2.3165, "step": 4477000 }, { "epoch": 22.18, "learning_rate": 3.8912436894021594e-05, "loss": 2.3021, "step": 4477500 }, { "epoch": 22.19, "learning_rate": 3.891119830759551e-05, "loss": 2.2889, "step": 4478000 }, { "epoch": 22.19, "learning_rate": 3.890995972116943e-05, "loss": 2.3061, "step": 4478500 }, { "epoch": 22.19, "learning_rate": 3.8908721134743345e-05, "loss": 2.303, "step": 4479000 }, { "epoch": 22.19, "learning_rate": 3.890748254831726e-05, "loss": 2.2969, "step": 4479500 }, { "epoch": 22.2, "learning_rate": 3.890624891623688e-05, "loss": 2.2988, "step": 4480000 }, { "epoch": 22.2, "learning_rate": 3.890501032981079e-05, "loss": 2.3114, "step": 4480500 }, { "epoch": 22.2, "learning_rate": 3.890377174338471e-05, "loss": 2.3078, "step": 4481000 }, { "epoch": 22.2, "learning_rate": 3.8902533156958626e-05, "loss": 2.3274, "step": 4481500 }, { "epoch": 22.21, "learning_rate": 3.890129457053254e-05, "loss": 2.296, "step": 4482000 }, { "epoch": 22.21, "learning_rate": 3.890005598410646e-05, "loss": 2.3063, "step": 4482500 }, { "epoch": 22.21, "learning_rate": 3.889881739768038e-05, "loss": 2.2988, "step": 4483000 }, { "epoch": 22.21, "learning_rate": 3.8897578811254294e-05, "loss": 2.2946, "step": 4483500 }, { "epoch": 22.22, "learning_rate": 3.889634022482821e-05, "loss": 2.2692, "step": 4484000 }, { "epoch": 22.22, "learning_rate": 3.889510411557497e-05, "loss": 2.2717, "step": 4484500 }, { "epoch": 22.22, "learning_rate": 3.889386552914889e-05, "loss": 2.3011, "step": 4485000 }, { "epoch": 22.22, "learning_rate": 3.889262694272281e-05, "loss": 2.2948, "step": 4485500 }, { "epoch": 22.23, "learning_rate": 3.8891388356296724e-05, "loss": 2.2936, "step": 4486000 }, { "epoch": 22.23, "learning_rate": 3.889014976987064e-05, "loss": 2.2986, "step": 4486500 }, { "epoch": 22.23, "learning_rate": 3.888891366061741e-05, "loss": 2.3014, "step": 4487000 }, { "epoch": 22.23, "learning_rate": 3.8887675074191326e-05, "loss": 2.3245, "step": 4487500 }, { "epoch": 22.24, "learning_rate": 3.8886436487765243e-05, "loss": 2.3155, "step": 4488000 }, { "epoch": 22.24, "learning_rate": 3.888520037851201e-05, "loss": 2.3137, "step": 4488500 }, { "epoch": 22.24, "learning_rate": 3.888396179208593e-05, "loss": 2.3031, "step": 4489000 }, { "epoch": 22.24, "learning_rate": 3.8882723205659846e-05, "loss": 2.3025, "step": 4489500 }, { "epoch": 22.25, "learning_rate": 3.888148461923376e-05, "loss": 2.3273, "step": 4490000 }, { "epoch": 22.25, "learning_rate": 3.888024850998053e-05, "loss": 2.3053, "step": 4490500 }, { "epoch": 22.25, "learning_rate": 3.887900992355445e-05, "loss": 2.3006, "step": 4491000 }, { "epoch": 22.25, "learning_rate": 3.8877771337128366e-05, "loss": 2.3109, "step": 4491500 }, { "epoch": 22.25, "learning_rate": 3.887653275070228e-05, "loss": 2.3198, "step": 4492000 }, { "epoch": 22.26, "learning_rate": 3.887529664144905e-05, "loss": 2.3127, "step": 4492500 }, { "epoch": 22.26, "learning_rate": 3.887405805502297e-05, "loss": 2.2989, "step": 4493000 }, { "epoch": 22.26, "learning_rate": 3.8872819468596885e-05, "loss": 2.3253, "step": 4493500 }, { "epoch": 22.26, "learning_rate": 3.88715808821708e-05, "loss": 2.2805, "step": 4494000 }, { "epoch": 22.27, "learning_rate": 3.887034229574472e-05, "loss": 2.3011, "step": 4494500 }, { "epoch": 22.27, "learning_rate": 3.8869103709318636e-05, "loss": 2.3047, "step": 4495000 }, { "epoch": 22.27, "learning_rate": 3.8867865122892546e-05, "loss": 2.2922, "step": 4495500 }, { "epoch": 22.27, "learning_rate": 3.886662653646646e-05, "loss": 2.2869, "step": 4496000 }, { "epoch": 22.28, "learning_rate": 3.886538795004038e-05, "loss": 2.3214, "step": 4496500 }, { "epoch": 22.28, "learning_rate": 3.88641493636143e-05, "loss": 2.3244, "step": 4497000 }, { "epoch": 22.28, "learning_rate": 3.8862910777188214e-05, "loss": 2.3028, "step": 4497500 }, { "epoch": 22.28, "learning_rate": 3.8861672190762124e-05, "loss": 2.3, "step": 4498000 }, { "epoch": 22.29, "learning_rate": 3.88604360815089e-05, "loss": 2.2982, "step": 4498500 }, { "epoch": 22.29, "learning_rate": 3.8859197495082817e-05, "loss": 2.3043, "step": 4499000 }, { "epoch": 22.29, "learning_rate": 3.885795890865673e-05, "loss": 2.3042, "step": 4499500 }, { "epoch": 22.29, "learning_rate": 3.8856720322230644e-05, "loss": 2.3022, "step": 4500000 }, { "epoch": 22.3, "learning_rate": 3.885548173580456e-05, "loss": 2.3037, "step": 4500500 }, { "epoch": 22.3, "learning_rate": 3.885424314937848e-05, "loss": 2.2896, "step": 4501000 }, { "epoch": 22.3, "learning_rate": 3.88530095172981e-05, "loss": 2.2943, "step": 4501500 }, { "epoch": 22.3, "learning_rate": 3.8851770930872015e-05, "loss": 2.2988, "step": 4502000 }, { "epoch": 22.31, "learning_rate": 3.8850534821618784e-05, "loss": 2.3111, "step": 4502500 }, { "epoch": 22.31, "learning_rate": 3.88492962351927e-05, "loss": 2.3171, "step": 4503000 }, { "epoch": 22.31, "learning_rate": 3.884805764876662e-05, "loss": 2.2992, "step": 4503500 }, { "epoch": 22.31, "learning_rate": 3.8846819062340535e-05, "loss": 2.3312, "step": 4504000 }, { "epoch": 22.32, "learning_rate": 3.884558047591445e-05, "loss": 2.3045, "step": 4504500 }, { "epoch": 22.32, "learning_rate": 3.884434188948837e-05, "loss": 2.3171, "step": 4505000 }, { "epoch": 22.32, "learning_rate": 3.8843103303062286e-05, "loss": 2.3009, "step": 4505500 }, { "epoch": 22.32, "learning_rate": 3.88418647166362e-05, "loss": 2.3001, "step": 4506000 }, { "epoch": 22.33, "learning_rate": 3.884062613021012e-05, "loss": 2.3133, "step": 4506500 }, { "epoch": 22.33, "learning_rate": 3.883939002095688e-05, "loss": 2.2882, "step": 4507000 }, { "epoch": 22.33, "learning_rate": 3.88381514345308e-05, "loss": 2.3329, "step": 4507500 }, { "epoch": 22.33, "learning_rate": 3.8836912848104715e-05, "loss": 2.3059, "step": 4508000 }, { "epoch": 22.34, "learning_rate": 3.883567426167863e-05, "loss": 2.2917, "step": 4508500 }, { "epoch": 22.34, "learning_rate": 3.883443567525255e-05, "loss": 2.2894, "step": 4509000 }, { "epoch": 22.34, "learning_rate": 3.8833197088826466e-05, "loss": 2.3078, "step": 4509500 }, { "epoch": 22.34, "learning_rate": 3.883195850240038e-05, "loss": 2.3134, "step": 4510000 }, { "epoch": 22.35, "learning_rate": 3.88307199159743e-05, "loss": 2.3113, "step": 4510500 }, { "epoch": 22.35, "learning_rate": 3.882948132954822e-05, "loss": 2.2759, "step": 4511000 }, { "epoch": 22.35, "learning_rate": 3.8828242743122134e-05, "loss": 2.3251, "step": 4511500 }, { "epoch": 22.35, "learning_rate": 3.8827004156696044e-05, "loss": 2.3013, "step": 4512000 }, { "epoch": 22.36, "learning_rate": 3.882576557026996e-05, "loss": 2.2991, "step": 4512500 }, { "epoch": 22.36, "learning_rate": 3.882453193818958e-05, "loss": 2.3148, "step": 4513000 }, { "epoch": 22.36, "learning_rate": 3.88232933517635e-05, "loss": 2.3174, "step": 4513500 }, { "epoch": 22.36, "learning_rate": 3.882205724251027e-05, "loss": 2.2938, "step": 4514000 }, { "epoch": 22.37, "learning_rate": 3.8820818656084184e-05, "loss": 2.3036, "step": 4514500 }, { "epoch": 22.37, "learning_rate": 3.88195800696581e-05, "loss": 2.2946, "step": 4515000 }, { "epoch": 22.37, "learning_rate": 3.881834148323202e-05, "loss": 2.3099, "step": 4515500 }, { "epoch": 22.37, "learning_rate": 3.8817102896805935e-05, "loss": 2.3022, "step": 4516000 }, { "epoch": 22.38, "learning_rate": 3.881586431037985e-05, "loss": 2.309, "step": 4516500 }, { "epoch": 22.38, "learning_rate": 3.881462820112662e-05, "loss": 2.2991, "step": 4517000 }, { "epoch": 22.38, "learning_rate": 3.881338961470054e-05, "loss": 2.2815, "step": 4517500 }, { "epoch": 22.38, "learning_rate": 3.8812151028274455e-05, "loss": 2.3127, "step": 4518000 }, { "epoch": 22.39, "learning_rate": 3.881091244184837e-05, "loss": 2.3122, "step": 4518500 }, { "epoch": 22.39, "learning_rate": 3.880967385542228e-05, "loss": 2.305, "step": 4519000 }, { "epoch": 22.39, "learning_rate": 3.88084352689962e-05, "loss": 2.3096, "step": 4519500 }, { "epoch": 22.39, "learning_rate": 3.8807196682570116e-05, "loss": 2.2711, "step": 4520000 }, { "epoch": 22.4, "learning_rate": 3.880595809614403e-05, "loss": 2.3383, "step": 4520500 }, { "epoch": 22.4, "learning_rate": 3.880471950971795e-05, "loss": 2.2977, "step": 4521000 }, { "epoch": 22.4, "learning_rate": 3.8803480923291866e-05, "loss": 2.3228, "step": 4521500 }, { "epoch": 22.4, "learning_rate": 3.880224233686578e-05, "loss": 2.3133, "step": 4522000 }, { "epoch": 22.41, "learning_rate": 3.880100622761255e-05, "loss": 2.3178, "step": 4522500 }, { "epoch": 22.41, "learning_rate": 3.879976764118647e-05, "loss": 2.2977, "step": 4523000 }, { "epoch": 22.41, "learning_rate": 3.8798529054760386e-05, "loss": 2.3113, "step": 4523500 }, { "epoch": 22.41, "learning_rate": 3.87972904683343e-05, "loss": 2.2864, "step": 4524000 }, { "epoch": 22.42, "learning_rate": 3.879605435908107e-05, "loss": 2.2973, "step": 4524500 }, { "epoch": 22.42, "learning_rate": 3.879481577265499e-05, "loss": 2.3181, "step": 4525000 }, { "epoch": 22.42, "learning_rate": 3.8793577186228905e-05, "loss": 2.3087, "step": 4525500 }, { "epoch": 22.42, "learning_rate": 3.8792338599802816e-05, "loss": 2.3024, "step": 4526000 }, { "epoch": 22.43, "learning_rate": 3.879110001337673e-05, "loss": 2.297, "step": 4526500 }, { "epoch": 22.43, "learning_rate": 3.878986142695065e-05, "loss": 2.2964, "step": 4527000 }, { "epoch": 22.43, "learning_rate": 3.878862531769742e-05, "loss": 2.3192, "step": 4527500 }, { "epoch": 22.43, "learning_rate": 3.8787386731271335e-05, "loss": 2.2818, "step": 4528000 }, { "epoch": 22.44, "learning_rate": 3.878614814484525e-05, "loss": 2.3023, "step": 4528500 }, { "epoch": 22.44, "learning_rate": 3.878490955841917e-05, "loss": 2.3361, "step": 4529000 }, { "epoch": 22.44, "learning_rate": 3.8783670971993086e-05, "loss": 2.3235, "step": 4529500 }, { "epoch": 22.44, "learning_rate": 3.8782432385567e-05, "loss": 2.2829, "step": 4530000 }, { "epoch": 22.45, "learning_rate": 3.878119379914092e-05, "loss": 2.3039, "step": 4530500 }, { "epoch": 22.45, "learning_rate": 3.877995521271484e-05, "loss": 2.2703, "step": 4531000 }, { "epoch": 22.45, "learning_rate": 3.8778719103461606e-05, "loss": 2.2963, "step": 4531500 }, { "epoch": 22.45, "learning_rate": 3.877748051703552e-05, "loss": 2.3257, "step": 4532000 }, { "epoch": 22.46, "learning_rate": 3.877624193060943e-05, "loss": 2.3125, "step": 4532500 }, { "epoch": 22.46, "learning_rate": 3.877500334418335e-05, "loss": 2.3036, "step": 4533000 }, { "epoch": 22.46, "learning_rate": 3.877376723493012e-05, "loss": 2.3172, "step": 4533500 }, { "epoch": 22.46, "learning_rate": 3.8772528648504035e-05, "loss": 2.3257, "step": 4534000 }, { "epoch": 22.47, "learning_rate": 3.8771292539250804e-05, "loss": 2.2861, "step": 4534500 }, { "epoch": 22.47, "learning_rate": 3.877005395282472e-05, "loss": 2.3049, "step": 4535000 }, { "epoch": 22.47, "learning_rate": 3.876881536639864e-05, "loss": 2.3128, "step": 4535500 }, { "epoch": 22.47, "learning_rate": 3.8767576779972555e-05, "loss": 2.3123, "step": 4536000 }, { "epoch": 22.48, "learning_rate": 3.8766340670719324e-05, "loss": 2.3054, "step": 4536500 }, { "epoch": 22.48, "learning_rate": 3.876510208429324e-05, "loss": 2.3278, "step": 4537000 }, { "epoch": 22.48, "learning_rate": 3.876386349786715e-05, "loss": 2.3234, "step": 4537500 }, { "epoch": 22.48, "learning_rate": 3.876262491144107e-05, "loss": 2.3163, "step": 4538000 }, { "epoch": 22.49, "learning_rate": 3.8761386325014985e-05, "loss": 2.3147, "step": 4538500 }, { "epoch": 22.49, "learning_rate": 3.87601477385889e-05, "loss": 2.325, "step": 4539000 }, { "epoch": 22.49, "learning_rate": 3.875890915216282e-05, "loss": 2.3072, "step": 4539500 }, { "epoch": 22.49, "learning_rate": 3.8757670565736735e-05, "loss": 2.3123, "step": 4540000 }, { "epoch": 22.5, "learning_rate": 3.875643197931065e-05, "loss": 2.3073, "step": 4540500 }, { "epoch": 22.5, "learning_rate": 3.875519587005742e-05, "loss": 2.2933, "step": 4541000 }, { "epoch": 22.5, "learning_rate": 3.875395976080419e-05, "loss": 2.3299, "step": 4541500 }, { "epoch": 22.5, "learning_rate": 3.875272117437811e-05, "loss": 2.3161, "step": 4542000 }, { "epoch": 22.51, "learning_rate": 3.8751482587952024e-05, "loss": 2.3171, "step": 4542500 }, { "epoch": 22.51, "learning_rate": 3.875024400152594e-05, "loss": 2.3235, "step": 4543000 }, { "epoch": 22.51, "learning_rate": 3.874900541509986e-05, "loss": 2.3079, "step": 4543500 }, { "epoch": 22.51, "learning_rate": 3.8747769305846627e-05, "loss": 2.313, "step": 4544000 }, { "epoch": 22.52, "learning_rate": 3.8746530719420543e-05, "loss": 2.3353, "step": 4544500 }, { "epoch": 22.52, "learning_rate": 3.874529213299446e-05, "loss": 2.3102, "step": 4545000 }, { "epoch": 22.52, "learning_rate": 3.874405354656838e-05, "loss": 2.3287, "step": 4545500 }, { "epoch": 22.52, "learning_rate": 3.8742817437315146e-05, "loss": 2.3158, "step": 4546000 }, { "epoch": 22.52, "learning_rate": 3.874157885088906e-05, "loss": 2.2897, "step": 4546500 }, { "epoch": 22.53, "learning_rate": 3.874034026446297e-05, "loss": 2.3011, "step": 4547000 }, { "epoch": 22.53, "learning_rate": 3.873910167803689e-05, "loss": 2.2923, "step": 4547500 }, { "epoch": 22.53, "learning_rate": 3.873786309161081e-05, "loss": 2.2992, "step": 4548000 }, { "epoch": 22.53, "learning_rate": 3.8736624505184724e-05, "loss": 2.3284, "step": 4548500 }, { "epoch": 22.54, "learning_rate": 3.873538839593149e-05, "loss": 2.3066, "step": 4549000 }, { "epoch": 22.54, "learning_rate": 3.873414980950541e-05, "loss": 2.3289, "step": 4549500 }, { "epoch": 22.54, "learning_rate": 3.873291122307933e-05, "loss": 2.2994, "step": 4550000 }, { "epoch": 22.54, "learning_rate": 3.8731672636653244e-05, "loss": 2.3128, "step": 4550500 }, { "epoch": 22.55, "learning_rate": 3.873043405022716e-05, "loss": 2.3113, "step": 4551000 }, { "epoch": 22.55, "learning_rate": 3.872919794097393e-05, "loss": 2.3115, "step": 4551500 }, { "epoch": 22.55, "learning_rate": 3.8727959354547846e-05, "loss": 2.3109, "step": 4552000 }, { "epoch": 22.55, "learning_rate": 3.872672076812176e-05, "loss": 2.32, "step": 4552500 }, { "epoch": 22.56, "learning_rate": 3.872548218169568e-05, "loss": 2.3099, "step": 4553000 }, { "epoch": 22.56, "learning_rate": 3.872424607244244e-05, "loss": 2.3342, "step": 4553500 }, { "epoch": 22.56, "learning_rate": 3.872300996318921e-05, "loss": 2.2928, "step": 4554000 }, { "epoch": 22.56, "learning_rate": 3.872177137676313e-05, "loss": 2.3082, "step": 4554500 }, { "epoch": 22.57, "learning_rate": 3.8720532790337045e-05, "loss": 2.326, "step": 4555000 }, { "epoch": 22.57, "learning_rate": 3.871929420391096e-05, "loss": 2.3155, "step": 4555500 }, { "epoch": 22.57, "learning_rate": 3.871805561748488e-05, "loss": 2.299, "step": 4556000 }, { "epoch": 22.57, "learning_rate": 3.8716817031058796e-05, "loss": 2.3201, "step": 4556500 }, { "epoch": 22.58, "learning_rate": 3.871557844463271e-05, "loss": 2.2982, "step": 4557000 }, { "epoch": 22.58, "learning_rate": 3.871433985820663e-05, "loss": 2.3207, "step": 4557500 }, { "epoch": 22.58, "learning_rate": 3.8713101271780546e-05, "loss": 2.316, "step": 4558000 }, { "epoch": 22.58, "learning_rate": 3.871186516252731e-05, "loss": 2.3293, "step": 4558500 }, { "epoch": 22.59, "learning_rate": 3.8710626576101225e-05, "loss": 2.3033, "step": 4559000 }, { "epoch": 22.59, "learning_rate": 3.870938798967514e-05, "loss": 2.3159, "step": 4559500 }, { "epoch": 22.59, "learning_rate": 3.870814940324906e-05, "loss": 2.3366, "step": 4560000 }, { "epoch": 22.59, "learning_rate": 3.8706910816822976e-05, "loss": 2.3203, "step": 4560500 }, { "epoch": 22.6, "learning_rate": 3.870567223039689e-05, "loss": 2.32, "step": 4561000 }, { "epoch": 22.6, "learning_rate": 3.870443364397081e-05, "loss": 2.315, "step": 4561500 }, { "epoch": 22.6, "learning_rate": 3.870319505754473e-05, "loss": 2.3213, "step": 4562000 }, { "epoch": 22.6, "learning_rate": 3.8701956471118644e-05, "loss": 2.3211, "step": 4562500 }, { "epoch": 22.61, "learning_rate": 3.870072036186541e-05, "loss": 2.3087, "step": 4563000 }, { "epoch": 22.61, "learning_rate": 3.869948177543933e-05, "loss": 2.3402, "step": 4563500 }, { "epoch": 22.61, "learning_rate": 3.8698243189013247e-05, "loss": 2.3021, "step": 4564000 }, { "epoch": 22.61, "learning_rate": 3.8697004602587163e-05, "loss": 2.3204, "step": 4564500 }, { "epoch": 22.62, "learning_rate": 3.869576601616108e-05, "loss": 2.2966, "step": 4565000 }, { "epoch": 22.62, "learning_rate": 3.869452990690784e-05, "loss": 2.3114, "step": 4565500 }, { "epoch": 22.62, "learning_rate": 3.869329132048176e-05, "loss": 2.3284, "step": 4566000 }, { "epoch": 22.62, "learning_rate": 3.8692052734055676e-05, "loss": 2.3001, "step": 4566500 }, { "epoch": 22.63, "learning_rate": 3.869081414762959e-05, "loss": 2.3107, "step": 4567000 }, { "epoch": 22.63, "learning_rate": 3.868957556120351e-05, "loss": 2.322, "step": 4567500 }, { "epoch": 22.63, "learning_rate": 3.868833697477743e-05, "loss": 2.3449, "step": 4568000 }, { "epoch": 22.63, "learning_rate": 3.8687098388351344e-05, "loss": 2.3089, "step": 4568500 }, { "epoch": 22.64, "learning_rate": 3.868585980192526e-05, "loss": 2.3135, "step": 4569000 }, { "epoch": 22.64, "learning_rate": 3.868462121549918e-05, "loss": 2.3005, "step": 4569500 }, { "epoch": 22.64, "learning_rate": 3.868338510624595e-05, "loss": 2.3158, "step": 4570000 }, { "epoch": 22.64, "learning_rate": 3.868214899699271e-05, "loss": 2.3041, "step": 4570500 }, { "epoch": 22.65, "learning_rate": 3.8680910410566626e-05, "loss": 2.328, "step": 4571000 }, { "epoch": 22.65, "learning_rate": 3.867967182414054e-05, "loss": 2.3015, "step": 4571500 }, { "epoch": 22.65, "learning_rate": 3.867843323771446e-05, "loss": 2.3389, "step": 4572000 }, { "epoch": 22.65, "learning_rate": 3.867719712846123e-05, "loss": 2.2967, "step": 4572500 }, { "epoch": 22.66, "learning_rate": 3.8675961019208004e-05, "loss": 2.3422, "step": 4573000 }, { "epoch": 22.66, "learning_rate": 3.867472243278192e-05, "loss": 2.3099, "step": 4573500 }, { "epoch": 22.66, "learning_rate": 3.867348384635584e-05, "loss": 2.326, "step": 4574000 }, { "epoch": 22.66, "learning_rate": 3.8672245259929755e-05, "loss": 2.3225, "step": 4574500 }, { "epoch": 22.67, "learning_rate": 3.8671006673503665e-05, "loss": 2.3198, "step": 4575000 }, { "epoch": 22.67, "learning_rate": 3.866976808707758e-05, "loss": 2.3108, "step": 4575500 }, { "epoch": 22.67, "learning_rate": 3.86685295006515e-05, "loss": 2.3139, "step": 4576000 }, { "epoch": 22.67, "learning_rate": 3.8667290914225416e-05, "loss": 2.3271, "step": 4576500 }, { "epoch": 22.68, "learning_rate": 3.866605232779933e-05, "loss": 2.3365, "step": 4577000 }, { "epoch": 22.68, "learning_rate": 3.86648162185461e-05, "loss": 2.316, "step": 4577500 }, { "epoch": 22.68, "learning_rate": 3.866357763212002e-05, "loss": 2.3204, "step": 4578000 }, { "epoch": 22.68, "learning_rate": 3.8662339045693935e-05, "loss": 2.3074, "step": 4578500 }, { "epoch": 22.69, "learning_rate": 3.8661100459267845e-05, "loss": 2.3012, "step": 4579000 }, { "epoch": 22.69, "learning_rate": 3.865986187284176e-05, "loss": 2.3167, "step": 4579500 }, { "epoch": 22.69, "learning_rate": 3.865862328641568e-05, "loss": 2.3111, "step": 4580000 }, { "epoch": 22.69, "learning_rate": 3.8657387177162455e-05, "loss": 2.3244, "step": 4580500 }, { "epoch": 22.7, "learning_rate": 3.865614859073637e-05, "loss": 2.3031, "step": 4581000 }, { "epoch": 22.7, "learning_rate": 3.865491000431028e-05, "loss": 2.3241, "step": 4581500 }, { "epoch": 22.7, "learning_rate": 3.86536714178842e-05, "loss": 2.2923, "step": 4582000 }, { "epoch": 22.7, "learning_rate": 3.865243530863097e-05, "loss": 2.2862, "step": 4582500 }, { "epoch": 22.71, "learning_rate": 3.8651196722204885e-05, "loss": 2.3268, "step": 4583000 }, { "epoch": 22.71, "learning_rate": 3.86499581357788e-05, "loss": 2.3275, "step": 4583500 }, { "epoch": 22.71, "learning_rate": 3.864871954935272e-05, "loss": 2.3048, "step": 4584000 }, { "epoch": 22.71, "learning_rate": 3.8647480962926635e-05, "loss": 2.3134, "step": 4584500 }, { "epoch": 22.72, "learning_rate": 3.8646244853673404e-05, "loss": 2.304, "step": 4585000 }, { "epoch": 22.72, "learning_rate": 3.864500626724732e-05, "loss": 2.2997, "step": 4585500 }, { "epoch": 22.72, "learning_rate": 3.864376768082124e-05, "loss": 2.3093, "step": 4586000 }, { "epoch": 22.72, "learning_rate": 3.8642529094395155e-05, "loss": 2.3179, "step": 4586500 }, { "epoch": 22.73, "learning_rate": 3.864129050796907e-05, "loss": 2.317, "step": 4587000 }, { "epoch": 22.73, "learning_rate": 3.8640054398715834e-05, "loss": 2.3088, "step": 4587500 }, { "epoch": 22.73, "learning_rate": 3.86388182894626e-05, "loss": 2.2975, "step": 4588000 }, { "epoch": 22.73, "learning_rate": 3.863757970303652e-05, "loss": 2.3084, "step": 4588500 }, { "epoch": 22.74, "learning_rate": 3.8636341116610437e-05, "loss": 2.2775, "step": 4589000 }, { "epoch": 22.74, "learning_rate": 3.8635102530184353e-05, "loss": 2.2953, "step": 4589500 }, { "epoch": 22.74, "learning_rate": 3.863386642093112e-05, "loss": 2.3188, "step": 4590000 }, { "epoch": 22.74, "learning_rate": 3.863262783450504e-05, "loss": 2.3119, "step": 4590500 }, { "epoch": 22.75, "learning_rate": 3.8631389248078956e-05, "loss": 2.2956, "step": 4591000 }, { "epoch": 22.75, "learning_rate": 3.863015066165287e-05, "loss": 2.2869, "step": 4591500 }, { "epoch": 22.75, "learning_rate": 3.862891207522678e-05, "loss": 2.3249, "step": 4592000 }, { "epoch": 22.75, "learning_rate": 3.86276734888007e-05, "loss": 2.3244, "step": 4592500 }, { "epoch": 22.76, "learning_rate": 3.862643737954747e-05, "loss": 2.3138, "step": 4593000 }, { "epoch": 22.76, "learning_rate": 3.8625198793121386e-05, "loss": 2.2791, "step": 4593500 }, { "epoch": 22.76, "learning_rate": 3.86239602066953e-05, "loss": 2.313, "step": 4594000 }, { "epoch": 22.76, "learning_rate": 3.862272409744208e-05, "loss": 2.3295, "step": 4594500 }, { "epoch": 22.77, "learning_rate": 3.862148551101599e-05, "loss": 2.3138, "step": 4595000 }, { "epoch": 22.77, "learning_rate": 3.8620246924589905e-05, "loss": 2.2907, "step": 4595500 }, { "epoch": 22.77, "learning_rate": 3.861900833816382e-05, "loss": 2.3033, "step": 4596000 }, { "epoch": 22.77, "learning_rate": 3.861776975173774e-05, "loss": 2.3326, "step": 4596500 }, { "epoch": 22.78, "learning_rate": 3.8616531165311656e-05, "loss": 2.3104, "step": 4597000 }, { "epoch": 22.78, "learning_rate": 3.861529257888557e-05, "loss": 2.3412, "step": 4597500 }, { "epoch": 22.78, "learning_rate": 3.861405399245949e-05, "loss": 2.3166, "step": 4598000 }, { "epoch": 22.78, "learning_rate": 3.86128154060334e-05, "loss": 2.3151, "step": 4598500 }, { "epoch": 22.79, "learning_rate": 3.861157681960732e-05, "loss": 2.325, "step": 4599000 }, { "epoch": 22.79, "learning_rate": 3.8610338233181234e-05, "loss": 2.307, "step": 4599500 }, { "epoch": 22.79, "learning_rate": 3.860909964675515e-05, "loss": 2.3147, "step": 4600000 }, { "epoch": 22.79, "learning_rate": 3.860786106032907e-05, "loss": 2.3078, "step": 4600500 }, { "epoch": 22.79, "learning_rate": 3.8606622473902985e-05, "loss": 2.3078, "step": 4601000 }, { "epoch": 22.8, "learning_rate": 3.86053838874769e-05, "loss": 2.3242, "step": 4601500 }, { "epoch": 22.8, "learning_rate": 3.860414777822367e-05, "loss": 2.3032, "step": 4602000 }, { "epoch": 22.8, "learning_rate": 3.860290919179759e-05, "loss": 2.3121, "step": 4602500 }, { "epoch": 22.8, "learning_rate": 3.8601670605371504e-05, "loss": 2.3143, "step": 4603000 }, { "epoch": 22.81, "learning_rate": 3.860043201894542e-05, "loss": 2.3154, "step": 4603500 }, { "epoch": 22.81, "learning_rate": 3.859919343251934e-05, "loss": 2.327, "step": 4604000 }, { "epoch": 22.81, "learning_rate": 3.8597954846093255e-05, "loss": 2.3061, "step": 4604500 }, { "epoch": 22.81, "learning_rate": 3.859671625966717e-05, "loss": 2.3172, "step": 4605000 }, { "epoch": 22.82, "learning_rate": 3.859547767324109e-05, "loss": 2.3136, "step": 4605500 }, { "epoch": 22.82, "learning_rate": 3.8594239086815006e-05, "loss": 2.3018, "step": 4606000 }, { "epoch": 22.82, "learning_rate": 3.859300297756177e-05, "loss": 2.2998, "step": 4606500 }, { "epoch": 22.82, "learning_rate": 3.8591764391135685e-05, "loss": 2.3314, "step": 4607000 }, { "epoch": 22.83, "learning_rate": 3.85905258047096e-05, "loss": 2.3057, "step": 4607500 }, { "epoch": 22.83, "learning_rate": 3.858928721828352e-05, "loss": 2.3195, "step": 4608000 }, { "epoch": 22.83, "learning_rate": 3.8588048631857436e-05, "loss": 2.3239, "step": 4608500 }, { "epoch": 22.83, "learning_rate": 3.8586812522604205e-05, "loss": 2.293, "step": 4609000 }, { "epoch": 22.84, "learning_rate": 3.858557393617812e-05, "loss": 2.3212, "step": 4609500 }, { "epoch": 22.84, "learning_rate": 3.858433534975204e-05, "loss": 2.3048, "step": 4610000 }, { "epoch": 22.84, "learning_rate": 3.8583096763325955e-05, "loss": 2.2934, "step": 4610500 }, { "epoch": 22.84, "learning_rate": 3.858185817689987e-05, "loss": 2.2999, "step": 4611000 }, { "epoch": 22.85, "learning_rate": 3.858062206764664e-05, "loss": 2.3105, "step": 4611500 }, { "epoch": 22.85, "learning_rate": 3.857938348122055e-05, "loss": 2.3013, "step": 4612000 }, { "epoch": 22.85, "learning_rate": 3.857814489479447e-05, "loss": 2.3245, "step": 4612500 }, { "epoch": 22.85, "learning_rate": 3.8576911262714096e-05, "loss": 2.311, "step": 4613000 }, { "epoch": 22.86, "learning_rate": 3.857567267628801e-05, "loss": 2.3275, "step": 4613500 }, { "epoch": 22.86, "learning_rate": 3.857443408986192e-05, "loss": 2.3123, "step": 4614000 }, { "epoch": 22.86, "learning_rate": 3.857319550343584e-05, "loss": 2.3256, "step": 4614500 }, { "epoch": 22.86, "learning_rate": 3.8571956917009757e-05, "loss": 2.3182, "step": 4615000 }, { "epoch": 22.87, "learning_rate": 3.8570718330583674e-05, "loss": 2.3452, "step": 4615500 }, { "epoch": 22.87, "learning_rate": 3.856947974415759e-05, "loss": 2.3253, "step": 4616000 }, { "epoch": 22.87, "learning_rate": 3.856824115773151e-05, "loss": 2.3012, "step": 4616500 }, { "epoch": 22.87, "learning_rate": 3.8567002571305424e-05, "loss": 2.3173, "step": 4617000 }, { "epoch": 22.88, "learning_rate": 3.856576398487934e-05, "loss": 2.3155, "step": 4617500 }, { "epoch": 22.88, "learning_rate": 3.856452539845326e-05, "loss": 2.2929, "step": 4618000 }, { "epoch": 22.88, "learning_rate": 3.8563286812027175e-05, "loss": 2.3329, "step": 4618500 }, { "epoch": 22.88, "learning_rate": 3.8562048225601085e-05, "loss": 2.3319, "step": 4619000 }, { "epoch": 22.89, "learning_rate": 3.8560809639175e-05, "loss": 2.3235, "step": 4619500 }, { "epoch": 22.89, "learning_rate": 3.855957600709462e-05, "loss": 2.3148, "step": 4620000 }, { "epoch": 22.89, "learning_rate": 3.855833742066854e-05, "loss": 2.3293, "step": 4620500 }, { "epoch": 22.89, "learning_rate": 3.855709883424246e-05, "loss": 2.2721, "step": 4621000 }, { "epoch": 22.9, "learning_rate": 3.8555860247816374e-05, "loss": 2.3437, "step": 4621500 }, { "epoch": 22.9, "learning_rate": 3.855462166139029e-05, "loss": 2.3259, "step": 4622000 }, { "epoch": 22.9, "learning_rate": 3.855338307496421e-05, "loss": 2.3164, "step": 4622500 }, { "epoch": 22.9, "learning_rate": 3.8552146965710976e-05, "loss": 2.3135, "step": 4623000 }, { "epoch": 22.91, "learning_rate": 3.8550910856457745e-05, "loss": 2.3246, "step": 4623500 }, { "epoch": 22.91, "learning_rate": 3.854967227003166e-05, "loss": 2.3097, "step": 4624000 }, { "epoch": 22.91, "learning_rate": 3.854843368360558e-05, "loss": 2.3233, "step": 4624500 }, { "epoch": 22.91, "learning_rate": 3.8547195097179496e-05, "loss": 2.3195, "step": 4625000 }, { "epoch": 22.92, "learning_rate": 3.854595651075341e-05, "loss": 2.3101, "step": 4625500 }, { "epoch": 22.92, "learning_rate": 3.854471792432733e-05, "loss": 2.3006, "step": 4626000 }, { "epoch": 22.92, "learning_rate": 3.854347933790124e-05, "loss": 2.3205, "step": 4626500 }, { "epoch": 22.92, "learning_rate": 3.854224075147516e-05, "loss": 2.3168, "step": 4627000 }, { "epoch": 22.93, "learning_rate": 3.8541002165049074e-05, "loss": 2.3087, "step": 4627500 }, { "epoch": 22.93, "learning_rate": 3.853976357862299e-05, "loss": 2.3106, "step": 4628000 }, { "epoch": 22.93, "learning_rate": 3.853852746936976e-05, "loss": 2.3153, "step": 4628500 }, { "epoch": 22.93, "learning_rate": 3.8537288882943676e-05, "loss": 2.3313, "step": 4629000 }, { "epoch": 22.94, "learning_rate": 3.8536050296517587e-05, "loss": 2.3148, "step": 4629500 }, { "epoch": 22.94, "learning_rate": 3.8534811710091504e-05, "loss": 2.3088, "step": 4630000 }, { "epoch": 22.94, "learning_rate": 3.853357312366542e-05, "loss": 2.3086, "step": 4630500 }, { "epoch": 22.94, "learning_rate": 3.853233453723934e-05, "loss": 2.3283, "step": 4631000 }, { "epoch": 22.95, "learning_rate": 3.853109842798611e-05, "loss": 2.3124, "step": 4631500 }, { "epoch": 22.95, "learning_rate": 3.852985984156003e-05, "loss": 2.3007, "step": 4632000 }, { "epoch": 22.95, "learning_rate": 3.852862125513394e-05, "loss": 2.318, "step": 4632500 }, { "epoch": 22.95, "learning_rate": 3.852738266870786e-05, "loss": 2.3008, "step": 4633000 }, { "epoch": 22.96, "learning_rate": 3.8526144082281774e-05, "loss": 2.3385, "step": 4633500 }, { "epoch": 22.96, "learning_rate": 3.852490549585569e-05, "loss": 2.3005, "step": 4634000 }, { "epoch": 22.96, "learning_rate": 3.852366690942961e-05, "loss": 2.2903, "step": 4634500 }, { "epoch": 22.96, "learning_rate": 3.8522428323003525e-05, "loss": 2.3239, "step": 4635000 }, { "epoch": 22.97, "learning_rate": 3.8521192213750294e-05, "loss": 2.3164, "step": 4635500 }, { "epoch": 22.97, "learning_rate": 3.8519953627324204e-05, "loss": 2.3282, "step": 4636000 }, { "epoch": 22.97, "learning_rate": 3.851871504089812e-05, "loss": 2.3116, "step": 4636500 }, { "epoch": 22.97, "learning_rate": 3.851747645447204e-05, "loss": 2.3066, "step": 4637000 }, { "epoch": 22.98, "learning_rate": 3.8516237868045954e-05, "loss": 2.3298, "step": 4637500 }, { "epoch": 22.98, "learning_rate": 3.851499928161987e-05, "loss": 2.3011, "step": 4638000 }, { "epoch": 22.98, "learning_rate": 3.851376069519379e-05, "loss": 2.33, "step": 4638500 }, { "epoch": 22.98, "learning_rate": 3.851252458594056e-05, "loss": 2.2978, "step": 4639000 }, { "epoch": 22.99, "learning_rate": 3.8511285999514474e-05, "loss": 2.3144, "step": 4639500 }, { "epoch": 22.99, "learning_rate": 3.851004741308839e-05, "loss": 2.3008, "step": 4640000 }, { "epoch": 22.99, "learning_rate": 3.850880882666231e-05, "loss": 2.3002, "step": 4640500 }, { "epoch": 22.99, "learning_rate": 3.8507570240236225e-05, "loss": 2.3052, "step": 4641000 }, { "epoch": 23.0, "learning_rate": 3.850633165381014e-05, "loss": 2.3447, "step": 4641500 }, { "epoch": 23.0, "learning_rate": 3.850509306738406e-05, "loss": 2.2959, "step": 4642000 }, { "epoch": 23.0, "eval_accuracy": 0.6535051236946925, "eval_accuracy_mlm": 0.6085347615669268, "eval_accuracy_nsp": 0.8653273663608658, "eval_loss": 2.3427975177764893, "eval_runtime": 145.7935, "eval_samples_per_second": 1748.768, "eval_steps_per_second": 72.87, "step": 4642389 }, { "epoch": 23.0, "learning_rate": 3.8503854480957976e-05, "loss": 2.2864, "step": 4642500 }, { "epoch": 23.0, "learning_rate": 3.850261589453189e-05, "loss": 2.2797, "step": 4643000 }, { "epoch": 23.01, "learning_rate": 3.850137730810581e-05, "loss": 2.2687, "step": 4643500 }, { "epoch": 23.01, "learning_rate": 3.8500138721679726e-05, "loss": 2.2998, "step": 4644000 }, { "epoch": 23.01, "learning_rate": 3.849890261242649e-05, "loss": 2.2772, "step": 4644500 }, { "epoch": 23.01, "learning_rate": 3.8497664026000405e-05, "loss": 2.2775, "step": 4645000 }, { "epoch": 23.02, "learning_rate": 3.849642543957432e-05, "loss": 2.2837, "step": 4645500 }, { "epoch": 23.02, "learning_rate": 3.849518685314824e-05, "loss": 2.2925, "step": 4646000 }, { "epoch": 23.02, "learning_rate": 3.849395074389501e-05, "loss": 2.2727, "step": 4646500 }, { "epoch": 23.02, "learning_rate": 3.8492712157468925e-05, "loss": 2.2708, "step": 4647000 }, { "epoch": 23.03, "learning_rate": 3.849147357104284e-05, "loss": 2.2694, "step": 4647500 }, { "epoch": 23.03, "learning_rate": 3.849023498461676e-05, "loss": 2.2986, "step": 4648000 }, { "epoch": 23.03, "learning_rate": 3.8488996398190676e-05, "loss": 2.2675, "step": 4648500 }, { "epoch": 23.03, "learning_rate": 3.848775781176459e-05, "loss": 2.2932, "step": 4649000 }, { "epoch": 23.04, "learning_rate": 3.848651922533851e-05, "loss": 2.2778, "step": 4649500 }, { "epoch": 23.04, "learning_rate": 3.8485280638912427e-05, "loss": 2.2582, "step": 4650000 }, { "epoch": 23.04, "learning_rate": 3.848404452965919e-05, "loss": 2.2853, "step": 4650500 }, { "epoch": 23.04, "learning_rate": 3.8482805943233105e-05, "loss": 2.2806, "step": 4651000 }, { "epoch": 23.05, "learning_rate": 3.848156735680702e-05, "loss": 2.3017, "step": 4651500 }, { "epoch": 23.05, "learning_rate": 3.848032877038094e-05, "loss": 2.2922, "step": 4652000 }, { "epoch": 23.05, "learning_rate": 3.8479090183954856e-05, "loss": 2.2852, "step": 4652500 }, { "epoch": 23.05, "learning_rate": 3.847785159752877e-05, "loss": 2.2834, "step": 4653000 }, { "epoch": 23.06, "learning_rate": 3.847661301110269e-05, "loss": 2.2794, "step": 4653500 }, { "epoch": 23.06, "learning_rate": 3.847537690184946e-05, "loss": 2.2736, "step": 4654000 }, { "epoch": 23.06, "learning_rate": 3.8474138315423376e-05, "loss": 2.2817, "step": 4654500 }, { "epoch": 23.06, "learning_rate": 3.847289972899729e-05, "loss": 2.3046, "step": 4655000 }, { "epoch": 23.06, "learning_rate": 3.847166114257121e-05, "loss": 2.2827, "step": 4655500 }, { "epoch": 23.07, "learning_rate": 3.847042255614513e-05, "loss": 2.3029, "step": 4656000 }, { "epoch": 23.07, "learning_rate": 3.8469183969719044e-05, "loss": 2.2855, "step": 4656500 }, { "epoch": 23.07, "learning_rate": 3.846794538329296e-05, "loss": 2.2797, "step": 4657000 }, { "epoch": 23.07, "learning_rate": 3.846670927403972e-05, "loss": 2.269, "step": 4657500 }, { "epoch": 23.08, "learning_rate": 3.846547068761364e-05, "loss": 2.3017, "step": 4658000 }, { "epoch": 23.08, "learning_rate": 3.8464232101187556e-05, "loss": 2.3036, "step": 4658500 }, { "epoch": 23.08, "learning_rate": 3.846299351476147e-05, "loss": 2.2968, "step": 4659000 }, { "epoch": 23.08, "learning_rate": 3.846175492833539e-05, "loss": 2.2592, "step": 4659500 }, { "epoch": 23.09, "learning_rate": 3.846051634190931e-05, "loss": 2.2923, "step": 4660000 }, { "epoch": 23.09, "learning_rate": 3.8459277755483224e-05, "loss": 2.2877, "step": 4660500 }, { "epoch": 23.09, "learning_rate": 3.845804164622999e-05, "loss": 2.2687, "step": 4661000 }, { "epoch": 23.09, "learning_rate": 3.845680305980391e-05, "loss": 2.2669, "step": 4661500 }, { "epoch": 23.1, "learning_rate": 3.845556447337783e-05, "loss": 2.2845, "step": 4662000 }, { "epoch": 23.1, "learning_rate": 3.8454325886951744e-05, "loss": 2.2866, "step": 4662500 }, { "epoch": 23.1, "learning_rate": 3.845308730052566e-05, "loss": 2.3037, "step": 4663000 }, { "epoch": 23.1, "learning_rate": 3.845184871409958e-05, "loss": 2.2609, "step": 4663500 }, { "epoch": 23.11, "learning_rate": 3.8450610127673494e-05, "loss": 2.2816, "step": 4664000 }, { "epoch": 23.11, "learning_rate": 3.844937154124741e-05, "loss": 2.2791, "step": 4664500 }, { "epoch": 23.11, "learning_rate": 3.8448135431994173e-05, "loss": 2.3103, "step": 4665000 }, { "epoch": 23.11, "learning_rate": 3.844689932274094e-05, "loss": 2.2907, "step": 4665500 }, { "epoch": 23.12, "learning_rate": 3.844566073631486e-05, "loss": 2.296, "step": 4666000 }, { "epoch": 23.12, "learning_rate": 3.8444422149888776e-05, "loss": 2.2811, "step": 4666500 }, { "epoch": 23.12, "learning_rate": 3.844318356346269e-05, "loss": 2.2881, "step": 4667000 }, { "epoch": 23.12, "learning_rate": 3.844194497703661e-05, "loss": 2.2979, "step": 4667500 }, { "epoch": 23.13, "learning_rate": 3.844070886778338e-05, "loss": 2.3004, "step": 4668000 }, { "epoch": 23.13, "learning_rate": 3.8439470281357296e-05, "loss": 2.2805, "step": 4668500 }, { "epoch": 23.13, "learning_rate": 3.843823169493121e-05, "loss": 2.2995, "step": 4669000 }, { "epoch": 23.13, "learning_rate": 3.843699310850512e-05, "loss": 2.2929, "step": 4669500 }, { "epoch": 23.14, "learning_rate": 3.843575452207904e-05, "loss": 2.3062, "step": 4670000 }, { "epoch": 23.14, "learning_rate": 3.843451593565296e-05, "loss": 2.3233, "step": 4670500 }, { "epoch": 23.14, "learning_rate": 3.8433277349226874e-05, "loss": 2.3191, "step": 4671000 }, { "epoch": 23.14, "learning_rate": 3.843203876280079e-05, "loss": 2.2908, "step": 4671500 }, { "epoch": 23.15, "learning_rate": 3.843080265354756e-05, "loss": 2.267, "step": 4672000 }, { "epoch": 23.15, "learning_rate": 3.842956902146718e-05, "loss": 2.2694, "step": 4672500 }, { "epoch": 23.15, "learning_rate": 3.84283304350411e-05, "loss": 2.2964, "step": 4673000 }, { "epoch": 23.15, "learning_rate": 3.8427091848615014e-05, "loss": 2.3116, "step": 4673500 }, { "epoch": 23.16, "learning_rate": 3.842585573936178e-05, "loss": 2.2689, "step": 4674000 }, { "epoch": 23.16, "learning_rate": 3.84246171529357e-05, "loss": 2.2976, "step": 4674500 }, { "epoch": 23.16, "learning_rate": 3.8423378566509617e-05, "loss": 2.3003, "step": 4675000 }, { "epoch": 23.16, "learning_rate": 3.8422139980083533e-05, "loss": 2.3213, "step": 4675500 }, { "epoch": 23.17, "learning_rate": 3.842090139365745e-05, "loss": 2.287, "step": 4676000 }, { "epoch": 23.17, "learning_rate": 3.841966280723137e-05, "loss": 2.303, "step": 4676500 }, { "epoch": 23.17, "learning_rate": 3.8418424220805284e-05, "loss": 2.2827, "step": 4677000 }, { "epoch": 23.17, "learning_rate": 3.84171856343792e-05, "loss": 2.2881, "step": 4677500 }, { "epoch": 23.18, "learning_rate": 3.841594704795312e-05, "loss": 2.3183, "step": 4678000 }, { "epoch": 23.18, "learning_rate": 3.8414708461527035e-05, "loss": 2.3166, "step": 4678500 }, { "epoch": 23.18, "learning_rate": 3.841346987510095e-05, "loss": 2.3065, "step": 4679000 }, { "epoch": 23.18, "learning_rate": 3.841223128867486e-05, "loss": 2.278, "step": 4679500 }, { "epoch": 23.19, "learning_rate": 3.841099270224878e-05, "loss": 2.3138, "step": 4680000 }, { "epoch": 23.19, "learning_rate": 3.8409754115822696e-05, "loss": 2.3044, "step": 4680500 }, { "epoch": 23.19, "learning_rate": 3.840851552939661e-05, "loss": 2.2684, "step": 4681000 }, { "epoch": 23.19, "learning_rate": 3.840727694297053e-05, "loss": 2.293, "step": 4681500 }, { "epoch": 23.2, "learning_rate": 3.840603835654445e-05, "loss": 2.3256, "step": 4682000 }, { "epoch": 23.2, "learning_rate": 3.840480224729121e-05, "loss": 2.2807, "step": 4682500 }, { "epoch": 23.2, "learning_rate": 3.8403563660865126e-05, "loss": 2.2796, "step": 4683000 }, { "epoch": 23.2, "learning_rate": 3.840232507443904e-05, "loss": 2.3154, "step": 4683500 }, { "epoch": 23.21, "learning_rate": 3.840108648801296e-05, "loss": 2.2889, "step": 4684000 }, { "epoch": 23.21, "learning_rate": 3.8399847901586876e-05, "loss": 2.2843, "step": 4684500 }, { "epoch": 23.21, "learning_rate": 3.839861179233365e-05, "loss": 2.2907, "step": 4685000 }, { "epoch": 23.21, "learning_rate": 3.839737320590757e-05, "loss": 2.3155, "step": 4685500 }, { "epoch": 23.22, "learning_rate": 3.839613461948148e-05, "loss": 2.3057, "step": 4686000 }, { "epoch": 23.22, "learning_rate": 3.8394896033055396e-05, "loss": 2.2936, "step": 4686500 }, { "epoch": 23.22, "learning_rate": 3.839365744662931e-05, "loss": 2.3158, "step": 4687000 }, { "epoch": 23.22, "learning_rate": 3.839241886020323e-05, "loss": 2.2809, "step": 4687500 }, { "epoch": 23.23, "learning_rate": 3.839118027377715e-05, "loss": 2.2988, "step": 4688000 }, { "epoch": 23.23, "learning_rate": 3.8389941687351064e-05, "loss": 2.2952, "step": 4688500 }, { "epoch": 23.23, "learning_rate": 3.838870310092498e-05, "loss": 2.2915, "step": 4689000 }, { "epoch": 23.23, "learning_rate": 3.83874645144989e-05, "loss": 2.2737, "step": 4689500 }, { "epoch": 23.24, "learning_rate": 3.838622592807281e-05, "loss": 2.2757, "step": 4690000 }, { "epoch": 23.24, "learning_rate": 3.8384989818819577e-05, "loss": 2.3108, "step": 4690500 }, { "epoch": 23.24, "learning_rate": 3.838375370956635e-05, "loss": 2.3196, "step": 4691000 }, { "epoch": 23.24, "learning_rate": 3.838251512314027e-05, "loss": 2.2907, "step": 4691500 }, { "epoch": 23.25, "learning_rate": 3.838127653671418e-05, "loss": 2.2718, "step": 4692000 }, { "epoch": 23.25, "learning_rate": 3.8380037950288096e-05, "loss": 2.2993, "step": 4692500 }, { "epoch": 23.25, "learning_rate": 3.8378801841034865e-05, "loss": 2.3027, "step": 4693000 }, { "epoch": 23.25, "learning_rate": 3.837756325460878e-05, "loss": 2.29, "step": 4693500 }, { "epoch": 23.26, "learning_rate": 3.83763246681827e-05, "loss": 2.2988, "step": 4694000 }, { "epoch": 23.26, "learning_rate": 3.8375086081756616e-05, "loss": 2.2946, "step": 4694500 }, { "epoch": 23.26, "learning_rate": 3.8373847495330526e-05, "loss": 2.3021, "step": 4695000 }, { "epoch": 23.26, "learning_rate": 3.837260890890444e-05, "loss": 2.2775, "step": 4695500 }, { "epoch": 23.27, "learning_rate": 3.837137032247836e-05, "loss": 2.2802, "step": 4696000 }, { "epoch": 23.27, "learning_rate": 3.837013173605228e-05, "loss": 2.3041, "step": 4696500 }, { "epoch": 23.27, "learning_rate": 3.8368893149626194e-05, "loss": 2.2622, "step": 4697000 }, { "epoch": 23.27, "learning_rate": 3.836765704037297e-05, "loss": 2.305, "step": 4697500 }, { "epoch": 23.28, "learning_rate": 3.8366418453946886e-05, "loss": 2.3045, "step": 4698000 }, { "epoch": 23.28, "learning_rate": 3.836518234469365e-05, "loss": 2.282, "step": 4698500 }, { "epoch": 23.28, "learning_rate": 3.8363943758267565e-05, "loss": 2.2845, "step": 4699000 }, { "epoch": 23.28, "learning_rate": 3.836270517184148e-05, "loss": 2.2755, "step": 4699500 }, { "epoch": 23.29, "learning_rate": 3.83614665854154e-05, "loss": 2.2895, "step": 4700000 }, { "epoch": 23.29, "learning_rate": 3.8360227998989316e-05, "loss": 2.2926, "step": 4700500 }, { "epoch": 23.29, "learning_rate": 3.835898941256323e-05, "loss": 2.2743, "step": 4701000 }, { "epoch": 23.29, "learning_rate": 3.835775330331e-05, "loss": 2.2907, "step": 4701500 }, { "epoch": 23.3, "learning_rate": 3.835651471688392e-05, "loss": 2.2967, "step": 4702000 }, { "epoch": 23.3, "learning_rate": 3.8355276130457836e-05, "loss": 2.3121, "step": 4702500 }, { "epoch": 23.3, "learning_rate": 3.835403754403175e-05, "loss": 2.3018, "step": 4703000 }, { "epoch": 23.3, "learning_rate": 3.835279895760567e-05, "loss": 2.2903, "step": 4703500 }, { "epoch": 23.31, "learning_rate": 3.8351560371179586e-05, "loss": 2.3101, "step": 4704000 }, { "epoch": 23.31, "learning_rate": 3.8350321784753496e-05, "loss": 2.2976, "step": 4704500 }, { "epoch": 23.31, "learning_rate": 3.8349085675500265e-05, "loss": 2.2819, "step": 4705000 }, { "epoch": 23.31, "learning_rate": 3.834784708907418e-05, "loss": 2.2727, "step": 4705500 }, { "epoch": 23.32, "learning_rate": 3.834661097982095e-05, "loss": 2.2911, "step": 4706000 }, { "epoch": 23.32, "learning_rate": 3.834537239339487e-05, "loss": 2.3118, "step": 4706500 }, { "epoch": 23.32, "learning_rate": 3.8344133806968785e-05, "loss": 2.3047, "step": 4707000 }, { "epoch": 23.32, "learning_rate": 3.83428952205427e-05, "loss": 2.3097, "step": 4707500 }, { "epoch": 23.33, "learning_rate": 3.834165663411662e-05, "loss": 2.2727, "step": 4708000 }, { "epoch": 23.33, "learning_rate": 3.8340418047690536e-05, "loss": 2.313, "step": 4708500 }, { "epoch": 23.33, "learning_rate": 3.833917946126445e-05, "loss": 2.295, "step": 4709000 }, { "epoch": 23.33, "learning_rate": 3.833794087483837e-05, "loss": 2.3, "step": 4709500 }, { "epoch": 23.33, "learning_rate": 3.8336702288412286e-05, "loss": 2.287, "step": 4710000 }, { "epoch": 23.34, "learning_rate": 3.83354637019862e-05, "loss": 2.2963, "step": 4710500 }, { "epoch": 23.34, "learning_rate": 3.8334227592732965e-05, "loss": 2.2942, "step": 4711000 }, { "epoch": 23.34, "learning_rate": 3.833298900630688e-05, "loss": 2.2964, "step": 4711500 }, { "epoch": 23.34, "learning_rate": 3.83317504198808e-05, "loss": 2.3011, "step": 4712000 }, { "epoch": 23.35, "learning_rate": 3.8330511833454716e-05, "loss": 2.2922, "step": 4712500 }, { "epoch": 23.35, "learning_rate": 3.832927324702863e-05, "loss": 2.2892, "step": 4713000 }, { "epoch": 23.35, "learning_rate": 3.832803466060255e-05, "loss": 2.3098, "step": 4713500 }, { "epoch": 23.35, "learning_rate": 3.832679607417646e-05, "loss": 2.2946, "step": 4714000 }, { "epoch": 23.36, "learning_rate": 3.8325559964923236e-05, "loss": 2.2958, "step": 4714500 }, { "epoch": 23.36, "learning_rate": 3.832432137849715e-05, "loss": 2.3095, "step": 4715000 }, { "epoch": 23.36, "learning_rate": 3.832308279207107e-05, "loss": 2.2891, "step": 4715500 }, { "epoch": 23.36, "learning_rate": 3.8321844205644987e-05, "loss": 2.2835, "step": 4716000 }, { "epoch": 23.37, "learning_rate": 3.8320605619218903e-05, "loss": 2.3211, "step": 4716500 }, { "epoch": 23.37, "learning_rate": 3.831936950996567e-05, "loss": 2.2844, "step": 4717000 }, { "epoch": 23.37, "learning_rate": 3.831813092353958e-05, "loss": 2.298, "step": 4717500 }, { "epoch": 23.37, "learning_rate": 3.83168923371135e-05, "loss": 2.2937, "step": 4718000 }, { "epoch": 23.38, "learning_rate": 3.8315653750687416e-05, "loss": 2.2833, "step": 4718500 }, { "epoch": 23.38, "learning_rate": 3.831441516426133e-05, "loss": 2.2817, "step": 4719000 }, { "epoch": 23.38, "learning_rate": 3.831317657783525e-05, "loss": 2.2641, "step": 4719500 }, { "epoch": 23.38, "learning_rate": 3.831193799140916e-05, "loss": 2.3006, "step": 4720000 }, { "epoch": 23.39, "learning_rate": 3.8310701882155936e-05, "loss": 2.3117, "step": 4720500 }, { "epoch": 23.39, "learning_rate": 3.830946329572985e-05, "loss": 2.2952, "step": 4721000 }, { "epoch": 23.39, "learning_rate": 3.830822470930377e-05, "loss": 2.2941, "step": 4721500 }, { "epoch": 23.39, "learning_rate": 3.830698612287769e-05, "loss": 2.303, "step": 4722000 }, { "epoch": 23.4, "learning_rate": 3.8305747536451604e-05, "loss": 2.2916, "step": 4722500 }, { "epoch": 23.4, "learning_rate": 3.830451142719837e-05, "loss": 2.2864, "step": 4723000 }, { "epoch": 23.4, "learning_rate": 3.830327284077229e-05, "loss": 2.2994, "step": 4723500 }, { "epoch": 23.4, "learning_rate": 3.8302034254346206e-05, "loss": 2.3089, "step": 4724000 }, { "epoch": 23.41, "learning_rate": 3.830079814509297e-05, "loss": 2.3033, "step": 4724500 }, { "epoch": 23.41, "learning_rate": 3.8299559558666885e-05, "loss": 2.2952, "step": 4725000 }, { "epoch": 23.41, "learning_rate": 3.82983209722408e-05, "loss": 2.3225, "step": 4725500 }, { "epoch": 23.41, "learning_rate": 3.829708238581472e-05, "loss": 2.2881, "step": 4726000 }, { "epoch": 23.42, "learning_rate": 3.8295843799388636e-05, "loss": 2.2867, "step": 4726500 }, { "epoch": 23.42, "learning_rate": 3.829460521296255e-05, "loss": 2.2851, "step": 4727000 }, { "epoch": 23.42, "learning_rate": 3.829336662653647e-05, "loss": 2.2913, "step": 4727500 }, { "epoch": 23.42, "learning_rate": 3.829212804011039e-05, "loss": 2.2991, "step": 4728000 }, { "epoch": 23.43, "learning_rate": 3.8290889453684304e-05, "loss": 2.2934, "step": 4728500 }, { "epoch": 23.43, "learning_rate": 3.828965086725822e-05, "loss": 2.2761, "step": 4729000 }, { "epoch": 23.43, "learning_rate": 3.828841228083213e-05, "loss": 2.3159, "step": 4729500 }, { "epoch": 23.43, "learning_rate": 3.828717369440605e-05, "loss": 2.3028, "step": 4730000 }, { "epoch": 23.44, "learning_rate": 3.8285935107979965e-05, "loss": 2.3088, "step": 4730500 }, { "epoch": 23.44, "learning_rate": 3.828469652155388e-05, "loss": 2.2843, "step": 4731000 }, { "epoch": 23.44, "learning_rate": 3.828346041230065e-05, "loss": 2.3045, "step": 4731500 }, { "epoch": 23.44, "learning_rate": 3.828222182587457e-05, "loss": 2.2686, "step": 4732000 }, { "epoch": 23.45, "learning_rate": 3.8280983239448484e-05, "loss": 2.2848, "step": 4732500 }, { "epoch": 23.45, "learning_rate": 3.82797446530224e-05, "loss": 2.2904, "step": 4733000 }, { "epoch": 23.45, "learning_rate": 3.827850854376917e-05, "loss": 2.3025, "step": 4733500 }, { "epoch": 23.45, "learning_rate": 3.827726995734309e-05, "loss": 2.312, "step": 4734000 }, { "epoch": 23.46, "learning_rate": 3.8276031370917004e-05, "loss": 2.2943, "step": 4734500 }, { "epoch": 23.46, "learning_rate": 3.827479526166377e-05, "loss": 2.2959, "step": 4735000 }, { "epoch": 23.46, "learning_rate": 3.8273559152410535e-05, "loss": 2.284, "step": 4735500 }, { "epoch": 23.46, "learning_rate": 3.827232056598445e-05, "loss": 2.2884, "step": 4736000 }, { "epoch": 23.47, "learning_rate": 3.827108197955837e-05, "loss": 2.3079, "step": 4736500 }, { "epoch": 23.47, "learning_rate": 3.8269843393132285e-05, "loss": 2.2866, "step": 4737000 }, { "epoch": 23.47, "learning_rate": 3.82686048067062e-05, "loss": 2.283, "step": 4737500 }, { "epoch": 23.47, "learning_rate": 3.826736622028012e-05, "loss": 2.3011, "step": 4738000 }, { "epoch": 23.48, "learning_rate": 3.8266127633854036e-05, "loss": 2.2811, "step": 4738500 }, { "epoch": 23.48, "learning_rate": 3.826488904742795e-05, "loss": 2.2937, "step": 4739000 }, { "epoch": 23.48, "learning_rate": 3.826365046100187e-05, "loss": 2.2939, "step": 4739500 }, { "epoch": 23.48, "learning_rate": 3.826241187457579e-05, "loss": 2.3019, "step": 4740000 }, { "epoch": 23.49, "learning_rate": 3.8261173288149704e-05, "loss": 2.3265, "step": 4740500 }, { "epoch": 23.49, "learning_rate": 3.825993470172362e-05, "loss": 2.2916, "step": 4741000 }, { "epoch": 23.49, "learning_rate": 3.825869611529754e-05, "loss": 2.2738, "step": 4741500 }, { "epoch": 23.49, "learning_rate": 3.825746000604431e-05, "loss": 2.2937, "step": 4742000 }, { "epoch": 23.5, "learning_rate": 3.8256221419618224e-05, "loss": 2.2827, "step": 4742500 }, { "epoch": 23.5, "learning_rate": 3.825498283319214e-05, "loss": 2.2835, "step": 4743000 }, { "epoch": 23.5, "learning_rate": 3.825374424676606e-05, "loss": 2.3097, "step": 4743500 }, { "epoch": 23.5, "learning_rate": 3.8252505660339974e-05, "loss": 2.3038, "step": 4744000 }, { "epoch": 23.51, "learning_rate": 3.8251267073913884e-05, "loss": 2.3029, "step": 4744500 }, { "epoch": 23.51, "learning_rate": 3.82500284874878e-05, "loss": 2.327, "step": 4745000 }, { "epoch": 23.51, "learning_rate": 3.824878990106172e-05, "loss": 2.3, "step": 4745500 }, { "epoch": 23.51, "learning_rate": 3.8247551314635635e-05, "loss": 2.2879, "step": 4746000 }, { "epoch": 23.52, "learning_rate": 3.8246315205382404e-05, "loss": 2.2921, "step": 4746500 }, { "epoch": 23.52, "learning_rate": 3.824507661895632e-05, "loss": 2.2748, "step": 4747000 }, { "epoch": 23.52, "learning_rate": 3.824383803253024e-05, "loss": 2.26, "step": 4747500 }, { "epoch": 23.52, "learning_rate": 3.8242599446104155e-05, "loss": 2.3142, "step": 4748000 }, { "epoch": 23.53, "learning_rate": 3.8241360859678065e-05, "loss": 2.2673, "step": 4748500 }, { "epoch": 23.53, "learning_rate": 3.824012475042484e-05, "loss": 2.2915, "step": 4749000 }, { "epoch": 23.53, "learning_rate": 3.823888616399876e-05, "loss": 2.2943, "step": 4749500 }, { "epoch": 23.53, "learning_rate": 3.8237647577572674e-05, "loss": 2.2901, "step": 4750000 }, { "epoch": 23.54, "learning_rate": 3.823640899114659e-05, "loss": 2.3014, "step": 4750500 }, { "epoch": 23.54, "learning_rate": 3.8235172881893353e-05, "loss": 2.3051, "step": 4751000 }, { "epoch": 23.54, "learning_rate": 3.823393429546727e-05, "loss": 2.3029, "step": 4751500 }, { "epoch": 23.54, "learning_rate": 3.823269570904119e-05, "loss": 2.3158, "step": 4752000 }, { "epoch": 23.55, "learning_rate": 3.8231459599787956e-05, "loss": 2.3026, "step": 4752500 }, { "epoch": 23.55, "learning_rate": 3.823022101336187e-05, "loss": 2.2962, "step": 4753000 }, { "epoch": 23.55, "learning_rate": 3.822898242693579e-05, "loss": 2.3032, "step": 4753500 }, { "epoch": 23.55, "learning_rate": 3.822774384050971e-05, "loss": 2.2858, "step": 4754000 }, { "epoch": 23.56, "learning_rate": 3.8226505254083624e-05, "loss": 2.312, "step": 4754500 }, { "epoch": 23.56, "learning_rate": 3.8225269144830386e-05, "loss": 2.3003, "step": 4755000 }, { "epoch": 23.56, "learning_rate": 3.82240305584043e-05, "loss": 2.3145, "step": 4755500 }, { "epoch": 23.56, "learning_rate": 3.822279197197822e-05, "loss": 2.2898, "step": 4756000 }, { "epoch": 23.57, "learning_rate": 3.822155338555214e-05, "loss": 2.2888, "step": 4756500 }, { "epoch": 23.57, "learning_rate": 3.8220314799126054e-05, "loss": 2.3015, "step": 4757000 }, { "epoch": 23.57, "learning_rate": 3.821907621269997e-05, "loss": 2.2855, "step": 4757500 }, { "epoch": 23.57, "learning_rate": 3.821784010344674e-05, "loss": 2.3261, "step": 4758000 }, { "epoch": 23.58, "learning_rate": 3.8216601517020656e-05, "loss": 2.2987, "step": 4758500 }, { "epoch": 23.58, "learning_rate": 3.821536293059457e-05, "loss": 2.2882, "step": 4759000 }, { "epoch": 23.58, "learning_rate": 3.8214129298514194e-05, "loss": 2.3058, "step": 4759500 }, { "epoch": 23.58, "learning_rate": 3.821289071208811e-05, "loss": 2.3102, "step": 4760000 }, { "epoch": 23.59, "learning_rate": 3.821165460283488e-05, "loss": 2.3256, "step": 4760500 }, { "epoch": 23.59, "learning_rate": 3.8210416016408796e-05, "loss": 2.3053, "step": 4761000 }, { "epoch": 23.59, "learning_rate": 3.8209177429982713e-05, "loss": 2.3171, "step": 4761500 }, { "epoch": 23.59, "learning_rate": 3.820793884355663e-05, "loss": 2.3362, "step": 4762000 }, { "epoch": 23.6, "learning_rate": 3.820670025713055e-05, "loss": 2.3121, "step": 4762500 }, { "epoch": 23.6, "learning_rate": 3.8205461670704464e-05, "loss": 2.3041, "step": 4763000 }, { "epoch": 23.6, "learning_rate": 3.820422308427838e-05, "loss": 2.2931, "step": 4763500 }, { "epoch": 23.6, "learning_rate": 3.82029844978523e-05, "loss": 2.3172, "step": 4764000 }, { "epoch": 23.6, "learning_rate": 3.820174591142621e-05, "loss": 2.3078, "step": 4764500 }, { "epoch": 23.61, "learning_rate": 3.8200507325000125e-05, "loss": 2.2917, "step": 4765000 }, { "epoch": 23.61, "learning_rate": 3.819926873857404e-05, "loss": 2.3073, "step": 4765500 }, { "epoch": 23.61, "learning_rate": 3.819803015214796e-05, "loss": 2.2881, "step": 4766000 }, { "epoch": 23.61, "learning_rate": 3.8196791565721876e-05, "loss": 2.335, "step": 4766500 }, { "epoch": 23.62, "learning_rate": 3.819555297929579e-05, "loss": 2.2791, "step": 4767000 }, { "epoch": 23.62, "learning_rate": 3.819431439286971e-05, "loss": 2.2827, "step": 4767500 }, { "epoch": 23.62, "learning_rate": 3.819307580644363e-05, "loss": 2.3115, "step": 4768000 }, { "epoch": 23.62, "learning_rate": 3.819183722001754e-05, "loss": 2.3083, "step": 4768500 }, { "epoch": 23.63, "learning_rate": 3.8190598633591454e-05, "loss": 2.3059, "step": 4769000 }, { "epoch": 23.63, "learning_rate": 3.818936252433822e-05, "loss": 2.2958, "step": 4769500 }, { "epoch": 23.63, "learning_rate": 3.8188126415085e-05, "loss": 2.3038, "step": 4770000 }, { "epoch": 23.63, "learning_rate": 3.8186887828658915e-05, "loss": 2.3175, "step": 4770500 }, { "epoch": 23.64, "learning_rate": 3.8185649242232825e-05, "loss": 2.2936, "step": 4771000 }, { "epoch": 23.64, "learning_rate": 3.818441065580674e-05, "loss": 2.2973, "step": 4771500 }, { "epoch": 23.64, "learning_rate": 3.818317206938066e-05, "loss": 2.2993, "step": 4772000 }, { "epoch": 23.64, "learning_rate": 3.8181933482954576e-05, "loss": 2.3115, "step": 4772500 }, { "epoch": 23.65, "learning_rate": 3.818069489652849e-05, "loss": 2.3108, "step": 4773000 }, { "epoch": 23.65, "learning_rate": 3.817945631010241e-05, "loss": 2.3075, "step": 4773500 }, { "epoch": 23.65, "learning_rate": 3.817821772367633e-05, "loss": 2.2759, "step": 4774000 }, { "epoch": 23.65, "learning_rate": 3.817698409159595e-05, "loss": 2.3221, "step": 4774500 }, { "epoch": 23.66, "learning_rate": 3.8175747982342716e-05, "loss": 2.2834, "step": 4775000 }, { "epoch": 23.66, "learning_rate": 3.817450939591663e-05, "loss": 2.2967, "step": 4775500 }, { "epoch": 23.66, "learning_rate": 3.8173270809490543e-05, "loss": 2.2973, "step": 4776000 }, { "epoch": 23.66, "learning_rate": 3.817203222306446e-05, "loss": 2.2763, "step": 4776500 }, { "epoch": 23.67, "learning_rate": 3.817079611381123e-05, "loss": 2.2766, "step": 4777000 }, { "epoch": 23.67, "learning_rate": 3.8169557527385146e-05, "loss": 2.3263, "step": 4777500 }, { "epoch": 23.67, "learning_rate": 3.816831894095906e-05, "loss": 2.2982, "step": 4778000 }, { "epoch": 23.67, "learning_rate": 3.816708035453298e-05, "loss": 2.3112, "step": 4778500 }, { "epoch": 23.68, "learning_rate": 3.81658417681069e-05, "loss": 2.3014, "step": 4779000 }, { "epoch": 23.68, "learning_rate": 3.8164603181680814e-05, "loss": 2.286, "step": 4779500 }, { "epoch": 23.68, "learning_rate": 3.816336459525473e-05, "loss": 2.3127, "step": 4780000 }, { "epoch": 23.68, "learning_rate": 3.816212600882865e-05, "loss": 2.3024, "step": 4780500 }, { "epoch": 23.69, "learning_rate": 3.8160887422402565e-05, "loss": 2.2597, "step": 4781000 }, { "epoch": 23.69, "learning_rate": 3.815964883597648e-05, "loss": 2.2918, "step": 4781500 }, { "epoch": 23.69, "learning_rate": 3.81584102495504e-05, "loss": 2.2857, "step": 4782000 }, { "epoch": 23.69, "learning_rate": 3.8157171663124315e-05, "loss": 2.3056, "step": 4782500 }, { "epoch": 23.7, "learning_rate": 3.815593307669823e-05, "loss": 2.2823, "step": 4783000 }, { "epoch": 23.7, "learning_rate": 3.815469449027214e-05, "loss": 2.2994, "step": 4783500 }, { "epoch": 23.7, "learning_rate": 3.815345590384606e-05, "loss": 2.2764, "step": 4784000 }, { "epoch": 23.7, "learning_rate": 3.8152217317419976e-05, "loss": 2.2975, "step": 4784500 }, { "epoch": 23.71, "learning_rate": 3.815097873099389e-05, "loss": 2.2998, "step": 4785000 }, { "epoch": 23.71, "learning_rate": 3.814974014456781e-05, "loss": 2.293, "step": 4785500 }, { "epoch": 23.71, "learning_rate": 3.814850403531458e-05, "loss": 2.2987, "step": 4786000 }, { "epoch": 23.71, "learning_rate": 3.814726544888849e-05, "loss": 2.2947, "step": 4786500 }, { "epoch": 23.72, "learning_rate": 3.8146026862462406e-05, "loss": 2.296, "step": 4787000 }, { "epoch": 23.72, "learning_rate": 3.814478827603632e-05, "loss": 2.3184, "step": 4787500 }, { "epoch": 23.72, "learning_rate": 3.814354968961024e-05, "loss": 2.3023, "step": 4788000 }, { "epoch": 23.72, "learning_rate": 3.814231110318416e-05, "loss": 2.2848, "step": 4788500 }, { "epoch": 23.73, "learning_rate": 3.8141072516758074e-05, "loss": 2.2869, "step": 4789000 }, { "epoch": 23.73, "learning_rate": 3.813983640750484e-05, "loss": 2.3372, "step": 4789500 }, { "epoch": 23.73, "learning_rate": 3.813859782107876e-05, "loss": 2.3091, "step": 4790000 }, { "epoch": 23.73, "learning_rate": 3.8137359234652676e-05, "loss": 2.2714, "step": 4790500 }, { "epoch": 23.74, "learning_rate": 3.813612064822659e-05, "loss": 2.2939, "step": 4791000 }, { "epoch": 23.74, "learning_rate": 3.813488206180051e-05, "loss": 2.3059, "step": 4791500 }, { "epoch": 23.74, "learning_rate": 3.813364347537443e-05, "loss": 2.2798, "step": 4792000 }, { "epoch": 23.74, "learning_rate": 3.8132407366121196e-05, "loss": 2.2939, "step": 4792500 }, { "epoch": 23.75, "learning_rate": 3.8131168779695106e-05, "loss": 2.2965, "step": 4793000 }, { "epoch": 23.75, "learning_rate": 3.812993019326902e-05, "loss": 2.2802, "step": 4793500 }, { "epoch": 23.75, "learning_rate": 3.81286940840158e-05, "loss": 2.3213, "step": 4794000 }, { "epoch": 23.75, "learning_rate": 3.8127455497589716e-05, "loss": 2.3187, "step": 4794500 }, { "epoch": 23.76, "learning_rate": 3.812621691116363e-05, "loss": 2.314, "step": 4795000 }, { "epoch": 23.76, "learning_rate": 3.812497832473755e-05, "loss": 2.283, "step": 4795500 }, { "epoch": 23.76, "learning_rate": 3.812373973831146e-05, "loss": 2.3006, "step": 4796000 }, { "epoch": 23.76, "learning_rate": 3.812250362905823e-05, "loss": 2.3053, "step": 4796500 }, { "epoch": 23.77, "learning_rate": 3.8121265042632145e-05, "loss": 2.2898, "step": 4797000 }, { "epoch": 23.77, "learning_rate": 3.812002645620606e-05, "loss": 2.3042, "step": 4797500 }, { "epoch": 23.77, "learning_rate": 3.811878786977998e-05, "loss": 2.3032, "step": 4798000 }, { "epoch": 23.77, "learning_rate": 3.8117549283353896e-05, "loss": 2.2994, "step": 4798500 }, { "epoch": 23.78, "learning_rate": 3.8116310696927806e-05, "loss": 2.3109, "step": 4799000 }, { "epoch": 23.78, "learning_rate": 3.811507211050172e-05, "loss": 2.3052, "step": 4799500 }, { "epoch": 23.78, "learning_rate": 3.81138360012485e-05, "loss": 2.2868, "step": 4800000 }, { "epoch": 23.78, "learning_rate": 3.8112597414822416e-05, "loss": 2.3109, "step": 4800500 }, { "epoch": 23.79, "learning_rate": 3.811135882839633e-05, "loss": 2.3048, "step": 4801000 }, { "epoch": 23.79, "learning_rate": 3.811012024197025e-05, "loss": 2.3028, "step": 4801500 }, { "epoch": 23.79, "learning_rate": 3.810888165554416e-05, "loss": 2.3225, "step": 4802000 }, { "epoch": 23.79, "learning_rate": 3.810764306911808e-05, "loss": 2.2936, "step": 4802500 }, { "epoch": 23.8, "learning_rate": 3.8106404482691994e-05, "loss": 2.3293, "step": 4803000 }, { "epoch": 23.8, "learning_rate": 3.810516589626591e-05, "loss": 2.3076, "step": 4803500 }, { "epoch": 23.8, "learning_rate": 3.810392730983983e-05, "loss": 2.2963, "step": 4804000 }, { "epoch": 23.8, "learning_rate": 3.8102688723413744e-05, "loss": 2.2903, "step": 4804500 }, { "epoch": 23.81, "learning_rate": 3.810145261416051e-05, "loss": 2.3059, "step": 4805000 }, { "epoch": 23.81, "learning_rate": 3.810021402773442e-05, "loss": 2.2973, "step": 4805500 }, { "epoch": 23.81, "learning_rate": 3.809897544130834e-05, "loss": 2.2988, "step": 4806000 }, { "epoch": 23.81, "learning_rate": 3.809773685488226e-05, "loss": 2.2914, "step": 4806500 }, { "epoch": 23.82, "learning_rate": 3.8096498268456174e-05, "loss": 2.3179, "step": 4807000 }, { "epoch": 23.82, "learning_rate": 3.809525968203009e-05, "loss": 2.3134, "step": 4807500 }, { "epoch": 23.82, "learning_rate": 3.809402357277687e-05, "loss": 2.28, "step": 4808000 }, { "epoch": 23.82, "learning_rate": 3.809278498635078e-05, "loss": 2.3142, "step": 4808500 }, { "epoch": 23.83, "learning_rate": 3.8091546399924694e-05, "loss": 2.2888, "step": 4809000 }, { "epoch": 23.83, "learning_rate": 3.809030781349861e-05, "loss": 2.3241, "step": 4809500 }, { "epoch": 23.83, "learning_rate": 3.808906922707253e-05, "loss": 2.3173, "step": 4810000 }, { "epoch": 23.83, "learning_rate": 3.8087830640646445e-05, "loss": 2.2905, "step": 4810500 }, { "epoch": 23.84, "learning_rate": 3.8086597008566065e-05, "loss": 2.3409, "step": 4811000 }, { "epoch": 23.84, "learning_rate": 3.808535842213998e-05, "loss": 2.3207, "step": 4811500 }, { "epoch": 23.84, "learning_rate": 3.80841198357139e-05, "loss": 2.3175, "step": 4812000 }, { "epoch": 23.84, "learning_rate": 3.808288372646067e-05, "loss": 2.3065, "step": 4812500 }, { "epoch": 23.85, "learning_rate": 3.8081645140034585e-05, "loss": 2.2876, "step": 4813000 }, { "epoch": 23.85, "learning_rate": 3.80804065536085e-05, "loss": 2.3141, "step": 4813500 }, { "epoch": 23.85, "learning_rate": 3.8079170444355264e-05, "loss": 2.2991, "step": 4814000 }, { "epoch": 23.85, "learning_rate": 3.807793185792918e-05, "loss": 2.2971, "step": 4814500 }, { "epoch": 23.86, "learning_rate": 3.80766932715031e-05, "loss": 2.2939, "step": 4815000 }, { "epoch": 23.86, "learning_rate": 3.8075454685077015e-05, "loss": 2.3154, "step": 4815500 }, { "epoch": 23.86, "learning_rate": 3.807421609865093e-05, "loss": 2.3036, "step": 4816000 }, { "epoch": 23.86, "learning_rate": 3.807297751222485e-05, "loss": 2.3105, "step": 4816500 }, { "epoch": 23.87, "learning_rate": 3.8071738925798765e-05, "loss": 2.281, "step": 4817000 }, { "epoch": 23.87, "learning_rate": 3.807050033937268e-05, "loss": 2.2819, "step": 4817500 }, { "epoch": 23.87, "learning_rate": 3.80692617529466e-05, "loss": 2.2728, "step": 4818000 }, { "epoch": 23.87, "learning_rate": 3.8068023166520516e-05, "loss": 2.3142, "step": 4818500 }, { "epoch": 23.87, "learning_rate": 3.806678458009443e-05, "loss": 2.3092, "step": 4819000 }, { "epoch": 23.88, "learning_rate": 3.806554599366835e-05, "loss": 2.3183, "step": 4819500 }, { "epoch": 23.88, "learning_rate": 3.806430740724227e-05, "loss": 2.2989, "step": 4820000 }, { "epoch": 23.88, "learning_rate": 3.8063071297989036e-05, "loss": 2.2973, "step": 4820500 }, { "epoch": 23.88, "learning_rate": 3.8061837665908656e-05, "loss": 2.3134, "step": 4821000 }, { "epoch": 23.89, "learning_rate": 3.8060599079482567e-05, "loss": 2.3101, "step": 4821500 }, { "epoch": 23.89, "learning_rate": 3.8059360493056483e-05, "loss": 2.3073, "step": 4822000 }, { "epoch": 23.89, "learning_rate": 3.80581219066304e-05, "loss": 2.2903, "step": 4822500 }, { "epoch": 23.89, "learning_rate": 3.805688332020432e-05, "loss": 2.2952, "step": 4823000 }, { "epoch": 23.9, "learning_rate": 3.8055644733778234e-05, "loss": 2.334, "step": 4823500 }, { "epoch": 23.9, "learning_rate": 3.805440614735215e-05, "loss": 2.3168, "step": 4824000 }, { "epoch": 23.9, "learning_rate": 3.805316756092607e-05, "loss": 2.3011, "step": 4824500 }, { "epoch": 23.9, "learning_rate": 3.805193145167284e-05, "loss": 2.3194, "step": 4825000 }, { "epoch": 23.91, "learning_rate": 3.8050692865246754e-05, "loss": 2.313, "step": 4825500 }, { "epoch": 23.91, "learning_rate": 3.804945427882067e-05, "loss": 2.3036, "step": 4826000 }, { "epoch": 23.91, "learning_rate": 3.804821569239458e-05, "loss": 2.2979, "step": 4826500 }, { "epoch": 23.91, "learning_rate": 3.80469771059685e-05, "loss": 2.2836, "step": 4827000 }, { "epoch": 23.92, "learning_rate": 3.8045738519542415e-05, "loss": 2.3093, "step": 4827500 }, { "epoch": 23.92, "learning_rate": 3.8044502410289184e-05, "loss": 2.2908, "step": 4828000 }, { "epoch": 23.92, "learning_rate": 3.80432638238631e-05, "loss": 2.3154, "step": 4828500 }, { "epoch": 23.92, "learning_rate": 3.804202523743702e-05, "loss": 2.3275, "step": 4829000 }, { "epoch": 23.93, "learning_rate": 3.8040786651010934e-05, "loss": 2.3259, "step": 4829500 }, { "epoch": 23.93, "learning_rate": 3.803954806458485e-05, "loss": 2.3006, "step": 4830000 }, { "epoch": 23.93, "learning_rate": 3.803830947815877e-05, "loss": 2.3199, "step": 4830500 }, { "epoch": 23.93, "learning_rate": 3.8037070891732685e-05, "loss": 2.3036, "step": 4831000 }, { "epoch": 23.94, "learning_rate": 3.80358323053066e-05, "loss": 2.2679, "step": 4831500 }, { "epoch": 23.94, "learning_rate": 3.803459619605337e-05, "loss": 2.3195, "step": 4832000 }, { "epoch": 23.94, "learning_rate": 3.803335760962729e-05, "loss": 2.3192, "step": 4832500 }, { "epoch": 23.94, "learning_rate": 3.803212150037406e-05, "loss": 2.3352, "step": 4833000 }, { "epoch": 23.95, "learning_rate": 3.8030885391120825e-05, "loss": 2.3116, "step": 4833500 }, { "epoch": 23.95, "learning_rate": 3.802964680469474e-05, "loss": 2.3202, "step": 4834000 }, { "epoch": 23.95, "learning_rate": 3.802840821826866e-05, "loss": 2.3153, "step": 4834500 }, { "epoch": 23.95, "learning_rate": 3.8027169631842576e-05, "loss": 2.3291, "step": 4835000 }, { "epoch": 23.96, "learning_rate": 3.802593104541649e-05, "loss": 2.2869, "step": 4835500 }, { "epoch": 23.96, "learning_rate": 3.802469245899041e-05, "loss": 2.3052, "step": 4836000 }, { "epoch": 23.96, "learning_rate": 3.802345387256433e-05, "loss": 2.313, "step": 4836500 }, { "epoch": 23.96, "learning_rate": 3.802221776331109e-05, "loss": 2.2973, "step": 4837000 }, { "epoch": 23.97, "learning_rate": 3.8020979176885006e-05, "loss": 2.3024, "step": 4837500 }, { "epoch": 23.97, "learning_rate": 3.801974059045892e-05, "loss": 2.3348, "step": 4838000 }, { "epoch": 23.97, "learning_rate": 3.801850200403284e-05, "loss": 2.3053, "step": 4838500 }, { "epoch": 23.97, "learning_rate": 3.801726341760676e-05, "loss": 2.3055, "step": 4839000 }, { "epoch": 23.98, "learning_rate": 3.8016024831180674e-05, "loss": 2.3312, "step": 4839500 }, { "epoch": 23.98, "learning_rate": 3.801478872192744e-05, "loss": 2.2953, "step": 4840000 }, { "epoch": 23.98, "learning_rate": 3.801355013550136e-05, "loss": 2.3206, "step": 4840500 }, { "epoch": 23.98, "learning_rate": 3.8012311549075276e-05, "loss": 2.3084, "step": 4841000 }, { "epoch": 23.99, "learning_rate": 3.801107296264919e-05, "loss": 2.2991, "step": 4841500 }, { "epoch": 23.99, "learning_rate": 3.800983437622311e-05, "loss": 2.2901, "step": 4842000 }, { "epoch": 23.99, "learning_rate": 3.800859578979703e-05, "loss": 2.3141, "step": 4842500 }, { "epoch": 23.99, "learning_rate": 3.8007357203370944e-05, "loss": 2.3149, "step": 4843000 }, { "epoch": 24.0, "learning_rate": 3.8006118616944854e-05, "loss": 2.2997, "step": 4843500 }, { "epoch": 24.0, "learning_rate": 3.800488003051877e-05, "loss": 2.3124, "step": 4844000 }, { "epoch": 24.0, "eval_accuracy": 0.6541513737695038, "eval_accuracy_mlm": 0.608915092455769, "eval_accuracy_nsp": 0.8673943653685494, "eval_loss": 2.3393051624298096, "eval_runtime": 146.2902, "eval_samples_per_second": 1742.831, "eval_steps_per_second": 72.623, "step": 4844232 }, { "epoch": 24.0, "learning_rate": 3.800364144409269e-05, "loss": 2.2969, "step": 4844500 }, { "epoch": 24.0, "learning_rate": 3.8002402857666605e-05, "loss": 2.2555, "step": 4845000 }, { "epoch": 24.01, "learning_rate": 3.800116427124052e-05, "loss": 2.2708, "step": 4845500 }, { "epoch": 24.01, "learning_rate": 3.799992568481444e-05, "loss": 2.2948, "step": 4846000 }, { "epoch": 24.01, "learning_rate": 3.7998687098388356e-05, "loss": 2.277, "step": 4846500 }, { "epoch": 24.01, "learning_rate": 3.7997448511962266e-05, "loss": 2.249, "step": 4847000 }, { "epoch": 24.02, "learning_rate": 3.7996212402709035e-05, "loss": 2.2613, "step": 4847500 }, { "epoch": 24.02, "learning_rate": 3.799497629345581e-05, "loss": 2.2796, "step": 4848000 }, { "epoch": 24.02, "learning_rate": 3.799373770702973e-05, "loss": 2.2798, "step": 4848500 }, { "epoch": 24.02, "learning_rate": 3.7992499120603644e-05, "loss": 2.2457, "step": 4849000 }, { "epoch": 24.03, "learning_rate": 3.7991260534177554e-05, "loss": 2.2718, "step": 4849500 }, { "epoch": 24.03, "learning_rate": 3.799002194775147e-05, "loss": 2.2644, "step": 4850000 }, { "epoch": 24.03, "learning_rate": 3.798878336132539e-05, "loss": 2.282, "step": 4850500 }, { "epoch": 24.03, "learning_rate": 3.7987544774899305e-05, "loss": 2.2948, "step": 4851000 }, { "epoch": 24.04, "learning_rate": 3.7986308665646074e-05, "loss": 2.2776, "step": 4851500 }, { "epoch": 24.04, "learning_rate": 3.798507007921999e-05, "loss": 2.2824, "step": 4852000 }, { "epoch": 24.04, "learning_rate": 3.798383149279391e-05, "loss": 2.2988, "step": 4852500 }, { "epoch": 24.04, "learning_rate": 3.798259290636782e-05, "loss": 2.296, "step": 4853000 }, { "epoch": 24.05, "learning_rate": 3.7981356797114594e-05, "loss": 2.2601, "step": 4853500 }, { "epoch": 24.05, "learning_rate": 3.798011821068851e-05, "loss": 2.2779, "step": 4854000 }, { "epoch": 24.05, "learning_rate": 3.797887962426243e-05, "loss": 2.2901, "step": 4854500 }, { "epoch": 24.05, "learning_rate": 3.7977641037836344e-05, "loss": 2.286, "step": 4855000 }, { "epoch": 24.06, "learning_rate": 3.797640245141026e-05, "loss": 2.2828, "step": 4855500 }, { "epoch": 24.06, "learning_rate": 3.797516386498417e-05, "loss": 2.2735, "step": 4856000 }, { "epoch": 24.06, "learning_rate": 3.797392527855809e-05, "loss": 2.2708, "step": 4856500 }, { "epoch": 24.06, "learning_rate": 3.7972686692132005e-05, "loss": 2.2948, "step": 4857000 }, { "epoch": 24.07, "learning_rate": 3.7971450582878774e-05, "loss": 2.2892, "step": 4857500 }, { "epoch": 24.07, "learning_rate": 3.797021199645269e-05, "loss": 2.2911, "step": 4858000 }, { "epoch": 24.07, "learning_rate": 3.796897341002661e-05, "loss": 2.2749, "step": 4858500 }, { "epoch": 24.07, "learning_rate": 3.796773482360052e-05, "loss": 2.3011, "step": 4859000 }, { "epoch": 24.08, "learning_rate": 3.7966496237174435e-05, "loss": 2.2884, "step": 4859500 }, { "epoch": 24.08, "learning_rate": 3.796525765074835e-05, "loss": 2.2755, "step": 4860000 }, { "epoch": 24.08, "learning_rate": 3.796401906432227e-05, "loss": 2.3002, "step": 4860500 }, { "epoch": 24.08, "learning_rate": 3.7962780477896186e-05, "loss": 2.2786, "step": 4861000 }, { "epoch": 24.09, "learning_rate": 3.79615418914701e-05, "loss": 2.2814, "step": 4861500 }, { "epoch": 24.09, "learning_rate": 3.796030330504402e-05, "loss": 2.2544, "step": 4862000 }, { "epoch": 24.09, "learning_rate": 3.795906719579079e-05, "loss": 2.2841, "step": 4862500 }, { "epoch": 24.09, "learning_rate": 3.7957828609364705e-05, "loss": 2.3162, "step": 4863000 }, { "epoch": 24.1, "learning_rate": 3.7956592500111474e-05, "loss": 2.2993, "step": 4863500 }, { "epoch": 24.1, "learning_rate": 3.795535391368539e-05, "loss": 2.288, "step": 4864000 }, { "epoch": 24.1, "learning_rate": 3.795411532725931e-05, "loss": 2.2672, "step": 4864500 }, { "epoch": 24.1, "learning_rate": 3.795287921800608e-05, "loss": 2.2889, "step": 4865000 }, { "epoch": 24.11, "learning_rate": 3.7951640631579994e-05, "loss": 2.2822, "step": 4865500 }, { "epoch": 24.11, "learning_rate": 3.795040204515391e-05, "loss": 2.271, "step": 4866000 }, { "epoch": 24.11, "learning_rate": 3.794916345872783e-05, "loss": 2.2691, "step": 4866500 }, { "epoch": 24.11, "learning_rate": 3.7947924872301745e-05, "loss": 2.2968, "step": 4867000 }, { "epoch": 24.12, "learning_rate": 3.794668876304851e-05, "loss": 2.2602, "step": 4867500 }, { "epoch": 24.12, "learning_rate": 3.7945450176622424e-05, "loss": 2.2793, "step": 4868000 }, { "epoch": 24.12, "learning_rate": 3.794421159019634e-05, "loss": 2.2726, "step": 4868500 }, { "epoch": 24.12, "learning_rate": 3.794297300377026e-05, "loss": 2.2902, "step": 4869000 }, { "epoch": 24.13, "learning_rate": 3.7941734417344174e-05, "loss": 2.2575, "step": 4869500 }, { "epoch": 24.13, "learning_rate": 3.794049583091809e-05, "loss": 2.2759, "step": 4870000 }, { "epoch": 24.13, "learning_rate": 3.793925724449201e-05, "loss": 2.2869, "step": 4870500 }, { "epoch": 24.13, "learning_rate": 3.7938018658065925e-05, "loss": 2.2826, "step": 4871000 }, { "epoch": 24.14, "learning_rate": 3.7936780071639835e-05, "loss": 2.2913, "step": 4871500 }, { "epoch": 24.14, "learning_rate": 3.793554396238661e-05, "loss": 2.3063, "step": 4872000 }, { "epoch": 24.14, "learning_rate": 3.793430537596053e-05, "loss": 2.2777, "step": 4872500 }, { "epoch": 24.14, "learning_rate": 3.7933066789534445e-05, "loss": 2.2694, "step": 4873000 }, { "epoch": 24.15, "learning_rate": 3.793182820310836e-05, "loss": 2.2867, "step": 4873500 }, { "epoch": 24.15, "learning_rate": 3.793058961668228e-05, "loss": 2.2729, "step": 4874000 }, { "epoch": 24.15, "learning_rate": 3.792935350742905e-05, "loss": 2.2849, "step": 4874500 }, { "epoch": 24.15, "learning_rate": 3.792811492100296e-05, "loss": 2.2694, "step": 4875000 }, { "epoch": 24.15, "learning_rate": 3.7926876334576874e-05, "loss": 2.263, "step": 4875500 }, { "epoch": 24.16, "learning_rate": 3.792563774815079e-05, "loss": 2.2894, "step": 4876000 }, { "epoch": 24.16, "learning_rate": 3.792439916172471e-05, "loss": 2.2946, "step": 4876500 }, { "epoch": 24.16, "learning_rate": 3.792316305247148e-05, "loss": 2.2824, "step": 4877000 }, { "epoch": 24.16, "learning_rate": 3.7921924466045394e-05, "loss": 2.2882, "step": 4877500 }, { "epoch": 24.17, "learning_rate": 3.792068587961931e-05, "loss": 2.2966, "step": 4878000 }, { "epoch": 24.17, "learning_rate": 3.791944729319323e-05, "loss": 2.2884, "step": 4878500 }, { "epoch": 24.17, "learning_rate": 3.7918208706767145e-05, "loss": 2.2832, "step": 4879000 }, { "epoch": 24.17, "learning_rate": 3.7916972597513914e-05, "loss": 2.268, "step": 4879500 }, { "epoch": 24.18, "learning_rate": 3.791573401108783e-05, "loss": 2.2686, "step": 4880000 }, { "epoch": 24.18, "learning_rate": 3.791449542466175e-05, "loss": 2.2891, "step": 4880500 }, { "epoch": 24.18, "learning_rate": 3.7913256838235664e-05, "loss": 2.2771, "step": 4881000 }, { "epoch": 24.18, "learning_rate": 3.7912018251809575e-05, "loss": 2.3122, "step": 4881500 }, { "epoch": 24.19, "learning_rate": 3.791077966538349e-05, "loss": 2.2735, "step": 4882000 }, { "epoch": 24.19, "learning_rate": 3.790954107895741e-05, "loss": 2.2336, "step": 4882500 }, { "epoch": 24.19, "learning_rate": 3.7908302492531325e-05, "loss": 2.2546, "step": 4883000 }, { "epoch": 24.19, "learning_rate": 3.7907066383278094e-05, "loss": 2.2733, "step": 4883500 }, { "epoch": 24.2, "learning_rate": 3.790582779685201e-05, "loss": 2.2813, "step": 4884000 }, { "epoch": 24.2, "learning_rate": 3.790459168759878e-05, "loss": 2.2616, "step": 4884500 }, { "epoch": 24.2, "learning_rate": 3.79033531011727e-05, "loss": 2.2972, "step": 4885000 }, { "epoch": 24.2, "learning_rate": 3.7902114514746614e-05, "loss": 2.2637, "step": 4885500 }, { "epoch": 24.21, "learning_rate": 3.790087592832053e-05, "loss": 2.2805, "step": 4886000 }, { "epoch": 24.21, "learning_rate": 3.789963734189445e-05, "loss": 2.2822, "step": 4886500 }, { "epoch": 24.21, "learning_rate": 3.7898398755468365e-05, "loss": 2.2969, "step": 4887000 }, { "epoch": 24.21, "learning_rate": 3.789716016904228e-05, "loss": 2.2901, "step": 4887500 }, { "epoch": 24.22, "learning_rate": 3.78959215826162e-05, "loss": 2.2994, "step": 4888000 }, { "epoch": 24.22, "learning_rate": 3.789468299619011e-05, "loss": 2.2735, "step": 4888500 }, { "epoch": 24.22, "learning_rate": 3.7893444409764025e-05, "loss": 2.2762, "step": 4889000 }, { "epoch": 24.22, "learning_rate": 3.789220582333794e-05, "loss": 2.3055, "step": 4889500 }, { "epoch": 24.23, "learning_rate": 3.789096971408471e-05, "loss": 2.2829, "step": 4890000 }, { "epoch": 24.23, "learning_rate": 3.788973112765863e-05, "loss": 2.2869, "step": 4890500 }, { "epoch": 24.23, "learning_rate": 3.78884950184054e-05, "loss": 2.284, "step": 4891000 }, { "epoch": 24.23, "learning_rate": 3.7887256431979314e-05, "loss": 2.2953, "step": 4891500 }, { "epoch": 24.24, "learning_rate": 3.788601784555323e-05, "loss": 2.2887, "step": 4892000 }, { "epoch": 24.24, "learning_rate": 3.788477925912715e-05, "loss": 2.2865, "step": 4892500 }, { "epoch": 24.24, "learning_rate": 3.7883540672701065e-05, "loss": 2.2958, "step": 4893000 }, { "epoch": 24.24, "learning_rate": 3.788230208627498e-05, "loss": 2.2692, "step": 4893500 }, { "epoch": 24.25, "learning_rate": 3.7881065977021744e-05, "loss": 2.2848, "step": 4894000 }, { "epoch": 24.25, "learning_rate": 3.787982986776851e-05, "loss": 2.2996, "step": 4894500 }, { "epoch": 24.25, "learning_rate": 3.787859128134243e-05, "loss": 2.2844, "step": 4895000 }, { "epoch": 24.25, "learning_rate": 3.7877352694916346e-05, "loss": 2.2728, "step": 4895500 }, { "epoch": 24.26, "learning_rate": 3.7876116585663115e-05, "loss": 2.2742, "step": 4896000 }, { "epoch": 24.26, "learning_rate": 3.787487799923703e-05, "loss": 2.2734, "step": 4896500 }, { "epoch": 24.26, "learning_rate": 3.787363941281095e-05, "loss": 2.3034, "step": 4897000 }, { "epoch": 24.26, "learning_rate": 3.7872400826384866e-05, "loss": 2.3104, "step": 4897500 }, { "epoch": 24.27, "learning_rate": 3.787116223995878e-05, "loss": 2.2996, "step": 4898000 }, { "epoch": 24.27, "learning_rate": 3.786992365353269e-05, "loss": 2.2914, "step": 4898500 }, { "epoch": 24.27, "learning_rate": 3.786868506710661e-05, "loss": 2.2983, "step": 4899000 }, { "epoch": 24.27, "learning_rate": 3.786744648068053e-05, "loss": 2.3012, "step": 4899500 }, { "epoch": 24.28, "learning_rate": 3.78662103714273e-05, "loss": 2.2913, "step": 4900000 }, { "epoch": 24.28, "learning_rate": 3.786497178500121e-05, "loss": 2.2849, "step": 4900500 }, { "epoch": 24.28, "learning_rate": 3.786373319857513e-05, "loss": 2.2893, "step": 4901000 }, { "epoch": 24.28, "learning_rate": 3.7862494612149046e-05, "loss": 2.2917, "step": 4901500 }, { "epoch": 24.29, "learning_rate": 3.786125602572296e-05, "loss": 2.2711, "step": 4902000 }, { "epoch": 24.29, "learning_rate": 3.786001743929688e-05, "loss": 2.2723, "step": 4902500 }, { "epoch": 24.29, "learning_rate": 3.78587788528708e-05, "loss": 2.2921, "step": 4903000 }, { "epoch": 24.29, "learning_rate": 3.7857540266444714e-05, "loss": 2.2999, "step": 4903500 }, { "epoch": 24.3, "learning_rate": 3.785630168001863e-05, "loss": 2.2939, "step": 4904000 }, { "epoch": 24.3, "learning_rate": 3.78550655707654e-05, "loss": 2.3084, "step": 4904500 }, { "epoch": 24.3, "learning_rate": 3.785382698433931e-05, "loss": 2.311, "step": 4905000 }, { "epoch": 24.3, "learning_rate": 3.785258839791323e-05, "loss": 2.2878, "step": 4905500 }, { "epoch": 24.31, "learning_rate": 3.7851349811487144e-05, "loss": 2.273, "step": 4906000 }, { "epoch": 24.31, "learning_rate": 3.785011122506106e-05, "loss": 2.2828, "step": 4906500 }, { "epoch": 24.31, "learning_rate": 3.784887263863498e-05, "loss": 2.2966, "step": 4907000 }, { "epoch": 24.31, "learning_rate": 3.7847634052208895e-05, "loss": 2.2957, "step": 4907500 }, { "epoch": 24.32, "learning_rate": 3.784639546578281e-05, "loss": 2.2923, "step": 4908000 }, { "epoch": 24.32, "learning_rate": 3.784515687935673e-05, "loss": 2.2919, "step": 4908500 }, { "epoch": 24.32, "learning_rate": 3.7843918292930645e-05, "loss": 2.2741, "step": 4909000 }, { "epoch": 24.32, "learning_rate": 3.784267970650456e-05, "loss": 2.3029, "step": 4909500 }, { "epoch": 24.33, "learning_rate": 3.784144112007848e-05, "loss": 2.2969, "step": 4910000 }, { "epoch": 24.33, "learning_rate": 3.784020501082525e-05, "loss": 2.2778, "step": 4910500 }, { "epoch": 24.33, "learning_rate": 3.7838966424399165e-05, "loss": 2.3072, "step": 4911000 }, { "epoch": 24.33, "learning_rate": 3.783772783797308e-05, "loss": 2.2775, "step": 4911500 }, { "epoch": 24.34, "learning_rate": 3.7836489251547e-05, "loss": 2.2819, "step": 4912000 }, { "epoch": 24.34, "learning_rate": 3.783525314229376e-05, "loss": 2.2748, "step": 4912500 }, { "epoch": 24.34, "learning_rate": 3.783401703304053e-05, "loss": 2.3047, "step": 4913000 }, { "epoch": 24.34, "learning_rate": 3.783277844661445e-05, "loss": 2.2986, "step": 4913500 }, { "epoch": 24.35, "learning_rate": 3.7831539860188364e-05, "loss": 2.3051, "step": 4914000 }, { "epoch": 24.35, "learning_rate": 3.783030127376228e-05, "loss": 2.2865, "step": 4914500 }, { "epoch": 24.35, "learning_rate": 3.78290626873362e-05, "loss": 2.3101, "step": 4915000 }, { "epoch": 24.35, "learning_rate": 3.7827824100910114e-05, "loss": 2.3132, "step": 4915500 }, { "epoch": 24.36, "learning_rate": 3.782658551448403e-05, "loss": 2.2878, "step": 4916000 }, { "epoch": 24.36, "learning_rate": 3.78253494052308e-05, "loss": 2.2744, "step": 4916500 }, { "epoch": 24.36, "learning_rate": 3.782411329597757e-05, "loss": 2.2969, "step": 4917000 }, { "epoch": 24.36, "learning_rate": 3.7822874709551486e-05, "loss": 2.2918, "step": 4917500 }, { "epoch": 24.37, "learning_rate": 3.7821638600298255e-05, "loss": 2.2919, "step": 4918000 }, { "epoch": 24.37, "learning_rate": 3.782040001387217e-05, "loss": 2.3031, "step": 4918500 }, { "epoch": 24.37, "learning_rate": 3.781916142744609e-05, "loss": 2.2955, "step": 4919000 }, { "epoch": 24.37, "learning_rate": 3.7817922841020005e-05, "loss": 2.2644, "step": 4919500 }, { "epoch": 24.38, "learning_rate": 3.781668425459392e-05, "loss": 2.3126, "step": 4920000 }, { "epoch": 24.38, "learning_rate": 3.781544566816784e-05, "loss": 2.2968, "step": 4920500 }, { "epoch": 24.38, "learning_rate": 3.7814207081741756e-05, "loss": 2.2943, "step": 4921000 }, { "epoch": 24.38, "learning_rate": 3.781296849531567e-05, "loss": 2.3009, "step": 4921500 }, { "epoch": 24.39, "learning_rate": 3.781172990888958e-05, "loss": 2.2804, "step": 4922000 }, { "epoch": 24.39, "learning_rate": 3.781049379963635e-05, "loss": 2.2773, "step": 4922500 }, { "epoch": 24.39, "learning_rate": 3.780925521321027e-05, "loss": 2.3084, "step": 4923000 }, { "epoch": 24.39, "learning_rate": 3.7808016626784186e-05, "loss": 2.3082, "step": 4923500 }, { "epoch": 24.4, "learning_rate": 3.78067780403581e-05, "loss": 2.2789, "step": 4924000 }, { "epoch": 24.4, "learning_rate": 3.780554193110487e-05, "loss": 2.2776, "step": 4924500 }, { "epoch": 24.4, "learning_rate": 3.780430334467879e-05, "loss": 2.2606, "step": 4925000 }, { "epoch": 24.4, "learning_rate": 3.7803064758252706e-05, "loss": 2.3084, "step": 4925500 }, { "epoch": 24.41, "learning_rate": 3.780182617182662e-05, "loss": 2.2744, "step": 4926000 }, { "epoch": 24.41, "learning_rate": 3.780058758540054e-05, "loss": 2.2805, "step": 4926500 }, { "epoch": 24.41, "learning_rate": 3.7799348998974456e-05, "loss": 2.2899, "step": 4927000 }, { "epoch": 24.41, "learning_rate": 3.779811041254837e-05, "loss": 2.2927, "step": 4927500 }, { "epoch": 24.42, "learning_rate": 3.7796874303295135e-05, "loss": 2.3032, "step": 4928000 }, { "epoch": 24.42, "learning_rate": 3.779563571686905e-05, "loss": 2.2916, "step": 4928500 }, { "epoch": 24.42, "learning_rate": 3.779439713044297e-05, "loss": 2.284, "step": 4929000 }, { "epoch": 24.42, "learning_rate": 3.7793158544016886e-05, "loss": 2.2702, "step": 4929500 }, { "epoch": 24.42, "learning_rate": 3.7791922434763655e-05, "loss": 2.2966, "step": 4930000 }, { "epoch": 24.43, "learning_rate": 3.779068384833757e-05, "loss": 2.2612, "step": 4930500 }, { "epoch": 24.43, "learning_rate": 3.778944526191149e-05, "loss": 2.304, "step": 4931000 }, { "epoch": 24.43, "learning_rate": 3.7788206675485406e-05, "loss": 2.2901, "step": 4931500 }, { "epoch": 24.43, "learning_rate": 3.778696808905932e-05, "loss": 2.2847, "step": 4932000 }, { "epoch": 24.44, "learning_rate": 3.778572950263324e-05, "loss": 2.2852, "step": 4932500 }, { "epoch": 24.44, "learning_rate": 3.7784490916207156e-05, "loss": 2.2935, "step": 4933000 }, { "epoch": 24.44, "learning_rate": 3.7783252329781073e-05, "loss": 2.2765, "step": 4933500 }, { "epoch": 24.44, "learning_rate": 3.778201374335499e-05, "loss": 2.3069, "step": 4934000 }, { "epoch": 24.45, "learning_rate": 3.77807751569289e-05, "loss": 2.3149, "step": 4934500 }, { "epoch": 24.45, "learning_rate": 3.777953657050282e-05, "loss": 2.3075, "step": 4935000 }, { "epoch": 24.45, "learning_rate": 3.7778297984076734e-05, "loss": 2.2755, "step": 4935500 }, { "epoch": 24.45, "learning_rate": 3.777705939765065e-05, "loss": 2.2864, "step": 4936000 }, { "epoch": 24.46, "learning_rate": 3.777582328839742e-05, "loss": 2.3105, "step": 4936500 }, { "epoch": 24.46, "learning_rate": 3.777458470197134e-05, "loss": 2.2645, "step": 4937000 }, { "epoch": 24.46, "learning_rate": 3.7773346115545254e-05, "loss": 2.2963, "step": 4937500 }, { "epoch": 24.46, "learning_rate": 3.7772107529119164e-05, "loss": 2.3063, "step": 4938000 }, { "epoch": 24.47, "learning_rate": 3.777086894269308e-05, "loss": 2.309, "step": 4938500 }, { "epoch": 24.47, "learning_rate": 3.7769630356267e-05, "loss": 2.309, "step": 4939000 }, { "epoch": 24.47, "learning_rate": 3.7768391769840915e-05, "loss": 2.2765, "step": 4939500 }, { "epoch": 24.47, "learning_rate": 3.776715318341483e-05, "loss": 2.2682, "step": 4940000 }, { "epoch": 24.48, "learning_rate": 3.776591459698875e-05, "loss": 2.2867, "step": 4940500 }, { "epoch": 24.48, "learning_rate": 3.7764676010562666e-05, "loss": 2.2626, "step": 4941000 }, { "epoch": 24.48, "learning_rate": 3.776343742413658e-05, "loss": 2.2847, "step": 4941500 }, { "epoch": 24.48, "learning_rate": 3.776220131488335e-05, "loss": 2.2891, "step": 4942000 }, { "epoch": 24.49, "learning_rate": 3.776096272845727e-05, "loss": 2.2844, "step": 4942500 }, { "epoch": 24.49, "learning_rate": 3.7759724142031185e-05, "loss": 2.2788, "step": 4943000 }, { "epoch": 24.49, "learning_rate": 3.7758488032777954e-05, "loss": 2.2986, "step": 4943500 }, { "epoch": 24.49, "learning_rate": 3.7757249446351864e-05, "loss": 2.2886, "step": 4944000 }, { "epoch": 24.5, "learning_rate": 3.775601085992578e-05, "loss": 2.3065, "step": 4944500 }, { "epoch": 24.5, "learning_rate": 3.77547722734997e-05, "loss": 2.2977, "step": 4945000 }, { "epoch": 24.5, "learning_rate": 3.7753536164246474e-05, "loss": 2.2616, "step": 4945500 }, { "epoch": 24.5, "learning_rate": 3.775229757782039e-05, "loss": 2.2727, "step": 4946000 }, { "epoch": 24.51, "learning_rate": 3.775105899139431e-05, "loss": 2.2846, "step": 4946500 }, { "epoch": 24.51, "learning_rate": 3.774982040496822e-05, "loss": 2.2827, "step": 4947000 }, { "epoch": 24.51, "learning_rate": 3.7748581818542135e-05, "loss": 2.2623, "step": 4947500 }, { "epoch": 24.51, "learning_rate": 3.774734323211605e-05, "loss": 2.3012, "step": 4948000 }, { "epoch": 24.52, "learning_rate": 3.774610712286282e-05, "loss": 2.2657, "step": 4948500 }, { "epoch": 24.52, "learning_rate": 3.774486853643674e-05, "loss": 2.279, "step": 4949000 }, { "epoch": 24.52, "learning_rate": 3.7743629950010654e-05, "loss": 2.3157, "step": 4949500 }, { "epoch": 24.52, "learning_rate": 3.774239136358457e-05, "loss": 2.2974, "step": 4950000 }, { "epoch": 24.53, "learning_rate": 3.774115277715848e-05, "loss": 2.2873, "step": 4950500 }, { "epoch": 24.53, "learning_rate": 3.773991666790526e-05, "loss": 2.2983, "step": 4951000 }, { "epoch": 24.53, "learning_rate": 3.7738678081479174e-05, "loss": 2.2926, "step": 4951500 }, { "epoch": 24.53, "learning_rate": 3.773743949505309e-05, "loss": 2.2734, "step": 4952000 }, { "epoch": 24.54, "learning_rate": 3.773620090862701e-05, "loss": 2.3017, "step": 4952500 }, { "epoch": 24.54, "learning_rate": 3.7734962322200925e-05, "loss": 2.3132, "step": 4953000 }, { "epoch": 24.54, "learning_rate": 3.7733723735774835e-05, "loss": 2.2891, "step": 4953500 }, { "epoch": 24.54, "learning_rate": 3.773248514934875e-05, "loss": 2.2889, "step": 4954000 }, { "epoch": 24.55, "learning_rate": 3.773124656292267e-05, "loss": 2.3073, "step": 4954500 }, { "epoch": 24.55, "learning_rate": 3.7730007976496585e-05, "loss": 2.2684, "step": 4955000 }, { "epoch": 24.55, "learning_rate": 3.7728774344416206e-05, "loss": 2.2905, "step": 4955500 }, { "epoch": 24.55, "learning_rate": 3.772753575799012e-05, "loss": 2.2836, "step": 4956000 }, { "epoch": 24.56, "learning_rate": 3.772629717156404e-05, "loss": 2.2936, "step": 4956500 }, { "epoch": 24.56, "learning_rate": 3.772505858513796e-05, "loss": 2.3092, "step": 4957000 }, { "epoch": 24.56, "learning_rate": 3.7723822475884726e-05, "loss": 2.2971, "step": 4957500 }, { "epoch": 24.56, "learning_rate": 3.772258388945864e-05, "loss": 2.2959, "step": 4958000 }, { "epoch": 24.57, "learning_rate": 3.772134530303256e-05, "loss": 2.2656, "step": 4958500 }, { "epoch": 24.57, "learning_rate": 3.7720106716606477e-05, "loss": 2.2685, "step": 4959000 }, { "epoch": 24.57, "learning_rate": 3.7718868130180393e-05, "loss": 2.2871, "step": 4959500 }, { "epoch": 24.57, "learning_rate": 3.7717629543754304e-05, "loss": 2.2768, "step": 4960000 }, { "epoch": 24.58, "learning_rate": 3.771639095732822e-05, "loss": 2.3032, "step": 4960500 }, { "epoch": 24.58, "learning_rate": 3.771515237090214e-05, "loss": 2.2941, "step": 4961000 }, { "epoch": 24.58, "learning_rate": 3.7713913784476054e-05, "loss": 2.2861, "step": 4961500 }, { "epoch": 24.58, "learning_rate": 3.771267519804997e-05, "loss": 2.2827, "step": 4962000 }, { "epoch": 24.59, "learning_rate": 3.771143661162389e-05, "loss": 2.2945, "step": 4962500 }, { "epoch": 24.59, "learning_rate": 3.77101980251978e-05, "loss": 2.292, "step": 4963000 }, { "epoch": 24.59, "learning_rate": 3.7708959438771715e-05, "loss": 2.3007, "step": 4963500 }, { "epoch": 24.59, "learning_rate": 3.770772085234563e-05, "loss": 2.2972, "step": 4964000 }, { "epoch": 24.6, "learning_rate": 3.770648226591955e-05, "loss": 2.2883, "step": 4964500 }, { "epoch": 24.6, "learning_rate": 3.7705246156666325e-05, "loss": 2.3001, "step": 4965000 }, { "epoch": 24.6, "learning_rate": 3.7704010047413094e-05, "loss": 2.2894, "step": 4965500 }, { "epoch": 24.6, "learning_rate": 3.770277146098701e-05, "loss": 2.2897, "step": 4966000 }, { "epoch": 24.61, "learning_rate": 3.770153287456093e-05, "loss": 2.267, "step": 4966500 }, { "epoch": 24.61, "learning_rate": 3.770029428813484e-05, "loss": 2.2917, "step": 4967000 }, { "epoch": 24.61, "learning_rate": 3.7699055701708755e-05, "loss": 2.3047, "step": 4967500 }, { "epoch": 24.61, "learning_rate": 3.769781711528267e-05, "loss": 2.2945, "step": 4968000 }, { "epoch": 24.62, "learning_rate": 3.769657852885659e-05, "loss": 2.2779, "step": 4968500 }, { "epoch": 24.62, "learning_rate": 3.769534241960336e-05, "loss": 2.2889, "step": 4969000 }, { "epoch": 24.62, "learning_rate": 3.7694103833177274e-05, "loss": 2.2736, "step": 4969500 }, { "epoch": 24.62, "learning_rate": 3.769286524675119e-05, "loss": 2.2839, "step": 4970000 }, { "epoch": 24.63, "learning_rate": 3.769162666032511e-05, "loss": 2.2858, "step": 4970500 }, { "epoch": 24.63, "learning_rate": 3.7690388073899025e-05, "loss": 2.2925, "step": 4971000 }, { "epoch": 24.63, "learning_rate": 3.768914948747294e-05, "loss": 2.2977, "step": 4971500 }, { "epoch": 24.63, "learning_rate": 3.768791090104685e-05, "loss": 2.2755, "step": 4972000 }, { "epoch": 24.64, "learning_rate": 3.768667231462077e-05, "loss": 2.2916, "step": 4972500 }, { "epoch": 24.64, "learning_rate": 3.7685433728194686e-05, "loss": 2.2846, "step": 4973000 }, { "epoch": 24.64, "learning_rate": 3.76841951417686e-05, "loss": 2.3258, "step": 4973500 }, { "epoch": 24.64, "learning_rate": 3.768295903251537e-05, "loss": 2.3021, "step": 4974000 }, { "epoch": 24.65, "learning_rate": 3.768172044608929e-05, "loss": 2.29, "step": 4974500 }, { "epoch": 24.65, "learning_rate": 3.7680481859663205e-05, "loss": 2.2926, "step": 4975000 }, { "epoch": 24.65, "learning_rate": 3.7679245750409974e-05, "loss": 2.3141, "step": 4975500 }, { "epoch": 24.65, "learning_rate": 3.767800716398389e-05, "loss": 2.3305, "step": 4976000 }, { "epoch": 24.66, "learning_rate": 3.767676857755781e-05, "loss": 2.298, "step": 4976500 }, { "epoch": 24.66, "learning_rate": 3.7675529991131725e-05, "loss": 2.2815, "step": 4977000 }, { "epoch": 24.66, "learning_rate": 3.767429140470564e-05, "loss": 2.2933, "step": 4977500 }, { "epoch": 24.66, "learning_rate": 3.767305281827956e-05, "loss": 2.2923, "step": 4978000 }, { "epoch": 24.67, "learning_rate": 3.767181423185347e-05, "loss": 2.2987, "step": 4978500 }, { "epoch": 24.67, "learning_rate": 3.7670575645427386e-05, "loss": 2.2733, "step": 4979000 }, { "epoch": 24.67, "learning_rate": 3.76693370590013e-05, "loss": 2.2781, "step": 4979500 }, { "epoch": 24.67, "learning_rate": 3.766809847257522e-05, "loss": 2.2993, "step": 4980000 }, { "epoch": 24.68, "learning_rate": 3.766685988614914e-05, "loss": 2.2971, "step": 4980500 }, { "epoch": 24.68, "learning_rate": 3.7665621299723054e-05, "loss": 2.2793, "step": 4981000 }, { "epoch": 24.68, "learning_rate": 3.766438519046982e-05, "loss": 2.3047, "step": 4981500 }, { "epoch": 24.68, "learning_rate": 3.766314908121659e-05, "loss": 2.2659, "step": 4982000 }, { "epoch": 24.69, "learning_rate": 3.766191049479051e-05, "loss": 2.2947, "step": 4982500 }, { "epoch": 24.69, "learning_rate": 3.7660671908364425e-05, "loss": 2.2981, "step": 4983000 }, { "epoch": 24.69, "learning_rate": 3.765943332193834e-05, "loss": 2.3044, "step": 4983500 }, { "epoch": 24.69, "learning_rate": 3.765819473551226e-05, "loss": 2.2545, "step": 4984000 }, { "epoch": 24.69, "learning_rate": 3.765695614908617e-05, "loss": 2.2985, "step": 4984500 }, { "epoch": 24.7, "learning_rate": 3.7655717562660086e-05, "loss": 2.2902, "step": 4985000 }, { "epoch": 24.7, "learning_rate": 3.765448145340686e-05, "loss": 2.2621, "step": 4985500 }, { "epoch": 24.7, "learning_rate": 3.765324286698078e-05, "loss": 2.2897, "step": 4986000 }, { "epoch": 24.7, "learning_rate": 3.7652004280554696e-05, "loss": 2.2919, "step": 4986500 }, { "epoch": 24.71, "learning_rate": 3.7650765694128606e-05, "loss": 2.2849, "step": 4987000 }, { "epoch": 24.71, "learning_rate": 3.764952710770252e-05, "loss": 2.2823, "step": 4987500 }, { "epoch": 24.71, "learning_rate": 3.764828852127644e-05, "loss": 2.3095, "step": 4988000 }, { "epoch": 24.71, "learning_rate": 3.764705241202321e-05, "loss": 2.3133, "step": 4988500 }, { "epoch": 24.72, "learning_rate": 3.7645813825597125e-05, "loss": 2.2789, "step": 4989000 }, { "epoch": 24.72, "learning_rate": 3.764457523917104e-05, "loss": 2.2912, "step": 4989500 }, { "epoch": 24.72, "learning_rate": 3.764333665274496e-05, "loss": 2.2882, "step": 4990000 }, { "epoch": 24.72, "learning_rate": 3.7642098066318876e-05, "loss": 2.2744, "step": 4990500 }, { "epoch": 24.73, "learning_rate": 3.7640859479892786e-05, "loss": 2.2954, "step": 4991000 }, { "epoch": 24.73, "learning_rate": 3.76396208934667e-05, "loss": 2.2657, "step": 4991500 }, { "epoch": 24.73, "learning_rate": 3.763838230704062e-05, "loss": 2.2872, "step": 4992000 }, { "epoch": 24.73, "learning_rate": 3.763714372061454e-05, "loss": 2.3018, "step": 4992500 }, { "epoch": 24.74, "learning_rate": 3.7635905134188454e-05, "loss": 2.276, "step": 4993000 }, { "epoch": 24.74, "learning_rate": 3.763466654776237e-05, "loss": 2.2895, "step": 4993500 }, { "epoch": 24.74, "learning_rate": 3.763342796133629e-05, "loss": 2.3095, "step": 4994000 }, { "epoch": 24.74, "learning_rate": 3.763219185208306e-05, "loss": 2.2876, "step": 4994500 }, { "epoch": 24.75, "learning_rate": 3.7630955742829825e-05, "loss": 2.3048, "step": 4995000 }, { "epoch": 24.75, "learning_rate": 3.762971715640374e-05, "loss": 2.296, "step": 4995500 }, { "epoch": 24.75, "learning_rate": 3.762847856997766e-05, "loss": 2.2919, "step": 4996000 }, { "epoch": 24.75, "learning_rate": 3.7627239983551576e-05, "loss": 2.2907, "step": 4996500 }, { "epoch": 24.76, "learning_rate": 3.7626001397125486e-05, "loss": 2.3036, "step": 4997000 }, { "epoch": 24.76, "learning_rate": 3.76247628106994e-05, "loss": 2.2775, "step": 4997500 }, { "epoch": 24.76, "learning_rate": 3.762352422427332e-05, "loss": 2.3023, "step": 4998000 }, { "epoch": 24.76, "learning_rate": 3.762228563784724e-05, "loss": 2.2945, "step": 4998500 }, { "epoch": 24.77, "learning_rate": 3.762104952859401e-05, "loss": 2.2875, "step": 4999000 }, { "epoch": 24.77, "learning_rate": 3.761981094216793e-05, "loss": 2.3163, "step": 4999500 }, { "epoch": 24.77, "learning_rate": 3.7618572355741847e-05, "loss": 2.3003, "step": 5000000 }, { "epoch": 24.77, "learning_rate": 3.761733376931576e-05, "loss": 2.2923, "step": 5000500 }, { "epoch": 24.78, "learning_rate": 3.7616095182889674e-05, "loss": 2.3032, "step": 5001000 }, { "epoch": 24.78, "learning_rate": 3.761485659646359e-05, "loss": 2.2881, "step": 5001500 }, { "epoch": 24.78, "learning_rate": 3.761361801003751e-05, "loss": 2.2852, "step": 5002000 }, { "epoch": 24.78, "learning_rate": 3.7612379423611424e-05, "loss": 2.2943, "step": 5002500 }, { "epoch": 24.79, "learning_rate": 3.7611140837185335e-05, "loss": 2.2967, "step": 5003000 }, { "epoch": 24.79, "learning_rate": 3.760990225075925e-05, "loss": 2.3114, "step": 5003500 }, { "epoch": 24.79, "learning_rate": 3.760866614150602e-05, "loss": 2.301, "step": 5004000 }, { "epoch": 24.79, "learning_rate": 3.7607430032252796e-05, "loss": 2.2613, "step": 5004500 }, { "epoch": 24.8, "learning_rate": 3.760619144582671e-05, "loss": 2.2814, "step": 5005000 }, { "epoch": 24.8, "learning_rate": 3.760495285940063e-05, "loss": 2.2881, "step": 5005500 }, { "epoch": 24.8, "learning_rate": 3.760371427297455e-05, "loss": 2.2897, "step": 5006000 }, { "epoch": 24.8, "learning_rate": 3.760247568654846e-05, "loss": 2.2766, "step": 5006500 }, { "epoch": 24.81, "learning_rate": 3.7601237100122374e-05, "loss": 2.2921, "step": 5007000 }, { "epoch": 24.81, "learning_rate": 3.759999851369629e-05, "loss": 2.3077, "step": 5007500 }, { "epoch": 24.81, "learning_rate": 3.759875992727021e-05, "loss": 2.2995, "step": 5008000 }, { "epoch": 24.81, "learning_rate": 3.7597521340844125e-05, "loss": 2.3194, "step": 5008500 }, { "epoch": 24.82, "learning_rate": 3.759628275441804e-05, "loss": 2.2753, "step": 5009000 }, { "epoch": 24.82, "learning_rate": 3.759504416799196e-05, "loss": 2.2873, "step": 5009500 }, { "epoch": 24.82, "learning_rate": 3.759380558156587e-05, "loss": 2.279, "step": 5010000 }, { "epoch": 24.82, "learning_rate": 3.759256947231264e-05, "loss": 2.2831, "step": 5010500 }, { "epoch": 24.83, "learning_rate": 3.7591330885886554e-05, "loss": 2.2996, "step": 5011000 }, { "epoch": 24.83, "learning_rate": 3.759009229946047e-05, "loss": 2.3105, "step": 5011500 }, { "epoch": 24.83, "learning_rate": 3.758885371303439e-05, "loss": 2.3228, "step": 5012000 }, { "epoch": 24.83, "learning_rate": 3.7587615126608305e-05, "loss": 2.3095, "step": 5012500 }, { "epoch": 24.84, "learning_rate": 3.7586379017355074e-05, "loss": 2.2962, "step": 5013000 }, { "epoch": 24.84, "learning_rate": 3.758514043092899e-05, "loss": 2.3076, "step": 5013500 }, { "epoch": 24.84, "learning_rate": 3.758390432167576e-05, "loss": 2.2853, "step": 5014000 }, { "epoch": 24.84, "learning_rate": 3.7582665735249677e-05, "loss": 2.2836, "step": 5014500 }, { "epoch": 24.85, "learning_rate": 3.7581427148823594e-05, "loss": 2.2983, "step": 5015000 }, { "epoch": 24.85, "learning_rate": 3.758018856239751e-05, "loss": 2.2958, "step": 5015500 }, { "epoch": 24.85, "learning_rate": 3.757894997597142e-05, "loss": 2.312, "step": 5016000 }, { "epoch": 24.85, "learning_rate": 3.757771138954534e-05, "loss": 2.2927, "step": 5016500 }, { "epoch": 24.86, "learning_rate": 3.7576472803119254e-05, "loss": 2.3107, "step": 5017000 }, { "epoch": 24.86, "learning_rate": 3.757523421669317e-05, "loss": 2.2668, "step": 5017500 }, { "epoch": 24.86, "learning_rate": 3.757399563026709e-05, "loss": 2.29, "step": 5018000 }, { "epoch": 24.86, "learning_rate": 3.7572757043841005e-05, "loss": 2.2804, "step": 5018500 }, { "epoch": 24.87, "learning_rate": 3.757151845741492e-05, "loss": 2.301, "step": 5019000 }, { "epoch": 24.87, "learning_rate": 3.757027987098884e-05, "loss": 2.2906, "step": 5019500 }, { "epoch": 24.87, "learning_rate": 3.756904376173561e-05, "loss": 2.264, "step": 5020000 }, { "epoch": 24.87, "learning_rate": 3.756780765248238e-05, "loss": 2.3048, "step": 5020500 }, { "epoch": 24.88, "learning_rate": 3.7566571543229146e-05, "loss": 2.2908, "step": 5021000 }, { "epoch": 24.88, "learning_rate": 3.756533295680306e-05, "loss": 2.31, "step": 5021500 }, { "epoch": 24.88, "learning_rate": 3.756409437037698e-05, "loss": 2.2986, "step": 5022000 }, { "epoch": 24.88, "learning_rate": 3.7562855783950896e-05, "loss": 2.2729, "step": 5022500 }, { "epoch": 24.89, "learning_rate": 3.756161719752481e-05, "loss": 2.2964, "step": 5023000 }, { "epoch": 24.89, "learning_rate": 3.756038108827158e-05, "loss": 2.2741, "step": 5023500 }, { "epoch": 24.89, "learning_rate": 3.75591425018455e-05, "loss": 2.315, "step": 5024000 }, { "epoch": 24.89, "learning_rate": 3.755790391541941e-05, "loss": 2.2977, "step": 5024500 }, { "epoch": 24.9, "learning_rate": 3.7556665328993326e-05, "loss": 2.292, "step": 5025000 }, { "epoch": 24.9, "learning_rate": 3.755542674256724e-05, "loss": 2.285, "step": 5025500 }, { "epoch": 24.9, "learning_rate": 3.755418815614116e-05, "loss": 2.3066, "step": 5026000 }, { "epoch": 24.9, "learning_rate": 3.755295204688793e-05, "loss": 2.2951, "step": 5026500 }, { "epoch": 24.91, "learning_rate": 3.7551713460461846e-05, "loss": 2.2866, "step": 5027000 }, { "epoch": 24.91, "learning_rate": 3.755047487403576e-05, "loss": 2.2931, "step": 5027500 }, { "epoch": 24.91, "learning_rate": 3.754923628760968e-05, "loss": 2.2865, "step": 5028000 }, { "epoch": 24.91, "learning_rate": 3.7547997701183596e-05, "loss": 2.2993, "step": 5028500 }, { "epoch": 24.92, "learning_rate": 3.7546761591930365e-05, "loss": 2.3104, "step": 5029000 }, { "epoch": 24.92, "learning_rate": 3.754552300550428e-05, "loss": 2.2981, "step": 5029500 }, { "epoch": 24.92, "learning_rate": 3.75442844190782e-05, "loss": 2.3134, "step": 5030000 }, { "epoch": 24.92, "learning_rate": 3.7543045832652116e-05, "loss": 2.3015, "step": 5030500 }, { "epoch": 24.93, "learning_rate": 3.7541807246226026e-05, "loss": 2.2976, "step": 5031000 }, { "epoch": 24.93, "learning_rate": 3.754056865979994e-05, "loss": 2.3091, "step": 5031500 }, { "epoch": 24.93, "learning_rate": 3.753933007337386e-05, "loss": 2.2988, "step": 5032000 }, { "epoch": 24.93, "learning_rate": 3.753809396412063e-05, "loss": 2.2839, "step": 5032500 }, { "epoch": 24.94, "learning_rate": 3.75368578548674e-05, "loss": 2.297, "step": 5033000 }, { "epoch": 24.94, "learning_rate": 3.7535619268441315e-05, "loss": 2.2998, "step": 5033500 }, { "epoch": 24.94, "learning_rate": 3.753438068201523e-05, "loss": 2.2997, "step": 5034000 }, { "epoch": 24.94, "learning_rate": 3.753314209558915e-05, "loss": 2.2748, "step": 5034500 }, { "epoch": 24.95, "learning_rate": 3.7531903509163065e-05, "loss": 2.3065, "step": 5035000 }, { "epoch": 24.95, "learning_rate": 3.753066492273698e-05, "loss": 2.3051, "step": 5035500 }, { "epoch": 24.95, "learning_rate": 3.75294263363109e-05, "loss": 2.2992, "step": 5036000 }, { "epoch": 24.95, "learning_rate": 3.7528187749884816e-05, "loss": 2.3059, "step": 5036500 }, { "epoch": 24.96, "learning_rate": 3.752694916345873e-05, "loss": 2.2977, "step": 5037000 }, { "epoch": 24.96, "learning_rate": 3.752571057703265e-05, "loss": 2.289, "step": 5037500 }, { "epoch": 24.96, "learning_rate": 3.752447199060656e-05, "loss": 2.2699, "step": 5038000 }, { "epoch": 24.96, "learning_rate": 3.752323340418048e-05, "loss": 2.2974, "step": 5038500 }, { "epoch": 24.96, "learning_rate": 3.7521994817754394e-05, "loss": 2.2897, "step": 5039000 }, { "epoch": 24.97, "learning_rate": 3.752075623132831e-05, "loss": 2.3178, "step": 5039500 }, { "epoch": 24.97, "learning_rate": 3.751951764490223e-05, "loss": 2.3062, "step": 5040000 }, { "epoch": 24.97, "learning_rate": 3.7518279058476145e-05, "loss": 2.3189, "step": 5040500 }, { "epoch": 24.97, "learning_rate": 3.7517040472050055e-05, "loss": 2.299, "step": 5041000 }, { "epoch": 24.98, "learning_rate": 3.751580188562397e-05, "loss": 2.3129, "step": 5041500 }, { "epoch": 24.98, "learning_rate": 3.751456329919789e-05, "loss": 2.3282, "step": 5042000 }, { "epoch": 24.98, "learning_rate": 3.7513324712771806e-05, "loss": 2.2736, "step": 5042500 }, { "epoch": 24.98, "learning_rate": 3.751208612634572e-05, "loss": 2.3151, "step": 5043000 }, { "epoch": 24.99, "learning_rate": 3.751085249426535e-05, "loss": 2.2894, "step": 5043500 }, { "epoch": 24.99, "learning_rate": 3.750961390783927e-05, "loss": 2.3101, "step": 5044000 }, { "epoch": 24.99, "learning_rate": 3.750837532141318e-05, "loss": 2.3174, "step": 5044500 }, { "epoch": 24.99, "learning_rate": 3.7507136734987094e-05, "loss": 2.311, "step": 5045000 }, { "epoch": 25.0, "learning_rate": 3.750589814856101e-05, "loss": 2.3085, "step": 5045500 }, { "epoch": 25.0, "learning_rate": 3.750465956213493e-05, "loss": 2.2712, "step": 5046000 }, { "epoch": 25.0, "eval_accuracy": 0.654987755169183, "eval_accuracy_mlm": 0.6101664086430171, "eval_accuracy_nsp": 0.8666530697092474, "eval_loss": 2.337043523788452, "eval_runtime": 145.9167, "eval_samples_per_second": 1747.291, "eval_steps_per_second": 72.809, "step": 5046075 }, { "epoch": 25.0, "learning_rate": 3.7503420975708845e-05, "loss": 2.2487, "step": 5046500 }, { "epoch": 25.0, "learning_rate": 3.7502184866455614e-05, "loss": 2.254, "step": 5047000 }, { "epoch": 25.01, "learning_rate": 3.750094628002953e-05, "loss": 2.2641, "step": 5047500 }, { "epoch": 25.01, "learning_rate": 3.74997101707763e-05, "loss": 2.2432, "step": 5048000 }, { "epoch": 25.01, "learning_rate": 3.7498471584350216e-05, "loss": 2.2432, "step": 5048500 }, { "epoch": 25.01, "learning_rate": 3.749723299792413e-05, "loss": 2.2643, "step": 5049000 }, { "epoch": 25.02, "learning_rate": 3.749599441149805e-05, "loss": 2.2568, "step": 5049500 }, { "epoch": 25.02, "learning_rate": 3.749475582507197e-05, "loss": 2.2875, "step": 5050000 }, { "epoch": 25.02, "learning_rate": 3.749351971581873e-05, "loss": 2.2394, "step": 5050500 }, { "epoch": 25.02, "learning_rate": 3.74922836065655e-05, "loss": 2.2771, "step": 5051000 }, { "epoch": 25.03, "learning_rate": 3.7491045020139415e-05, "loss": 2.3011, "step": 5051500 }, { "epoch": 25.03, "learning_rate": 3.748980643371333e-05, "loss": 2.2375, "step": 5052000 }, { "epoch": 25.03, "learning_rate": 3.748856784728725e-05, "loss": 2.2646, "step": 5052500 }, { "epoch": 25.03, "learning_rate": 3.7487329260861166e-05, "loss": 2.2578, "step": 5053000 }, { "epoch": 25.04, "learning_rate": 3.748609067443508e-05, "loss": 2.2628, "step": 5053500 }, { "epoch": 25.04, "learning_rate": 3.7484852088009e-05, "loss": 2.2763, "step": 5054000 }, { "epoch": 25.04, "learning_rate": 3.7483613501582917e-05, "loss": 2.2344, "step": 5054500 }, { "epoch": 25.04, "learning_rate": 3.7482374915156833e-05, "loss": 2.2395, "step": 5055000 }, { "epoch": 25.05, "learning_rate": 3.7481138805903595e-05, "loss": 2.2747, "step": 5055500 }, { "epoch": 25.05, "learning_rate": 3.747990021947751e-05, "loss": 2.2727, "step": 5056000 }, { "epoch": 25.05, "learning_rate": 3.747866411022429e-05, "loss": 2.2932, "step": 5056500 }, { "epoch": 25.05, "learning_rate": 3.7477425523798205e-05, "loss": 2.2713, "step": 5057000 }, { "epoch": 25.06, "learning_rate": 3.7476186937372115e-05, "loss": 2.2643, "step": 5057500 }, { "epoch": 25.06, "learning_rate": 3.747494835094603e-05, "loss": 2.2898, "step": 5058000 }, { "epoch": 25.06, "learning_rate": 3.747370976451995e-05, "loss": 2.2548, "step": 5058500 }, { "epoch": 25.06, "learning_rate": 3.7472471178093866e-05, "loss": 2.2443, "step": 5059000 }, { "epoch": 25.07, "learning_rate": 3.747123259166778e-05, "loss": 2.2565, "step": 5059500 }, { "epoch": 25.07, "learning_rate": 3.74699940052417e-05, "loss": 2.2639, "step": 5060000 }, { "epoch": 25.07, "learning_rate": 3.746875541881562e-05, "loss": 2.266, "step": 5060500 }, { "epoch": 25.07, "learning_rate": 3.7467516832389534e-05, "loss": 2.2691, "step": 5061000 }, { "epoch": 25.08, "learning_rate": 3.746627824596345e-05, "loss": 2.2544, "step": 5061500 }, { "epoch": 25.08, "learning_rate": 3.746504213671021e-05, "loss": 2.2926, "step": 5062000 }, { "epoch": 25.08, "learning_rate": 3.746380355028413e-05, "loss": 2.2788, "step": 5062500 }, { "epoch": 25.08, "learning_rate": 3.7462564963858046e-05, "loss": 2.2783, "step": 5063000 }, { "epoch": 25.09, "learning_rate": 3.7461328854604815e-05, "loss": 2.279, "step": 5063500 }, { "epoch": 25.09, "learning_rate": 3.746009274535159e-05, "loss": 2.3008, "step": 5064000 }, { "epoch": 25.09, "learning_rate": 3.745885415892551e-05, "loss": 2.2739, "step": 5064500 }, { "epoch": 25.09, "learning_rate": 3.7457615572499425e-05, "loss": 2.2622, "step": 5065000 }, { "epoch": 25.1, "learning_rate": 3.745637698607334e-05, "loss": 2.2654, "step": 5065500 }, { "epoch": 25.1, "learning_rate": 3.745513839964725e-05, "loss": 2.2805, "step": 5066000 }, { "epoch": 25.1, "learning_rate": 3.745389981322117e-05, "loss": 2.2743, "step": 5066500 }, { "epoch": 25.1, "learning_rate": 3.7452661226795086e-05, "loss": 2.2902, "step": 5067000 }, { "epoch": 25.11, "learning_rate": 3.7451422640369e-05, "loss": 2.2832, "step": 5067500 }, { "epoch": 25.11, "learning_rate": 3.745018405394292e-05, "loss": 2.2714, "step": 5068000 }, { "epoch": 25.11, "learning_rate": 3.744894546751683e-05, "loss": 2.2632, "step": 5068500 }, { "epoch": 25.11, "learning_rate": 3.7447706881090747e-05, "loss": 2.2622, "step": 5069000 }, { "epoch": 25.12, "learning_rate": 3.7446468294664663e-05, "loss": 2.2784, "step": 5069500 }, { "epoch": 25.12, "learning_rate": 3.744522970823858e-05, "loss": 2.257, "step": 5070000 }, { "epoch": 25.12, "learning_rate": 3.744399607615821e-05, "loss": 2.2936, "step": 5070500 }, { "epoch": 25.12, "learning_rate": 3.7442757489732125e-05, "loss": 2.2487, "step": 5071000 }, { "epoch": 25.13, "learning_rate": 3.744151890330604e-05, "loss": 2.281, "step": 5071500 }, { "epoch": 25.13, "learning_rate": 3.744028031687996e-05, "loss": 2.2606, "step": 5072000 }, { "epoch": 25.13, "learning_rate": 3.743904173045387e-05, "loss": 2.2643, "step": 5072500 }, { "epoch": 25.13, "learning_rate": 3.7437803144027786e-05, "loss": 2.2806, "step": 5073000 }, { "epoch": 25.14, "learning_rate": 3.74365645576017e-05, "loss": 2.2664, "step": 5073500 }, { "epoch": 25.14, "learning_rate": 3.743532844834847e-05, "loss": 2.261, "step": 5074000 }, { "epoch": 25.14, "learning_rate": 3.743408986192239e-05, "loss": 2.2754, "step": 5074500 }, { "epoch": 25.14, "learning_rate": 3.743285375266916e-05, "loss": 2.2802, "step": 5075000 }, { "epoch": 25.15, "learning_rate": 3.7431615166243074e-05, "loss": 2.2682, "step": 5075500 }, { "epoch": 25.15, "learning_rate": 3.743037657981699e-05, "loss": 2.2538, "step": 5076000 }, { "epoch": 25.15, "learning_rate": 3.742913799339091e-05, "loss": 2.2676, "step": 5076500 }, { "epoch": 25.15, "learning_rate": 3.7427899406964825e-05, "loss": 2.291, "step": 5077000 }, { "epoch": 25.16, "learning_rate": 3.742666082053874e-05, "loss": 2.2751, "step": 5077500 }, { "epoch": 25.16, "learning_rate": 3.742542223411266e-05, "loss": 2.2786, "step": 5078000 }, { "epoch": 25.16, "learning_rate": 3.742418612485942e-05, "loss": 2.2768, "step": 5078500 }, { "epoch": 25.16, "learning_rate": 3.742294753843334e-05, "loss": 2.2702, "step": 5079000 }, { "epoch": 25.17, "learning_rate": 3.7421708952007255e-05, "loss": 2.2698, "step": 5079500 }, { "epoch": 25.17, "learning_rate": 3.7420472842754023e-05, "loss": 2.2873, "step": 5080000 }, { "epoch": 25.17, "learning_rate": 3.741923425632794e-05, "loss": 2.2968, "step": 5080500 }, { "epoch": 25.17, "learning_rate": 3.741799566990186e-05, "loss": 2.2721, "step": 5081000 }, { "epoch": 25.18, "learning_rate": 3.7416757083475774e-05, "loss": 2.2607, "step": 5081500 }, { "epoch": 25.18, "learning_rate": 3.741551849704969e-05, "loss": 2.2801, "step": 5082000 }, { "epoch": 25.18, "learning_rate": 3.741427991062361e-05, "loss": 2.2657, "step": 5082500 }, { "epoch": 25.18, "learning_rate": 3.7413041324197525e-05, "loss": 2.2593, "step": 5083000 }, { "epoch": 25.19, "learning_rate": 3.741180273777144e-05, "loss": 2.2759, "step": 5083500 }, { "epoch": 25.19, "learning_rate": 3.741056415134536e-05, "loss": 2.2836, "step": 5084000 }, { "epoch": 25.19, "learning_rate": 3.7409325564919276e-05, "loss": 2.2825, "step": 5084500 }, { "epoch": 25.19, "learning_rate": 3.740808697849319e-05, "loss": 2.2724, "step": 5085000 }, { "epoch": 25.2, "learning_rate": 3.74068483920671e-05, "loss": 2.268, "step": 5085500 }, { "epoch": 25.2, "learning_rate": 3.740560980564102e-05, "loss": 2.2915, "step": 5086000 }, { "epoch": 25.2, "learning_rate": 3.740437121921494e-05, "loss": 2.2578, "step": 5086500 }, { "epoch": 25.2, "learning_rate": 3.7403132632788854e-05, "loss": 2.2763, "step": 5087000 }, { "epoch": 25.21, "learning_rate": 3.740189404636277e-05, "loss": 2.2847, "step": 5087500 }, { "epoch": 25.21, "learning_rate": 3.740065545993669e-05, "loss": 2.2892, "step": 5088000 }, { "epoch": 25.21, "learning_rate": 3.73994168735106e-05, "loss": 2.2638, "step": 5088500 }, { "epoch": 25.21, "learning_rate": 3.7398178287084515e-05, "loss": 2.2464, "step": 5089000 }, { "epoch": 25.22, "learning_rate": 3.739693970065843e-05, "loss": 2.2865, "step": 5089500 }, { "epoch": 25.22, "learning_rate": 3.739570111423235e-05, "loss": 2.2407, "step": 5090000 }, { "epoch": 25.22, "learning_rate": 3.7394462527806265e-05, "loss": 2.2847, "step": 5090500 }, { "epoch": 25.22, "learning_rate": 3.739322394138018e-05, "loss": 2.2936, "step": 5091000 }, { "epoch": 25.23, "learning_rate": 3.73919853549541e-05, "loss": 2.2751, "step": 5091500 }, { "epoch": 25.23, "learning_rate": 3.7390746768528016e-05, "loss": 2.2734, "step": 5092000 }, { "epoch": 25.23, "learning_rate": 3.738951313644764e-05, "loss": 2.2856, "step": 5092500 }, { "epoch": 25.23, "learning_rate": 3.7388274550021554e-05, "loss": 2.2731, "step": 5093000 }, { "epoch": 25.23, "learning_rate": 3.738703596359547e-05, "loss": 2.2916, "step": 5093500 }, { "epoch": 25.24, "learning_rate": 3.738579737716939e-05, "loss": 2.2879, "step": 5094000 }, { "epoch": 25.24, "learning_rate": 3.7384561267916156e-05, "loss": 2.2828, "step": 5094500 }, { "epoch": 25.24, "learning_rate": 3.7383325158662925e-05, "loss": 2.2666, "step": 5095000 }, { "epoch": 25.24, "learning_rate": 3.738208657223684e-05, "loss": 2.2738, "step": 5095500 }, { "epoch": 25.25, "learning_rate": 3.738084798581076e-05, "loss": 2.2947, "step": 5096000 }, { "epoch": 25.25, "learning_rate": 3.7379609399384676e-05, "loss": 2.2574, "step": 5096500 }, { "epoch": 25.25, "learning_rate": 3.737837081295859e-05, "loss": 2.2668, "step": 5097000 }, { "epoch": 25.25, "learning_rate": 3.737713222653251e-05, "loss": 2.2884, "step": 5097500 }, { "epoch": 25.26, "learning_rate": 3.737589364010642e-05, "loss": 2.2407, "step": 5098000 }, { "epoch": 25.26, "learning_rate": 3.737465505368034e-05, "loss": 2.2656, "step": 5098500 }, { "epoch": 25.26, "learning_rate": 3.7373416467254254e-05, "loss": 2.2799, "step": 5099000 }, { "epoch": 25.26, "learning_rate": 3.737217788082817e-05, "loss": 2.2688, "step": 5099500 }, { "epoch": 25.27, "learning_rate": 3.737093929440209e-05, "loss": 2.284, "step": 5100000 }, { "epoch": 25.27, "learning_rate": 3.7369703185148857e-05, "loss": 2.2631, "step": 5100500 }, { "epoch": 25.27, "learning_rate": 3.736846459872277e-05, "loss": 2.3051, "step": 5101000 }, { "epoch": 25.27, "learning_rate": 3.7367226012296684e-05, "loss": 2.2549, "step": 5101500 }, { "epoch": 25.28, "learning_rate": 3.73659874258706e-05, "loss": 2.2622, "step": 5102000 }, { "epoch": 25.28, "learning_rate": 3.736474883944452e-05, "loss": 2.2823, "step": 5102500 }, { "epoch": 25.28, "learning_rate": 3.7363510253018434e-05, "loss": 2.2786, "step": 5103000 }, { "epoch": 25.28, "learning_rate": 3.736227166659235e-05, "loss": 2.3078, "step": 5103500 }, { "epoch": 25.29, "learning_rate": 3.736103803451197e-05, "loss": 2.3087, "step": 5104000 }, { "epoch": 25.29, "learning_rate": 3.735980192525874e-05, "loss": 2.2929, "step": 5104500 }, { "epoch": 25.29, "learning_rate": 3.735856333883266e-05, "loss": 2.2852, "step": 5105000 }, { "epoch": 25.29, "learning_rate": 3.7357324752406575e-05, "loss": 2.2776, "step": 5105500 }, { "epoch": 25.3, "learning_rate": 3.735608616598049e-05, "loss": 2.2995, "step": 5106000 }, { "epoch": 25.3, "learning_rate": 3.735484757955441e-05, "loss": 2.2672, "step": 5106500 }, { "epoch": 25.3, "learning_rate": 3.7353608993128326e-05, "loss": 2.2895, "step": 5107000 }, { "epoch": 25.3, "learning_rate": 3.735237040670224e-05, "loss": 2.277, "step": 5107500 }, { "epoch": 25.31, "learning_rate": 3.735113182027616e-05, "loss": 2.275, "step": 5108000 }, { "epoch": 25.31, "learning_rate": 3.7349893233850076e-05, "loss": 2.2546, "step": 5108500 }, { "epoch": 25.31, "learning_rate": 3.734865464742399e-05, "loss": 2.295, "step": 5109000 }, { "epoch": 25.31, "learning_rate": 3.7347418538170755e-05, "loss": 2.2752, "step": 5109500 }, { "epoch": 25.32, "learning_rate": 3.734617995174467e-05, "loss": 2.2777, "step": 5110000 }, { "epoch": 25.32, "learning_rate": 3.734494136531859e-05, "loss": 2.2843, "step": 5110500 }, { "epoch": 25.32, "learning_rate": 3.7343702778892506e-05, "loss": 2.2714, "step": 5111000 }, { "epoch": 25.32, "learning_rate": 3.734246419246642e-05, "loss": 2.2732, "step": 5111500 }, { "epoch": 25.33, "learning_rate": 3.734122560604034e-05, "loss": 2.3063, "step": 5112000 }, { "epoch": 25.33, "learning_rate": 3.733998701961426e-05, "loss": 2.2975, "step": 5112500 }, { "epoch": 25.33, "learning_rate": 3.7338748433188174e-05, "loss": 2.291, "step": 5113000 }, { "epoch": 25.33, "learning_rate": 3.7337509846762084e-05, "loss": 2.2564, "step": 5113500 }, { "epoch": 25.34, "learning_rate": 3.7336271260336e-05, "loss": 2.2872, "step": 5114000 }, { "epoch": 25.34, "learning_rate": 3.733503267390992e-05, "loss": 2.2867, "step": 5114500 }, { "epoch": 25.34, "learning_rate": 3.7333794087483835e-05, "loss": 2.243, "step": 5115000 }, { "epoch": 25.34, "learning_rate": 3.733255797823061e-05, "loss": 2.2655, "step": 5115500 }, { "epoch": 25.35, "learning_rate": 3.733131939180453e-05, "loss": 2.2926, "step": 5116000 }, { "epoch": 25.35, "learning_rate": 3.733008080537844e-05, "loss": 2.2715, "step": 5116500 }, { "epoch": 25.35, "learning_rate": 3.7328842218952354e-05, "loss": 2.2757, "step": 5117000 }, { "epoch": 25.35, "learning_rate": 3.732760363252627e-05, "loss": 2.2806, "step": 5117500 }, { "epoch": 25.36, "learning_rate": 3.732636752327304e-05, "loss": 2.2805, "step": 5118000 }, { "epoch": 25.36, "learning_rate": 3.732513141401981e-05, "loss": 2.2542, "step": 5118500 }, { "epoch": 25.36, "learning_rate": 3.7323892827593726e-05, "loss": 2.277, "step": 5119000 }, { "epoch": 25.36, "learning_rate": 3.732265424116764e-05, "loss": 2.2745, "step": 5119500 }, { "epoch": 25.37, "learning_rate": 3.732141565474156e-05, "loss": 2.2858, "step": 5120000 }, { "epoch": 25.37, "learning_rate": 3.7320177068315477e-05, "loss": 2.2666, "step": 5120500 }, { "epoch": 25.37, "learning_rate": 3.7318938481889393e-05, "loss": 2.2903, "step": 5121000 }, { "epoch": 25.37, "learning_rate": 3.731769989546331e-05, "loss": 2.2642, "step": 5121500 }, { "epoch": 25.38, "learning_rate": 3.731646130903723e-05, "loss": 2.2661, "step": 5122000 }, { "epoch": 25.38, "learning_rate": 3.7315222722611144e-05, "loss": 2.2305, "step": 5122500 }, { "epoch": 25.38, "learning_rate": 3.7313984136185054e-05, "loss": 2.2755, "step": 5123000 }, { "epoch": 25.38, "learning_rate": 3.731274554975897e-05, "loss": 2.2832, "step": 5123500 }, { "epoch": 25.39, "learning_rate": 3.731150696333289e-05, "loss": 2.2592, "step": 5124000 }, { "epoch": 25.39, "learning_rate": 3.7310268376906805e-05, "loss": 2.2704, "step": 5124500 }, { "epoch": 25.39, "learning_rate": 3.7309032267653574e-05, "loss": 2.2827, "step": 5125000 }, { "epoch": 25.39, "learning_rate": 3.730779368122749e-05, "loss": 2.2715, "step": 5125500 }, { "epoch": 25.4, "learning_rate": 3.73065550948014e-05, "loss": 2.2773, "step": 5126000 }, { "epoch": 25.4, "learning_rate": 3.730531650837532e-05, "loss": 2.2667, "step": 5126500 }, { "epoch": 25.4, "learning_rate": 3.7304080399122094e-05, "loss": 2.3061, "step": 5127000 }, { "epoch": 25.4, "learning_rate": 3.730284428986886e-05, "loss": 2.2737, "step": 5127500 }, { "epoch": 25.41, "learning_rate": 3.730160570344278e-05, "loss": 2.2715, "step": 5128000 }, { "epoch": 25.41, "learning_rate": 3.7300367117016696e-05, "loss": 2.2898, "step": 5128500 }, { "epoch": 25.41, "learning_rate": 3.729912853059061e-05, "loss": 2.2592, "step": 5129000 }, { "epoch": 25.41, "learning_rate": 3.729788994416453e-05, "loss": 2.2816, "step": 5129500 }, { "epoch": 25.42, "learning_rate": 3.729665135773844e-05, "loss": 2.2692, "step": 5130000 }, { "epoch": 25.42, "learning_rate": 3.729541277131236e-05, "loss": 2.2715, "step": 5130500 }, { "epoch": 25.42, "learning_rate": 3.7294174184886274e-05, "loss": 2.28, "step": 5131000 }, { "epoch": 25.42, "learning_rate": 3.729293807563304e-05, "loss": 2.2883, "step": 5131500 }, { "epoch": 25.43, "learning_rate": 3.729169948920696e-05, "loss": 2.2569, "step": 5132000 }, { "epoch": 25.43, "learning_rate": 3.729046337995373e-05, "loss": 2.2704, "step": 5132500 }, { "epoch": 25.43, "learning_rate": 3.7289224793527646e-05, "loss": 2.279, "step": 5133000 }, { "epoch": 25.43, "learning_rate": 3.728798620710156e-05, "loss": 2.2866, "step": 5133500 }, { "epoch": 25.44, "learning_rate": 3.728674762067548e-05, "loss": 2.2773, "step": 5134000 }, { "epoch": 25.44, "learning_rate": 3.7285509034249396e-05, "loss": 2.3014, "step": 5134500 }, { "epoch": 25.44, "learning_rate": 3.728427044782331e-05, "loss": 2.269, "step": 5135000 }, { "epoch": 25.44, "learning_rate": 3.728303186139723e-05, "loss": 2.2995, "step": 5135500 }, { "epoch": 25.45, "learning_rate": 3.728179327497115e-05, "loss": 2.2661, "step": 5136000 }, { "epoch": 25.45, "learning_rate": 3.7280554688545064e-05, "loss": 2.2761, "step": 5136500 }, { "epoch": 25.45, "learning_rate": 3.7279316102118974e-05, "loss": 2.2874, "step": 5137000 }, { "epoch": 25.45, "learning_rate": 3.727807751569289e-05, "loss": 2.2651, "step": 5137500 }, { "epoch": 25.46, "learning_rate": 3.727683892926681e-05, "loss": 2.2692, "step": 5138000 }, { "epoch": 25.46, "learning_rate": 3.7275600342840725e-05, "loss": 2.2918, "step": 5138500 }, { "epoch": 25.46, "learning_rate": 3.7274364233587494e-05, "loss": 2.288, "step": 5139000 }, { "epoch": 25.46, "learning_rate": 3.727312564716141e-05, "loss": 2.2863, "step": 5139500 }, { "epoch": 25.47, "learning_rate": 3.727188706073533e-05, "loss": 2.2988, "step": 5140000 }, { "epoch": 25.47, "learning_rate": 3.7270648474309245e-05, "loss": 2.2731, "step": 5140500 }, { "epoch": 25.47, "learning_rate": 3.726940988788316e-05, "loss": 2.2773, "step": 5141000 }, { "epoch": 25.47, "learning_rate": 3.726817130145707e-05, "loss": 2.2942, "step": 5141500 }, { "epoch": 25.48, "learning_rate": 3.726693519220385e-05, "loss": 2.2827, "step": 5142000 }, { "epoch": 25.48, "learning_rate": 3.7265696605777764e-05, "loss": 2.2954, "step": 5142500 }, { "epoch": 25.48, "learning_rate": 3.726445801935168e-05, "loss": 2.2909, "step": 5143000 }, { "epoch": 25.48, "learning_rate": 3.726322191009844e-05, "loss": 2.3019, "step": 5143500 }, { "epoch": 25.49, "learning_rate": 3.726198332367236e-05, "loss": 2.2943, "step": 5144000 }, { "epoch": 25.49, "learning_rate": 3.726074473724628e-05, "loss": 2.2736, "step": 5144500 }, { "epoch": 25.49, "learning_rate": 3.7259506150820194e-05, "loss": 2.2876, "step": 5145000 }, { "epoch": 25.49, "learning_rate": 3.725826756439411e-05, "loss": 2.2856, "step": 5145500 }, { "epoch": 25.5, "learning_rate": 3.725702897796803e-05, "loss": 2.272, "step": 5146000 }, { "epoch": 25.5, "learning_rate": 3.7255790391541945e-05, "loss": 2.2921, "step": 5146500 }, { "epoch": 25.5, "learning_rate": 3.725455180511586e-05, "loss": 2.2833, "step": 5147000 }, { "epoch": 25.5, "learning_rate": 3.725331321868978e-05, "loss": 2.2587, "step": 5147500 }, { "epoch": 25.5, "learning_rate": 3.725207463226369e-05, "loss": 2.2883, "step": 5148000 }, { "epoch": 25.51, "learning_rate": 3.7250836045837606e-05, "loss": 2.2676, "step": 5148500 }, { "epoch": 25.51, "learning_rate": 3.724959993658438e-05, "loss": 2.2797, "step": 5149000 }, { "epoch": 25.51, "learning_rate": 3.72483613501583e-05, "loss": 2.2961, "step": 5149500 }, { "epoch": 25.51, "learning_rate": 3.724712524090506e-05, "loss": 2.2984, "step": 5150000 }, { "epoch": 25.52, "learning_rate": 3.724588665447898e-05, "loss": 2.2816, "step": 5150500 }, { "epoch": 25.52, "learning_rate": 3.7244648068052894e-05, "loss": 2.2632, "step": 5151000 }, { "epoch": 25.52, "learning_rate": 3.724340948162681e-05, "loss": 2.2847, "step": 5151500 }, { "epoch": 25.52, "learning_rate": 3.724217089520073e-05, "loss": 2.2749, "step": 5152000 }, { "epoch": 25.53, "learning_rate": 3.7240932308774645e-05, "loss": 2.3042, "step": 5152500 }, { "epoch": 25.53, "learning_rate": 3.723969372234856e-05, "loss": 2.2551, "step": 5153000 }, { "epoch": 25.53, "learning_rate": 3.723845513592248e-05, "loss": 2.2692, "step": 5153500 }, { "epoch": 25.53, "learning_rate": 3.723721654949639e-05, "loss": 2.2742, "step": 5154000 }, { "epoch": 25.54, "learning_rate": 3.7235980440243164e-05, "loss": 2.2607, "step": 5154500 }, { "epoch": 25.54, "learning_rate": 3.7234744330989927e-05, "loss": 2.3025, "step": 5155000 }, { "epoch": 25.54, "learning_rate": 3.7233505744563843e-05, "loss": 2.2741, "step": 5155500 }, { "epoch": 25.54, "learning_rate": 3.723226715813776e-05, "loss": 2.2887, "step": 5156000 }, { "epoch": 25.55, "learning_rate": 3.723102857171168e-05, "loss": 2.2849, "step": 5156500 }, { "epoch": 25.55, "learning_rate": 3.7229789985285594e-05, "loss": 2.2911, "step": 5157000 }, { "epoch": 25.55, "learning_rate": 3.722855139885951e-05, "loss": 2.266, "step": 5157500 }, { "epoch": 25.55, "learning_rate": 3.722731281243343e-05, "loss": 2.2631, "step": 5158000 }, { "epoch": 25.56, "learning_rate": 3.7226074226007345e-05, "loss": 2.2779, "step": 5158500 }, { "epoch": 25.56, "learning_rate": 3.722483563958126e-05, "loss": 2.2959, "step": 5159000 }, { "epoch": 25.56, "learning_rate": 3.722359705315518e-05, "loss": 2.306, "step": 5159500 }, { "epoch": 25.56, "learning_rate": 3.7222358466729096e-05, "loss": 2.2844, "step": 5160000 }, { "epoch": 25.57, "learning_rate": 3.7221119880303006e-05, "loss": 2.2897, "step": 5160500 }, { "epoch": 25.57, "learning_rate": 3.721988377104978e-05, "loss": 2.2778, "step": 5161000 }, { "epoch": 25.57, "learning_rate": 3.7218647661796544e-05, "loss": 2.2916, "step": 5161500 }, { "epoch": 25.57, "learning_rate": 3.721740907537046e-05, "loss": 2.2833, "step": 5162000 }, { "epoch": 25.58, "learning_rate": 3.721617048894438e-05, "loss": 2.2951, "step": 5162500 }, { "epoch": 25.58, "learning_rate": 3.7214931902518294e-05, "loss": 2.273, "step": 5163000 }, { "epoch": 25.58, "learning_rate": 3.721369331609221e-05, "loss": 2.2695, "step": 5163500 }, { "epoch": 25.58, "learning_rate": 3.721245720683898e-05, "loss": 2.2833, "step": 5164000 }, { "epoch": 25.59, "learning_rate": 3.72112186204129e-05, "loss": 2.2957, "step": 5164500 }, { "epoch": 25.59, "learning_rate": 3.7209980033986814e-05, "loss": 2.2693, "step": 5165000 }, { "epoch": 25.59, "learning_rate": 3.720874144756073e-05, "loss": 2.2783, "step": 5165500 }, { "epoch": 25.59, "learning_rate": 3.720750286113465e-05, "loss": 2.2865, "step": 5166000 }, { "epoch": 25.6, "learning_rate": 3.7206264274708565e-05, "loss": 2.2855, "step": 5166500 }, { "epoch": 25.6, "learning_rate": 3.720502568828248e-05, "loss": 2.2972, "step": 5167000 }, { "epoch": 25.6, "learning_rate": 3.72037871018564e-05, "loss": 2.2696, "step": 5167500 }, { "epoch": 25.6, "learning_rate": 3.720255099260316e-05, "loss": 2.2739, "step": 5168000 }, { "epoch": 25.61, "learning_rate": 3.720131488334993e-05, "loss": 2.2988, "step": 5168500 }, { "epoch": 25.61, "learning_rate": 3.7200078774096705e-05, "loss": 2.2543, "step": 5169000 }, { "epoch": 25.61, "learning_rate": 3.719884018767062e-05, "loss": 2.2768, "step": 5169500 }, { "epoch": 25.61, "learning_rate": 3.719760160124454e-05, "loss": 2.2944, "step": 5170000 }, { "epoch": 25.62, "learning_rate": 3.719636301481845e-05, "loss": 2.2929, "step": 5170500 }, { "epoch": 25.62, "learning_rate": 3.7195124428392366e-05, "loss": 2.273, "step": 5171000 }, { "epoch": 25.62, "learning_rate": 3.719388584196628e-05, "loss": 2.2615, "step": 5171500 }, { "epoch": 25.62, "learning_rate": 3.71926472555402e-05, "loss": 2.2707, "step": 5172000 }, { "epoch": 25.63, "learning_rate": 3.719140866911412e-05, "loss": 2.288, "step": 5172500 }, { "epoch": 25.63, "learning_rate": 3.7190170082688034e-05, "loss": 2.2828, "step": 5173000 }, { "epoch": 25.63, "learning_rate": 3.718893149626195e-05, "loss": 2.2858, "step": 5173500 }, { "epoch": 25.63, "learning_rate": 3.718769290983586e-05, "loss": 2.2594, "step": 5174000 }, { "epoch": 25.64, "learning_rate": 3.718645432340978e-05, "loss": 2.2852, "step": 5174500 }, { "epoch": 25.64, "learning_rate": 3.7185215736983695e-05, "loss": 2.2674, "step": 5175000 }, { "epoch": 25.64, "learning_rate": 3.718398210490332e-05, "loss": 2.2819, "step": 5175500 }, { "epoch": 25.64, "learning_rate": 3.718274351847724e-05, "loss": 2.2753, "step": 5176000 }, { "epoch": 25.65, "learning_rate": 3.718150493205115e-05, "loss": 2.2723, "step": 5176500 }, { "epoch": 25.65, "learning_rate": 3.7180266345625066e-05, "loss": 2.2838, "step": 5177000 }, { "epoch": 25.65, "learning_rate": 3.717902775919898e-05, "loss": 2.2957, "step": 5177500 }, { "epoch": 25.65, "learning_rate": 3.717779164994575e-05, "loss": 2.2762, "step": 5178000 }, { "epoch": 25.66, "learning_rate": 3.717655306351967e-05, "loss": 2.2829, "step": 5178500 }, { "epoch": 25.66, "learning_rate": 3.7175314477093586e-05, "loss": 2.2563, "step": 5179000 }, { "epoch": 25.66, "learning_rate": 3.71740758906675e-05, "loss": 2.2887, "step": 5179500 }, { "epoch": 25.66, "learning_rate": 3.717283730424141e-05, "loss": 2.283, "step": 5180000 }, { "epoch": 25.67, "learning_rate": 3.717160119498819e-05, "loss": 2.2762, "step": 5180500 }, { "epoch": 25.67, "learning_rate": 3.7170362608562105e-05, "loss": 2.2929, "step": 5181000 }, { "epoch": 25.67, "learning_rate": 3.716912649930887e-05, "loss": 2.2826, "step": 5181500 }, { "epoch": 25.67, "learning_rate": 3.7167887912882784e-05, "loss": 2.3166, "step": 5182000 }, { "epoch": 25.68, "learning_rate": 3.71666493264567e-05, "loss": 2.2852, "step": 5182500 }, { "epoch": 25.68, "learning_rate": 3.716541074003062e-05, "loss": 2.2891, "step": 5183000 }, { "epoch": 25.68, "learning_rate": 3.7164172153604535e-05, "loss": 2.2888, "step": 5183500 }, { "epoch": 25.68, "learning_rate": 3.7162936044351304e-05, "loss": 2.2808, "step": 5184000 }, { "epoch": 25.69, "learning_rate": 3.716169745792522e-05, "loss": 2.2693, "step": 5184500 }, { "epoch": 25.69, "learning_rate": 3.716046134867199e-05, "loss": 2.2618, "step": 5185000 }, { "epoch": 25.69, "learning_rate": 3.7159222762245906e-05, "loss": 2.2966, "step": 5185500 }, { "epoch": 25.69, "learning_rate": 3.7157984175819823e-05, "loss": 2.2962, "step": 5186000 }, { "epoch": 25.7, "learning_rate": 3.715674558939374e-05, "loss": 2.2637, "step": 5186500 }, { "epoch": 25.7, "learning_rate": 3.715550700296766e-05, "loss": 2.3125, "step": 5187000 }, { "epoch": 25.7, "learning_rate": 3.715427089371442e-05, "loss": 2.2799, "step": 5187500 }, { "epoch": 25.7, "learning_rate": 3.7153032307288336e-05, "loss": 2.3013, "step": 5188000 }, { "epoch": 25.71, "learning_rate": 3.715179372086225e-05, "loss": 2.2987, "step": 5188500 }, { "epoch": 25.71, "learning_rate": 3.715055513443617e-05, "loss": 2.3231, "step": 5189000 }, { "epoch": 25.71, "learning_rate": 3.714931654801009e-05, "loss": 2.2836, "step": 5189500 }, { "epoch": 25.71, "learning_rate": 3.7148077961584004e-05, "loss": 2.2828, "step": 5190000 }, { "epoch": 25.72, "learning_rate": 3.7146844329503625e-05, "loss": 2.2943, "step": 5190500 }, { "epoch": 25.72, "learning_rate": 3.714560574307754e-05, "loss": 2.2569, "step": 5191000 }, { "epoch": 25.72, "learning_rate": 3.714436715665146e-05, "loss": 2.2996, "step": 5191500 }, { "epoch": 25.72, "learning_rate": 3.7143128570225375e-05, "loss": 2.2977, "step": 5192000 }, { "epoch": 25.73, "learning_rate": 3.714188998379929e-05, "loss": 2.2883, "step": 5192500 }, { "epoch": 25.73, "learning_rate": 3.71406513973732e-05, "loss": 2.2551, "step": 5193000 }, { "epoch": 25.73, "learning_rate": 3.713941281094712e-05, "loss": 2.2708, "step": 5193500 }, { "epoch": 25.73, "learning_rate": 3.7138174224521036e-05, "loss": 2.2823, "step": 5194000 }, { "epoch": 25.74, "learning_rate": 3.713693563809495e-05, "loss": 2.274, "step": 5194500 }, { "epoch": 25.74, "learning_rate": 3.713569705166887e-05, "loss": 2.2976, "step": 5195000 }, { "epoch": 25.74, "learning_rate": 3.713445846524279e-05, "loss": 2.2881, "step": 5195500 }, { "epoch": 25.74, "learning_rate": 3.7133219878816704e-05, "loss": 2.2842, "step": 5196000 }, { "epoch": 25.75, "learning_rate": 3.713198129239062e-05, "loss": 2.2757, "step": 5196500 }, { "epoch": 25.75, "learning_rate": 3.713074270596454e-05, "loss": 2.2781, "step": 5197000 }, { "epoch": 25.75, "learning_rate": 3.7129504119538455e-05, "loss": 2.2648, "step": 5197500 }, { "epoch": 25.75, "learning_rate": 3.712826553311237e-05, "loss": 2.2698, "step": 5198000 }, { "epoch": 25.76, "learning_rate": 3.712702942385914e-05, "loss": 2.2989, "step": 5198500 }, { "epoch": 25.76, "learning_rate": 3.712579083743306e-05, "loss": 2.2976, "step": 5199000 }, { "epoch": 25.76, "learning_rate": 3.7124552251006974e-05, "loss": 2.2934, "step": 5199500 }, { "epoch": 25.76, "learning_rate": 3.712331366458089e-05, "loss": 2.2713, "step": 5200000 }, { "epoch": 25.77, "learning_rate": 3.7122077555327653e-05, "loss": 2.3012, "step": 5200500 }, { "epoch": 25.77, "learning_rate": 3.712083896890157e-05, "loss": 2.2833, "step": 5201000 }, { "epoch": 25.77, "learning_rate": 3.711960038247549e-05, "loss": 2.2911, "step": 5201500 }, { "epoch": 25.77, "learning_rate": 3.7118361796049404e-05, "loss": 2.2916, "step": 5202000 }, { "epoch": 25.77, "learning_rate": 3.711712320962332e-05, "loss": 2.2596, "step": 5202500 }, { "epoch": 25.78, "learning_rate": 3.711588710037009e-05, "loss": 2.2809, "step": 5203000 }, { "epoch": 25.78, "learning_rate": 3.711464851394401e-05, "loss": 2.3006, "step": 5203500 }, { "epoch": 25.78, "learning_rate": 3.7113409927517924e-05, "loss": 2.2742, "step": 5204000 }, { "epoch": 25.78, "learning_rate": 3.711217134109184e-05, "loss": 2.2681, "step": 5204500 }, { "epoch": 25.79, "learning_rate": 3.711093275466576e-05, "loss": 2.3087, "step": 5205000 }, { "epoch": 25.79, "learning_rate": 3.7109694168239675e-05, "loss": 2.3044, "step": 5205500 }, { "epoch": 25.79, "learning_rate": 3.710845558181359e-05, "loss": 2.277, "step": 5206000 }, { "epoch": 25.79, "learning_rate": 3.710721699538751e-05, "loss": 2.2873, "step": 5206500 }, { "epoch": 25.8, "learning_rate": 3.710598088613427e-05, "loss": 2.2831, "step": 5207000 }, { "epoch": 25.8, "learning_rate": 3.710474229970819e-05, "loss": 2.2702, "step": 5207500 }, { "epoch": 25.8, "learning_rate": 3.710350619045496e-05, "loss": 2.2777, "step": 5208000 }, { "epoch": 25.8, "learning_rate": 3.710226760402887e-05, "loss": 2.2703, "step": 5208500 }, { "epoch": 25.81, "learning_rate": 3.710102901760279e-05, "loss": 2.2656, "step": 5209000 }, { "epoch": 25.81, "learning_rate": 3.709979043117671e-05, "loss": 2.3058, "step": 5209500 }, { "epoch": 25.81, "learning_rate": 3.7098551844750624e-05, "loss": 2.3062, "step": 5210000 }, { "epoch": 25.81, "learning_rate": 3.709731573549739e-05, "loss": 2.2778, "step": 5210500 }, { "epoch": 25.82, "learning_rate": 3.709607714907131e-05, "loss": 2.2868, "step": 5211000 }, { "epoch": 25.82, "learning_rate": 3.7094838562645227e-05, "loss": 2.2837, "step": 5211500 }, { "epoch": 25.82, "learning_rate": 3.709359997621914e-05, "loss": 2.2617, "step": 5212000 }, { "epoch": 25.82, "learning_rate": 3.7092361389793054e-05, "loss": 2.2789, "step": 5212500 }, { "epoch": 25.83, "learning_rate": 3.709112528053983e-05, "loss": 2.3062, "step": 5213000 }, { "epoch": 25.83, "learning_rate": 3.7089886694113746e-05, "loss": 2.2737, "step": 5213500 }, { "epoch": 25.83, "learning_rate": 3.708864810768766e-05, "loss": 2.2862, "step": 5214000 }, { "epoch": 25.83, "learning_rate": 3.708740952126158e-05, "loss": 2.2864, "step": 5214500 }, { "epoch": 25.84, "learning_rate": 3.708617093483549e-05, "loss": 2.3172, "step": 5215000 }, { "epoch": 25.84, "learning_rate": 3.708493234840941e-05, "loss": 2.2644, "step": 5215500 }, { "epoch": 25.84, "learning_rate": 3.7083693761983324e-05, "loss": 2.2906, "step": 5216000 }, { "epoch": 25.84, "learning_rate": 3.708245765273009e-05, "loss": 2.277, "step": 5216500 }, { "epoch": 25.85, "learning_rate": 3.708121906630401e-05, "loss": 2.2867, "step": 5217000 }, { "epoch": 25.85, "learning_rate": 3.707998047987793e-05, "loss": 2.2931, "step": 5217500 }, { "epoch": 25.85, "learning_rate": 3.7078744370624695e-05, "loss": 2.2907, "step": 5218000 }, { "epoch": 25.85, "learning_rate": 3.707750578419861e-05, "loss": 2.2581, "step": 5218500 }, { "epoch": 25.86, "learning_rate": 3.707626719777253e-05, "loss": 2.3109, "step": 5219000 }, { "epoch": 25.86, "learning_rate": 3.7075028611346446e-05, "loss": 2.293, "step": 5219500 }, { "epoch": 25.86, "learning_rate": 3.707379002492036e-05, "loss": 2.311, "step": 5220000 }, { "epoch": 25.86, "learning_rate": 3.707255143849428e-05, "loss": 2.2732, "step": 5220500 }, { "epoch": 25.87, "learning_rate": 3.707131285206819e-05, "loss": 2.2977, "step": 5221000 }, { "epoch": 25.87, "learning_rate": 3.707007426564211e-05, "loss": 2.2984, "step": 5221500 }, { "epoch": 25.87, "learning_rate": 3.706883815638888e-05, "loss": 2.2718, "step": 5222000 }, { "epoch": 25.87, "learning_rate": 3.7067602047135645e-05, "loss": 2.2732, "step": 5222500 }, { "epoch": 25.88, "learning_rate": 3.706636346070956e-05, "loss": 2.2941, "step": 5223000 }, { "epoch": 25.88, "learning_rate": 3.706512487428348e-05, "loss": 2.2739, "step": 5223500 }, { "epoch": 25.88, "learning_rate": 3.7063886287857396e-05, "loss": 2.265, "step": 5224000 }, { "epoch": 25.88, "learning_rate": 3.706264770143131e-05, "loss": 2.2956, "step": 5224500 }, { "epoch": 25.89, "learning_rate": 3.706140911500523e-05, "loss": 2.2913, "step": 5225000 }, { "epoch": 25.89, "learning_rate": 3.7060170528579146e-05, "loss": 2.2667, "step": 5225500 }, { "epoch": 25.89, "learning_rate": 3.705893194215306e-05, "loss": 2.3244, "step": 5226000 }, { "epoch": 25.89, "learning_rate": 3.705769335572698e-05, "loss": 2.2802, "step": 5226500 }, { "epoch": 25.9, "learning_rate": 3.70564547693009e-05, "loss": 2.2817, "step": 5227000 }, { "epoch": 25.9, "learning_rate": 3.705521618287481e-05, "loss": 2.3061, "step": 5227500 }, { "epoch": 25.9, "learning_rate": 3.7053977596448724e-05, "loss": 2.2814, "step": 5228000 }, { "epoch": 25.9, "learning_rate": 3.705273901002264e-05, "loss": 2.2876, "step": 5228500 }, { "epoch": 25.91, "learning_rate": 3.705150042359656e-05, "loss": 2.2637, "step": 5229000 }, { "epoch": 25.91, "learning_rate": 3.7050261837170475e-05, "loss": 2.2767, "step": 5229500 }, { "epoch": 25.91, "learning_rate": 3.704902325074439e-05, "loss": 2.2874, "step": 5230000 }, { "epoch": 25.91, "learning_rate": 3.704778466431831e-05, "loss": 2.2886, "step": 5230500 }, { "epoch": 25.92, "learning_rate": 3.7046546077892226e-05, "loss": 2.2975, "step": 5231000 }, { "epoch": 25.92, "learning_rate": 3.704530749146614e-05, "loss": 2.2706, "step": 5231500 }, { "epoch": 25.92, "learning_rate": 3.7044071382212905e-05, "loss": 2.2877, "step": 5232000 }, { "epoch": 25.92, "learning_rate": 3.704283279578682e-05, "loss": 2.3207, "step": 5232500 }, { "epoch": 25.93, "learning_rate": 3.704159420936074e-05, "loss": 2.2841, "step": 5233000 }, { "epoch": 25.93, "learning_rate": 3.7040355622934656e-05, "loss": 2.2769, "step": 5233500 }, { "epoch": 25.93, "learning_rate": 3.7039119513681424e-05, "loss": 2.2918, "step": 5234000 }, { "epoch": 25.93, "learning_rate": 3.703788092725534e-05, "loss": 2.288, "step": 5234500 }, { "epoch": 25.94, "learning_rate": 3.703664234082926e-05, "loss": 2.2764, "step": 5235000 }, { "epoch": 25.94, "learning_rate": 3.7035403754403175e-05, "loss": 2.283, "step": 5235500 }, { "epoch": 25.94, "learning_rate": 3.703416764514995e-05, "loss": 2.2929, "step": 5236000 }, { "epoch": 25.94, "learning_rate": 3.703292905872386e-05, "loss": 2.2911, "step": 5236500 }, { "epoch": 25.95, "learning_rate": 3.703169047229778e-05, "loss": 2.2815, "step": 5237000 }, { "epoch": 25.95, "learning_rate": 3.7030451885871695e-05, "loss": 2.3038, "step": 5237500 }, { "epoch": 25.95, "learning_rate": 3.702921329944561e-05, "loss": 2.2901, "step": 5238000 }, { "epoch": 25.95, "learning_rate": 3.702797471301953e-05, "loss": 2.3041, "step": 5238500 }, { "epoch": 25.96, "learning_rate": 3.702673612659344e-05, "loss": 2.2909, "step": 5239000 }, { "epoch": 25.96, "learning_rate": 3.7025497540167356e-05, "loss": 2.3005, "step": 5239500 }, { "epoch": 25.96, "learning_rate": 3.7024261430914125e-05, "loss": 2.3043, "step": 5240000 }, { "epoch": 25.96, "learning_rate": 3.702302284448804e-05, "loss": 2.3069, "step": 5240500 }, { "epoch": 25.97, "learning_rate": 3.702178425806196e-05, "loss": 2.2887, "step": 5241000 }, { "epoch": 25.97, "learning_rate": 3.7020545671635875e-05, "loss": 2.2661, "step": 5241500 }, { "epoch": 25.97, "learning_rate": 3.701930708520979e-05, "loss": 2.2777, "step": 5242000 }, { "epoch": 25.97, "learning_rate": 3.701806849878371e-05, "loss": 2.2992, "step": 5242500 }, { "epoch": 25.98, "learning_rate": 3.701683238953048e-05, "loss": 2.2748, "step": 5243000 }, { "epoch": 25.98, "learning_rate": 3.701559628027725e-05, "loss": 2.2756, "step": 5243500 }, { "epoch": 25.98, "learning_rate": 3.7014357693851164e-05, "loss": 2.2784, "step": 5244000 }, { "epoch": 25.98, "learning_rate": 3.701311910742508e-05, "loss": 2.307, "step": 5244500 }, { "epoch": 25.99, "learning_rate": 3.7011880520999e-05, "loss": 2.2752, "step": 5245000 }, { "epoch": 25.99, "learning_rate": 3.7010641934572914e-05, "loss": 2.2948, "step": 5245500 }, { "epoch": 25.99, "learning_rate": 3.7009403348146825e-05, "loss": 2.3109, "step": 5246000 }, { "epoch": 25.99, "learning_rate": 3.700816476172074e-05, "loss": 2.2798, "step": 5246500 }, { "epoch": 26.0, "learning_rate": 3.700692617529466e-05, "loss": 2.3082, "step": 5247000 }, { "epoch": 26.0, "learning_rate": 3.7005687588868575e-05, "loss": 2.3089, "step": 5247500 }, { "epoch": 26.0, "eval_accuracy": 0.6544683759370017, "eval_accuracy_mlm": 0.6095992095473112, "eval_accuracy_nsp": 0.8660647398209124, "eval_loss": 2.3457705974578857, "eval_runtime": 146.2347, "eval_samples_per_second": 1743.491, "eval_steps_per_second": 72.65, "step": 5247918 }, { "epoch": 26.0, "learning_rate": 3.700444900244249e-05, "loss": 2.275, "step": 5248000 }, { "epoch": 26.0, "learning_rate": 3.700321041601641e-05, "loss": 2.2462, "step": 5248500 }, { "epoch": 26.01, "learning_rate": 3.700197430676318e-05, "loss": 2.2828, "step": 5249000 }, { "epoch": 26.01, "learning_rate": 3.7000735720337095e-05, "loss": 2.2497, "step": 5249500 }, { "epoch": 26.01, "learning_rate": 3.699949713391101e-05, "loss": 2.2561, "step": 5250000 }, { "epoch": 26.01, "learning_rate": 3.699825854748493e-05, "loss": 2.2692, "step": 5250500 }, { "epoch": 26.02, "learning_rate": 3.6997019961058846e-05, "loss": 2.2704, "step": 5251000 }, { "epoch": 26.02, "learning_rate": 3.699578137463276e-05, "loss": 2.2654, "step": 5251500 }, { "epoch": 26.02, "learning_rate": 3.699454278820668e-05, "loss": 2.2692, "step": 5252000 }, { "epoch": 26.02, "learning_rate": 3.699330420178059e-05, "loss": 2.2392, "step": 5252500 }, { "epoch": 26.03, "learning_rate": 3.699206561535451e-05, "loss": 2.2405, "step": 5253000 }, { "epoch": 26.03, "learning_rate": 3.6990831983274134e-05, "loss": 2.2633, "step": 5253500 }, { "epoch": 26.03, "learning_rate": 3.6989595874020896e-05, "loss": 2.2523, "step": 5254000 }, { "epoch": 26.03, "learning_rate": 3.698835728759481e-05, "loss": 2.2422, "step": 5254500 }, { "epoch": 26.04, "learning_rate": 3.698711870116873e-05, "loss": 2.2852, "step": 5255000 }, { "epoch": 26.04, "learning_rate": 3.698588011474265e-05, "loss": 2.2703, "step": 5255500 }, { "epoch": 26.04, "learning_rate": 3.6984641528316564e-05, "loss": 2.2614, "step": 5256000 }, { "epoch": 26.04, "learning_rate": 3.698340541906333e-05, "loss": 2.2694, "step": 5256500 }, { "epoch": 26.04, "learning_rate": 3.698216683263725e-05, "loss": 2.2442, "step": 5257000 }, { "epoch": 26.05, "learning_rate": 3.698092824621117e-05, "loss": 2.263, "step": 5257500 }, { "epoch": 26.05, "learning_rate": 3.6979689659785084e-05, "loss": 2.2511, "step": 5258000 }, { "epoch": 26.05, "learning_rate": 3.6978451073359e-05, "loss": 2.2405, "step": 5258500 }, { "epoch": 26.05, "learning_rate": 3.697721248693292e-05, "loss": 2.2604, "step": 5259000 }, { "epoch": 26.06, "learning_rate": 3.6975973900506834e-05, "loss": 2.2556, "step": 5259500 }, { "epoch": 26.06, "learning_rate": 3.697473531408075e-05, "loss": 2.2919, "step": 5260000 }, { "epoch": 26.06, "learning_rate": 3.697349672765467e-05, "loss": 2.2512, "step": 5260500 }, { "epoch": 26.06, "learning_rate": 3.6972258141228585e-05, "loss": 2.2531, "step": 5261000 }, { "epoch": 26.07, "learning_rate": 3.697102203197535e-05, "loss": 2.2625, "step": 5261500 }, { "epoch": 26.07, "learning_rate": 3.6969783445549264e-05, "loss": 2.2725, "step": 5262000 }, { "epoch": 26.07, "learning_rate": 3.696854733629603e-05, "loss": 2.2693, "step": 5262500 }, { "epoch": 26.07, "learning_rate": 3.696730874986995e-05, "loss": 2.2746, "step": 5263000 }, { "epoch": 26.08, "learning_rate": 3.696607016344387e-05, "loss": 2.2666, "step": 5263500 }, { "epoch": 26.08, "learning_rate": 3.6964831577017784e-05, "loss": 2.271, "step": 5264000 }, { "epoch": 26.08, "learning_rate": 3.69635929905917e-05, "loss": 2.2448, "step": 5264500 }, { "epoch": 26.08, "learning_rate": 3.696235440416562e-05, "loss": 2.283, "step": 5265000 }, { "epoch": 26.09, "learning_rate": 3.6961118294912386e-05, "loss": 2.2575, "step": 5265500 }, { "epoch": 26.09, "learning_rate": 3.695988218565915e-05, "loss": 2.2743, "step": 5266000 }, { "epoch": 26.09, "learning_rate": 3.6958643599233065e-05, "loss": 2.2608, "step": 5266500 }, { "epoch": 26.09, "learning_rate": 3.695740501280698e-05, "loss": 2.2905, "step": 5267000 }, { "epoch": 26.1, "learning_rate": 3.69561664263809e-05, "loss": 2.2947, "step": 5267500 }, { "epoch": 26.1, "learning_rate": 3.6954927839954816e-05, "loss": 2.2509, "step": 5268000 }, { "epoch": 26.1, "learning_rate": 3.695368925352873e-05, "loss": 2.2685, "step": 5268500 }, { "epoch": 26.1, "learning_rate": 3.695245066710265e-05, "loss": 2.2794, "step": 5269000 }, { "epoch": 26.11, "learning_rate": 3.695121208067657e-05, "loss": 2.2516, "step": 5269500 }, { "epoch": 26.11, "learning_rate": 3.6949973494250484e-05, "loss": 2.2744, "step": 5270000 }, { "epoch": 26.11, "learning_rate": 3.69487349078244e-05, "loss": 2.2572, "step": 5270500 }, { "epoch": 26.11, "learning_rate": 3.694749632139832e-05, "loss": 2.2396, "step": 5271000 }, { "epoch": 26.12, "learning_rate": 3.6946257734972235e-05, "loss": 2.2823, "step": 5271500 }, { "epoch": 26.12, "learning_rate": 3.694501914854615e-05, "loss": 2.2637, "step": 5272000 }, { "epoch": 26.12, "learning_rate": 3.694378056212007e-05, "loss": 2.2612, "step": 5272500 }, { "epoch": 26.12, "learning_rate": 3.6942541975693985e-05, "loss": 2.2399, "step": 5273000 }, { "epoch": 26.13, "learning_rate": 3.69413033892679e-05, "loss": 2.2862, "step": 5273500 }, { "epoch": 26.13, "learning_rate": 3.6940067280014664e-05, "loss": 2.284, "step": 5274000 }, { "epoch": 26.13, "learning_rate": 3.693882869358858e-05, "loss": 2.2642, "step": 5274500 }, { "epoch": 26.13, "learning_rate": 3.69375901071625e-05, "loss": 2.2629, "step": 5275000 }, { "epoch": 26.14, "learning_rate": 3.6936351520736415e-05, "loss": 2.2659, "step": 5275500 }, { "epoch": 26.14, "learning_rate": 3.693511293431033e-05, "loss": 2.2365, "step": 5276000 }, { "epoch": 26.14, "learning_rate": 3.693387434788425e-05, "loss": 2.2769, "step": 5276500 }, { "epoch": 26.14, "learning_rate": 3.693263823863102e-05, "loss": 2.2433, "step": 5277000 }, { "epoch": 26.15, "learning_rate": 3.6931399652204935e-05, "loss": 2.2665, "step": 5277500 }, { "epoch": 26.15, "learning_rate": 3.693016106577885e-05, "loss": 2.2601, "step": 5278000 }, { "epoch": 26.15, "learning_rate": 3.692892247935277e-05, "loss": 2.3155, "step": 5278500 }, { "epoch": 26.15, "learning_rate": 3.6927683892926685e-05, "loss": 2.2518, "step": 5279000 }, { "epoch": 26.16, "learning_rate": 3.69264453065006e-05, "loss": 2.2622, "step": 5279500 }, { "epoch": 26.16, "learning_rate": 3.692520672007452e-05, "loss": 2.2623, "step": 5280000 }, { "epoch": 26.16, "learning_rate": 3.692397061082128e-05, "loss": 2.2732, "step": 5280500 }, { "epoch": 26.16, "learning_rate": 3.69227320243952e-05, "loss": 2.2425, "step": 5281000 }, { "epoch": 26.17, "learning_rate": 3.6921493437969115e-05, "loss": 2.2449, "step": 5281500 }, { "epoch": 26.17, "learning_rate": 3.692025485154303e-05, "loss": 2.2487, "step": 5282000 }, { "epoch": 26.17, "learning_rate": 3.691901626511695e-05, "loss": 2.2494, "step": 5282500 }, { "epoch": 26.17, "learning_rate": 3.6917777678690866e-05, "loss": 2.2683, "step": 5283000 }, { "epoch": 26.18, "learning_rate": 3.6916539092264776e-05, "loss": 2.2414, "step": 5283500 }, { "epoch": 26.18, "learning_rate": 3.691530050583869e-05, "loss": 2.2555, "step": 5284000 }, { "epoch": 26.18, "learning_rate": 3.691406439658547e-05, "loss": 2.246, "step": 5284500 }, { "epoch": 26.18, "learning_rate": 3.6912825810159386e-05, "loss": 2.2715, "step": 5285000 }, { "epoch": 26.19, "learning_rate": 3.69115872237333e-05, "loss": 2.2586, "step": 5285500 }, { "epoch": 26.19, "learning_rate": 3.691034863730722e-05, "loss": 2.2493, "step": 5286000 }, { "epoch": 26.19, "learning_rate": 3.690911500522683e-05, "loss": 2.2628, "step": 5286500 }, { "epoch": 26.19, "learning_rate": 3.690787641880075e-05, "loss": 2.2646, "step": 5287000 }, { "epoch": 26.2, "learning_rate": 3.690663783237467e-05, "loss": 2.245, "step": 5287500 }, { "epoch": 26.2, "learning_rate": 3.6905399245948584e-05, "loss": 2.2657, "step": 5288000 }, { "epoch": 26.2, "learning_rate": 3.69041606595225e-05, "loss": 2.2547, "step": 5288500 }, { "epoch": 26.2, "learning_rate": 3.690292207309642e-05, "loss": 2.2863, "step": 5289000 }, { "epoch": 26.21, "learning_rate": 3.6901683486670335e-05, "loss": 2.2627, "step": 5289500 }, { "epoch": 26.21, "learning_rate": 3.690044490024425e-05, "loss": 2.2421, "step": 5290000 }, { "epoch": 26.21, "learning_rate": 3.689920631381817e-05, "loss": 2.2575, "step": 5290500 }, { "epoch": 26.21, "learning_rate": 3.6897967727392086e-05, "loss": 2.2554, "step": 5291000 }, { "epoch": 26.22, "learning_rate": 3.6896729140966e-05, "loss": 2.2682, "step": 5291500 }, { "epoch": 26.22, "learning_rate": 3.689549055453992e-05, "loss": 2.257, "step": 5292000 }, { "epoch": 26.22, "learning_rate": 3.6894256922459534e-05, "loss": 2.2817, "step": 5292500 }, { "epoch": 26.22, "learning_rate": 3.689301833603345e-05, "loss": 2.2867, "step": 5293000 }, { "epoch": 26.23, "learning_rate": 3.689177974960737e-05, "loss": 2.2552, "step": 5293500 }, { "epoch": 26.23, "learning_rate": 3.6890541163181284e-05, "loss": 2.2692, "step": 5294000 }, { "epoch": 26.23, "learning_rate": 3.68893025767552e-05, "loss": 2.2514, "step": 5294500 }, { "epoch": 26.23, "learning_rate": 3.688806399032912e-05, "loss": 2.2587, "step": 5295000 }, { "epoch": 26.24, "learning_rate": 3.6886825403903035e-05, "loss": 2.2817, "step": 5295500 }, { "epoch": 26.24, "learning_rate": 3.688558681747695e-05, "loss": 2.2631, "step": 5296000 }, { "epoch": 26.24, "learning_rate": 3.688434823105087e-05, "loss": 2.2627, "step": 5296500 }, { "epoch": 26.24, "learning_rate": 3.688311212179764e-05, "loss": 2.2805, "step": 5297000 }, { "epoch": 26.25, "learning_rate": 3.6881873535371555e-05, "loss": 2.2687, "step": 5297500 }, { "epoch": 26.25, "learning_rate": 3.688063494894547e-05, "loss": 2.2834, "step": 5298000 }, { "epoch": 26.25, "learning_rate": 3.687939636251939e-05, "loss": 2.2436, "step": 5298500 }, { "epoch": 26.25, "learning_rate": 3.687816025326615e-05, "loss": 2.2389, "step": 5299000 }, { "epoch": 26.26, "learning_rate": 3.687692166684007e-05, "loss": 2.2489, "step": 5299500 }, { "epoch": 26.26, "learning_rate": 3.6875683080413984e-05, "loss": 2.2658, "step": 5300000 }, { "epoch": 26.26, "learning_rate": 3.68744444939879e-05, "loss": 2.2578, "step": 5300500 }, { "epoch": 26.26, "learning_rate": 3.687320590756182e-05, "loss": 2.2667, "step": 5301000 }, { "epoch": 26.27, "learning_rate": 3.6871967321135735e-05, "loss": 2.2481, "step": 5301500 }, { "epoch": 26.27, "learning_rate": 3.687072873470965e-05, "loss": 2.2475, "step": 5302000 }, { "epoch": 26.27, "learning_rate": 3.686949014828357e-05, "loss": 2.2658, "step": 5302500 }, { "epoch": 26.27, "learning_rate": 3.6868251561857486e-05, "loss": 2.2964, "step": 5303000 }, { "epoch": 26.28, "learning_rate": 3.68670129754314e-05, "loss": 2.2901, "step": 5303500 }, { "epoch": 26.28, "learning_rate": 3.686577438900532e-05, "loss": 2.2668, "step": 5304000 }, { "epoch": 26.28, "learning_rate": 3.686453827975209e-05, "loss": 2.289, "step": 5304500 }, { "epoch": 26.28, "learning_rate": 3.6863299693326006e-05, "loss": 2.2806, "step": 5305000 }, { "epoch": 26.29, "learning_rate": 3.686206110689992e-05, "loss": 2.2532, "step": 5305500 }, { "epoch": 26.29, "learning_rate": 3.686082252047384e-05, "loss": 2.2487, "step": 5306000 }, { "epoch": 26.29, "learning_rate": 3.6859583934047756e-05, "loss": 2.2815, "step": 5306500 }, { "epoch": 26.29, "learning_rate": 3.685834534762167e-05, "loss": 2.2663, "step": 5307000 }, { "epoch": 26.3, "learning_rate": 3.6857109238368435e-05, "loss": 2.2659, "step": 5307500 }, { "epoch": 26.3, "learning_rate": 3.685587065194235e-05, "loss": 2.2551, "step": 5308000 }, { "epoch": 26.3, "learning_rate": 3.685463206551627e-05, "loss": 2.2631, "step": 5308500 }, { "epoch": 26.3, "learning_rate": 3.6853393479090186e-05, "loss": 2.2611, "step": 5309000 }, { "epoch": 26.31, "learning_rate": 3.68521548926641e-05, "loss": 2.2766, "step": 5309500 }, { "epoch": 26.31, "learning_rate": 3.685091630623802e-05, "loss": 2.2725, "step": 5310000 }, { "epoch": 26.31, "learning_rate": 3.684967771981194e-05, "loss": 2.2788, "step": 5310500 }, { "epoch": 26.31, "learning_rate": 3.6848439133385854e-05, "loss": 2.2321, "step": 5311000 }, { "epoch": 26.32, "learning_rate": 3.684720302413262e-05, "loss": 2.2733, "step": 5311500 }, { "epoch": 26.32, "learning_rate": 3.684596443770654e-05, "loss": 2.2867, "step": 5312000 }, { "epoch": 26.32, "learning_rate": 3.6844725851280456e-05, "loss": 2.2839, "step": 5312500 }, { "epoch": 26.32, "learning_rate": 3.6843487264854373e-05, "loss": 2.2721, "step": 5313000 }, { "epoch": 26.32, "learning_rate": 3.6842251155601135e-05, "loss": 2.2814, "step": 5313500 }, { "epoch": 26.33, "learning_rate": 3.6841015046347904e-05, "loss": 2.2994, "step": 5314000 }, { "epoch": 26.33, "learning_rate": 3.683977645992182e-05, "loss": 2.2346, "step": 5314500 }, { "epoch": 26.33, "learning_rate": 3.683853787349574e-05, "loss": 2.272, "step": 5315000 }, { "epoch": 26.33, "learning_rate": 3.6837299287069655e-05, "loss": 2.2787, "step": 5315500 }, { "epoch": 26.34, "learning_rate": 3.683606070064357e-05, "loss": 2.2815, "step": 5316000 }, { "epoch": 26.34, "learning_rate": 3.683482211421749e-05, "loss": 2.2951, "step": 5316500 }, { "epoch": 26.34, "learning_rate": 3.6833583527791406e-05, "loss": 2.2583, "step": 5317000 }, { "epoch": 26.34, "learning_rate": 3.683234741853817e-05, "loss": 2.2752, "step": 5317500 }, { "epoch": 26.35, "learning_rate": 3.6831111309284943e-05, "loss": 2.245, "step": 5318000 }, { "epoch": 26.35, "learning_rate": 3.682987520003171e-05, "loss": 2.2642, "step": 5318500 }, { "epoch": 26.35, "learning_rate": 3.6828639090778474e-05, "loss": 2.2769, "step": 5319000 }, { "epoch": 26.35, "learning_rate": 3.682740050435239e-05, "loss": 2.2546, "step": 5319500 }, { "epoch": 26.36, "learning_rate": 3.682616191792631e-05, "loss": 2.2726, "step": 5320000 }, { "epoch": 26.36, "learning_rate": 3.6824923331500225e-05, "loss": 2.247, "step": 5320500 }, { "epoch": 26.36, "learning_rate": 3.682368474507414e-05, "loss": 2.2547, "step": 5321000 }, { "epoch": 26.36, "learning_rate": 3.682244615864806e-05, "loss": 2.2627, "step": 5321500 }, { "epoch": 26.37, "learning_rate": 3.6821207572221976e-05, "loss": 2.2813, "step": 5322000 }, { "epoch": 26.37, "learning_rate": 3.681996898579589e-05, "loss": 2.2566, "step": 5322500 }, { "epoch": 26.37, "learning_rate": 3.681873039936981e-05, "loss": 2.2719, "step": 5323000 }, { "epoch": 26.37, "learning_rate": 3.681749181294373e-05, "loss": 2.2736, "step": 5323500 }, { "epoch": 26.38, "learning_rate": 3.6816253226517644e-05, "loss": 2.2618, "step": 5324000 }, { "epoch": 26.38, "learning_rate": 3.681501464009156e-05, "loss": 2.2542, "step": 5324500 }, { "epoch": 26.38, "learning_rate": 3.681377605366547e-05, "loss": 2.262, "step": 5325000 }, { "epoch": 26.38, "learning_rate": 3.6812539944412246e-05, "loss": 2.2745, "step": 5325500 }, { "epoch": 26.39, "learning_rate": 3.681130135798616e-05, "loss": 2.2565, "step": 5326000 }, { "epoch": 26.39, "learning_rate": 3.681006277156008e-05, "loss": 2.2563, "step": 5326500 }, { "epoch": 26.39, "learning_rate": 3.6808824185134e-05, "loss": 2.2395, "step": 5327000 }, { "epoch": 26.39, "learning_rate": 3.6807585598707914e-05, "loss": 2.2644, "step": 5327500 }, { "epoch": 26.4, "learning_rate": 3.6806347012281824e-05, "loss": 2.2437, "step": 5328000 }, { "epoch": 26.4, "learning_rate": 3.680510842585574e-05, "loss": 2.2721, "step": 5328500 }, { "epoch": 26.4, "learning_rate": 3.680386983942966e-05, "loss": 2.273, "step": 5329000 }, { "epoch": 26.4, "learning_rate": 3.6802631253003575e-05, "loss": 2.2778, "step": 5329500 }, { "epoch": 26.41, "learning_rate": 3.680139266657749e-05, "loss": 2.2782, "step": 5330000 }, { "epoch": 26.41, "learning_rate": 3.680015408015141e-05, "loss": 2.2676, "step": 5330500 }, { "epoch": 26.41, "learning_rate": 3.679891549372532e-05, "loss": 2.2632, "step": 5331000 }, { "epoch": 26.41, "learning_rate": 3.6797676907299236e-05, "loss": 2.2902, "step": 5331500 }, { "epoch": 26.42, "learning_rate": 3.679643832087315e-05, "loss": 2.2698, "step": 5332000 }, { "epoch": 26.42, "learning_rate": 3.679519973444707e-05, "loss": 2.2682, "step": 5332500 }, { "epoch": 26.42, "learning_rate": 3.6793961148020987e-05, "loss": 2.2695, "step": 5333000 }, { "epoch": 26.42, "learning_rate": 3.6792722561594904e-05, "loss": 2.2699, "step": 5333500 }, { "epoch": 26.43, "learning_rate": 3.679148645234167e-05, "loss": 2.2628, "step": 5334000 }, { "epoch": 26.43, "learning_rate": 3.679025034308844e-05, "loss": 2.2823, "step": 5334500 }, { "epoch": 26.43, "learning_rate": 3.678901175666236e-05, "loss": 2.2675, "step": 5335000 }, { "epoch": 26.43, "learning_rate": 3.6787773170236275e-05, "loss": 2.2469, "step": 5335500 }, { "epoch": 26.44, "learning_rate": 3.678653458381019e-05, "loss": 2.2607, "step": 5336000 }, { "epoch": 26.44, "learning_rate": 3.678529599738411e-05, "loss": 2.2687, "step": 5336500 }, { "epoch": 26.44, "learning_rate": 3.6784057410958026e-05, "loss": 2.2789, "step": 5337000 }, { "epoch": 26.44, "learning_rate": 3.678281882453194e-05, "loss": 2.2953, "step": 5337500 }, { "epoch": 26.45, "learning_rate": 3.678158023810585e-05, "loss": 2.2404, "step": 5338000 }, { "epoch": 26.45, "learning_rate": 3.678034165167977e-05, "loss": 2.282, "step": 5338500 }, { "epoch": 26.45, "learning_rate": 3.677910306525369e-05, "loss": 2.2767, "step": 5339000 }, { "epoch": 26.45, "learning_rate": 3.6777864478827604e-05, "loss": 2.2794, "step": 5339500 }, { "epoch": 26.46, "learning_rate": 3.677662589240152e-05, "loss": 2.2838, "step": 5340000 }, { "epoch": 26.46, "learning_rate": 3.677538730597544e-05, "loss": 2.2583, "step": 5340500 }, { "epoch": 26.46, "learning_rate": 3.6774151196722206e-05, "loss": 2.2644, "step": 5341000 }, { "epoch": 26.46, "learning_rate": 3.677291261029612e-05, "loss": 2.2895, "step": 5341500 }, { "epoch": 26.47, "learning_rate": 3.677167402387004e-05, "loss": 2.2884, "step": 5342000 }, { "epoch": 26.47, "learning_rate": 3.677043543744396e-05, "loss": 2.2567, "step": 5342500 }, { "epoch": 26.47, "learning_rate": 3.6769196851017874e-05, "loss": 2.2824, "step": 5343000 }, { "epoch": 26.47, "learning_rate": 3.676796074176464e-05, "loss": 2.2754, "step": 5343500 }, { "epoch": 26.48, "learning_rate": 3.676672215533856e-05, "loss": 2.2883, "step": 5344000 }, { "epoch": 26.48, "learning_rate": 3.676548604608532e-05, "loss": 2.2732, "step": 5344500 }, { "epoch": 26.48, "learning_rate": 3.676424745965924e-05, "loss": 2.2879, "step": 5345000 }, { "epoch": 26.48, "learning_rate": 3.6763008873233156e-05, "loss": 2.2564, "step": 5345500 }, { "epoch": 26.49, "learning_rate": 3.676177028680707e-05, "loss": 2.2885, "step": 5346000 }, { "epoch": 26.49, "learning_rate": 3.676053170038099e-05, "loss": 2.2796, "step": 5346500 }, { "epoch": 26.49, "learning_rate": 3.6759293113954906e-05, "loss": 2.2713, "step": 5347000 }, { "epoch": 26.49, "learning_rate": 3.6758057004701675e-05, "loss": 2.2911, "step": 5347500 }, { "epoch": 26.5, "learning_rate": 3.6756823372621296e-05, "loss": 2.2539, "step": 5348000 }, { "epoch": 26.5, "learning_rate": 3.675558478619521e-05, "loss": 2.2742, "step": 5348500 }, { "epoch": 26.5, "learning_rate": 3.675434619976913e-05, "loss": 2.246, "step": 5349000 }, { "epoch": 26.5, "learning_rate": 3.675310761334305e-05, "loss": 2.2775, "step": 5349500 }, { "epoch": 26.51, "learning_rate": 3.6751871504089816e-05, "loss": 2.2543, "step": 5350000 }, { "epoch": 26.51, "learning_rate": 3.675063291766373e-05, "loss": 2.2928, "step": 5350500 }, { "epoch": 26.51, "learning_rate": 3.674939433123765e-05, "loss": 2.2686, "step": 5351000 }, { "epoch": 26.51, "learning_rate": 3.6748155744811566e-05, "loss": 2.2733, "step": 5351500 }, { "epoch": 26.52, "learning_rate": 3.674691715838548e-05, "loss": 2.2779, "step": 5352000 }, { "epoch": 26.52, "learning_rate": 3.6745678571959393e-05, "loss": 2.2912, "step": 5352500 }, { "epoch": 26.52, "learning_rate": 3.674444246270616e-05, "loss": 2.2687, "step": 5353000 }, { "epoch": 26.52, "learning_rate": 3.674320387628008e-05, "loss": 2.2662, "step": 5353500 }, { "epoch": 26.53, "learning_rate": 3.674196776702685e-05, "loss": 2.2396, "step": 5354000 }, { "epoch": 26.53, "learning_rate": 3.6740729180600765e-05, "loss": 2.2742, "step": 5354500 }, { "epoch": 26.53, "learning_rate": 3.673949059417468e-05, "loss": 2.2846, "step": 5355000 }, { "epoch": 26.53, "learning_rate": 3.67382520077486e-05, "loss": 2.2667, "step": 5355500 }, { "epoch": 26.54, "learning_rate": 3.6737013421322516e-05, "loss": 2.2765, "step": 5356000 }, { "epoch": 26.54, "learning_rate": 3.673577483489643e-05, "loss": 2.251, "step": 5356500 }, { "epoch": 26.54, "learning_rate": 3.673453624847035e-05, "loss": 2.2586, "step": 5357000 }, { "epoch": 26.54, "learning_rate": 3.6733297662044266e-05, "loss": 2.2858, "step": 5357500 }, { "epoch": 26.55, "learning_rate": 3.673205907561818e-05, "loss": 2.2807, "step": 5358000 }, { "epoch": 26.55, "learning_rate": 3.6730822966364945e-05, "loss": 2.2795, "step": 5358500 }, { "epoch": 26.55, "learning_rate": 3.672958437993886e-05, "loss": 2.2698, "step": 5359000 }, { "epoch": 26.55, "learning_rate": 3.672834579351278e-05, "loss": 2.2692, "step": 5359500 }, { "epoch": 26.56, "learning_rate": 3.6727107207086696e-05, "loss": 2.2697, "step": 5360000 }, { "epoch": 26.56, "learning_rate": 3.672586862066061e-05, "loss": 2.2799, "step": 5360500 }, { "epoch": 26.56, "learning_rate": 3.672463003423453e-05, "loss": 2.2751, "step": 5361000 }, { "epoch": 26.56, "learning_rate": 3.672339144780845e-05, "loss": 2.2689, "step": 5361500 }, { "epoch": 26.57, "learning_rate": 3.6722152861382364e-05, "loss": 2.2778, "step": 5362000 }, { "epoch": 26.57, "learning_rate": 3.672091675212913e-05, "loss": 2.2961, "step": 5362500 }, { "epoch": 26.57, "learning_rate": 3.671967816570305e-05, "loss": 2.2765, "step": 5363000 }, { "epoch": 26.57, "learning_rate": 3.6718439579276967e-05, "loss": 2.2756, "step": 5363500 }, { "epoch": 26.58, "learning_rate": 3.6717200992850884e-05, "loss": 2.3013, "step": 5364000 }, { "epoch": 26.58, "learning_rate": 3.67159624064248e-05, "loss": 2.2892, "step": 5364500 }, { "epoch": 26.58, "learning_rate": 3.671473125151727e-05, "loss": 2.3011, "step": 5365000 }, { "epoch": 26.58, "learning_rate": 3.671349266509119e-05, "loss": 2.3148, "step": 5365500 }, { "epoch": 26.59, "learning_rate": 3.671225407866511e-05, "loss": 2.2778, "step": 5366000 }, { "epoch": 26.59, "learning_rate": 3.671101549223902e-05, "loss": 2.2993, "step": 5366500 }, { "epoch": 26.59, "learning_rate": 3.6709776905812934e-05, "loss": 2.2779, "step": 5367000 }, { "epoch": 26.59, "learning_rate": 3.670853831938685e-05, "loss": 2.2696, "step": 5367500 }, { "epoch": 26.59, "learning_rate": 3.670729973296077e-05, "loss": 2.2576, "step": 5368000 }, { "epoch": 26.6, "learning_rate": 3.6706061146534685e-05, "loss": 2.2611, "step": 5368500 }, { "epoch": 26.6, "learning_rate": 3.6704825037281454e-05, "loss": 2.2756, "step": 5369000 }, { "epoch": 26.6, "learning_rate": 3.670358645085537e-05, "loss": 2.2703, "step": 5369500 }, { "epoch": 26.6, "learning_rate": 3.670234786442929e-05, "loss": 2.2862, "step": 5370000 }, { "epoch": 26.61, "learning_rate": 3.6701109278003204e-05, "loss": 2.3082, "step": 5370500 }, { "epoch": 26.61, "learning_rate": 3.669987069157712e-05, "loss": 2.2528, "step": 5371000 }, { "epoch": 26.61, "learning_rate": 3.669863210515104e-05, "loss": 2.2901, "step": 5371500 }, { "epoch": 26.61, "learning_rate": 3.6697393518724955e-05, "loss": 2.2658, "step": 5372000 }, { "epoch": 26.62, "learning_rate": 3.6696154932298865e-05, "loss": 2.2874, "step": 5372500 }, { "epoch": 26.62, "learning_rate": 3.669491634587278e-05, "loss": 2.2565, "step": 5373000 }, { "epoch": 26.62, "learning_rate": 3.66936777594467e-05, "loss": 2.2681, "step": 5373500 }, { "epoch": 26.62, "learning_rate": 3.669244412736632e-05, "loss": 2.2731, "step": 5374000 }, { "epoch": 26.63, "learning_rate": 3.669120554094024e-05, "loss": 2.3088, "step": 5374500 }, { "epoch": 26.63, "learning_rate": 3.6689966954514154e-05, "loss": 2.2603, "step": 5375000 }, { "epoch": 26.63, "learning_rate": 3.668872836808807e-05, "loss": 2.2773, "step": 5375500 }, { "epoch": 26.63, "learning_rate": 3.668748978166199e-05, "loss": 2.2609, "step": 5376000 }, { "epoch": 26.64, "learning_rate": 3.6686251195235904e-05, "loss": 2.2708, "step": 5376500 }, { "epoch": 26.64, "learning_rate": 3.668501260880982e-05, "loss": 2.2872, "step": 5377000 }, { "epoch": 26.64, "learning_rate": 3.668377402238374e-05, "loss": 2.2752, "step": 5377500 }, { "epoch": 26.64, "learning_rate": 3.6682535435957655e-05, "loss": 2.2825, "step": 5378000 }, { "epoch": 26.65, "learning_rate": 3.6681296849531565e-05, "loss": 2.2562, "step": 5378500 }, { "epoch": 26.65, "learning_rate": 3.668005826310548e-05, "loss": 2.2711, "step": 5379000 }, { "epoch": 26.65, "learning_rate": 3.667882215385226e-05, "loss": 2.2773, "step": 5379500 }, { "epoch": 26.65, "learning_rate": 3.667758356742617e-05, "loss": 2.2731, "step": 5380000 }, { "epoch": 26.66, "learning_rate": 3.6676344981000085e-05, "loss": 2.253, "step": 5380500 }, { "epoch": 26.66, "learning_rate": 3.6675106394574e-05, "loss": 2.2824, "step": 5381000 }, { "epoch": 26.66, "learning_rate": 3.667387028532077e-05, "loss": 2.2933, "step": 5381500 }, { "epoch": 26.66, "learning_rate": 3.667263417606754e-05, "loss": 2.2788, "step": 5382000 }, { "epoch": 26.67, "learning_rate": 3.6671395589641456e-05, "loss": 2.2608, "step": 5382500 }, { "epoch": 26.67, "learning_rate": 3.667015700321537e-05, "loss": 2.2762, "step": 5383000 }, { "epoch": 26.67, "learning_rate": 3.666891841678929e-05, "loss": 2.2692, "step": 5383500 }, { "epoch": 26.67, "learning_rate": 3.666767983036321e-05, "loss": 2.2757, "step": 5384000 }, { "epoch": 26.68, "learning_rate": 3.666644372110997e-05, "loss": 2.2969, "step": 5384500 }, { "epoch": 26.68, "learning_rate": 3.6665205134683886e-05, "loss": 2.2756, "step": 5385000 }, { "epoch": 26.68, "learning_rate": 3.66639665482578e-05, "loss": 2.3052, "step": 5385500 }, { "epoch": 26.68, "learning_rate": 3.666272796183172e-05, "loss": 2.2807, "step": 5386000 }, { "epoch": 26.69, "learning_rate": 3.666148937540564e-05, "loss": 2.2769, "step": 5386500 }, { "epoch": 26.69, "learning_rate": 3.6660250788979554e-05, "loss": 2.2956, "step": 5387000 }, { "epoch": 26.69, "learning_rate": 3.665901220255347e-05, "loss": 2.2773, "step": 5387500 }, { "epoch": 26.69, "learning_rate": 3.665777361612739e-05, "loss": 2.2996, "step": 5388000 }, { "epoch": 26.7, "learning_rate": 3.6656535029701305e-05, "loss": 2.2919, "step": 5388500 }, { "epoch": 26.7, "learning_rate": 3.665529644327522e-05, "loss": 2.2895, "step": 5389000 }, { "epoch": 26.7, "learning_rate": 3.665406033402199e-05, "loss": 2.2703, "step": 5389500 }, { "epoch": 26.7, "learning_rate": 3.665282174759591e-05, "loss": 2.2458, "step": 5390000 }, { "epoch": 26.71, "learning_rate": 3.6651583161169824e-05, "loss": 2.2799, "step": 5390500 }, { "epoch": 26.71, "learning_rate": 3.665034457474374e-05, "loss": 2.272, "step": 5391000 }, { "epoch": 26.71, "learning_rate": 3.664910598831766e-05, "loss": 2.2755, "step": 5391500 }, { "epoch": 26.71, "learning_rate": 3.6647867401891575e-05, "loss": 2.284, "step": 5392000 }, { "epoch": 26.72, "learning_rate": 3.664662881546549e-05, "loss": 2.2968, "step": 5392500 }, { "epoch": 26.72, "learning_rate": 3.664539022903941e-05, "loss": 2.2897, "step": 5393000 }, { "epoch": 26.72, "learning_rate": 3.664415164261332e-05, "loss": 2.2828, "step": 5393500 }, { "epoch": 26.72, "learning_rate": 3.6642913056187236e-05, "loss": 2.2667, "step": 5394000 }, { "epoch": 26.73, "learning_rate": 3.6641676946934005e-05, "loss": 2.2811, "step": 5394500 }, { "epoch": 26.73, "learning_rate": 3.6640440837680774e-05, "loss": 2.2899, "step": 5395000 }, { "epoch": 26.73, "learning_rate": 3.663920225125469e-05, "loss": 2.2946, "step": 5395500 }, { "epoch": 26.73, "learning_rate": 3.663796366482861e-05, "loss": 2.2743, "step": 5396000 }, { "epoch": 26.74, "learning_rate": 3.6636725078402524e-05, "loss": 2.2719, "step": 5396500 }, { "epoch": 26.74, "learning_rate": 3.663548649197644e-05, "loss": 2.2736, "step": 5397000 }, { "epoch": 26.74, "learning_rate": 3.663424790555036e-05, "loss": 2.2547, "step": 5397500 }, { "epoch": 26.74, "learning_rate": 3.6633009319124275e-05, "loss": 2.2961, "step": 5398000 }, { "epoch": 26.75, "learning_rate": 3.663177073269819e-05, "loss": 2.2816, "step": 5398500 }, { "epoch": 26.75, "learning_rate": 3.663053214627211e-05, "loss": 2.2523, "step": 5399000 }, { "epoch": 26.75, "learning_rate": 3.662929603701887e-05, "loss": 2.2643, "step": 5399500 }, { "epoch": 26.75, "learning_rate": 3.662805745059279e-05, "loss": 2.2791, "step": 5400000 }, { "epoch": 26.76, "learning_rate": 3.6626818864166705e-05, "loss": 2.2853, "step": 5400500 }, { "epoch": 26.76, "learning_rate": 3.662558027774062e-05, "loss": 2.282, "step": 5401000 }, { "epoch": 26.76, "learning_rate": 3.662434169131454e-05, "loss": 2.2665, "step": 5401500 }, { "epoch": 26.76, "learning_rate": 3.662310558206131e-05, "loss": 2.2855, "step": 5402000 }, { "epoch": 26.77, "learning_rate": 3.6621866995635225e-05, "loss": 2.2654, "step": 5402500 }, { "epoch": 26.77, "learning_rate": 3.662062840920914e-05, "loss": 2.2647, "step": 5403000 }, { "epoch": 26.77, "learning_rate": 3.661938982278306e-05, "loss": 2.2608, "step": 5403500 }, { "epoch": 26.77, "learning_rate": 3.661815371352982e-05, "loss": 2.2639, "step": 5404000 }, { "epoch": 26.78, "learning_rate": 3.661691512710374e-05, "loss": 2.274, "step": 5404500 }, { "epoch": 26.78, "learning_rate": 3.6615676540677654e-05, "loss": 2.2579, "step": 5405000 }, { "epoch": 26.78, "learning_rate": 3.661443795425157e-05, "loss": 2.2698, "step": 5405500 }, { "epoch": 26.78, "learning_rate": 3.661319936782549e-05, "loss": 2.2665, "step": 5406000 }, { "epoch": 26.79, "learning_rate": 3.6611960781399405e-05, "loss": 2.2833, "step": 5406500 }, { "epoch": 26.79, "learning_rate": 3.6610724672146174e-05, "loss": 2.2807, "step": 5407000 }, { "epoch": 26.79, "learning_rate": 3.660948608572009e-05, "loss": 2.2814, "step": 5407500 }, { "epoch": 26.79, "learning_rate": 3.660824749929401e-05, "loss": 2.2917, "step": 5408000 }, { "epoch": 26.8, "learning_rate": 3.6607008912867925e-05, "loss": 2.2824, "step": 5408500 }, { "epoch": 26.8, "learning_rate": 3.660577032644184e-05, "loss": 2.2577, "step": 5409000 }, { "epoch": 26.8, "learning_rate": 3.660453174001576e-05, "loss": 2.2902, "step": 5409500 }, { "epoch": 26.8, "learning_rate": 3.6603293153589675e-05, "loss": 2.2665, "step": 5410000 }, { "epoch": 26.81, "learning_rate": 3.660205456716359e-05, "loss": 2.2558, "step": 5410500 }, { "epoch": 26.81, "learning_rate": 3.660081598073751e-05, "loss": 2.291, "step": 5411000 }, { "epoch": 26.81, "learning_rate": 3.6599577394311426e-05, "loss": 2.2716, "step": 5411500 }, { "epoch": 26.81, "learning_rate": 3.659833880788534e-05, "loss": 2.286, "step": 5412000 }, { "epoch": 26.82, "learning_rate": 3.6597102698632105e-05, "loss": 2.2877, "step": 5412500 }, { "epoch": 26.82, "learning_rate": 3.659586411220602e-05, "loss": 2.272, "step": 5413000 }, { "epoch": 26.82, "learning_rate": 3.659462552577994e-05, "loss": 2.2849, "step": 5413500 }, { "epoch": 26.82, "learning_rate": 3.6593386939353856e-05, "loss": 2.2769, "step": 5414000 }, { "epoch": 26.83, "learning_rate": 3.659214835292777e-05, "loss": 2.2784, "step": 5414500 }, { "epoch": 26.83, "learning_rate": 3.659090976650169e-05, "loss": 2.2649, "step": 5415000 }, { "epoch": 26.83, "learning_rate": 3.658967365724846e-05, "loss": 2.2982, "step": 5415500 }, { "epoch": 26.83, "learning_rate": 3.658843754799523e-05, "loss": 2.2614, "step": 5416000 }, { "epoch": 26.84, "learning_rate": 3.6587198961569144e-05, "loss": 2.3042, "step": 5416500 }, { "epoch": 26.84, "learning_rate": 3.658596037514306e-05, "loss": 2.3011, "step": 5417000 }, { "epoch": 26.84, "learning_rate": 3.658472178871697e-05, "loss": 2.2909, "step": 5417500 }, { "epoch": 26.84, "learning_rate": 3.658348320229089e-05, "loss": 2.2738, "step": 5418000 }, { "epoch": 26.85, "learning_rate": 3.6582244615864805e-05, "loss": 2.2702, "step": 5418500 }, { "epoch": 26.85, "learning_rate": 3.658100602943872e-05, "loss": 2.2858, "step": 5419000 }, { "epoch": 26.85, "learning_rate": 3.657976744301264e-05, "loss": 2.2621, "step": 5419500 }, { "epoch": 26.85, "learning_rate": 3.6578528856586556e-05, "loss": 2.2872, "step": 5420000 }, { "epoch": 26.86, "learning_rate": 3.657729027016047e-05, "loss": 2.2968, "step": 5420500 }, { "epoch": 26.86, "learning_rate": 3.657605416090724e-05, "loss": 2.2733, "step": 5421000 }, { "epoch": 26.86, "learning_rate": 3.657481557448116e-05, "loss": 2.2552, "step": 5421500 }, { "epoch": 26.86, "learning_rate": 3.6573576988055076e-05, "loss": 2.2804, "step": 5422000 }, { "epoch": 26.86, "learning_rate": 3.657233840162899e-05, "loss": 2.2929, "step": 5422500 }, { "epoch": 26.87, "learning_rate": 3.657110229237576e-05, "loss": 2.2556, "step": 5423000 }, { "epoch": 26.87, "learning_rate": 3.656986370594968e-05, "loss": 2.2761, "step": 5423500 }, { "epoch": 26.87, "learning_rate": 3.656862511952359e-05, "loss": 2.289, "step": 5424000 }, { "epoch": 26.87, "learning_rate": 3.6567386533097505e-05, "loss": 2.2732, "step": 5424500 }, { "epoch": 26.88, "learning_rate": 3.6566150423844274e-05, "loss": 2.3058, "step": 5425000 }, { "epoch": 26.88, "learning_rate": 3.656491183741819e-05, "loss": 2.2575, "step": 5425500 }, { "epoch": 26.88, "learning_rate": 3.656367325099211e-05, "loss": 2.2856, "step": 5426000 }, { "epoch": 26.88, "learning_rate": 3.6562434664566025e-05, "loss": 2.2945, "step": 5426500 }, { "epoch": 26.89, "learning_rate": 3.656119607813994e-05, "loss": 2.2855, "step": 5427000 }, { "epoch": 26.89, "learning_rate": 3.655995749171386e-05, "loss": 2.2709, "step": 5427500 }, { "epoch": 26.89, "learning_rate": 3.655872138246063e-05, "loss": 2.2528, "step": 5428000 }, { "epoch": 26.89, "learning_rate": 3.6557485273207397e-05, "loss": 2.2848, "step": 5428500 }, { "epoch": 26.9, "learning_rate": 3.6556246686781313e-05, "loss": 2.2729, "step": 5429000 }, { "epoch": 26.9, "learning_rate": 3.6555008100355224e-05, "loss": 2.3043, "step": 5429500 }, { "epoch": 26.9, "learning_rate": 3.655376951392914e-05, "loss": 2.2512, "step": 5430000 }, { "epoch": 26.9, "learning_rate": 3.655253092750306e-05, "loss": 2.2953, "step": 5430500 }, { "epoch": 26.91, "learning_rate": 3.6551292341076974e-05, "loss": 2.2616, "step": 5431000 }, { "epoch": 26.91, "learning_rate": 3.655005375465089e-05, "loss": 2.2824, "step": 5431500 }, { "epoch": 26.91, "learning_rate": 3.654881516822481e-05, "loss": 2.2861, "step": 5432000 }, { "epoch": 26.91, "learning_rate": 3.6547576581798725e-05, "loss": 2.2852, "step": 5432500 }, { "epoch": 26.92, "learning_rate": 3.654633799537264e-05, "loss": 2.2819, "step": 5433000 }, { "epoch": 26.92, "learning_rate": 3.654509940894656e-05, "loss": 2.2683, "step": 5433500 }, { "epoch": 26.92, "learning_rate": 3.6543860822520476e-05, "loss": 2.2694, "step": 5434000 }, { "epoch": 26.92, "learning_rate": 3.654262223609439e-05, "loss": 2.2698, "step": 5434500 }, { "epoch": 26.93, "learning_rate": 3.654138364966831e-05, "loss": 2.2743, "step": 5435000 }, { "epoch": 26.93, "learning_rate": 3.654014506324223e-05, "loss": 2.2672, "step": 5435500 }, { "epoch": 26.93, "learning_rate": 3.6538906476816144e-05, "loss": 2.2739, "step": 5436000 }, { "epoch": 26.93, "learning_rate": 3.653767036756291e-05, "loss": 2.2586, "step": 5436500 }, { "epoch": 26.94, "learning_rate": 3.653643178113683e-05, "loss": 2.29, "step": 5437000 }, { "epoch": 26.94, "learning_rate": 3.653519319471074e-05, "loss": 2.2828, "step": 5437500 }, { "epoch": 26.94, "learning_rate": 3.6533954608284656e-05, "loss": 2.2699, "step": 5438000 }, { "epoch": 26.94, "learning_rate": 3.6532716021858573e-05, "loss": 2.2833, "step": 5438500 }, { "epoch": 26.95, "learning_rate": 3.653147743543249e-05, "loss": 2.2968, "step": 5439000 }, { "epoch": 26.95, "learning_rate": 3.653023884900641e-05, "loss": 2.2807, "step": 5439500 }, { "epoch": 26.95, "learning_rate": 3.6529000262580324e-05, "loss": 2.2711, "step": 5440000 }, { "epoch": 26.95, "learning_rate": 3.652776167615424e-05, "loss": 2.2559, "step": 5440500 }, { "epoch": 26.96, "learning_rate": 3.652652308972815e-05, "loss": 2.2752, "step": 5441000 }, { "epoch": 26.96, "learning_rate": 3.652528450330207e-05, "loss": 2.2879, "step": 5441500 }, { "epoch": 26.96, "learning_rate": 3.6524050871221696e-05, "loss": 2.2852, "step": 5442000 }, { "epoch": 26.96, "learning_rate": 3.652281228479561e-05, "loss": 2.2834, "step": 5442500 }, { "epoch": 26.97, "learning_rate": 3.652157369836953e-05, "loss": 2.2683, "step": 5443000 }, { "epoch": 26.97, "learning_rate": 3.6520335111943446e-05, "loss": 2.2996, "step": 5443500 }, { "epoch": 26.97, "learning_rate": 3.651909652551736e-05, "loss": 2.2578, "step": 5444000 }, { "epoch": 26.97, "learning_rate": 3.6517857939091274e-05, "loss": 2.2798, "step": 5444500 }, { "epoch": 26.98, "learning_rate": 3.651661935266519e-05, "loss": 2.287, "step": 5445000 }, { "epoch": 26.98, "learning_rate": 3.651538076623911e-05, "loss": 2.2635, "step": 5445500 }, { "epoch": 26.98, "learning_rate": 3.6514144656985876e-05, "loss": 2.2767, "step": 5446000 }, { "epoch": 26.98, "learning_rate": 3.651290607055979e-05, "loss": 2.2714, "step": 5446500 }, { "epoch": 26.99, "learning_rate": 3.651166748413371e-05, "loss": 2.2901, "step": 5447000 }, { "epoch": 26.99, "learning_rate": 3.651042889770763e-05, "loss": 2.2857, "step": 5447500 }, { "epoch": 26.99, "learning_rate": 3.6509190311281544e-05, "loss": 2.2839, "step": 5448000 }, { "epoch": 26.99, "learning_rate": 3.650795172485546e-05, "loss": 2.2809, "step": 5448500 }, { "epoch": 27.0, "learning_rate": 3.650671313842938e-05, "loss": 2.2747, "step": 5449000 }, { "epoch": 27.0, "learning_rate": 3.6505474552003295e-05, "loss": 2.291, "step": 5449500 }, { "epoch": 27.0, "eval_accuracy": 0.6562619972139041, "eval_accuracy_mlm": 0.6113849397655421, "eval_accuracy_nsp": 0.8681356610278516, "eval_loss": 2.3292908668518066, "eval_runtime": 146.2467, "eval_samples_per_second": 1743.349, "eval_steps_per_second": 72.644, "step": 5449761 }, { "epoch": 27.0, "learning_rate": 3.650423596557721e-05, "loss": 2.2555, "step": 5450000 }, { "epoch": 27.0, "learning_rate": 3.650299985632398e-05, "loss": 2.257, "step": 5450500 }, { "epoch": 27.01, "learning_rate": 3.6501766224243594e-05, "loss": 2.2329, "step": 5451000 }, { "epoch": 27.01, "learning_rate": 3.650052763781751e-05, "loss": 2.2794, "step": 5451500 }, { "epoch": 27.01, "learning_rate": 3.649928905139143e-05, "loss": 2.2362, "step": 5452000 }, { "epoch": 27.01, "learning_rate": 3.6498050464965345e-05, "loss": 2.2553, "step": 5452500 }, { "epoch": 27.02, "learning_rate": 3.649681187853926e-05, "loss": 2.2592, "step": 5453000 }, { "epoch": 27.02, "learning_rate": 3.649557329211318e-05, "loss": 2.2158, "step": 5453500 }, { "epoch": 27.02, "learning_rate": 3.6494334705687096e-05, "loss": 2.2312, "step": 5454000 }, { "epoch": 27.02, "learning_rate": 3.649309611926101e-05, "loss": 2.2288, "step": 5454500 }, { "epoch": 27.03, "learning_rate": 3.649185753283493e-05, "loss": 2.2151, "step": 5455000 }, { "epoch": 27.03, "learning_rate": 3.649061894640885e-05, "loss": 2.2561, "step": 5455500 }, { "epoch": 27.03, "learning_rate": 3.6489380359982764e-05, "loss": 2.2237, "step": 5456000 }, { "epoch": 27.03, "learning_rate": 3.648814177355668e-05, "loss": 2.2344, "step": 5456500 }, { "epoch": 27.04, "learning_rate": 3.64869031871306e-05, "loss": 2.2628, "step": 5457000 }, { "epoch": 27.04, "learning_rate": 3.648566707787736e-05, "loss": 2.2354, "step": 5457500 }, { "epoch": 27.04, "learning_rate": 3.6484428491451276e-05, "loss": 2.2272, "step": 5458000 }, { "epoch": 27.04, "learning_rate": 3.648318990502519e-05, "loss": 2.2534, "step": 5458500 }, { "epoch": 27.05, "learning_rate": 3.648195131859911e-05, "loss": 2.2329, "step": 5459000 }, { "epoch": 27.05, "learning_rate": 3.648071273217303e-05, "loss": 2.2415, "step": 5459500 }, { "epoch": 27.05, "learning_rate": 3.6479474145746944e-05, "loss": 2.2537, "step": 5460000 }, { "epoch": 27.05, "learning_rate": 3.647823803649371e-05, "loss": 2.2636, "step": 5460500 }, { "epoch": 27.06, "learning_rate": 3.647699945006763e-05, "loss": 2.2439, "step": 5461000 }, { "epoch": 27.06, "learning_rate": 3.647576086364155e-05, "loss": 2.264, "step": 5461500 }, { "epoch": 27.06, "learning_rate": 3.6474522277215464e-05, "loss": 2.2518, "step": 5462000 }, { "epoch": 27.06, "learning_rate": 3.6473286167962226e-05, "loss": 2.2465, "step": 5462500 }, { "epoch": 27.07, "learning_rate": 3.647204758153614e-05, "loss": 2.246, "step": 5463000 }, { "epoch": 27.07, "learning_rate": 3.647080899511006e-05, "loss": 2.2395, "step": 5463500 }, { "epoch": 27.07, "learning_rate": 3.6469570408683977e-05, "loss": 2.2537, "step": 5464000 }, { "epoch": 27.07, "learning_rate": 3.6468331822257893e-05, "loss": 2.2474, "step": 5464500 }, { "epoch": 27.08, "learning_rate": 3.646709323583181e-05, "loss": 2.2647, "step": 5465000 }, { "epoch": 27.08, "learning_rate": 3.646585712657858e-05, "loss": 2.2496, "step": 5465500 }, { "epoch": 27.08, "learning_rate": 3.646462101732535e-05, "loss": 2.2495, "step": 5466000 }, { "epoch": 27.08, "learning_rate": 3.6463382430899265e-05, "loss": 2.2468, "step": 5466500 }, { "epoch": 27.09, "learning_rate": 3.646214384447318e-05, "loss": 2.2772, "step": 5467000 }, { "epoch": 27.09, "learning_rate": 3.64609052580471e-05, "loss": 2.2372, "step": 5467500 }, { "epoch": 27.09, "learning_rate": 3.645966914879387e-05, "loss": 2.2775, "step": 5468000 }, { "epoch": 27.09, "learning_rate": 3.6458430562367785e-05, "loss": 2.2419, "step": 5468500 }, { "epoch": 27.1, "learning_rate": 3.64571919759417e-05, "loss": 2.2398, "step": 5469000 }, { "epoch": 27.1, "learning_rate": 3.645595586668847e-05, "loss": 2.2505, "step": 5469500 }, { "epoch": 27.1, "learning_rate": 3.645471728026239e-05, "loss": 2.259, "step": 5470000 }, { "epoch": 27.1, "learning_rate": 3.6453478693836304e-05, "loss": 2.2549, "step": 5470500 }, { "epoch": 27.11, "learning_rate": 3.645224010741022e-05, "loss": 2.256, "step": 5471000 }, { "epoch": 27.11, "learning_rate": 3.645100152098414e-05, "loss": 2.2535, "step": 5471500 }, { "epoch": 27.11, "learning_rate": 3.6449762934558055e-05, "loss": 2.2698, "step": 5472000 }, { "epoch": 27.11, "learning_rate": 3.6448524348131965e-05, "loss": 2.2286, "step": 5472500 }, { "epoch": 27.12, "learning_rate": 3.644728576170588e-05, "loss": 2.2582, "step": 5473000 }, { "epoch": 27.12, "learning_rate": 3.64460471752798e-05, "loss": 2.2486, "step": 5473500 }, { "epoch": 27.12, "learning_rate": 3.644481106602657e-05, "loss": 2.261, "step": 5474000 }, { "epoch": 27.12, "learning_rate": 3.6443572479600485e-05, "loss": 2.2363, "step": 5474500 }, { "epoch": 27.13, "learning_rate": 3.64423338931744e-05, "loss": 2.2753, "step": 5475000 }, { "epoch": 27.13, "learning_rate": 3.644109530674832e-05, "loss": 2.2233, "step": 5475500 }, { "epoch": 27.13, "learning_rate": 3.643985672032223e-05, "loss": 2.2671, "step": 5476000 }, { "epoch": 27.13, "learning_rate": 3.6438618133896146e-05, "loss": 2.2589, "step": 5476500 }, { "epoch": 27.13, "learning_rate": 3.643737954747006e-05, "loss": 2.2706, "step": 5477000 }, { "epoch": 27.14, "learning_rate": 3.643614096104398e-05, "loss": 2.247, "step": 5477500 }, { "epoch": 27.14, "learning_rate": 3.6434902374617896e-05, "loss": 2.2405, "step": 5478000 }, { "epoch": 27.14, "learning_rate": 3.643366378819181e-05, "loss": 2.2682, "step": 5478500 }, { "epoch": 27.14, "learning_rate": 3.643242520176573e-05, "loss": 2.2618, "step": 5479000 }, { "epoch": 27.15, "learning_rate": 3.643118661533965e-05, "loss": 2.2553, "step": 5479500 }, { "epoch": 27.15, "learning_rate": 3.6429948028913564e-05, "loss": 2.2585, "step": 5480000 }, { "epoch": 27.15, "learning_rate": 3.642870944248748e-05, "loss": 2.2591, "step": 5480500 }, { "epoch": 27.15, "learning_rate": 3.642747333323425e-05, "loss": 2.2141, "step": 5481000 }, { "epoch": 27.16, "learning_rate": 3.642623474680816e-05, "loss": 2.2618, "step": 5481500 }, { "epoch": 27.16, "learning_rate": 3.642499616038208e-05, "loss": 2.2731, "step": 5482000 }, { "epoch": 27.16, "learning_rate": 3.6423757573955994e-05, "loss": 2.2593, "step": 5482500 }, { "epoch": 27.16, "learning_rate": 3.642252146470276e-05, "loss": 2.2595, "step": 5483000 }, { "epoch": 27.17, "learning_rate": 3.642128287827668e-05, "loss": 2.2376, "step": 5483500 }, { "epoch": 27.17, "learning_rate": 3.6420044291850597e-05, "loss": 2.2437, "step": 5484000 }, { "epoch": 27.17, "learning_rate": 3.6418805705424513e-05, "loss": 2.2427, "step": 5484500 }, { "epoch": 27.17, "learning_rate": 3.641756711899843e-05, "loss": 2.2614, "step": 5485000 }, { "epoch": 27.18, "learning_rate": 3.641633348691805e-05, "loss": 2.2662, "step": 5485500 }, { "epoch": 27.18, "learning_rate": 3.641509490049197e-05, "loss": 2.232, "step": 5486000 }, { "epoch": 27.18, "learning_rate": 3.6413856314065885e-05, "loss": 2.2682, "step": 5486500 }, { "epoch": 27.18, "learning_rate": 3.64126177276398e-05, "loss": 2.2515, "step": 5487000 }, { "epoch": 27.19, "learning_rate": 3.641138161838657e-05, "loss": 2.2346, "step": 5487500 }, { "epoch": 27.19, "learning_rate": 3.641014303196049e-05, "loss": 2.2527, "step": 5488000 }, { "epoch": 27.19, "learning_rate": 3.6408904445534405e-05, "loss": 2.2485, "step": 5488500 }, { "epoch": 27.19, "learning_rate": 3.640766585910832e-05, "loss": 2.2585, "step": 5489000 }, { "epoch": 27.2, "learning_rate": 3.640642727268224e-05, "loss": 2.2364, "step": 5489500 }, { "epoch": 27.2, "learning_rate": 3.6405188686256155e-05, "loss": 2.257, "step": 5490000 }, { "epoch": 27.2, "learning_rate": 3.640395009983007e-05, "loss": 2.2554, "step": 5490500 }, { "epoch": 27.2, "learning_rate": 3.640271151340399e-05, "loss": 2.2422, "step": 5491000 }, { "epoch": 27.21, "learning_rate": 3.6401472926977906e-05, "loss": 2.2549, "step": 5491500 }, { "epoch": 27.21, "learning_rate": 3.640023681772467e-05, "loss": 2.2699, "step": 5492000 }, { "epoch": 27.21, "learning_rate": 3.6398998231298585e-05, "loss": 2.266, "step": 5492500 }, { "epoch": 27.21, "learning_rate": 3.63977596448725e-05, "loss": 2.2627, "step": 5493000 }, { "epoch": 27.22, "learning_rate": 3.639652105844642e-05, "loss": 2.2377, "step": 5493500 }, { "epoch": 27.22, "learning_rate": 3.6395282472020336e-05, "loss": 2.2849, "step": 5494000 }, { "epoch": 27.22, "learning_rate": 3.6394046362767105e-05, "loss": 2.2537, "step": 5494500 }, { "epoch": 27.22, "learning_rate": 3.6392810253513873e-05, "loss": 2.2768, "step": 5495000 }, { "epoch": 27.23, "learning_rate": 3.639157166708779e-05, "loss": 2.2535, "step": 5495500 }, { "epoch": 27.23, "learning_rate": 3.63903330806617e-05, "loss": 2.2404, "step": 5496000 }, { "epoch": 27.23, "learning_rate": 3.638909697140847e-05, "loss": 2.2792, "step": 5496500 }, { "epoch": 27.23, "learning_rate": 3.6387858384982386e-05, "loss": 2.2611, "step": 5497000 }, { "epoch": 27.24, "learning_rate": 3.63866197985563e-05, "loss": 2.2887, "step": 5497500 }, { "epoch": 27.24, "learning_rate": 3.638538121213022e-05, "loss": 2.2696, "step": 5498000 }, { "epoch": 27.24, "learning_rate": 3.638414262570414e-05, "loss": 2.2436, "step": 5498500 }, { "epoch": 27.24, "learning_rate": 3.6382904039278054e-05, "loss": 2.2483, "step": 5499000 }, { "epoch": 27.25, "learning_rate": 3.638166545285197e-05, "loss": 2.2444, "step": 5499500 }, { "epoch": 27.25, "learning_rate": 3.638042686642589e-05, "loss": 2.2416, "step": 5500000 }, { "epoch": 27.25, "learning_rate": 3.637919075717266e-05, "loss": 2.2547, "step": 5500500 }, { "epoch": 27.25, "learning_rate": 3.6377952170746574e-05, "loss": 2.2881, "step": 5501000 }, { "epoch": 27.26, "learning_rate": 3.637671358432049e-05, "loss": 2.2535, "step": 5501500 }, { "epoch": 27.26, "learning_rate": 3.637547747506725e-05, "loss": 2.2535, "step": 5502000 }, { "epoch": 27.26, "learning_rate": 3.637423888864117e-05, "loss": 2.2756, "step": 5502500 }, { "epoch": 27.26, "learning_rate": 3.6373000302215086e-05, "loss": 2.2454, "step": 5503000 }, { "epoch": 27.27, "learning_rate": 3.6371761715789e-05, "loss": 2.2552, "step": 5503500 }, { "epoch": 27.27, "learning_rate": 3.637052312936292e-05, "loss": 2.2726, "step": 5504000 }, { "epoch": 27.27, "learning_rate": 3.636928454293684e-05, "loss": 2.2531, "step": 5504500 }, { "epoch": 27.27, "learning_rate": 3.6368045956510754e-05, "loss": 2.2683, "step": 5505000 }, { "epoch": 27.28, "learning_rate": 3.636680737008467e-05, "loss": 2.2521, "step": 5505500 }, { "epoch": 27.28, "learning_rate": 3.636556878365859e-05, "loss": 2.25, "step": 5506000 }, { "epoch": 27.28, "learning_rate": 3.6364330197232505e-05, "loss": 2.2742, "step": 5506500 }, { "epoch": 27.28, "learning_rate": 3.636309161080642e-05, "loss": 2.261, "step": 5507000 }, { "epoch": 27.29, "learning_rate": 3.636185302438034e-05, "loss": 2.2644, "step": 5507500 }, { "epoch": 27.29, "learning_rate": 3.636061691512711e-05, "loss": 2.2741, "step": 5508000 }, { "epoch": 27.29, "learning_rate": 3.6359378328701024e-05, "loss": 2.2615, "step": 5508500 }, { "epoch": 27.29, "learning_rate": 3.6358142219447787e-05, "loss": 2.235, "step": 5509000 }, { "epoch": 27.3, "learning_rate": 3.6356903633021703e-05, "loss": 2.2733, "step": 5509500 }, { "epoch": 27.3, "learning_rate": 3.635566504659562e-05, "loss": 2.254, "step": 5510000 }, { "epoch": 27.3, "learning_rate": 3.635442646016954e-05, "loss": 2.2436, "step": 5510500 }, { "epoch": 27.3, "learning_rate": 3.6353187873743454e-05, "loss": 2.2736, "step": 5511000 }, { "epoch": 27.31, "learning_rate": 3.635194928731737e-05, "loss": 2.2374, "step": 5511500 }, { "epoch": 27.31, "learning_rate": 3.635071070089129e-05, "loss": 2.2574, "step": 5512000 }, { "epoch": 27.31, "learning_rate": 3.6349472114465205e-05, "loss": 2.253, "step": 5512500 }, { "epoch": 27.31, "learning_rate": 3.634823352803912e-05, "loss": 2.2539, "step": 5513000 }, { "epoch": 27.32, "learning_rate": 3.634699494161304e-05, "loss": 2.2727, "step": 5513500 }, { "epoch": 27.32, "learning_rate": 3.6345756355186956e-05, "loss": 2.2654, "step": 5514000 }, { "epoch": 27.32, "learning_rate": 3.634451776876087e-05, "loss": 2.2546, "step": 5514500 }, { "epoch": 27.32, "learning_rate": 3.634327918233479e-05, "loss": 2.2389, "step": 5515000 }, { "epoch": 27.33, "learning_rate": 3.6342040595908707e-05, "loss": 2.2643, "step": 5515500 }, { "epoch": 27.33, "learning_rate": 3.6340802009482624e-05, "loss": 2.2547, "step": 5516000 }, { "epoch": 27.33, "learning_rate": 3.633956342305654e-05, "loss": 2.2357, "step": 5516500 }, { "epoch": 27.33, "learning_rate": 3.633832483663045e-05, "loss": 2.2576, "step": 5517000 }, { "epoch": 27.34, "learning_rate": 3.633708625020437e-05, "loss": 2.2713, "step": 5517500 }, { "epoch": 27.34, "learning_rate": 3.633585261812399e-05, "loss": 2.2306, "step": 5518000 }, { "epoch": 27.34, "learning_rate": 3.6334614031697905e-05, "loss": 2.2457, "step": 5518500 }, { "epoch": 27.34, "learning_rate": 3.633337544527182e-05, "loss": 2.2603, "step": 5519000 }, { "epoch": 27.35, "learning_rate": 3.633213685884574e-05, "loss": 2.2669, "step": 5519500 }, { "epoch": 27.35, "learning_rate": 3.6330898272419656e-05, "loss": 2.2569, "step": 5520000 }, { "epoch": 27.35, "learning_rate": 3.632965968599357e-05, "loss": 2.2568, "step": 5520500 }, { "epoch": 27.35, "learning_rate": 3.632842109956749e-05, "loss": 2.2558, "step": 5521000 }, { "epoch": 27.36, "learning_rate": 3.632718251314141e-05, "loss": 2.2727, "step": 5521500 }, { "epoch": 27.36, "learning_rate": 3.6325943926715324e-05, "loss": 2.2767, "step": 5522000 }, { "epoch": 27.36, "learning_rate": 3.632470534028924e-05, "loss": 2.2559, "step": 5522500 }, { "epoch": 27.36, "learning_rate": 3.632346675386315e-05, "loss": 2.2577, "step": 5523000 }, { "epoch": 27.37, "learning_rate": 3.632223064460992e-05, "loss": 2.2861, "step": 5523500 }, { "epoch": 27.37, "learning_rate": 3.6320992058183836e-05, "loss": 2.256, "step": 5524000 }, { "epoch": 27.37, "learning_rate": 3.6319753471757753e-05, "loss": 2.2271, "step": 5524500 }, { "epoch": 27.37, "learning_rate": 3.631851488533167e-05, "loss": 2.261, "step": 5525000 }, { "epoch": 27.38, "learning_rate": 3.631727629890559e-05, "loss": 2.245, "step": 5525500 }, { "epoch": 27.38, "learning_rate": 3.63160377124795e-05, "loss": 2.2664, "step": 5526000 }, { "epoch": 27.38, "learning_rate": 3.6314799126053414e-05, "loss": 2.2493, "step": 5526500 }, { "epoch": 27.38, "learning_rate": 3.631356301680019e-05, "loss": 2.2382, "step": 5527000 }, { "epoch": 27.39, "learning_rate": 3.631232443037411e-05, "loss": 2.252, "step": 5527500 }, { "epoch": 27.39, "learning_rate": 3.6311085843948024e-05, "loss": 2.2755, "step": 5528000 }, { "epoch": 27.39, "learning_rate": 3.630984725752194e-05, "loss": 2.2891, "step": 5528500 }, { "epoch": 27.39, "learning_rate": 3.630861114826871e-05, "loss": 2.2629, "step": 5529000 }, { "epoch": 27.4, "learning_rate": 3.630737256184262e-05, "loss": 2.2718, "step": 5529500 }, { "epoch": 27.4, "learning_rate": 3.630613892976224e-05, "loss": 2.2516, "step": 5530000 }, { "epoch": 27.4, "learning_rate": 3.630490034333616e-05, "loss": 2.2641, "step": 5530500 }, { "epoch": 27.4, "learning_rate": 3.6303661756910074e-05, "loss": 2.2786, "step": 5531000 }, { "epoch": 27.4, "learning_rate": 3.630242317048399e-05, "loss": 2.2593, "step": 5531500 }, { "epoch": 27.41, "learning_rate": 3.630118458405791e-05, "loss": 2.2493, "step": 5532000 }, { "epoch": 27.41, "learning_rate": 3.6299945997631825e-05, "loss": 2.2607, "step": 5532500 }, { "epoch": 27.41, "learning_rate": 3.629870741120574e-05, "loss": 2.2367, "step": 5533000 }, { "epoch": 27.41, "learning_rate": 3.629746882477966e-05, "loss": 2.2638, "step": 5533500 }, { "epoch": 27.42, "learning_rate": 3.6296230238353576e-05, "loss": 2.2509, "step": 5534000 }, { "epoch": 27.42, "learning_rate": 3.629499165192749e-05, "loss": 2.2706, "step": 5534500 }, { "epoch": 27.42, "learning_rate": 3.6293755542674255e-05, "loss": 2.2787, "step": 5535000 }, { "epoch": 27.42, "learning_rate": 3.629251695624817e-05, "loss": 2.254, "step": 5535500 }, { "epoch": 27.43, "learning_rate": 3.629127836982209e-05, "loss": 2.2543, "step": 5536000 }, { "epoch": 27.43, "learning_rate": 3.629004226056886e-05, "loss": 2.2467, "step": 5536500 }, { "epoch": 27.43, "learning_rate": 3.6288803674142774e-05, "loss": 2.2442, "step": 5537000 }, { "epoch": 27.43, "learning_rate": 3.628756508771669e-05, "loss": 2.2459, "step": 5537500 }, { "epoch": 27.44, "learning_rate": 3.628632897846346e-05, "loss": 2.285, "step": 5538000 }, { "epoch": 27.44, "learning_rate": 3.628509039203738e-05, "loss": 2.2431, "step": 5538500 }, { "epoch": 27.44, "learning_rate": 3.6283851805611294e-05, "loss": 2.2626, "step": 5539000 }, { "epoch": 27.44, "learning_rate": 3.628261321918521e-05, "loss": 2.2379, "step": 5539500 }, { "epoch": 27.45, "learning_rate": 3.628137710993198e-05, "loss": 2.2729, "step": 5540000 }, { "epoch": 27.45, "learning_rate": 3.6280138523505897e-05, "loss": 2.2661, "step": 5540500 }, { "epoch": 27.45, "learning_rate": 3.6278899937079814e-05, "loss": 2.2461, "step": 5541000 }, { "epoch": 27.45, "learning_rate": 3.627766135065373e-05, "loss": 2.2775, "step": 5541500 }, { "epoch": 27.46, "learning_rate": 3.627642276422765e-05, "loss": 2.2716, "step": 5542000 }, { "epoch": 27.46, "learning_rate": 3.627518417780156e-05, "loss": 2.2505, "step": 5542500 }, { "epoch": 27.46, "learning_rate": 3.6273945591375474e-05, "loss": 2.2486, "step": 5543000 }, { "epoch": 27.46, "learning_rate": 3.627270700494939e-05, "loss": 2.2634, "step": 5543500 }, { "epoch": 27.47, "learning_rate": 3.627146841852331e-05, "loss": 2.2735, "step": 5544000 }, { "epoch": 27.47, "learning_rate": 3.6270229832097225e-05, "loss": 2.2689, "step": 5544500 }, { "epoch": 27.47, "learning_rate": 3.626899124567114e-05, "loss": 2.2402, "step": 5545000 }, { "epoch": 27.47, "learning_rate": 3.626775265924506e-05, "loss": 2.2754, "step": 5545500 }, { "epoch": 27.48, "learning_rate": 3.6266514072818976e-05, "loss": 2.2596, "step": 5546000 }, { "epoch": 27.48, "learning_rate": 3.626527548639289e-05, "loss": 2.2925, "step": 5546500 }, { "epoch": 27.48, "learning_rate": 3.626403689996681e-05, "loss": 2.2827, "step": 5547000 }, { "epoch": 27.48, "learning_rate": 3.626279831354073e-05, "loss": 2.2446, "step": 5547500 }, { "epoch": 27.49, "learning_rate": 3.6261559727114644e-05, "loss": 2.2501, "step": 5548000 }, { "epoch": 27.49, "learning_rate": 3.626032114068856e-05, "loss": 2.2617, "step": 5548500 }, { "epoch": 27.49, "learning_rate": 3.625908255426248e-05, "loss": 2.2591, "step": 5549000 }, { "epoch": 27.49, "learning_rate": 3.6257843967836395e-05, "loss": 2.2573, "step": 5549500 }, { "epoch": 27.5, "learning_rate": 3.6256605381410305e-05, "loss": 2.2601, "step": 5550000 }, { "epoch": 27.5, "learning_rate": 3.625536679498422e-05, "loss": 2.2669, "step": 5550500 }, { "epoch": 27.5, "learning_rate": 3.625412820855814e-05, "loss": 2.2688, "step": 5551000 }, { "epoch": 27.5, "learning_rate": 3.625289209930491e-05, "loss": 2.256, "step": 5551500 }, { "epoch": 27.51, "learning_rate": 3.6251653512878824e-05, "loss": 2.2706, "step": 5552000 }, { "epoch": 27.51, "learning_rate": 3.625041492645274e-05, "loss": 2.2595, "step": 5552500 }, { "epoch": 27.51, "learning_rate": 3.624917634002666e-05, "loss": 2.271, "step": 5553000 }, { "epoch": 27.51, "learning_rate": 3.6247937753600575e-05, "loss": 2.2814, "step": 5553500 }, { "epoch": 27.52, "learning_rate": 3.624669916717449e-05, "loss": 2.277, "step": 5554000 }, { "epoch": 27.52, "learning_rate": 3.62454605807484e-05, "loss": 2.2732, "step": 5554500 }, { "epoch": 27.52, "learning_rate": 3.624422199432232e-05, "loss": 2.2527, "step": 5555000 }, { "epoch": 27.52, "learning_rate": 3.624298836224194e-05, "loss": 2.2457, "step": 5555500 }, { "epoch": 27.53, "learning_rate": 3.624174977581586e-05, "loss": 2.2607, "step": 5556000 }, { "epoch": 27.53, "learning_rate": 3.6240511189389774e-05, "loss": 2.2696, "step": 5556500 }, { "epoch": 27.53, "learning_rate": 3.623927260296369e-05, "loss": 2.2735, "step": 5557000 }, { "epoch": 27.53, "learning_rate": 3.623803401653761e-05, "loss": 2.2682, "step": 5557500 }, { "epoch": 27.54, "learning_rate": 3.6236795430111524e-05, "loss": 2.2556, "step": 5558000 }, { "epoch": 27.54, "learning_rate": 3.623555684368544e-05, "loss": 2.263, "step": 5558500 }, { "epoch": 27.54, "learning_rate": 3.623432073443221e-05, "loss": 2.2585, "step": 5559000 }, { "epoch": 27.54, "learning_rate": 3.623308214800613e-05, "loss": 2.2543, "step": 5559500 }, { "epoch": 27.55, "learning_rate": 3.6231843561580044e-05, "loss": 2.2579, "step": 5560000 }, { "epoch": 27.55, "learning_rate": 3.623060497515396e-05, "loss": 2.2519, "step": 5560500 }, { "epoch": 27.55, "learning_rate": 3.622936638872788e-05, "loss": 2.2483, "step": 5561000 }, { "epoch": 27.55, "learning_rate": 3.6228127802301795e-05, "loss": 2.2644, "step": 5561500 }, { "epoch": 27.56, "learning_rate": 3.622688921587571e-05, "loss": 2.2766, "step": 5562000 }, { "epoch": 27.56, "learning_rate": 3.622565062944963e-05, "loss": 2.2594, "step": 5562500 }, { "epoch": 27.56, "learning_rate": 3.6224412043023546e-05, "loss": 2.2581, "step": 5563000 }, { "epoch": 27.56, "learning_rate": 3.6223173456597456e-05, "loss": 2.2555, "step": 5563500 }, { "epoch": 27.57, "learning_rate": 3.6221937347344225e-05, "loss": 2.2705, "step": 5564000 }, { "epoch": 27.57, "learning_rate": 3.622069876091814e-05, "loss": 2.2884, "step": 5564500 }, { "epoch": 27.57, "learning_rate": 3.621946017449206e-05, "loss": 2.2761, "step": 5565000 }, { "epoch": 27.57, "learning_rate": 3.6218221588065975e-05, "loss": 2.2661, "step": 5565500 }, { "epoch": 27.58, "learning_rate": 3.621698300163989e-05, "loss": 2.2659, "step": 5566000 }, { "epoch": 27.58, "learning_rate": 3.621574441521381e-05, "loss": 2.2419, "step": 5566500 }, { "epoch": 27.58, "learning_rate": 3.621450582878772e-05, "loss": 2.2708, "step": 5567000 }, { "epoch": 27.58, "learning_rate": 3.6213269719534495e-05, "loss": 2.2641, "step": 5567500 }, { "epoch": 27.59, "learning_rate": 3.621203113310841e-05, "loss": 2.2466, "step": 5568000 }, { "epoch": 27.59, "learning_rate": 3.621079254668233e-05, "loss": 2.2507, "step": 5568500 }, { "epoch": 27.59, "learning_rate": 3.6209553960256246e-05, "loss": 2.2843, "step": 5569000 }, { "epoch": 27.59, "learning_rate": 3.620831785100301e-05, "loss": 2.2604, "step": 5569500 }, { "epoch": 27.6, "learning_rate": 3.6207079264576925e-05, "loss": 2.2593, "step": 5570000 }, { "epoch": 27.6, "learning_rate": 3.620584067815084e-05, "loss": 2.2368, "step": 5570500 }, { "epoch": 27.6, "learning_rate": 3.620460209172476e-05, "loss": 2.2762, "step": 5571000 }, { "epoch": 27.6, "learning_rate": 3.6203363505298675e-05, "loss": 2.2359, "step": 5571500 }, { "epoch": 27.61, "learning_rate": 3.6202127396045444e-05, "loss": 2.2649, "step": 5572000 }, { "epoch": 27.61, "learning_rate": 3.6200893763965065e-05, "loss": 2.2789, "step": 5572500 }, { "epoch": 27.61, "learning_rate": 3.619965517753898e-05, "loss": 2.2694, "step": 5573000 }, { "epoch": 27.61, "learning_rate": 3.61984165911129e-05, "loss": 2.3009, "step": 5573500 }, { "epoch": 27.62, "learning_rate": 3.619717800468681e-05, "loss": 2.2679, "step": 5574000 }, { "epoch": 27.62, "learning_rate": 3.6195939418260726e-05, "loss": 2.2534, "step": 5574500 }, { "epoch": 27.62, "learning_rate": 3.619470083183464e-05, "loss": 2.2697, "step": 5575000 }, { "epoch": 27.62, "learning_rate": 3.619346224540856e-05, "loss": 2.2451, "step": 5575500 }, { "epoch": 27.63, "learning_rate": 3.619222365898248e-05, "loss": 2.2733, "step": 5576000 }, { "epoch": 27.63, "learning_rate": 3.6190985072556394e-05, "loss": 2.2543, "step": 5576500 }, { "epoch": 27.63, "learning_rate": 3.618974896330316e-05, "loss": 2.2541, "step": 5577000 }, { "epoch": 27.63, "learning_rate": 3.618851037687708e-05, "loss": 2.2615, "step": 5577500 }, { "epoch": 27.64, "learning_rate": 3.6187271790450996e-05, "loss": 2.2746, "step": 5578000 }, { "epoch": 27.64, "learning_rate": 3.6186035681197765e-05, "loss": 2.2567, "step": 5578500 }, { "epoch": 27.64, "learning_rate": 3.6184799571944534e-05, "loss": 2.2616, "step": 5579000 }, { "epoch": 27.64, "learning_rate": 3.618356098551845e-05, "loss": 2.2827, "step": 5579500 }, { "epoch": 27.65, "learning_rate": 3.618232239909237e-05, "loss": 2.2811, "step": 5580000 }, { "epoch": 27.65, "learning_rate": 3.6181083812666285e-05, "loss": 2.2912, "step": 5580500 }, { "epoch": 27.65, "learning_rate": 3.61798452262402e-05, "loss": 2.31, "step": 5581000 }, { "epoch": 27.65, "learning_rate": 3.617860663981412e-05, "loss": 2.2568, "step": 5581500 }, { "epoch": 27.66, "learning_rate": 3.6177368053388035e-05, "loss": 2.273, "step": 5582000 }, { "epoch": 27.66, "learning_rate": 3.617612946696195e-05, "loss": 2.2506, "step": 5582500 }, { "epoch": 27.66, "learning_rate": 3.617489088053586e-05, "loss": 2.2841, "step": 5583000 }, { "epoch": 27.66, "learning_rate": 3.617365477128263e-05, "loss": 2.2624, "step": 5583500 }, { "epoch": 27.67, "learning_rate": 3.617241618485655e-05, "loss": 2.2689, "step": 5584000 }, { "epoch": 27.67, "learning_rate": 3.6171177598430465e-05, "loss": 2.262, "step": 5584500 }, { "epoch": 27.67, "learning_rate": 3.616993901200438e-05, "loss": 2.2513, "step": 5585000 }, { "epoch": 27.67, "learning_rate": 3.61687004255783e-05, "loss": 2.2472, "step": 5585500 }, { "epoch": 27.67, "learning_rate": 3.6167461839152216e-05, "loss": 2.2559, "step": 5586000 }, { "epoch": 27.68, "learning_rate": 3.6166223252726126e-05, "loss": 2.2452, "step": 5586500 }, { "epoch": 27.68, "learning_rate": 3.616498466630004e-05, "loss": 2.2461, "step": 5587000 }, { "epoch": 27.68, "learning_rate": 3.616374607987396e-05, "loss": 2.2635, "step": 5587500 }, { "epoch": 27.68, "learning_rate": 3.616251244779358e-05, "loss": 2.2708, "step": 5588000 }, { "epoch": 27.69, "learning_rate": 3.61612738613675e-05, "loss": 2.2443, "step": 5588500 }, { "epoch": 27.69, "learning_rate": 3.6160035274941415e-05, "loss": 2.2791, "step": 5589000 }, { "epoch": 27.69, "learning_rate": 3.615879668851533e-05, "loss": 2.2916, "step": 5589500 }, { "epoch": 27.69, "learning_rate": 3.615755810208925e-05, "loss": 2.2805, "step": 5590000 }, { "epoch": 27.7, "learning_rate": 3.6156319515663165e-05, "loss": 2.2724, "step": 5590500 }, { "epoch": 27.7, "learning_rate": 3.615508092923708e-05, "loss": 2.2532, "step": 5591000 }, { "epoch": 27.7, "learning_rate": 3.6153842342811e-05, "loss": 2.2645, "step": 5591500 }, { "epoch": 27.7, "learning_rate": 3.6152603756384916e-05, "loss": 2.264, "step": 5592000 }, { "epoch": 27.71, "learning_rate": 3.6151365169958826e-05, "loss": 2.2729, "step": 5592500 }, { "epoch": 27.71, "learning_rate": 3.615012658353274e-05, "loss": 2.3001, "step": 5593000 }, { "epoch": 27.71, "learning_rate": 3.614888799710666e-05, "loss": 2.2556, "step": 5593500 }, { "epoch": 27.71, "learning_rate": 3.6147651887853436e-05, "loss": 2.2536, "step": 5594000 }, { "epoch": 27.72, "learning_rate": 3.614641330142735e-05, "loss": 2.2779, "step": 5594500 }, { "epoch": 27.72, "learning_rate": 3.614517471500127e-05, "loss": 2.2704, "step": 5595000 }, { "epoch": 27.72, "learning_rate": 3.614393612857518e-05, "loss": 2.2851, "step": 5595500 }, { "epoch": 27.72, "learning_rate": 3.6142697542149097e-05, "loss": 2.2879, "step": 5596000 }, { "epoch": 27.73, "learning_rate": 3.6141461432895865e-05, "loss": 2.2571, "step": 5596500 }, { "epoch": 27.73, "learning_rate": 3.614022284646978e-05, "loss": 2.2481, "step": 5597000 }, { "epoch": 27.73, "learning_rate": 3.61389842600437e-05, "loss": 2.2497, "step": 5597500 }, { "epoch": 27.73, "learning_rate": 3.6137745673617616e-05, "loss": 2.2729, "step": 5598000 }, { "epoch": 27.74, "learning_rate": 3.6136509564364385e-05, "loss": 2.2782, "step": 5598500 }, { "epoch": 27.74, "learning_rate": 3.61352709779383e-05, "loss": 2.2591, "step": 5599000 }, { "epoch": 27.74, "learning_rate": 3.613403239151222e-05, "loss": 2.2868, "step": 5599500 }, { "epoch": 27.74, "learning_rate": 3.6132793805086136e-05, "loss": 2.2517, "step": 5600000 }, { "epoch": 27.75, "learning_rate": 3.613155521866005e-05, "loss": 2.2596, "step": 5600500 }, { "epoch": 27.75, "learning_rate": 3.613031663223397e-05, "loss": 2.2664, "step": 5601000 }, { "epoch": 27.75, "learning_rate": 3.6129078045807887e-05, "loss": 2.2664, "step": 5601500 }, { "epoch": 27.75, "learning_rate": 3.61278394593818e-05, "loss": 2.2545, "step": 5602000 }, { "epoch": 27.76, "learning_rate": 3.6126600872955714e-05, "loss": 2.2648, "step": 5602500 }, { "epoch": 27.76, "learning_rate": 3.612536476370248e-05, "loss": 2.2745, "step": 5603000 }, { "epoch": 27.76, "learning_rate": 3.612412865444925e-05, "loss": 2.2538, "step": 5603500 }, { "epoch": 27.76, "learning_rate": 3.612289006802317e-05, "loss": 2.2435, "step": 5604000 }, { "epoch": 27.77, "learning_rate": 3.6121651481597085e-05, "loss": 2.2784, "step": 5604500 }, { "epoch": 27.77, "learning_rate": 3.6120412895171e-05, "loss": 2.2727, "step": 5605000 }, { "epoch": 27.77, "learning_rate": 3.611917430874492e-05, "loss": 2.2786, "step": 5605500 }, { "epoch": 27.77, "learning_rate": 3.6117935722318836e-05, "loss": 2.2708, "step": 5606000 }, { "epoch": 27.78, "learning_rate": 3.6116699613065605e-05, "loss": 2.268, "step": 5606500 }, { "epoch": 27.78, "learning_rate": 3.611546102663952e-05, "loss": 2.2608, "step": 5607000 }, { "epoch": 27.78, "learning_rate": 3.611422244021344e-05, "loss": 2.2927, "step": 5607500 }, { "epoch": 27.78, "learning_rate": 3.6112983853787356e-05, "loss": 2.2539, "step": 5608000 }, { "epoch": 27.79, "learning_rate": 3.6111745267361266e-05, "loss": 2.2372, "step": 5608500 }, { "epoch": 27.79, "learning_rate": 3.611050668093518e-05, "loss": 2.2446, "step": 5609000 }, { "epoch": 27.79, "learning_rate": 3.61092680945091e-05, "loss": 2.2775, "step": 5609500 }, { "epoch": 27.79, "learning_rate": 3.610803446242872e-05, "loss": 2.2643, "step": 5610000 }, { "epoch": 27.8, "learning_rate": 3.610679587600264e-05, "loss": 2.2885, "step": 5610500 }, { "epoch": 27.8, "learning_rate": 3.6105557289576554e-05, "loss": 2.2784, "step": 5611000 }, { "epoch": 27.8, "learning_rate": 3.610431870315047e-05, "loss": 2.2686, "step": 5611500 }, { "epoch": 27.8, "learning_rate": 3.610308011672439e-05, "loss": 2.2745, "step": 5612000 }, { "epoch": 27.81, "learning_rate": 3.6101841530298305e-05, "loss": 2.2697, "step": 5612500 }, { "epoch": 27.81, "learning_rate": 3.610060294387222e-05, "loss": 2.2637, "step": 5613000 }, { "epoch": 27.81, "learning_rate": 3.609936435744614e-05, "loss": 2.2522, "step": 5613500 }, { "epoch": 27.81, "learning_rate": 3.6098125771020056e-05, "loss": 2.2637, "step": 5614000 }, { "epoch": 27.82, "learning_rate": 3.609688718459397e-05, "loss": 2.275, "step": 5614500 }, { "epoch": 27.82, "learning_rate": 3.6095653552513586e-05, "loss": 2.2715, "step": 5615000 }, { "epoch": 27.82, "learning_rate": 3.6094414966087503e-05, "loss": 2.2733, "step": 5615500 }, { "epoch": 27.82, "learning_rate": 3.609317637966142e-05, "loss": 2.2502, "step": 5616000 }, { "epoch": 27.83, "learning_rate": 3.609193779323534e-05, "loss": 2.2829, "step": 5616500 }, { "epoch": 27.83, "learning_rate": 3.6090699206809254e-05, "loss": 2.2659, "step": 5617000 }, { "epoch": 27.83, "learning_rate": 3.608946062038317e-05, "loss": 2.2579, "step": 5617500 }, { "epoch": 27.83, "learning_rate": 3.608822203395709e-05, "loss": 2.2465, "step": 5618000 }, { "epoch": 27.84, "learning_rate": 3.6086983447531005e-05, "loss": 2.2796, "step": 5618500 }, { "epoch": 27.84, "learning_rate": 3.608574486110492e-05, "loss": 2.2645, "step": 5619000 }, { "epoch": 27.84, "learning_rate": 3.608450627467884e-05, "loss": 2.2538, "step": 5619500 }, { "epoch": 27.84, "learning_rate": 3.6083267688252756e-05, "loss": 2.2571, "step": 5620000 }, { "epoch": 27.85, "learning_rate": 3.608202910182667e-05, "loss": 2.2596, "step": 5620500 }, { "epoch": 27.85, "learning_rate": 3.608079051540059e-05, "loss": 2.2624, "step": 5621000 }, { "epoch": 27.85, "learning_rate": 3.6079551928974507e-05, "loss": 2.2717, "step": 5621500 }, { "epoch": 27.85, "learning_rate": 3.607831829689412e-05, "loss": 2.2788, "step": 5622000 }, { "epoch": 27.86, "learning_rate": 3.607707971046804e-05, "loss": 2.2695, "step": 5622500 }, { "epoch": 27.86, "learning_rate": 3.6075841124041954e-05, "loss": 2.2771, "step": 5623000 }, { "epoch": 27.86, "learning_rate": 3.607460253761587e-05, "loss": 2.2837, "step": 5623500 }, { "epoch": 27.86, "learning_rate": 3.607336395118979e-05, "loss": 2.2757, "step": 5624000 }, { "epoch": 27.87, "learning_rate": 3.6072125364763705e-05, "loss": 2.2864, "step": 5624500 }, { "epoch": 27.87, "learning_rate": 3.607088677833762e-05, "loss": 2.2869, "step": 5625000 }, { "epoch": 27.87, "learning_rate": 3.606964819191154e-05, "loss": 2.2667, "step": 5625500 }, { "epoch": 27.87, "learning_rate": 3.6068409605485456e-05, "loss": 2.2932, "step": 5626000 }, { "epoch": 27.88, "learning_rate": 3.606717349623222e-05, "loss": 2.2459, "step": 5626500 }, { "epoch": 27.88, "learning_rate": 3.6065934909806135e-05, "loss": 2.2514, "step": 5627000 }, { "epoch": 27.88, "learning_rate": 3.606469632338005e-05, "loss": 2.2673, "step": 5627500 }, { "epoch": 27.88, "learning_rate": 3.606345773695397e-05, "loss": 2.2727, "step": 5628000 }, { "epoch": 27.89, "learning_rate": 3.6062219150527886e-05, "loss": 2.2612, "step": 5628500 }, { "epoch": 27.89, "learning_rate": 3.6060983041274654e-05, "loss": 2.2404, "step": 5629000 }, { "epoch": 27.89, "learning_rate": 3.605974445484857e-05, "loss": 2.2698, "step": 5629500 }, { "epoch": 27.89, "learning_rate": 3.605850834559534e-05, "loss": 2.2836, "step": 5630000 }, { "epoch": 27.9, "learning_rate": 3.605726975916926e-05, "loss": 2.2787, "step": 5630500 }, { "epoch": 27.9, "learning_rate": 3.6056031172743174e-05, "loss": 2.2894, "step": 5631000 }, { "epoch": 27.9, "learning_rate": 3.605479506348994e-05, "loss": 2.249, "step": 5631500 }, { "epoch": 27.9, "learning_rate": 3.605355647706386e-05, "loss": 2.2716, "step": 5632000 }, { "epoch": 27.91, "learning_rate": 3.605231789063778e-05, "loss": 2.2512, "step": 5632500 }, { "epoch": 27.91, "learning_rate": 3.6051079304211694e-05, "loss": 2.2884, "step": 5633000 }, { "epoch": 27.91, "learning_rate": 3.604984071778561e-05, "loss": 2.2531, "step": 5633500 }, { "epoch": 27.91, "learning_rate": 3.604860213135952e-05, "loss": 2.2674, "step": 5634000 }, { "epoch": 27.92, "learning_rate": 3.604736354493344e-05, "loss": 2.2461, "step": 5634500 }, { "epoch": 27.92, "learning_rate": 3.604612743568021e-05, "loss": 2.2815, "step": 5635000 }, { "epoch": 27.92, "learning_rate": 3.604488884925413e-05, "loss": 2.2463, "step": 5635500 }, { "epoch": 27.92, "learning_rate": 3.604365026282804e-05, "loss": 2.2841, "step": 5636000 }, { "epoch": 27.93, "learning_rate": 3.604241167640196e-05, "loss": 2.2488, "step": 5636500 }, { "epoch": 27.93, "learning_rate": 3.6041173089975874e-05, "loss": 2.2632, "step": 5637000 }, { "epoch": 27.93, "learning_rate": 3.603993698072264e-05, "loss": 2.2737, "step": 5637500 }, { "epoch": 27.93, "learning_rate": 3.603869839429656e-05, "loss": 2.2868, "step": 5638000 }, { "epoch": 27.94, "learning_rate": 3.603745980787048e-05, "loss": 2.244, "step": 5638500 }, { "epoch": 27.94, "learning_rate": 3.6036221221444394e-05, "loss": 2.2787, "step": 5639000 }, { "epoch": 27.94, "learning_rate": 3.603498263501831e-05, "loss": 2.2593, "step": 5639500 }, { "epoch": 27.94, "learning_rate": 3.603374404859222e-05, "loss": 2.2728, "step": 5640000 }, { "epoch": 27.94, "learning_rate": 3.603250546216614e-05, "loss": 2.2731, "step": 5640500 }, { "epoch": 27.95, "learning_rate": 3.6031266875740055e-05, "loss": 2.2936, "step": 5641000 }, { "epoch": 27.95, "learning_rate": 3.603003076648683e-05, "loss": 2.2646, "step": 5641500 }, { "epoch": 27.95, "learning_rate": 3.602879218006075e-05, "loss": 2.3049, "step": 5642000 }, { "epoch": 27.95, "learning_rate": 3.6027553593634664e-05, "loss": 2.2743, "step": 5642500 }, { "epoch": 27.96, "learning_rate": 3.6026315007208574e-05, "loss": 2.2442, "step": 5643000 }, { "epoch": 27.96, "learning_rate": 3.602507642078249e-05, "loss": 2.2908, "step": 5643500 }, { "epoch": 27.96, "learning_rate": 3.602383783435641e-05, "loss": 2.2864, "step": 5644000 }, { "epoch": 27.96, "learning_rate": 3.6022599247930325e-05, "loss": 2.2823, "step": 5644500 }, { "epoch": 27.97, "learning_rate": 3.602136066150424e-05, "loss": 2.2878, "step": 5645000 }, { "epoch": 27.97, "learning_rate": 3.602012207507815e-05, "loss": 2.2742, "step": 5645500 }, { "epoch": 27.97, "learning_rate": 3.601888844299778e-05, "loss": 2.2998, "step": 5646000 }, { "epoch": 27.97, "learning_rate": 3.6017649856571697e-05, "loss": 2.2767, "step": 5646500 }, { "epoch": 27.98, "learning_rate": 3.6016411270145613e-05, "loss": 2.264, "step": 5647000 }, { "epoch": 27.98, "learning_rate": 3.601517268371953e-05, "loss": 2.2855, "step": 5647500 }, { "epoch": 27.98, "learning_rate": 3.601393409729345e-05, "loss": 2.2364, "step": 5648000 }, { "epoch": 27.98, "learning_rate": 3.6012695510867364e-05, "loss": 2.2728, "step": 5648500 }, { "epoch": 27.99, "learning_rate": 3.601145692444128e-05, "loss": 2.2712, "step": 5649000 }, { "epoch": 27.99, "learning_rate": 3.601021833801519e-05, "loss": 2.2799, "step": 5649500 }, { "epoch": 27.99, "learning_rate": 3.600897975158911e-05, "loss": 2.2532, "step": 5650000 }, { "epoch": 27.99, "learning_rate": 3.6007741165163025e-05, "loss": 2.2599, "step": 5650500 }, { "epoch": 28.0, "learning_rate": 3.600650257873694e-05, "loss": 2.2928, "step": 5651000 }, { "epoch": 28.0, "learning_rate": 3.600526399231086e-05, "loss": 2.2934, "step": 5651500 }, { "epoch": 28.0, "eval_accuracy": 0.6571310181504103, "eval_accuracy_mlm": 0.6123743861697156, "eval_accuracy_nsp": 0.8682572492047741, "eval_loss": 2.3233656883239746, "eval_runtime": 146.0438, "eval_samples_per_second": 1745.771, "eval_steps_per_second": 72.745, "step": 5651604 }, { "epoch": 28.0, "learning_rate": 3.600402540588477e-05, "loss": 2.2378, "step": 5652000 }, { "epoch": 28.0, "learning_rate": 3.6002786819458686e-05, "loss": 2.2507, "step": 5652500 }, { "epoch": 28.01, "learning_rate": 3.6001550710205455e-05, "loss": 2.2047, "step": 5653000 }, { "epoch": 28.01, "learning_rate": 3.600031212377937e-05, "loss": 2.2663, "step": 5653500 }, { "epoch": 28.01, "learning_rate": 3.599907353735329e-05, "loss": 2.2253, "step": 5654000 }, { "epoch": 28.01, "learning_rate": 3.5997834950927206e-05, "loss": 2.2129, "step": 5654500 }, { "epoch": 28.02, "learning_rate": 3.599659636450112e-05, "loss": 2.2342, "step": 5655000 }, { "epoch": 28.02, "learning_rate": 3.599535777807504e-05, "loss": 2.2216, "step": 5655500 }, { "epoch": 28.02, "learning_rate": 3.5994119191648957e-05, "loss": 2.211, "step": 5656000 }, { "epoch": 28.02, "learning_rate": 3.5992883082395725e-05, "loss": 2.2144, "step": 5656500 }, { "epoch": 28.03, "learning_rate": 3.599164449596964e-05, "loss": 2.2563, "step": 5657000 }, { "epoch": 28.03, "learning_rate": 3.599040590954356e-05, "loss": 2.2285, "step": 5657500 }, { "epoch": 28.03, "learning_rate": 3.5989167323117476e-05, "loss": 2.2173, "step": 5658000 }, { "epoch": 28.03, "learning_rate": 3.598792873669139e-05, "loss": 2.2218, "step": 5658500 }, { "epoch": 28.04, "learning_rate": 3.5986692627438155e-05, "loss": 2.2319, "step": 5659000 }, { "epoch": 28.04, "learning_rate": 3.598545404101207e-05, "loss": 2.2306, "step": 5659500 }, { "epoch": 28.04, "learning_rate": 3.598421793175885e-05, "loss": 2.2447, "step": 5660000 }, { "epoch": 28.04, "learning_rate": 3.5982979345332765e-05, "loss": 2.26, "step": 5660500 }, { "epoch": 28.05, "learning_rate": 3.598174075890668e-05, "loss": 2.2248, "step": 5661000 }, { "epoch": 28.05, "learning_rate": 3.59805021724806e-05, "loss": 2.2226, "step": 5661500 }, { "epoch": 28.05, "learning_rate": 3.597926358605451e-05, "loss": 2.2503, "step": 5662000 }, { "epoch": 28.05, "learning_rate": 3.5978024999628425e-05, "loss": 2.2375, "step": 5662500 }, { "epoch": 28.06, "learning_rate": 3.597678641320234e-05, "loss": 2.2391, "step": 5663000 }, { "epoch": 28.06, "learning_rate": 3.597554782677626e-05, "loss": 2.2428, "step": 5663500 }, { "epoch": 28.06, "learning_rate": 3.5974309240350176e-05, "loss": 2.2463, "step": 5664000 }, { "epoch": 28.06, "learning_rate": 3.597307065392409e-05, "loss": 2.2465, "step": 5664500 }, { "epoch": 28.07, "learning_rate": 3.597183206749801e-05, "loss": 2.2468, "step": 5665000 }, { "epoch": 28.07, "learning_rate": 3.597059348107192e-05, "loss": 2.2688, "step": 5665500 }, { "epoch": 28.07, "learning_rate": 3.596935737181869e-05, "loss": 2.2244, "step": 5666000 }, { "epoch": 28.07, "learning_rate": 3.5968118785392606e-05, "loss": 2.2423, "step": 5666500 }, { "epoch": 28.08, "learning_rate": 3.596688019896652e-05, "loss": 2.2341, "step": 5667000 }, { "epoch": 28.08, "learning_rate": 3.596564161254044e-05, "loss": 2.2489, "step": 5667500 }, { "epoch": 28.08, "learning_rate": 3.596440302611436e-05, "loss": 2.2332, "step": 5668000 }, { "epoch": 28.08, "learning_rate": 3.596316939403398e-05, "loss": 2.2225, "step": 5668500 }, { "epoch": 28.09, "learning_rate": 3.5961930807607894e-05, "loss": 2.2174, "step": 5669000 }, { "epoch": 28.09, "learning_rate": 3.596069222118181e-05, "loss": 2.2281, "step": 5669500 }, { "epoch": 28.09, "learning_rate": 3.595945363475573e-05, "loss": 2.2196, "step": 5670000 }, { "epoch": 28.09, "learning_rate": 3.5958215048329645e-05, "loss": 2.273, "step": 5670500 }, { "epoch": 28.1, "learning_rate": 3.595697646190356e-05, "loss": 2.2395, "step": 5671000 }, { "epoch": 28.1, "learning_rate": 3.595573787547747e-05, "loss": 2.2184, "step": 5671500 }, { "epoch": 28.1, "learning_rate": 3.595449928905139e-05, "loss": 2.2504, "step": 5672000 }, { "epoch": 28.1, "learning_rate": 3.5953260702625306e-05, "loss": 2.2414, "step": 5672500 }, { "epoch": 28.11, "learning_rate": 3.595202459337208e-05, "loss": 2.2489, "step": 5673000 }, { "epoch": 28.11, "learning_rate": 3.5950786006946e-05, "loss": 2.2339, "step": 5673500 }, { "epoch": 28.11, "learning_rate": 3.5949547420519916e-05, "loss": 2.2422, "step": 5674000 }, { "epoch": 28.11, "learning_rate": 3.594831626561239e-05, "loss": 2.2523, "step": 5674500 }, { "epoch": 28.12, "learning_rate": 3.59470776791863e-05, "loss": 2.2267, "step": 5675000 }, { "epoch": 28.12, "learning_rate": 3.594584156993307e-05, "loss": 2.2262, "step": 5675500 }, { "epoch": 28.12, "learning_rate": 3.5944602983506984e-05, "loss": 2.2335, "step": 5676000 }, { "epoch": 28.12, "learning_rate": 3.59433643970809e-05, "loss": 2.2257, "step": 5676500 }, { "epoch": 28.13, "learning_rate": 3.594212581065482e-05, "loss": 2.2219, "step": 5677000 }, { "epoch": 28.13, "learning_rate": 3.5940887224228735e-05, "loss": 2.2501, "step": 5677500 }, { "epoch": 28.13, "learning_rate": 3.5939648637802645e-05, "loss": 2.2395, "step": 5678000 }, { "epoch": 28.13, "learning_rate": 3.593841005137656e-05, "loss": 2.2518, "step": 5678500 }, { "epoch": 28.14, "learning_rate": 3.593717394212334e-05, "loss": 2.2567, "step": 5679000 }, { "epoch": 28.14, "learning_rate": 3.5935935355697254e-05, "loss": 2.2426, "step": 5679500 }, { "epoch": 28.14, "learning_rate": 3.593469676927117e-05, "loss": 2.2331, "step": 5680000 }, { "epoch": 28.14, "learning_rate": 3.593346066001794e-05, "loss": 2.2449, "step": 5680500 }, { "epoch": 28.15, "learning_rate": 3.593222207359185e-05, "loss": 2.2522, "step": 5681000 }, { "epoch": 28.15, "learning_rate": 3.593098348716577e-05, "loss": 2.238, "step": 5681500 }, { "epoch": 28.15, "learning_rate": 3.5929744900739684e-05, "loss": 2.2348, "step": 5682000 }, { "epoch": 28.15, "learning_rate": 3.59285063143136e-05, "loss": 2.2584, "step": 5682500 }, { "epoch": 28.16, "learning_rate": 3.592726772788752e-05, "loss": 2.2451, "step": 5683000 }, { "epoch": 28.16, "learning_rate": 3.5926029141461435e-05, "loss": 2.2386, "step": 5683500 }, { "epoch": 28.16, "learning_rate": 3.592479055503535e-05, "loss": 2.2602, "step": 5684000 }, { "epoch": 28.16, "learning_rate": 3.592355196860926e-05, "loss": 2.2475, "step": 5684500 }, { "epoch": 28.17, "learning_rate": 3.592231338218318e-05, "loss": 2.2594, "step": 5685000 }, { "epoch": 28.17, "learning_rate": 3.5921074795757096e-05, "loss": 2.2606, "step": 5685500 }, { "epoch": 28.17, "learning_rate": 3.591983620933101e-05, "loss": 2.2404, "step": 5686000 }, { "epoch": 28.17, "learning_rate": 3.591859762290493e-05, "loss": 2.219, "step": 5686500 }, { "epoch": 28.18, "learning_rate": 3.591735903647885e-05, "loss": 2.2316, "step": 5687000 }, { "epoch": 28.18, "learning_rate": 3.5916122927225615e-05, "loss": 2.2293, "step": 5687500 }, { "epoch": 28.18, "learning_rate": 3.591488434079953e-05, "loss": 2.2417, "step": 5688000 }, { "epoch": 28.18, "learning_rate": 3.591364575437345e-05, "loss": 2.2511, "step": 5688500 }, { "epoch": 28.19, "learning_rate": 3.5912407167947366e-05, "loss": 2.2389, "step": 5689000 }, { "epoch": 28.19, "learning_rate": 3.591116858152128e-05, "loss": 2.2493, "step": 5689500 }, { "epoch": 28.19, "learning_rate": 3.59099299950952e-05, "loss": 2.2295, "step": 5690000 }, { "epoch": 28.19, "learning_rate": 3.590869140866912e-05, "loss": 2.2424, "step": 5690500 }, { "epoch": 28.2, "learning_rate": 3.5907452822243034e-05, "loss": 2.267, "step": 5691000 }, { "epoch": 28.2, "learning_rate": 3.590621423581695e-05, "loss": 2.2283, "step": 5691500 }, { "epoch": 28.2, "learning_rate": 3.590497564939087e-05, "loss": 2.2434, "step": 5692000 }, { "epoch": 28.2, "learning_rate": 3.590373954013763e-05, "loss": 2.2637, "step": 5692500 }, { "epoch": 28.21, "learning_rate": 3.590250095371155e-05, "loss": 2.2305, "step": 5693000 }, { "epoch": 28.21, "learning_rate": 3.5901262367285464e-05, "loss": 2.2433, "step": 5693500 }, { "epoch": 28.21, "learning_rate": 3.590002873520509e-05, "loss": 2.2236, "step": 5694000 }, { "epoch": 28.21, "learning_rate": 3.5898790148779e-05, "loss": 2.261, "step": 5694500 }, { "epoch": 28.21, "learning_rate": 3.589755156235292e-05, "loss": 2.2614, "step": 5695000 }, { "epoch": 28.22, "learning_rate": 3.5896312975926835e-05, "loss": 2.2338, "step": 5695500 }, { "epoch": 28.22, "learning_rate": 3.589507438950075e-05, "loss": 2.2459, "step": 5696000 }, { "epoch": 28.22, "learning_rate": 3.589383580307467e-05, "loss": 2.2287, "step": 5696500 }, { "epoch": 28.22, "learning_rate": 3.589259721664858e-05, "loss": 2.2181, "step": 5697000 }, { "epoch": 28.23, "learning_rate": 3.5891358630222496e-05, "loss": 2.2656, "step": 5697500 }, { "epoch": 28.23, "learning_rate": 3.589012252096927e-05, "loss": 2.2618, "step": 5698000 }, { "epoch": 28.23, "learning_rate": 3.588888393454319e-05, "loss": 2.2414, "step": 5698500 }, { "epoch": 28.23, "learning_rate": 3.5887645348117106e-05, "loss": 2.2461, "step": 5699000 }, { "epoch": 28.24, "learning_rate": 3.588640676169102e-05, "loss": 2.2572, "step": 5699500 }, { "epoch": 28.24, "learning_rate": 3.588516817526493e-05, "loss": 2.2578, "step": 5700000 }, { "epoch": 28.24, "learning_rate": 3.588392958883885e-05, "loss": 2.2657, "step": 5700500 }, { "epoch": 28.24, "learning_rate": 3.5882691002412766e-05, "loss": 2.2494, "step": 5701000 }, { "epoch": 28.25, "learning_rate": 3.5881452415986683e-05, "loss": 2.259, "step": 5701500 }, { "epoch": 28.25, "learning_rate": 3.58802138295606e-05, "loss": 2.2313, "step": 5702000 }, { "epoch": 28.25, "learning_rate": 3.587897772030737e-05, "loss": 2.2567, "step": 5702500 }, { "epoch": 28.25, "learning_rate": 3.5877739133881286e-05, "loss": 2.2508, "step": 5703000 }, { "epoch": 28.26, "learning_rate": 3.58765005474552e-05, "loss": 2.2611, "step": 5703500 }, { "epoch": 28.26, "learning_rate": 3.587526196102911e-05, "loss": 2.2495, "step": 5704000 }, { "epoch": 28.26, "learning_rate": 3.587402337460303e-05, "loss": 2.2509, "step": 5704500 }, { "epoch": 28.26, "learning_rate": 3.587278478817695e-05, "loss": 2.2434, "step": 5705000 }, { "epoch": 28.27, "learning_rate": 3.5871546201750864e-05, "loss": 2.2263, "step": 5705500 }, { "epoch": 28.27, "learning_rate": 3.587030761532478e-05, "loss": 2.2574, "step": 5706000 }, { "epoch": 28.27, "learning_rate": 3.58690690288987e-05, "loss": 2.2488, "step": 5706500 }, { "epoch": 28.27, "learning_rate": 3.5867835396818325e-05, "loss": 2.2452, "step": 5707000 }, { "epoch": 28.28, "learning_rate": 3.586659681039224e-05, "loss": 2.2347, "step": 5707500 }, { "epoch": 28.28, "learning_rate": 3.586535822396615e-05, "loss": 2.2418, "step": 5708000 }, { "epoch": 28.28, "learning_rate": 3.586411963754007e-05, "loss": 2.2475, "step": 5708500 }, { "epoch": 28.28, "learning_rate": 3.5862881051113986e-05, "loss": 2.2471, "step": 5709000 }, { "epoch": 28.29, "learning_rate": 3.58616424646879e-05, "loss": 2.2539, "step": 5709500 }, { "epoch": 28.29, "learning_rate": 3.586040387826182e-05, "loss": 2.2612, "step": 5710000 }, { "epoch": 28.29, "learning_rate": 3.585916529183573e-05, "loss": 2.248, "step": 5710500 }, { "epoch": 28.29, "learning_rate": 3.5857929182582506e-05, "loss": 2.2418, "step": 5711000 }, { "epoch": 28.3, "learning_rate": 3.585669059615642e-05, "loss": 2.2432, "step": 5711500 }, { "epoch": 28.3, "learning_rate": 3.585545200973034e-05, "loss": 2.2694, "step": 5712000 }, { "epoch": 28.3, "learning_rate": 3.585421342330425e-05, "loss": 2.2284, "step": 5712500 }, { "epoch": 28.3, "learning_rate": 3.585297483687817e-05, "loss": 2.2573, "step": 5713000 }, { "epoch": 28.31, "learning_rate": 3.585173872762494e-05, "loss": 2.2553, "step": 5713500 }, { "epoch": 28.31, "learning_rate": 3.585050014119886e-05, "loss": 2.2186, "step": 5714000 }, { "epoch": 28.31, "learning_rate": 3.5849261554772776e-05, "loss": 2.2606, "step": 5714500 }, { "epoch": 28.31, "learning_rate": 3.5848022968346686e-05, "loss": 2.2582, "step": 5715000 }, { "epoch": 28.32, "learning_rate": 3.58467843819206e-05, "loss": 2.2227, "step": 5715500 }, { "epoch": 28.32, "learning_rate": 3.584554579549452e-05, "loss": 2.254, "step": 5716000 }, { "epoch": 28.32, "learning_rate": 3.584430720906844e-05, "loss": 2.2496, "step": 5716500 }, { "epoch": 28.32, "learning_rate": 3.5843068622642354e-05, "loss": 2.2388, "step": 5717000 }, { "epoch": 28.33, "learning_rate": 3.5841830036216264e-05, "loss": 2.243, "step": 5717500 }, { "epoch": 28.33, "learning_rate": 3.584059144979018e-05, "loss": 2.2567, "step": 5718000 }, { "epoch": 28.33, "learning_rate": 3.58393528633641e-05, "loss": 2.2261, "step": 5718500 }, { "epoch": 28.33, "learning_rate": 3.5838114276938015e-05, "loss": 2.2367, "step": 5719000 }, { "epoch": 28.34, "learning_rate": 3.5836878167684784e-05, "loss": 2.272, "step": 5719500 }, { "epoch": 28.34, "learning_rate": 3.583564205843156e-05, "loss": 2.2329, "step": 5720000 }, { "epoch": 28.34, "learning_rate": 3.583440594917832e-05, "loss": 2.2338, "step": 5720500 }, { "epoch": 28.34, "learning_rate": 3.583316736275224e-05, "loss": 2.2532, "step": 5721000 }, { "epoch": 28.35, "learning_rate": 3.583193125349901e-05, "loss": 2.2604, "step": 5721500 }, { "epoch": 28.35, "learning_rate": 3.5830692667072924e-05, "loss": 2.274, "step": 5722000 }, { "epoch": 28.35, "learning_rate": 3.582945408064684e-05, "loss": 2.243, "step": 5722500 }, { "epoch": 28.35, "learning_rate": 3.582821549422076e-05, "loss": 2.2256, "step": 5723000 }, { "epoch": 28.36, "learning_rate": 3.5826976907794675e-05, "loss": 2.2452, "step": 5723500 }, { "epoch": 28.36, "learning_rate": 3.582573832136859e-05, "loss": 2.2638, "step": 5724000 }, { "epoch": 28.36, "learning_rate": 3.582449973494251e-05, "loss": 2.2412, "step": 5724500 }, { "epoch": 28.36, "learning_rate": 3.5823261148516426e-05, "loss": 2.2771, "step": 5725000 }, { "epoch": 28.37, "learning_rate": 3.582202256209034e-05, "loss": 2.2621, "step": 5725500 }, { "epoch": 28.37, "learning_rate": 3.5820788930009956e-05, "loss": 2.23, "step": 5726000 }, { "epoch": 28.37, "learning_rate": 3.5819550343583873e-05, "loss": 2.2686, "step": 5726500 }, { "epoch": 28.37, "learning_rate": 3.581831175715779e-05, "loss": 2.2551, "step": 5727000 }, { "epoch": 28.38, "learning_rate": 3.581707317073171e-05, "loss": 2.2563, "step": 5727500 }, { "epoch": 28.38, "learning_rate": 3.5815834584305624e-05, "loss": 2.2388, "step": 5728000 }, { "epoch": 28.38, "learning_rate": 3.581459599787954e-05, "loss": 2.2399, "step": 5728500 }, { "epoch": 28.38, "learning_rate": 3.581335741145346e-05, "loss": 2.2514, "step": 5729000 }, { "epoch": 28.39, "learning_rate": 3.581212130220023e-05, "loss": 2.2183, "step": 5729500 }, { "epoch": 28.39, "learning_rate": 3.5810885192946996e-05, "loss": 2.236, "step": 5730000 }, { "epoch": 28.39, "learning_rate": 3.580964660652091e-05, "loss": 2.2386, "step": 5730500 }, { "epoch": 28.39, "learning_rate": 3.580840802009483e-05, "loss": 2.251, "step": 5731000 }, { "epoch": 28.4, "learning_rate": 3.5807169433668746e-05, "loss": 2.2459, "step": 5731500 }, { "epoch": 28.4, "learning_rate": 3.5805930847242657e-05, "loss": 2.2252, "step": 5732000 }, { "epoch": 28.4, "learning_rate": 3.5804692260816574e-05, "loss": 2.247, "step": 5732500 }, { "epoch": 28.4, "learning_rate": 3.580345367439049e-05, "loss": 2.2305, "step": 5733000 }, { "epoch": 28.41, "learning_rate": 3.580221508796441e-05, "loss": 2.2407, "step": 5733500 }, { "epoch": 28.41, "learning_rate": 3.5800976501538324e-05, "loss": 2.251, "step": 5734000 }, { "epoch": 28.41, "learning_rate": 3.579973791511224e-05, "loss": 2.2658, "step": 5734500 }, { "epoch": 28.41, "learning_rate": 3.579849932868616e-05, "loss": 2.2874, "step": 5735000 }, { "epoch": 28.42, "learning_rate": 3.5797260742260075e-05, "loss": 2.2551, "step": 5735500 }, { "epoch": 28.42, "learning_rate": 3.579602215583399e-05, "loss": 2.2694, "step": 5736000 }, { "epoch": 28.42, "learning_rate": 3.579478356940791e-05, "loss": 2.2369, "step": 5736500 }, { "epoch": 28.42, "learning_rate": 3.5793544982981826e-05, "loss": 2.2427, "step": 5737000 }, { "epoch": 28.43, "learning_rate": 3.579230639655574e-05, "loss": 2.2528, "step": 5737500 }, { "epoch": 28.43, "learning_rate": 3.579106781012966e-05, "loss": 2.252, "step": 5738000 }, { "epoch": 28.43, "learning_rate": 3.578982922370358e-05, "loss": 2.25, "step": 5738500 }, { "epoch": 28.43, "learning_rate": 3.5788590637277494e-05, "loss": 2.278, "step": 5739000 }, { "epoch": 28.44, "learning_rate": 3.578735205085141e-05, "loss": 2.2768, "step": 5739500 }, { "epoch": 28.44, "learning_rate": 3.578611346442533e-05, "loss": 2.2398, "step": 5740000 }, { "epoch": 28.44, "learning_rate": 3.578487735517209e-05, "loss": 2.2404, "step": 5740500 }, { "epoch": 28.44, "learning_rate": 3.5783638768746006e-05, "loss": 2.2455, "step": 5741000 }, { "epoch": 28.45, "learning_rate": 3.578240018231992e-05, "loss": 2.2767, "step": 5741500 }, { "epoch": 28.45, "learning_rate": 3.578116159589384e-05, "loss": 2.2687, "step": 5742000 }, { "epoch": 28.45, "learning_rate": 3.577992300946776e-05, "loss": 2.2557, "step": 5742500 }, { "epoch": 28.45, "learning_rate": 3.5778684423041674e-05, "loss": 2.25, "step": 5743000 }, { "epoch": 28.46, "learning_rate": 3.577744583661559e-05, "loss": 2.2547, "step": 5743500 }, { "epoch": 28.46, "learning_rate": 3.57762072501895e-05, "loss": 2.2511, "step": 5744000 }, { "epoch": 28.46, "learning_rate": 3.577497114093628e-05, "loss": 2.2485, "step": 5744500 }, { "epoch": 28.46, "learning_rate": 3.577373503168304e-05, "loss": 2.2567, "step": 5745000 }, { "epoch": 28.47, "learning_rate": 3.577249892242981e-05, "loss": 2.2858, "step": 5745500 }, { "epoch": 28.47, "learning_rate": 3.5771260336003725e-05, "loss": 2.2511, "step": 5746000 }, { "epoch": 28.47, "learning_rate": 3.577002174957764e-05, "loss": 2.248, "step": 5746500 }, { "epoch": 28.47, "learning_rate": 3.576878564032442e-05, "loss": 2.2604, "step": 5747000 }, { "epoch": 28.48, "learning_rate": 3.576754705389833e-05, "loss": 2.2871, "step": 5747500 }, { "epoch": 28.48, "learning_rate": 3.5766308467472244e-05, "loss": 2.2474, "step": 5748000 }, { "epoch": 28.48, "learning_rate": 3.576506988104616e-05, "loss": 2.2581, "step": 5748500 }, { "epoch": 28.48, "learning_rate": 3.576383129462008e-05, "loss": 2.228, "step": 5749000 }, { "epoch": 28.49, "learning_rate": 3.5762592708193995e-05, "loss": 2.2505, "step": 5749500 }, { "epoch": 28.49, "learning_rate": 3.576135412176791e-05, "loss": 2.2651, "step": 5750000 }, { "epoch": 28.49, "learning_rate": 3.576011553534183e-05, "loss": 2.24, "step": 5750500 }, { "epoch": 28.49, "learning_rate": 3.5758876948915746e-05, "loss": 2.2268, "step": 5751000 }, { "epoch": 28.49, "learning_rate": 3.575763836248966e-05, "loss": 2.2565, "step": 5751500 }, { "epoch": 28.5, "learning_rate": 3.575639977606357e-05, "loss": 2.25, "step": 5752000 }, { "epoch": 28.5, "learning_rate": 3.575516118963749e-05, "loss": 2.2529, "step": 5752500 }, { "epoch": 28.5, "learning_rate": 3.575392508038426e-05, "loss": 2.2352, "step": 5753000 }, { "epoch": 28.5, "learning_rate": 3.5752686493958175e-05, "loss": 2.2627, "step": 5753500 }, { "epoch": 28.51, "learning_rate": 3.575144790753209e-05, "loss": 2.2574, "step": 5754000 }, { "epoch": 28.51, "learning_rate": 3.575020932110601e-05, "loss": 2.2656, "step": 5754500 }, { "epoch": 28.51, "learning_rate": 3.574897321185278e-05, "loss": 2.2663, "step": 5755000 }, { "epoch": 28.51, "learning_rate": 3.5747734625426695e-05, "loss": 2.2273, "step": 5755500 }, { "epoch": 28.52, "learning_rate": 3.574649603900061e-05, "loss": 2.2567, "step": 5756000 }, { "epoch": 28.52, "learning_rate": 3.574525745257453e-05, "loss": 2.2755, "step": 5756500 }, { "epoch": 28.52, "learning_rate": 3.5744018866148446e-05, "loss": 2.246, "step": 5757000 }, { "epoch": 28.52, "learning_rate": 3.574278027972236e-05, "loss": 2.2647, "step": 5757500 }, { "epoch": 28.53, "learning_rate": 3.574154169329628e-05, "loss": 2.2595, "step": 5758000 }, { "epoch": 28.53, "learning_rate": 3.574030558404304e-05, "loss": 2.2468, "step": 5758500 }, { "epoch": 28.53, "learning_rate": 3.573906699761696e-05, "loss": 2.2384, "step": 5759000 }, { "epoch": 28.53, "learning_rate": 3.5737828411190876e-05, "loss": 2.2599, "step": 5759500 }, { "epoch": 28.54, "learning_rate": 3.573658982476479e-05, "loss": 2.2706, "step": 5760000 }, { "epoch": 28.54, "learning_rate": 3.573535123833871e-05, "loss": 2.2765, "step": 5760500 }, { "epoch": 28.54, "learning_rate": 3.5734112651912626e-05, "loss": 2.2622, "step": 5761000 }, { "epoch": 28.54, "learning_rate": 3.573287406548654e-05, "loss": 2.2588, "step": 5761500 }, { "epoch": 28.55, "learning_rate": 3.573163547906046e-05, "loss": 2.2274, "step": 5762000 }, { "epoch": 28.55, "learning_rate": 3.573039689263438e-05, "loss": 2.264, "step": 5762500 }, { "epoch": 28.55, "learning_rate": 3.5729158306208294e-05, "loss": 2.2544, "step": 5763000 }, { "epoch": 28.55, "learning_rate": 3.572792219695506e-05, "loss": 2.2477, "step": 5763500 }, { "epoch": 28.56, "learning_rate": 3.572668361052898e-05, "loss": 2.2682, "step": 5764000 }, { "epoch": 28.56, "learning_rate": 3.57254450241029e-05, "loss": 2.238, "step": 5764500 }, { "epoch": 28.56, "learning_rate": 3.5724206437676814e-05, "loss": 2.2259, "step": 5765000 }, { "epoch": 28.56, "learning_rate": 3.5722970328423576e-05, "loss": 2.2703, "step": 5765500 }, { "epoch": 28.57, "learning_rate": 3.572173174199749e-05, "loss": 2.2302, "step": 5766000 }, { "epoch": 28.57, "learning_rate": 3.572049315557141e-05, "loss": 2.2499, "step": 5766500 }, { "epoch": 28.57, "learning_rate": 3.5719254569145326e-05, "loss": 2.2483, "step": 5767000 }, { "epoch": 28.57, "learning_rate": 3.5718015982719243e-05, "loss": 2.272, "step": 5767500 }, { "epoch": 28.58, "learning_rate": 3.571677987346601e-05, "loss": 2.2575, "step": 5768000 }, { "epoch": 28.58, "learning_rate": 3.571554128703993e-05, "loss": 2.2488, "step": 5768500 }, { "epoch": 28.58, "learning_rate": 3.5714302700613846e-05, "loss": 2.2621, "step": 5769000 }, { "epoch": 28.58, "learning_rate": 3.571306411418776e-05, "loss": 2.2267, "step": 5769500 }, { "epoch": 28.59, "learning_rate": 3.571182552776168e-05, "loss": 2.2648, "step": 5770000 }, { "epoch": 28.59, "learning_rate": 3.57105869413356e-05, "loss": 2.235, "step": 5770500 }, { "epoch": 28.59, "learning_rate": 3.5709348354909514e-05, "loss": 2.2069, "step": 5771000 }, { "epoch": 28.59, "learning_rate": 3.570810976848343e-05, "loss": 2.2329, "step": 5771500 }, { "epoch": 28.6, "learning_rate": 3.570687365923019e-05, "loss": 2.2703, "step": 5772000 }, { "epoch": 28.6, "learning_rate": 3.570563507280411e-05, "loss": 2.2646, "step": 5772500 }, { "epoch": 28.6, "learning_rate": 3.570439648637803e-05, "loss": 2.2603, "step": 5773000 }, { "epoch": 28.6, "learning_rate": 3.5703160377124795e-05, "loss": 2.2573, "step": 5773500 }, { "epoch": 28.61, "learning_rate": 3.5701924267871564e-05, "loss": 2.2794, "step": 5774000 }, { "epoch": 28.61, "learning_rate": 3.570068568144548e-05, "loss": 2.247, "step": 5774500 }, { "epoch": 28.61, "learning_rate": 3.56994470950194e-05, "loss": 2.2595, "step": 5775000 }, { "epoch": 28.61, "learning_rate": 3.569820850859331e-05, "loss": 2.2653, "step": 5775500 }, { "epoch": 28.62, "learning_rate": 3.5696972399340084e-05, "loss": 2.2649, "step": 5776000 }, { "epoch": 28.62, "learning_rate": 3.5695733812914e-05, "loss": 2.2686, "step": 5776500 }, { "epoch": 28.62, "learning_rate": 3.569449522648792e-05, "loss": 2.2608, "step": 5777000 }, { "epoch": 28.62, "learning_rate": 3.5693256640061835e-05, "loss": 2.2304, "step": 5777500 }, { "epoch": 28.63, "learning_rate": 3.5692020530808603e-05, "loss": 2.2519, "step": 5778000 }, { "epoch": 28.63, "learning_rate": 3.569078194438252e-05, "loss": 2.2614, "step": 5778500 }, { "epoch": 28.63, "learning_rate": 3.568954335795644e-05, "loss": 2.2653, "step": 5779000 }, { "epoch": 28.63, "learning_rate": 3.5688304771530354e-05, "loss": 2.2743, "step": 5779500 }, { "epoch": 28.64, "learning_rate": 3.5687066185104264e-05, "loss": 2.2705, "step": 5780000 }, { "epoch": 28.64, "learning_rate": 3.568583007585103e-05, "loss": 2.2531, "step": 5780500 }, { "epoch": 28.64, "learning_rate": 3.568459148942495e-05, "loss": 2.2766, "step": 5781000 }, { "epoch": 28.64, "learning_rate": 3.568335290299887e-05, "loss": 2.2445, "step": 5781500 }, { "epoch": 28.65, "learning_rate": 3.5682114316572784e-05, "loss": 2.2499, "step": 5782000 }, { "epoch": 28.65, "learning_rate": 3.56808757301467e-05, "loss": 2.2532, "step": 5782500 }, { "epoch": 28.65, "learning_rate": 3.567963714372062e-05, "loss": 2.2463, "step": 5783000 }, { "epoch": 28.65, "learning_rate": 3.5678398557294535e-05, "loss": 2.2268, "step": 5783500 }, { "epoch": 28.66, "learning_rate": 3.567715997086845e-05, "loss": 2.2623, "step": 5784000 }, { "epoch": 28.66, "learning_rate": 3.567592138444237e-05, "loss": 2.2633, "step": 5784500 }, { "epoch": 28.66, "learning_rate": 3.567468279801628e-05, "loss": 2.2451, "step": 5785000 }, { "epoch": 28.66, "learning_rate": 3.5673444211590196e-05, "loss": 2.2643, "step": 5785500 }, { "epoch": 28.67, "learning_rate": 3.567220562516411e-05, "loss": 2.2295, "step": 5786000 }, { "epoch": 28.67, "learning_rate": 3.567096703873803e-05, "loss": 2.2327, "step": 5786500 }, { "epoch": 28.67, "learning_rate": 3.5669728452311946e-05, "loss": 2.2591, "step": 5787000 }, { "epoch": 28.67, "learning_rate": 3.5668489865885863e-05, "loss": 2.2324, "step": 5787500 }, { "epoch": 28.68, "learning_rate": 3.566725375663263e-05, "loss": 2.279, "step": 5788000 }, { "epoch": 28.68, "learning_rate": 3.566601517020655e-05, "loss": 2.2687, "step": 5788500 }, { "epoch": 28.68, "learning_rate": 3.566477658378046e-05, "loss": 2.2327, "step": 5789000 }, { "epoch": 28.68, "learning_rate": 3.5663537997354376e-05, "loss": 2.2751, "step": 5789500 }, { "epoch": 28.69, "learning_rate": 3.566230188810115e-05, "loss": 2.2476, "step": 5790000 }, { "epoch": 28.69, "learning_rate": 3.566106330167507e-05, "loss": 2.2564, "step": 5790500 }, { "epoch": 28.69, "learning_rate": 3.5659824715248986e-05, "loss": 2.2629, "step": 5791000 }, { "epoch": 28.69, "learning_rate": 3.5658586128822896e-05, "loss": 2.2568, "step": 5791500 }, { "epoch": 28.7, "learning_rate": 3.565735001956967e-05, "loss": 2.2414, "step": 5792000 }, { "epoch": 28.7, "learning_rate": 3.565611143314359e-05, "loss": 2.2787, "step": 5792500 }, { "epoch": 28.7, "learning_rate": 3.565487532389035e-05, "loss": 2.2533, "step": 5793000 }, { "epoch": 28.7, "learning_rate": 3.565363673746427e-05, "loss": 2.2566, "step": 5793500 }, { "epoch": 28.71, "learning_rate": 3.5652400628211036e-05, "loss": 2.2658, "step": 5794000 }, { "epoch": 28.71, "learning_rate": 3.565116204178495e-05, "loss": 2.2591, "step": 5794500 }, { "epoch": 28.71, "learning_rate": 3.564992345535887e-05, "loss": 2.2673, "step": 5795000 }, { "epoch": 28.71, "learning_rate": 3.564868486893279e-05, "loss": 2.2577, "step": 5795500 }, { "epoch": 28.72, "learning_rate": 3.5647446282506704e-05, "loss": 2.2438, "step": 5796000 }, { "epoch": 28.72, "learning_rate": 3.564620769608062e-05, "loss": 2.2575, "step": 5796500 }, { "epoch": 28.72, "learning_rate": 3.564496910965454e-05, "loss": 2.2445, "step": 5797000 }, { "epoch": 28.72, "learning_rate": 3.5643730523228455e-05, "loss": 2.2777, "step": 5797500 }, { "epoch": 28.73, "learning_rate": 3.564249193680237e-05, "loss": 2.2571, "step": 5798000 }, { "epoch": 28.73, "learning_rate": 3.564125335037629e-05, "loss": 2.2519, "step": 5798500 }, { "epoch": 28.73, "learning_rate": 3.5640014763950205e-05, "loss": 2.2186, "step": 5799000 }, { "epoch": 28.73, "learning_rate": 3.563877617752412e-05, "loss": 2.2601, "step": 5799500 }, { "epoch": 28.74, "learning_rate": 3.563753759109803e-05, "loss": 2.2653, "step": 5800000 }, { "epoch": 28.74, "learning_rate": 3.563629900467195e-05, "loss": 2.2519, "step": 5800500 }, { "epoch": 28.74, "learning_rate": 3.563506289541872e-05, "loss": 2.2545, "step": 5801000 }, { "epoch": 28.74, "learning_rate": 3.5633824308992635e-05, "loss": 2.2799, "step": 5801500 }, { "epoch": 28.75, "learning_rate": 3.563258572256655e-05, "loss": 2.2447, "step": 5802000 }, { "epoch": 28.75, "learning_rate": 3.563134961331332e-05, "loss": 2.297, "step": 5802500 }, { "epoch": 28.75, "learning_rate": 3.563011102688724e-05, "loss": 2.2558, "step": 5803000 }, { "epoch": 28.75, "learning_rate": 3.5628872440461155e-05, "loss": 2.2682, "step": 5803500 }, { "epoch": 28.76, "learning_rate": 3.562763385403507e-05, "loss": 2.2454, "step": 5804000 }, { "epoch": 28.76, "learning_rate": 3.562639526760899e-05, "loss": 2.2382, "step": 5804500 }, { "epoch": 28.76, "learning_rate": 3.5625156681182905e-05, "loss": 2.2444, "step": 5805000 }, { "epoch": 28.76, "learning_rate": 3.562391809475682e-05, "loss": 2.2791, "step": 5805500 }, { "epoch": 28.76, "learning_rate": 3.562267950833074e-05, "loss": 2.2489, "step": 5806000 }, { "epoch": 28.77, "learning_rate": 3.56214433990775e-05, "loss": 2.2662, "step": 5806500 }, { "epoch": 28.77, "learning_rate": 3.562020481265142e-05, "loss": 2.241, "step": 5807000 }, { "epoch": 28.77, "learning_rate": 3.5618966226225335e-05, "loss": 2.2429, "step": 5807500 }, { "epoch": 28.77, "learning_rate": 3.561772763979925e-05, "loss": 2.2777, "step": 5808000 }, { "epoch": 28.78, "learning_rate": 3.561648905337317e-05, "loss": 2.2731, "step": 5808500 }, { "epoch": 28.78, "learning_rate": 3.5615250466947086e-05, "loss": 2.2572, "step": 5809000 }, { "epoch": 28.78, "learning_rate": 3.5614014357693855e-05, "loss": 2.2283, "step": 5809500 }, { "epoch": 28.78, "learning_rate": 3.561277577126777e-05, "loss": 2.2644, "step": 5810000 }, { "epoch": 28.79, "learning_rate": 3.561153718484169e-05, "loss": 2.2584, "step": 5810500 }, { "epoch": 28.79, "learning_rate": 3.5610298598415606e-05, "loss": 2.2588, "step": 5811000 }, { "epoch": 28.79, "learning_rate": 3.560906001198952e-05, "loss": 2.263, "step": 5811500 }, { "epoch": 28.79, "learning_rate": 3.5607826379909136e-05, "loss": 2.2713, "step": 5812000 }, { "epoch": 28.8, "learning_rate": 3.560658779348305e-05, "loss": 2.2487, "step": 5812500 }, { "epoch": 28.8, "learning_rate": 3.560534920705697e-05, "loss": 2.2539, "step": 5813000 }, { "epoch": 28.8, "learning_rate": 3.560411062063089e-05, "loss": 2.2511, "step": 5813500 }, { "epoch": 28.8, "learning_rate": 3.5602872034204804e-05, "loss": 2.2785, "step": 5814000 }, { "epoch": 28.81, "learning_rate": 3.560163592495157e-05, "loss": 2.2514, "step": 5814500 }, { "epoch": 28.81, "learning_rate": 3.560039733852549e-05, "loss": 2.2603, "step": 5815000 }, { "epoch": 28.81, "learning_rate": 3.559915875209941e-05, "loss": 2.2606, "step": 5815500 }, { "epoch": 28.81, "learning_rate": 3.5597920165673324e-05, "loss": 2.247, "step": 5816000 }, { "epoch": 28.82, "learning_rate": 3.559668157924724e-05, "loss": 2.2564, "step": 5816500 }, { "epoch": 28.82, "learning_rate": 3.559544299282115e-05, "loss": 2.2528, "step": 5817000 }, { "epoch": 28.82, "learning_rate": 3.559420688356792e-05, "loss": 2.2559, "step": 5817500 }, { "epoch": 28.82, "learning_rate": 3.5592968297141837e-05, "loss": 2.2693, "step": 5818000 }, { "epoch": 28.83, "learning_rate": 3.559173218788861e-05, "loss": 2.2497, "step": 5818500 }, { "epoch": 28.83, "learning_rate": 3.559049360146253e-05, "loss": 2.2595, "step": 5819000 }, { "epoch": 28.83, "learning_rate": 3.5589255015036446e-05, "loss": 2.2414, "step": 5819500 }, { "epoch": 28.83, "learning_rate": 3.5588016428610356e-05, "loss": 2.2523, "step": 5820000 }, { "epoch": 28.84, "learning_rate": 3.558677784218427e-05, "loss": 2.2844, "step": 5820500 }, { "epoch": 28.84, "learning_rate": 3.558553925575819e-05, "loss": 2.2532, "step": 5821000 }, { "epoch": 28.84, "learning_rate": 3.558430314650496e-05, "loss": 2.245, "step": 5821500 }, { "epoch": 28.84, "learning_rate": 3.5583064560078876e-05, "loss": 2.2501, "step": 5822000 }, { "epoch": 28.85, "learning_rate": 3.558182597365279e-05, "loss": 2.2561, "step": 5822500 }, { "epoch": 28.85, "learning_rate": 3.558058738722671e-05, "loss": 2.2602, "step": 5823000 }, { "epoch": 28.85, "learning_rate": 3.557935127797348e-05, "loss": 2.2512, "step": 5823500 }, { "epoch": 28.85, "learning_rate": 3.5578112691547395e-05, "loss": 2.2484, "step": 5824000 }, { "epoch": 28.86, "learning_rate": 3.557687410512131e-05, "loss": 2.2785, "step": 5824500 }, { "epoch": 28.86, "learning_rate": 3.557563551869523e-05, "loss": 2.2271, "step": 5825000 }, { "epoch": 28.86, "learning_rate": 3.5574396932269146e-05, "loss": 2.264, "step": 5825500 }, { "epoch": 28.86, "learning_rate": 3.557315834584306e-05, "loss": 2.2774, "step": 5826000 }, { "epoch": 28.87, "learning_rate": 3.557191975941697e-05, "loss": 2.2599, "step": 5826500 }, { "epoch": 28.87, "learning_rate": 3.557068117299089e-05, "loss": 2.2745, "step": 5827000 }, { "epoch": 28.87, "learning_rate": 3.556944258656481e-05, "loss": 2.2626, "step": 5827500 }, { "epoch": 28.87, "learning_rate": 3.5568206477311576e-05, "loss": 2.2708, "step": 5828000 }, { "epoch": 28.88, "learning_rate": 3.556696789088549e-05, "loss": 2.277, "step": 5828500 }, { "epoch": 28.88, "learning_rate": 3.556572930445941e-05, "loss": 2.2848, "step": 5829000 }, { "epoch": 28.88, "learning_rate": 3.556449319520618e-05, "loss": 2.2448, "step": 5829500 }, { "epoch": 28.88, "learning_rate": 3.5563254608780095e-05, "loss": 2.2617, "step": 5830000 }, { "epoch": 28.89, "learning_rate": 3.556201602235401e-05, "loss": 2.2809, "step": 5830500 }, { "epoch": 28.89, "learning_rate": 3.556077743592793e-05, "loss": 2.2586, "step": 5831000 }, { "epoch": 28.89, "learning_rate": 3.5559538849501846e-05, "loss": 2.232, "step": 5831500 }, { "epoch": 28.89, "learning_rate": 3.555830026307576e-05, "loss": 2.2542, "step": 5832000 }, { "epoch": 28.9, "learning_rate": 3.555706167664967e-05, "loss": 2.2311, "step": 5832500 }, { "epoch": 28.9, "learning_rate": 3.555582556739644e-05, "loss": 2.2316, "step": 5833000 }, { "epoch": 28.9, "learning_rate": 3.555458945814321e-05, "loss": 2.2274, "step": 5833500 }, { "epoch": 28.9, "learning_rate": 3.555335087171713e-05, "loss": 2.2638, "step": 5834000 }, { "epoch": 28.91, "learning_rate": 3.5552112285291045e-05, "loss": 2.2413, "step": 5834500 }, { "epoch": 28.91, "learning_rate": 3.555087369886496e-05, "loss": 2.2671, "step": 5835000 }, { "epoch": 28.91, "learning_rate": 3.554963511243888e-05, "loss": 2.2601, "step": 5835500 }, { "epoch": 28.91, "learning_rate": 3.5548396526012796e-05, "loss": 2.2831, "step": 5836000 }, { "epoch": 28.92, "learning_rate": 3.554715793958671e-05, "loss": 2.2749, "step": 5836500 }, { "epoch": 28.92, "learning_rate": 3.554591935316063e-05, "loss": 2.2563, "step": 5837000 }, { "epoch": 28.92, "learning_rate": 3.5544680766734546e-05, "loss": 2.285, "step": 5837500 }, { "epoch": 28.92, "learning_rate": 3.554344713465416e-05, "loss": 2.2429, "step": 5838000 }, { "epoch": 28.93, "learning_rate": 3.554220854822808e-05, "loss": 2.2898, "step": 5838500 }, { "epoch": 28.93, "learning_rate": 3.5540969961801994e-05, "loss": 2.243, "step": 5839000 }, { "epoch": 28.93, "learning_rate": 3.553973137537591e-05, "loss": 2.2507, "step": 5839500 }, { "epoch": 28.93, "learning_rate": 3.553849278894983e-05, "loss": 2.2531, "step": 5840000 }, { "epoch": 28.94, "learning_rate": 3.5537254202523745e-05, "loss": 2.2665, "step": 5840500 }, { "epoch": 28.94, "learning_rate": 3.553601561609766e-05, "loss": 2.2682, "step": 5841000 }, { "epoch": 28.94, "learning_rate": 3.553477702967158e-05, "loss": 2.2489, "step": 5841500 }, { "epoch": 28.94, "learning_rate": 3.5533538443245496e-05, "loss": 2.2418, "step": 5842000 }, { "epoch": 28.95, "learning_rate": 3.5532302333992265e-05, "loss": 2.2649, "step": 5842500 }, { "epoch": 28.95, "learning_rate": 3.553106374756618e-05, "loss": 2.2535, "step": 5843000 }, { "epoch": 28.95, "learning_rate": 3.55298251611401e-05, "loss": 2.2434, "step": 5843500 }, { "epoch": 28.95, "learning_rate": 3.5528586574714015e-05, "loss": 2.2766, "step": 5844000 }, { "epoch": 28.96, "learning_rate": 3.552734798828793e-05, "loss": 2.2398, "step": 5844500 }, { "epoch": 28.96, "learning_rate": 3.552610940186184e-05, "loss": 2.2472, "step": 5845000 }, { "epoch": 28.96, "learning_rate": 3.552487081543576e-05, "loss": 2.2474, "step": 5845500 }, { "epoch": 28.96, "learning_rate": 3.5523632229009676e-05, "loss": 2.2566, "step": 5846000 }, { "epoch": 28.97, "learning_rate": 3.552239364258359e-05, "loss": 2.2593, "step": 5846500 }, { "epoch": 28.97, "learning_rate": 3.552115505615751e-05, "loss": 2.2721, "step": 5847000 }, { "epoch": 28.97, "learning_rate": 3.551992142407713e-05, "loss": 2.2743, "step": 5847500 }, { "epoch": 28.97, "learning_rate": 3.551868283765105e-05, "loss": 2.2448, "step": 5848000 }, { "epoch": 28.98, "learning_rate": 3.5517444251224965e-05, "loss": 2.289, "step": 5848500 }, { "epoch": 28.98, "learning_rate": 3.551620566479888e-05, "loss": 2.264, "step": 5849000 }, { "epoch": 28.98, "learning_rate": 3.55149670783728e-05, "loss": 2.2393, "step": 5849500 }, { "epoch": 28.98, "learning_rate": 3.5513728491946715e-05, "loss": 2.241, "step": 5850000 }, { "epoch": 28.99, "learning_rate": 3.551249238269348e-05, "loss": 2.232, "step": 5850500 }, { "epoch": 28.99, "learning_rate": 3.5511253796267394e-05, "loss": 2.2528, "step": 5851000 }, { "epoch": 28.99, "learning_rate": 3.551001520984131e-05, "loss": 2.2253, "step": 5851500 }, { "epoch": 28.99, "learning_rate": 3.550877662341523e-05, "loss": 2.2537, "step": 5852000 }, { "epoch": 29.0, "learning_rate": 3.5507538036989145e-05, "loss": 2.2496, "step": 5852500 }, { "epoch": 29.0, "learning_rate": 3.550629945056306e-05, "loss": 2.2503, "step": 5853000 }, { "epoch": 29.0, "eval_accuracy": 0.6575265605183357, "eval_accuracy_mlm": 0.6131820592582494, "eval_accuracy_nsp": 0.8667511246906365, "eval_loss": 2.3342039585113525, "eval_runtime": 146.0117, "eval_samples_per_second": 1746.155, "eval_steps_per_second": 72.761, "step": 5853447 }, { "epoch": 29.0, "learning_rate": 3.550506581848268e-05, "loss": 2.2472, "step": 5853500 }, { "epoch": 29.0, "learning_rate": 3.55038272320566e-05, "loss": 2.2281, "step": 5854000 }, { "epoch": 29.01, "learning_rate": 3.550258864563052e-05, "loss": 2.233, "step": 5854500 }, { "epoch": 29.01, "learning_rate": 3.550135005920443e-05, "loss": 2.2422, "step": 5855000 }, { "epoch": 29.01, "learning_rate": 3.5500111472778344e-05, "loss": 2.2156, "step": 5855500 }, { "epoch": 29.01, "learning_rate": 3.549887288635226e-05, "loss": 2.2308, "step": 5856000 }, { "epoch": 29.02, "learning_rate": 3.549763429992618e-05, "loss": 2.2383, "step": 5856500 }, { "epoch": 29.02, "learning_rate": 3.5496395713500095e-05, "loss": 2.2424, "step": 5857000 }, { "epoch": 29.02, "learning_rate": 3.549515712707401e-05, "loss": 2.2365, "step": 5857500 }, { "epoch": 29.02, "learning_rate": 3.549391854064793e-05, "loss": 2.2243, "step": 5858000 }, { "epoch": 29.03, "learning_rate": 3.54926824313947e-05, "loss": 2.2149, "step": 5858500 }, { "epoch": 29.03, "learning_rate": 3.549144632214147e-05, "loss": 2.2196, "step": 5859000 }, { "epoch": 29.03, "learning_rate": 3.549020773571538e-05, "loss": 2.2177, "step": 5859500 }, { "epoch": 29.03, "learning_rate": 3.54889691492893e-05, "loss": 2.2114, "step": 5860000 }, { "epoch": 29.03, "learning_rate": 3.548773056286322e-05, "loss": 2.2293, "step": 5860500 }, { "epoch": 29.04, "learning_rate": 3.5486491976437134e-05, "loss": 2.2463, "step": 5861000 }, { "epoch": 29.04, "learning_rate": 3.548525339001105e-05, "loss": 2.2409, "step": 5861500 }, { "epoch": 29.04, "learning_rate": 3.548401480358496e-05, "loss": 2.2488, "step": 5862000 }, { "epoch": 29.04, "learning_rate": 3.548277621715888e-05, "loss": 2.2524, "step": 5862500 }, { "epoch": 29.05, "learning_rate": 3.5481537630732795e-05, "loss": 2.2146, "step": 5863000 }, { "epoch": 29.05, "learning_rate": 3.548029904430671e-05, "loss": 2.2241, "step": 5863500 }, { "epoch": 29.05, "learning_rate": 3.547906045788063e-05, "loss": 2.2167, "step": 5864000 }, { "epoch": 29.05, "learning_rate": 3.5477821871454545e-05, "loss": 2.2205, "step": 5864500 }, { "epoch": 29.06, "learning_rate": 3.547658328502846e-05, "loss": 2.2322, "step": 5865000 }, { "epoch": 29.06, "learning_rate": 3.547534469860238e-05, "loss": 2.2218, "step": 5865500 }, { "epoch": 29.06, "learning_rate": 3.5474106112176296e-05, "loss": 2.2323, "step": 5866000 }, { "epoch": 29.06, "learning_rate": 3.547286752575021e-05, "loss": 2.2263, "step": 5866500 }, { "epoch": 29.07, "learning_rate": 3.547162893932413e-05, "loss": 2.1892, "step": 5867000 }, { "epoch": 29.07, "learning_rate": 3.547039035289805e-05, "loss": 2.2291, "step": 5867500 }, { "epoch": 29.07, "learning_rate": 3.5469151766471964e-05, "loss": 2.2354, "step": 5868000 }, { "epoch": 29.07, "learning_rate": 3.546791318004588e-05, "loss": 2.2302, "step": 5868500 }, { "epoch": 29.08, "learning_rate": 3.54666745936198e-05, "loss": 2.2066, "step": 5869000 }, { "epoch": 29.08, "learning_rate": 3.5465436007193715e-05, "loss": 2.219, "step": 5869500 }, { "epoch": 29.08, "learning_rate": 3.5464197420767625e-05, "loss": 2.2361, "step": 5870000 }, { "epoch": 29.08, "learning_rate": 3.54629613115144e-05, "loss": 2.2123, "step": 5870500 }, { "epoch": 29.09, "learning_rate": 3.546172272508832e-05, "loss": 2.2175, "step": 5871000 }, { "epoch": 29.09, "learning_rate": 3.546048661583508e-05, "loss": 2.2331, "step": 5871500 }, { "epoch": 29.09, "learning_rate": 3.545925050658185e-05, "loss": 2.2405, "step": 5872000 }, { "epoch": 29.09, "learning_rate": 3.5458011920155765e-05, "loss": 2.2258, "step": 5872500 }, { "epoch": 29.1, "learning_rate": 3.5456775810902534e-05, "loss": 2.2055, "step": 5873000 }, { "epoch": 29.1, "learning_rate": 3.545553722447645e-05, "loss": 2.2264, "step": 5873500 }, { "epoch": 29.1, "learning_rate": 3.545429863805037e-05, "loss": 2.2461, "step": 5874000 }, { "epoch": 29.1, "learning_rate": 3.5453060051624285e-05, "loss": 2.2322, "step": 5874500 }, { "epoch": 29.11, "learning_rate": 3.54518214651982e-05, "loss": 2.2506, "step": 5875000 }, { "epoch": 29.11, "learning_rate": 3.545058287877211e-05, "loss": 2.2381, "step": 5875500 }, { "epoch": 29.11, "learning_rate": 3.544934429234603e-05, "loss": 2.2179, "step": 5876000 }, { "epoch": 29.11, "learning_rate": 3.5448105705919946e-05, "loss": 2.2353, "step": 5876500 }, { "epoch": 29.12, "learning_rate": 3.544686711949386e-05, "loss": 2.2316, "step": 5877000 }, { "epoch": 29.12, "learning_rate": 3.544563101024063e-05, "loss": 2.2306, "step": 5877500 }, { "epoch": 29.12, "learning_rate": 3.544439242381455e-05, "loss": 2.2229, "step": 5878000 }, { "epoch": 29.12, "learning_rate": 3.5443153837388465e-05, "loss": 2.2512, "step": 5878500 }, { "epoch": 29.13, "learning_rate": 3.544191525096238e-05, "loss": 2.2306, "step": 5879000 }, { "epoch": 29.13, "learning_rate": 3.54406766645363e-05, "loss": 2.2534, "step": 5879500 }, { "epoch": 29.13, "learning_rate": 3.543944055528307e-05, "loss": 2.2212, "step": 5880000 }, { "epoch": 29.13, "learning_rate": 3.5438201968856985e-05, "loss": 2.2609, "step": 5880500 }, { "epoch": 29.14, "learning_rate": 3.54369633824309e-05, "loss": 2.2195, "step": 5881000 }, { "epoch": 29.14, "learning_rate": 3.543572727317767e-05, "loss": 2.2432, "step": 5881500 }, { "epoch": 29.14, "learning_rate": 3.543448868675159e-05, "loss": 2.2379, "step": 5882000 }, { "epoch": 29.14, "learning_rate": 3.5433250100325504e-05, "loss": 2.2313, "step": 5882500 }, { "epoch": 29.15, "learning_rate": 3.543201151389942e-05, "loss": 2.2383, "step": 5883000 }, { "epoch": 29.15, "learning_rate": 3.543077292747333e-05, "loss": 2.2507, "step": 5883500 }, { "epoch": 29.15, "learning_rate": 3.542953434104725e-05, "loss": 2.2331, "step": 5884000 }, { "epoch": 29.15, "learning_rate": 3.5428295754621165e-05, "loss": 2.2345, "step": 5884500 }, { "epoch": 29.16, "learning_rate": 3.542705964536794e-05, "loss": 2.2113, "step": 5885000 }, { "epoch": 29.16, "learning_rate": 3.542582105894186e-05, "loss": 2.2003, "step": 5885500 }, { "epoch": 29.16, "learning_rate": 3.5424582472515775e-05, "loss": 2.2238, "step": 5886000 }, { "epoch": 29.16, "learning_rate": 3.5423343886089685e-05, "loss": 2.2471, "step": 5886500 }, { "epoch": 29.17, "learning_rate": 3.54221052996636e-05, "loss": 2.1966, "step": 5887000 }, { "epoch": 29.17, "learning_rate": 3.542086671323752e-05, "loss": 2.2397, "step": 5887500 }, { "epoch": 29.17, "learning_rate": 3.5419628126811436e-05, "loss": 2.2606, "step": 5888000 }, { "epoch": 29.17, "learning_rate": 3.541838954038535e-05, "loss": 2.2447, "step": 5888500 }, { "epoch": 29.18, "learning_rate": 3.541715095395926e-05, "loss": 2.2425, "step": 5889000 }, { "epoch": 29.18, "learning_rate": 3.541591236753318e-05, "loss": 2.226, "step": 5889500 }, { "epoch": 29.18, "learning_rate": 3.54146737811071e-05, "loss": 2.2204, "step": 5890000 }, { "epoch": 29.18, "learning_rate": 3.5413437671853866e-05, "loss": 2.2296, "step": 5890500 }, { "epoch": 29.19, "learning_rate": 3.541220156260064e-05, "loss": 2.2481, "step": 5891000 }, { "epoch": 29.19, "learning_rate": 3.541096297617456e-05, "loss": 2.2228, "step": 5891500 }, { "epoch": 29.19, "learning_rate": 3.5409724389748475e-05, "loss": 2.2576, "step": 5892000 }, { "epoch": 29.19, "learning_rate": 3.5408485803322385e-05, "loss": 2.2459, "step": 5892500 }, { "epoch": 29.2, "learning_rate": 3.54072472168963e-05, "loss": 2.2061, "step": 5893000 }, { "epoch": 29.2, "learning_rate": 3.540600863047022e-05, "loss": 2.2236, "step": 5893500 }, { "epoch": 29.2, "learning_rate": 3.5404770044044136e-05, "loss": 2.2591, "step": 5894000 }, { "epoch": 29.2, "learning_rate": 3.540353145761805e-05, "loss": 2.2513, "step": 5894500 }, { "epoch": 29.21, "learning_rate": 3.540229534836482e-05, "loss": 2.2299, "step": 5895000 }, { "epoch": 29.21, "learning_rate": 3.540105676193874e-05, "loss": 2.2314, "step": 5895500 }, { "epoch": 29.21, "learning_rate": 3.539981817551265e-05, "loss": 2.2248, "step": 5896000 }, { "epoch": 29.21, "learning_rate": 3.5398579589086566e-05, "loss": 2.2565, "step": 5896500 }, { "epoch": 29.22, "learning_rate": 3.539734100266048e-05, "loss": 2.207, "step": 5897000 }, { "epoch": 29.22, "learning_rate": 3.53961024162344e-05, "loss": 2.2213, "step": 5897500 }, { "epoch": 29.22, "learning_rate": 3.5394863829808316e-05, "loss": 2.2324, "step": 5898000 }, { "epoch": 29.22, "learning_rate": 3.539362524338223e-05, "loss": 2.2222, "step": 5898500 }, { "epoch": 29.23, "learning_rate": 3.5392389134129e-05, "loss": 2.2263, "step": 5899000 }, { "epoch": 29.23, "learning_rate": 3.539115054770292e-05, "loss": 2.2123, "step": 5899500 }, { "epoch": 29.23, "learning_rate": 3.5389911961276836e-05, "loss": 2.2744, "step": 5900000 }, { "epoch": 29.23, "learning_rate": 3.538867337485075e-05, "loss": 2.231, "step": 5900500 }, { "epoch": 29.24, "learning_rate": 3.538743478842467e-05, "loss": 2.2395, "step": 5901000 }, { "epoch": 29.24, "learning_rate": 3.538619867917144e-05, "loss": 2.2213, "step": 5901500 }, { "epoch": 29.24, "learning_rate": 3.538496256991821e-05, "loss": 2.2558, "step": 5902000 }, { "epoch": 29.24, "learning_rate": 3.5383723983492124e-05, "loss": 2.234, "step": 5902500 }, { "epoch": 29.25, "learning_rate": 3.538248539706604e-05, "loss": 2.2555, "step": 5903000 }, { "epoch": 29.25, "learning_rate": 3.538124681063996e-05, "loss": 2.245, "step": 5903500 }, { "epoch": 29.25, "learning_rate": 3.5380008224213875e-05, "loss": 2.239, "step": 5904000 }, { "epoch": 29.25, "learning_rate": 3.537876963778779e-05, "loss": 2.2479, "step": 5904500 }, { "epoch": 29.26, "learning_rate": 3.53775310513617e-05, "loss": 2.2461, "step": 5905000 }, { "epoch": 29.26, "learning_rate": 3.537629494210847e-05, "loss": 2.2416, "step": 5905500 }, { "epoch": 29.26, "learning_rate": 3.537505635568239e-05, "loss": 2.2077, "step": 5906000 }, { "epoch": 29.26, "learning_rate": 3.5373817769256305e-05, "loss": 2.2354, "step": 5906500 }, { "epoch": 29.27, "learning_rate": 3.537257918283022e-05, "loss": 2.2289, "step": 5907000 }, { "epoch": 29.27, "learning_rate": 3.537134059640414e-05, "loss": 2.2317, "step": 5907500 }, { "epoch": 29.27, "learning_rate": 3.5370102009978056e-05, "loss": 2.2199, "step": 5908000 }, { "epoch": 29.27, "learning_rate": 3.5368863423551966e-05, "loss": 2.2475, "step": 5908500 }, { "epoch": 29.28, "learning_rate": 3.536762483712588e-05, "loss": 2.2429, "step": 5909000 }, { "epoch": 29.28, "learning_rate": 3.53663862506998e-05, "loss": 2.2527, "step": 5909500 }, { "epoch": 29.28, "learning_rate": 3.5365150141446575e-05, "loss": 2.2502, "step": 5910000 }, { "epoch": 29.28, "learning_rate": 3.536391403219334e-05, "loss": 2.2442, "step": 5910500 }, { "epoch": 29.29, "learning_rate": 3.5362675445767254e-05, "loss": 2.2257, "step": 5911000 }, { "epoch": 29.29, "learning_rate": 3.536143685934117e-05, "loss": 2.2407, "step": 5911500 }, { "epoch": 29.29, "learning_rate": 3.536019827291509e-05, "loss": 2.238, "step": 5912000 }, { "epoch": 29.29, "learning_rate": 3.5358959686489005e-05, "loss": 2.2623, "step": 5912500 }, { "epoch": 29.3, "learning_rate": 3.535772110006292e-05, "loss": 2.228, "step": 5913000 }, { "epoch": 29.3, "learning_rate": 3.535648251363684e-05, "loss": 2.2201, "step": 5913500 }, { "epoch": 29.3, "learning_rate": 3.535524640438361e-05, "loss": 2.2372, "step": 5914000 }, { "epoch": 29.3, "learning_rate": 3.5354007817957525e-05, "loss": 2.2437, "step": 5914500 }, { "epoch": 29.3, "learning_rate": 3.535276923153144e-05, "loss": 2.2395, "step": 5915000 }, { "epoch": 29.31, "learning_rate": 3.535153312227821e-05, "loss": 2.223, "step": 5915500 }, { "epoch": 29.31, "learning_rate": 3.535029453585213e-05, "loss": 2.2415, "step": 5916000 }, { "epoch": 29.31, "learning_rate": 3.5349055949426044e-05, "loss": 2.2512, "step": 5916500 }, { "epoch": 29.31, "learning_rate": 3.5347817362999954e-05, "loss": 2.2071, "step": 5917000 }, { "epoch": 29.32, "learning_rate": 3.534657877657387e-05, "loss": 2.2317, "step": 5917500 }, { "epoch": 29.32, "learning_rate": 3.534534019014779e-05, "loss": 2.2262, "step": 5918000 }, { "epoch": 29.32, "learning_rate": 3.5344101603721705e-05, "loss": 2.2189, "step": 5918500 }, { "epoch": 29.32, "learning_rate": 3.534286301729562e-05, "loss": 2.2357, "step": 5919000 }, { "epoch": 29.33, "learning_rate": 3.534162690804239e-05, "loss": 2.245, "step": 5919500 }, { "epoch": 29.33, "learning_rate": 3.534038832161631e-05, "loss": 2.2388, "step": 5920000 }, { "epoch": 29.33, "learning_rate": 3.5339149735190225e-05, "loss": 2.2373, "step": 5920500 }, { "epoch": 29.33, "learning_rate": 3.533791114876414e-05, "loss": 2.2406, "step": 5921000 }, { "epoch": 29.34, "learning_rate": 3.533667256233806e-05, "loss": 2.2509, "step": 5921500 }, { "epoch": 29.34, "learning_rate": 3.5335433975911976e-05, "loss": 2.2308, "step": 5922000 }, { "epoch": 29.34, "learning_rate": 3.533419538948589e-05, "loss": 2.2571, "step": 5922500 }, { "epoch": 29.34, "learning_rate": 3.533295680305981e-05, "loss": 2.2244, "step": 5923000 }, { "epoch": 29.35, "learning_rate": 3.5331718216633726e-05, "loss": 2.2334, "step": 5923500 }, { "epoch": 29.35, "learning_rate": 3.5330479630207637e-05, "loss": 2.2752, "step": 5924000 }, { "epoch": 29.35, "learning_rate": 3.5329241043781553e-05, "loss": 2.2395, "step": 5924500 }, { "epoch": 29.35, "learning_rate": 3.532800245735547e-05, "loss": 2.2595, "step": 5925000 }, { "epoch": 29.36, "learning_rate": 3.532676387092939e-05, "loss": 2.2495, "step": 5925500 }, { "epoch": 29.36, "learning_rate": 3.5325525284503304e-05, "loss": 2.2548, "step": 5926000 }, { "epoch": 29.36, "learning_rate": 3.532428669807722e-05, "loss": 2.2415, "step": 5926500 }, { "epoch": 29.36, "learning_rate": 3.532304811165114e-05, "loss": 2.2515, "step": 5927000 }, { "epoch": 29.37, "learning_rate": 3.532181447957076e-05, "loss": 2.2455, "step": 5927500 }, { "epoch": 29.37, "learning_rate": 3.532057837031753e-05, "loss": 2.2333, "step": 5928000 }, { "epoch": 29.37, "learning_rate": 3.5319339783891445e-05, "loss": 2.2581, "step": 5928500 }, { "epoch": 29.37, "learning_rate": 3.531810119746536e-05, "loss": 2.2516, "step": 5929000 }, { "epoch": 29.38, "learning_rate": 3.5316865088212123e-05, "loss": 2.2583, "step": 5929500 }, { "epoch": 29.38, "learning_rate": 3.53156289789589e-05, "loss": 2.2352, "step": 5930000 }, { "epoch": 29.38, "learning_rate": 3.5314390392532816e-05, "loss": 2.2317, "step": 5930500 }, { "epoch": 29.38, "learning_rate": 3.5313151806106726e-05, "loss": 2.2426, "step": 5931000 }, { "epoch": 29.39, "learning_rate": 3.531191321968064e-05, "loss": 2.2369, "step": 5931500 }, { "epoch": 29.39, "learning_rate": 3.531067463325456e-05, "loss": 2.2255, "step": 5932000 }, { "epoch": 29.39, "learning_rate": 3.530943604682848e-05, "loss": 2.2445, "step": 5932500 }, { "epoch": 29.39, "learning_rate": 3.5308197460402394e-05, "loss": 2.2298, "step": 5933000 }, { "epoch": 29.4, "learning_rate": 3.530695887397631e-05, "loss": 2.2297, "step": 5933500 }, { "epoch": 29.4, "learning_rate": 3.530572028755023e-05, "loss": 2.2726, "step": 5934000 }, { "epoch": 29.4, "learning_rate": 3.5304481701124145e-05, "loss": 2.2399, "step": 5934500 }, { "epoch": 29.4, "learning_rate": 3.530324311469806e-05, "loss": 2.262, "step": 5935000 }, { "epoch": 29.41, "learning_rate": 3.530200452827198e-05, "loss": 2.2255, "step": 5935500 }, { "epoch": 29.41, "learning_rate": 3.5300765941845895e-05, "loss": 2.2357, "step": 5936000 }, { "epoch": 29.41, "learning_rate": 3.529952735541981e-05, "loss": 2.2195, "step": 5936500 }, { "epoch": 29.41, "learning_rate": 3.5298291246166574e-05, "loss": 2.2428, "step": 5937000 }, { "epoch": 29.42, "learning_rate": 3.529705513691334e-05, "loss": 2.2513, "step": 5937500 }, { "epoch": 29.42, "learning_rate": 3.529581655048726e-05, "loss": 2.2211, "step": 5938000 }, { "epoch": 29.42, "learning_rate": 3.529457796406118e-05, "loss": 2.2356, "step": 5938500 }, { "epoch": 29.42, "learning_rate": 3.5293339377635094e-05, "loss": 2.2263, "step": 5939000 }, { "epoch": 29.43, "learning_rate": 3.529210079120901e-05, "loss": 2.2276, "step": 5939500 }, { "epoch": 29.43, "learning_rate": 3.529086220478293e-05, "loss": 2.2462, "step": 5940000 }, { "epoch": 29.43, "learning_rate": 3.5289623618356845e-05, "loss": 2.1902, "step": 5940500 }, { "epoch": 29.43, "learning_rate": 3.528838503193076e-05, "loss": 2.26, "step": 5941000 }, { "epoch": 29.44, "learning_rate": 3.528714644550468e-05, "loss": 2.2554, "step": 5941500 }, { "epoch": 29.44, "learning_rate": 3.5285907859078596e-05, "loss": 2.2377, "step": 5942000 }, { "epoch": 29.44, "learning_rate": 3.528466927265251e-05, "loss": 2.2355, "step": 5942500 }, { "epoch": 29.44, "learning_rate": 3.528343068622643e-05, "loss": 2.2532, "step": 5943000 }, { "epoch": 29.45, "learning_rate": 3.528219705414604e-05, "loss": 2.2435, "step": 5943500 }, { "epoch": 29.45, "learning_rate": 3.528096094489282e-05, "loss": 2.2458, "step": 5944000 }, { "epoch": 29.45, "learning_rate": 3.527972235846673e-05, "loss": 2.2236, "step": 5944500 }, { "epoch": 29.45, "learning_rate": 3.5278483772040646e-05, "loss": 2.2477, "step": 5945000 }, { "epoch": 29.46, "learning_rate": 3.527724518561456e-05, "loss": 2.2348, "step": 5945500 }, { "epoch": 29.46, "learning_rate": 3.527600659918848e-05, "loss": 2.2568, "step": 5946000 }, { "epoch": 29.46, "learning_rate": 3.52747680127624e-05, "loss": 2.2553, "step": 5946500 }, { "epoch": 29.46, "learning_rate": 3.527352942633631e-05, "loss": 2.246, "step": 5947000 }, { "epoch": 29.47, "learning_rate": 3.5272290839910224e-05, "loss": 2.258, "step": 5947500 }, { "epoch": 29.47, "learning_rate": 3.527105225348414e-05, "loss": 2.2518, "step": 5948000 }, { "epoch": 29.47, "learning_rate": 3.5269816144230916e-05, "loss": 2.2239, "step": 5948500 }, { "epoch": 29.47, "learning_rate": 3.526857755780483e-05, "loss": 2.2415, "step": 5949000 }, { "epoch": 29.48, "learning_rate": 3.5267338971378743e-05, "loss": 2.256, "step": 5949500 }, { "epoch": 29.48, "learning_rate": 3.526610038495266e-05, "loss": 2.2548, "step": 5950000 }, { "epoch": 29.48, "learning_rate": 3.5264864275699436e-05, "loss": 2.2458, "step": 5950500 }, { "epoch": 29.48, "learning_rate": 3.526362568927335e-05, "loss": 2.2636, "step": 5951000 }, { "epoch": 29.49, "learning_rate": 3.526238710284726e-05, "loss": 2.221, "step": 5951500 }, { "epoch": 29.49, "learning_rate": 3.526114851642118e-05, "loss": 2.2384, "step": 5952000 }, { "epoch": 29.49, "learning_rate": 3.525991240716795e-05, "loss": 2.2514, "step": 5952500 }, { "epoch": 29.49, "learning_rate": 3.5258673820741866e-05, "loss": 2.2481, "step": 5953000 }, { "epoch": 29.5, "learning_rate": 3.525743523431578e-05, "loss": 2.205, "step": 5953500 }, { "epoch": 29.5, "learning_rate": 3.52561966478897e-05, "loss": 2.2412, "step": 5954000 }, { "epoch": 29.5, "learning_rate": 3.5254958061463617e-05, "loss": 2.2481, "step": 5954500 }, { "epoch": 29.5, "learning_rate": 3.5253719475037533e-05, "loss": 2.239, "step": 5955000 }, { "epoch": 29.51, "learning_rate": 3.52524833657843e-05, "loss": 2.2468, "step": 5955500 }, { "epoch": 29.51, "learning_rate": 3.525124477935822e-05, "loss": 2.2548, "step": 5956000 }, { "epoch": 29.51, "learning_rate": 3.5250006192932136e-05, "loss": 2.2493, "step": 5956500 }, { "epoch": 29.51, "learning_rate": 3.524876760650605e-05, "loss": 2.2602, "step": 5957000 }, { "epoch": 29.52, "learning_rate": 3.5247531497252815e-05, "loss": 2.255, "step": 5957500 }, { "epoch": 29.52, "learning_rate": 3.524629291082673e-05, "loss": 2.2757, "step": 5958000 }, { "epoch": 29.52, "learning_rate": 3.524505432440065e-05, "loss": 2.2548, "step": 5958500 }, { "epoch": 29.52, "learning_rate": 3.5243815737974566e-05, "loss": 2.2651, "step": 5959000 }, { "epoch": 29.53, "learning_rate": 3.524257715154848e-05, "loss": 2.2414, "step": 5959500 }, { "epoch": 29.53, "learning_rate": 3.52413385651224e-05, "loss": 2.2512, "step": 5960000 }, { "epoch": 29.53, "learning_rate": 3.524009997869632e-05, "loss": 2.2631, "step": 5960500 }, { "epoch": 29.53, "learning_rate": 3.5238861392270234e-05, "loss": 2.254, "step": 5961000 }, { "epoch": 29.54, "learning_rate": 3.523762280584415e-05, "loss": 2.2334, "step": 5961500 }, { "epoch": 29.54, "learning_rate": 3.523638421941806e-05, "loss": 2.2553, "step": 5962000 }, { "epoch": 29.54, "learning_rate": 3.5235148110164836e-05, "loss": 2.2297, "step": 5962500 }, { "epoch": 29.54, "learning_rate": 3.523390952373875e-05, "loss": 2.2426, "step": 5963000 }, { "epoch": 29.55, "learning_rate": 3.5232673414485515e-05, "loss": 2.2523, "step": 5963500 }, { "epoch": 29.55, "learning_rate": 3.523143482805943e-05, "loss": 2.2493, "step": 5964000 }, { "epoch": 29.55, "learning_rate": 3.523019624163335e-05, "loss": 2.2559, "step": 5964500 }, { "epoch": 29.55, "learning_rate": 3.5228957655207266e-05, "loss": 2.2758, "step": 5965000 }, { "epoch": 29.56, "learning_rate": 3.522771906878118e-05, "loss": 2.2565, "step": 5965500 }, { "epoch": 29.56, "learning_rate": 3.52264804823551e-05, "loss": 2.2483, "step": 5966000 }, { "epoch": 29.56, "learning_rate": 3.522524189592902e-05, "loss": 2.2588, "step": 5966500 }, { "epoch": 29.56, "learning_rate": 3.5224003309502934e-05, "loss": 2.2445, "step": 5967000 }, { "epoch": 29.57, "learning_rate": 3.52227672002497e-05, "loss": 2.274, "step": 5967500 }, { "epoch": 29.57, "learning_rate": 3.522152861382362e-05, "loss": 2.2289, "step": 5968000 }, { "epoch": 29.57, "learning_rate": 3.5220290027397536e-05, "loss": 2.2546, "step": 5968500 }, { "epoch": 29.57, "learning_rate": 3.52190539181443e-05, "loss": 2.2503, "step": 5969000 }, { "epoch": 29.57, "learning_rate": 3.5217815331718215e-05, "loss": 2.2506, "step": 5969500 }, { "epoch": 29.58, "learning_rate": 3.521657674529213e-05, "loss": 2.251, "step": 5970000 }, { "epoch": 29.58, "learning_rate": 3.52153406360389e-05, "loss": 2.2336, "step": 5970500 }, { "epoch": 29.58, "learning_rate": 3.521410204961282e-05, "loss": 2.2587, "step": 5971000 }, { "epoch": 29.58, "learning_rate": 3.5212863463186735e-05, "loss": 2.2595, "step": 5971500 }, { "epoch": 29.59, "learning_rate": 3.5211627353933504e-05, "loss": 2.2517, "step": 5972000 }, { "epoch": 29.59, "learning_rate": 3.521038876750742e-05, "loss": 2.258, "step": 5972500 }, { "epoch": 29.59, "learning_rate": 3.520915018108134e-05, "loss": 2.251, "step": 5973000 }, { "epoch": 29.59, "learning_rate": 3.5207911594655255e-05, "loss": 2.2584, "step": 5973500 }, { "epoch": 29.6, "learning_rate": 3.520667300822917e-05, "loss": 2.2444, "step": 5974000 }, { "epoch": 29.6, "learning_rate": 3.520543442180309e-05, "loss": 2.2307, "step": 5974500 }, { "epoch": 29.6, "learning_rate": 3.5204195835377e-05, "loss": 2.2548, "step": 5975000 }, { "epoch": 29.6, "learning_rate": 3.5202957248950915e-05, "loss": 2.2333, "step": 5975500 }, { "epoch": 29.61, "learning_rate": 3.520171866252483e-05, "loss": 2.2488, "step": 5976000 }, { "epoch": 29.61, "learning_rate": 3.520048007609875e-05, "loss": 2.2246, "step": 5976500 }, { "epoch": 29.61, "learning_rate": 3.5199241489672666e-05, "loss": 2.2331, "step": 5977000 }, { "epoch": 29.61, "learning_rate": 3.519800290324658e-05, "loss": 2.2524, "step": 5977500 }, { "epoch": 29.62, "learning_rate": 3.51967643168205e-05, "loss": 2.2479, "step": 5978000 }, { "epoch": 29.62, "learning_rate": 3.519552573039442e-05, "loss": 2.2346, "step": 5978500 }, { "epoch": 29.62, "learning_rate": 3.5194287143968334e-05, "loss": 2.2478, "step": 5979000 }, { "epoch": 29.62, "learning_rate": 3.519304855754225e-05, "loss": 2.2465, "step": 5979500 }, { "epoch": 29.63, "learning_rate": 3.519180997111617e-05, "loss": 2.2337, "step": 5980000 }, { "epoch": 29.63, "learning_rate": 3.5190571384690085e-05, "loss": 2.2392, "step": 5980500 }, { "epoch": 29.63, "learning_rate": 3.5189335275436854e-05, "loss": 2.2563, "step": 5981000 }, { "epoch": 29.63, "learning_rate": 3.518809668901077e-05, "loss": 2.2376, "step": 5981500 }, { "epoch": 29.64, "learning_rate": 3.5186863056930384e-05, "loss": 2.2474, "step": 5982000 }, { "epoch": 29.64, "learning_rate": 3.51856244705043e-05, "loss": 2.2415, "step": 5982500 }, { "epoch": 29.64, "learning_rate": 3.518438588407822e-05, "loss": 2.2424, "step": 5983000 }, { "epoch": 29.64, "learning_rate": 3.5183147297652135e-05, "loss": 2.2372, "step": 5983500 }, { "epoch": 29.65, "learning_rate": 3.518190871122605e-05, "loss": 2.2606, "step": 5984000 }, { "epoch": 29.65, "learning_rate": 3.518067260197282e-05, "loss": 2.2476, "step": 5984500 }, { "epoch": 29.65, "learning_rate": 3.517943401554674e-05, "loss": 2.2595, "step": 5985000 }, { "epoch": 29.65, "learning_rate": 3.5178195429120655e-05, "loss": 2.2325, "step": 5985500 }, { "epoch": 29.66, "learning_rate": 3.517695684269457e-05, "loss": 2.255, "step": 5986000 }, { "epoch": 29.66, "learning_rate": 3.517571825626849e-05, "loss": 2.2224, "step": 5986500 }, { "epoch": 29.66, "learning_rate": 3.5174479669842406e-05, "loss": 2.2385, "step": 5987000 }, { "epoch": 29.66, "learning_rate": 3.517324108341632e-05, "loss": 2.2482, "step": 5987500 }, { "epoch": 29.67, "learning_rate": 3.517200249699024e-05, "loss": 2.2485, "step": 5988000 }, { "epoch": 29.67, "learning_rate": 3.517076391056415e-05, "loss": 2.2526, "step": 5988500 }, { "epoch": 29.67, "learning_rate": 3.5169525324138066e-05, "loss": 2.2193, "step": 5989000 }, { "epoch": 29.67, "learning_rate": 3.5168286737711983e-05, "loss": 2.2374, "step": 5989500 }, { "epoch": 29.68, "learning_rate": 3.51670481512859e-05, "loss": 2.2634, "step": 5990000 }, { "epoch": 29.68, "learning_rate": 3.516580956485982e-05, "loss": 2.24, "step": 5990500 }, { "epoch": 29.68, "learning_rate": 3.5164570978433734e-05, "loss": 2.2492, "step": 5991000 }, { "epoch": 29.68, "learning_rate": 3.516333239200765e-05, "loss": 2.265, "step": 5991500 }, { "epoch": 29.69, "learning_rate": 3.516209380558157e-05, "loss": 2.2368, "step": 5992000 }, { "epoch": 29.69, "learning_rate": 3.5160855219155485e-05, "loss": 2.2397, "step": 5992500 }, { "epoch": 29.69, "learning_rate": 3.51596166327294e-05, "loss": 2.2589, "step": 5993000 }, { "epoch": 29.69, "learning_rate": 3.515837804630331e-05, "loss": 2.246, "step": 5993500 }, { "epoch": 29.7, "learning_rate": 3.515714193705009e-05, "loss": 2.2362, "step": 5994000 }, { "epoch": 29.7, "learning_rate": 3.5155905827796856e-05, "loss": 2.2473, "step": 5994500 }, { "epoch": 29.7, "learning_rate": 3.515466724137077e-05, "loss": 2.2488, "step": 5995000 }, { "epoch": 29.7, "learning_rate": 3.5153428654944684e-05, "loss": 2.2277, "step": 5995500 }, { "epoch": 29.71, "learning_rate": 3.51521900685186e-05, "loss": 2.2283, "step": 5996000 }, { "epoch": 29.71, "learning_rate": 3.515095148209252e-05, "loss": 2.245, "step": 5996500 }, { "epoch": 29.71, "learning_rate": 3.5149715372839286e-05, "loss": 2.2349, "step": 5997000 }, { "epoch": 29.71, "learning_rate": 3.51484767864132e-05, "loss": 2.2465, "step": 5997500 }, { "epoch": 29.72, "learning_rate": 3.514723819998712e-05, "loss": 2.2382, "step": 5998000 }, { "epoch": 29.72, "learning_rate": 3.514599961356104e-05, "loss": 2.2591, "step": 5998500 }, { "epoch": 29.72, "learning_rate": 3.5144761027134954e-05, "loss": 2.2273, "step": 5999000 }, { "epoch": 29.72, "learning_rate": 3.514352244070887e-05, "loss": 2.2438, "step": 5999500 }, { "epoch": 29.73, "learning_rate": 3.514228385428279e-05, "loss": 2.2216, "step": 6000000 }, { "epoch": 29.73, "learning_rate": 3.5141045267856705e-05, "loss": 2.2397, "step": 6000500 }, { "epoch": 29.73, "learning_rate": 3.513980668143062e-05, "loss": 2.2442, "step": 6001000 }, { "epoch": 29.73, "learning_rate": 3.513856809500454e-05, "loss": 2.2634, "step": 6001500 }, { "epoch": 29.74, "learning_rate": 3.5137329508578455e-05, "loss": 2.2353, "step": 6002000 }, { "epoch": 29.74, "learning_rate": 3.5136090922152366e-05, "loss": 2.2581, "step": 6002500 }, { "epoch": 29.74, "learning_rate": 3.513485233572628e-05, "loss": 2.2491, "step": 6003000 }, { "epoch": 29.74, "learning_rate": 3.51336137493002e-05, "loss": 2.2587, "step": 6003500 }, { "epoch": 29.75, "learning_rate": 3.513238011721982e-05, "loss": 2.2485, "step": 6004000 }, { "epoch": 29.75, "learning_rate": 3.513114153079374e-05, "loss": 2.2202, "step": 6004500 }, { "epoch": 29.75, "learning_rate": 3.5129902944367654e-05, "loss": 2.2374, "step": 6005000 }, { "epoch": 29.75, "learning_rate": 3.512866435794157e-05, "loss": 2.2464, "step": 6005500 }, { "epoch": 29.76, "learning_rate": 3.512742824868834e-05, "loss": 2.2466, "step": 6006000 }, { "epoch": 29.76, "learning_rate": 3.512618966226226e-05, "loss": 2.2135, "step": 6006500 }, { "epoch": 29.76, "learning_rate": 3.5124951075836174e-05, "loss": 2.2327, "step": 6007000 }, { "epoch": 29.76, "learning_rate": 3.512371248941009e-05, "loss": 2.2538, "step": 6007500 }, { "epoch": 29.77, "learning_rate": 3.512247390298401e-05, "loss": 2.241, "step": 6008000 }, { "epoch": 29.77, "learning_rate": 3.512123779373077e-05, "loss": 2.234, "step": 6008500 }, { "epoch": 29.77, "learning_rate": 3.5119999207304686e-05, "loss": 2.2631, "step": 6009000 }, { "epoch": 29.77, "learning_rate": 3.51187606208786e-05, "loss": 2.2543, "step": 6009500 }, { "epoch": 29.78, "learning_rate": 3.511752203445252e-05, "loss": 2.2295, "step": 6010000 }, { "epoch": 29.78, "learning_rate": 3.511628344802644e-05, "loss": 2.2397, "step": 6010500 }, { "epoch": 29.78, "learning_rate": 3.5115044861600354e-05, "loss": 2.2528, "step": 6011000 }, { "epoch": 29.78, "learning_rate": 3.511380627517427e-05, "loss": 2.2429, "step": 6011500 }, { "epoch": 29.79, "learning_rate": 3.511256768874819e-05, "loss": 2.2785, "step": 6012000 }, { "epoch": 29.79, "learning_rate": 3.5111329102322105e-05, "loss": 2.2572, "step": 6012500 }, { "epoch": 29.79, "learning_rate": 3.511009051589602e-05, "loss": 2.2329, "step": 6013000 }, { "epoch": 29.79, "learning_rate": 3.510885192946994e-05, "loss": 2.2503, "step": 6013500 }, { "epoch": 29.8, "learning_rate": 3.5107613343043856e-05, "loss": 2.2507, "step": 6014000 }, { "epoch": 29.8, "learning_rate": 3.510637475661777e-05, "loss": 2.2361, "step": 6014500 }, { "epoch": 29.8, "learning_rate": 3.5105141124537387e-05, "loss": 2.2559, "step": 6015000 }, { "epoch": 29.8, "learning_rate": 3.5103902538111303e-05, "loss": 2.2426, "step": 6015500 }, { "epoch": 29.81, "learning_rate": 3.510266395168522e-05, "loss": 2.2477, "step": 6016000 }, { "epoch": 29.81, "learning_rate": 3.510142536525914e-05, "loss": 2.235, "step": 6016500 }, { "epoch": 29.81, "learning_rate": 3.5100186778833054e-05, "loss": 2.2402, "step": 6017000 }, { "epoch": 29.81, "learning_rate": 3.509894819240697e-05, "loss": 2.2432, "step": 6017500 }, { "epoch": 29.82, "learning_rate": 3.509770960598089e-05, "loss": 2.2734, "step": 6018000 }, { "epoch": 29.82, "learning_rate": 3.5096471019554805e-05, "loss": 2.2639, "step": 6018500 }, { "epoch": 29.82, "learning_rate": 3.5095234910301574e-05, "loss": 2.2591, "step": 6019000 }, { "epoch": 29.82, "learning_rate": 3.509399632387549e-05, "loss": 2.2149, "step": 6019500 }, { "epoch": 29.83, "learning_rate": 3.509276021462225e-05, "loss": 2.2437, "step": 6020000 }, { "epoch": 29.83, "learning_rate": 3.509152162819617e-05, "loss": 2.2636, "step": 6020500 }, { "epoch": 29.83, "learning_rate": 3.509028304177009e-05, "loss": 2.2386, "step": 6021000 }, { "epoch": 29.83, "learning_rate": 3.5089044455344004e-05, "loss": 2.2542, "step": 6021500 }, { "epoch": 29.84, "learning_rate": 3.508780586891792e-05, "loss": 2.2627, "step": 6022000 }, { "epoch": 29.84, "learning_rate": 3.508656975966469e-05, "loss": 2.2434, "step": 6022500 }, { "epoch": 29.84, "learning_rate": 3.5085331173238606e-05, "loss": 2.2404, "step": 6023000 }, { "epoch": 29.84, "learning_rate": 3.508409258681252e-05, "loss": 2.2144, "step": 6023500 }, { "epoch": 29.84, "learning_rate": 3.508285400038644e-05, "loss": 2.2302, "step": 6024000 }, { "epoch": 29.85, "learning_rate": 3.508161541396036e-05, "loss": 2.2015, "step": 6024500 }, { "epoch": 29.85, "learning_rate": 3.5080379304707126e-05, "loss": 2.2531, "step": 6025000 }, { "epoch": 29.85, "learning_rate": 3.507914071828104e-05, "loss": 2.2415, "step": 6025500 }, { "epoch": 29.85, "learning_rate": 3.507790213185495e-05, "loss": 2.2345, "step": 6026000 }, { "epoch": 29.86, "learning_rate": 3.507666354542887e-05, "loss": 2.246, "step": 6026500 }, { "epoch": 29.86, "learning_rate": 3.507542495900279e-05, "loss": 2.2626, "step": 6027000 }, { "epoch": 29.86, "learning_rate": 3.5074186372576704e-05, "loss": 2.2182, "step": 6027500 }, { "epoch": 29.86, "learning_rate": 3.507295026332348e-05, "loss": 2.2315, "step": 6028000 }, { "epoch": 29.87, "learning_rate": 3.507171167689739e-05, "loss": 2.26, "step": 6028500 }, { "epoch": 29.87, "learning_rate": 3.5070473090471306e-05, "loss": 2.2435, "step": 6029000 }, { "epoch": 29.87, "learning_rate": 3.506923450404522e-05, "loss": 2.2568, "step": 6029500 }, { "epoch": 29.87, "learning_rate": 3.506799591761914e-05, "loss": 2.2584, "step": 6030000 }, { "epoch": 29.88, "learning_rate": 3.506675733119306e-05, "loss": 2.2313, "step": 6030500 }, { "epoch": 29.88, "learning_rate": 3.5065518744766974e-05, "loss": 2.2419, "step": 6031000 }, { "epoch": 29.88, "learning_rate": 3.506428015834089e-05, "loss": 2.2516, "step": 6031500 }, { "epoch": 29.88, "learning_rate": 3.506304157191481e-05, "loss": 2.2528, "step": 6032000 }, { "epoch": 29.89, "learning_rate": 3.506180793983443e-05, "loss": 2.2518, "step": 6032500 }, { "epoch": 29.89, "learning_rate": 3.5060569353408346e-05, "loss": 2.2389, "step": 6033000 }, { "epoch": 29.89, "learning_rate": 3.505933076698226e-05, "loss": 2.2587, "step": 6033500 }, { "epoch": 29.89, "learning_rate": 3.505809218055618e-05, "loss": 2.2524, "step": 6034000 }, { "epoch": 29.9, "learning_rate": 3.505685607130295e-05, "loss": 2.2516, "step": 6034500 }, { "epoch": 29.9, "learning_rate": 3.5055617484876865e-05, "loss": 2.244, "step": 6035000 }, { "epoch": 29.9, "learning_rate": 3.505437889845078e-05, "loss": 2.2784, "step": 6035500 }, { "epoch": 29.9, "learning_rate": 3.50531403120247e-05, "loss": 2.2512, "step": 6036000 }, { "epoch": 29.91, "learning_rate": 3.5051901725598616e-05, "loss": 2.2421, "step": 6036500 }, { "epoch": 29.91, "learning_rate": 3.5050663139172526e-05, "loss": 2.2322, "step": 6037000 }, { "epoch": 29.91, "learning_rate": 3.5049427029919295e-05, "loss": 2.2487, "step": 6037500 }, { "epoch": 29.91, "learning_rate": 3.5048190920666064e-05, "loss": 2.2687, "step": 6038000 }, { "epoch": 29.92, "learning_rate": 3.504695233423998e-05, "loss": 2.2331, "step": 6038500 }, { "epoch": 29.92, "learning_rate": 3.50457137478139e-05, "loss": 2.2588, "step": 6039000 }, { "epoch": 29.92, "learning_rate": 3.5044475161387815e-05, "loss": 2.2409, "step": 6039500 }, { "epoch": 29.92, "learning_rate": 3.504323657496173e-05, "loss": 2.2369, "step": 6040000 }, { "epoch": 29.93, "learning_rate": 3.504199798853565e-05, "loss": 2.2599, "step": 6040500 }, { "epoch": 29.93, "learning_rate": 3.504076683362812e-05, "loss": 2.2484, "step": 6041000 }, { "epoch": 29.93, "learning_rate": 3.503952824720204e-05, "loss": 2.2689, "step": 6041500 }, { "epoch": 29.93, "learning_rate": 3.5038289660775955e-05, "loss": 2.2367, "step": 6042000 }, { "epoch": 29.94, "learning_rate": 3.503705107434987e-05, "loss": 2.2461, "step": 6042500 }, { "epoch": 29.94, "learning_rate": 3.503581248792379e-05, "loss": 2.2407, "step": 6043000 }, { "epoch": 29.94, "learning_rate": 3.5034573901497706e-05, "loss": 2.2564, "step": 6043500 }, { "epoch": 29.94, "learning_rate": 3.503333531507162e-05, "loss": 2.2676, "step": 6044000 }, { "epoch": 29.95, "learning_rate": 3.503209672864553e-05, "loss": 2.2478, "step": 6044500 }, { "epoch": 29.95, "learning_rate": 3.503085814221945e-05, "loss": 2.2507, "step": 6045000 }, { "epoch": 29.95, "learning_rate": 3.5029619555793367e-05, "loss": 2.2605, "step": 6045500 }, { "epoch": 29.95, "learning_rate": 3.5028383446540135e-05, "loss": 2.2413, "step": 6046000 }, { "epoch": 29.96, "learning_rate": 3.502714486011405e-05, "loss": 2.2463, "step": 6046500 }, { "epoch": 29.96, "learning_rate": 3.502590627368797e-05, "loss": 2.2444, "step": 6047000 }, { "epoch": 29.96, "learning_rate": 3.5024667687261886e-05, "loss": 2.2214, "step": 6047500 }, { "epoch": 29.96, "learning_rate": 3.5023429100835796e-05, "loss": 2.2366, "step": 6048000 }, { "epoch": 29.97, "learning_rate": 3.502219051440971e-05, "loss": 2.2388, "step": 6048500 }, { "epoch": 29.97, "learning_rate": 3.502095192798363e-05, "loss": 2.265, "step": 6049000 }, { "epoch": 29.97, "learning_rate": 3.501971334155755e-05, "loss": 2.2427, "step": 6049500 }, { "epoch": 29.97, "learning_rate": 3.5018474755131464e-05, "loss": 2.243, "step": 6050000 }, { "epoch": 29.98, "learning_rate": 3.501723616870538e-05, "loss": 2.2914, "step": 6050500 }, { "epoch": 29.98, "learning_rate": 3.50159975822793e-05, "loss": 2.2452, "step": 6051000 }, { "epoch": 29.98, "learning_rate": 3.5014758995853215e-05, "loss": 2.2627, "step": 6051500 }, { "epoch": 29.98, "learning_rate": 3.501352040942713e-05, "loss": 2.2594, "step": 6052000 }, { "epoch": 29.99, "learning_rate": 3.50122843001739e-05, "loss": 2.2361, "step": 6052500 }, { "epoch": 29.99, "learning_rate": 3.501104571374782e-05, "loss": 2.2669, "step": 6053000 }, { "epoch": 29.99, "learning_rate": 3.500980712732173e-05, "loss": 2.264, "step": 6053500 }, { "epoch": 29.99, "learning_rate": 3.5008568540895645e-05, "loss": 2.244, "step": 6054000 }, { "epoch": 30.0, "learning_rate": 3.500732995446956e-05, "loss": 2.2693, "step": 6054500 }, { "epoch": 30.0, "learning_rate": 3.500609384521633e-05, "loss": 2.2239, "step": 6055000 }, { "epoch": 30.0, "eval_accuracy": 0.6577156623596847, "eval_accuracy_mlm": 0.6136358534754317, "eval_accuracy_nsp": 0.8656725198953558, "eval_loss": 2.330178737640381, "eval_runtime": 146.0024, "eval_samples_per_second": 1746.266, "eval_steps_per_second": 72.766, "step": 6055290 }, { "epoch": 30.0, "learning_rate": 3.500485525879025e-05, "loss": 2.2413, "step": 6055500 }, { "epoch": 30.0, "learning_rate": 3.5003616672364164e-05, "loss": 2.2212, "step": 6056000 }, { "epoch": 30.01, "learning_rate": 3.500237808593808e-05, "loss": 2.2128, "step": 6056500 }, { "epoch": 30.01, "learning_rate": 3.500114197668485e-05, "loss": 2.225, "step": 6057000 }, { "epoch": 30.01, "learning_rate": 3.499990339025877e-05, "loss": 2.2007, "step": 6057500 }, { "epoch": 30.01, "learning_rate": 3.4998664803832684e-05, "loss": 2.2145, "step": 6058000 }, { "epoch": 30.02, "learning_rate": 3.499742869457945e-05, "loss": 2.2361, "step": 6058500 }, { "epoch": 30.02, "learning_rate": 3.499619010815337e-05, "loss": 2.2315, "step": 6059000 }, { "epoch": 30.02, "learning_rate": 3.4994951521727286e-05, "loss": 2.2215, "step": 6059500 }, { "epoch": 30.02, "learning_rate": 3.49937129353012e-05, "loss": 2.2348, "step": 6060000 }, { "epoch": 30.03, "learning_rate": 3.4992474348875113e-05, "loss": 2.198, "step": 6060500 }, { "epoch": 30.03, "learning_rate": 3.499123576244903e-05, "loss": 2.2208, "step": 6061000 }, { "epoch": 30.03, "learning_rate": 3.498999717602295e-05, "loss": 2.2319, "step": 6061500 }, { "epoch": 30.03, "learning_rate": 3.4988758589596864e-05, "loss": 2.1845, "step": 6062000 }, { "epoch": 30.04, "learning_rate": 3.498752000317078e-05, "loss": 2.2193, "step": 6062500 }, { "epoch": 30.04, "learning_rate": 3.49862814167447e-05, "loss": 2.215, "step": 6063000 }, { "epoch": 30.04, "learning_rate": 3.4985042830318615e-05, "loss": 2.2272, "step": 6063500 }, { "epoch": 30.04, "learning_rate": 3.4983806721065384e-05, "loss": 2.2079, "step": 6064000 }, { "epoch": 30.05, "learning_rate": 3.49825681346393e-05, "loss": 2.2012, "step": 6064500 }, { "epoch": 30.05, "learning_rate": 3.498132954821322e-05, "loss": 2.2302, "step": 6065000 }, { "epoch": 30.05, "learning_rate": 3.4980090961787135e-05, "loss": 2.2268, "step": 6065500 }, { "epoch": 30.05, "learning_rate": 3.497885237536105e-05, "loss": 2.2134, "step": 6066000 }, { "epoch": 30.06, "learning_rate": 3.497761378893497e-05, "loss": 2.2258, "step": 6066500 }, { "epoch": 30.06, "learning_rate": 3.497637520250888e-05, "loss": 2.244, "step": 6067000 }, { "epoch": 30.06, "learning_rate": 3.4975136616082796e-05, "loss": 2.2202, "step": 6067500 }, { "epoch": 30.06, "learning_rate": 3.4973900506829564e-05, "loss": 2.2133, "step": 6068000 }, { "epoch": 30.07, "learning_rate": 3.497266192040348e-05, "loss": 2.2548, "step": 6068500 }, { "epoch": 30.07, "learning_rate": 3.49714233339774e-05, "loss": 2.2356, "step": 6069000 }, { "epoch": 30.07, "learning_rate": 3.497018970189702e-05, "loss": 2.2452, "step": 6069500 }, { "epoch": 30.07, "learning_rate": 3.4968951115470936e-05, "loss": 2.2254, "step": 6070000 }, { "epoch": 30.08, "learning_rate": 3.4967715006217705e-05, "loss": 2.2346, "step": 6070500 }, { "epoch": 30.08, "learning_rate": 3.496647641979162e-05, "loss": 2.2092, "step": 6071000 }, { "epoch": 30.08, "learning_rate": 3.496523783336554e-05, "loss": 2.2229, "step": 6071500 }, { "epoch": 30.08, "learning_rate": 3.4963999246939455e-05, "loss": 2.2513, "step": 6072000 }, { "epoch": 30.09, "learning_rate": 3.496276066051337e-05, "loss": 2.2273, "step": 6072500 }, { "epoch": 30.09, "learning_rate": 3.496152207408729e-05, "loss": 2.2222, "step": 6073000 }, { "epoch": 30.09, "learning_rate": 3.4960283487661206e-05, "loss": 2.2396, "step": 6073500 }, { "epoch": 30.09, "learning_rate": 3.495904490123512e-05, "loss": 2.2165, "step": 6074000 }, { "epoch": 30.1, "learning_rate": 3.495780879198189e-05, "loss": 2.1869, "step": 6074500 }, { "epoch": 30.1, "learning_rate": 3.49565702055558e-05, "loss": 2.2352, "step": 6075000 }, { "epoch": 30.1, "learning_rate": 3.495533161912972e-05, "loss": 2.2344, "step": 6075500 }, { "epoch": 30.1, "learning_rate": 3.4954093032703636e-05, "loss": 2.2467, "step": 6076000 }, { "epoch": 30.11, "learning_rate": 3.495285444627755e-05, "loss": 2.2179, "step": 6076500 }, { "epoch": 30.11, "learning_rate": 3.495161585985147e-05, "loss": 2.2305, "step": 6077000 }, { "epoch": 30.11, "learning_rate": 3.495037727342539e-05, "loss": 2.2301, "step": 6077500 }, { "epoch": 30.11, "learning_rate": 3.494914364134501e-05, "loss": 2.2311, "step": 6078000 }, { "epoch": 30.11, "learning_rate": 3.4947905054918924e-05, "loss": 2.2474, "step": 6078500 }, { "epoch": 30.12, "learning_rate": 3.494666646849284e-05, "loss": 2.2372, "step": 6079000 }, { "epoch": 30.12, "learning_rate": 3.494542788206676e-05, "loss": 2.2275, "step": 6079500 }, { "epoch": 30.12, "learning_rate": 3.4944189295640675e-05, "loss": 2.2308, "step": 6080000 }, { "epoch": 30.12, "learning_rate": 3.494295070921459e-05, "loss": 2.2416, "step": 6080500 }, { "epoch": 30.13, "learning_rate": 3.494171212278851e-05, "loss": 2.2317, "step": 6081000 }, { "epoch": 30.13, "learning_rate": 3.494047353636242e-05, "loss": 2.1961, "step": 6081500 }, { "epoch": 30.13, "learning_rate": 3.4939234949936336e-05, "loss": 2.2102, "step": 6082000 }, { "epoch": 30.13, "learning_rate": 3.493799636351025e-05, "loss": 2.2449, "step": 6082500 }, { "epoch": 30.14, "learning_rate": 3.493676025425702e-05, "loss": 2.2392, "step": 6083000 }, { "epoch": 30.14, "learning_rate": 3.493552166783094e-05, "loss": 2.2121, "step": 6083500 }, { "epoch": 30.14, "learning_rate": 3.4934283081404856e-05, "loss": 2.2322, "step": 6084000 }, { "epoch": 30.14, "learning_rate": 3.493304449497877e-05, "loss": 2.2107, "step": 6084500 }, { "epoch": 30.15, "learning_rate": 3.493180590855269e-05, "loss": 2.2081, "step": 6085000 }, { "epoch": 30.15, "learning_rate": 3.4930567322126606e-05, "loss": 2.2319, "step": 6085500 }, { "epoch": 30.15, "learning_rate": 3.4929328735700523e-05, "loss": 2.2277, "step": 6086000 }, { "epoch": 30.15, "learning_rate": 3.492809262644729e-05, "loss": 2.2258, "step": 6086500 }, { "epoch": 30.16, "learning_rate": 3.492685404002121e-05, "loss": 2.224, "step": 6087000 }, { "epoch": 30.16, "learning_rate": 3.4925615453595126e-05, "loss": 2.2234, "step": 6087500 }, { "epoch": 30.16, "learning_rate": 3.492437934434189e-05, "loss": 2.2201, "step": 6088000 }, { "epoch": 30.16, "learning_rate": 3.4923140757915805e-05, "loss": 2.2361, "step": 6088500 }, { "epoch": 30.17, "learning_rate": 3.492190217148972e-05, "loss": 2.2149, "step": 6089000 }, { "epoch": 30.17, "learning_rate": 3.492066358506364e-05, "loss": 2.2179, "step": 6089500 }, { "epoch": 30.17, "learning_rate": 3.4919424998637556e-05, "loss": 2.2521, "step": 6090000 }, { "epoch": 30.17, "learning_rate": 3.491818641221147e-05, "loss": 2.2198, "step": 6090500 }, { "epoch": 30.18, "learning_rate": 3.491694782578539e-05, "loss": 2.2251, "step": 6091000 }, { "epoch": 30.18, "learning_rate": 3.4915709239359307e-05, "loss": 2.2303, "step": 6091500 }, { "epoch": 30.18, "learning_rate": 3.4914470652933224e-05, "loss": 2.2546, "step": 6092000 }, { "epoch": 30.18, "learning_rate": 3.491323206650714e-05, "loss": 2.2206, "step": 6092500 }, { "epoch": 30.19, "learning_rate": 3.491199595725391e-05, "loss": 2.2407, "step": 6093000 }, { "epoch": 30.19, "learning_rate": 3.4910757370827826e-05, "loss": 2.2124, "step": 6093500 }, { "epoch": 30.19, "learning_rate": 3.490951878440174e-05, "loss": 2.2374, "step": 6094000 }, { "epoch": 30.19, "learning_rate": 3.490828019797566e-05, "loss": 2.2252, "step": 6094500 }, { "epoch": 30.2, "learning_rate": 3.490704161154957e-05, "loss": 2.2356, "step": 6095000 }, { "epoch": 30.2, "learning_rate": 3.490580302512349e-05, "loss": 2.2251, "step": 6095500 }, { "epoch": 30.2, "learning_rate": 3.4904564438697404e-05, "loss": 2.2153, "step": 6096000 }, { "epoch": 30.2, "learning_rate": 3.490332585227132e-05, "loss": 2.2476, "step": 6096500 }, { "epoch": 30.21, "learning_rate": 3.490208726584524e-05, "loss": 2.2666, "step": 6097000 }, { "epoch": 30.21, "learning_rate": 3.4900848679419155e-05, "loss": 2.2188, "step": 6097500 }, { "epoch": 30.21, "learning_rate": 3.4899610092993065e-05, "loss": 2.2538, "step": 6098000 }, { "epoch": 30.21, "learning_rate": 3.489837398373984e-05, "loss": 2.2291, "step": 6098500 }, { "epoch": 30.22, "learning_rate": 3.489713539731376e-05, "loss": 2.2363, "step": 6099000 }, { "epoch": 30.22, "learning_rate": 3.4895896810887674e-05, "loss": 2.2546, "step": 6099500 }, { "epoch": 30.22, "learning_rate": 3.489465822446159e-05, "loss": 2.2155, "step": 6100000 }, { "epoch": 30.22, "learning_rate": 3.489341963803551e-05, "loss": 2.2316, "step": 6100500 }, { "epoch": 30.23, "learning_rate": 3.489218105160942e-05, "loss": 2.2429, "step": 6101000 }, { "epoch": 30.23, "learning_rate": 3.4890944942356194e-05, "loss": 2.2284, "step": 6101500 }, { "epoch": 30.23, "learning_rate": 3.4889708833102956e-05, "loss": 2.2208, "step": 6102000 }, { "epoch": 30.23, "learning_rate": 3.488847024667687e-05, "loss": 2.2401, "step": 6102500 }, { "epoch": 30.24, "learning_rate": 3.488723166025079e-05, "loss": 2.2289, "step": 6103000 }, { "epoch": 30.24, "learning_rate": 3.488599307382471e-05, "loss": 2.2329, "step": 6103500 }, { "epoch": 30.24, "learning_rate": 3.4884754487398624e-05, "loss": 2.2288, "step": 6104000 }, { "epoch": 30.24, "learning_rate": 3.488351590097254e-05, "loss": 2.2316, "step": 6104500 }, { "epoch": 30.25, "learning_rate": 3.488227731454646e-05, "loss": 2.2553, "step": 6105000 }, { "epoch": 30.25, "learning_rate": 3.4881038728120375e-05, "loss": 2.21, "step": 6105500 }, { "epoch": 30.25, "learning_rate": 3.487980261886714e-05, "loss": 2.2134, "step": 6106000 }, { "epoch": 30.25, "learning_rate": 3.487856403244106e-05, "loss": 2.2151, "step": 6106500 }, { "epoch": 30.26, "learning_rate": 3.487732544601498e-05, "loss": 2.229, "step": 6107000 }, { "epoch": 30.26, "learning_rate": 3.4876086859588894e-05, "loss": 2.2177, "step": 6107500 }, { "epoch": 30.26, "learning_rate": 3.4874850750335656e-05, "loss": 2.2384, "step": 6108000 }, { "epoch": 30.26, "learning_rate": 3.487361216390957e-05, "loss": 2.2151, "step": 6108500 }, { "epoch": 30.27, "learning_rate": 3.487237357748349e-05, "loss": 2.2436, "step": 6109000 }, { "epoch": 30.27, "learning_rate": 3.487113746823026e-05, "loss": 2.2404, "step": 6109500 }, { "epoch": 30.27, "learning_rate": 3.4869898881804176e-05, "loss": 2.2555, "step": 6110000 }, { "epoch": 30.27, "learning_rate": 3.486866029537809e-05, "loss": 2.2404, "step": 6110500 }, { "epoch": 30.28, "learning_rate": 3.486742170895201e-05, "loss": 2.2173, "step": 6111000 }, { "epoch": 30.28, "learning_rate": 3.486618559969878e-05, "loss": 2.2282, "step": 6111500 }, { "epoch": 30.28, "learning_rate": 3.486494701327269e-05, "loss": 2.2582, "step": 6112000 }, { "epoch": 30.28, "learning_rate": 3.4863708426846606e-05, "loss": 2.2264, "step": 6112500 }, { "epoch": 30.29, "learning_rate": 3.486246984042052e-05, "loss": 2.2294, "step": 6113000 }, { "epoch": 30.29, "learning_rate": 3.486123125399444e-05, "loss": 2.2493, "step": 6113500 }, { "epoch": 30.29, "learning_rate": 3.4859992667568356e-05, "loss": 2.2327, "step": 6114000 }, { "epoch": 30.29, "learning_rate": 3.485875408114227e-05, "loss": 2.2406, "step": 6114500 }, { "epoch": 30.3, "learning_rate": 3.485751549471619e-05, "loss": 2.2053, "step": 6115000 }, { "epoch": 30.3, "learning_rate": 3.485627690829011e-05, "loss": 2.2343, "step": 6115500 }, { "epoch": 30.3, "learning_rate": 3.4855040799036876e-05, "loss": 2.2061, "step": 6116000 }, { "epoch": 30.3, "learning_rate": 3.4853804689783645e-05, "loss": 2.2168, "step": 6116500 }, { "epoch": 30.31, "learning_rate": 3.485256610335756e-05, "loss": 2.2477, "step": 6117000 }, { "epoch": 30.31, "learning_rate": 3.485132751693148e-05, "loss": 2.233, "step": 6117500 }, { "epoch": 30.31, "learning_rate": 3.4850088930505395e-05, "loss": 2.2193, "step": 6118000 }, { "epoch": 30.31, "learning_rate": 3.484885034407931e-05, "loss": 2.224, "step": 6118500 }, { "epoch": 30.32, "learning_rate": 3.484761175765322e-05, "loss": 2.221, "step": 6119000 }, { "epoch": 30.32, "learning_rate": 3.48463756484e-05, "loss": 2.2206, "step": 6119500 }, { "epoch": 30.32, "learning_rate": 3.4845137061973915e-05, "loss": 2.2106, "step": 6120000 }, { "epoch": 30.32, "learning_rate": 3.4843898475547825e-05, "loss": 2.2688, "step": 6120500 }, { "epoch": 30.33, "learning_rate": 3.484265988912174e-05, "loss": 2.224, "step": 6121000 }, { "epoch": 30.33, "learning_rate": 3.484142130269566e-05, "loss": 2.2378, "step": 6121500 }, { "epoch": 30.33, "learning_rate": 3.4840182716269576e-05, "loss": 2.1927, "step": 6122000 }, { "epoch": 30.33, "learning_rate": 3.483894412984349e-05, "loss": 2.2423, "step": 6122500 }, { "epoch": 30.34, "learning_rate": 3.483770554341741e-05, "loss": 2.2092, "step": 6123000 }, { "epoch": 30.34, "learning_rate": 3.483646695699133e-05, "loss": 2.242, "step": 6123500 }, { "epoch": 30.34, "learning_rate": 3.4835228370565244e-05, "loss": 2.217, "step": 6124000 }, { "epoch": 30.34, "learning_rate": 3.483398978413916e-05, "loss": 2.2285, "step": 6124500 }, { "epoch": 30.35, "learning_rate": 3.483275119771308e-05, "loss": 2.2339, "step": 6125000 }, { "epoch": 30.35, "learning_rate": 3.4831512611286995e-05, "loss": 2.235, "step": 6125500 }, { "epoch": 30.35, "learning_rate": 3.483027402486091e-05, "loss": 2.2277, "step": 6126000 }, { "epoch": 30.35, "learning_rate": 3.482903543843483e-05, "loss": 2.2282, "step": 6126500 }, { "epoch": 30.36, "learning_rate": 3.482779932918159e-05, "loss": 2.2184, "step": 6127000 }, { "epoch": 30.36, "learning_rate": 3.482656321992836e-05, "loss": 2.2337, "step": 6127500 }, { "epoch": 30.36, "learning_rate": 3.4825324633502276e-05, "loss": 2.256, "step": 6128000 }, { "epoch": 30.36, "learning_rate": 3.482408852424905e-05, "loss": 2.2668, "step": 6128500 }, { "epoch": 30.37, "learning_rate": 3.482284993782297e-05, "loss": 2.254, "step": 6129000 }, { "epoch": 30.37, "learning_rate": 3.482161382856973e-05, "loss": 2.2228, "step": 6129500 }, { "epoch": 30.37, "learning_rate": 3.482037524214365e-05, "loss": 2.2304, "step": 6130000 }, { "epoch": 30.37, "learning_rate": 3.4819136655717565e-05, "loss": 2.2172, "step": 6130500 }, { "epoch": 30.38, "learning_rate": 3.481789806929148e-05, "loss": 2.2406, "step": 6131000 }, { "epoch": 30.38, "learning_rate": 3.481666196003825e-05, "loss": 2.2339, "step": 6131500 }, { "epoch": 30.38, "learning_rate": 3.481542337361217e-05, "loss": 2.2278, "step": 6132000 }, { "epoch": 30.38, "learning_rate": 3.4814184787186084e-05, "loss": 2.2436, "step": 6132500 }, { "epoch": 30.39, "learning_rate": 3.481294620076e-05, "loss": 2.2639, "step": 6133000 }, { "epoch": 30.39, "learning_rate": 3.481170761433392e-05, "loss": 2.2498, "step": 6133500 }, { "epoch": 30.39, "learning_rate": 3.481047150508068e-05, "loss": 2.2274, "step": 6134000 }, { "epoch": 30.39, "learning_rate": 3.48092329186546e-05, "loss": 2.251, "step": 6134500 }, { "epoch": 30.39, "learning_rate": 3.4807994332228514e-05, "loss": 2.2089, "step": 6135000 }, { "epoch": 30.4, "learning_rate": 3.480675574580243e-05, "loss": 2.2332, "step": 6135500 }, { "epoch": 30.4, "learning_rate": 3.48055196365492e-05, "loss": 2.2195, "step": 6136000 }, { "epoch": 30.4, "learning_rate": 3.4804281050123117e-05, "loss": 2.2192, "step": 6136500 }, { "epoch": 30.4, "learning_rate": 3.4803042463697033e-05, "loss": 2.2242, "step": 6137000 }, { "epoch": 30.41, "learning_rate": 3.480180387727095e-05, "loss": 2.2118, "step": 6137500 }, { "epoch": 30.41, "learning_rate": 3.480056529084487e-05, "loss": 2.2459, "step": 6138000 }, { "epoch": 30.41, "learning_rate": 3.4799326704418784e-05, "loss": 2.2424, "step": 6138500 }, { "epoch": 30.41, "learning_rate": 3.47980881179927e-05, "loss": 2.2348, "step": 6139000 }, { "epoch": 30.42, "learning_rate": 3.479684953156662e-05, "loss": 2.2165, "step": 6139500 }, { "epoch": 30.42, "learning_rate": 3.4795610945140535e-05, "loss": 2.2224, "step": 6140000 }, { "epoch": 30.42, "learning_rate": 3.47943748358873e-05, "loss": 2.2588, "step": 6140500 }, { "epoch": 30.42, "learning_rate": 3.4793138726634066e-05, "loss": 2.2282, "step": 6141000 }, { "epoch": 30.43, "learning_rate": 3.479190014020798e-05, "loss": 2.2549, "step": 6141500 }, { "epoch": 30.43, "learning_rate": 3.47906615537819e-05, "loss": 2.2456, "step": 6142000 }, { "epoch": 30.43, "learning_rate": 3.478942296735582e-05, "loss": 2.2379, "step": 6142500 }, { "epoch": 30.43, "learning_rate": 3.4788184380929734e-05, "loss": 2.2362, "step": 6143000 }, { "epoch": 30.44, "learning_rate": 3.478694579450365e-05, "loss": 2.2371, "step": 6143500 }, { "epoch": 30.44, "learning_rate": 3.478570720807757e-05, "loss": 2.2568, "step": 6144000 }, { "epoch": 30.44, "learning_rate": 3.4784468621651484e-05, "loss": 2.2387, "step": 6144500 }, { "epoch": 30.44, "learning_rate": 3.47832300352254e-05, "loss": 2.2459, "step": 6145000 }, { "epoch": 30.45, "learning_rate": 3.478199144879932e-05, "loss": 2.2056, "step": 6145500 }, { "epoch": 30.45, "learning_rate": 3.4780752862373235e-05, "loss": 2.2531, "step": 6146000 }, { "epoch": 30.45, "learning_rate": 3.477951427594715e-05, "loss": 2.2194, "step": 6146500 }, { "epoch": 30.45, "learning_rate": 3.477827568952107e-05, "loss": 2.2402, "step": 6147000 }, { "epoch": 30.46, "learning_rate": 3.4777037103094986e-05, "loss": 2.2502, "step": 6147500 }, { "epoch": 30.46, "learning_rate": 3.47757985166689e-05, "loss": 2.2541, "step": 6148000 }, { "epoch": 30.46, "learning_rate": 3.477455993024281e-05, "loss": 2.2304, "step": 6148500 }, { "epoch": 30.46, "learning_rate": 3.477332134381673e-05, "loss": 2.2356, "step": 6149000 }, { "epoch": 30.47, "learning_rate": 3.47720852345635e-05, "loss": 2.2525, "step": 6149500 }, { "epoch": 30.47, "learning_rate": 3.477084912531027e-05, "loss": 2.2437, "step": 6150000 }, { "epoch": 30.47, "learning_rate": 3.4769610538884185e-05, "loss": 2.2182, "step": 6150500 }, { "epoch": 30.47, "learning_rate": 3.47683719524581e-05, "loss": 2.2214, "step": 6151000 }, { "epoch": 30.48, "learning_rate": 3.476713336603202e-05, "loss": 2.2403, "step": 6151500 }, { "epoch": 30.48, "learning_rate": 3.4765894779605935e-05, "loss": 2.2509, "step": 6152000 }, { "epoch": 30.48, "learning_rate": 3.476465619317985e-05, "loss": 2.2426, "step": 6152500 }, { "epoch": 30.48, "learning_rate": 3.476342008392662e-05, "loss": 2.2147, "step": 6153000 }, { "epoch": 30.49, "learning_rate": 3.476218149750053e-05, "loss": 2.2379, "step": 6153500 }, { "epoch": 30.49, "learning_rate": 3.476094291107445e-05, "loss": 2.1983, "step": 6154000 }, { "epoch": 30.49, "learning_rate": 3.4759704324648365e-05, "loss": 2.2076, "step": 6154500 }, { "epoch": 30.49, "learning_rate": 3.4758468215395134e-05, "loss": 2.22, "step": 6155000 }, { "epoch": 30.5, "learning_rate": 3.475722962896905e-05, "loss": 2.2341, "step": 6155500 }, { "epoch": 30.5, "learning_rate": 3.475599351971582e-05, "loss": 2.2196, "step": 6156000 }, { "epoch": 30.5, "learning_rate": 3.475475741046259e-05, "loss": 2.2361, "step": 6156500 }, { "epoch": 30.5, "learning_rate": 3.4753518824036505e-05, "loss": 2.2396, "step": 6157000 }, { "epoch": 30.51, "learning_rate": 3.475228023761042e-05, "loss": 2.2363, "step": 6157500 }, { "epoch": 30.51, "learning_rate": 3.475104165118434e-05, "loss": 2.235, "step": 6158000 }, { "epoch": 30.51, "learning_rate": 3.474980306475825e-05, "loss": 2.247, "step": 6158500 }, { "epoch": 30.51, "learning_rate": 3.4748564478332166e-05, "loss": 2.2427, "step": 6159000 }, { "epoch": 30.52, "learning_rate": 3.474732589190608e-05, "loss": 2.2194, "step": 6159500 }, { "epoch": 30.52, "learning_rate": 3.474608730548e-05, "loss": 2.2395, "step": 6160000 }, { "epoch": 30.52, "learning_rate": 3.474484871905392e-05, "loss": 2.2607, "step": 6160500 }, { "epoch": 30.52, "learning_rate": 3.4743610132627834e-05, "loss": 2.2377, "step": 6161000 }, { "epoch": 30.53, "learning_rate": 3.474237154620175e-05, "loss": 2.2333, "step": 6161500 }, { "epoch": 30.53, "learning_rate": 3.474113295977567e-05, "loss": 2.226, "step": 6162000 }, { "epoch": 30.53, "learning_rate": 3.4739894373349585e-05, "loss": 2.2342, "step": 6162500 }, { "epoch": 30.53, "learning_rate": 3.4738658264096354e-05, "loss": 2.2176, "step": 6163000 }, { "epoch": 30.54, "learning_rate": 3.473741967767027e-05, "loss": 2.2501, "step": 6163500 }, { "epoch": 30.54, "learning_rate": 3.473618109124419e-05, "loss": 2.2182, "step": 6164000 }, { "epoch": 30.54, "learning_rate": 3.4734942504818104e-05, "loss": 2.2549, "step": 6164500 }, { "epoch": 30.54, "learning_rate": 3.473370391839202e-05, "loss": 2.2135, "step": 6165000 }, { "epoch": 30.55, "learning_rate": 3.473246533196594e-05, "loss": 2.2249, "step": 6165500 }, { "epoch": 30.55, "learning_rate": 3.47312292227127e-05, "loss": 2.2566, "step": 6166000 }, { "epoch": 30.55, "learning_rate": 3.4729993113459476e-05, "loss": 2.2149, "step": 6166500 }, { "epoch": 30.55, "learning_rate": 3.472875452703339e-05, "loss": 2.2436, "step": 6167000 }, { "epoch": 30.56, "learning_rate": 3.472751594060731e-05, "loss": 2.2307, "step": 6167500 }, { "epoch": 30.56, "learning_rate": 3.472627983135407e-05, "loss": 2.2567, "step": 6168000 }, { "epoch": 30.56, "learning_rate": 3.472504124492799e-05, "loss": 2.198, "step": 6168500 }, { "epoch": 30.56, "learning_rate": 3.4723802658501906e-05, "loss": 2.2303, "step": 6169000 }, { "epoch": 30.57, "learning_rate": 3.472256407207582e-05, "loss": 2.229, "step": 6169500 }, { "epoch": 30.57, "learning_rate": 3.472132548564974e-05, "loss": 2.2487, "step": 6170000 }, { "epoch": 30.57, "learning_rate": 3.4720086899223656e-05, "loss": 2.2399, "step": 6170500 }, { "epoch": 30.57, "learning_rate": 3.4718848312797567e-05, "loss": 2.2207, "step": 6171000 }, { "epoch": 30.58, "learning_rate": 3.4717609726371483e-05, "loss": 2.2181, "step": 6171500 }, { "epoch": 30.58, "learning_rate": 3.47163711399454e-05, "loss": 2.2427, "step": 6172000 }, { "epoch": 30.58, "learning_rate": 3.471513255351932e-05, "loss": 2.2214, "step": 6172500 }, { "epoch": 30.58, "learning_rate": 3.4713893967093234e-05, "loss": 2.2395, "step": 6173000 }, { "epoch": 30.59, "learning_rate": 3.471265538066715e-05, "loss": 2.2376, "step": 6173500 }, { "epoch": 30.59, "learning_rate": 3.471141679424107e-05, "loss": 2.2301, "step": 6174000 }, { "epoch": 30.59, "learning_rate": 3.4710178207814985e-05, "loss": 2.2256, "step": 6174500 }, { "epoch": 30.59, "learning_rate": 3.47089396213889e-05, "loss": 2.2518, "step": 6175000 }, { "epoch": 30.6, "learning_rate": 3.470770103496282e-05, "loss": 2.2303, "step": 6175500 }, { "epoch": 30.6, "learning_rate": 3.4706462448536736e-05, "loss": 2.2201, "step": 6176000 }, { "epoch": 30.6, "learning_rate": 3.470522386211065e-05, "loss": 2.224, "step": 6176500 }, { "epoch": 30.6, "learning_rate": 3.470398527568457e-05, "loss": 2.2206, "step": 6177000 }, { "epoch": 30.61, "learning_rate": 3.4702746689258487e-05, "loss": 2.2475, "step": 6177500 }, { "epoch": 30.61, "learning_rate": 3.4701510580005255e-05, "loss": 2.2503, "step": 6178000 }, { "epoch": 30.61, "learning_rate": 3.470027199357917e-05, "loss": 2.2358, "step": 6178500 }, { "epoch": 30.61, "learning_rate": 3.4699035884325934e-05, "loss": 2.2493, "step": 6179000 }, { "epoch": 30.62, "learning_rate": 3.469779729789985e-05, "loss": 2.2415, "step": 6179500 }, { "epoch": 30.62, "learning_rate": 3.469655871147377e-05, "loss": 2.1972, "step": 6180000 }, { "epoch": 30.62, "learning_rate": 3.4695325079393396e-05, "loss": 2.2483, "step": 6180500 }, { "epoch": 30.62, "learning_rate": 3.469408649296731e-05, "loss": 2.2393, "step": 6181000 }, { "epoch": 30.63, "learning_rate": 3.469284790654122e-05, "loss": 2.2498, "step": 6181500 }, { "epoch": 30.63, "learning_rate": 3.469160932011514e-05, "loss": 2.2415, "step": 6182000 }, { "epoch": 30.63, "learning_rate": 3.4690370733689057e-05, "loss": 2.2731, "step": 6182500 }, { "epoch": 30.63, "learning_rate": 3.4689132147262974e-05, "loss": 2.265, "step": 6183000 }, { "epoch": 30.64, "learning_rate": 3.468789356083689e-05, "loss": 2.2388, "step": 6183500 }, { "epoch": 30.64, "learning_rate": 3.468665745158366e-05, "loss": 2.2341, "step": 6184000 }, { "epoch": 30.64, "learning_rate": 3.4685418865157576e-05, "loss": 2.2543, "step": 6184500 }, { "epoch": 30.64, "learning_rate": 3.468418027873149e-05, "loss": 2.2344, "step": 6185000 }, { "epoch": 30.65, "learning_rate": 3.468294169230541e-05, "loss": 2.2658, "step": 6185500 }, { "epoch": 30.65, "learning_rate": 3.468170310587933e-05, "loss": 2.2603, "step": 6186000 }, { "epoch": 30.65, "learning_rate": 3.4680466996626096e-05, "loss": 2.2264, "step": 6186500 }, { "epoch": 30.65, "learning_rate": 3.467922841020001e-05, "loss": 2.2426, "step": 6187000 }, { "epoch": 30.66, "learning_rate": 3.467798982377393e-05, "loss": 2.2392, "step": 6187500 }, { "epoch": 30.66, "learning_rate": 3.467675123734784e-05, "loss": 2.2032, "step": 6188000 }, { "epoch": 30.66, "learning_rate": 3.467551265092176e-05, "loss": 2.2448, "step": 6188500 }, { "epoch": 30.66, "learning_rate": 3.4674274064495674e-05, "loss": 2.2308, "step": 6189000 }, { "epoch": 30.66, "learning_rate": 3.467303547806959e-05, "loss": 2.2433, "step": 6189500 }, { "epoch": 30.67, "learning_rate": 3.467179689164351e-05, "loss": 2.2387, "step": 6190000 }, { "epoch": 30.67, "learning_rate": 3.4670560782390276e-05, "loss": 2.2341, "step": 6190500 }, { "epoch": 30.67, "learning_rate": 3.466932219596419e-05, "loss": 2.2467, "step": 6191000 }, { "epoch": 30.67, "learning_rate": 3.466808360953811e-05, "loss": 2.2455, "step": 6191500 }, { "epoch": 30.68, "learning_rate": 3.466684502311203e-05, "loss": 2.2521, "step": 6192000 }, { "epoch": 30.68, "learning_rate": 3.4665606436685944e-05, "loss": 2.2215, "step": 6192500 }, { "epoch": 30.68, "learning_rate": 3.4664367850259854e-05, "loss": 2.2362, "step": 6193000 }, { "epoch": 30.68, "learning_rate": 3.466313174100663e-05, "loss": 2.2413, "step": 6193500 }, { "epoch": 30.69, "learning_rate": 3.466189315458055e-05, "loss": 2.2268, "step": 6194000 }, { "epoch": 30.69, "learning_rate": 3.4660654568154464e-05, "loss": 2.2273, "step": 6194500 }, { "epoch": 30.69, "learning_rate": 3.4659415981728374e-05, "loss": 2.2315, "step": 6195000 }, { "epoch": 30.69, "learning_rate": 3.465817987247514e-05, "loss": 2.2424, "step": 6195500 }, { "epoch": 30.7, "learning_rate": 3.465694128604906e-05, "loss": 2.2435, "step": 6196000 }, { "epoch": 30.7, "learning_rate": 3.465570517679583e-05, "loss": 2.2266, "step": 6196500 }, { "epoch": 30.7, "learning_rate": 3.4654466590369745e-05, "loss": 2.2315, "step": 6197000 }, { "epoch": 30.7, "learning_rate": 3.465322800394366e-05, "loss": 2.2434, "step": 6197500 }, { "epoch": 30.71, "learning_rate": 3.465198941751758e-05, "loss": 2.2493, "step": 6198000 }, { "epoch": 30.71, "learning_rate": 3.4650750831091496e-05, "loss": 2.2375, "step": 6198500 }, { "epoch": 30.71, "learning_rate": 3.464951224466541e-05, "loss": 2.2524, "step": 6199000 }, { "epoch": 30.71, "learning_rate": 3.4648276135412175e-05, "loss": 2.2466, "step": 6199500 }, { "epoch": 30.72, "learning_rate": 3.464703754898609e-05, "loss": 2.2237, "step": 6200000 }, { "epoch": 30.72, "learning_rate": 3.464579896256001e-05, "loss": 2.2402, "step": 6200500 }, { "epoch": 30.72, "learning_rate": 3.4644560376133926e-05, "loss": 2.246, "step": 6201000 }, { "epoch": 30.72, "learning_rate": 3.464332178970784e-05, "loss": 2.2472, "step": 6201500 }, { "epoch": 30.73, "learning_rate": 3.464208320328176e-05, "loss": 2.2377, "step": 6202000 }, { "epoch": 30.73, "learning_rate": 3.4640844616855677e-05, "loss": 2.27, "step": 6202500 }, { "epoch": 30.73, "learning_rate": 3.4639606030429594e-05, "loss": 2.2133, "step": 6203000 }, { "epoch": 30.73, "learning_rate": 3.463836744400351e-05, "loss": 2.2362, "step": 6203500 }, { "epoch": 30.74, "learning_rate": 3.463712885757743e-05, "loss": 2.2287, "step": 6204000 }, { "epoch": 30.74, "learning_rate": 3.4635892748324196e-05, "loss": 2.2377, "step": 6204500 }, { "epoch": 30.74, "learning_rate": 3.463465416189811e-05, "loss": 2.2474, "step": 6205000 }, { "epoch": 30.74, "learning_rate": 3.463341557547203e-05, "loss": 2.2293, "step": 6205500 }, { "epoch": 30.75, "learning_rate": 3.463217698904595e-05, "loss": 2.2539, "step": 6206000 }, { "epoch": 30.75, "learning_rate": 3.463094087979271e-05, "loss": 2.2161, "step": 6206500 }, { "epoch": 30.75, "learning_rate": 3.4629702293366626e-05, "loss": 2.2283, "step": 6207000 }, { "epoch": 30.75, "learning_rate": 3.462846370694054e-05, "loss": 2.2341, "step": 6207500 }, { "epoch": 30.76, "learning_rate": 3.462722512051446e-05, "loss": 2.2445, "step": 6208000 }, { "epoch": 30.76, "learning_rate": 3.462598653408838e-05, "loss": 2.2316, "step": 6208500 }, { "epoch": 30.76, "learning_rate": 3.4624747947662294e-05, "loss": 2.242, "step": 6209000 }, { "epoch": 30.76, "learning_rate": 3.462350936123621e-05, "loss": 2.245, "step": 6209500 }, { "epoch": 30.77, "learning_rate": 3.462227325198298e-05, "loss": 2.253, "step": 6210000 }, { "epoch": 30.77, "learning_rate": 3.4621034665556896e-05, "loss": 2.2291, "step": 6210500 }, { "epoch": 30.77, "learning_rate": 3.461979607913081e-05, "loss": 2.2505, "step": 6211000 }, { "epoch": 30.77, "learning_rate": 3.461855996987758e-05, "loss": 2.2503, "step": 6211500 }, { "epoch": 30.78, "learning_rate": 3.461732138345149e-05, "loss": 2.2289, "step": 6212000 }, { "epoch": 30.78, "learning_rate": 3.461608527419826e-05, "loss": 2.2613, "step": 6212500 }, { "epoch": 30.78, "learning_rate": 3.461484668777218e-05, "loss": 2.2444, "step": 6213000 }, { "epoch": 30.78, "learning_rate": 3.4613608101346095e-05, "loss": 2.2416, "step": 6213500 }, { "epoch": 30.79, "learning_rate": 3.461236951492001e-05, "loss": 2.2211, "step": 6214000 }, { "epoch": 30.79, "learning_rate": 3.461113092849393e-05, "loss": 2.2169, "step": 6214500 }, { "epoch": 30.79, "learning_rate": 3.4609892342067846e-05, "loss": 2.2292, "step": 6215000 }, { "epoch": 30.79, "learning_rate": 3.460865375564176e-05, "loss": 2.2373, "step": 6215500 }, { "epoch": 30.8, "learning_rate": 3.460741516921568e-05, "loss": 2.2433, "step": 6216000 }, { "epoch": 30.8, "learning_rate": 3.4606176582789596e-05, "loss": 2.2271, "step": 6216500 }, { "epoch": 30.8, "learning_rate": 3.460493799636351e-05, "loss": 2.2382, "step": 6217000 }, { "epoch": 30.8, "learning_rate": 3.460369940993743e-05, "loss": 2.2384, "step": 6217500 }, { "epoch": 30.81, "learning_rate": 3.460246082351135e-05, "loss": 2.241, "step": 6218000 }, { "epoch": 30.81, "learning_rate": 3.4601222237085264e-05, "loss": 2.2461, "step": 6218500 }, { "epoch": 30.81, "learning_rate": 3.459998365065918e-05, "loss": 2.2499, "step": 6219000 }, { "epoch": 30.81, "learning_rate": 3.45987450642331e-05, "loss": 2.2335, "step": 6219500 }, { "epoch": 30.82, "learning_rate": 3.4597506477807015e-05, "loss": 2.2417, "step": 6220000 }, { "epoch": 30.82, "learning_rate": 3.459626789138093e-05, "loss": 2.2655, "step": 6220500 }, { "epoch": 30.82, "learning_rate": 3.4595034259300546e-05, "loss": 2.2529, "step": 6221000 }, { "epoch": 30.82, "learning_rate": 3.459379567287446e-05, "loss": 2.2362, "step": 6221500 }, { "epoch": 30.83, "learning_rate": 3.459255708644838e-05, "loss": 2.2316, "step": 6222000 }, { "epoch": 30.83, "learning_rate": 3.4591318500022297e-05, "loss": 2.2179, "step": 6222500 }, { "epoch": 30.83, "learning_rate": 3.4590079913596213e-05, "loss": 2.2264, "step": 6223000 }, { "epoch": 30.83, "learning_rate": 3.458884132717013e-05, "loss": 2.2518, "step": 6223500 }, { "epoch": 30.84, "learning_rate": 3.45876052179169e-05, "loss": 2.2177, "step": 6224000 }, { "epoch": 30.84, "learning_rate": 3.4586366631490816e-05, "loss": 2.2561, "step": 6224500 }, { "epoch": 30.84, "learning_rate": 3.458512804506473e-05, "loss": 2.2601, "step": 6225000 }, { "epoch": 30.84, "learning_rate": 3.458388945863864e-05, "loss": 2.2479, "step": 6225500 }, { "epoch": 30.85, "learning_rate": 3.458265582655827e-05, "loss": 2.2314, "step": 6226000 }, { "epoch": 30.85, "learning_rate": 3.458141724013219e-05, "loss": 2.2399, "step": 6226500 }, { "epoch": 30.85, "learning_rate": 3.4580178653706105e-05, "loss": 2.2339, "step": 6227000 }, { "epoch": 30.85, "learning_rate": 3.457894006728002e-05, "loss": 2.2389, "step": 6227500 }, { "epoch": 30.86, "learning_rate": 3.457770148085393e-05, "loss": 2.2303, "step": 6228000 }, { "epoch": 30.86, "learning_rate": 3.457646289442785e-05, "loss": 2.2388, "step": 6228500 }, { "epoch": 30.86, "learning_rate": 3.4575224308001765e-05, "loss": 2.2413, "step": 6229000 }, { "epoch": 30.86, "learning_rate": 3.457398572157568e-05, "loss": 2.2108, "step": 6229500 }, { "epoch": 30.87, "learning_rate": 3.45727471351496e-05, "loss": 2.2406, "step": 6230000 }, { "epoch": 30.87, "learning_rate": 3.457151102589637e-05, "loss": 2.2665, "step": 6230500 }, { "epoch": 30.87, "learning_rate": 3.457027243947028e-05, "loss": 2.2316, "step": 6231000 }, { "epoch": 30.87, "learning_rate": 3.4569033853044195e-05, "loss": 2.2667, "step": 6231500 }, { "epoch": 30.88, "learning_rate": 3.456779526661811e-05, "loss": 2.2364, "step": 6232000 }, { "epoch": 30.88, "learning_rate": 3.456655668019203e-05, "loss": 2.2286, "step": 6232500 }, { "epoch": 30.88, "learning_rate": 3.4565318093765946e-05, "loss": 2.251, "step": 6233000 }, { "epoch": 30.88, "learning_rate": 3.456407950733986e-05, "loss": 2.2181, "step": 6233500 }, { "epoch": 30.89, "learning_rate": 3.456284092091378e-05, "loss": 2.2578, "step": 6234000 }, { "epoch": 30.89, "learning_rate": 3.456160481166055e-05, "loss": 2.268, "step": 6234500 }, { "epoch": 30.89, "learning_rate": 3.4560366225234466e-05, "loss": 2.2286, "step": 6235000 }, { "epoch": 30.89, "learning_rate": 3.455912763880838e-05, "loss": 2.2199, "step": 6235500 }, { "epoch": 30.9, "learning_rate": 3.45578890523823e-05, "loss": 2.2377, "step": 6236000 }, { "epoch": 30.9, "learning_rate": 3.4556650465956216e-05, "loss": 2.238, "step": 6236500 }, { "epoch": 30.9, "learning_rate": 3.455541187953013e-05, "loss": 2.2317, "step": 6237000 }, { "epoch": 30.9, "learning_rate": 3.455417329310405e-05, "loss": 2.2309, "step": 6237500 }, { "epoch": 30.91, "learning_rate": 3.455293470667797e-05, "loss": 2.2461, "step": 6238000 }, { "epoch": 30.91, "learning_rate": 3.4551696120251884e-05, "loss": 2.2172, "step": 6238500 }, { "epoch": 30.91, "learning_rate": 3.4550457533825794e-05, "loss": 2.2666, "step": 6239000 }, { "epoch": 30.91, "learning_rate": 3.454921894739971e-05, "loss": 2.2173, "step": 6239500 }, { "epoch": 30.92, "learning_rate": 3.454798036097363e-05, "loss": 2.2493, "step": 6240000 }, { "epoch": 30.92, "learning_rate": 3.4546741774547545e-05, "loss": 2.2422, "step": 6240500 }, { "epoch": 30.92, "learning_rate": 3.4545505665294314e-05, "loss": 2.2461, "step": 6241000 }, { "epoch": 30.92, "learning_rate": 3.454426707886823e-05, "loss": 2.2144, "step": 6241500 }, { "epoch": 30.93, "learning_rate": 3.4543030969615e-05, "loss": 2.241, "step": 6242000 }, { "epoch": 30.93, "learning_rate": 3.4541792383188917e-05, "loss": 2.2454, "step": 6242500 }, { "epoch": 30.93, "learning_rate": 3.4540553796762833e-05, "loss": 2.2331, "step": 6243000 }, { "epoch": 30.93, "learning_rate": 3.453931521033675e-05, "loss": 2.241, "step": 6243500 }, { "epoch": 30.93, "learning_rate": 3.453807910108351e-05, "loss": 2.2426, "step": 6244000 }, { "epoch": 30.94, "learning_rate": 3.453684051465743e-05, "loss": 2.2353, "step": 6244500 }, { "epoch": 30.94, "learning_rate": 3.4535601928231346e-05, "loss": 2.2339, "step": 6245000 }, { "epoch": 30.94, "learning_rate": 3.453436334180526e-05, "loss": 2.2464, "step": 6245500 }, { "epoch": 30.94, "learning_rate": 3.453312723255204e-05, "loss": 2.247, "step": 6246000 }, { "epoch": 30.95, "learning_rate": 3.453188864612595e-05, "loss": 2.242, "step": 6246500 }, { "epoch": 30.95, "learning_rate": 3.4530650059699866e-05, "loss": 2.2535, "step": 6247000 }, { "epoch": 30.95, "learning_rate": 3.452941147327378e-05, "loss": 2.2252, "step": 6247500 }, { "epoch": 30.95, "learning_rate": 3.45281728868477e-05, "loss": 2.2676, "step": 6248000 }, { "epoch": 30.96, "learning_rate": 3.452693677759447e-05, "loss": 2.2438, "step": 6248500 }, { "epoch": 30.96, "learning_rate": 3.4525698191168385e-05, "loss": 2.2296, "step": 6249000 }, { "epoch": 30.96, "learning_rate": 3.45244596047423e-05, "loss": 2.254, "step": 6249500 }, { "epoch": 30.96, "learning_rate": 3.452322101831621e-05, "loss": 2.2367, "step": 6250000 }, { "epoch": 30.97, "learning_rate": 3.452198243189013e-05, "loss": 2.2447, "step": 6250500 }, { "epoch": 30.97, "learning_rate": 3.4520746322636905e-05, "loss": 2.2428, "step": 6251000 }, { "epoch": 30.97, "learning_rate": 3.451950773621082e-05, "loss": 2.224, "step": 6251500 }, { "epoch": 30.97, "learning_rate": 3.451826914978474e-05, "loss": 2.2347, "step": 6252000 }, { "epoch": 30.98, "learning_rate": 3.4517030563358656e-05, "loss": 2.2252, "step": 6252500 }, { "epoch": 30.98, "learning_rate": 3.451579445410542e-05, "loss": 2.2325, "step": 6253000 }, { "epoch": 30.98, "learning_rate": 3.4514555867679335e-05, "loss": 2.2505, "step": 6253500 }, { "epoch": 30.98, "learning_rate": 3.4513319758426104e-05, "loss": 2.25, "step": 6254000 }, { "epoch": 30.99, "learning_rate": 3.451208117200002e-05, "loss": 2.2485, "step": 6254500 }, { "epoch": 30.99, "learning_rate": 3.451084258557394e-05, "loss": 2.2436, "step": 6255000 }, { "epoch": 30.99, "learning_rate": 3.4509603999147854e-05, "loss": 2.2469, "step": 6255500 }, { "epoch": 30.99, "learning_rate": 3.450836788989462e-05, "loss": 2.2563, "step": 6256000 }, { "epoch": 31.0, "learning_rate": 3.450712930346854e-05, "loss": 2.2587, "step": 6256500 }, { "epoch": 31.0, "learning_rate": 3.450589071704246e-05, "loss": 2.235, "step": 6257000 }, { "epoch": 31.0, "eval_accuracy": 0.6597478495877519, "eval_accuracy_mlm": 0.6156466741652056, "eval_accuracy_nsp": 0.8678179628881506, "eval_loss": 2.3193306922912598, "eval_runtime": 145.8562, "eval_samples_per_second": 1748.017, "eval_steps_per_second": 72.839, "step": 6257133 }, { "epoch": 31.0, "learning_rate": 3.4504652130616374e-05, "loss": 2.1808, "step": 6257500 }, { "epoch": 31.0, "learning_rate": 3.4503416021363136e-05, "loss": 2.213, "step": 6258000 }, { "epoch": 31.01, "learning_rate": 3.450217743493705e-05, "loss": 2.2195, "step": 6258500 }, { "epoch": 31.01, "learning_rate": 3.450093884851097e-05, "loss": 2.2085, "step": 6259000 }, { "epoch": 31.01, "learning_rate": 3.449970026208489e-05, "loss": 2.2248, "step": 6259500 }, { "epoch": 31.01, "learning_rate": 3.4498461675658804e-05, "loss": 2.2321, "step": 6260000 }, { "epoch": 31.02, "learning_rate": 3.449722308923272e-05, "loss": 2.2127, "step": 6260500 }, { "epoch": 31.02, "learning_rate": 3.449598450280664e-05, "loss": 2.1961, "step": 6261000 }, { "epoch": 31.02, "learning_rate": 3.4494745916380554e-05, "loss": 2.2115, "step": 6261500 }, { "epoch": 31.02, "learning_rate": 3.449350980712732e-05, "loss": 2.1928, "step": 6262000 }, { "epoch": 31.03, "learning_rate": 3.449227122070124e-05, "loss": 2.2092, "step": 6262500 }, { "epoch": 31.03, "learning_rate": 3.449103263427516e-05, "loss": 2.2159, "step": 6263000 }, { "epoch": 31.03, "learning_rate": 3.4489794047849074e-05, "loss": 2.189, "step": 6263500 }, { "epoch": 31.03, "learning_rate": 3.448855546142299e-05, "loss": 2.21, "step": 6264000 }, { "epoch": 31.04, "learning_rate": 3.448731687499691e-05, "loss": 2.2087, "step": 6264500 }, { "epoch": 31.04, "learning_rate": 3.4486078288570825e-05, "loss": 2.2178, "step": 6265000 }, { "epoch": 31.04, "learning_rate": 3.448483970214474e-05, "loss": 2.2034, "step": 6265500 }, { "epoch": 31.04, "learning_rate": 3.448360111571866e-05, "loss": 2.2245, "step": 6266000 }, { "epoch": 31.05, "learning_rate": 3.448236252929257e-05, "loss": 2.203, "step": 6266500 }, { "epoch": 31.05, "learning_rate": 3.4481123942866486e-05, "loss": 2.1872, "step": 6267000 }, { "epoch": 31.05, "learning_rate": 3.4479887833613255e-05, "loss": 2.1915, "step": 6267500 }, { "epoch": 31.05, "learning_rate": 3.447864924718717e-05, "loss": 2.2446, "step": 6268000 }, { "epoch": 31.06, "learning_rate": 3.447741066076109e-05, "loss": 2.2023, "step": 6268500 }, { "epoch": 31.06, "learning_rate": 3.447617455150786e-05, "loss": 2.2039, "step": 6269000 }, { "epoch": 31.06, "learning_rate": 3.4474935965081774e-05, "loss": 2.2216, "step": 6269500 }, { "epoch": 31.06, "learning_rate": 3.447369737865569e-05, "loss": 2.2161, "step": 6270000 }, { "epoch": 31.07, "learning_rate": 3.447245879222961e-05, "loss": 2.1849, "step": 6270500 }, { "epoch": 31.07, "learning_rate": 3.4471220205803525e-05, "loss": 2.2113, "step": 6271000 }, { "epoch": 31.07, "learning_rate": 3.446998161937744e-05, "loss": 2.2109, "step": 6271500 }, { "epoch": 31.07, "learning_rate": 3.446874303295136e-05, "loss": 2.2381, "step": 6272000 }, { "epoch": 31.08, "learning_rate": 3.4467504446525276e-05, "loss": 2.2078, "step": 6272500 }, { "epoch": 31.08, "learning_rate": 3.446626586009919e-05, "loss": 2.2187, "step": 6273000 }, { "epoch": 31.08, "learning_rate": 3.44650272736731e-05, "loss": 2.2024, "step": 6273500 }, { "epoch": 31.08, "learning_rate": 3.446378868724702e-05, "loss": 2.2256, "step": 6274000 }, { "epoch": 31.09, "learning_rate": 3.446255010082094e-05, "loss": 2.2071, "step": 6274500 }, { "epoch": 31.09, "learning_rate": 3.4461311514394854e-05, "loss": 2.2189, "step": 6275000 }, { "epoch": 31.09, "learning_rate": 3.446007540514162e-05, "loss": 2.2198, "step": 6275500 }, { "epoch": 31.09, "learning_rate": 3.445883929588839e-05, "loss": 2.219, "step": 6276000 }, { "epoch": 31.1, "learning_rate": 3.445760070946231e-05, "loss": 2.218, "step": 6276500 }, { "epoch": 31.1, "learning_rate": 3.4456362123036225e-05, "loss": 2.2116, "step": 6277000 }, { "epoch": 31.1, "learning_rate": 3.445512353661014e-05, "loss": 2.2026, "step": 6277500 }, { "epoch": 31.1, "learning_rate": 3.445388495018406e-05, "loss": 2.2124, "step": 6278000 }, { "epoch": 31.11, "learning_rate": 3.4452646363757976e-05, "loss": 2.2316, "step": 6278500 }, { "epoch": 31.11, "learning_rate": 3.445140777733189e-05, "loss": 2.2332, "step": 6279000 }, { "epoch": 31.11, "learning_rate": 3.445016919090581e-05, "loss": 2.2116, "step": 6279500 }, { "epoch": 31.11, "learning_rate": 3.444893060447973e-05, "loss": 2.2152, "step": 6280000 }, { "epoch": 31.12, "learning_rate": 3.444769201805364e-05, "loss": 2.211, "step": 6280500 }, { "epoch": 31.12, "learning_rate": 3.4446453431627554e-05, "loss": 2.2078, "step": 6281000 }, { "epoch": 31.12, "learning_rate": 3.444521484520147e-05, "loss": 2.2311, "step": 6281500 }, { "epoch": 31.12, "learning_rate": 3.444397625877539e-05, "loss": 2.208, "step": 6282000 }, { "epoch": 31.13, "learning_rate": 3.44427376723493e-05, "loss": 2.2156, "step": 6282500 }, { "epoch": 31.13, "learning_rate": 3.444150156309607e-05, "loss": 2.2341, "step": 6283000 }, { "epoch": 31.13, "learning_rate": 3.444026545384284e-05, "loss": 2.2018, "step": 6283500 }, { "epoch": 31.13, "learning_rate": 3.4439029344589604e-05, "loss": 2.2093, "step": 6284000 }, { "epoch": 31.14, "learning_rate": 3.443779075816352e-05, "loss": 2.2189, "step": 6284500 }, { "epoch": 31.14, "learning_rate": 3.443655217173744e-05, "loss": 2.2098, "step": 6285000 }, { "epoch": 31.14, "learning_rate": 3.4435318539657066e-05, "loss": 2.2191, "step": 6285500 }, { "epoch": 31.14, "learning_rate": 3.443407995323098e-05, "loss": 2.2183, "step": 6286000 }, { "epoch": 31.15, "learning_rate": 3.44328413668049e-05, "loss": 2.2258, "step": 6286500 }, { "epoch": 31.15, "learning_rate": 3.4431602780378816e-05, "loss": 2.2116, "step": 6287000 }, { "epoch": 31.15, "learning_rate": 3.443036419395273e-05, "loss": 2.2322, "step": 6287500 }, { "epoch": 31.15, "learning_rate": 3.4429128084699495e-05, "loss": 2.212, "step": 6288000 }, { "epoch": 31.16, "learning_rate": 3.442788949827341e-05, "loss": 2.2144, "step": 6288500 }, { "epoch": 31.16, "learning_rate": 3.442665091184733e-05, "loss": 2.1998, "step": 6289000 }, { "epoch": 31.16, "learning_rate": 3.4425412325421246e-05, "loss": 2.1758, "step": 6289500 }, { "epoch": 31.16, "learning_rate": 3.442417373899516e-05, "loss": 2.2297, "step": 6290000 }, { "epoch": 31.17, "learning_rate": 3.442293515256908e-05, "loss": 2.211, "step": 6290500 }, { "epoch": 31.17, "learning_rate": 3.442169656614299e-05, "loss": 2.212, "step": 6291000 }, { "epoch": 31.17, "learning_rate": 3.442045797971691e-05, "loss": 2.2227, "step": 6291500 }, { "epoch": 31.17, "learning_rate": 3.4419219393290824e-05, "loss": 2.2025, "step": 6292000 }, { "epoch": 31.18, "learning_rate": 3.441798080686474e-05, "loss": 2.2118, "step": 6292500 }, { "epoch": 31.18, "learning_rate": 3.441674222043866e-05, "loss": 2.235, "step": 6293000 }, { "epoch": 31.18, "learning_rate": 3.4415503634012575e-05, "loss": 2.2199, "step": 6293500 }, { "epoch": 31.18, "learning_rate": 3.441426504758649e-05, "loss": 2.2552, "step": 6294000 }, { "epoch": 31.19, "learning_rate": 3.441302646116041e-05, "loss": 2.207, "step": 6294500 }, { "epoch": 31.19, "learning_rate": 3.4411787874734325e-05, "loss": 2.2162, "step": 6295000 }, { "epoch": 31.19, "learning_rate": 3.4410551765481094e-05, "loss": 2.2181, "step": 6295500 }, { "epoch": 31.19, "learning_rate": 3.440931565622786e-05, "loss": 2.228, "step": 6296000 }, { "epoch": 31.2, "learning_rate": 3.440807706980178e-05, "loss": 2.2119, "step": 6296500 }, { "epoch": 31.2, "learning_rate": 3.44068384833757e-05, "loss": 2.2169, "step": 6297000 }, { "epoch": 31.2, "learning_rate": 3.440559989694961e-05, "loss": 2.2164, "step": 6297500 }, { "epoch": 31.2, "learning_rate": 3.4404361310523524e-05, "loss": 2.2253, "step": 6298000 }, { "epoch": 31.2, "learning_rate": 3.440312272409744e-05, "loss": 2.2121, "step": 6298500 }, { "epoch": 31.21, "learning_rate": 3.440188413767136e-05, "loss": 2.2298, "step": 6299000 }, { "epoch": 31.21, "learning_rate": 3.4400645551245275e-05, "loss": 2.2243, "step": 6299500 }, { "epoch": 31.21, "learning_rate": 3.439940696481919e-05, "loss": 2.2202, "step": 6300000 }, { "epoch": 31.21, "learning_rate": 3.439817085556596e-05, "loss": 2.2288, "step": 6300500 }, { "epoch": 31.22, "learning_rate": 3.439693226913988e-05, "loss": 2.2252, "step": 6301000 }, { "epoch": 31.22, "learning_rate": 3.4395693682713794e-05, "loss": 2.2148, "step": 6301500 }, { "epoch": 31.22, "learning_rate": 3.439445509628771e-05, "loss": 2.2291, "step": 6302000 }, { "epoch": 31.22, "learning_rate": 3.439321650986163e-05, "loss": 2.2196, "step": 6302500 }, { "epoch": 31.23, "learning_rate": 3.4391977923435545e-05, "loss": 2.2144, "step": 6303000 }, { "epoch": 31.23, "learning_rate": 3.439073933700946e-05, "loss": 2.2116, "step": 6303500 }, { "epoch": 31.23, "learning_rate": 3.438950075058337e-05, "loss": 2.2102, "step": 6304000 }, { "epoch": 31.23, "learning_rate": 3.438826216415729e-05, "loss": 2.2304, "step": 6304500 }, { "epoch": 31.24, "learning_rate": 3.438702605490406e-05, "loss": 2.2181, "step": 6305000 }, { "epoch": 31.24, "learning_rate": 3.438579242282368e-05, "loss": 2.2039, "step": 6305500 }, { "epoch": 31.24, "learning_rate": 3.438455631357045e-05, "loss": 2.2286, "step": 6306000 }, { "epoch": 31.24, "learning_rate": 3.4383317727144364e-05, "loss": 2.2183, "step": 6306500 }, { "epoch": 31.25, "learning_rate": 3.438207914071828e-05, "loss": 2.2127, "step": 6307000 }, { "epoch": 31.25, "learning_rate": 3.43808405542922e-05, "loss": 2.2101, "step": 6307500 }, { "epoch": 31.25, "learning_rate": 3.4379601967866115e-05, "loss": 2.203, "step": 6308000 }, { "epoch": 31.25, "learning_rate": 3.437836338144003e-05, "loss": 2.2003, "step": 6308500 }, { "epoch": 31.26, "learning_rate": 3.437712479501395e-05, "loss": 2.1954, "step": 6309000 }, { "epoch": 31.26, "learning_rate": 3.4375886208587866e-05, "loss": 2.2006, "step": 6309500 }, { "epoch": 31.26, "learning_rate": 3.437464762216178e-05, "loss": 2.2085, "step": 6310000 }, { "epoch": 31.26, "learning_rate": 3.43734090357357e-05, "loss": 2.2127, "step": 6310500 }, { "epoch": 31.27, "learning_rate": 3.437217292648247e-05, "loss": 2.2205, "step": 6311000 }, { "epoch": 31.27, "learning_rate": 3.437093434005638e-05, "loss": 2.2085, "step": 6311500 }, { "epoch": 31.27, "learning_rate": 3.4369695753630296e-05, "loss": 2.2141, "step": 6312000 }, { "epoch": 31.27, "learning_rate": 3.436845716720421e-05, "loss": 2.226, "step": 6312500 }, { "epoch": 31.28, "learning_rate": 3.436722105795098e-05, "loss": 2.2442, "step": 6313000 }, { "epoch": 31.28, "learning_rate": 3.43659824715249e-05, "loss": 2.2363, "step": 6313500 }, { "epoch": 31.28, "learning_rate": 3.436474636227167e-05, "loss": 2.208, "step": 6314000 }, { "epoch": 31.28, "learning_rate": 3.4363507775845584e-05, "loss": 2.233, "step": 6314500 }, { "epoch": 31.29, "learning_rate": 3.43622691894195e-05, "loss": 2.2111, "step": 6315000 }, { "epoch": 31.29, "learning_rate": 3.436103060299342e-05, "loss": 2.2063, "step": 6315500 }, { "epoch": 31.29, "learning_rate": 3.4359792016567335e-05, "loss": 2.2115, "step": 6316000 }, { "epoch": 31.29, "learning_rate": 3.435855343014125e-05, "loss": 2.1919, "step": 6316500 }, { "epoch": 31.3, "learning_rate": 3.435731484371517e-05, "loss": 2.2389, "step": 6317000 }, { "epoch": 31.3, "learning_rate": 3.4356076257289086e-05, "loss": 2.2189, "step": 6317500 }, { "epoch": 31.3, "learning_rate": 3.435484014803585e-05, "loss": 2.2316, "step": 6318000 }, { "epoch": 31.3, "learning_rate": 3.4353601561609765e-05, "loss": 2.2294, "step": 6318500 }, { "epoch": 31.31, "learning_rate": 3.435236297518368e-05, "loss": 2.228, "step": 6319000 }, { "epoch": 31.31, "learning_rate": 3.43511243887576e-05, "loss": 2.2099, "step": 6319500 }, { "epoch": 31.31, "learning_rate": 3.4349885802331515e-05, "loss": 2.2214, "step": 6320000 }, { "epoch": 31.31, "learning_rate": 3.434864721590543e-05, "loss": 2.2146, "step": 6320500 }, { "epoch": 31.32, "learning_rate": 3.434740862947935e-05, "loss": 2.2259, "step": 6321000 }, { "epoch": 31.32, "learning_rate": 3.4346170043053266e-05, "loss": 2.2005, "step": 6321500 }, { "epoch": 31.32, "learning_rate": 3.434493145662718e-05, "loss": 2.2291, "step": 6322000 }, { "epoch": 31.32, "learning_rate": 3.43436928702011e-05, "loss": 2.219, "step": 6322500 }, { "epoch": 31.33, "learning_rate": 3.434245428377502e-05, "loss": 2.1981, "step": 6323000 }, { "epoch": 31.33, "learning_rate": 3.4341215697348934e-05, "loss": 2.2405, "step": 6323500 }, { "epoch": 31.33, "learning_rate": 3.433997711092285e-05, "loss": 2.1992, "step": 6324000 }, { "epoch": 31.33, "learning_rate": 3.433874100166962e-05, "loss": 2.2415, "step": 6324500 }, { "epoch": 31.34, "learning_rate": 3.433750241524353e-05, "loss": 2.2222, "step": 6325000 }, { "epoch": 31.34, "learning_rate": 3.433626382881745e-05, "loss": 2.2247, "step": 6325500 }, { "epoch": 31.34, "learning_rate": 3.4335025242391364e-05, "loss": 2.2394, "step": 6326000 }, { "epoch": 31.34, "learning_rate": 3.433378665596528e-05, "loss": 2.2441, "step": 6326500 }, { "epoch": 31.35, "learning_rate": 3.43325480695392e-05, "loss": 2.2145, "step": 6327000 }, { "epoch": 31.35, "learning_rate": 3.4331309483113115e-05, "loss": 2.2068, "step": 6327500 }, { "epoch": 31.35, "learning_rate": 3.433007089668703e-05, "loss": 2.2079, "step": 6328000 }, { "epoch": 31.35, "learning_rate": 3.432883231026094e-05, "loss": 2.2071, "step": 6328500 }, { "epoch": 31.36, "learning_rate": 3.432759620100772e-05, "loss": 2.2209, "step": 6329000 }, { "epoch": 31.36, "learning_rate": 3.4326357614581634e-05, "loss": 2.2367, "step": 6329500 }, { "epoch": 31.36, "learning_rate": 3.432511902815555e-05, "loss": 2.2124, "step": 6330000 }, { "epoch": 31.36, "learning_rate": 3.432388044172947e-05, "loss": 2.2072, "step": 6330500 }, { "epoch": 31.37, "learning_rate": 3.4322641855303385e-05, "loss": 2.2324, "step": 6331000 }, { "epoch": 31.37, "learning_rate": 3.4321408223223e-05, "loss": 2.2163, "step": 6331500 }, { "epoch": 31.37, "learning_rate": 3.4320172113969774e-05, "loss": 2.2097, "step": 6332000 }, { "epoch": 31.37, "learning_rate": 3.4318936004716536e-05, "loss": 2.2137, "step": 6332500 }, { "epoch": 31.38, "learning_rate": 3.431769741829045e-05, "loss": 2.2035, "step": 6333000 }, { "epoch": 31.38, "learning_rate": 3.431645883186437e-05, "loss": 2.2252, "step": 6333500 }, { "epoch": 31.38, "learning_rate": 3.431522024543829e-05, "loss": 2.2297, "step": 6334000 }, { "epoch": 31.38, "learning_rate": 3.4313981659012204e-05, "loss": 2.2349, "step": 6334500 }, { "epoch": 31.39, "learning_rate": 3.431274307258612e-05, "loss": 2.2302, "step": 6335000 }, { "epoch": 31.39, "learning_rate": 3.431150696333289e-05, "loss": 2.2064, "step": 6335500 }, { "epoch": 31.39, "learning_rate": 3.431026837690681e-05, "loss": 2.2251, "step": 6336000 }, { "epoch": 31.39, "learning_rate": 3.4309029790480724e-05, "loss": 2.238, "step": 6336500 }, { "epoch": 31.4, "learning_rate": 3.430779120405464e-05, "loss": 2.2249, "step": 6337000 }, { "epoch": 31.4, "learning_rate": 3.430655261762856e-05, "loss": 2.2352, "step": 6337500 }, { "epoch": 31.4, "learning_rate": 3.4305316508375326e-05, "loss": 2.2189, "step": 6338000 }, { "epoch": 31.4, "learning_rate": 3.430407792194924e-05, "loss": 2.2131, "step": 6338500 }, { "epoch": 31.41, "learning_rate": 3.430283933552316e-05, "loss": 2.2507, "step": 6339000 }, { "epoch": 31.41, "learning_rate": 3.430160074909707e-05, "loss": 2.2302, "step": 6339500 }, { "epoch": 31.41, "learning_rate": 3.430036216267099e-05, "loss": 2.2123, "step": 6340000 }, { "epoch": 31.41, "learning_rate": 3.4299123576244904e-05, "loss": 2.2245, "step": 6340500 }, { "epoch": 31.42, "learning_rate": 3.429788498981882e-05, "loss": 2.2152, "step": 6341000 }, { "epoch": 31.42, "learning_rate": 3.429664888056559e-05, "loss": 2.2311, "step": 6341500 }, { "epoch": 31.42, "learning_rate": 3.429541029413951e-05, "loss": 2.2054, "step": 6342000 }, { "epoch": 31.42, "learning_rate": 3.4294171707713424e-05, "loss": 2.2194, "step": 6342500 }, { "epoch": 31.43, "learning_rate": 3.429293312128734e-05, "loss": 2.2208, "step": 6343000 }, { "epoch": 31.43, "learning_rate": 3.429169453486126e-05, "loss": 2.2581, "step": 6343500 }, { "epoch": 31.43, "learning_rate": 3.4290455948435175e-05, "loss": 2.1989, "step": 6344000 }, { "epoch": 31.43, "learning_rate": 3.428921736200909e-05, "loss": 2.2191, "step": 6344500 }, { "epoch": 31.44, "learning_rate": 3.4287978775583e-05, "loss": 2.239, "step": 6345000 }, { "epoch": 31.44, "learning_rate": 3.428674018915692e-05, "loss": 2.2267, "step": 6345500 }, { "epoch": 31.44, "learning_rate": 3.428550407990369e-05, "loss": 2.2187, "step": 6346000 }, { "epoch": 31.44, "learning_rate": 3.4284265493477604e-05, "loss": 2.2207, "step": 6346500 }, { "epoch": 31.45, "learning_rate": 3.428302690705152e-05, "loss": 2.2389, "step": 6347000 }, { "epoch": 31.45, "learning_rate": 3.428178832062544e-05, "loss": 2.2213, "step": 6347500 }, { "epoch": 31.45, "learning_rate": 3.4280549734199355e-05, "loss": 2.2062, "step": 6348000 }, { "epoch": 31.45, "learning_rate": 3.4279311147773265e-05, "loss": 2.218, "step": 6348500 }, { "epoch": 31.46, "learning_rate": 3.427807256134718e-05, "loss": 2.2122, "step": 6349000 }, { "epoch": 31.46, "learning_rate": 3.42768339749211e-05, "loss": 2.2257, "step": 6349500 }, { "epoch": 31.46, "learning_rate": 3.4275595388495016e-05, "loss": 2.2072, "step": 6350000 }, { "epoch": 31.46, "learning_rate": 3.427435927924179e-05, "loss": 2.2411, "step": 6350500 }, { "epoch": 31.47, "learning_rate": 3.427312316998856e-05, "loss": 2.2063, "step": 6351000 }, { "epoch": 31.47, "learning_rate": 3.427188458356248e-05, "loss": 2.2173, "step": 6351500 }, { "epoch": 31.47, "learning_rate": 3.4270645997136394e-05, "loss": 2.2306, "step": 6352000 }, { "epoch": 31.47, "learning_rate": 3.426940741071031e-05, "loss": 2.2277, "step": 6352500 }, { "epoch": 31.47, "learning_rate": 3.426816882428422e-05, "loss": 2.229, "step": 6353000 }, { "epoch": 31.48, "learning_rate": 3.426693023785814e-05, "loss": 2.2136, "step": 6353500 }, { "epoch": 31.48, "learning_rate": 3.4265691651432055e-05, "loss": 2.2245, "step": 6354000 }, { "epoch": 31.48, "learning_rate": 3.426445306500597e-05, "loss": 2.2111, "step": 6354500 }, { "epoch": 31.48, "learning_rate": 3.426321447857989e-05, "loss": 2.2525, "step": 6355000 }, { "epoch": 31.49, "learning_rate": 3.42619758921538e-05, "loss": 2.2219, "step": 6355500 }, { "epoch": 31.49, "learning_rate": 3.4260737305727716e-05, "loss": 2.2086, "step": 6356000 }, { "epoch": 31.49, "learning_rate": 3.425949871930163e-05, "loss": 2.2263, "step": 6356500 }, { "epoch": 31.49, "learning_rate": 3.425826013287555e-05, "loss": 2.2265, "step": 6357000 }, { "epoch": 31.5, "learning_rate": 3.425702154644947e-05, "loss": 2.2005, "step": 6357500 }, { "epoch": 31.5, "learning_rate": 3.4255782960023384e-05, "loss": 2.2232, "step": 6358000 }, { "epoch": 31.5, "learning_rate": 3.42545443735973e-05, "loss": 2.2305, "step": 6358500 }, { "epoch": 31.5, "learning_rate": 3.425330826434407e-05, "loss": 2.1797, "step": 6359000 }, { "epoch": 31.51, "learning_rate": 3.425206967791799e-05, "loss": 2.2228, "step": 6359500 }, { "epoch": 31.51, "learning_rate": 3.4250831091491904e-05, "loss": 2.2035, "step": 6360000 }, { "epoch": 31.51, "learning_rate": 3.424959498223867e-05, "loss": 2.2232, "step": 6360500 }, { "epoch": 31.51, "learning_rate": 3.424835639581259e-05, "loss": 2.2096, "step": 6361000 }, { "epoch": 31.52, "learning_rate": 3.4247117809386506e-05, "loss": 2.203, "step": 6361500 }, { "epoch": 31.52, "learning_rate": 3.4245879222960416e-05, "loss": 2.2527, "step": 6362000 }, { "epoch": 31.52, "learning_rate": 3.424464063653433e-05, "loss": 2.2428, "step": 6362500 }, { "epoch": 31.52, "learning_rate": 3.424340452728111e-05, "loss": 2.1951, "step": 6363000 }, { "epoch": 31.53, "learning_rate": 3.424216594085502e-05, "loss": 2.2294, "step": 6363500 }, { "epoch": 31.53, "learning_rate": 3.4240927354428936e-05, "loss": 2.2303, "step": 6364000 }, { "epoch": 31.53, "learning_rate": 3.423968876800285e-05, "loss": 2.2402, "step": 6364500 }, { "epoch": 31.53, "learning_rate": 3.423845265874963e-05, "loss": 2.2149, "step": 6365000 }, { "epoch": 31.54, "learning_rate": 3.423721654949639e-05, "loss": 2.2285, "step": 6365500 }, { "epoch": 31.54, "learning_rate": 3.423597796307031e-05, "loss": 2.2355, "step": 6366000 }, { "epoch": 31.54, "learning_rate": 3.4234739376644224e-05, "loss": 2.2315, "step": 6366500 }, { "epoch": 31.54, "learning_rate": 3.423350079021814e-05, "loss": 2.2164, "step": 6367000 }, { "epoch": 31.55, "learning_rate": 3.423226220379206e-05, "loss": 2.2494, "step": 6367500 }, { "epoch": 31.55, "learning_rate": 3.4231023617365975e-05, "loss": 2.21, "step": 6368000 }, { "epoch": 31.55, "learning_rate": 3.4229787508112744e-05, "loss": 2.2279, "step": 6368500 }, { "epoch": 31.55, "learning_rate": 3.422854892168666e-05, "loss": 2.2203, "step": 6369000 }, { "epoch": 31.56, "learning_rate": 3.422731033526058e-05, "loss": 2.2472, "step": 6369500 }, { "epoch": 31.56, "learning_rate": 3.4226071748834495e-05, "loss": 2.2179, "step": 6370000 }, { "epoch": 31.56, "learning_rate": 3.422483316240841e-05, "loss": 2.2375, "step": 6370500 }, { "epoch": 31.56, "learning_rate": 3.422359457598233e-05, "loss": 2.2469, "step": 6371000 }, { "epoch": 31.57, "learning_rate": 3.4222355989556246e-05, "loss": 2.2507, "step": 6371500 }, { "epoch": 31.57, "learning_rate": 3.422111740313016e-05, "loss": 2.2195, "step": 6372000 }, { "epoch": 31.57, "learning_rate": 3.421987881670408e-05, "loss": 2.2346, "step": 6372500 }, { "epoch": 31.57, "learning_rate": 3.421864023027799e-05, "loss": 2.2264, "step": 6373000 }, { "epoch": 31.58, "learning_rate": 3.4217401643851906e-05, "loss": 2.2479, "step": 6373500 }, { "epoch": 31.58, "learning_rate": 3.421616305742582e-05, "loss": 2.2097, "step": 6374000 }, { "epoch": 31.58, "learning_rate": 3.421492694817259e-05, "loss": 2.23, "step": 6374500 }, { "epoch": 31.58, "learning_rate": 3.421368836174651e-05, "loss": 2.2047, "step": 6375000 }, { "epoch": 31.59, "learning_rate": 3.4212449775320426e-05, "loss": 2.2001, "step": 6375500 }, { "epoch": 31.59, "learning_rate": 3.4211211188894336e-05, "loss": 2.1968, "step": 6376000 }, { "epoch": 31.59, "learning_rate": 3.420997507964111e-05, "loss": 2.219, "step": 6376500 }, { "epoch": 31.59, "learning_rate": 3.420873649321503e-05, "loss": 2.2418, "step": 6377000 }, { "epoch": 31.6, "learning_rate": 3.4207497906788946e-05, "loss": 2.2385, "step": 6377500 }, { "epoch": 31.6, "learning_rate": 3.420625932036286e-05, "loss": 2.2359, "step": 6378000 }, { "epoch": 31.6, "learning_rate": 3.420502073393678e-05, "loss": 2.2245, "step": 6378500 }, { "epoch": 31.6, "learning_rate": 3.420378462468354e-05, "loss": 2.231, "step": 6379000 }, { "epoch": 31.61, "learning_rate": 3.420254603825746e-05, "loss": 2.1829, "step": 6379500 }, { "epoch": 31.61, "learning_rate": 3.420130992900423e-05, "loss": 2.2169, "step": 6380000 }, { "epoch": 31.61, "learning_rate": 3.4200071342578144e-05, "loss": 2.2441, "step": 6380500 }, { "epoch": 31.61, "learning_rate": 3.419883275615206e-05, "loss": 2.2208, "step": 6381000 }, { "epoch": 31.62, "learning_rate": 3.419759416972598e-05, "loss": 2.2438, "step": 6381500 }, { "epoch": 31.62, "learning_rate": 3.4196355583299895e-05, "loss": 2.2072, "step": 6382000 }, { "epoch": 31.62, "learning_rate": 3.419511699687381e-05, "loss": 2.2324, "step": 6382500 }, { "epoch": 31.62, "learning_rate": 3.419387841044773e-05, "loss": 2.2065, "step": 6383000 }, { "epoch": 31.63, "learning_rate": 3.4192639824021646e-05, "loss": 2.2306, "step": 6383500 }, { "epoch": 31.63, "learning_rate": 3.419140123759556e-05, "loss": 2.2126, "step": 6384000 }, { "epoch": 31.63, "learning_rate": 3.419016265116948e-05, "loss": 2.2266, "step": 6384500 }, { "epoch": 31.63, "learning_rate": 3.4188924064743397e-05, "loss": 2.2249, "step": 6385000 }, { "epoch": 31.64, "learning_rate": 3.418768547831731e-05, "loss": 2.221, "step": 6385500 }, { "epoch": 31.64, "learning_rate": 3.4186446891891224e-05, "loss": 2.245, "step": 6386000 }, { "epoch": 31.64, "learning_rate": 3.418520830546514e-05, "loss": 2.2187, "step": 6386500 }, { "epoch": 31.64, "learning_rate": 3.418396971903906e-05, "loss": 2.2144, "step": 6387000 }, { "epoch": 31.65, "learning_rate": 3.4182731132612974e-05, "loss": 2.2362, "step": 6387500 }, { "epoch": 31.65, "learning_rate": 3.418149254618689e-05, "loss": 2.1898, "step": 6388000 }, { "epoch": 31.65, "learning_rate": 3.418025643693365e-05, "loss": 2.2426, "step": 6388500 }, { "epoch": 31.65, "learning_rate": 3.417901785050757e-05, "loss": 2.2156, "step": 6389000 }, { "epoch": 31.66, "learning_rate": 3.417777926408149e-05, "loss": 2.2327, "step": 6389500 }, { "epoch": 31.66, "learning_rate": 3.4176540677655404e-05, "loss": 2.2054, "step": 6390000 }, { "epoch": 31.66, "learning_rate": 3.417530209122932e-05, "loss": 2.2216, "step": 6390500 }, { "epoch": 31.66, "learning_rate": 3.417406350480324e-05, "loss": 2.2077, "step": 6391000 }, { "epoch": 31.67, "learning_rate": 3.4172824918377155e-05, "loss": 2.2122, "step": 6391500 }, { "epoch": 31.67, "learning_rate": 3.417158633195107e-05, "loss": 2.2206, "step": 6392000 }, { "epoch": 31.67, "learning_rate": 3.417034774552499e-05, "loss": 2.2276, "step": 6392500 }, { "epoch": 31.67, "learning_rate": 3.4169109159098906e-05, "loss": 2.2207, "step": 6393000 }, { "epoch": 31.68, "learning_rate": 3.416787057267282e-05, "loss": 2.2294, "step": 6393500 }, { "epoch": 31.68, "learning_rate": 3.416663446341959e-05, "loss": 2.2208, "step": 6394000 }, { "epoch": 31.68, "learning_rate": 3.416539835416636e-05, "loss": 2.2432, "step": 6394500 }, { "epoch": 31.68, "learning_rate": 3.416415976774027e-05, "loss": 2.2231, "step": 6395000 }, { "epoch": 31.69, "learning_rate": 3.416292118131419e-05, "loss": 2.2351, "step": 6395500 }, { "epoch": 31.69, "learning_rate": 3.4161682594888104e-05, "loss": 2.219, "step": 6396000 }, { "epoch": 31.69, "learning_rate": 3.416044400846202e-05, "loss": 2.2291, "step": 6396500 }, { "epoch": 31.69, "learning_rate": 3.415920542203594e-05, "loss": 2.2375, "step": 6397000 }, { "epoch": 31.7, "learning_rate": 3.4157966835609855e-05, "loss": 2.2292, "step": 6397500 }, { "epoch": 31.7, "learning_rate": 3.415672824918377e-05, "loss": 2.2253, "step": 6398000 }, { "epoch": 31.7, "learning_rate": 3.415548966275769e-05, "loss": 2.2333, "step": 6398500 }, { "epoch": 31.7, "learning_rate": 3.415425603067731e-05, "loss": 2.2152, "step": 6399000 }, { "epoch": 31.71, "learning_rate": 3.415301992142408e-05, "loss": 2.2123, "step": 6399500 }, { "epoch": 31.71, "learning_rate": 3.415178381217085e-05, "loss": 2.235, "step": 6400000 }, { "epoch": 31.71, "learning_rate": 3.4150545225744764e-05, "loss": 2.2219, "step": 6400500 }, { "epoch": 31.71, "learning_rate": 3.414930663931868e-05, "loss": 2.2168, "step": 6401000 }, { "epoch": 31.72, "learning_rate": 3.41480680528926e-05, "loss": 2.2214, "step": 6401500 }, { "epoch": 31.72, "learning_rate": 3.4146829466466515e-05, "loss": 2.2568, "step": 6402000 }, { "epoch": 31.72, "learning_rate": 3.414559088004043e-05, "loss": 2.1955, "step": 6402500 }, { "epoch": 31.72, "learning_rate": 3.4144354770787194e-05, "loss": 2.2304, "step": 6403000 }, { "epoch": 31.73, "learning_rate": 3.414311618436111e-05, "loss": 2.2254, "step": 6403500 }, { "epoch": 31.73, "learning_rate": 3.414187759793503e-05, "loss": 2.2343, "step": 6404000 }, { "epoch": 31.73, "learning_rate": 3.4140639011508945e-05, "loss": 2.2122, "step": 6404500 }, { "epoch": 31.73, "learning_rate": 3.413940042508286e-05, "loss": 2.2378, "step": 6405000 }, { "epoch": 31.74, "learning_rate": 3.413816431582963e-05, "loss": 2.2572, "step": 6405500 }, { "epoch": 31.74, "learning_rate": 3.413692572940355e-05, "loss": 2.2494, "step": 6406000 }, { "epoch": 31.74, "learning_rate": 3.4135687142977464e-05, "loss": 2.2201, "step": 6406500 }, { "epoch": 31.74, "learning_rate": 3.413444855655138e-05, "loss": 2.2259, "step": 6407000 }, { "epoch": 31.74, "learning_rate": 3.41332099701253e-05, "loss": 2.2272, "step": 6407500 }, { "epoch": 31.75, "learning_rate": 3.4131971383699215e-05, "loss": 2.235, "step": 6408000 }, { "epoch": 31.75, "learning_rate": 3.413073279727313e-05, "loss": 2.2403, "step": 6408500 }, { "epoch": 31.75, "learning_rate": 3.412949421084705e-05, "loss": 2.246, "step": 6409000 }, { "epoch": 31.75, "learning_rate": 3.4128255624420966e-05, "loss": 2.2335, "step": 6409500 }, { "epoch": 31.76, "learning_rate": 3.412701703799488e-05, "loss": 2.247, "step": 6410000 }, { "epoch": 31.76, "learning_rate": 3.412577845156879e-05, "loss": 2.2347, "step": 6410500 }, { "epoch": 31.76, "learning_rate": 3.412453986514271e-05, "loss": 2.2364, "step": 6411000 }, { "epoch": 31.76, "learning_rate": 3.412330127871663e-05, "loss": 2.2555, "step": 6411500 }, { "epoch": 31.77, "learning_rate": 3.4122062692290544e-05, "loss": 2.2152, "step": 6412000 }, { "epoch": 31.77, "learning_rate": 3.412082658303731e-05, "loss": 2.2121, "step": 6412500 }, { "epoch": 31.77, "learning_rate": 3.411959047378408e-05, "loss": 2.2103, "step": 6413000 }, { "epoch": 31.77, "learning_rate": 3.411835436453085e-05, "loss": 2.211, "step": 6413500 }, { "epoch": 31.78, "learning_rate": 3.411711577810477e-05, "loss": 2.222, "step": 6414000 }, { "epoch": 31.78, "learning_rate": 3.411587719167868e-05, "loss": 2.2353, "step": 6414500 }, { "epoch": 31.78, "learning_rate": 3.4114638605252594e-05, "loss": 2.2561, "step": 6415000 }, { "epoch": 31.78, "learning_rate": 3.411340001882651e-05, "loss": 2.2272, "step": 6415500 }, { "epoch": 31.79, "learning_rate": 3.411216143240043e-05, "loss": 2.2308, "step": 6416000 }, { "epoch": 31.79, "learning_rate": 3.4110922845974345e-05, "loss": 2.2271, "step": 6416500 }, { "epoch": 31.79, "learning_rate": 3.410968425954826e-05, "loss": 2.203, "step": 6417000 }, { "epoch": 31.79, "learning_rate": 3.410844567312218e-05, "loss": 2.2542, "step": 6417500 }, { "epoch": 31.8, "learning_rate": 3.4107207086696096e-05, "loss": 2.2169, "step": 6418000 }, { "epoch": 31.8, "learning_rate": 3.410596850027001e-05, "loss": 2.225, "step": 6418500 }, { "epoch": 31.8, "learning_rate": 3.410473239101678e-05, "loss": 2.225, "step": 6419000 }, { "epoch": 31.8, "learning_rate": 3.41034938045907e-05, "loss": 2.24, "step": 6419500 }, { "epoch": 31.81, "learning_rate": 3.4102255218164615e-05, "loss": 2.2394, "step": 6420000 }, { "epoch": 31.81, "learning_rate": 3.410101663173853e-05, "loss": 2.255, "step": 6420500 }, { "epoch": 31.81, "learning_rate": 3.409977804531245e-05, "loss": 2.2297, "step": 6421000 }, { "epoch": 31.81, "learning_rate": 3.4098539458886366e-05, "loss": 2.2249, "step": 6421500 }, { "epoch": 31.82, "learning_rate": 3.409730087246028e-05, "loss": 2.2136, "step": 6422000 }, { "epoch": 31.82, "learning_rate": 3.40960622860342e-05, "loss": 2.252, "step": 6422500 }, { "epoch": 31.82, "learning_rate": 3.409482369960812e-05, "loss": 2.213, "step": 6423000 }, { "epoch": 31.82, "learning_rate": 3.4093585113182034e-05, "loss": 2.2281, "step": 6423500 }, { "epoch": 31.83, "learning_rate": 3.4092346526755944e-05, "loss": 2.2149, "step": 6424000 }, { "epoch": 31.83, "learning_rate": 3.409110794032986e-05, "loss": 2.2163, "step": 6424500 }, { "epoch": 31.83, "learning_rate": 3.408987183107663e-05, "loss": 2.2166, "step": 6425000 }, { "epoch": 31.83, "learning_rate": 3.408863324465055e-05, "loss": 2.2333, "step": 6425500 }, { "epoch": 31.84, "learning_rate": 3.4087397135397315e-05, "loss": 2.2312, "step": 6426000 }, { "epoch": 31.84, "learning_rate": 3.4086161026144084e-05, "loss": 2.226, "step": 6426500 }, { "epoch": 31.84, "learning_rate": 3.4084922439718e-05, "loss": 2.2535, "step": 6427000 }, { "epoch": 31.84, "learning_rate": 3.408368385329191e-05, "loss": 2.2257, "step": 6427500 }, { "epoch": 31.85, "learning_rate": 3.408244526686583e-05, "loss": 2.232, "step": 6428000 }, { "epoch": 31.85, "learning_rate": 3.4081206680439745e-05, "loss": 2.2277, "step": 6428500 }, { "epoch": 31.85, "learning_rate": 3.407996809401366e-05, "loss": 2.215, "step": 6429000 }, { "epoch": 31.85, "learning_rate": 3.407872950758758e-05, "loss": 2.2315, "step": 6429500 }, { "epoch": 31.86, "learning_rate": 3.4077490921161496e-05, "loss": 2.2276, "step": 6430000 }, { "epoch": 31.86, "learning_rate": 3.4076254811908265e-05, "loss": 2.2121, "step": 6430500 }, { "epoch": 31.86, "learning_rate": 3.407501622548218e-05, "loss": 2.2239, "step": 6431000 }, { "epoch": 31.86, "learning_rate": 3.40737776390561e-05, "loss": 2.2147, "step": 6431500 }, { "epoch": 31.87, "learning_rate": 3.4072539052630016e-05, "loss": 2.2294, "step": 6432000 }, { "epoch": 31.87, "learning_rate": 3.407130046620393e-05, "loss": 2.2209, "step": 6432500 }, { "epoch": 31.87, "learning_rate": 3.407006187977785e-05, "loss": 2.2195, "step": 6433000 }, { "epoch": 31.87, "learning_rate": 3.4068823293351766e-05, "loss": 2.231, "step": 6433500 }, { "epoch": 31.88, "learning_rate": 3.406758470692568e-05, "loss": 2.2281, "step": 6434000 }, { "epoch": 31.88, "learning_rate": 3.40663461204996e-05, "loss": 2.2155, "step": 6434500 }, { "epoch": 31.88, "learning_rate": 3.406511248841922e-05, "loss": 2.2162, "step": 6435000 }, { "epoch": 31.88, "learning_rate": 3.406387390199314e-05, "loss": 2.217, "step": 6435500 }, { "epoch": 31.89, "learning_rate": 3.406263531556705e-05, "loss": 2.1886, "step": 6436000 }, { "epoch": 31.89, "learning_rate": 3.4061396729140965e-05, "loss": 2.2471, "step": 6436500 }, { "epoch": 31.89, "learning_rate": 3.406015814271488e-05, "loss": 2.2321, "step": 6437000 }, { "epoch": 31.89, "learning_rate": 3.40589195562888e-05, "loss": 2.2454, "step": 6437500 }, { "epoch": 31.9, "learning_rate": 3.4057680969862716e-05, "loss": 2.2335, "step": 6438000 }, { "epoch": 31.9, "learning_rate": 3.4056444860609485e-05, "loss": 2.2461, "step": 6438500 }, { "epoch": 31.9, "learning_rate": 3.40552062741834e-05, "loss": 2.2311, "step": 6439000 }, { "epoch": 31.9, "learning_rate": 3.405396768775732e-05, "loss": 2.2144, "step": 6439500 }, { "epoch": 31.91, "learning_rate": 3.4052729101331235e-05, "loss": 2.2356, "step": 6440000 }, { "epoch": 31.91, "learning_rate": 3.405149051490515e-05, "loss": 2.2321, "step": 6440500 }, { "epoch": 31.91, "learning_rate": 3.405025440565192e-05, "loss": 2.2336, "step": 6441000 }, { "epoch": 31.91, "learning_rate": 3.404901581922584e-05, "loss": 2.2452, "step": 6441500 }, { "epoch": 31.92, "learning_rate": 3.4047777232799755e-05, "loss": 2.2066, "step": 6442000 }, { "epoch": 31.92, "learning_rate": 3.4046538646373665e-05, "loss": 2.2472, "step": 6442500 }, { "epoch": 31.92, "learning_rate": 3.404530005994758e-05, "loss": 2.2411, "step": 6443000 }, { "epoch": 31.92, "learning_rate": 3.40440664278672e-05, "loss": 2.2134, "step": 6443500 }, { "epoch": 31.93, "learning_rate": 3.404282784144112e-05, "loss": 2.2387, "step": 6444000 }, { "epoch": 31.93, "learning_rate": 3.4041589255015037e-05, "loss": 2.2511, "step": 6444500 }, { "epoch": 31.93, "learning_rate": 3.4040350668588953e-05, "loss": 2.1958, "step": 6445000 }, { "epoch": 31.93, "learning_rate": 3.403911208216287e-05, "loss": 2.2224, "step": 6445500 }, { "epoch": 31.94, "learning_rate": 3.403787349573679e-05, "loss": 2.1971, "step": 6446000 }, { "epoch": 31.94, "learning_rate": 3.4036634909310704e-05, "loss": 2.2369, "step": 6446500 }, { "epoch": 31.94, "learning_rate": 3.403539880005747e-05, "loss": 2.2274, "step": 6447000 }, { "epoch": 31.94, "learning_rate": 3.403416021363139e-05, "loss": 2.2175, "step": 6447500 }, { "epoch": 31.95, "learning_rate": 3.403292162720531e-05, "loss": 2.2479, "step": 6448000 }, { "epoch": 31.95, "learning_rate": 3.403168551795207e-05, "loss": 2.2391, "step": 6448500 }, { "epoch": 31.95, "learning_rate": 3.4030449408698845e-05, "loss": 2.2423, "step": 6449000 }, { "epoch": 31.95, "learning_rate": 3.4029210822272755e-05, "loss": 2.2215, "step": 6449500 }, { "epoch": 31.96, "learning_rate": 3.402797223584667e-05, "loss": 2.2542, "step": 6450000 }, { "epoch": 31.96, "learning_rate": 3.402673364942059e-05, "loss": 2.2291, "step": 6450500 }, { "epoch": 31.96, "learning_rate": 3.4025495062994505e-05, "loss": 2.2185, "step": 6451000 }, { "epoch": 31.96, "learning_rate": 3.402425895374128e-05, "loss": 2.2083, "step": 6451500 }, { "epoch": 31.97, "learning_rate": 3.40230203673152e-05, "loss": 2.2151, "step": 6452000 }, { "epoch": 31.97, "learning_rate": 3.402178178088911e-05, "loss": 2.2216, "step": 6452500 }, { "epoch": 31.97, "learning_rate": 3.4020543194463025e-05, "loss": 2.2025, "step": 6453000 }, { "epoch": 31.97, "learning_rate": 3.401930460803694e-05, "loss": 2.2272, "step": 6453500 }, { "epoch": 31.98, "learning_rate": 3.401806602161086e-05, "loss": 2.2315, "step": 6454000 }, { "epoch": 31.98, "learning_rate": 3.4016827435184776e-05, "loss": 2.2421, "step": 6454500 }, { "epoch": 31.98, "learning_rate": 3.4015588848758686e-05, "loss": 2.229, "step": 6455000 }, { "epoch": 31.98, "learning_rate": 3.40143502623326e-05, "loss": 2.2382, "step": 6455500 }, { "epoch": 31.99, "learning_rate": 3.401311167590652e-05, "loss": 2.2209, "step": 6456000 }, { "epoch": 31.99, "learning_rate": 3.401187308948044e-05, "loss": 2.2126, "step": 6456500 }, { "epoch": 31.99, "learning_rate": 3.4010634503054354e-05, "loss": 2.2322, "step": 6457000 }, { "epoch": 31.99, "learning_rate": 3.400939591662827e-05, "loss": 2.2372, "step": 6457500 }, { "epoch": 32.0, "learning_rate": 3.400815733020219e-05, "loss": 2.236, "step": 6458000 }, { "epoch": 32.0, "learning_rate": 3.4006918743776104e-05, "loss": 2.2363, "step": 6458500 }, { "epoch": 32.0, "eval_accuracy": 0.6593229081640353, "eval_accuracy_mlm": 0.6153610829620848, "eval_accuracy_nsp": 0.8667393580928698, "eval_loss": 2.3164103031158447, "eval_runtime": 145.6685, "eval_samples_per_second": 1750.269, "eval_steps_per_second": 72.933, "step": 6458976 }, { "epoch": 32.0, "learning_rate": 3.400568015735002e-05, "loss": 2.2388, "step": 6459000 }, { "epoch": 32.0, "learning_rate": 3.400444652526964e-05, "loss": 2.2307, "step": 6459500 }, { "epoch": 32.01, "learning_rate": 3.400320793884356e-05, "loss": 2.203, "step": 6460000 }, { "epoch": 32.01, "learning_rate": 3.4001969352417476e-05, "loss": 2.1939, "step": 6460500 }, { "epoch": 32.01, "learning_rate": 3.400073076599139e-05, "loss": 2.1834, "step": 6461000 }, { "epoch": 32.01, "learning_rate": 3.399949217956531e-05, "loss": 2.2082, "step": 6461500 }, { "epoch": 32.01, "learning_rate": 3.399825607031207e-05, "loss": 2.2135, "step": 6462000 }, { "epoch": 32.02, "learning_rate": 3.399701748388599e-05, "loss": 2.2041, "step": 6462500 }, { "epoch": 32.02, "learning_rate": 3.3995778897459906e-05, "loss": 2.1927, "step": 6463000 }, { "epoch": 32.02, "learning_rate": 3.399454031103382e-05, "loss": 2.1848, "step": 6463500 }, { "epoch": 32.02, "learning_rate": 3.399330172460774e-05, "loss": 2.2194, "step": 6464000 }, { "epoch": 32.03, "learning_rate": 3.3992063138181656e-05, "loss": 2.1935, "step": 6464500 }, { "epoch": 32.03, "learning_rate": 3.3990824551755573e-05, "loss": 2.22, "step": 6465000 }, { "epoch": 32.03, "learning_rate": 3.398958596532949e-05, "loss": 2.1954, "step": 6465500 }, { "epoch": 32.03, "learning_rate": 3.398834737890341e-05, "loss": 2.2214, "step": 6466000 }, { "epoch": 32.04, "learning_rate": 3.3987111269650176e-05, "loss": 2.1903, "step": 6466500 }, { "epoch": 32.04, "learning_rate": 3.398587268322409e-05, "loss": 2.1986, "step": 6467000 }, { "epoch": 32.04, "learning_rate": 3.398463409679801e-05, "loss": 2.1881, "step": 6467500 }, { "epoch": 32.04, "learning_rate": 3.398339798754477e-05, "loss": 2.2115, "step": 6468000 }, { "epoch": 32.05, "learning_rate": 3.398215940111869e-05, "loss": 2.2028, "step": 6468500 }, { "epoch": 32.05, "learning_rate": 3.3980923291865464e-05, "loss": 2.1965, "step": 6469000 }, { "epoch": 32.05, "learning_rate": 3.3979687182612227e-05, "loss": 2.2209, "step": 6469500 }, { "epoch": 32.05, "learning_rate": 3.3978448596186143e-05, "loss": 2.1943, "step": 6470000 }, { "epoch": 32.06, "learning_rate": 3.397721000976006e-05, "loss": 2.2164, "step": 6470500 }, { "epoch": 32.06, "learning_rate": 3.397597390050683e-05, "loss": 2.1946, "step": 6471000 }, { "epoch": 32.06, "learning_rate": 3.3974735314080746e-05, "loss": 2.2251, "step": 6471500 }, { "epoch": 32.06, "learning_rate": 3.397349672765466e-05, "loss": 2.1968, "step": 6472000 }, { "epoch": 32.07, "learning_rate": 3.397225814122858e-05, "loss": 2.212, "step": 6472500 }, { "epoch": 32.07, "learning_rate": 3.39710195548025e-05, "loss": 2.2174, "step": 6473000 }, { "epoch": 32.07, "learning_rate": 3.3969780968376414e-05, "loss": 2.2001, "step": 6473500 }, { "epoch": 32.07, "learning_rate": 3.396854238195033e-05, "loss": 2.2193, "step": 6474000 }, { "epoch": 32.08, "learning_rate": 3.396730379552425e-05, "loss": 2.2152, "step": 6474500 }, { "epoch": 32.08, "learning_rate": 3.3966065209098165e-05, "loss": 2.1806, "step": 6475000 }, { "epoch": 32.08, "learning_rate": 3.3964829099844933e-05, "loss": 2.2087, "step": 6475500 }, { "epoch": 32.08, "learning_rate": 3.396359051341885e-05, "loss": 2.1978, "step": 6476000 }, { "epoch": 32.09, "learning_rate": 3.396235192699276e-05, "loss": 2.2147, "step": 6476500 }, { "epoch": 32.09, "learning_rate": 3.396111334056668e-05, "loss": 2.2109, "step": 6477000 }, { "epoch": 32.09, "learning_rate": 3.3959874754140594e-05, "loss": 2.2149, "step": 6477500 }, { "epoch": 32.09, "learning_rate": 3.395863616771451e-05, "loss": 2.2203, "step": 6478000 }, { "epoch": 32.1, "learning_rate": 3.395739758128843e-05, "loss": 2.208, "step": 6478500 }, { "epoch": 32.1, "learning_rate": 3.3956158994862345e-05, "loss": 2.2123, "step": 6479000 }, { "epoch": 32.1, "learning_rate": 3.395492040843626e-05, "loss": 2.1831, "step": 6479500 }, { "epoch": 32.1, "learning_rate": 3.395368182201018e-05, "loss": 2.2049, "step": 6480000 }, { "epoch": 32.11, "learning_rate": 3.395244323558409e-05, "loss": 2.2029, "step": 6480500 }, { "epoch": 32.11, "learning_rate": 3.3951207126330865e-05, "loss": 2.1977, "step": 6481000 }, { "epoch": 32.11, "learning_rate": 3.394996853990478e-05, "loss": 2.2033, "step": 6481500 }, { "epoch": 32.11, "learning_rate": 3.39487299534787e-05, "loss": 2.2213, "step": 6482000 }, { "epoch": 32.12, "learning_rate": 3.394749384422547e-05, "loss": 2.2247, "step": 6482500 }, { "epoch": 32.12, "learning_rate": 3.394625525779938e-05, "loss": 2.2051, "step": 6483000 }, { "epoch": 32.12, "learning_rate": 3.3945016671373294e-05, "loss": 2.1888, "step": 6483500 }, { "epoch": 32.12, "learning_rate": 3.394377808494721e-05, "loss": 2.2, "step": 6484000 }, { "epoch": 32.13, "learning_rate": 3.394253949852113e-05, "loss": 2.2133, "step": 6484500 }, { "epoch": 32.13, "learning_rate": 3.3941300912095045e-05, "loss": 2.207, "step": 6485000 }, { "epoch": 32.13, "learning_rate": 3.394006232566896e-05, "loss": 2.2215, "step": 6485500 }, { "epoch": 32.13, "learning_rate": 3.393882373924288e-05, "loss": 2.2094, "step": 6486000 }, { "epoch": 32.14, "learning_rate": 3.3937585152816796e-05, "loss": 2.1998, "step": 6486500 }, { "epoch": 32.14, "learning_rate": 3.3936346566390706e-05, "loss": 2.1855, "step": 6487000 }, { "epoch": 32.14, "learning_rate": 3.393510797996462e-05, "loss": 2.2094, "step": 6487500 }, { "epoch": 32.14, "learning_rate": 3.393386939353854e-05, "loss": 2.1967, "step": 6488000 }, { "epoch": 32.15, "learning_rate": 3.393263080711246e-05, "loss": 2.2105, "step": 6488500 }, { "epoch": 32.15, "learning_rate": 3.3931392220686374e-05, "loss": 2.2006, "step": 6489000 }, { "epoch": 32.15, "learning_rate": 3.393015363426029e-05, "loss": 2.1944, "step": 6489500 }, { "epoch": 32.15, "learning_rate": 3.392891504783421e-05, "loss": 2.1904, "step": 6490000 }, { "epoch": 32.16, "learning_rate": 3.3927676461408125e-05, "loss": 2.1949, "step": 6490500 }, { "epoch": 32.16, "learning_rate": 3.392643787498204e-05, "loss": 2.23, "step": 6491000 }, { "epoch": 32.16, "learning_rate": 3.392519928855596e-05, "loss": 2.2266, "step": 6491500 }, { "epoch": 32.16, "learning_rate": 3.392396317930273e-05, "loss": 2.2044, "step": 6492000 }, { "epoch": 32.17, "learning_rate": 3.3922724592876644e-05, "loss": 2.2013, "step": 6492500 }, { "epoch": 32.17, "learning_rate": 3.392148600645056e-05, "loss": 2.2037, "step": 6493000 }, { "epoch": 32.17, "learning_rate": 3.392024742002448e-05, "loss": 2.213, "step": 6493500 }, { "epoch": 32.17, "learning_rate": 3.3919008833598395e-05, "loss": 2.2009, "step": 6494000 }, { "epoch": 32.18, "learning_rate": 3.391777272434516e-05, "loss": 2.1952, "step": 6494500 }, { "epoch": 32.18, "learning_rate": 3.3916534137919074e-05, "loss": 2.2143, "step": 6495000 }, { "epoch": 32.18, "learning_rate": 3.391529555149299e-05, "loss": 2.208, "step": 6495500 }, { "epoch": 32.18, "learning_rate": 3.391405696506691e-05, "loss": 2.1843, "step": 6496000 }, { "epoch": 32.19, "learning_rate": 3.3912818378640825e-05, "loss": 2.1988, "step": 6496500 }, { "epoch": 32.19, "learning_rate": 3.391157979221474e-05, "loss": 2.2156, "step": 6497000 }, { "epoch": 32.19, "learning_rate": 3.391034120578866e-05, "loss": 2.1969, "step": 6497500 }, { "epoch": 32.19, "learning_rate": 3.3909102619362576e-05, "loss": 2.1982, "step": 6498000 }, { "epoch": 32.2, "learning_rate": 3.3907866510109344e-05, "loss": 2.1881, "step": 6498500 }, { "epoch": 32.2, "learning_rate": 3.390663040085611e-05, "loss": 2.198, "step": 6499000 }, { "epoch": 32.2, "learning_rate": 3.390539181443002e-05, "loss": 2.2107, "step": 6499500 }, { "epoch": 32.2, "learning_rate": 3.390415322800394e-05, "loss": 2.2016, "step": 6500000 }, { "epoch": 32.21, "learning_rate": 3.390291464157786e-05, "loss": 2.2143, "step": 6500500 }, { "epoch": 32.21, "learning_rate": 3.390167853232463e-05, "loss": 2.2103, "step": 6501000 }, { "epoch": 32.21, "learning_rate": 3.390043994589855e-05, "loss": 2.1963, "step": 6501500 }, { "epoch": 32.21, "learning_rate": 3.389920383664532e-05, "loss": 2.2025, "step": 6502000 }, { "epoch": 32.22, "learning_rate": 3.3897965250219235e-05, "loss": 2.2115, "step": 6502500 }, { "epoch": 32.22, "learning_rate": 3.389672666379315e-05, "loss": 2.2162, "step": 6503000 }, { "epoch": 32.22, "learning_rate": 3.389548807736706e-05, "loss": 2.2093, "step": 6503500 }, { "epoch": 32.22, "learning_rate": 3.389424949094098e-05, "loss": 2.2149, "step": 6504000 }, { "epoch": 32.23, "learning_rate": 3.3893010904514896e-05, "loss": 2.2041, "step": 6504500 }, { "epoch": 32.23, "learning_rate": 3.389177231808881e-05, "loss": 2.1998, "step": 6505000 }, { "epoch": 32.23, "learning_rate": 3.389053373166273e-05, "loss": 2.1898, "step": 6505500 }, { "epoch": 32.23, "learning_rate": 3.388929514523664e-05, "loss": 2.2154, "step": 6506000 }, { "epoch": 32.24, "learning_rate": 3.388805655881056e-05, "loss": 2.2269, "step": 6506500 }, { "epoch": 32.24, "learning_rate": 3.3886817972384474e-05, "loss": 2.2098, "step": 6507000 }, { "epoch": 32.24, "learning_rate": 3.388557938595839e-05, "loss": 2.2206, "step": 6507500 }, { "epoch": 32.24, "learning_rate": 3.388434079953231e-05, "loss": 2.2283, "step": 6508000 }, { "epoch": 32.25, "learning_rate": 3.388310469027908e-05, "loss": 2.2005, "step": 6508500 }, { "epoch": 32.25, "learning_rate": 3.3881866103852994e-05, "loss": 2.2254, "step": 6509000 }, { "epoch": 32.25, "learning_rate": 3.388062751742691e-05, "loss": 2.1927, "step": 6509500 }, { "epoch": 32.25, "learning_rate": 3.387938893100083e-05, "loss": 2.201, "step": 6510000 }, { "epoch": 32.26, "learning_rate": 3.3878150344574745e-05, "loss": 2.1961, "step": 6510500 }, { "epoch": 32.26, "learning_rate": 3.387691175814866e-05, "loss": 2.1983, "step": 6511000 }, { "epoch": 32.26, "learning_rate": 3.387567317172258e-05, "loss": 2.2046, "step": 6511500 }, { "epoch": 32.26, "learning_rate": 3.3874434585296495e-05, "loss": 2.2265, "step": 6512000 }, { "epoch": 32.27, "learning_rate": 3.387319599887041e-05, "loss": 2.214, "step": 6512500 }, { "epoch": 32.27, "learning_rate": 3.387195741244433e-05, "loss": 2.194, "step": 6513000 }, { "epoch": 32.27, "learning_rate": 3.3870718826018246e-05, "loss": 2.2226, "step": 6513500 }, { "epoch": 32.27, "learning_rate": 3.386948271676501e-05, "loss": 2.1941, "step": 6514000 }, { "epoch": 32.28, "learning_rate": 3.3868246607511784e-05, "loss": 2.1788, "step": 6514500 }, { "epoch": 32.28, "learning_rate": 3.3867008021085694e-05, "loss": 2.205, "step": 6515000 }, { "epoch": 32.28, "learning_rate": 3.386576943465961e-05, "loss": 2.2079, "step": 6515500 }, { "epoch": 32.28, "learning_rate": 3.386453084823353e-05, "loss": 2.1918, "step": 6516000 }, { "epoch": 32.28, "learning_rate": 3.3863294738980303e-05, "loss": 2.2067, "step": 6516500 }, { "epoch": 32.29, "learning_rate": 3.3862056152554214e-05, "loss": 2.1896, "step": 6517000 }, { "epoch": 32.29, "learning_rate": 3.386081756612813e-05, "loss": 2.1909, "step": 6517500 }, { "epoch": 32.29, "learning_rate": 3.385957897970205e-05, "loss": 2.2219, "step": 6518000 }, { "epoch": 32.29, "learning_rate": 3.3858342870448816e-05, "loss": 2.2144, "step": 6518500 }, { "epoch": 32.3, "learning_rate": 3.385710428402273e-05, "loss": 2.2162, "step": 6519000 }, { "epoch": 32.3, "learning_rate": 3.385586569759665e-05, "loss": 2.2064, "step": 6519500 }, { "epoch": 32.3, "learning_rate": 3.385462958834342e-05, "loss": 2.2063, "step": 6520000 }, { "epoch": 32.3, "learning_rate": 3.3853391001917336e-05, "loss": 2.2012, "step": 6520500 }, { "epoch": 32.31, "learning_rate": 3.385215241549125e-05, "loss": 2.2028, "step": 6521000 }, { "epoch": 32.31, "learning_rate": 3.385091382906517e-05, "loss": 2.1945, "step": 6521500 }, { "epoch": 32.31, "learning_rate": 3.3849680196984784e-05, "loss": 2.2054, "step": 6522000 }, { "epoch": 32.31, "learning_rate": 3.38484416105587e-05, "loss": 2.2139, "step": 6522500 }, { "epoch": 32.32, "learning_rate": 3.384720302413262e-05, "loss": 2.203, "step": 6523000 }, { "epoch": 32.32, "learning_rate": 3.3845964437706534e-05, "loss": 2.2034, "step": 6523500 }, { "epoch": 32.32, "learning_rate": 3.384472585128045e-05, "loss": 2.2103, "step": 6524000 }, { "epoch": 32.32, "learning_rate": 3.384348726485437e-05, "loss": 2.2098, "step": 6524500 }, { "epoch": 32.33, "learning_rate": 3.3842248678428285e-05, "loss": 2.1964, "step": 6525000 }, { "epoch": 32.33, "learning_rate": 3.38410100920022e-05, "loss": 2.1948, "step": 6525500 }, { "epoch": 32.33, "learning_rate": 3.383977150557612e-05, "loss": 2.2176, "step": 6526000 }, { "epoch": 32.33, "learning_rate": 3.3838532919150036e-05, "loss": 2.2094, "step": 6526500 }, { "epoch": 32.34, "learning_rate": 3.383729433272395e-05, "loss": 2.2111, "step": 6527000 }, { "epoch": 32.34, "learning_rate": 3.383605574629787e-05, "loss": 2.2003, "step": 6527500 }, { "epoch": 32.34, "learning_rate": 3.383481715987179e-05, "loss": 2.2115, "step": 6528000 }, { "epoch": 32.34, "learning_rate": 3.3833578573445704e-05, "loss": 2.2292, "step": 6528500 }, { "epoch": 32.35, "learning_rate": 3.3832342464192466e-05, "loss": 2.2087, "step": 6529000 }, { "epoch": 32.35, "learning_rate": 3.383110387776638e-05, "loss": 2.2061, "step": 6529500 }, { "epoch": 32.35, "learning_rate": 3.38298652913403e-05, "loss": 2.2259, "step": 6530000 }, { "epoch": 32.35, "learning_rate": 3.3828626704914217e-05, "loss": 2.203, "step": 6530500 }, { "epoch": 32.36, "learning_rate": 3.3827388118488133e-05, "loss": 2.2353, "step": 6531000 }, { "epoch": 32.36, "learning_rate": 3.382614953206205e-05, "loss": 2.2215, "step": 6531500 }, { "epoch": 32.36, "learning_rate": 3.382491342280882e-05, "loss": 2.2307, "step": 6532000 }, { "epoch": 32.36, "learning_rate": 3.3823674836382736e-05, "loss": 2.2312, "step": 6532500 }, { "epoch": 32.37, "learning_rate": 3.382243624995665e-05, "loss": 2.2237, "step": 6533000 }, { "epoch": 32.37, "learning_rate": 3.382119766353057e-05, "loss": 2.2023, "step": 6533500 }, { "epoch": 32.37, "learning_rate": 3.381996155427733e-05, "loss": 2.2172, "step": 6534000 }, { "epoch": 32.37, "learning_rate": 3.381872296785125e-05, "loss": 2.2087, "step": 6534500 }, { "epoch": 32.38, "learning_rate": 3.3817484381425166e-05, "loss": 2.2205, "step": 6535000 }, { "epoch": 32.38, "learning_rate": 3.381624579499908e-05, "loss": 2.2083, "step": 6535500 }, { "epoch": 32.38, "learning_rate": 3.3815007208573e-05, "loss": 2.2052, "step": 6536000 }, { "epoch": 32.38, "learning_rate": 3.381376862214692e-05, "loss": 2.2252, "step": 6536500 }, { "epoch": 32.39, "learning_rate": 3.3812530035720834e-05, "loss": 2.1906, "step": 6537000 }, { "epoch": 32.39, "learning_rate": 3.381129144929475e-05, "loss": 2.2277, "step": 6537500 }, { "epoch": 32.39, "learning_rate": 3.381005286286867e-05, "loss": 2.2075, "step": 6538000 }, { "epoch": 32.39, "learning_rate": 3.3808816753615436e-05, "loss": 2.2181, "step": 6538500 }, { "epoch": 32.4, "learning_rate": 3.380757816718935e-05, "loss": 2.2153, "step": 6539000 }, { "epoch": 32.4, "learning_rate": 3.380634205793612e-05, "loss": 2.1944, "step": 6539500 }, { "epoch": 32.4, "learning_rate": 3.380510842585574e-05, "loss": 2.2176, "step": 6540000 }, { "epoch": 32.4, "learning_rate": 3.380387231660251e-05, "loss": 2.2233, "step": 6540500 }, { "epoch": 32.41, "learning_rate": 3.380263373017643e-05, "loss": 2.2152, "step": 6541000 }, { "epoch": 32.41, "learning_rate": 3.380139514375034e-05, "loss": 2.2154, "step": 6541500 }, { "epoch": 32.41, "learning_rate": 3.3800156557324255e-05, "loss": 2.2299, "step": 6542000 }, { "epoch": 32.41, "learning_rate": 3.379891797089817e-05, "loss": 2.2184, "step": 6542500 }, { "epoch": 32.42, "learning_rate": 3.379767938447209e-05, "loss": 2.2195, "step": 6543000 }, { "epoch": 32.42, "learning_rate": 3.379644327521886e-05, "loss": 2.2001, "step": 6543500 }, { "epoch": 32.42, "learning_rate": 3.3795204688792775e-05, "loss": 2.2322, "step": 6544000 }, { "epoch": 32.42, "learning_rate": 3.379396610236669e-05, "loss": 2.207, "step": 6544500 }, { "epoch": 32.43, "learning_rate": 3.379272751594061e-05, "loss": 2.2361, "step": 6545000 }, { "epoch": 32.43, "learning_rate": 3.3791488929514526e-05, "loss": 2.2226, "step": 6545500 }, { "epoch": 32.43, "learning_rate": 3.379025034308844e-05, "loss": 2.2174, "step": 6546000 }, { "epoch": 32.43, "learning_rate": 3.378901175666236e-05, "loss": 2.2221, "step": 6546500 }, { "epoch": 32.44, "learning_rate": 3.378777317023628e-05, "loss": 2.1688, "step": 6547000 }, { "epoch": 32.44, "learning_rate": 3.3786534583810194e-05, "loss": 2.2214, "step": 6547500 }, { "epoch": 32.44, "learning_rate": 3.378529599738411e-05, "loss": 2.2168, "step": 6548000 }, { "epoch": 32.44, "learning_rate": 3.378405741095803e-05, "loss": 2.2249, "step": 6548500 }, { "epoch": 32.45, "learning_rate": 3.3782818824531944e-05, "loss": 2.2091, "step": 6549000 }, { "epoch": 32.45, "learning_rate": 3.378158023810586e-05, "loss": 2.2079, "step": 6549500 }, { "epoch": 32.45, "learning_rate": 3.378034165167977e-05, "loss": 2.236, "step": 6550000 }, { "epoch": 32.45, "learning_rate": 3.377910306525369e-05, "loss": 2.219, "step": 6550500 }, { "epoch": 32.46, "learning_rate": 3.3777864478827605e-05, "loss": 2.212, "step": 6551000 }, { "epoch": 32.46, "learning_rate": 3.377662589240152e-05, "loss": 2.2248, "step": 6551500 }, { "epoch": 32.46, "learning_rate": 3.377538730597544e-05, "loss": 2.2177, "step": 6552000 }, { "epoch": 32.46, "learning_rate": 3.3774148719549356e-05, "loss": 2.2114, "step": 6552500 }, { "epoch": 32.47, "learning_rate": 3.377291013312327e-05, "loss": 2.2005, "step": 6553000 }, { "epoch": 32.47, "learning_rate": 3.377167154669719e-05, "loss": 2.2251, "step": 6553500 }, { "epoch": 32.47, "learning_rate": 3.37704329602711e-05, "loss": 2.2158, "step": 6554000 }, { "epoch": 32.47, "learning_rate": 3.376919685101787e-05, "loss": 2.1986, "step": 6554500 }, { "epoch": 32.48, "learning_rate": 3.3767960741764644e-05, "loss": 2.2362, "step": 6555000 }, { "epoch": 32.48, "learning_rate": 3.376672215533856e-05, "loss": 2.2072, "step": 6555500 }, { "epoch": 32.48, "learning_rate": 3.3765488523258175e-05, "loss": 2.2369, "step": 6556000 }, { "epoch": 32.48, "learning_rate": 3.376424993683209e-05, "loss": 2.2121, "step": 6556500 }, { "epoch": 32.49, "learning_rate": 3.376301135040601e-05, "loss": 2.2245, "step": 6557000 }, { "epoch": 32.49, "learning_rate": 3.3761772763979926e-05, "loss": 2.2273, "step": 6557500 }, { "epoch": 32.49, "learning_rate": 3.376053417755384e-05, "loss": 2.2315, "step": 6558000 }, { "epoch": 32.49, "learning_rate": 3.375929559112776e-05, "loss": 2.221, "step": 6558500 }, { "epoch": 32.5, "learning_rate": 3.375805700470168e-05, "loss": 2.2076, "step": 6559000 }, { "epoch": 32.5, "learning_rate": 3.3756820895448446e-05, "loss": 2.2375, "step": 6559500 }, { "epoch": 32.5, "learning_rate": 3.375558230902236e-05, "loss": 2.233, "step": 6560000 }, { "epoch": 32.5, "learning_rate": 3.375434372259628e-05, "loss": 2.2013, "step": 6560500 }, { "epoch": 32.51, "learning_rate": 3.3753105136170196e-05, "loss": 2.224, "step": 6561000 }, { "epoch": 32.51, "learning_rate": 3.375186654974411e-05, "loss": 2.2265, "step": 6561500 }, { "epoch": 32.51, "learning_rate": 3.3750627963318024e-05, "loss": 2.2233, "step": 6562000 }, { "epoch": 32.51, "learning_rate": 3.374938937689194e-05, "loss": 2.22, "step": 6562500 }, { "epoch": 32.52, "learning_rate": 3.374815079046586e-05, "loss": 2.2073, "step": 6563000 }, { "epoch": 32.52, "learning_rate": 3.3746912204039774e-05, "loss": 2.2204, "step": 6563500 }, { "epoch": 32.52, "learning_rate": 3.374567609478654e-05, "loss": 2.1987, "step": 6564000 }, { "epoch": 32.52, "learning_rate": 3.374443750836046e-05, "loss": 2.252, "step": 6564500 }, { "epoch": 32.53, "learning_rate": 3.374319892193438e-05, "loss": 2.2045, "step": 6565000 }, { "epoch": 32.53, "learning_rate": 3.3741960335508294e-05, "loss": 2.2147, "step": 6565500 }, { "epoch": 32.53, "learning_rate": 3.374072174908221e-05, "loss": 2.2299, "step": 6566000 }, { "epoch": 32.53, "learning_rate": 3.373948316265613e-05, "loss": 2.2087, "step": 6566500 }, { "epoch": 32.54, "learning_rate": 3.3738244576230045e-05, "loss": 2.2143, "step": 6567000 }, { "epoch": 32.54, "learning_rate": 3.373700598980396e-05, "loss": 2.2074, "step": 6567500 }, { "epoch": 32.54, "learning_rate": 3.373576740337788e-05, "loss": 2.2112, "step": 6568000 }, { "epoch": 32.54, "learning_rate": 3.373453129412464e-05, "loss": 2.1904, "step": 6568500 }, { "epoch": 32.55, "learning_rate": 3.373329518487141e-05, "loss": 2.1953, "step": 6569000 }, { "epoch": 32.55, "learning_rate": 3.3732056598445326e-05, "loss": 2.2283, "step": 6569500 }, { "epoch": 32.55, "learning_rate": 3.373081801201924e-05, "loss": 2.2039, "step": 6570000 }, { "epoch": 32.55, "learning_rate": 3.372957942559316e-05, "loss": 2.2101, "step": 6570500 }, { "epoch": 32.56, "learning_rate": 3.372834083916708e-05, "loss": 2.2203, "step": 6571000 }, { "epoch": 32.56, "learning_rate": 3.3727102252740994e-05, "loss": 2.1973, "step": 6571500 }, { "epoch": 32.56, "learning_rate": 3.372586366631491e-05, "loss": 2.212, "step": 6572000 }, { "epoch": 32.56, "learning_rate": 3.372462755706168e-05, "loss": 2.2298, "step": 6572500 }, { "epoch": 32.56, "learning_rate": 3.37233889706356e-05, "loss": 2.201, "step": 6573000 }, { "epoch": 32.57, "learning_rate": 3.372215286138236e-05, "loss": 2.2432, "step": 6573500 }, { "epoch": 32.57, "learning_rate": 3.3720914274956276e-05, "loss": 2.2075, "step": 6574000 }, { "epoch": 32.57, "learning_rate": 3.371967568853019e-05, "loss": 2.2052, "step": 6574500 }, { "epoch": 32.57, "learning_rate": 3.371843710210411e-05, "loss": 2.201, "step": 6575000 }, { "epoch": 32.58, "learning_rate": 3.3717198515678026e-05, "loss": 2.2347, "step": 6575500 }, { "epoch": 32.58, "learning_rate": 3.3715959929251943e-05, "loss": 2.2136, "step": 6576000 }, { "epoch": 32.58, "learning_rate": 3.371472134282586e-05, "loss": 2.2285, "step": 6576500 }, { "epoch": 32.58, "learning_rate": 3.371348275639978e-05, "loss": 2.2092, "step": 6577000 }, { "epoch": 32.59, "learning_rate": 3.3712244169973694e-05, "loss": 2.2303, "step": 6577500 }, { "epoch": 32.59, "learning_rate": 3.371100806072046e-05, "loss": 2.2372, "step": 6578000 }, { "epoch": 32.59, "learning_rate": 3.370976947429438e-05, "loss": 2.2142, "step": 6578500 }, { "epoch": 32.59, "learning_rate": 3.37085308878683e-05, "loss": 2.2163, "step": 6579000 }, { "epoch": 32.6, "learning_rate": 3.3707292301442214e-05, "loss": 2.2067, "step": 6579500 }, { "epoch": 32.6, "learning_rate": 3.370605371501613e-05, "loss": 2.2277, "step": 6580000 }, { "epoch": 32.6, "learning_rate": 3.370481512859005e-05, "loss": 2.2055, "step": 6580500 }, { "epoch": 32.6, "learning_rate": 3.3703576542163965e-05, "loss": 2.1938, "step": 6581000 }, { "epoch": 32.61, "learning_rate": 3.370233795573788e-05, "loss": 2.2145, "step": 6581500 }, { "epoch": 32.61, "learning_rate": 3.370109936931179e-05, "loss": 2.1997, "step": 6582000 }, { "epoch": 32.61, "learning_rate": 3.369986078288571e-05, "loss": 2.2109, "step": 6582500 }, { "epoch": 32.61, "learning_rate": 3.3698622196459625e-05, "loss": 2.2218, "step": 6583000 }, { "epoch": 32.62, "learning_rate": 3.369738361003354e-05, "loss": 2.2179, "step": 6583500 }, { "epoch": 32.62, "learning_rate": 3.369614502360746e-05, "loss": 2.2124, "step": 6584000 }, { "epoch": 32.62, "learning_rate": 3.369490891435423e-05, "loss": 2.218, "step": 6584500 }, { "epoch": 32.62, "learning_rate": 3.3693670327928145e-05, "loss": 2.2309, "step": 6585000 }, { "epoch": 32.63, "learning_rate": 3.369243174150206e-05, "loss": 2.1999, "step": 6585500 }, { "epoch": 32.63, "learning_rate": 3.369119315507598e-05, "loss": 2.2156, "step": 6586000 }, { "epoch": 32.63, "learning_rate": 3.368995704582275e-05, "loss": 2.2088, "step": 6586500 }, { "epoch": 32.63, "learning_rate": 3.3688718459396665e-05, "loss": 2.2409, "step": 6587000 }, { "epoch": 32.64, "learning_rate": 3.368748235014343e-05, "loss": 2.2263, "step": 6587500 }, { "epoch": 32.64, "learning_rate": 3.3686243763717344e-05, "loss": 2.2184, "step": 6588000 }, { "epoch": 32.64, "learning_rate": 3.368500517729126e-05, "loss": 2.2329, "step": 6588500 }, { "epoch": 32.64, "learning_rate": 3.368376659086518e-05, "loss": 2.2595, "step": 6589000 }, { "epoch": 32.65, "learning_rate": 3.3682528004439094e-05, "loss": 2.2049, "step": 6589500 }, { "epoch": 32.65, "learning_rate": 3.368128941801301e-05, "loss": 2.2098, "step": 6590000 }, { "epoch": 32.65, "learning_rate": 3.368005083158693e-05, "loss": 2.2009, "step": 6590500 }, { "epoch": 32.65, "learning_rate": 3.3678812245160845e-05, "loss": 2.2141, "step": 6591000 }, { "epoch": 32.66, "learning_rate": 3.367757365873476e-05, "loss": 2.2486, "step": 6591500 }, { "epoch": 32.66, "learning_rate": 3.367633507230868e-05, "loss": 2.1941, "step": 6592000 }, { "epoch": 32.66, "learning_rate": 3.3675096485882596e-05, "loss": 2.2165, "step": 6592500 }, { "epoch": 32.66, "learning_rate": 3.367385789945651e-05, "loss": 2.1934, "step": 6593000 }, { "epoch": 32.67, "learning_rate": 3.367261931303042e-05, "loss": 2.2103, "step": 6593500 }, { "epoch": 32.67, "learning_rate": 3.367138072660434e-05, "loss": 2.2143, "step": 6594000 }, { "epoch": 32.67, "learning_rate": 3.367014214017826e-05, "loss": 2.2099, "step": 6594500 }, { "epoch": 32.67, "learning_rate": 3.366890603092503e-05, "loss": 2.1974, "step": 6595000 }, { "epoch": 32.68, "learning_rate": 3.366766744449894e-05, "loss": 2.2372, "step": 6595500 }, { "epoch": 32.68, "learning_rate": 3.366643133524571e-05, "loss": 2.2234, "step": 6596000 }, { "epoch": 32.68, "learning_rate": 3.366519274881963e-05, "loss": 2.2023, "step": 6596500 }, { "epoch": 32.68, "learning_rate": 3.3663954162393545e-05, "loss": 2.2142, "step": 6597000 }, { "epoch": 32.69, "learning_rate": 3.366271557596746e-05, "loss": 2.2005, "step": 6597500 }, { "epoch": 32.69, "learning_rate": 3.366147698954138e-05, "loss": 2.219, "step": 6598000 }, { "epoch": 32.69, "learning_rate": 3.366024088028815e-05, "loss": 2.2181, "step": 6598500 }, { "epoch": 32.69, "learning_rate": 3.3659002293862065e-05, "loss": 2.2129, "step": 6599000 }, { "epoch": 32.7, "learning_rate": 3.365776618460883e-05, "loss": 2.2157, "step": 6599500 }, { "epoch": 32.7, "learning_rate": 3.3656527598182744e-05, "loss": 2.2164, "step": 6600000 }, { "epoch": 32.7, "learning_rate": 3.365528901175666e-05, "loss": 2.1974, "step": 6600500 }, { "epoch": 32.7, "learning_rate": 3.365405042533058e-05, "loss": 2.2306, "step": 6601000 }, { "epoch": 32.71, "learning_rate": 3.3652814316077347e-05, "loss": 2.2077, "step": 6601500 }, { "epoch": 32.71, "learning_rate": 3.3651575729651263e-05, "loss": 2.2149, "step": 6602000 }, { "epoch": 32.71, "learning_rate": 3.365033714322518e-05, "loss": 2.2067, "step": 6602500 }, { "epoch": 32.71, "learning_rate": 3.36490985567991e-05, "loss": 2.2259, "step": 6603000 }, { "epoch": 32.72, "learning_rate": 3.3647859970373014e-05, "loss": 2.2163, "step": 6603500 }, { "epoch": 32.72, "learning_rate": 3.364662138394693e-05, "loss": 2.2088, "step": 6604000 }, { "epoch": 32.72, "learning_rate": 3.364538279752085e-05, "loss": 2.2304, "step": 6604500 }, { "epoch": 32.72, "learning_rate": 3.3644144211094765e-05, "loss": 2.2071, "step": 6605000 }, { "epoch": 32.73, "learning_rate": 3.364290562466868e-05, "loss": 2.2017, "step": 6605500 }, { "epoch": 32.73, "learning_rate": 3.36416670382426e-05, "loss": 2.2221, "step": 6606000 }, { "epoch": 32.73, "learning_rate": 3.3640428451816516e-05, "loss": 2.2516, "step": 6606500 }, { "epoch": 32.73, "learning_rate": 3.363918986539043e-05, "loss": 2.2249, "step": 6607000 }, { "epoch": 32.74, "learning_rate": 3.363795127896435e-05, "loss": 2.2237, "step": 6607500 }, { "epoch": 32.74, "learning_rate": 3.363671269253827e-05, "loss": 2.2048, "step": 6608000 }, { "epoch": 32.74, "learning_rate": 3.3635474106112184e-05, "loss": 2.2148, "step": 6608500 }, { "epoch": 32.74, "learning_rate": 3.3634235519686094e-05, "loss": 2.2256, "step": 6609000 }, { "epoch": 32.75, "learning_rate": 3.363299693326001e-05, "loss": 2.2463, "step": 6609500 }, { "epoch": 32.75, "learning_rate": 3.363175834683393e-05, "loss": 2.2072, "step": 6610000 }, { "epoch": 32.75, "learning_rate": 3.3630519760407844e-05, "loss": 2.2111, "step": 6610500 }, { "epoch": 32.75, "learning_rate": 3.362928365115461e-05, "loss": 2.2146, "step": 6611000 }, { "epoch": 32.76, "learning_rate": 3.362804506472853e-05, "loss": 2.252, "step": 6611500 }, { "epoch": 32.76, "learning_rate": 3.362680647830245e-05, "loss": 2.2347, "step": 6612000 }, { "epoch": 32.76, "learning_rate": 3.362556789187636e-05, "loss": 2.2109, "step": 6612500 }, { "epoch": 32.76, "learning_rate": 3.3624329305450274e-05, "loss": 2.2187, "step": 6613000 }, { "epoch": 32.77, "learning_rate": 3.362309319619705e-05, "loss": 2.2218, "step": 6613500 }, { "epoch": 32.77, "learning_rate": 3.362185460977097e-05, "loss": 2.226, "step": 6614000 }, { "epoch": 32.77, "learning_rate": 3.3620616023344884e-05, "loss": 2.2319, "step": 6614500 }, { "epoch": 32.77, "learning_rate": 3.36193774369188e-05, "loss": 2.2312, "step": 6615000 }, { "epoch": 32.78, "learning_rate": 3.361814132766556e-05, "loss": 2.224, "step": 6615500 }, { "epoch": 32.78, "learning_rate": 3.361690274123948e-05, "loss": 2.242, "step": 6616000 }, { "epoch": 32.78, "learning_rate": 3.3615664154813396e-05, "loss": 2.2007, "step": 6616500 }, { "epoch": 32.78, "learning_rate": 3.361443052273302e-05, "loss": 2.2512, "step": 6617000 }, { "epoch": 32.79, "learning_rate": 3.3613191936306934e-05, "loss": 2.2141, "step": 6617500 }, { "epoch": 32.79, "learning_rate": 3.361195334988085e-05, "loss": 2.221, "step": 6618000 }, { "epoch": 32.79, "learning_rate": 3.361071476345477e-05, "loss": 2.2071, "step": 6618500 }, { "epoch": 32.79, "learning_rate": 3.360947617702868e-05, "loss": 2.2425, "step": 6619000 }, { "epoch": 32.8, "learning_rate": 3.3608237590602595e-05, "loss": 2.223, "step": 6619500 }, { "epoch": 32.8, "learning_rate": 3.360699900417651e-05, "loss": 2.2144, "step": 6620000 }, { "epoch": 32.8, "learning_rate": 3.360576041775043e-05, "loss": 2.2007, "step": 6620500 }, { "epoch": 32.8, "learning_rate": 3.3604521831324346e-05, "loss": 2.204, "step": 6621000 }, { "epoch": 32.81, "learning_rate": 3.3603285722071115e-05, "loss": 2.2528, "step": 6621500 }, { "epoch": 32.81, "learning_rate": 3.360204713564503e-05, "loss": 2.21, "step": 6622000 }, { "epoch": 32.81, "learning_rate": 3.360080854921895e-05, "loss": 2.2036, "step": 6622500 }, { "epoch": 32.81, "learning_rate": 3.3599569962792865e-05, "loss": 2.2467, "step": 6623000 }, { "epoch": 32.82, "learning_rate": 3.3598336330712486e-05, "loss": 2.214, "step": 6623500 }, { "epoch": 32.82, "learning_rate": 3.35970977442864e-05, "loss": 2.2221, "step": 6624000 }, { "epoch": 32.82, "learning_rate": 3.359585915786032e-05, "loss": 2.2069, "step": 6624500 }, { "epoch": 32.82, "learning_rate": 3.359462057143424e-05, "loss": 2.228, "step": 6625000 }, { "epoch": 32.83, "learning_rate": 3.359338198500815e-05, "loss": 2.2318, "step": 6625500 }, { "epoch": 32.83, "learning_rate": 3.359214587575492e-05, "loss": 2.2248, "step": 6626000 }, { "epoch": 32.83, "learning_rate": 3.359090728932884e-05, "loss": 2.2005, "step": 6626500 }, { "epoch": 32.83, "learning_rate": 3.3589673657248453e-05, "loss": 2.2361, "step": 6627000 }, { "epoch": 32.83, "learning_rate": 3.358843507082237e-05, "loss": 2.2029, "step": 6627500 }, { "epoch": 32.84, "learning_rate": 3.358719648439629e-05, "loss": 2.2289, "step": 6628000 }, { "epoch": 32.84, "learning_rate": 3.3585957897970204e-05, "loss": 2.2083, "step": 6628500 }, { "epoch": 32.84, "learning_rate": 3.358471931154412e-05, "loss": 2.2152, "step": 6629000 }, { "epoch": 32.84, "learning_rate": 3.358348072511804e-05, "loss": 2.2215, "step": 6629500 }, { "epoch": 32.85, "learning_rate": 3.3582242138691955e-05, "loss": 2.1913, "step": 6630000 }, { "epoch": 32.85, "learning_rate": 3.358100355226587e-05, "loss": 2.2186, "step": 6630500 }, { "epoch": 32.85, "learning_rate": 3.357976496583979e-05, "loss": 2.2363, "step": 6631000 }, { "epoch": 32.85, "learning_rate": 3.3578526379413706e-05, "loss": 2.2145, "step": 6631500 }, { "epoch": 32.86, "learning_rate": 3.357728779298762e-05, "loss": 2.2131, "step": 6632000 }, { "epoch": 32.86, "learning_rate": 3.357604920656154e-05, "loss": 2.2287, "step": 6632500 }, { "epoch": 32.86, "learning_rate": 3.357481062013546e-05, "loss": 2.2282, "step": 6633000 }, { "epoch": 32.86, "learning_rate": 3.3573572033709374e-05, "loss": 2.222, "step": 6633500 }, { "epoch": 32.87, "learning_rate": 3.357233344728329e-05, "loss": 2.2288, "step": 6634000 }, { "epoch": 32.87, "learning_rate": 3.357109486085721e-05, "loss": 2.2067, "step": 6634500 }, { "epoch": 32.87, "learning_rate": 3.356985627443112e-05, "loss": 2.2245, "step": 6635000 }, { "epoch": 32.87, "learning_rate": 3.3568620165177886e-05, "loss": 2.2202, "step": 6635500 }, { "epoch": 32.88, "learning_rate": 3.35673815787518e-05, "loss": 2.2116, "step": 6636000 }, { "epoch": 32.88, "learning_rate": 3.356614546949857e-05, "loss": 2.2298, "step": 6636500 }, { "epoch": 32.88, "learning_rate": 3.356490688307249e-05, "loss": 2.2243, "step": 6637000 }, { "epoch": 32.88, "learning_rate": 3.3563668296646406e-05, "loss": 2.2296, "step": 6637500 }, { "epoch": 32.89, "learning_rate": 3.356242971022032e-05, "loss": 2.2225, "step": 6638000 }, { "epoch": 32.89, "learning_rate": 3.356119112379424e-05, "loss": 2.1984, "step": 6638500 }, { "epoch": 32.89, "learning_rate": 3.355995253736816e-05, "loss": 2.2297, "step": 6639000 }, { "epoch": 32.89, "learning_rate": 3.3558713950942074e-05, "loss": 2.2113, "step": 6639500 }, { "epoch": 32.9, "learning_rate": 3.355747536451599e-05, "loss": 2.2124, "step": 6640000 }, { "epoch": 32.9, "learning_rate": 3.355623677808991e-05, "loss": 2.213, "step": 6640500 }, { "epoch": 32.9, "learning_rate": 3.355500066883667e-05, "loss": 2.1908, "step": 6641000 }, { "epoch": 32.9, "learning_rate": 3.3553762082410586e-05, "loss": 2.2046, "step": 6641500 }, { "epoch": 32.91, "learning_rate": 3.3552523495984503e-05, "loss": 2.2032, "step": 6642000 }, { "epoch": 32.91, "learning_rate": 3.355128738673127e-05, "loss": 2.1987, "step": 6642500 }, { "epoch": 32.91, "learning_rate": 3.355004880030519e-05, "loss": 2.2311, "step": 6643000 }, { "epoch": 32.91, "learning_rate": 3.3548810213879106e-05, "loss": 2.2356, "step": 6643500 }, { "epoch": 32.92, "learning_rate": 3.354757162745302e-05, "loss": 2.2262, "step": 6644000 }, { "epoch": 32.92, "learning_rate": 3.354633304102694e-05, "loss": 2.196, "step": 6644500 }, { "epoch": 32.92, "learning_rate": 3.354509445460086e-05, "loss": 2.2177, "step": 6645000 }, { "epoch": 32.92, "learning_rate": 3.3543855868174774e-05, "loss": 2.2002, "step": 6645500 }, { "epoch": 32.93, "learning_rate": 3.354261728174869e-05, "loss": 2.2402, "step": 6646000 }, { "epoch": 32.93, "learning_rate": 3.354137869532261e-05, "loss": 2.2201, "step": 6646500 }, { "epoch": 32.93, "learning_rate": 3.3540140108896525e-05, "loss": 2.2249, "step": 6647000 }, { "epoch": 32.93, "learning_rate": 3.3538901522470435e-05, "loss": 2.2081, "step": 6647500 }, { "epoch": 32.94, "learning_rate": 3.3537665413217204e-05, "loss": 2.2199, "step": 6648000 }, { "epoch": 32.94, "learning_rate": 3.353642682679112e-05, "loss": 2.2042, "step": 6648500 }, { "epoch": 32.94, "learning_rate": 3.353518824036504e-05, "loss": 2.2251, "step": 6649000 }, { "epoch": 32.94, "learning_rate": 3.3533952131111806e-05, "loss": 2.2405, "step": 6649500 }, { "epoch": 32.95, "learning_rate": 3.353271354468572e-05, "loss": 2.2288, "step": 6650000 }, { "epoch": 32.95, "learning_rate": 3.353147495825964e-05, "loss": 2.2055, "step": 6650500 }, { "epoch": 32.95, "learning_rate": 3.353023637183356e-05, "loss": 2.2243, "step": 6651000 }, { "epoch": 32.95, "learning_rate": 3.3528997785407474e-05, "loss": 2.2378, "step": 6651500 }, { "epoch": 32.96, "learning_rate": 3.352775919898139e-05, "loss": 2.2031, "step": 6652000 }, { "epoch": 32.96, "learning_rate": 3.352652061255531e-05, "loss": 2.2246, "step": 6652500 }, { "epoch": 32.96, "learning_rate": 3.3525282026129225e-05, "loss": 2.1836, "step": 6653000 }, { "epoch": 32.96, "learning_rate": 3.3524045916875994e-05, "loss": 2.2215, "step": 6653500 }, { "epoch": 32.97, "learning_rate": 3.3522807330449904e-05, "loss": 2.2132, "step": 6654000 }, { "epoch": 32.97, "learning_rate": 3.352157122119667e-05, "loss": 2.2263, "step": 6654500 }, { "epoch": 32.97, "learning_rate": 3.352033511194344e-05, "loss": 2.2455, "step": 6655000 }, { "epoch": 32.97, "learning_rate": 3.351909652551736e-05, "loss": 2.2298, "step": 6655500 }, { "epoch": 32.98, "learning_rate": 3.3517857939091275e-05, "loss": 2.2223, "step": 6656000 }, { "epoch": 32.98, "learning_rate": 3.351661935266519e-05, "loss": 2.2317, "step": 6656500 }, { "epoch": 32.98, "learning_rate": 3.351538324341196e-05, "loss": 2.2343, "step": 6657000 }, { "epoch": 32.98, "learning_rate": 3.351414465698587e-05, "loss": 2.2235, "step": 6657500 }, { "epoch": 32.99, "learning_rate": 3.351290607055979e-05, "loss": 2.2268, "step": 6658000 }, { "epoch": 32.99, "learning_rate": 3.3511667484133705e-05, "loss": 2.2339, "step": 6658500 }, { "epoch": 32.99, "learning_rate": 3.351042889770762e-05, "loss": 2.2136, "step": 6659000 }, { "epoch": 32.99, "learning_rate": 3.350919031128154e-05, "loss": 2.1893, "step": 6659500 }, { "epoch": 33.0, "learning_rate": 3.3507951724855456e-05, "loss": 2.2058, "step": 6660000 }, { "epoch": 33.0, "learning_rate": 3.350671313842937e-05, "loss": 2.2326, "step": 6660500 }, { "epoch": 33.0, "eval_accuracy": 0.660817023764832, "eval_accuracy_mlm": 0.6167854855212918, "eval_accuracy_nsp": 0.8684219815735079, "eval_loss": 2.316751480102539, "eval_runtime": 145.7034, "eval_samples_per_second": 1749.849, "eval_steps_per_second": 72.915, "step": 6660819 }, { "epoch": 33.0, "learning_rate": 3.350547455200329e-05, "loss": 2.1953, "step": 6661000 }, { "epoch": 33.0, "learning_rate": 3.3504235965577206e-05, "loss": 2.1906, "step": 6661500 }, { "epoch": 33.01, "learning_rate": 3.350299737915112e-05, "loss": 2.1715, "step": 6662000 }, { "epoch": 33.01, "learning_rate": 3.350175879272504e-05, "loss": 2.1679, "step": 6662500 }, { "epoch": 33.01, "learning_rate": 3.350052020629896e-05, "loss": 2.2096, "step": 6663000 }, { "epoch": 33.01, "learning_rate": 3.3499281619872874e-05, "loss": 2.1684, "step": 6663500 }, { "epoch": 33.02, "learning_rate": 3.349804551061964e-05, "loss": 2.1789, "step": 6664000 }, { "epoch": 33.02, "learning_rate": 3.349680692419356e-05, "loss": 2.1883, "step": 6664500 }, { "epoch": 33.02, "learning_rate": 3.349557081494032e-05, "loss": 2.1976, "step": 6665000 }, { "epoch": 33.02, "learning_rate": 3.349433222851424e-05, "loss": 2.1609, "step": 6665500 }, { "epoch": 33.03, "learning_rate": 3.3493093642088156e-05, "loss": 2.1907, "step": 6666000 }, { "epoch": 33.03, "learning_rate": 3.349185505566207e-05, "loss": 2.1754, "step": 6666500 }, { "epoch": 33.03, "learning_rate": 3.349061894640884e-05, "loss": 2.1871, "step": 6667000 }, { "epoch": 33.03, "learning_rate": 3.348938035998276e-05, "loss": 2.1858, "step": 6667500 }, { "epoch": 33.04, "learning_rate": 3.348814425072953e-05, "loss": 2.1992, "step": 6668000 }, { "epoch": 33.04, "learning_rate": 3.3486905664303444e-05, "loss": 2.1737, "step": 6668500 }, { "epoch": 33.04, "learning_rate": 3.348566707787736e-05, "loss": 2.1944, "step": 6669000 }, { "epoch": 33.04, "learning_rate": 3.348442849145128e-05, "loss": 2.1859, "step": 6669500 }, { "epoch": 33.05, "learning_rate": 3.348319238219805e-05, "loss": 2.1786, "step": 6670000 }, { "epoch": 33.05, "learning_rate": 3.3481953795771964e-05, "loss": 2.1959, "step": 6670500 }, { "epoch": 33.05, "learning_rate": 3.348071520934588e-05, "loss": 2.2086, "step": 6671000 }, { "epoch": 33.05, "learning_rate": 3.34794766229198e-05, "loss": 2.1758, "step": 6671500 }, { "epoch": 33.06, "learning_rate": 3.3478238036493715e-05, "loss": 2.1904, "step": 6672000 }, { "epoch": 33.06, "learning_rate": 3.347699945006763e-05, "loss": 2.1964, "step": 6672500 }, { "epoch": 33.06, "learning_rate": 3.347576086364154e-05, "loss": 2.1856, "step": 6673000 }, { "epoch": 33.06, "learning_rate": 3.347452227721546e-05, "loss": 2.1881, "step": 6673500 }, { "epoch": 33.07, "learning_rate": 3.3473283690789376e-05, "loss": 2.1798, "step": 6674000 }, { "epoch": 33.07, "learning_rate": 3.347204510436329e-05, "loss": 2.1917, "step": 6674500 }, { "epoch": 33.07, "learning_rate": 3.347080651793721e-05, "loss": 2.1751, "step": 6675000 }, { "epoch": 33.07, "learning_rate": 3.3469567931511126e-05, "loss": 2.2065, "step": 6675500 }, { "epoch": 33.08, "learning_rate": 3.346832934508504e-05, "loss": 2.1857, "step": 6676000 }, { "epoch": 33.08, "learning_rate": 3.346709075865896e-05, "loss": 2.1952, "step": 6676500 }, { "epoch": 33.08, "learning_rate": 3.346585217223288e-05, "loss": 2.1908, "step": 6677000 }, { "epoch": 33.08, "learning_rate": 3.3464613585806794e-05, "loss": 2.1971, "step": 6677500 }, { "epoch": 33.09, "learning_rate": 3.346337499938071e-05, "loss": 2.194, "step": 6678000 }, { "epoch": 33.09, "learning_rate": 3.346213889012747e-05, "loss": 2.164, "step": 6678500 }, { "epoch": 33.09, "learning_rate": 3.346090030370139e-05, "loss": 2.1589, "step": 6679000 }, { "epoch": 33.09, "learning_rate": 3.345966171727531e-05, "loss": 2.216, "step": 6679500 }, { "epoch": 33.1, "learning_rate": 3.3458423130849224e-05, "loss": 2.1777, "step": 6680000 }, { "epoch": 33.1, "learning_rate": 3.345718454442314e-05, "loss": 2.2036, "step": 6680500 }, { "epoch": 33.1, "learning_rate": 3.345594595799706e-05, "loss": 2.1821, "step": 6681000 }, { "epoch": 33.1, "learning_rate": 3.3454707371570975e-05, "loss": 2.1714, "step": 6681500 }, { "epoch": 33.1, "learning_rate": 3.345346878514489e-05, "loss": 2.199, "step": 6682000 }, { "epoch": 33.11, "learning_rate": 3.345223019871881e-05, "loss": 2.1905, "step": 6682500 }, { "epoch": 33.11, "learning_rate": 3.3450991612292725e-05, "loss": 2.1875, "step": 6683000 }, { "epoch": 33.11, "learning_rate": 3.344975302586664e-05, "loss": 2.1735, "step": 6683500 }, { "epoch": 33.11, "learning_rate": 3.344851443944056e-05, "loss": 2.2194, "step": 6684000 }, { "epoch": 33.12, "learning_rate": 3.3447275853014476e-05, "loss": 2.1537, "step": 6684500 }, { "epoch": 33.12, "learning_rate": 3.3446037266588386e-05, "loss": 2.209, "step": 6685000 }, { "epoch": 33.12, "learning_rate": 3.344480115733516e-05, "loss": 2.1662, "step": 6685500 }, { "epoch": 33.12, "learning_rate": 3.344356257090908e-05, "loss": 2.1802, "step": 6686000 }, { "epoch": 33.13, "learning_rate": 3.344232646165584e-05, "loss": 2.211, "step": 6686500 }, { "epoch": 33.13, "learning_rate": 3.344108787522976e-05, "loss": 2.1945, "step": 6687000 }, { "epoch": 33.13, "learning_rate": 3.3439849288803675e-05, "loss": 2.1963, "step": 6687500 }, { "epoch": 33.13, "learning_rate": 3.343861070237759e-05, "loss": 2.1963, "step": 6688000 }, { "epoch": 33.14, "learning_rate": 3.343737211595151e-05, "loss": 2.1822, "step": 6688500 }, { "epoch": 33.14, "learning_rate": 3.3436133529525425e-05, "loss": 2.1843, "step": 6689000 }, { "epoch": 33.14, "learning_rate": 3.343489494309934e-05, "loss": 2.1904, "step": 6689500 }, { "epoch": 33.14, "learning_rate": 3.343365635667326e-05, "loss": 2.1861, "step": 6690000 }, { "epoch": 33.15, "learning_rate": 3.3432417770247176e-05, "loss": 2.2003, "step": 6690500 }, { "epoch": 33.15, "learning_rate": 3.3431181660993945e-05, "loss": 2.1901, "step": 6691000 }, { "epoch": 33.15, "learning_rate": 3.342994307456786e-05, "loss": 2.2043, "step": 6691500 }, { "epoch": 33.15, "learning_rate": 3.342870448814178e-05, "loss": 2.198, "step": 6692000 }, { "epoch": 33.16, "learning_rate": 3.342746837888854e-05, "loss": 2.1838, "step": 6692500 }, { "epoch": 33.16, "learning_rate": 3.342622979246246e-05, "loss": 2.196, "step": 6693000 }, { "epoch": 33.16, "learning_rate": 3.342499368320923e-05, "loss": 2.1899, "step": 6693500 }, { "epoch": 33.16, "learning_rate": 3.3423757573956e-05, "loss": 2.1982, "step": 6694000 }, { "epoch": 33.17, "learning_rate": 3.342251898752992e-05, "loss": 2.1961, "step": 6694500 }, { "epoch": 33.17, "learning_rate": 3.342128040110383e-05, "loss": 2.2007, "step": 6695000 }, { "epoch": 33.17, "learning_rate": 3.3420041814677746e-05, "loss": 2.1791, "step": 6695500 }, { "epoch": 33.17, "learning_rate": 3.341880322825166e-05, "loss": 2.2048, "step": 6696000 }, { "epoch": 33.18, "learning_rate": 3.341756464182558e-05, "loss": 2.197, "step": 6696500 }, { "epoch": 33.18, "learning_rate": 3.34163260553995e-05, "loss": 2.2261, "step": 6697000 }, { "epoch": 33.18, "learning_rate": 3.3415089946146266e-05, "loss": 2.1916, "step": 6697500 }, { "epoch": 33.18, "learning_rate": 3.3413851359720176e-05, "loss": 2.1948, "step": 6698000 }, { "epoch": 33.19, "learning_rate": 3.341261277329409e-05, "loss": 2.1977, "step": 6698500 }, { "epoch": 33.19, "learning_rate": 3.341137418686801e-05, "loss": 2.1964, "step": 6699000 }, { "epoch": 33.19, "learning_rate": 3.341013560044193e-05, "loss": 2.2078, "step": 6699500 }, { "epoch": 33.19, "learning_rate": 3.3408897014015844e-05, "loss": 2.1995, "step": 6700000 }, { "epoch": 33.2, "learning_rate": 3.340765842758976e-05, "loss": 2.1763, "step": 6700500 }, { "epoch": 33.2, "learning_rate": 3.340641984116368e-05, "loss": 2.1989, "step": 6701000 }, { "epoch": 33.2, "learning_rate": 3.3405181254737595e-05, "loss": 2.1885, "step": 6701500 }, { "epoch": 33.2, "learning_rate": 3.340394266831151e-05, "loss": 2.1716, "step": 6702000 }, { "epoch": 33.21, "learning_rate": 3.340270408188543e-05, "loss": 2.2069, "step": 6702500 }, { "epoch": 33.21, "learning_rate": 3.3401465495459345e-05, "loss": 2.1995, "step": 6703000 }, { "epoch": 33.21, "learning_rate": 3.3400229386206114e-05, "loss": 2.2251, "step": 6703500 }, { "epoch": 33.21, "learning_rate": 3.339899079978003e-05, "loss": 2.2079, "step": 6704000 }, { "epoch": 33.22, "learning_rate": 3.339775221335394e-05, "loss": 2.1914, "step": 6704500 }, { "epoch": 33.22, "learning_rate": 3.339651610410071e-05, "loss": 2.1867, "step": 6705000 }, { "epoch": 33.22, "learning_rate": 3.339527751767463e-05, "loss": 2.2132, "step": 6705500 }, { "epoch": 33.22, "learning_rate": 3.33940414084214e-05, "loss": 2.1824, "step": 6706000 }, { "epoch": 33.23, "learning_rate": 3.339280282199532e-05, "loss": 2.1999, "step": 6706500 }, { "epoch": 33.23, "learning_rate": 3.3391564235569236e-05, "loss": 2.1785, "step": 6707000 }, { "epoch": 33.23, "learning_rate": 3.3390325649143147e-05, "loss": 2.2085, "step": 6707500 }, { "epoch": 33.23, "learning_rate": 3.3389087062717063e-05, "loss": 2.2031, "step": 6708000 }, { "epoch": 33.24, "learning_rate": 3.338784847629098e-05, "loss": 2.181, "step": 6708500 }, { "epoch": 33.24, "learning_rate": 3.33866098898649e-05, "loss": 2.2132, "step": 6709000 }, { "epoch": 33.24, "learning_rate": 3.3385371303438814e-05, "loss": 2.1918, "step": 6709500 }, { "epoch": 33.24, "learning_rate": 3.338413271701273e-05, "loss": 2.1732, "step": 6710000 }, { "epoch": 33.25, "learning_rate": 3.338289413058665e-05, "loss": 2.1797, "step": 6710500 }, { "epoch": 33.25, "learning_rate": 3.3381655544160565e-05, "loss": 2.2188, "step": 6711000 }, { "epoch": 33.25, "learning_rate": 3.3380416957734475e-05, "loss": 2.1945, "step": 6711500 }, { "epoch": 33.25, "learning_rate": 3.3379180848481244e-05, "loss": 2.1775, "step": 6712000 }, { "epoch": 33.26, "learning_rate": 3.337794226205516e-05, "loss": 2.2078, "step": 6712500 }, { "epoch": 33.26, "learning_rate": 3.337670367562908e-05, "loss": 2.1919, "step": 6713000 }, { "epoch": 33.26, "learning_rate": 3.3375465089202995e-05, "loss": 2.2168, "step": 6713500 }, { "epoch": 33.26, "learning_rate": 3.337422650277691e-05, "loss": 2.1926, "step": 6714000 }, { "epoch": 33.27, "learning_rate": 3.337299039352368e-05, "loss": 2.2111, "step": 6714500 }, { "epoch": 33.27, "learning_rate": 3.33717518070976e-05, "loss": 2.1717, "step": 6715000 }, { "epoch": 33.27, "learning_rate": 3.3370515697844366e-05, "loss": 2.1892, "step": 6715500 }, { "epoch": 33.27, "learning_rate": 3.336927711141828e-05, "loss": 2.1819, "step": 6716000 }, { "epoch": 33.28, "learning_rate": 3.33680385249922e-05, "loss": 2.1656, "step": 6716500 }, { "epoch": 33.28, "learning_rate": 3.336680241573897e-05, "loss": 2.1817, "step": 6717000 }, { "epoch": 33.28, "learning_rate": 3.3365563829312886e-05, "loss": 2.2084, "step": 6717500 }, { "epoch": 33.28, "learning_rate": 3.33643252428868e-05, "loss": 2.1876, "step": 6718000 }, { "epoch": 33.29, "learning_rate": 3.336308665646072e-05, "loss": 2.2138, "step": 6718500 }, { "epoch": 33.29, "learning_rate": 3.3361848070034637e-05, "loss": 2.2026, "step": 6719000 }, { "epoch": 33.29, "learning_rate": 3.3360609483608554e-05, "loss": 2.1882, "step": 6719500 }, { "epoch": 33.29, "learning_rate": 3.3359370897182464e-05, "loss": 2.2171, "step": 6720000 }, { "epoch": 33.3, "learning_rate": 3.335813231075638e-05, "loss": 2.1742, "step": 6720500 }, { "epoch": 33.3, "learning_rate": 3.33568937243303e-05, "loss": 2.2045, "step": 6721000 }, { "epoch": 33.3, "learning_rate": 3.3355655137904214e-05, "loss": 2.2029, "step": 6721500 }, { "epoch": 33.3, "learning_rate": 3.335441655147813e-05, "loss": 2.1792, "step": 6722000 }, { "epoch": 33.31, "learning_rate": 3.335317796505205e-05, "loss": 2.1961, "step": 6722500 }, { "epoch": 33.31, "learning_rate": 3.3351939378625965e-05, "loss": 2.2094, "step": 6723000 }, { "epoch": 33.31, "learning_rate": 3.335070079219988e-05, "loss": 2.2066, "step": 6723500 }, { "epoch": 33.31, "learning_rate": 3.33494622057738e-05, "loss": 2.1974, "step": 6724000 }, { "epoch": 33.32, "learning_rate": 3.3348223619347716e-05, "loss": 2.1959, "step": 6724500 }, { "epoch": 33.32, "learning_rate": 3.334698998726734e-05, "loss": 2.2206, "step": 6725000 }, { "epoch": 33.32, "learning_rate": 3.3345751400841254e-05, "loss": 2.1876, "step": 6725500 }, { "epoch": 33.32, "learning_rate": 3.334451281441517e-05, "loss": 2.1978, "step": 6726000 }, { "epoch": 33.33, "learning_rate": 3.334327422798908e-05, "loss": 2.1713, "step": 6726500 }, { "epoch": 33.33, "learning_rate": 3.3342035641563e-05, "loss": 2.2117, "step": 6727000 }, { "epoch": 33.33, "learning_rate": 3.3340799532309766e-05, "loss": 2.2119, "step": 6727500 }, { "epoch": 33.33, "learning_rate": 3.3339560945883683e-05, "loss": 2.191, "step": 6728000 }, { "epoch": 33.34, "learning_rate": 3.33383223594576e-05, "loss": 2.1935, "step": 6728500 }, { "epoch": 33.34, "learning_rate": 3.333708377303152e-05, "loss": 2.1783, "step": 6729000 }, { "epoch": 33.34, "learning_rate": 3.333584518660543e-05, "loss": 2.2009, "step": 6729500 }, { "epoch": 33.34, "learning_rate": 3.3334606600179344e-05, "loss": 2.1902, "step": 6730000 }, { "epoch": 33.35, "learning_rate": 3.333336801375326e-05, "loss": 2.2013, "step": 6730500 }, { "epoch": 33.35, "learning_rate": 3.333212942732718e-05, "loss": 2.2053, "step": 6731000 }, { "epoch": 33.35, "learning_rate": 3.3330893318073954e-05, "loss": 2.2068, "step": 6731500 }, { "epoch": 33.35, "learning_rate": 3.332965473164787e-05, "loss": 2.1893, "step": 6732000 }, { "epoch": 33.36, "learning_rate": 3.332841614522178e-05, "loss": 2.1971, "step": 6732500 }, { "epoch": 33.36, "learning_rate": 3.33271775587957e-05, "loss": 2.2038, "step": 6733000 }, { "epoch": 33.36, "learning_rate": 3.3325938972369615e-05, "loss": 2.1941, "step": 6733500 }, { "epoch": 33.36, "learning_rate": 3.3324705340289235e-05, "loss": 2.2214, "step": 6734000 }, { "epoch": 33.37, "learning_rate": 3.332346675386315e-05, "loss": 2.1986, "step": 6734500 }, { "epoch": 33.37, "learning_rate": 3.332222816743707e-05, "loss": 2.2197, "step": 6735000 }, { "epoch": 33.37, "learning_rate": 3.332099205818384e-05, "loss": 2.198, "step": 6735500 }, { "epoch": 33.37, "learning_rate": 3.3319753471757755e-05, "loss": 2.2016, "step": 6736000 }, { "epoch": 33.37, "learning_rate": 3.331851488533167e-05, "loss": 2.2062, "step": 6736500 }, { "epoch": 33.38, "learning_rate": 3.331727629890559e-05, "loss": 2.2213, "step": 6737000 }, { "epoch": 33.38, "learning_rate": 3.3316037712479506e-05, "loss": 2.2173, "step": 6737500 }, { "epoch": 33.38, "learning_rate": 3.331479912605342e-05, "loss": 2.196, "step": 6738000 }, { "epoch": 33.38, "learning_rate": 3.331356053962734e-05, "loss": 2.2056, "step": 6738500 }, { "epoch": 33.39, "learning_rate": 3.331232195320125e-05, "loss": 2.2122, "step": 6739000 }, { "epoch": 33.39, "learning_rate": 3.331108336677517e-05, "loss": 2.1967, "step": 6739500 }, { "epoch": 33.39, "learning_rate": 3.3309844780349084e-05, "loss": 2.2116, "step": 6740000 }, { "epoch": 33.39, "learning_rate": 3.3308606193923e-05, "loss": 2.2288, "step": 6740500 }, { "epoch": 33.4, "learning_rate": 3.330736760749692e-05, "loss": 2.1971, "step": 6741000 }, { "epoch": 33.4, "learning_rate": 3.3306129021070834e-05, "loss": 2.2274, "step": 6741500 }, { "epoch": 33.4, "learning_rate": 3.33048929118176e-05, "loss": 2.2, "step": 6742000 }, { "epoch": 33.4, "learning_rate": 3.330365432539152e-05, "loss": 2.2053, "step": 6742500 }, { "epoch": 33.41, "learning_rate": 3.330241573896544e-05, "loss": 2.2427, "step": 6743000 }, { "epoch": 33.41, "learning_rate": 3.3301177152539354e-05, "loss": 2.2006, "step": 6743500 }, { "epoch": 33.41, "learning_rate": 3.329994104328612e-05, "loss": 2.1955, "step": 6744000 }, { "epoch": 33.41, "learning_rate": 3.329870245686004e-05, "loss": 2.2025, "step": 6744500 }, { "epoch": 33.42, "learning_rate": 3.329746387043396e-05, "loss": 2.2052, "step": 6745000 }, { "epoch": 33.42, "learning_rate": 3.3296225284007874e-05, "loss": 2.199, "step": 6745500 }, { "epoch": 33.42, "learning_rate": 3.3294986697581784e-05, "loss": 2.2119, "step": 6746000 }, { "epoch": 33.42, "learning_rate": 3.329375058832855e-05, "loss": 2.1805, "step": 6746500 }, { "epoch": 33.43, "learning_rate": 3.329251200190247e-05, "loss": 2.2214, "step": 6747000 }, { "epoch": 33.43, "learning_rate": 3.3291273415476386e-05, "loss": 2.1932, "step": 6747500 }, { "epoch": 33.43, "learning_rate": 3.32900348290503e-05, "loss": 2.21, "step": 6748000 }, { "epoch": 33.43, "learning_rate": 3.328879871979707e-05, "loss": 2.1815, "step": 6748500 }, { "epoch": 33.44, "learning_rate": 3.328756013337099e-05, "loss": 2.2231, "step": 6749000 }, { "epoch": 33.44, "learning_rate": 3.3286321546944906e-05, "loss": 2.1896, "step": 6749500 }, { "epoch": 33.44, "learning_rate": 3.328508296051882e-05, "loss": 2.1942, "step": 6750000 }, { "epoch": 33.44, "learning_rate": 3.328384437409274e-05, "loss": 2.2055, "step": 6750500 }, { "epoch": 33.45, "learning_rate": 3.328260578766666e-05, "loss": 2.2069, "step": 6751000 }, { "epoch": 33.45, "learning_rate": 3.3281367201240574e-05, "loss": 2.2045, "step": 6751500 }, { "epoch": 33.45, "learning_rate": 3.3280131091987336e-05, "loss": 2.2114, "step": 6752000 }, { "epoch": 33.45, "learning_rate": 3.327889250556125e-05, "loss": 2.2043, "step": 6752500 }, { "epoch": 33.46, "learning_rate": 3.327765391913517e-05, "loss": 2.2053, "step": 6753000 }, { "epoch": 33.46, "learning_rate": 3.3276415332709087e-05, "loss": 2.2035, "step": 6753500 }, { "epoch": 33.46, "learning_rate": 3.3275179223455855e-05, "loss": 2.1932, "step": 6754000 }, { "epoch": 33.46, "learning_rate": 3.327394063702977e-05, "loss": 2.2068, "step": 6754500 }, { "epoch": 33.47, "learning_rate": 3.327270205060369e-05, "loss": 2.1834, "step": 6755000 }, { "epoch": 33.47, "learning_rate": 3.3271463464177606e-05, "loss": 2.1792, "step": 6755500 }, { "epoch": 33.47, "learning_rate": 3.327022735492437e-05, "loss": 2.1915, "step": 6756000 }, { "epoch": 33.47, "learning_rate": 3.3268988768498285e-05, "loss": 2.2222, "step": 6756500 }, { "epoch": 33.48, "learning_rate": 3.32677501820722e-05, "loss": 2.2125, "step": 6757000 }, { "epoch": 33.48, "learning_rate": 3.326651159564612e-05, "loss": 2.1967, "step": 6757500 }, { "epoch": 33.48, "learning_rate": 3.3265273009220036e-05, "loss": 2.2272, "step": 6758000 }, { "epoch": 33.48, "learning_rate": 3.326403442279395e-05, "loss": 2.2209, "step": 6758500 }, { "epoch": 33.49, "learning_rate": 3.326279831354072e-05, "loss": 2.2223, "step": 6759000 }, { "epoch": 33.49, "learning_rate": 3.326155972711464e-05, "loss": 2.21, "step": 6759500 }, { "epoch": 33.49, "learning_rate": 3.3260321140688556e-05, "loss": 2.2063, "step": 6760000 }, { "epoch": 33.49, "learning_rate": 3.325908255426247e-05, "loss": 2.2272, "step": 6760500 }, { "epoch": 33.5, "learning_rate": 3.325784396783639e-05, "loss": 2.1844, "step": 6761000 }, { "epoch": 33.5, "learning_rate": 3.3256605381410306e-05, "loss": 2.2001, "step": 6761500 }, { "epoch": 33.5, "learning_rate": 3.3255369272157075e-05, "loss": 2.1846, "step": 6762000 }, { "epoch": 33.5, "learning_rate": 3.3254133162903844e-05, "loss": 2.2296, "step": 6762500 }, { "epoch": 33.51, "learning_rate": 3.325289457647776e-05, "loss": 2.2138, "step": 6763000 }, { "epoch": 33.51, "learning_rate": 3.325165599005168e-05, "loss": 2.2029, "step": 6763500 }, { "epoch": 33.51, "learning_rate": 3.3250417403625595e-05, "loss": 2.1842, "step": 6764000 }, { "epoch": 33.51, "learning_rate": 3.3249181294372364e-05, "loss": 2.216, "step": 6764500 }, { "epoch": 33.52, "learning_rate": 3.324794270794628e-05, "loss": 2.2137, "step": 6765000 }, { "epoch": 33.52, "learning_rate": 3.32467041215202e-05, "loss": 2.1953, "step": 6765500 }, { "epoch": 33.52, "learning_rate": 3.324546801226696e-05, "loss": 2.209, "step": 6766000 }, { "epoch": 33.52, "learning_rate": 3.3244229425840876e-05, "loss": 2.2116, "step": 6766500 }, { "epoch": 33.53, "learning_rate": 3.324299083941479e-05, "loss": 2.2116, "step": 6767000 }, { "epoch": 33.53, "learning_rate": 3.324175225298871e-05, "loss": 2.2068, "step": 6767500 }, { "epoch": 33.53, "learning_rate": 3.324051366656263e-05, "loss": 2.1877, "step": 6768000 }, { "epoch": 33.53, "learning_rate": 3.3239275080136544e-05, "loss": 2.2073, "step": 6768500 }, { "epoch": 33.54, "learning_rate": 3.323803649371046e-05, "loss": 2.1873, "step": 6769000 }, { "epoch": 33.54, "learning_rate": 3.323679790728438e-05, "loss": 2.2067, "step": 6769500 }, { "epoch": 33.54, "learning_rate": 3.3235559320858295e-05, "loss": 2.1925, "step": 6770000 }, { "epoch": 33.54, "learning_rate": 3.3234320734432205e-05, "loss": 2.2227, "step": 6770500 }, { "epoch": 33.55, "learning_rate": 3.323308214800612e-05, "loss": 2.1975, "step": 6771000 }, { "epoch": 33.55, "learning_rate": 3.323184356158004e-05, "loss": 2.1838, "step": 6771500 }, { "epoch": 33.55, "learning_rate": 3.3230604975153956e-05, "loss": 2.2219, "step": 6772000 }, { "epoch": 33.55, "learning_rate": 3.322936638872787e-05, "loss": 2.1908, "step": 6772500 }, { "epoch": 33.56, "learning_rate": 3.322812780230179e-05, "loss": 2.1813, "step": 6773000 }, { "epoch": 33.56, "learning_rate": 3.3226889215875707e-05, "loss": 2.1919, "step": 6773500 }, { "epoch": 33.56, "learning_rate": 3.3225650629449623e-05, "loss": 2.2287, "step": 6774000 }, { "epoch": 33.56, "learning_rate": 3.322441204302354e-05, "loss": 2.2185, "step": 6774500 }, { "epoch": 33.57, "learning_rate": 3.322317345659746e-05, "loss": 2.1899, "step": 6775000 }, { "epoch": 33.57, "learning_rate": 3.3221934870171374e-05, "loss": 2.204, "step": 6775500 }, { "epoch": 33.57, "learning_rate": 3.322069628374529e-05, "loss": 2.2152, "step": 6776000 }, { "epoch": 33.57, "learning_rate": 3.321945769731921e-05, "loss": 2.1904, "step": 6776500 }, { "epoch": 33.58, "learning_rate": 3.3218219110893125e-05, "loss": 2.2197, "step": 6777000 }, { "epoch": 33.58, "learning_rate": 3.321698052446704e-05, "loss": 2.1919, "step": 6777500 }, { "epoch": 33.58, "learning_rate": 3.321574193804096e-05, "loss": 2.2174, "step": 6778000 }, { "epoch": 33.58, "learning_rate": 3.3214503351614876e-05, "loss": 2.1984, "step": 6778500 }, { "epoch": 33.59, "learning_rate": 3.321326971953449e-05, "loss": 2.2013, "step": 6779000 }, { "epoch": 33.59, "learning_rate": 3.3212033610281265e-05, "loss": 2.2408, "step": 6779500 }, { "epoch": 33.59, "learning_rate": 3.3210795023855175e-05, "loss": 2.2103, "step": 6780000 }, { "epoch": 33.59, "learning_rate": 3.320955643742909e-05, "loss": 2.1746, "step": 6780500 }, { "epoch": 33.6, "learning_rate": 3.320831785100301e-05, "loss": 2.2278, "step": 6781000 }, { "epoch": 33.6, "learning_rate": 3.3207079264576926e-05, "loss": 2.1785, "step": 6781500 }, { "epoch": 33.6, "learning_rate": 3.320584067815084e-05, "loss": 2.2099, "step": 6782000 }, { "epoch": 33.6, "learning_rate": 3.320460209172476e-05, "loss": 2.1942, "step": 6782500 }, { "epoch": 33.61, "learning_rate": 3.320336845964438e-05, "loss": 2.2025, "step": 6783000 }, { "epoch": 33.61, "learning_rate": 3.32021298732183e-05, "loss": 2.2365, "step": 6783500 }, { "epoch": 33.61, "learning_rate": 3.3200891286792215e-05, "loss": 2.2065, "step": 6784000 }, { "epoch": 33.61, "learning_rate": 3.319965270036613e-05, "loss": 2.1954, "step": 6784500 }, { "epoch": 33.62, "learning_rate": 3.319841411394005e-05, "loss": 2.2004, "step": 6785000 }, { "epoch": 33.62, "learning_rate": 3.3197175527513965e-05, "loss": 2.211, "step": 6785500 }, { "epoch": 33.62, "learning_rate": 3.319593694108788e-05, "loss": 2.2276, "step": 6786000 }, { "epoch": 33.62, "learning_rate": 3.319469835466179e-05, "loss": 2.2108, "step": 6786500 }, { "epoch": 33.63, "learning_rate": 3.319345976823571e-05, "loss": 2.2176, "step": 6787000 }, { "epoch": 33.63, "learning_rate": 3.3192221181809626e-05, "loss": 2.2349, "step": 6787500 }, { "epoch": 33.63, "learning_rate": 3.3190985072556395e-05, "loss": 2.2048, "step": 6788000 }, { "epoch": 33.63, "learning_rate": 3.318974648613031e-05, "loss": 2.1891, "step": 6788500 }, { "epoch": 33.64, "learning_rate": 3.318850789970423e-05, "loss": 2.2304, "step": 6789000 }, { "epoch": 33.64, "learning_rate": 3.318726931327814e-05, "loss": 2.21, "step": 6789500 }, { "epoch": 33.64, "learning_rate": 3.3186030726852056e-05, "loss": 2.1899, "step": 6790000 }, { "epoch": 33.64, "learning_rate": 3.318479214042597e-05, "loss": 2.2241, "step": 6790500 }, { "epoch": 33.64, "learning_rate": 3.318355355399989e-05, "loss": 2.2191, "step": 6791000 }, { "epoch": 33.65, "learning_rate": 3.318231496757381e-05, "loss": 2.2102, "step": 6791500 }, { "epoch": 33.65, "learning_rate": 3.318107885832058e-05, "loss": 2.1883, "step": 6792000 }, { "epoch": 33.65, "learning_rate": 3.317984027189449e-05, "loss": 2.1964, "step": 6792500 }, { "epoch": 33.65, "learning_rate": 3.317860168546841e-05, "loss": 2.2054, "step": 6793000 }, { "epoch": 33.66, "learning_rate": 3.3177363099042327e-05, "loss": 2.1987, "step": 6793500 }, { "epoch": 33.66, "learning_rate": 3.3176126989789095e-05, "loss": 2.2227, "step": 6794000 }, { "epoch": 33.66, "learning_rate": 3.317488840336301e-05, "loss": 2.217, "step": 6794500 }, { "epoch": 33.66, "learning_rate": 3.317364981693693e-05, "loss": 2.1929, "step": 6795000 }, { "epoch": 33.67, "learning_rate": 3.317241123051084e-05, "loss": 2.2129, "step": 6795500 }, { "epoch": 33.67, "learning_rate": 3.3171172644084756e-05, "loss": 2.2122, "step": 6796000 }, { "epoch": 33.67, "learning_rate": 3.316993405765867e-05, "loss": 2.2232, "step": 6796500 }, { "epoch": 33.67, "learning_rate": 3.316869547123259e-05, "loss": 2.2027, "step": 6797000 }, { "epoch": 33.68, "learning_rate": 3.316745688480651e-05, "loss": 2.1981, "step": 6797500 }, { "epoch": 33.68, "learning_rate": 3.3166218298380424e-05, "loss": 2.1982, "step": 6798000 }, { "epoch": 33.68, "learning_rate": 3.316497971195434e-05, "loss": 2.1927, "step": 6798500 }, { "epoch": 33.68, "learning_rate": 3.316374112552826e-05, "loss": 2.1843, "step": 6799000 }, { "epoch": 33.69, "learning_rate": 3.3162502539102175e-05, "loss": 2.2093, "step": 6799500 }, { "epoch": 33.69, "learning_rate": 3.3161266429848944e-05, "loss": 2.1841, "step": 6800000 }, { "epoch": 33.69, "learning_rate": 3.316003032059571e-05, "loss": 2.2062, "step": 6800500 }, { "epoch": 33.69, "learning_rate": 3.315879173416963e-05, "loss": 2.1887, "step": 6801000 }, { "epoch": 33.7, "learning_rate": 3.3157553147743546e-05, "loss": 2.2124, "step": 6801500 }, { "epoch": 33.7, "learning_rate": 3.3156314561317456e-05, "loss": 2.18, "step": 6802000 }, { "epoch": 33.7, "learning_rate": 3.315507597489137e-05, "loss": 2.2071, "step": 6802500 }, { "epoch": 33.7, "learning_rate": 3.315383986563815e-05, "loss": 2.1993, "step": 6803000 }, { "epoch": 33.71, "learning_rate": 3.315260375638492e-05, "loss": 2.1961, "step": 6803500 }, { "epoch": 33.71, "learning_rate": 3.3151365169958835e-05, "loss": 2.1915, "step": 6804000 }, { "epoch": 33.71, "learning_rate": 3.3150126583532745e-05, "loss": 2.1792, "step": 6804500 }, { "epoch": 33.71, "learning_rate": 3.314888799710666e-05, "loss": 2.192, "step": 6805000 }, { "epoch": 33.72, "learning_rate": 3.314764941068058e-05, "loss": 2.1913, "step": 6805500 }, { "epoch": 33.72, "learning_rate": 3.3146410824254496e-05, "loss": 2.1953, "step": 6806000 }, { "epoch": 33.72, "learning_rate": 3.314517223782841e-05, "loss": 2.1918, "step": 6806500 }, { "epoch": 33.72, "learning_rate": 3.314393365140233e-05, "loss": 2.189, "step": 6807000 }, { "epoch": 33.73, "learning_rate": 3.31426975421491e-05, "loss": 2.1915, "step": 6807500 }, { "epoch": 33.73, "learning_rate": 3.314146143289587e-05, "loss": 2.2173, "step": 6808000 }, { "epoch": 33.73, "learning_rate": 3.3140222846469784e-05, "loss": 2.2431, "step": 6808500 }, { "epoch": 33.73, "learning_rate": 3.31389842600437e-05, "loss": 2.2125, "step": 6809000 }, { "epoch": 33.74, "learning_rate": 3.313774815079046e-05, "loss": 2.224, "step": 6809500 }, { "epoch": 33.74, "learning_rate": 3.313651204153724e-05, "loss": 2.2055, "step": 6810000 }, { "epoch": 33.74, "learning_rate": 3.3135273455111155e-05, "loss": 2.21, "step": 6810500 }, { "epoch": 33.74, "learning_rate": 3.313403486868507e-05, "loss": 2.2161, "step": 6811000 }, { "epoch": 33.75, "learning_rate": 3.313279628225899e-05, "loss": 2.1847, "step": 6811500 }, { "epoch": 33.75, "learning_rate": 3.31315576958329e-05, "loss": 2.2189, "step": 6812000 }, { "epoch": 33.75, "learning_rate": 3.3130319109406816e-05, "loss": 2.2207, "step": 6812500 }, { "epoch": 33.75, "learning_rate": 3.312908052298073e-05, "loss": 2.1814, "step": 6813000 }, { "epoch": 33.76, "learning_rate": 3.312784193655465e-05, "loss": 2.2075, "step": 6813500 }, { "epoch": 33.76, "learning_rate": 3.312660335012857e-05, "loss": 2.2068, "step": 6814000 }, { "epoch": 33.76, "learning_rate": 3.3125364763702484e-05, "loss": 2.2041, "step": 6814500 }, { "epoch": 33.76, "learning_rate": 3.3124128654449246e-05, "loss": 2.2088, "step": 6815000 }, { "epoch": 33.77, "learning_rate": 3.312289006802316e-05, "loss": 2.2029, "step": 6815500 }, { "epoch": 33.77, "learning_rate": 3.312165148159708e-05, "loss": 2.1921, "step": 6816000 }, { "epoch": 33.77, "learning_rate": 3.3120412895171e-05, "loss": 2.216, "step": 6816500 }, { "epoch": 33.77, "learning_rate": 3.3119174308744914e-05, "loss": 2.195, "step": 6817000 }, { "epoch": 33.78, "learning_rate": 3.311793572231883e-05, "loss": 2.2238, "step": 6817500 }, { "epoch": 33.78, "learning_rate": 3.311669713589275e-05, "loss": 2.2144, "step": 6818000 }, { "epoch": 33.78, "learning_rate": 3.3115458549466665e-05, "loss": 2.2092, "step": 6818500 }, { "epoch": 33.78, "learning_rate": 3.3114222440213433e-05, "loss": 2.2108, "step": 6819000 }, { "epoch": 33.79, "learning_rate": 3.311298385378735e-05, "loss": 2.2043, "step": 6819500 }, { "epoch": 33.79, "learning_rate": 3.311174526736127e-05, "loss": 2.1984, "step": 6820000 }, { "epoch": 33.79, "learning_rate": 3.3110509158108036e-05, "loss": 2.1975, "step": 6820500 }, { "epoch": 33.79, "learning_rate": 3.310927057168195e-05, "loss": 2.1769, "step": 6821000 }, { "epoch": 33.8, "learning_rate": 3.310803198525586e-05, "loss": 2.1828, "step": 6821500 }, { "epoch": 33.8, "learning_rate": 3.310679339882978e-05, "loss": 2.2122, "step": 6822000 }, { "epoch": 33.8, "learning_rate": 3.31055548124037e-05, "loss": 2.1925, "step": 6822500 }, { "epoch": 33.8, "learning_rate": 3.3104316225977614e-05, "loss": 2.1946, "step": 6823000 }, { "epoch": 33.81, "learning_rate": 3.310307763955153e-05, "loss": 2.2058, "step": 6823500 }, { "epoch": 33.81, "learning_rate": 3.310183905312545e-05, "loss": 2.2011, "step": 6824000 }, { "epoch": 33.81, "learning_rate": 3.3100600466699365e-05, "loss": 2.2053, "step": 6824500 }, { "epoch": 33.81, "learning_rate": 3.3099364357446134e-05, "loss": 2.2091, "step": 6825000 }, { "epoch": 33.82, "learning_rate": 3.309812577102005e-05, "loss": 2.2024, "step": 6825500 }, { "epoch": 33.82, "learning_rate": 3.309688718459397e-05, "loss": 2.1989, "step": 6826000 }, { "epoch": 33.82, "learning_rate": 3.3095648598167884e-05, "loss": 2.197, "step": 6826500 }, { "epoch": 33.82, "learning_rate": 3.30944100117418e-05, "loss": 2.2224, "step": 6827000 }, { "epoch": 33.83, "learning_rate": 3.309317142531572e-05, "loss": 2.2263, "step": 6827500 }, { "epoch": 33.83, "learning_rate": 3.3091932838889635e-05, "loss": 2.2002, "step": 6828000 }, { "epoch": 33.83, "learning_rate": 3.309069425246355e-05, "loss": 2.2009, "step": 6828500 }, { "epoch": 33.83, "learning_rate": 3.308945566603747e-05, "loss": 2.2113, "step": 6829000 }, { "epoch": 33.84, "learning_rate": 3.3088217079611386e-05, "loss": 2.2273, "step": 6829500 }, { "epoch": 33.84, "learning_rate": 3.30869784931853e-05, "loss": 2.2292, "step": 6830000 }, { "epoch": 33.84, "learning_rate": 3.308573990675922e-05, "loss": 2.1992, "step": 6830500 }, { "epoch": 33.84, "learning_rate": 3.308450132033314e-05, "loss": 2.217, "step": 6831000 }, { "epoch": 33.85, "learning_rate": 3.308326273390705e-05, "loss": 2.2222, "step": 6831500 }, { "epoch": 33.85, "learning_rate": 3.3082026624653816e-05, "loss": 2.1777, "step": 6832000 }, { "epoch": 33.85, "learning_rate": 3.308078803822773e-05, "loss": 2.1957, "step": 6832500 }, { "epoch": 33.85, "learning_rate": 3.30795519289745e-05, "loss": 2.2035, "step": 6833000 }, { "epoch": 33.86, "learning_rate": 3.307831334254842e-05, "loss": 2.2322, "step": 6833500 }, { "epoch": 33.86, "learning_rate": 3.3077074756122335e-05, "loss": 2.2006, "step": 6834000 }, { "epoch": 33.86, "learning_rate": 3.307583616969625e-05, "loss": 2.2075, "step": 6834500 }, { "epoch": 33.86, "learning_rate": 3.3074600060443014e-05, "loss": 2.2022, "step": 6835000 }, { "epoch": 33.87, "learning_rate": 3.307336147401693e-05, "loss": 2.239, "step": 6835500 }, { "epoch": 33.87, "learning_rate": 3.307212288759085e-05, "loss": 2.2284, "step": 6836000 }, { "epoch": 33.87, "learning_rate": 3.3070884301164765e-05, "loss": 2.2016, "step": 6836500 }, { "epoch": 33.87, "learning_rate": 3.3069648191911534e-05, "loss": 2.2318, "step": 6837000 }, { "epoch": 33.88, "learning_rate": 3.306840960548545e-05, "loss": 2.2189, "step": 6837500 }, { "epoch": 33.88, "learning_rate": 3.306717101905937e-05, "loss": 2.2088, "step": 6838000 }, { "epoch": 33.88, "learning_rate": 3.3065932432633285e-05, "loss": 2.215, "step": 6838500 }, { "epoch": 33.88, "learning_rate": 3.30646938462072e-05, "loss": 2.2143, "step": 6839000 }, { "epoch": 33.89, "learning_rate": 3.306345525978112e-05, "loss": 2.2258, "step": 6839500 }, { "epoch": 33.89, "learning_rate": 3.3062216673355035e-05, "loss": 2.2073, "step": 6840000 }, { "epoch": 33.89, "learning_rate": 3.306097808692895e-05, "loss": 2.2141, "step": 6840500 }, { "epoch": 33.89, "learning_rate": 3.305973950050287e-05, "loss": 2.2215, "step": 6841000 }, { "epoch": 33.9, "learning_rate": 3.3058500914076786e-05, "loss": 2.2281, "step": 6841500 }, { "epoch": 33.9, "learning_rate": 3.30572623276507e-05, "loss": 2.1636, "step": 6842000 }, { "epoch": 33.9, "learning_rate": 3.305602374122462e-05, "loss": 2.2143, "step": 6842500 }, { "epoch": 33.9, "learning_rate": 3.305478515479854e-05, "loss": 2.2349, "step": 6843000 }, { "epoch": 33.91, "learning_rate": 3.30535490455453e-05, "loss": 2.2016, "step": 6843500 }, { "epoch": 33.91, "learning_rate": 3.305231293629207e-05, "loss": 2.1975, "step": 6844000 }, { "epoch": 33.91, "learning_rate": 3.3051074349865985e-05, "loss": 2.2012, "step": 6844500 }, { "epoch": 33.91, "learning_rate": 3.30498357634399e-05, "loss": 2.2201, "step": 6845000 }, { "epoch": 33.91, "learning_rate": 3.304859717701382e-05, "loss": 2.1905, "step": 6845500 }, { "epoch": 33.92, "learning_rate": 3.3047358590587735e-05, "loss": 2.2118, "step": 6846000 }, { "epoch": 33.92, "learning_rate": 3.304612000416165e-05, "loss": 2.2002, "step": 6846500 }, { "epoch": 33.92, "learning_rate": 3.304488141773557e-05, "loss": 2.2046, "step": 6847000 }, { "epoch": 33.92, "learning_rate": 3.3043642831309486e-05, "loss": 2.192, "step": 6847500 }, { "epoch": 33.93, "learning_rate": 3.304240672205625e-05, "loss": 2.2318, "step": 6848000 }, { "epoch": 33.93, "learning_rate": 3.3041168135630165e-05, "loss": 2.2128, "step": 6848500 }, { "epoch": 33.93, "learning_rate": 3.303992954920408e-05, "loss": 2.2345, "step": 6849000 }, { "epoch": 33.93, "learning_rate": 3.303869343995085e-05, "loss": 2.2106, "step": 6849500 }, { "epoch": 33.94, "learning_rate": 3.303745485352477e-05, "loss": 2.2147, "step": 6850000 }, { "epoch": 33.94, "learning_rate": 3.3036216267098685e-05, "loss": 2.2091, "step": 6850500 }, { "epoch": 33.94, "learning_rate": 3.30349776806726e-05, "loss": 2.1914, "step": 6851000 }, { "epoch": 33.94, "learning_rate": 3.303373909424652e-05, "loss": 2.2098, "step": 6851500 }, { "epoch": 33.95, "learning_rate": 3.3032500507820436e-05, "loss": 2.1919, "step": 6852000 }, { "epoch": 33.95, "learning_rate": 3.303126192139435e-05, "loss": 2.1861, "step": 6852500 }, { "epoch": 33.95, "learning_rate": 3.303002828931397e-05, "loss": 2.202, "step": 6853000 }, { "epoch": 33.95, "learning_rate": 3.302878970288789e-05, "loss": 2.2175, "step": 6853500 }, { "epoch": 33.96, "learning_rate": 3.302755111646181e-05, "loss": 2.2061, "step": 6854000 }, { "epoch": 33.96, "learning_rate": 3.3026312530035724e-05, "loss": 2.1985, "step": 6854500 }, { "epoch": 33.96, "learning_rate": 3.302507394360964e-05, "loss": 2.2186, "step": 6855000 }, { "epoch": 33.96, "learning_rate": 3.302383783435641e-05, "loss": 2.2056, "step": 6855500 }, { "epoch": 33.97, "learning_rate": 3.302259924793033e-05, "loss": 2.2069, "step": 6856000 }, { "epoch": 33.97, "learning_rate": 3.3021360661504244e-05, "loss": 2.2249, "step": 6856500 }, { "epoch": 33.97, "learning_rate": 3.302012207507816e-05, "loss": 2.2189, "step": 6857000 }, { "epoch": 33.97, "learning_rate": 3.301888348865208e-05, "loss": 2.2219, "step": 6857500 }, { "epoch": 33.98, "learning_rate": 3.3017644902225994e-05, "loss": 2.1775, "step": 6858000 }, { "epoch": 33.98, "learning_rate": 3.301640631579991e-05, "loss": 2.1861, "step": 6858500 }, { "epoch": 33.98, "learning_rate": 3.301516772937382e-05, "loss": 2.2028, "step": 6859000 }, { "epoch": 33.98, "learning_rate": 3.301393162012059e-05, "loss": 2.1903, "step": 6859500 }, { "epoch": 33.99, "learning_rate": 3.301269303369451e-05, "loss": 2.2199, "step": 6860000 }, { "epoch": 33.99, "learning_rate": 3.3011454447268424e-05, "loss": 2.2266, "step": 6860500 }, { "epoch": 33.99, "learning_rate": 3.301021586084234e-05, "loss": 2.223, "step": 6861000 }, { "epoch": 33.99, "learning_rate": 3.300897975158911e-05, "loss": 2.2017, "step": 6861500 }, { "epoch": 34.0, "learning_rate": 3.300774116516303e-05, "loss": 2.2275, "step": 6862000 }, { "epoch": 34.0, "learning_rate": 3.3006502578736944e-05, "loss": 2.1995, "step": 6862500 }, { "epoch": 34.0, "eval_accuracy": 0.6615834582009188, "eval_accuracy_mlm": 0.6176810786763514, "eval_accuracy_nsp": 0.8686612357280975, "eval_loss": 2.2982945442199707, "eval_runtime": 145.6589, "eval_samples_per_second": 1750.384, "eval_steps_per_second": 72.938, "step": 6862662 }, { "epoch": 34.0, "learning_rate": 3.300526399231086e-05, "loss": 2.234, "step": 6863000 }, { "epoch": 34.0, "learning_rate": 3.300402540588478e-05, "loss": 2.1825, "step": 6863500 }, { "epoch": 34.01, "learning_rate": 3.3002786819458695e-05, "loss": 2.1727, "step": 6864000 }, { "epoch": 34.01, "learning_rate": 3.300154823303261e-05, "loss": 2.1697, "step": 6864500 }, { "epoch": 34.01, "learning_rate": 3.300030964660652e-05, "loss": 2.1944, "step": 6865000 }, { "epoch": 34.01, "learning_rate": 3.299907106018044e-05, "loss": 2.1777, "step": 6865500 }, { "epoch": 34.02, "learning_rate": 3.2997832473754355e-05, "loss": 2.1701, "step": 6866000 }, { "epoch": 34.02, "learning_rate": 3.299659388732827e-05, "loss": 2.2099, "step": 6866500 }, { "epoch": 34.02, "learning_rate": 3.299536025524789e-05, "loss": 2.1635, "step": 6867000 }, { "epoch": 34.02, "learning_rate": 3.299412166882181e-05, "loss": 2.173, "step": 6867500 }, { "epoch": 34.03, "learning_rate": 3.299288308239573e-05, "loss": 2.1623, "step": 6868000 }, { "epoch": 34.03, "learning_rate": 3.2991644495969644e-05, "loss": 2.1848, "step": 6868500 }, { "epoch": 34.03, "learning_rate": 3.299040590954356e-05, "loss": 2.1915, "step": 6869000 }, { "epoch": 34.03, "learning_rate": 3.298916732311748e-05, "loss": 2.193, "step": 6869500 }, { "epoch": 34.04, "learning_rate": 3.2987928736691395e-05, "loss": 2.2049, "step": 6870000 }, { "epoch": 34.04, "learning_rate": 3.298669015026531e-05, "loss": 2.1921, "step": 6870500 }, { "epoch": 34.04, "learning_rate": 3.298545156383923e-05, "loss": 2.1759, "step": 6871000 }, { "epoch": 34.04, "learning_rate": 3.298421297741314e-05, "loss": 2.1883, "step": 6871500 }, { "epoch": 34.05, "learning_rate": 3.298297686815991e-05, "loss": 2.1694, "step": 6872000 }, { "epoch": 34.05, "learning_rate": 3.2981738281733824e-05, "loss": 2.1732, "step": 6872500 }, { "epoch": 34.05, "learning_rate": 3.298049969530774e-05, "loss": 2.1889, "step": 6873000 }, { "epoch": 34.05, "learning_rate": 3.297926110888166e-05, "loss": 2.1705, "step": 6873500 }, { "epoch": 34.06, "learning_rate": 3.2978022522455575e-05, "loss": 2.1701, "step": 6874000 }, { "epoch": 34.06, "learning_rate": 3.2976786413202344e-05, "loss": 2.1949, "step": 6874500 }, { "epoch": 34.06, "learning_rate": 3.297554782677626e-05, "loss": 2.1956, "step": 6875000 }, { "epoch": 34.06, "learning_rate": 3.297430924035018e-05, "loss": 2.1968, "step": 6875500 }, { "epoch": 34.07, "learning_rate": 3.2973070653924095e-05, "loss": 2.1826, "step": 6876000 }, { "epoch": 34.07, "learning_rate": 3.297183206749801e-05, "loss": 2.169, "step": 6876500 }, { "epoch": 34.07, "learning_rate": 3.2970595958244774e-05, "loss": 2.1826, "step": 6877000 }, { "epoch": 34.07, "learning_rate": 3.296935737181869e-05, "loss": 2.1644, "step": 6877500 }, { "epoch": 34.08, "learning_rate": 3.296811878539261e-05, "loss": 2.1973, "step": 6878000 }, { "epoch": 34.08, "learning_rate": 3.2966880198966525e-05, "loss": 2.1765, "step": 6878500 }, { "epoch": 34.08, "learning_rate": 3.296564161254044e-05, "loss": 2.1869, "step": 6879000 }, { "epoch": 34.08, "learning_rate": 3.296440302611436e-05, "loss": 2.1799, "step": 6879500 }, { "epoch": 34.09, "learning_rate": 3.2963164439688275e-05, "loss": 2.1789, "step": 6880000 }, { "epoch": 34.09, "learning_rate": 3.2961925853262185e-05, "loss": 2.1891, "step": 6880500 }, { "epoch": 34.09, "learning_rate": 3.29606872668361e-05, "loss": 2.2122, "step": 6881000 }, { "epoch": 34.09, "learning_rate": 3.295944868041002e-05, "loss": 2.163, "step": 6881500 }, { "epoch": 34.1, "learning_rate": 3.2958212571156795e-05, "loss": 2.1841, "step": 6882000 }, { "epoch": 34.1, "learning_rate": 3.2956976461903564e-05, "loss": 2.188, "step": 6882500 }, { "epoch": 34.1, "learning_rate": 3.2955740352650326e-05, "loss": 2.1696, "step": 6883000 }, { "epoch": 34.1, "learning_rate": 3.295450176622424e-05, "loss": 2.1982, "step": 6883500 }, { "epoch": 34.11, "learning_rate": 3.295326317979816e-05, "loss": 2.165, "step": 6884000 }, { "epoch": 34.11, "learning_rate": 3.2952024593372077e-05, "loss": 2.1517, "step": 6884500 }, { "epoch": 34.11, "learning_rate": 3.2950786006945993e-05, "loss": 2.1447, "step": 6885000 }, { "epoch": 34.11, "learning_rate": 3.294954742051991e-05, "loss": 2.1803, "step": 6885500 }, { "epoch": 34.12, "learning_rate": 3.294830883409383e-05, "loss": 2.1727, "step": 6886000 }, { "epoch": 34.12, "learning_rate": 3.2947070247667744e-05, "loss": 2.1894, "step": 6886500 }, { "epoch": 34.12, "learning_rate": 3.2945836615587365e-05, "loss": 2.1625, "step": 6887000 }, { "epoch": 34.12, "learning_rate": 3.2944598029161275e-05, "loss": 2.1874, "step": 6887500 }, { "epoch": 34.13, "learning_rate": 3.294335944273519e-05, "loss": 2.1846, "step": 6888000 }, { "epoch": 34.13, "learning_rate": 3.294212333348197e-05, "loss": 2.1903, "step": 6888500 }, { "epoch": 34.13, "learning_rate": 3.2940884747055885e-05, "loss": 2.1673, "step": 6889000 }, { "epoch": 34.13, "learning_rate": 3.29396461606298e-05, "loss": 2.1791, "step": 6889500 }, { "epoch": 34.14, "learning_rate": 3.293840757420372e-05, "loss": 2.1863, "step": 6890000 }, { "epoch": 34.14, "learning_rate": 3.2937168987777635e-05, "loss": 2.1819, "step": 6890500 }, { "epoch": 34.14, "learning_rate": 3.2935930401351545e-05, "loss": 2.2075, "step": 6891000 }, { "epoch": 34.14, "learning_rate": 3.293469181492546e-05, "loss": 2.2059, "step": 6891500 }, { "epoch": 34.15, "learning_rate": 3.293345322849938e-05, "loss": 2.1733, "step": 6892000 }, { "epoch": 34.15, "learning_rate": 3.2932214642073296e-05, "loss": 2.1764, "step": 6892500 }, { "epoch": 34.15, "learning_rate": 3.293097605564721e-05, "loss": 2.2026, "step": 6893000 }, { "epoch": 34.15, "learning_rate": 3.292973746922113e-05, "loss": 2.1988, "step": 6893500 }, { "epoch": 34.16, "learning_rate": 3.292849888279505e-05, "loss": 2.1872, "step": 6894000 }, { "epoch": 34.16, "learning_rate": 3.2927260296368964e-05, "loss": 2.1832, "step": 6894500 }, { "epoch": 34.16, "learning_rate": 3.292602170994288e-05, "loss": 2.1525, "step": 6895000 }, { "epoch": 34.16, "learning_rate": 3.292478560068964e-05, "loss": 2.1841, "step": 6895500 }, { "epoch": 34.17, "learning_rate": 3.292354701426356e-05, "loss": 2.1954, "step": 6896000 }, { "epoch": 34.17, "learning_rate": 3.292230842783748e-05, "loss": 2.2155, "step": 6896500 }, { "epoch": 34.17, "learning_rate": 3.2921069841411394e-05, "loss": 2.1929, "step": 6897000 }, { "epoch": 34.17, "learning_rate": 3.291983125498531e-05, "loss": 2.1928, "step": 6897500 }, { "epoch": 34.18, "learning_rate": 3.291859266855923e-05, "loss": 2.2045, "step": 6898000 }, { "epoch": 34.18, "learning_rate": 3.2917356559305996e-05, "loss": 2.1672, "step": 6898500 }, { "epoch": 34.18, "learning_rate": 3.291611797287991e-05, "loss": 2.1694, "step": 6899000 }, { "epoch": 34.18, "learning_rate": 3.291487938645383e-05, "loss": 2.19, "step": 6899500 }, { "epoch": 34.18, "learning_rate": 3.291364080002775e-05, "loss": 2.1734, "step": 6900000 }, { "epoch": 34.19, "learning_rate": 3.291240469077451e-05, "loss": 2.1893, "step": 6900500 }, { "epoch": 34.19, "learning_rate": 3.2911166104348426e-05, "loss": 2.16, "step": 6901000 }, { "epoch": 34.19, "learning_rate": 3.290992751792234e-05, "loss": 2.1769, "step": 6901500 }, { "epoch": 34.19, "learning_rate": 3.290868893149626e-05, "loss": 2.1874, "step": 6902000 }, { "epoch": 34.2, "learning_rate": 3.290745034507018e-05, "loss": 2.1739, "step": 6902500 }, { "epoch": 34.2, "learning_rate": 3.2906211758644094e-05, "loss": 2.1859, "step": 6903000 }, { "epoch": 34.2, "learning_rate": 3.290497317221801e-05, "loss": 2.2051, "step": 6903500 }, { "epoch": 34.2, "learning_rate": 3.290373458579193e-05, "loss": 2.1717, "step": 6904000 }, { "epoch": 34.21, "learning_rate": 3.2902495999365845e-05, "loss": 2.1797, "step": 6904500 }, { "epoch": 34.21, "learning_rate": 3.2901262367285465e-05, "loss": 2.1786, "step": 6905000 }, { "epoch": 34.21, "learning_rate": 3.290002378085938e-05, "loss": 2.1727, "step": 6905500 }, { "epoch": 34.21, "learning_rate": 3.28987851944333e-05, "loss": 2.1891, "step": 6906000 }, { "epoch": 34.22, "learning_rate": 3.289754660800721e-05, "loss": 2.1985, "step": 6906500 }, { "epoch": 34.22, "learning_rate": 3.2896308021581126e-05, "loss": 2.1938, "step": 6907000 }, { "epoch": 34.22, "learning_rate": 3.28950719123279e-05, "loss": 2.1897, "step": 6907500 }, { "epoch": 34.22, "learning_rate": 3.289383332590182e-05, "loss": 2.166, "step": 6908000 }, { "epoch": 34.23, "learning_rate": 3.2892594739475736e-05, "loss": 2.1857, "step": 6908500 }, { "epoch": 34.23, "learning_rate": 3.2891358630222504e-05, "loss": 2.1884, "step": 6909000 }, { "epoch": 34.23, "learning_rate": 3.289012004379642e-05, "loss": 2.1808, "step": 6909500 }, { "epoch": 34.23, "learning_rate": 3.288888145737034e-05, "loss": 2.1817, "step": 6910000 }, { "epoch": 34.24, "learning_rate": 3.2887642870944255e-05, "loss": 2.209, "step": 6910500 }, { "epoch": 34.24, "learning_rate": 3.2886404284518165e-05, "loss": 2.1976, "step": 6911000 }, { "epoch": 34.24, "learning_rate": 3.288516569809208e-05, "loss": 2.2096, "step": 6911500 }, { "epoch": 34.24, "learning_rate": 3.2883927111666e-05, "loss": 2.1899, "step": 6912000 }, { "epoch": 34.25, "learning_rate": 3.2882688525239916e-05, "loss": 2.1939, "step": 6912500 }, { "epoch": 34.25, "learning_rate": 3.2881449938813826e-05, "loss": 2.1881, "step": 6913000 }, { "epoch": 34.25, "learning_rate": 3.288021135238774e-05, "loss": 2.2121, "step": 6913500 }, { "epoch": 34.25, "learning_rate": 3.287897276596166e-05, "loss": 2.1759, "step": 6914000 }, { "epoch": 34.26, "learning_rate": 3.287773417953558e-05, "loss": 2.1824, "step": 6914500 }, { "epoch": 34.26, "learning_rate": 3.287649807028235e-05, "loss": 2.1901, "step": 6915000 }, { "epoch": 34.26, "learning_rate": 3.287525948385627e-05, "loss": 2.1768, "step": 6915500 }, { "epoch": 34.26, "learning_rate": 3.287402089743018e-05, "loss": 2.1994, "step": 6916000 }, { "epoch": 34.27, "learning_rate": 3.28727823110041e-05, "loss": 2.1775, "step": 6916500 }, { "epoch": 34.27, "learning_rate": 3.287154867892372e-05, "loss": 2.2061, "step": 6917000 }, { "epoch": 34.27, "learning_rate": 3.2870310092497634e-05, "loss": 2.1988, "step": 6917500 }, { "epoch": 34.27, "learning_rate": 3.286907150607155e-05, "loss": 2.174, "step": 6918000 }, { "epoch": 34.28, "learning_rate": 3.286783291964547e-05, "loss": 2.1943, "step": 6918500 }, { "epoch": 34.28, "learning_rate": 3.2866594333219385e-05, "loss": 2.2035, "step": 6919000 }, { "epoch": 34.28, "learning_rate": 3.2865358223966154e-05, "loss": 2.1593, "step": 6919500 }, { "epoch": 34.28, "learning_rate": 3.286411963754007e-05, "loss": 2.1835, "step": 6920000 }, { "epoch": 34.29, "learning_rate": 3.286288105111399e-05, "loss": 2.2108, "step": 6920500 }, { "epoch": 34.29, "learning_rate": 3.2861642464687905e-05, "loss": 2.2148, "step": 6921000 }, { "epoch": 34.29, "learning_rate": 3.286040635543467e-05, "loss": 2.193, "step": 6921500 }, { "epoch": 34.29, "learning_rate": 3.2859167769008584e-05, "loss": 2.2132, "step": 6922000 }, { "epoch": 34.3, "learning_rate": 3.28579291825825e-05, "loss": 2.1787, "step": 6922500 }, { "epoch": 34.3, "learning_rate": 3.285669555050213e-05, "loss": 2.1977, "step": 6923000 }, { "epoch": 34.3, "learning_rate": 3.2855456964076045e-05, "loss": 2.1799, "step": 6923500 }, { "epoch": 34.3, "learning_rate": 3.285421837764996e-05, "loss": 2.186, "step": 6924000 }, { "epoch": 34.31, "learning_rate": 3.285297979122388e-05, "loss": 2.1851, "step": 6924500 }, { "epoch": 34.31, "learning_rate": 3.285174120479779e-05, "loss": 2.1996, "step": 6925000 }, { "epoch": 34.31, "learning_rate": 3.285050509554456e-05, "loss": 2.1971, "step": 6925500 }, { "epoch": 34.31, "learning_rate": 3.2849266509118475e-05, "loss": 2.1959, "step": 6926000 }, { "epoch": 34.32, "learning_rate": 3.284802792269239e-05, "loss": 2.1869, "step": 6926500 }, { "epoch": 34.32, "learning_rate": 3.284678933626631e-05, "loss": 2.196, "step": 6927000 }, { "epoch": 34.32, "learning_rate": 3.2845550749840226e-05, "loss": 2.1712, "step": 6927500 }, { "epoch": 34.32, "learning_rate": 3.284431216341414e-05, "loss": 2.1767, "step": 6928000 }, { "epoch": 34.33, "learning_rate": 3.284307357698806e-05, "loss": 2.1987, "step": 6928500 }, { "epoch": 34.33, "learning_rate": 3.284183746773483e-05, "loss": 2.2139, "step": 6929000 }, { "epoch": 34.33, "learning_rate": 3.2840598881308745e-05, "loss": 2.1773, "step": 6929500 }, { "epoch": 34.33, "learning_rate": 3.283936029488266e-05, "loss": 2.1861, "step": 6930000 }, { "epoch": 34.34, "learning_rate": 3.283812170845658e-05, "loss": 2.1925, "step": 6930500 }, { "epoch": 34.34, "learning_rate": 3.2836883122030496e-05, "loss": 2.1843, "step": 6931000 }, { "epoch": 34.34, "learning_rate": 3.283564453560441e-05, "loss": 2.1896, "step": 6931500 }, { "epoch": 34.34, "learning_rate": 3.283440594917832e-05, "loss": 2.2057, "step": 6932000 }, { "epoch": 34.35, "learning_rate": 3.283316983992509e-05, "loss": 2.1789, "step": 6932500 }, { "epoch": 34.35, "learning_rate": 3.283193125349901e-05, "loss": 2.1865, "step": 6933000 }, { "epoch": 34.35, "learning_rate": 3.2830692667072926e-05, "loss": 2.18, "step": 6933500 }, { "epoch": 34.35, "learning_rate": 3.282945408064684e-05, "loss": 2.195, "step": 6934000 }, { "epoch": 34.36, "learning_rate": 3.282821549422076e-05, "loss": 2.2251, "step": 6934500 }, { "epoch": 34.36, "learning_rate": 3.282697690779467e-05, "loss": 2.176, "step": 6935000 }, { "epoch": 34.36, "learning_rate": 3.2825738321368587e-05, "loss": 2.1886, "step": 6935500 }, { "epoch": 34.36, "learning_rate": 3.2824499734942504e-05, "loss": 2.1885, "step": 6936000 }, { "epoch": 34.37, "learning_rate": 3.282326114851642e-05, "loss": 2.2061, "step": 6936500 }, { "epoch": 34.37, "learning_rate": 3.282202256209034e-05, "loss": 2.2056, "step": 6937000 }, { "epoch": 34.37, "learning_rate": 3.282078645283711e-05, "loss": 2.1923, "step": 6937500 }, { "epoch": 34.37, "learning_rate": 3.2819550343583875e-05, "loss": 2.1916, "step": 6938000 }, { "epoch": 34.38, "learning_rate": 3.281831175715779e-05, "loss": 2.1716, "step": 6938500 }, { "epoch": 34.38, "learning_rate": 3.281707317073171e-05, "loss": 2.1798, "step": 6939000 }, { "epoch": 34.38, "learning_rate": 3.2815834584305626e-05, "loss": 2.1916, "step": 6939500 }, { "epoch": 34.38, "learning_rate": 3.281459599787954e-05, "loss": 2.1689, "step": 6940000 }, { "epoch": 34.39, "learning_rate": 3.281335741145346e-05, "loss": 2.227, "step": 6940500 }, { "epoch": 34.39, "learning_rate": 3.281212377937308e-05, "loss": 2.1959, "step": 6941000 }, { "epoch": 34.39, "learning_rate": 3.2810885192947e-05, "loss": 2.2158, "step": 6941500 }, { "epoch": 34.39, "learning_rate": 3.280964660652091e-05, "loss": 2.2076, "step": 6942000 }, { "epoch": 34.4, "learning_rate": 3.2808408020094824e-05, "loss": 2.2063, "step": 6942500 }, { "epoch": 34.4, "learning_rate": 3.280716943366874e-05, "loss": 2.2234, "step": 6943000 }, { "epoch": 34.4, "learning_rate": 3.280593084724266e-05, "loss": 2.2024, "step": 6943500 }, { "epoch": 34.4, "learning_rate": 3.2804692260816575e-05, "loss": 2.1942, "step": 6944000 }, { "epoch": 34.41, "learning_rate": 3.280345367439049e-05, "loss": 2.1817, "step": 6944500 }, { "epoch": 34.41, "learning_rate": 3.280221508796441e-05, "loss": 2.2017, "step": 6945000 }, { "epoch": 34.41, "learning_rate": 3.2800976501538326e-05, "loss": 2.198, "step": 6945500 }, { "epoch": 34.41, "learning_rate": 3.279973791511224e-05, "loss": 2.1933, "step": 6946000 }, { "epoch": 34.42, "learning_rate": 3.279849932868616e-05, "loss": 2.2008, "step": 6946500 }, { "epoch": 34.42, "learning_rate": 3.279726074226008e-05, "loss": 2.2, "step": 6947000 }, { "epoch": 34.42, "learning_rate": 3.279602215583399e-05, "loss": 2.174, "step": 6947500 }, { "epoch": 34.42, "learning_rate": 3.2794783569407904e-05, "loss": 2.1931, "step": 6948000 }, { "epoch": 34.43, "learning_rate": 3.279354746015468e-05, "loss": 2.1865, "step": 6948500 }, { "epoch": 34.43, "learning_rate": 3.2792308873728596e-05, "loss": 2.2144, "step": 6949000 }, { "epoch": 34.43, "learning_rate": 3.279107028730251e-05, "loss": 2.2073, "step": 6949500 }, { "epoch": 34.43, "learning_rate": 3.278983170087643e-05, "loss": 2.2077, "step": 6950000 }, { "epoch": 34.44, "learning_rate": 3.278859311445035e-05, "loss": 2.201, "step": 6950500 }, { "epoch": 34.44, "learning_rate": 3.278735700519711e-05, "loss": 2.1901, "step": 6951000 }, { "epoch": 34.44, "learning_rate": 3.2786118418771026e-05, "loss": 2.202, "step": 6951500 }, { "epoch": 34.44, "learning_rate": 3.278487983234494e-05, "loss": 2.184, "step": 6952000 }, { "epoch": 34.45, "learning_rate": 3.278364124591886e-05, "loss": 2.191, "step": 6952500 }, { "epoch": 34.45, "learning_rate": 3.278240513666563e-05, "loss": 2.2036, "step": 6953000 }, { "epoch": 34.45, "learning_rate": 3.2781166550239546e-05, "loss": 2.1638, "step": 6953500 }, { "epoch": 34.45, "learning_rate": 3.277992796381346e-05, "loss": 2.1686, "step": 6954000 }, { "epoch": 34.45, "learning_rate": 3.277868937738738e-05, "loss": 2.2152, "step": 6954500 }, { "epoch": 34.46, "learning_rate": 3.2777450790961296e-05, "loss": 2.196, "step": 6955000 }, { "epoch": 34.46, "learning_rate": 3.277621220453521e-05, "loss": 2.2021, "step": 6955500 }, { "epoch": 34.46, "learning_rate": 3.277497361810913e-05, "loss": 2.1876, "step": 6956000 }, { "epoch": 34.46, "learning_rate": 3.277373503168305e-05, "loss": 2.2034, "step": 6956500 }, { "epoch": 34.47, "learning_rate": 3.277249892242981e-05, "loss": 2.1923, "step": 6957000 }, { "epoch": 34.47, "learning_rate": 3.2771260336003726e-05, "loss": 2.2079, "step": 6957500 }, { "epoch": 34.47, "learning_rate": 3.277002174957764e-05, "loss": 2.1761, "step": 6958000 }, { "epoch": 34.47, "learning_rate": 3.276878316315156e-05, "loss": 2.1964, "step": 6958500 }, { "epoch": 34.48, "learning_rate": 3.276754457672548e-05, "loss": 2.1802, "step": 6959000 }, { "epoch": 34.48, "learning_rate": 3.2766305990299394e-05, "loss": 2.196, "step": 6959500 }, { "epoch": 34.48, "learning_rate": 3.2765067403873304e-05, "loss": 2.2058, "step": 6960000 }, { "epoch": 34.48, "learning_rate": 3.276383129462008e-05, "loss": 2.1698, "step": 6960500 }, { "epoch": 34.49, "learning_rate": 3.276259518536685e-05, "loss": 2.1963, "step": 6961000 }, { "epoch": 34.49, "learning_rate": 3.2761356598940765e-05, "loss": 2.1675, "step": 6961500 }, { "epoch": 34.49, "learning_rate": 3.276011801251468e-05, "loss": 2.1949, "step": 6962000 }, { "epoch": 34.49, "learning_rate": 3.275887942608859e-05, "loss": 2.2113, "step": 6962500 }, { "epoch": 34.5, "learning_rate": 3.275764083966251e-05, "loss": 2.2099, "step": 6963000 }, { "epoch": 34.5, "learning_rate": 3.2756402253236426e-05, "loss": 2.2009, "step": 6963500 }, { "epoch": 34.5, "learning_rate": 3.275516366681034e-05, "loss": 2.1868, "step": 6964000 }, { "epoch": 34.5, "learning_rate": 3.275392508038426e-05, "loss": 2.2024, "step": 6964500 }, { "epoch": 34.51, "learning_rate": 3.275268649395818e-05, "loss": 2.1997, "step": 6965000 }, { "epoch": 34.51, "learning_rate": 3.2751447907532094e-05, "loss": 2.1985, "step": 6965500 }, { "epoch": 34.51, "learning_rate": 3.275021179827886e-05, "loss": 2.1987, "step": 6966000 }, { "epoch": 34.51, "learning_rate": 3.274897321185278e-05, "loss": 2.2142, "step": 6966500 }, { "epoch": 34.52, "learning_rate": 3.27477346254267e-05, "loss": 2.2064, "step": 6967000 }, { "epoch": 34.52, "learning_rate": 3.2746496039000614e-05, "loss": 2.1865, "step": 6967500 }, { "epoch": 34.52, "learning_rate": 3.274525745257453e-05, "loss": 2.1711, "step": 6968000 }, { "epoch": 34.52, "learning_rate": 3.274401886614845e-05, "loss": 2.2059, "step": 6968500 }, { "epoch": 34.53, "learning_rate": 3.2742780279722364e-05, "loss": 2.199, "step": 6969000 }, { "epoch": 34.53, "learning_rate": 3.2741544170469126e-05, "loss": 2.2009, "step": 6969500 }, { "epoch": 34.53, "learning_rate": 3.274030558404304e-05, "loss": 2.1918, "step": 6970000 }, { "epoch": 34.53, "learning_rate": 3.273906699761696e-05, "loss": 2.1988, "step": 6970500 }, { "epoch": 34.54, "learning_rate": 3.273782841119088e-05, "loss": 2.2398, "step": 6971000 }, { "epoch": 34.54, "learning_rate": 3.2736592301937646e-05, "loss": 2.2103, "step": 6971500 }, { "epoch": 34.54, "learning_rate": 3.273535371551156e-05, "loss": 2.1703, "step": 6972000 }, { "epoch": 34.54, "learning_rate": 3.273411512908548e-05, "loss": 2.189, "step": 6972500 }, { "epoch": 34.55, "learning_rate": 3.27328765426594e-05, "loss": 2.21, "step": 6973000 }, { "epoch": 34.55, "learning_rate": 3.2731637956233314e-05, "loss": 2.201, "step": 6973500 }, { "epoch": 34.55, "learning_rate": 3.273040184698008e-05, "loss": 2.2171, "step": 6974000 }, { "epoch": 34.55, "learning_rate": 3.2729163260554e-05, "loss": 2.2048, "step": 6974500 }, { "epoch": 34.56, "learning_rate": 3.2727924674127916e-05, "loss": 2.2046, "step": 6975000 }, { "epoch": 34.56, "learning_rate": 3.272668608770183e-05, "loss": 2.1846, "step": 6975500 }, { "epoch": 34.56, "learning_rate": 3.2725447501275743e-05, "loss": 2.1931, "step": 6976000 }, { "epoch": 34.56, "learning_rate": 3.2724213869195364e-05, "loss": 2.1719, "step": 6976500 }, { "epoch": 34.57, "learning_rate": 3.272297528276928e-05, "loss": 2.181, "step": 6977000 }, { "epoch": 34.57, "learning_rate": 3.27217366963432e-05, "loss": 2.18, "step": 6977500 }, { "epoch": 34.57, "learning_rate": 3.2720498109917115e-05, "loss": 2.2069, "step": 6978000 }, { "epoch": 34.57, "learning_rate": 3.271925952349103e-05, "loss": 2.2135, "step": 6978500 }, { "epoch": 34.58, "learning_rate": 3.271802093706495e-05, "loss": 2.2009, "step": 6979000 }, { "epoch": 34.58, "learning_rate": 3.2716782350638866e-05, "loss": 2.1917, "step": 6979500 }, { "epoch": 34.58, "learning_rate": 3.271554376421278e-05, "loss": 2.2079, "step": 6980000 }, { "epoch": 34.58, "learning_rate": 3.27143051777867e-05, "loss": 2.182, "step": 6980500 }, { "epoch": 34.59, "learning_rate": 3.2713066591360617e-05, "loss": 2.2022, "step": 6981000 }, { "epoch": 34.59, "learning_rate": 3.271183048210738e-05, "loss": 2.199, "step": 6981500 }, { "epoch": 34.59, "learning_rate": 3.2710591895681295e-05, "loss": 2.1813, "step": 6982000 }, { "epoch": 34.59, "learning_rate": 3.270935330925521e-05, "loss": 2.1995, "step": 6982500 }, { "epoch": 34.6, "learning_rate": 3.270811472282913e-05, "loss": 2.2181, "step": 6983000 }, { "epoch": 34.6, "learning_rate": 3.2706876136403046e-05, "loss": 2.2099, "step": 6983500 }, { "epoch": 34.6, "learning_rate": 3.2705640027149815e-05, "loss": 2.1953, "step": 6984000 }, { "epoch": 34.6, "learning_rate": 3.270440144072373e-05, "loss": 2.2185, "step": 6984500 }, { "epoch": 34.61, "learning_rate": 3.270316285429765e-05, "loss": 2.1995, "step": 6985000 }, { "epoch": 34.61, "learning_rate": 3.2701924267871566e-05, "loss": 2.1897, "step": 6985500 }, { "epoch": 34.61, "learning_rate": 3.270068568144548e-05, "loss": 2.2206, "step": 6986000 }, { "epoch": 34.61, "learning_rate": 3.2699449572192245e-05, "loss": 2.1854, "step": 6986500 }, { "epoch": 34.62, "learning_rate": 3.269821098576616e-05, "loss": 2.2143, "step": 6987000 }, { "epoch": 34.62, "learning_rate": 3.269697239934008e-05, "loss": 2.196, "step": 6987500 }, { "epoch": 34.62, "learning_rate": 3.2695733812913996e-05, "loss": 2.2031, "step": 6988000 }, { "epoch": 34.62, "learning_rate": 3.269449522648791e-05, "loss": 2.1955, "step": 6988500 }, { "epoch": 34.63, "learning_rate": 3.269325911723468e-05, "loss": 2.1923, "step": 6989000 }, { "epoch": 34.63, "learning_rate": 3.26920205308086e-05, "loss": 2.2108, "step": 6989500 }, { "epoch": 34.63, "learning_rate": 3.2690781944382515e-05, "loss": 2.2116, "step": 6990000 }, { "epoch": 34.63, "learning_rate": 3.268954335795643e-05, "loss": 2.2157, "step": 6990500 }, { "epoch": 34.64, "learning_rate": 3.268830477153035e-05, "loss": 2.1915, "step": 6991000 }, { "epoch": 34.64, "learning_rate": 3.2687066185104266e-05, "loss": 2.2114, "step": 6991500 }, { "epoch": 34.64, "learning_rate": 3.268582759867818e-05, "loss": 2.1786, "step": 6992000 }, { "epoch": 34.64, "learning_rate": 3.26845890122521e-05, "loss": 2.1929, "step": 6992500 }, { "epoch": 34.65, "learning_rate": 3.268335042582602e-05, "loss": 2.1735, "step": 6993000 }, { "epoch": 34.65, "learning_rate": 3.2682111839399934e-05, "loss": 2.205, "step": 6993500 }, { "epoch": 34.65, "learning_rate": 3.268087325297385e-05, "loss": 2.1987, "step": 6994000 }, { "epoch": 34.65, "learning_rate": 3.267963466654777e-05, "loss": 2.1879, "step": 6994500 }, { "epoch": 34.66, "learning_rate": 3.267839855729453e-05, "loss": 2.2025, "step": 6995000 }, { "epoch": 34.66, "learning_rate": 3.2677159970868447e-05, "loss": 2.1804, "step": 6995500 }, { "epoch": 34.66, "learning_rate": 3.2675926338788074e-05, "loss": 2.2173, "step": 6996000 }, { "epoch": 34.66, "learning_rate": 3.267468775236199e-05, "loss": 2.1693, "step": 6996500 }, { "epoch": 34.67, "learning_rate": 3.26734491659359e-05, "loss": 2.1852, "step": 6997000 }, { "epoch": 34.67, "learning_rate": 3.267221057950982e-05, "loss": 2.2025, "step": 6997500 }, { "epoch": 34.67, "learning_rate": 3.2670971993083735e-05, "loss": 2.1977, "step": 6998000 }, { "epoch": 34.67, "learning_rate": 3.266973340665765e-05, "loss": 2.1816, "step": 6998500 }, { "epoch": 34.68, "learning_rate": 3.266849482023157e-05, "loss": 2.2068, "step": 6999000 }, { "epoch": 34.68, "learning_rate": 3.266725623380548e-05, "loss": 2.1962, "step": 6999500 }, { "epoch": 34.68, "learning_rate": 3.2666017647379396e-05, "loss": 2.2068, "step": 7000000 }, { "epoch": 34.68, "learning_rate": 3.266478153812617e-05, "loss": 2.2036, "step": 7000500 }, { "epoch": 34.69, "learning_rate": 3.266354295170009e-05, "loss": 2.2308, "step": 7001000 }, { "epoch": 34.69, "learning_rate": 3.2662304365274e-05, "loss": 2.1966, "step": 7001500 }, { "epoch": 34.69, "learning_rate": 3.2661065778847915e-05, "loss": 2.207, "step": 7002000 }, { "epoch": 34.69, "learning_rate": 3.265982719242183e-05, "loss": 2.1952, "step": 7002500 }, { "epoch": 34.7, "learning_rate": 3.265859108316861e-05, "loss": 2.1811, "step": 7003000 }, { "epoch": 34.7, "learning_rate": 3.265735497391537e-05, "loss": 2.1887, "step": 7003500 }, { "epoch": 34.7, "learning_rate": 3.265611638748929e-05, "loss": 2.2001, "step": 7004000 }, { "epoch": 34.7, "learning_rate": 3.2654877801063204e-05, "loss": 2.1961, "step": 7004500 }, { "epoch": 34.71, "learning_rate": 3.265363921463712e-05, "loss": 2.21, "step": 7005000 }, { "epoch": 34.71, "learning_rate": 3.265240062821104e-05, "loss": 2.1943, "step": 7005500 }, { "epoch": 34.71, "learning_rate": 3.2651162041784955e-05, "loss": 2.2111, "step": 7006000 }, { "epoch": 34.71, "learning_rate": 3.264992345535887e-05, "loss": 2.1912, "step": 7006500 }, { "epoch": 34.72, "learning_rate": 3.264868486893279e-05, "loss": 2.1894, "step": 7007000 }, { "epoch": 34.72, "learning_rate": 3.26474462825067e-05, "loss": 2.1854, "step": 7007500 }, { "epoch": 34.72, "learning_rate": 3.2646207696080616e-05, "loss": 2.2093, "step": 7008000 }, { "epoch": 34.72, "learning_rate": 3.264497158682739e-05, "loss": 2.2025, "step": 7008500 }, { "epoch": 34.73, "learning_rate": 3.264373300040131e-05, "loss": 2.1801, "step": 7009000 }, { "epoch": 34.73, "learning_rate": 3.2642494413975225e-05, "loss": 2.1916, "step": 7009500 }, { "epoch": 34.73, "learning_rate": 3.264125582754914e-05, "loss": 2.2205, "step": 7010000 }, { "epoch": 34.73, "learning_rate": 3.264001724112305e-05, "loss": 2.1982, "step": 7010500 }, { "epoch": 34.73, "learning_rate": 3.263877865469697e-05, "loss": 2.1991, "step": 7011000 }, { "epoch": 34.74, "learning_rate": 3.2637540068270886e-05, "loss": 2.2063, "step": 7011500 }, { "epoch": 34.74, "learning_rate": 3.26363014818448e-05, "loss": 2.1884, "step": 7012000 }, { "epoch": 34.74, "learning_rate": 3.263506289541872e-05, "loss": 2.1918, "step": 7012500 }, { "epoch": 34.74, "learning_rate": 3.263382430899263e-05, "loss": 2.2083, "step": 7013000 }, { "epoch": 34.75, "learning_rate": 3.2632588199739406e-05, "loss": 2.1801, "step": 7013500 }, { "epoch": 34.75, "learning_rate": 3.2631349613313316e-05, "loss": 2.1982, "step": 7014000 }, { "epoch": 34.75, "learning_rate": 3.263011102688723e-05, "loss": 2.1761, "step": 7014500 }, { "epoch": 34.75, "learning_rate": 3.262887491763401e-05, "loss": 2.2228, "step": 7015000 }, { "epoch": 34.76, "learning_rate": 3.2627636331207925e-05, "loss": 2.1739, "step": 7015500 }, { "epoch": 34.76, "learning_rate": 3.262639774478184e-05, "loss": 2.2176, "step": 7016000 }, { "epoch": 34.76, "learning_rate": 3.2625161635528604e-05, "loss": 2.1896, "step": 7016500 }, { "epoch": 34.76, "learning_rate": 3.262392304910252e-05, "loss": 2.1842, "step": 7017000 }, { "epoch": 34.77, "learning_rate": 3.262268446267644e-05, "loss": 2.2216, "step": 7017500 }, { "epoch": 34.77, "learning_rate": 3.2621445876250355e-05, "loss": 2.1878, "step": 7018000 }, { "epoch": 34.77, "learning_rate": 3.262020728982427e-05, "loss": 2.2043, "step": 7018500 }, { "epoch": 34.77, "learning_rate": 3.261896870339819e-05, "loss": 2.192, "step": 7019000 }, { "epoch": 34.78, "learning_rate": 3.2617730116972106e-05, "loss": 2.2035, "step": 7019500 }, { "epoch": 34.78, "learning_rate": 3.2616491530546016e-05, "loss": 2.2266, "step": 7020000 }, { "epoch": 34.78, "learning_rate": 3.261525294411993e-05, "loss": 2.1957, "step": 7020500 }, { "epoch": 34.78, "learning_rate": 3.261401435769385e-05, "loss": 2.2057, "step": 7021000 }, { "epoch": 34.79, "learning_rate": 3.2612778248440625e-05, "loss": 2.1826, "step": 7021500 }, { "epoch": 34.79, "learning_rate": 3.261153966201454e-05, "loss": 2.1922, "step": 7022000 }, { "epoch": 34.79, "learning_rate": 3.261030107558846e-05, "loss": 2.1976, "step": 7022500 }, { "epoch": 34.79, "learning_rate": 3.2609062489162376e-05, "loss": 2.2111, "step": 7023000 }, { "epoch": 34.8, "learning_rate": 3.2607823902736286e-05, "loss": 2.1923, "step": 7023500 }, { "epoch": 34.8, "learning_rate": 3.2606587793483055e-05, "loss": 2.2014, "step": 7024000 }, { "epoch": 34.8, "learning_rate": 3.260534920705697e-05, "loss": 2.1941, "step": 7024500 }, { "epoch": 34.8, "learning_rate": 3.260411062063089e-05, "loss": 2.1953, "step": 7025000 }, { "epoch": 34.81, "learning_rate": 3.2602872034204806e-05, "loss": 2.213, "step": 7025500 }, { "epoch": 34.81, "learning_rate": 3.2601635924951575e-05, "loss": 2.1574, "step": 7026000 }, { "epoch": 34.81, "learning_rate": 3.260039733852549e-05, "loss": 2.2092, "step": 7026500 }, { "epoch": 34.81, "learning_rate": 3.259915875209941e-05, "loss": 2.2085, "step": 7027000 }, { "epoch": 34.82, "learning_rate": 3.259792264284617e-05, "loss": 2.1949, "step": 7027500 }, { "epoch": 34.82, "learning_rate": 3.259668405642009e-05, "loss": 2.2119, "step": 7028000 }, { "epoch": 34.82, "learning_rate": 3.2595445469994004e-05, "loss": 2.1811, "step": 7028500 }, { "epoch": 34.82, "learning_rate": 3.259420688356792e-05, "loss": 2.1964, "step": 7029000 }, { "epoch": 34.83, "learning_rate": 3.259297077431469e-05, "loss": 2.2017, "step": 7029500 }, { "epoch": 34.83, "learning_rate": 3.259173218788861e-05, "loss": 2.1988, "step": 7030000 }, { "epoch": 34.83, "learning_rate": 3.2590493601462524e-05, "loss": 2.2063, "step": 7030500 }, { "epoch": 34.83, "learning_rate": 3.258925501503644e-05, "loss": 2.1986, "step": 7031000 }, { "epoch": 34.84, "learning_rate": 3.258801642861036e-05, "loss": 2.1811, "step": 7031500 }, { "epoch": 34.84, "learning_rate": 3.2586777842184275e-05, "loss": 2.1918, "step": 7032000 }, { "epoch": 34.84, "learning_rate": 3.258553925575819e-05, "loss": 2.2125, "step": 7032500 }, { "epoch": 34.84, "learning_rate": 3.258430066933211e-05, "loss": 2.209, "step": 7033000 }, { "epoch": 34.85, "learning_rate": 3.2583062082906026e-05, "loss": 2.2314, "step": 7033500 }, { "epoch": 34.85, "learning_rate": 3.258182349647994e-05, "loss": 2.1891, "step": 7034000 }, { "epoch": 34.85, "learning_rate": 3.258058491005386e-05, "loss": 2.2034, "step": 7034500 }, { "epoch": 34.85, "learning_rate": 3.2579346323627776e-05, "loss": 2.1971, "step": 7035000 }, { "epoch": 34.86, "learning_rate": 3.257810773720169e-05, "loss": 2.1942, "step": 7035500 }, { "epoch": 34.86, "learning_rate": 3.25768691507756e-05, "loss": 2.1755, "step": 7036000 }, { "epoch": 34.86, "learning_rate": 3.257563304152237e-05, "loss": 2.1964, "step": 7036500 }, { "epoch": 34.86, "learning_rate": 3.257439940944199e-05, "loss": 2.1967, "step": 7037000 }, { "epoch": 34.87, "learning_rate": 3.257316082301591e-05, "loss": 2.2141, "step": 7037500 }, { "epoch": 34.87, "learning_rate": 3.257192223658983e-05, "loss": 2.2195, "step": 7038000 }, { "epoch": 34.87, "learning_rate": 3.2570683650163744e-05, "loss": 2.2072, "step": 7038500 }, { "epoch": 34.87, "learning_rate": 3.256944506373766e-05, "loss": 2.2148, "step": 7039000 }, { "epoch": 34.88, "learning_rate": 3.256820647731158e-05, "loss": 2.213, "step": 7039500 }, { "epoch": 34.88, "learning_rate": 3.2566967890885494e-05, "loss": 2.1913, "step": 7040000 }, { "epoch": 34.88, "learning_rate": 3.2565731781632256e-05, "loss": 2.2049, "step": 7040500 }, { "epoch": 34.88, "learning_rate": 3.2564493195206173e-05, "loss": 2.2205, "step": 7041000 }, { "epoch": 34.89, "learning_rate": 3.256325460878009e-05, "loss": 2.1822, "step": 7041500 }, { "epoch": 34.89, "learning_rate": 3.256201602235401e-05, "loss": 2.1775, "step": 7042000 }, { "epoch": 34.89, "learning_rate": 3.2560777435927924e-05, "loss": 2.2201, "step": 7042500 }, { "epoch": 34.89, "learning_rate": 3.255953884950184e-05, "loss": 2.1917, "step": 7043000 }, { "epoch": 34.9, "learning_rate": 3.255830026307576e-05, "loss": 2.2215, "step": 7043500 }, { "epoch": 34.9, "learning_rate": 3.2557061676649675e-05, "loss": 2.212, "step": 7044000 }, { "epoch": 34.9, "learning_rate": 3.2555825567396444e-05, "loss": 2.1995, "step": 7044500 }, { "epoch": 34.9, "learning_rate": 3.255458698097036e-05, "loss": 2.2085, "step": 7045000 }, { "epoch": 34.91, "learning_rate": 3.255334839454428e-05, "loss": 2.1986, "step": 7045500 }, { "epoch": 34.91, "learning_rate": 3.2552109808118195e-05, "loss": 2.1863, "step": 7046000 }, { "epoch": 34.91, "learning_rate": 3.255087122169211e-05, "loss": 2.2211, "step": 7046500 }, { "epoch": 34.91, "learning_rate": 3.2549635112438874e-05, "loss": 2.1993, "step": 7047000 }, { "epoch": 34.92, "learning_rate": 3.254839652601279e-05, "loss": 2.1953, "step": 7047500 }, { "epoch": 34.92, "learning_rate": 3.254715793958671e-05, "loss": 2.2076, "step": 7048000 }, { "epoch": 34.92, "learning_rate": 3.2545919353160624e-05, "loss": 2.1892, "step": 7048500 }, { "epoch": 34.92, "learning_rate": 3.254468324390739e-05, "loss": 2.2138, "step": 7049000 }, { "epoch": 34.93, "learning_rate": 3.254344465748131e-05, "loss": 2.1946, "step": 7049500 }, { "epoch": 34.93, "learning_rate": 3.254220607105523e-05, "loss": 2.204, "step": 7050000 }, { "epoch": 34.93, "learning_rate": 3.2540967484629144e-05, "loss": 2.212, "step": 7050500 }, { "epoch": 34.93, "learning_rate": 3.253972889820306e-05, "loss": 2.1801, "step": 7051000 }, { "epoch": 34.94, "learning_rate": 3.253849031177698e-05, "loss": 2.1985, "step": 7051500 }, { "epoch": 34.94, "learning_rate": 3.2537251725350895e-05, "loss": 2.1711, "step": 7052000 }, { "epoch": 34.94, "learning_rate": 3.253601313892481e-05, "loss": 2.2165, "step": 7052500 }, { "epoch": 34.94, "learning_rate": 3.2534777029671574e-05, "loss": 2.2252, "step": 7053000 }, { "epoch": 34.95, "learning_rate": 3.253353844324549e-05, "loss": 2.1821, "step": 7053500 }, { "epoch": 34.95, "learning_rate": 3.2532302333992266e-05, "loss": 2.1778, "step": 7054000 }, { "epoch": 34.95, "learning_rate": 3.253106374756618e-05, "loss": 2.2102, "step": 7054500 }, { "epoch": 34.95, "learning_rate": 3.252982516114009e-05, "loss": 2.2164, "step": 7055000 }, { "epoch": 34.96, "learning_rate": 3.252858657471401e-05, "loss": 2.2235, "step": 7055500 }, { "epoch": 34.96, "learning_rate": 3.252734798828793e-05, "loss": 2.2477, "step": 7056000 }, { "epoch": 34.96, "learning_rate": 3.2526111879034696e-05, "loss": 2.1983, "step": 7056500 }, { "epoch": 34.96, "learning_rate": 3.252487329260861e-05, "loss": 2.2055, "step": 7057000 }, { "epoch": 34.97, "learning_rate": 3.252363718335538e-05, "loss": 2.2165, "step": 7057500 }, { "epoch": 34.97, "learning_rate": 3.25223985969293e-05, "loss": 2.2136, "step": 7058000 }, { "epoch": 34.97, "learning_rate": 3.2521160010503216e-05, "loss": 2.2037, "step": 7058500 }, { "epoch": 34.97, "learning_rate": 3.251992142407713e-05, "loss": 2.1987, "step": 7059000 }, { "epoch": 34.98, "learning_rate": 3.251868283765105e-05, "loss": 2.2126, "step": 7059500 }, { "epoch": 34.98, "learning_rate": 3.2517444251224966e-05, "loss": 2.2202, "step": 7060000 }, { "epoch": 34.98, "learning_rate": 3.251620566479888e-05, "loss": 2.2023, "step": 7060500 }, { "epoch": 34.98, "learning_rate": 3.25149670783728e-05, "loss": 2.2168, "step": 7061000 }, { "epoch": 34.99, "learning_rate": 3.251372849194671e-05, "loss": 2.1998, "step": 7061500 }, { "epoch": 34.99, "learning_rate": 3.251249238269348e-05, "loss": 2.1941, "step": 7062000 }, { "epoch": 34.99, "learning_rate": 3.2511253796267396e-05, "loss": 2.1872, "step": 7062500 }, { "epoch": 34.99, "learning_rate": 3.2510017687014165e-05, "loss": 2.2097, "step": 7063000 }, { "epoch": 35.0, "learning_rate": 3.250877910058808e-05, "loss": 2.2149, "step": 7063500 }, { "epoch": 35.0, "learning_rate": 3.2507540514162e-05, "loss": 2.2237, "step": 7064000 }, { "epoch": 35.0, "learning_rate": 3.2506301927735916e-05, "loss": 2.201, "step": 7064500 }, { "epoch": 35.0, "eval_accuracy": 0.662072274008178, "eval_accuracy_mlm": 0.6186920132733609, "eval_accuracy_nsp": 0.8666060033181806, "eval_loss": 2.2997031211853027, "eval_runtime": 145.6764, "eval_samples_per_second": 1750.173, "eval_steps_per_second": 72.929, "step": 7064505 }, { "epoch": 35.0, "learning_rate": 3.250506334130983e-05, "loss": 2.1687, "step": 7065000 }, { "epoch": 35.0, "learning_rate": 3.25038272320566e-05, "loss": 2.1827, "step": 7065500 }, { "epoch": 35.01, "learning_rate": 3.250258864563052e-05, "loss": 2.1597, "step": 7066000 }, { "epoch": 35.01, "learning_rate": 3.2501350059204435e-05, "loss": 2.1611, "step": 7066500 }, { "epoch": 35.01, "learning_rate": 3.250011147277835e-05, "loss": 2.1661, "step": 7067000 }, { "epoch": 35.01, "learning_rate": 3.249887288635227e-05, "loss": 2.158, "step": 7067500 }, { "epoch": 35.02, "learning_rate": 3.249763677709903e-05, "loss": 2.1792, "step": 7068000 }, { "epoch": 35.02, "learning_rate": 3.249639819067295e-05, "loss": 2.1561, "step": 7068500 }, { "epoch": 35.02, "learning_rate": 3.2495159604246865e-05, "loss": 2.1664, "step": 7069000 }, { "epoch": 35.02, "learning_rate": 3.2493923494993634e-05, "loss": 2.1907, "step": 7069500 }, { "epoch": 35.03, "learning_rate": 3.249268490856755e-05, "loss": 2.1791, "step": 7070000 }, { "epoch": 35.03, "learning_rate": 3.249144632214147e-05, "loss": 2.1948, "step": 7070500 }, { "epoch": 35.03, "learning_rate": 3.2490207735715385e-05, "loss": 2.1503, "step": 7071000 }, { "epoch": 35.03, "learning_rate": 3.24889691492893e-05, "loss": 2.1801, "step": 7071500 }, { "epoch": 35.04, "learning_rate": 3.248773056286322e-05, "loss": 2.1924, "step": 7072000 }, { "epoch": 35.04, "learning_rate": 3.2486491976437135e-05, "loss": 2.1651, "step": 7072500 }, { "epoch": 35.04, "learning_rate": 3.248525339001105e-05, "loss": 2.1392, "step": 7073000 }, { "epoch": 35.04, "learning_rate": 3.248401480358497e-05, "loss": 2.1702, "step": 7073500 }, { "epoch": 35.05, "learning_rate": 3.2482776217158886e-05, "loss": 2.1746, "step": 7074000 }, { "epoch": 35.05, "learning_rate": 3.24815376307328e-05, "loss": 2.1503, "step": 7074500 }, { "epoch": 35.05, "learning_rate": 3.248029904430672e-05, "loss": 2.1645, "step": 7075000 }, { "epoch": 35.05, "learning_rate": 3.247906045788063e-05, "loss": 2.1635, "step": 7075500 }, { "epoch": 35.06, "learning_rate": 3.247782187145455e-05, "loss": 2.1599, "step": 7076000 }, { "epoch": 35.06, "learning_rate": 3.2476583285028464e-05, "loss": 2.164, "step": 7076500 }, { "epoch": 35.06, "learning_rate": 3.247534469860238e-05, "loss": 2.1623, "step": 7077000 }, { "epoch": 35.06, "learning_rate": 3.24741061121763e-05, "loss": 2.1668, "step": 7077500 }, { "epoch": 35.07, "learning_rate": 3.247287000292307e-05, "loss": 2.1809, "step": 7078000 }, { "epoch": 35.07, "learning_rate": 3.2471633893669835e-05, "loss": 2.1799, "step": 7078500 }, { "epoch": 35.07, "learning_rate": 3.247039530724375e-05, "loss": 2.1867, "step": 7079000 }, { "epoch": 35.07, "learning_rate": 3.246915672081767e-05, "loss": 2.1799, "step": 7079500 }, { "epoch": 35.08, "learning_rate": 3.2467918134391586e-05, "loss": 2.1792, "step": 7080000 }, { "epoch": 35.08, "learning_rate": 3.24666795479655e-05, "loss": 2.1811, "step": 7080500 }, { "epoch": 35.08, "learning_rate": 3.246544096153942e-05, "loss": 2.1582, "step": 7081000 }, { "epoch": 35.08, "learning_rate": 3.246420237511334e-05, "loss": 2.1756, "step": 7081500 }, { "epoch": 35.09, "learning_rate": 3.2462963788687254e-05, "loss": 2.2023, "step": 7082000 }, { "epoch": 35.09, "learning_rate": 3.2461725202261164e-05, "loss": 2.1908, "step": 7082500 }, { "epoch": 35.09, "learning_rate": 3.246048909300793e-05, "loss": 2.1654, "step": 7083000 }, { "epoch": 35.09, "learning_rate": 3.245925050658185e-05, "loss": 2.1965, "step": 7083500 }, { "epoch": 35.1, "learning_rate": 3.245801192015577e-05, "loss": 2.1755, "step": 7084000 }, { "epoch": 35.1, "learning_rate": 3.2456773333729684e-05, "loss": 2.1879, "step": 7084500 }, { "epoch": 35.1, "learning_rate": 3.24555347473036e-05, "loss": 2.1588, "step": 7085000 }, { "epoch": 35.1, "learning_rate": 3.245429863805037e-05, "loss": 2.1715, "step": 7085500 }, { "epoch": 35.11, "learning_rate": 3.2453060051624286e-05, "loss": 2.1767, "step": 7086000 }, { "epoch": 35.11, "learning_rate": 3.24518214651982e-05, "loss": 2.1845, "step": 7086500 }, { "epoch": 35.11, "learning_rate": 3.245058287877212e-05, "loss": 2.1522, "step": 7087000 }, { "epoch": 35.11, "learning_rate": 3.244934429234604e-05, "loss": 2.1601, "step": 7087500 }, { "epoch": 35.12, "learning_rate": 3.244811066026565e-05, "loss": 2.1818, "step": 7088000 }, { "epoch": 35.12, "learning_rate": 3.244687455101243e-05, "loss": 2.198, "step": 7088500 }, { "epoch": 35.12, "learning_rate": 3.2445635964586344e-05, "loss": 2.1568, "step": 7089000 }, { "epoch": 35.12, "learning_rate": 3.244439737816026e-05, "loss": 2.1736, "step": 7089500 }, { "epoch": 35.13, "learning_rate": 3.244315879173417e-05, "loss": 2.1725, "step": 7090000 }, { "epoch": 35.13, "learning_rate": 3.244192020530809e-05, "loss": 2.1716, "step": 7090500 }, { "epoch": 35.13, "learning_rate": 3.2440681618882005e-05, "loss": 2.1593, "step": 7091000 }, { "epoch": 35.13, "learning_rate": 3.243944303245592e-05, "loss": 2.1417, "step": 7091500 }, { "epoch": 35.14, "learning_rate": 3.243820444602984e-05, "loss": 2.1594, "step": 7092000 }, { "epoch": 35.14, "learning_rate": 3.243696585960375e-05, "loss": 2.1771, "step": 7092500 }, { "epoch": 35.14, "learning_rate": 3.2435727273177665e-05, "loss": 2.1621, "step": 7093000 }, { "epoch": 35.14, "learning_rate": 3.243448868675158e-05, "loss": 2.1602, "step": 7093500 }, { "epoch": 35.15, "learning_rate": 3.24332501003255e-05, "loss": 2.1644, "step": 7094000 }, { "epoch": 35.15, "learning_rate": 3.2432011513899416e-05, "loss": 2.1716, "step": 7094500 }, { "epoch": 35.15, "learning_rate": 3.243077292747333e-05, "loss": 2.1893, "step": 7095000 }, { "epoch": 35.15, "learning_rate": 3.242953434104725e-05, "loss": 2.1837, "step": 7095500 }, { "epoch": 35.16, "learning_rate": 3.242829575462117e-05, "loss": 2.1803, "step": 7096000 }, { "epoch": 35.16, "learning_rate": 3.2427059645367936e-05, "loss": 2.1727, "step": 7096500 }, { "epoch": 35.16, "learning_rate": 3.242582105894185e-05, "loss": 2.1676, "step": 7097000 }, { "epoch": 35.16, "learning_rate": 3.242458247251577e-05, "loss": 2.1982, "step": 7097500 }, { "epoch": 35.17, "learning_rate": 3.242334388608969e-05, "loss": 2.203, "step": 7098000 }, { "epoch": 35.17, "learning_rate": 3.2422105299663604e-05, "loss": 2.1756, "step": 7098500 }, { "epoch": 35.17, "learning_rate": 3.2420869190410366e-05, "loss": 2.1352, "step": 7099000 }, { "epoch": 35.17, "learning_rate": 3.241963060398428e-05, "loss": 2.1828, "step": 7099500 }, { "epoch": 35.18, "learning_rate": 3.24183920175582e-05, "loss": 2.1801, "step": 7100000 }, { "epoch": 35.18, "learning_rate": 3.2417153431132116e-05, "loss": 2.1608, "step": 7100500 }, { "epoch": 35.18, "learning_rate": 3.241591484470603e-05, "loss": 2.1727, "step": 7101000 }, { "epoch": 35.18, "learning_rate": 3.241467625827995e-05, "loss": 2.1829, "step": 7101500 }, { "epoch": 35.19, "learning_rate": 3.241343767185387e-05, "loss": 2.1832, "step": 7102000 }, { "epoch": 35.19, "learning_rate": 3.2412199085427784e-05, "loss": 2.1647, "step": 7102500 }, { "epoch": 35.19, "learning_rate": 3.241096297617455e-05, "loss": 2.153, "step": 7103000 }, { "epoch": 35.19, "learning_rate": 3.240972438974847e-05, "loss": 2.164, "step": 7103500 }, { "epoch": 35.2, "learning_rate": 3.240848580332239e-05, "loss": 2.1728, "step": 7104000 }, { "epoch": 35.2, "learning_rate": 3.2407247216896304e-05, "loss": 2.1788, "step": 7104500 }, { "epoch": 35.2, "learning_rate": 3.240600863047022e-05, "loss": 2.1901, "step": 7105000 }, { "epoch": 35.2, "learning_rate": 3.240477004404414e-05, "loss": 2.1619, "step": 7105500 }, { "epoch": 35.21, "learning_rate": 3.2403531457618054e-05, "loss": 2.1746, "step": 7106000 }, { "epoch": 35.21, "learning_rate": 3.240229287119197e-05, "loss": 2.19, "step": 7106500 }, { "epoch": 35.21, "learning_rate": 3.240105428476589e-05, "loss": 2.1823, "step": 7107000 }, { "epoch": 35.21, "learning_rate": 3.239981817551265e-05, "loss": 2.1837, "step": 7107500 }, { "epoch": 35.22, "learning_rate": 3.239857958908657e-05, "loss": 2.1808, "step": 7108000 }, { "epoch": 35.22, "learning_rate": 3.2397341002660484e-05, "loss": 2.1844, "step": 7108500 }, { "epoch": 35.22, "learning_rate": 3.23961024162344e-05, "loss": 2.1701, "step": 7109000 }, { "epoch": 35.22, "learning_rate": 3.239486382980832e-05, "loss": 2.1671, "step": 7109500 }, { "epoch": 35.23, "learning_rate": 3.2393625243382235e-05, "loss": 2.2084, "step": 7110000 }, { "epoch": 35.23, "learning_rate": 3.2392389134129004e-05, "loss": 2.1831, "step": 7110500 }, { "epoch": 35.23, "learning_rate": 3.239115054770292e-05, "loss": 2.1894, "step": 7111000 }, { "epoch": 35.23, "learning_rate": 3.238991196127684e-05, "loss": 2.1976, "step": 7111500 }, { "epoch": 35.24, "learning_rate": 3.2388673374850755e-05, "loss": 2.1777, "step": 7112000 }, { "epoch": 35.24, "learning_rate": 3.238743726559752e-05, "loss": 2.1676, "step": 7112500 }, { "epoch": 35.24, "learning_rate": 3.2386198679171434e-05, "loss": 2.1834, "step": 7113000 }, { "epoch": 35.24, "learning_rate": 3.238496504709106e-05, "loss": 2.1865, "step": 7113500 }, { "epoch": 35.25, "learning_rate": 3.238372646066498e-05, "loss": 2.1847, "step": 7114000 }, { "epoch": 35.25, "learning_rate": 3.238249035141174e-05, "loss": 2.1596, "step": 7114500 }, { "epoch": 35.25, "learning_rate": 3.238125176498566e-05, "loss": 2.1785, "step": 7115000 }, { "epoch": 35.25, "learning_rate": 3.2380013178559574e-05, "loss": 2.1876, "step": 7115500 }, { "epoch": 35.26, "learning_rate": 3.237877459213349e-05, "loss": 2.1801, "step": 7116000 }, { "epoch": 35.26, "learning_rate": 3.237753600570741e-05, "loss": 2.1576, "step": 7116500 }, { "epoch": 35.26, "learning_rate": 3.2376297419281325e-05, "loss": 2.1708, "step": 7117000 }, { "epoch": 35.26, "learning_rate": 3.237505883285524e-05, "loss": 2.188, "step": 7117500 }, { "epoch": 35.27, "learning_rate": 3.237382024642916e-05, "loss": 2.1749, "step": 7118000 }, { "epoch": 35.27, "learning_rate": 3.237258166000307e-05, "loss": 2.1658, "step": 7118500 }, { "epoch": 35.27, "learning_rate": 3.2371345550749844e-05, "loss": 2.1957, "step": 7119000 }, { "epoch": 35.27, "learning_rate": 3.237010696432376e-05, "loss": 2.1925, "step": 7119500 }, { "epoch": 35.27, "learning_rate": 3.236886837789768e-05, "loss": 2.1954, "step": 7120000 }, { "epoch": 35.28, "learning_rate": 3.2367629791471595e-05, "loss": 2.171, "step": 7120500 }, { "epoch": 35.28, "learning_rate": 3.236639120504551e-05, "loss": 2.1574, "step": 7121000 }, { "epoch": 35.28, "learning_rate": 3.236515261861942e-05, "loss": 2.1797, "step": 7121500 }, { "epoch": 35.28, "learning_rate": 3.236391403219334e-05, "loss": 2.1708, "step": 7122000 }, { "epoch": 35.29, "learning_rate": 3.2362675445767256e-05, "loss": 2.1776, "step": 7122500 }, { "epoch": 35.29, "learning_rate": 3.236143685934117e-05, "loss": 2.1824, "step": 7123000 }, { "epoch": 35.29, "learning_rate": 3.236019827291509e-05, "loss": 2.1585, "step": 7123500 }, { "epoch": 35.29, "learning_rate": 3.235895968648901e-05, "loss": 2.1665, "step": 7124000 }, { "epoch": 35.3, "learning_rate": 3.2357721100062924e-05, "loss": 2.162, "step": 7124500 }, { "epoch": 35.3, "learning_rate": 3.235648251363684e-05, "loss": 2.1863, "step": 7125000 }, { "epoch": 35.3, "learning_rate": 3.235524392721076e-05, "loss": 2.1742, "step": 7125500 }, { "epoch": 35.3, "learning_rate": 3.2354005340784674e-05, "loss": 2.1806, "step": 7126000 }, { "epoch": 35.31, "learning_rate": 3.2352766754358585e-05, "loss": 2.1616, "step": 7126500 }, { "epoch": 35.31, "learning_rate": 3.23515281679325e-05, "loss": 2.1634, "step": 7127000 }, { "epoch": 35.31, "learning_rate": 3.235029453585213e-05, "loss": 2.1948, "step": 7127500 }, { "epoch": 35.31, "learning_rate": 3.234905594942604e-05, "loss": 2.1992, "step": 7128000 }, { "epoch": 35.32, "learning_rate": 3.2347817362999956e-05, "loss": 2.1518, "step": 7128500 }, { "epoch": 35.32, "learning_rate": 3.234657877657387e-05, "loss": 2.1799, "step": 7129000 }, { "epoch": 35.32, "learning_rate": 3.234534266732064e-05, "loss": 2.1858, "step": 7129500 }, { "epoch": 35.32, "learning_rate": 3.234410408089456e-05, "loss": 2.1683, "step": 7130000 }, { "epoch": 35.33, "learning_rate": 3.2342865494468476e-05, "loss": 2.1536, "step": 7130500 }, { "epoch": 35.33, "learning_rate": 3.2341626908042386e-05, "loss": 2.1983, "step": 7131000 }, { "epoch": 35.33, "learning_rate": 3.23403883216163e-05, "loss": 2.1816, "step": 7131500 }, { "epoch": 35.33, "learning_rate": 3.233914973519022e-05, "loss": 2.1769, "step": 7132000 }, { "epoch": 35.34, "learning_rate": 3.2337911148764137e-05, "loss": 2.1931, "step": 7132500 }, { "epoch": 35.34, "learning_rate": 3.2336672562338054e-05, "loss": 2.1675, "step": 7133000 }, { "epoch": 35.34, "learning_rate": 3.233543397591197e-05, "loss": 2.1926, "step": 7133500 }, { "epoch": 35.34, "learning_rate": 3.233419538948589e-05, "loss": 2.1825, "step": 7134000 }, { "epoch": 35.35, "learning_rate": 3.2332959280232656e-05, "loss": 2.196, "step": 7134500 }, { "epoch": 35.35, "learning_rate": 3.2331723170979425e-05, "loss": 2.173, "step": 7135000 }, { "epoch": 35.35, "learning_rate": 3.2330487061726194e-05, "loss": 2.1661, "step": 7135500 }, { "epoch": 35.35, "learning_rate": 3.232924847530011e-05, "loss": 2.1848, "step": 7136000 }, { "epoch": 35.36, "learning_rate": 3.232800988887403e-05, "loss": 2.1861, "step": 7136500 }, { "epoch": 35.36, "learning_rate": 3.2326771302447945e-05, "loss": 2.1877, "step": 7137000 }, { "epoch": 35.36, "learning_rate": 3.232553271602186e-05, "loss": 2.2125, "step": 7137500 }, { "epoch": 35.36, "learning_rate": 3.232429412959578e-05, "loss": 2.1783, "step": 7138000 }, { "epoch": 35.37, "learning_rate": 3.232305802034255e-05, "loss": 2.1706, "step": 7138500 }, { "epoch": 35.37, "learning_rate": 3.2321819433916464e-05, "loss": 2.1612, "step": 7139000 }, { "epoch": 35.37, "learning_rate": 3.232058084749038e-05, "loss": 2.1701, "step": 7139500 }, { "epoch": 35.37, "learning_rate": 3.23193422610643e-05, "loss": 2.1693, "step": 7140000 }, { "epoch": 35.38, "learning_rate": 3.231810615181106e-05, "loss": 2.1877, "step": 7140500 }, { "epoch": 35.38, "learning_rate": 3.231686756538498e-05, "loss": 2.1918, "step": 7141000 }, { "epoch": 35.38, "learning_rate": 3.2315628978958894e-05, "loss": 2.1736, "step": 7141500 }, { "epoch": 35.38, "learning_rate": 3.231439039253281e-05, "loss": 2.1831, "step": 7142000 }, { "epoch": 35.39, "learning_rate": 3.231315180610673e-05, "loss": 2.1866, "step": 7142500 }, { "epoch": 35.39, "learning_rate": 3.2311913219680645e-05, "loss": 2.2025, "step": 7143000 }, { "epoch": 35.39, "learning_rate": 3.231067463325456e-05, "loss": 2.1617, "step": 7143500 }, { "epoch": 35.39, "learning_rate": 3.230943604682848e-05, "loss": 2.2045, "step": 7144000 }, { "epoch": 35.4, "learning_rate": 3.2308197460402395e-05, "loss": 2.1891, "step": 7144500 }, { "epoch": 35.4, "learning_rate": 3.230695887397631e-05, "loss": 2.1938, "step": 7145000 }, { "epoch": 35.4, "learning_rate": 3.230572028755023e-05, "loss": 2.1791, "step": 7145500 }, { "epoch": 35.4, "learning_rate": 3.2304484178297e-05, "loss": 2.1913, "step": 7146000 }, { "epoch": 35.41, "learning_rate": 3.2303245591870915e-05, "loss": 2.1784, "step": 7146500 }, { "epoch": 35.41, "learning_rate": 3.230200700544483e-05, "loss": 2.1682, "step": 7147000 }, { "epoch": 35.41, "learning_rate": 3.230076841901874e-05, "loss": 2.1989, "step": 7147500 }, { "epoch": 35.41, "learning_rate": 3.229952983259266e-05, "loss": 2.173, "step": 7148000 }, { "epoch": 35.42, "learning_rate": 3.229829372333943e-05, "loss": 2.1915, "step": 7148500 }, { "epoch": 35.42, "learning_rate": 3.2297055136913345e-05, "loss": 2.1816, "step": 7149000 }, { "epoch": 35.42, "learning_rate": 3.229581655048726e-05, "loss": 2.1887, "step": 7149500 }, { "epoch": 35.42, "learning_rate": 3.229458044123403e-05, "loss": 2.1812, "step": 7150000 }, { "epoch": 35.43, "learning_rate": 3.229334185480795e-05, "loss": 2.1866, "step": 7150500 }, { "epoch": 35.43, "learning_rate": 3.2292103268381864e-05, "loss": 2.1714, "step": 7151000 }, { "epoch": 35.43, "learning_rate": 3.229086468195578e-05, "loss": 2.1648, "step": 7151500 }, { "epoch": 35.43, "learning_rate": 3.22896260955297e-05, "loss": 2.1689, "step": 7152000 }, { "epoch": 35.44, "learning_rate": 3.2288387509103615e-05, "loss": 2.1751, "step": 7152500 }, { "epoch": 35.44, "learning_rate": 3.228715139985038e-05, "loss": 2.1721, "step": 7153000 }, { "epoch": 35.44, "learning_rate": 3.2285912813424294e-05, "loss": 2.1756, "step": 7153500 }, { "epoch": 35.44, "learning_rate": 3.228467422699821e-05, "loss": 2.1678, "step": 7154000 }, { "epoch": 35.45, "learning_rate": 3.228343564057213e-05, "loss": 2.2027, "step": 7154500 }, { "epoch": 35.45, "learning_rate": 3.22821995313189e-05, "loss": 2.2017, "step": 7155000 }, { "epoch": 35.45, "learning_rate": 3.2280960944892814e-05, "loss": 2.1792, "step": 7155500 }, { "epoch": 35.45, "learning_rate": 3.227972483563958e-05, "loss": 2.203, "step": 7156000 }, { "epoch": 35.46, "learning_rate": 3.22784862492135e-05, "loss": 2.174, "step": 7156500 }, { "epoch": 35.46, "learning_rate": 3.2277247662787416e-05, "loss": 2.1771, "step": 7157000 }, { "epoch": 35.46, "learning_rate": 3.2276009076361327e-05, "loss": 2.2021, "step": 7157500 }, { "epoch": 35.46, "learning_rate": 3.2274770489935244e-05, "loss": 2.1874, "step": 7158000 }, { "epoch": 35.47, "learning_rate": 3.227353190350916e-05, "loss": 2.1839, "step": 7158500 }, { "epoch": 35.47, "learning_rate": 3.227229331708308e-05, "loss": 2.1788, "step": 7159000 }, { "epoch": 35.47, "learning_rate": 3.2271054730656994e-05, "loss": 2.1972, "step": 7159500 }, { "epoch": 35.47, "learning_rate": 3.226981614423091e-05, "loss": 2.1936, "step": 7160000 }, { "epoch": 35.48, "learning_rate": 3.226857755780483e-05, "loss": 2.1646, "step": 7160500 }, { "epoch": 35.48, "learning_rate": 3.2267338971378745e-05, "loss": 2.1965, "step": 7161000 }, { "epoch": 35.48, "learning_rate": 3.226610038495266e-05, "loss": 2.1918, "step": 7161500 }, { "epoch": 35.48, "learning_rate": 3.226486179852658e-05, "loss": 2.2044, "step": 7162000 }, { "epoch": 35.49, "learning_rate": 3.2263623212100496e-05, "loss": 2.1982, "step": 7162500 }, { "epoch": 35.49, "learning_rate": 3.2262387102847265e-05, "loss": 2.1941, "step": 7163000 }, { "epoch": 35.49, "learning_rate": 3.226114851642118e-05, "loss": 2.1976, "step": 7163500 }, { "epoch": 35.49, "learning_rate": 3.22599099299951e-05, "loss": 2.1736, "step": 7164000 }, { "epoch": 35.5, "learning_rate": 3.2258671343569015e-05, "loss": 2.1829, "step": 7164500 }, { "epoch": 35.5, "learning_rate": 3.225743275714293e-05, "loss": 2.1834, "step": 7165000 }, { "epoch": 35.5, "learning_rate": 3.225619417071685e-05, "loss": 2.1704, "step": 7165500 }, { "epoch": 35.5, "learning_rate": 3.2254955584290766e-05, "loss": 2.1748, "step": 7166000 }, { "epoch": 35.51, "learning_rate": 3.225371699786468e-05, "loss": 2.1674, "step": 7166500 }, { "epoch": 35.51, "learning_rate": 3.22524784114386e-05, "loss": 2.2049, "step": 7167000 }, { "epoch": 35.51, "learning_rate": 3.225123982501251e-05, "loss": 2.1529, "step": 7167500 }, { "epoch": 35.51, "learning_rate": 3.225000619293213e-05, "loss": 2.1924, "step": 7168000 }, { "epoch": 35.52, "learning_rate": 3.224876760650605e-05, "loss": 2.1786, "step": 7168500 }, { "epoch": 35.52, "learning_rate": 3.2247529020079965e-05, "loss": 2.1862, "step": 7169000 }, { "epoch": 35.52, "learning_rate": 3.224629043365388e-05, "loss": 2.1665, "step": 7169500 }, { "epoch": 35.52, "learning_rate": 3.22450518472278e-05, "loss": 2.1811, "step": 7170000 }, { "epoch": 35.53, "learning_rate": 3.224381573797457e-05, "loss": 2.1747, "step": 7170500 }, { "epoch": 35.53, "learning_rate": 3.224257715154848e-05, "loss": 2.2019, "step": 7171000 }, { "epoch": 35.53, "learning_rate": 3.2241338565122395e-05, "loss": 2.2123, "step": 7171500 }, { "epoch": 35.53, "learning_rate": 3.224009997869631e-05, "loss": 2.1879, "step": 7172000 }, { "epoch": 35.54, "learning_rate": 3.223886386944308e-05, "loss": 2.2036, "step": 7172500 }, { "epoch": 35.54, "learning_rate": 3.2237625283017e-05, "loss": 2.1967, "step": 7173000 }, { "epoch": 35.54, "learning_rate": 3.2236386696590914e-05, "loss": 2.191, "step": 7173500 }, { "epoch": 35.54, "learning_rate": 3.223514811016483e-05, "loss": 2.1963, "step": 7174000 }, { "epoch": 35.54, "learning_rate": 3.223390952373875e-05, "loss": 2.1806, "step": 7174500 }, { "epoch": 35.55, "learning_rate": 3.2232670937312665e-05, "loss": 2.202, "step": 7175000 }, { "epoch": 35.55, "learning_rate": 3.2231434828059434e-05, "loss": 2.1755, "step": 7175500 }, { "epoch": 35.55, "learning_rate": 3.223019624163335e-05, "loss": 2.1722, "step": 7176000 }, { "epoch": 35.55, "learning_rate": 3.222895765520727e-05, "loss": 2.1985, "step": 7176500 }, { "epoch": 35.56, "learning_rate": 3.2227719068781185e-05, "loss": 2.1716, "step": 7177000 }, { "epoch": 35.56, "learning_rate": 3.22264804823551e-05, "loss": 2.1865, "step": 7177500 }, { "epoch": 35.56, "learning_rate": 3.222524189592901e-05, "loss": 2.1747, "step": 7178000 }, { "epoch": 35.56, "learning_rate": 3.222400578667578e-05, "loss": 2.1841, "step": 7178500 }, { "epoch": 35.57, "learning_rate": 3.22227672002497e-05, "loss": 2.2028, "step": 7179000 }, { "epoch": 35.57, "learning_rate": 3.2221528613823614e-05, "loss": 2.1779, "step": 7179500 }, { "epoch": 35.57, "learning_rate": 3.222029002739753e-05, "loss": 2.2021, "step": 7180000 }, { "epoch": 35.57, "learning_rate": 3.221905391814431e-05, "loss": 2.1877, "step": 7180500 }, { "epoch": 35.58, "learning_rate": 3.2217815331718224e-05, "loss": 2.192, "step": 7181000 }, { "epoch": 35.58, "learning_rate": 3.2216576745292134e-05, "loss": 2.1805, "step": 7181500 }, { "epoch": 35.58, "learning_rate": 3.221533815886605e-05, "loss": 2.2088, "step": 7182000 }, { "epoch": 35.58, "learning_rate": 3.221409957243997e-05, "loss": 2.1966, "step": 7182500 }, { "epoch": 35.59, "learning_rate": 3.2212860986013885e-05, "loss": 2.1925, "step": 7183000 }, { "epoch": 35.59, "learning_rate": 3.22116223995878e-05, "loss": 2.1957, "step": 7183500 }, { "epoch": 35.59, "learning_rate": 3.221038629033457e-05, "loss": 2.1982, "step": 7184000 }, { "epoch": 35.59, "learning_rate": 3.220914770390848e-05, "loss": 2.2168, "step": 7184500 }, { "epoch": 35.6, "learning_rate": 3.22079091174824e-05, "loss": 2.1911, "step": 7185000 }, { "epoch": 35.6, "learning_rate": 3.2206670531056314e-05, "loss": 2.1777, "step": 7185500 }, { "epoch": 35.6, "learning_rate": 3.220543194463023e-05, "loss": 2.1778, "step": 7186000 }, { "epoch": 35.6, "learning_rate": 3.220419335820415e-05, "loss": 2.1783, "step": 7186500 }, { "epoch": 35.61, "learning_rate": 3.2202954771778065e-05, "loss": 2.2068, "step": 7187000 }, { "epoch": 35.61, "learning_rate": 3.220171618535198e-05, "loss": 2.2157, "step": 7187500 }, { "epoch": 35.61, "learning_rate": 3.22004775989259e-05, "loss": 2.1735, "step": 7188000 }, { "epoch": 35.61, "learning_rate": 3.219924148967267e-05, "loss": 2.166, "step": 7188500 }, { "epoch": 35.62, "learning_rate": 3.2198002903246585e-05, "loss": 2.195, "step": 7189000 }, { "epoch": 35.62, "learning_rate": 3.2196766793993354e-05, "loss": 2.2019, "step": 7189500 }, { "epoch": 35.62, "learning_rate": 3.219552820756727e-05, "loss": 2.1698, "step": 7190000 }, { "epoch": 35.62, "learning_rate": 3.219428962114119e-05, "loss": 2.1708, "step": 7190500 }, { "epoch": 35.63, "learning_rate": 3.21930510347151e-05, "loss": 2.198, "step": 7191000 }, { "epoch": 35.63, "learning_rate": 3.2191812448289015e-05, "loss": 2.1899, "step": 7191500 }, { "epoch": 35.63, "learning_rate": 3.219057633903579e-05, "loss": 2.188, "step": 7192000 }, { "epoch": 35.63, "learning_rate": 3.218934022978255e-05, "loss": 2.2009, "step": 7192500 }, { "epoch": 35.64, "learning_rate": 3.218810164335647e-05, "loss": 2.189, "step": 7193000 }, { "epoch": 35.64, "learning_rate": 3.2186863056930386e-05, "loss": 2.1848, "step": 7193500 }, { "epoch": 35.64, "learning_rate": 3.21856244705043e-05, "loss": 2.2159, "step": 7194000 }, { "epoch": 35.64, "learning_rate": 3.218438588407822e-05, "loss": 2.1881, "step": 7194500 }, { "epoch": 35.65, "learning_rate": 3.218314729765214e-05, "loss": 2.1793, "step": 7195000 }, { "epoch": 35.65, "learning_rate": 3.2181908711226054e-05, "loss": 2.1856, "step": 7195500 }, { "epoch": 35.65, "learning_rate": 3.218067012479997e-05, "loss": 2.1974, "step": 7196000 }, { "epoch": 35.65, "learning_rate": 3.217943153837389e-05, "loss": 2.1806, "step": 7196500 }, { "epoch": 35.66, "learning_rate": 3.21781929519478e-05, "loss": 2.1729, "step": 7197000 }, { "epoch": 35.66, "learning_rate": 3.2176954365521715e-05, "loss": 2.1895, "step": 7197500 }, { "epoch": 35.66, "learning_rate": 3.217571577909563e-05, "loss": 2.2215, "step": 7198000 }, { "epoch": 35.66, "learning_rate": 3.217447719266955e-05, "loss": 2.1851, "step": 7198500 }, { "epoch": 35.67, "learning_rate": 3.2173238606243465e-05, "loss": 2.1652, "step": 7199000 }, { "epoch": 35.67, "learning_rate": 3.217200001981738e-05, "loss": 2.192, "step": 7199500 }, { "epoch": 35.67, "learning_rate": 3.21707614333913e-05, "loss": 2.2075, "step": 7200000 }, { "epoch": 35.67, "learning_rate": 3.2169522846965216e-05, "loss": 2.1748, "step": 7200500 }, { "epoch": 35.68, "learning_rate": 3.2168286737711985e-05, "loss": 2.1801, "step": 7201000 }, { "epoch": 35.68, "learning_rate": 3.21670481512859e-05, "loss": 2.1767, "step": 7201500 }, { "epoch": 35.68, "learning_rate": 3.216580956485982e-05, "loss": 2.1897, "step": 7202000 }, { "epoch": 35.68, "learning_rate": 3.2164570978433736e-05, "loss": 2.2161, "step": 7202500 }, { "epoch": 35.69, "learning_rate": 3.216333239200765e-05, "loss": 2.2114, "step": 7203000 }, { "epoch": 35.69, "learning_rate": 3.2162096282754415e-05, "loss": 2.2018, "step": 7203500 }, { "epoch": 35.69, "learning_rate": 3.216086017350119e-05, "loss": 2.1898, "step": 7204000 }, { "epoch": 35.69, "learning_rate": 3.215962158707511e-05, "loss": 2.2153, "step": 7204500 }, { "epoch": 35.7, "learning_rate": 3.2158385477821876e-05, "loss": 2.1962, "step": 7205000 }, { "epoch": 35.7, "learning_rate": 3.2157146891395786e-05, "loss": 2.2066, "step": 7205500 }, { "epoch": 35.7, "learning_rate": 3.21559083049697e-05, "loss": 2.1808, "step": 7206000 }, { "epoch": 35.7, "learning_rate": 3.215466971854362e-05, "loss": 2.1784, "step": 7206500 }, { "epoch": 35.71, "learning_rate": 3.215343113211754e-05, "loss": 2.1879, "step": 7207000 }, { "epoch": 35.71, "learning_rate": 3.2152192545691454e-05, "loss": 2.2027, "step": 7207500 }, { "epoch": 35.71, "learning_rate": 3.215095395926537e-05, "loss": 2.149, "step": 7208000 }, { "epoch": 35.71, "learning_rate": 3.214971537283929e-05, "loss": 2.1831, "step": 7208500 }, { "epoch": 35.72, "learning_rate": 3.2148476786413205e-05, "loss": 2.216, "step": 7209000 }, { "epoch": 35.72, "learning_rate": 3.2147238199987115e-05, "loss": 2.216, "step": 7209500 }, { "epoch": 35.72, "learning_rate": 3.214600209073389e-05, "loss": 2.1782, "step": 7210000 }, { "epoch": 35.72, "learning_rate": 3.214476350430781e-05, "loss": 2.1767, "step": 7210500 }, { "epoch": 35.73, "learning_rate": 3.2143524917881724e-05, "loss": 2.1726, "step": 7211000 }, { "epoch": 35.73, "learning_rate": 3.214228633145564e-05, "loss": 2.1896, "step": 7211500 }, { "epoch": 35.73, "learning_rate": 3.214104774502956e-05, "loss": 2.1825, "step": 7212000 }, { "epoch": 35.73, "learning_rate": 3.2139809158603475e-05, "loss": 2.1887, "step": 7212500 }, { "epoch": 35.74, "learning_rate": 3.2138570572177385e-05, "loss": 2.1715, "step": 7213000 }, { "epoch": 35.74, "learning_rate": 3.21373319857513e-05, "loss": 2.187, "step": 7213500 }, { "epoch": 35.74, "learning_rate": 3.213609339932522e-05, "loss": 2.1838, "step": 7214000 }, { "epoch": 35.74, "learning_rate": 3.213485729007199e-05, "loss": 2.194, "step": 7214500 }, { "epoch": 35.75, "learning_rate": 3.2133618703645905e-05, "loss": 2.1984, "step": 7215000 }, { "epoch": 35.75, "learning_rate": 3.213238011721982e-05, "loss": 2.2033, "step": 7215500 }, { "epoch": 35.75, "learning_rate": 3.213114153079373e-05, "loss": 2.1934, "step": 7216000 }, { "epoch": 35.75, "learning_rate": 3.212990294436765e-05, "loss": 2.2022, "step": 7216500 }, { "epoch": 35.76, "learning_rate": 3.2128664357941566e-05, "loss": 2.185, "step": 7217000 }, { "epoch": 35.76, "learning_rate": 3.212742577151548e-05, "loss": 2.1806, "step": 7217500 }, { "epoch": 35.76, "learning_rate": 3.212618966226226e-05, "loss": 2.1936, "step": 7218000 }, { "epoch": 35.76, "learning_rate": 3.2124951075836175e-05, "loss": 2.163, "step": 7218500 }, { "epoch": 35.77, "learning_rate": 3.2123712489410085e-05, "loss": 2.1739, "step": 7219000 }, { "epoch": 35.77, "learning_rate": 3.2122473902984e-05, "loss": 2.1609, "step": 7219500 }, { "epoch": 35.77, "learning_rate": 3.212123531655792e-05, "loss": 2.1824, "step": 7220000 }, { "epoch": 35.77, "learning_rate": 3.2119996730131836e-05, "loss": 2.1913, "step": 7220500 }, { "epoch": 35.78, "learning_rate": 3.211875814370575e-05, "loss": 2.2127, "step": 7221000 }, { "epoch": 35.78, "learning_rate": 3.211751955727967e-05, "loss": 2.1843, "step": 7221500 }, { "epoch": 35.78, "learning_rate": 3.211628097085359e-05, "loss": 2.1812, "step": 7222000 }, { "epoch": 35.78, "learning_rate": 3.211504486160035e-05, "loss": 2.1801, "step": 7222500 }, { "epoch": 35.79, "learning_rate": 3.2113806275174266e-05, "loss": 2.1778, "step": 7223000 }, { "epoch": 35.79, "learning_rate": 3.211256768874818e-05, "loss": 2.1982, "step": 7223500 }, { "epoch": 35.79, "learning_rate": 3.21113291023221e-05, "loss": 2.2053, "step": 7224000 }, { "epoch": 35.79, "learning_rate": 3.211009051589602e-05, "loss": 2.1966, "step": 7224500 }, { "epoch": 35.8, "learning_rate": 3.2108851929469934e-05, "loss": 2.1682, "step": 7225000 }, { "epoch": 35.8, "learning_rate": 3.210761334304385e-05, "loss": 2.2173, "step": 7225500 }, { "epoch": 35.8, "learning_rate": 3.210637475661777e-05, "loss": 2.1867, "step": 7226000 }, { "epoch": 35.8, "learning_rate": 3.2105136170191684e-05, "loss": 2.1976, "step": 7226500 }, { "epoch": 35.81, "learning_rate": 3.210390006093845e-05, "loss": 2.185, "step": 7227000 }, { "epoch": 35.81, "learning_rate": 3.210266147451237e-05, "loss": 2.1922, "step": 7227500 }, { "epoch": 35.81, "learning_rate": 3.210142536525914e-05, "loss": 2.1783, "step": 7228000 }, { "epoch": 35.81, "learning_rate": 3.210018677883305e-05, "loss": 2.1788, "step": 7228500 }, { "epoch": 35.81, "learning_rate": 3.2098950669579825e-05, "loss": 2.2044, "step": 7229000 }, { "epoch": 35.82, "learning_rate": 3.209771208315374e-05, "loss": 2.1966, "step": 7229500 }, { "epoch": 35.82, "learning_rate": 3.209647349672766e-05, "loss": 2.2111, "step": 7230000 }, { "epoch": 35.82, "learning_rate": 3.2095234910301575e-05, "loss": 2.1748, "step": 7230500 }, { "epoch": 35.82, "learning_rate": 3.209400127822119e-05, "loss": 2.1646, "step": 7231000 }, { "epoch": 35.83, "learning_rate": 3.2092765168967965e-05, "loss": 2.1985, "step": 7231500 }, { "epoch": 35.83, "learning_rate": 3.2091526582541875e-05, "loss": 2.1675, "step": 7232000 }, { "epoch": 35.83, "learning_rate": 3.209028799611579e-05, "loss": 2.1727, "step": 7232500 }, { "epoch": 35.83, "learning_rate": 3.208905188686257e-05, "loss": 2.1669, "step": 7233000 }, { "epoch": 35.84, "learning_rate": 3.208781330043648e-05, "loss": 2.19, "step": 7233500 }, { "epoch": 35.84, "learning_rate": 3.2086574714010395e-05, "loss": 2.1846, "step": 7234000 }, { "epoch": 35.84, "learning_rate": 3.208533612758431e-05, "loss": 2.2084, "step": 7234500 }, { "epoch": 35.84, "learning_rate": 3.208409754115823e-05, "loss": 2.1889, "step": 7235000 }, { "epoch": 35.85, "learning_rate": 3.2082858954732146e-05, "loss": 2.1709, "step": 7235500 }, { "epoch": 35.85, "learning_rate": 3.2081620368306056e-05, "loss": 2.192, "step": 7236000 }, { "epoch": 35.85, "learning_rate": 3.208038425905283e-05, "loss": 2.1989, "step": 7236500 }, { "epoch": 35.85, "learning_rate": 3.207914567262675e-05, "loss": 2.1778, "step": 7237000 }, { "epoch": 35.86, "learning_rate": 3.2077907086200665e-05, "loss": 2.2039, "step": 7237500 }, { "epoch": 35.86, "learning_rate": 3.207666849977458e-05, "loss": 2.1986, "step": 7238000 }, { "epoch": 35.86, "learning_rate": 3.207542991334849e-05, "loss": 2.1762, "step": 7238500 }, { "epoch": 35.86, "learning_rate": 3.207419132692241e-05, "loss": 2.177, "step": 7239000 }, { "epoch": 35.87, "learning_rate": 3.2072952740496326e-05, "loss": 2.1804, "step": 7239500 }, { "epoch": 35.87, "learning_rate": 3.207171415407024e-05, "loss": 2.194, "step": 7240000 }, { "epoch": 35.87, "learning_rate": 3.207047556764416e-05, "loss": 2.1995, "step": 7240500 }, { "epoch": 35.87, "learning_rate": 3.206923698121808e-05, "loss": 2.227, "step": 7241000 }, { "epoch": 35.88, "learning_rate": 3.2067998394791994e-05, "loss": 2.1887, "step": 7241500 }, { "epoch": 35.88, "learning_rate": 3.206675980836591e-05, "loss": 2.1928, "step": 7242000 }, { "epoch": 35.88, "learning_rate": 3.206552122193983e-05, "loss": 2.1956, "step": 7242500 }, { "epoch": 35.88, "learning_rate": 3.206428511268659e-05, "loss": 2.217, "step": 7243000 }, { "epoch": 35.89, "learning_rate": 3.2063046526260507e-05, "loss": 2.212, "step": 7243500 }, { "epoch": 35.89, "learning_rate": 3.206181041700728e-05, "loss": 2.1715, "step": 7244000 }, { "epoch": 35.89, "learning_rate": 3.206057183058119e-05, "loss": 2.1658, "step": 7244500 }, { "epoch": 35.89, "learning_rate": 3.205933324415511e-05, "loss": 2.1871, "step": 7245000 }, { "epoch": 35.9, "learning_rate": 3.2058097134901885e-05, "loss": 2.183, "step": 7245500 }, { "epoch": 35.9, "learning_rate": 3.20568585484758e-05, "loss": 2.1936, "step": 7246000 }, { "epoch": 35.9, "learning_rate": 3.205561996204972e-05, "loss": 2.1959, "step": 7246500 }, { "epoch": 35.9, "learning_rate": 3.205438137562363e-05, "loss": 2.2226, "step": 7247000 }, { "epoch": 35.91, "learning_rate": 3.2053142789197546e-05, "loss": 2.1854, "step": 7247500 }, { "epoch": 35.91, "learning_rate": 3.205190420277146e-05, "loss": 2.2058, "step": 7248000 }, { "epoch": 35.91, "learning_rate": 3.205066561634538e-05, "loss": 2.1896, "step": 7248500 }, { "epoch": 35.91, "learning_rate": 3.2049427029919297e-05, "loss": 2.1738, "step": 7249000 }, { "epoch": 35.92, "learning_rate": 3.204818844349321e-05, "loss": 2.1808, "step": 7249500 }, { "epoch": 35.92, "learning_rate": 3.2046949857067124e-05, "loss": 2.2039, "step": 7250000 }, { "epoch": 35.92, "learning_rate": 3.204571127064104e-05, "loss": 2.1845, "step": 7250500 }, { "epoch": 35.92, "learning_rate": 3.204447268421496e-05, "loss": 2.203, "step": 7251000 }, { "epoch": 35.93, "learning_rate": 3.2043234097788874e-05, "loss": 2.1995, "step": 7251500 }, { "epoch": 35.93, "learning_rate": 3.204199551136279e-05, "loss": 2.2299, "step": 7252000 }, { "epoch": 35.93, "learning_rate": 3.204075692493671e-05, "loss": 2.2146, "step": 7252500 }, { "epoch": 35.93, "learning_rate": 3.2039518338510625e-05, "loss": 2.2082, "step": 7253000 }, { "epoch": 35.94, "learning_rate": 3.2038282229257394e-05, "loss": 2.1813, "step": 7253500 }, { "epoch": 35.94, "learning_rate": 3.203704364283131e-05, "loss": 2.1665, "step": 7254000 }, { "epoch": 35.94, "learning_rate": 3.203580505640523e-05, "loss": 2.1758, "step": 7254500 }, { "epoch": 35.94, "learning_rate": 3.2034566469979145e-05, "loss": 2.1813, "step": 7255000 }, { "epoch": 35.95, "learning_rate": 3.2033330360725914e-05, "loss": 2.1977, "step": 7255500 }, { "epoch": 35.95, "learning_rate": 3.203209177429983e-05, "loss": 2.1928, "step": 7256000 }, { "epoch": 35.95, "learning_rate": 3.203085318787374e-05, "loss": 2.2045, "step": 7256500 }, { "epoch": 35.95, "learning_rate": 3.202961460144766e-05, "loss": 2.1974, "step": 7257000 }, { "epoch": 35.96, "learning_rate": 3.2028378492194426e-05, "loss": 2.1832, "step": 7257500 }, { "epoch": 35.96, "learning_rate": 3.202713990576834e-05, "loss": 2.1904, "step": 7258000 }, { "epoch": 35.96, "learning_rate": 3.202590131934226e-05, "loss": 2.1817, "step": 7258500 }, { "epoch": 35.96, "learning_rate": 3.202466273291618e-05, "loss": 2.2003, "step": 7259000 }, { "epoch": 35.97, "learning_rate": 3.2023424146490094e-05, "loss": 2.1702, "step": 7259500 }, { "epoch": 35.97, "learning_rate": 3.202218556006401e-05, "loss": 2.1826, "step": 7260000 }, { "epoch": 35.97, "learning_rate": 3.202094697363793e-05, "loss": 2.1929, "step": 7260500 }, { "epoch": 35.97, "learning_rate": 3.20197108643847e-05, "loss": 2.1713, "step": 7261000 }, { "epoch": 35.98, "learning_rate": 3.2018472277958614e-05, "loss": 2.2067, "step": 7261500 }, { "epoch": 35.98, "learning_rate": 3.201723369153253e-05, "loss": 2.1851, "step": 7262000 }, { "epoch": 35.98, "learning_rate": 3.201599510510645e-05, "loss": 2.2131, "step": 7262500 }, { "epoch": 35.98, "learning_rate": 3.2014758995853216e-05, "loss": 2.1984, "step": 7263000 }, { "epoch": 35.99, "learning_rate": 3.2013520409427127e-05, "loss": 2.1667, "step": 7263500 }, { "epoch": 35.99, "learning_rate": 3.2012281823001043e-05, "loss": 2.1926, "step": 7264000 }, { "epoch": 35.99, "learning_rate": 3.201104323657496e-05, "loss": 2.2088, "step": 7264500 }, { "epoch": 35.99, "learning_rate": 3.200980465014888e-05, "loss": 2.1889, "step": 7265000 }, { "epoch": 36.0, "learning_rate": 3.2008566063722794e-05, "loss": 2.1909, "step": 7265500 }, { "epoch": 36.0, "learning_rate": 3.200732747729671e-05, "loss": 2.2, "step": 7266000 }, { "epoch": 36.0, "eval_accuracy": 0.6622320326905691, "eval_accuracy_mlm": 0.6186748779181579, "eval_accuracy_nsp": 0.8673472989774826, "eval_loss": 2.315115213394165, "eval_runtime": 146.032, "eval_samples_per_second": 1745.911, "eval_steps_per_second": 72.751, "step": 7266348 }, { "epoch": 36.0, "learning_rate": 3.200608889087063e-05, "loss": 2.1755, "step": 7266500 }, { "epoch": 36.0, "learning_rate": 3.20048527816174e-05, "loss": 2.1472, "step": 7267000 }, { "epoch": 36.01, "learning_rate": 3.2003614195191314e-05, "loss": 2.1568, "step": 7267500 }, { "epoch": 36.01, "learning_rate": 3.200237808593808e-05, "loss": 2.1619, "step": 7268000 }, { "epoch": 36.01, "learning_rate": 3.2001139499512e-05, "loss": 2.1834, "step": 7268500 }, { "epoch": 36.01, "learning_rate": 3.1999900913085917e-05, "loss": 2.1446, "step": 7269000 }, { "epoch": 36.02, "learning_rate": 3.199866232665983e-05, "loss": 2.1727, "step": 7269500 }, { "epoch": 36.02, "learning_rate": 3.1997423740233744e-05, "loss": 2.1695, "step": 7270000 }, { "epoch": 36.02, "learning_rate": 3.199618515380766e-05, "loss": 2.1422, "step": 7270500 }, { "epoch": 36.02, "learning_rate": 3.199494656738158e-05, "loss": 2.1672, "step": 7271000 }, { "epoch": 36.03, "learning_rate": 3.1993707980955494e-05, "loss": 2.1533, "step": 7271500 }, { "epoch": 36.03, "learning_rate": 3.199247187170227e-05, "loss": 2.1539, "step": 7272000 }, { "epoch": 36.03, "learning_rate": 3.199123576244903e-05, "loss": 2.1517, "step": 7272500 }, { "epoch": 36.03, "learning_rate": 3.198999717602295e-05, "loss": 2.1898, "step": 7273000 }, { "epoch": 36.04, "learning_rate": 3.1988758589596866e-05, "loss": 2.1593, "step": 7273500 }, { "epoch": 36.04, "learning_rate": 3.198752000317078e-05, "loss": 2.1246, "step": 7274000 }, { "epoch": 36.04, "learning_rate": 3.19862814167447e-05, "loss": 2.1623, "step": 7274500 }, { "epoch": 36.04, "learning_rate": 3.198504283031862e-05, "loss": 2.1748, "step": 7275000 }, { "epoch": 36.05, "learning_rate": 3.1983804243892534e-05, "loss": 2.1472, "step": 7275500 }, { "epoch": 36.05, "learning_rate": 3.1982565657466444e-05, "loss": 2.1667, "step": 7276000 }, { "epoch": 36.05, "learning_rate": 3.198132954821322e-05, "loss": 2.185, "step": 7276500 }, { "epoch": 36.05, "learning_rate": 3.1980090961787136e-05, "loss": 2.1632, "step": 7277000 }, { "epoch": 36.06, "learning_rate": 3.197885237536105e-05, "loss": 2.1667, "step": 7277500 }, { "epoch": 36.06, "learning_rate": 3.1977616266107815e-05, "loss": 2.1795, "step": 7278000 }, { "epoch": 36.06, "learning_rate": 3.197637767968173e-05, "loss": 2.1736, "step": 7278500 }, { "epoch": 36.06, "learning_rate": 3.197513909325565e-05, "loss": 2.1804, "step": 7279000 }, { "epoch": 36.07, "learning_rate": 3.1973900506829566e-05, "loss": 2.1707, "step": 7279500 }, { "epoch": 36.07, "learning_rate": 3.197266192040348e-05, "loss": 2.16, "step": 7280000 }, { "epoch": 36.07, "learning_rate": 3.19714233339774e-05, "loss": 2.1459, "step": 7280500 }, { "epoch": 36.07, "learning_rate": 3.197018474755132e-05, "loss": 2.176, "step": 7281000 }, { "epoch": 36.08, "learning_rate": 3.1968946161125234e-05, "loss": 2.188, "step": 7281500 }, { "epoch": 36.08, "learning_rate": 3.1967707574699144e-05, "loss": 2.1487, "step": 7282000 }, { "epoch": 36.08, "learning_rate": 3.196646898827306e-05, "loss": 2.1741, "step": 7282500 }, { "epoch": 36.08, "learning_rate": 3.196523040184698e-05, "loss": 2.1627, "step": 7283000 }, { "epoch": 36.08, "learning_rate": 3.1963991815420895e-05, "loss": 2.1524, "step": 7283500 }, { "epoch": 36.09, "learning_rate": 3.196275322899481e-05, "loss": 2.1659, "step": 7284000 }, { "epoch": 36.09, "learning_rate": 3.196151464256873e-05, "loss": 2.1753, "step": 7284500 }, { "epoch": 36.09, "learning_rate": 3.1960276056142645e-05, "loss": 2.1853, "step": 7285000 }, { "epoch": 36.09, "learning_rate": 3.195903746971656e-05, "loss": 2.1606, "step": 7285500 }, { "epoch": 36.1, "learning_rate": 3.195779888329048e-05, "loss": 2.1595, "step": 7286000 }, { "epoch": 36.1, "learning_rate": 3.195656277403725e-05, "loss": 2.1824, "step": 7286500 }, { "epoch": 36.1, "learning_rate": 3.1955324187611165e-05, "loss": 2.1565, "step": 7287000 }, { "epoch": 36.1, "learning_rate": 3.195408560118508e-05, "loss": 2.1437, "step": 7287500 }, { "epoch": 36.11, "learning_rate": 3.195284949193185e-05, "loss": 2.1699, "step": 7288000 }, { "epoch": 36.11, "learning_rate": 3.195161090550576e-05, "loss": 2.1642, "step": 7288500 }, { "epoch": 36.11, "learning_rate": 3.195037231907968e-05, "loss": 2.1632, "step": 7289000 }, { "epoch": 36.11, "learning_rate": 3.1949133732653595e-05, "loss": 2.1737, "step": 7289500 }, { "epoch": 36.12, "learning_rate": 3.194789514622751e-05, "loss": 2.1658, "step": 7290000 }, { "epoch": 36.12, "learning_rate": 3.194665655980143e-05, "loss": 2.1809, "step": 7290500 }, { "epoch": 36.12, "learning_rate": 3.1945417973375346e-05, "loss": 2.1634, "step": 7291000 }, { "epoch": 36.12, "learning_rate": 3.194417938694926e-05, "loss": 2.151, "step": 7291500 }, { "epoch": 36.13, "learning_rate": 3.194294080052318e-05, "loss": 2.1942, "step": 7292000 }, { "epoch": 36.13, "learning_rate": 3.194170469126995e-05, "loss": 2.1557, "step": 7292500 }, { "epoch": 36.13, "learning_rate": 3.1940466104843865e-05, "loss": 2.1469, "step": 7293000 }, { "epoch": 36.13, "learning_rate": 3.193922751841778e-05, "loss": 2.1525, "step": 7293500 }, { "epoch": 36.14, "learning_rate": 3.19379889319917e-05, "loss": 2.1464, "step": 7294000 }, { "epoch": 36.14, "learning_rate": 3.1936750345565616e-05, "loss": 2.1576, "step": 7294500 }, { "epoch": 36.14, "learning_rate": 3.193551175913953e-05, "loss": 2.1805, "step": 7295000 }, { "epoch": 36.14, "learning_rate": 3.1934278127059154e-05, "loss": 2.1826, "step": 7295500 }, { "epoch": 36.15, "learning_rate": 3.193303954063307e-05, "loss": 2.1589, "step": 7296000 }, { "epoch": 36.15, "learning_rate": 3.193180095420699e-05, "loss": 2.1538, "step": 7296500 }, { "epoch": 36.15, "learning_rate": 3.1930562367780904e-05, "loss": 2.1704, "step": 7297000 }, { "epoch": 36.15, "learning_rate": 3.192932378135482e-05, "loss": 2.1759, "step": 7297500 }, { "epoch": 36.16, "learning_rate": 3.192808519492873e-05, "loss": 2.1512, "step": 7298000 }, { "epoch": 36.16, "learning_rate": 3.192684660850265e-05, "loss": 2.1743, "step": 7298500 }, { "epoch": 36.16, "learning_rate": 3.1925608022076565e-05, "loss": 2.1707, "step": 7299000 }, { "epoch": 36.16, "learning_rate": 3.192436943565048e-05, "loss": 2.1599, "step": 7299500 }, { "epoch": 36.17, "learning_rate": 3.19231308492244e-05, "loss": 2.1745, "step": 7300000 }, { "epoch": 36.17, "learning_rate": 3.192189473997117e-05, "loss": 2.1474, "step": 7300500 }, { "epoch": 36.17, "learning_rate": 3.192065615354508e-05, "loss": 2.1428, "step": 7301000 }, { "epoch": 36.17, "learning_rate": 3.1919420044291854e-05, "loss": 2.1729, "step": 7301500 }, { "epoch": 36.18, "learning_rate": 3.191818145786577e-05, "loss": 2.1742, "step": 7302000 }, { "epoch": 36.18, "learning_rate": 3.191694287143969e-05, "loss": 2.1903, "step": 7302500 }, { "epoch": 36.18, "learning_rate": 3.1915704285013604e-05, "loss": 2.1836, "step": 7303000 }, { "epoch": 36.18, "learning_rate": 3.191446817576037e-05, "loss": 2.1727, "step": 7303500 }, { "epoch": 36.19, "learning_rate": 3.191322958933429e-05, "loss": 2.1738, "step": 7304000 }, { "epoch": 36.19, "learning_rate": 3.19119910029082e-05, "loss": 2.165, "step": 7304500 }, { "epoch": 36.19, "learning_rate": 3.191075241648212e-05, "loss": 2.171, "step": 7305000 }, { "epoch": 36.19, "learning_rate": 3.1909513830056034e-05, "loss": 2.1837, "step": 7305500 }, { "epoch": 36.2, "learning_rate": 3.190827524362995e-05, "loss": 2.1613, "step": 7306000 }, { "epoch": 36.2, "learning_rate": 3.190703665720387e-05, "loss": 2.1567, "step": 7306500 }, { "epoch": 36.2, "learning_rate": 3.190579807077778e-05, "loss": 2.1435, "step": 7307000 }, { "epoch": 36.2, "learning_rate": 3.1904559484351695e-05, "loss": 2.1601, "step": 7307500 }, { "epoch": 36.21, "learning_rate": 3.190332089792561e-05, "loss": 2.1849, "step": 7308000 }, { "epoch": 36.21, "learning_rate": 3.190208231149953e-05, "loss": 2.1669, "step": 7308500 }, { "epoch": 36.21, "learning_rate": 3.1900843725073446e-05, "loss": 2.1879, "step": 7309000 }, { "epoch": 36.21, "learning_rate": 3.189960513864736e-05, "loss": 2.1559, "step": 7309500 }, { "epoch": 36.22, "learning_rate": 3.189836902939414e-05, "loss": 2.1691, "step": 7310000 }, { "epoch": 36.22, "learning_rate": 3.189713292014091e-05, "loss": 2.163, "step": 7310500 }, { "epoch": 36.22, "learning_rate": 3.1895894333714824e-05, "loss": 2.1601, "step": 7311000 }, { "epoch": 36.22, "learning_rate": 3.1894655747288734e-05, "loss": 2.1817, "step": 7311500 }, { "epoch": 36.23, "learning_rate": 3.189341716086265e-05, "loss": 2.197, "step": 7312000 }, { "epoch": 36.23, "learning_rate": 3.189217857443657e-05, "loss": 2.1816, "step": 7312500 }, { "epoch": 36.23, "learning_rate": 3.1890939988010485e-05, "loss": 2.152, "step": 7313000 }, { "epoch": 36.23, "learning_rate": 3.18897014015844e-05, "loss": 2.1727, "step": 7313500 }, { "epoch": 36.24, "learning_rate": 3.188846281515831e-05, "loss": 2.1761, "step": 7314000 }, { "epoch": 36.24, "learning_rate": 3.188722670590509e-05, "loss": 2.1642, "step": 7314500 }, { "epoch": 36.24, "learning_rate": 3.1885988119479005e-05, "loss": 2.1726, "step": 7315000 }, { "epoch": 36.24, "learning_rate": 3.1884752010225773e-05, "loss": 2.1537, "step": 7315500 }, { "epoch": 36.25, "learning_rate": 3.188351342379969e-05, "loss": 2.1697, "step": 7316000 }, { "epoch": 36.25, "learning_rate": 3.188227483737361e-05, "loss": 2.179, "step": 7316500 }, { "epoch": 36.25, "learning_rate": 3.1881036250947524e-05, "loss": 2.1536, "step": 7317000 }, { "epoch": 36.25, "learning_rate": 3.187979766452144e-05, "loss": 2.1655, "step": 7317500 }, { "epoch": 36.26, "learning_rate": 3.18785615552682e-05, "loss": 2.1739, "step": 7318000 }, { "epoch": 36.26, "learning_rate": 3.187732296884212e-05, "loss": 2.1631, "step": 7318500 }, { "epoch": 36.26, "learning_rate": 3.187608685958889e-05, "loss": 2.164, "step": 7319000 }, { "epoch": 36.26, "learning_rate": 3.1874848273162806e-05, "loss": 2.1755, "step": 7319500 }, { "epoch": 36.27, "learning_rate": 3.187360968673672e-05, "loss": 2.1961, "step": 7320000 }, { "epoch": 36.27, "learning_rate": 3.187237357748349e-05, "loss": 2.1747, "step": 7320500 }, { "epoch": 36.27, "learning_rate": 3.187113499105741e-05, "loss": 2.1745, "step": 7321000 }, { "epoch": 36.27, "learning_rate": 3.186989640463132e-05, "loss": 2.146, "step": 7321500 }, { "epoch": 36.28, "learning_rate": 3.1868657818205236e-05, "loss": 2.1678, "step": 7322000 }, { "epoch": 36.28, "learning_rate": 3.186741923177915e-05, "loss": 2.1745, "step": 7322500 }, { "epoch": 36.28, "learning_rate": 3.186618312252593e-05, "loss": 2.1547, "step": 7323000 }, { "epoch": 36.28, "learning_rate": 3.186494453609984e-05, "loss": 2.1704, "step": 7323500 }, { "epoch": 36.29, "learning_rate": 3.1863705949673755e-05, "loss": 2.1843, "step": 7324000 }, { "epoch": 36.29, "learning_rate": 3.186246736324767e-05, "loss": 2.1645, "step": 7324500 }, { "epoch": 36.29, "learning_rate": 3.186122877682159e-05, "loss": 2.1699, "step": 7325000 }, { "epoch": 36.29, "learning_rate": 3.1859990190395506e-05, "loss": 2.188, "step": 7325500 }, { "epoch": 36.3, "learning_rate": 3.1858754081142275e-05, "loss": 2.172, "step": 7326000 }, { "epoch": 36.3, "learning_rate": 3.185751549471619e-05, "loss": 2.1751, "step": 7326500 }, { "epoch": 36.3, "learning_rate": 3.185627690829011e-05, "loss": 2.1842, "step": 7327000 }, { "epoch": 36.3, "learning_rate": 3.185504079903688e-05, "loss": 2.1814, "step": 7327500 }, { "epoch": 36.31, "learning_rate": 3.1853802212610794e-05, "loss": 2.1958, "step": 7328000 }, { "epoch": 36.31, "learning_rate": 3.185256362618471e-05, "loss": 2.1448, "step": 7328500 }, { "epoch": 36.31, "learning_rate": 3.185132503975863e-05, "loss": 2.1794, "step": 7329000 }, { "epoch": 36.31, "learning_rate": 3.1850086453332545e-05, "loss": 2.1839, "step": 7329500 }, { "epoch": 36.32, "learning_rate": 3.1848847866906455e-05, "loss": 2.2051, "step": 7330000 }, { "epoch": 36.32, "learning_rate": 3.184760928048037e-05, "loss": 2.1578, "step": 7330500 }, { "epoch": 36.32, "learning_rate": 3.184637069405429e-05, "loss": 2.1948, "step": 7331000 }, { "epoch": 36.32, "learning_rate": 3.1845132107628206e-05, "loss": 2.1852, "step": 7331500 }, { "epoch": 36.33, "learning_rate": 3.184389599837498e-05, "loss": 2.1675, "step": 7332000 }, { "epoch": 36.33, "learning_rate": 3.184265741194889e-05, "loss": 2.1613, "step": 7332500 }, { "epoch": 36.33, "learning_rate": 3.184141882552281e-05, "loss": 2.1787, "step": 7333000 }, { "epoch": 36.33, "learning_rate": 3.1840180239096726e-05, "loss": 2.1896, "step": 7333500 }, { "epoch": 36.34, "learning_rate": 3.183894165267064e-05, "loss": 2.1535, "step": 7334000 }, { "epoch": 36.34, "learning_rate": 3.183770306624456e-05, "loss": 2.1596, "step": 7334500 }, { "epoch": 36.34, "learning_rate": 3.183646447981847e-05, "loss": 2.1774, "step": 7335000 }, { "epoch": 36.34, "learning_rate": 3.183522589339239e-05, "loss": 2.1598, "step": 7335500 }, { "epoch": 36.35, "learning_rate": 3.1833987306966304e-05, "loss": 2.1618, "step": 7336000 }, { "epoch": 36.35, "learning_rate": 3.183274872054022e-05, "loss": 2.1732, "step": 7336500 }, { "epoch": 36.35, "learning_rate": 3.183151013411414e-05, "loss": 2.2031, "step": 7337000 }, { "epoch": 36.35, "learning_rate": 3.1830274024860906e-05, "loss": 2.1618, "step": 7337500 }, { "epoch": 36.35, "learning_rate": 3.182903543843482e-05, "loss": 2.1865, "step": 7338000 }, { "epoch": 36.36, "learning_rate": 3.182779685200874e-05, "loss": 2.1547, "step": 7338500 }, { "epoch": 36.36, "learning_rate": 3.182655826558266e-05, "loss": 2.1551, "step": 7339000 }, { "epoch": 36.36, "learning_rate": 3.1825319679156574e-05, "loss": 2.169, "step": 7339500 }, { "epoch": 36.36, "learning_rate": 3.182408356990334e-05, "loss": 2.1587, "step": 7340000 }, { "epoch": 36.37, "learning_rate": 3.182284498347726e-05, "loss": 2.1691, "step": 7340500 }, { "epoch": 36.37, "learning_rate": 3.182160639705118e-05, "loss": 2.161, "step": 7341000 }, { "epoch": 36.37, "learning_rate": 3.1820367810625094e-05, "loss": 2.1649, "step": 7341500 }, { "epoch": 36.37, "learning_rate": 3.181913170137186e-05, "loss": 2.1787, "step": 7342000 }, { "epoch": 36.38, "learning_rate": 3.181789311494577e-05, "loss": 2.1742, "step": 7342500 }, { "epoch": 36.38, "learning_rate": 3.181665452851969e-05, "loss": 2.1531, "step": 7343000 }, { "epoch": 36.38, "learning_rate": 3.1815415942093606e-05, "loss": 2.1657, "step": 7343500 }, { "epoch": 36.38, "learning_rate": 3.181417735566752e-05, "loss": 2.1664, "step": 7344000 }, { "epoch": 36.39, "learning_rate": 3.181293876924144e-05, "loss": 2.1728, "step": 7344500 }, { "epoch": 36.39, "learning_rate": 3.181170018281536e-05, "loss": 2.1725, "step": 7345000 }, { "epoch": 36.39, "learning_rate": 3.1810461596389274e-05, "loss": 2.1948, "step": 7345500 }, { "epoch": 36.39, "learning_rate": 3.180922300996319e-05, "loss": 2.1758, "step": 7346000 }, { "epoch": 36.4, "learning_rate": 3.180798442353711e-05, "loss": 2.1829, "step": 7346500 }, { "epoch": 36.4, "learning_rate": 3.1806745837111025e-05, "loss": 2.2015, "step": 7347000 }, { "epoch": 36.4, "learning_rate": 3.180550725068494e-05, "loss": 2.1804, "step": 7347500 }, { "epoch": 36.4, "learning_rate": 3.180426866425886e-05, "loss": 2.179, "step": 7348000 }, { "epoch": 36.41, "learning_rate": 3.1803030077832776e-05, "loss": 2.1528, "step": 7348500 }, { "epoch": 36.41, "learning_rate": 3.180179396857954e-05, "loss": 2.193, "step": 7349000 }, { "epoch": 36.41, "learning_rate": 3.1800555382153455e-05, "loss": 2.154, "step": 7349500 }, { "epoch": 36.41, "learning_rate": 3.179931679572737e-05, "loss": 2.1824, "step": 7350000 }, { "epoch": 36.42, "learning_rate": 3.179808068647414e-05, "loss": 2.1668, "step": 7350500 }, { "epoch": 36.42, "learning_rate": 3.179684210004806e-05, "loss": 2.1718, "step": 7351000 }, { "epoch": 36.42, "learning_rate": 3.1795603513621974e-05, "loss": 2.1842, "step": 7351500 }, { "epoch": 36.42, "learning_rate": 3.179436492719589e-05, "loss": 2.1563, "step": 7352000 }, { "epoch": 36.43, "learning_rate": 3.179312634076981e-05, "loss": 2.1707, "step": 7352500 }, { "epoch": 36.43, "learning_rate": 3.1791887754343725e-05, "loss": 2.1689, "step": 7353000 }, { "epoch": 36.43, "learning_rate": 3.179064916791764e-05, "loss": 2.1638, "step": 7353500 }, { "epoch": 36.43, "learning_rate": 3.178941058149156e-05, "loss": 2.1388, "step": 7354000 }, { "epoch": 36.44, "learning_rate": 3.1788171995065476e-05, "loss": 2.1856, "step": 7354500 }, { "epoch": 36.44, "learning_rate": 3.178693340863939e-05, "loss": 2.1731, "step": 7355000 }, { "epoch": 36.44, "learning_rate": 3.1785697299386155e-05, "loss": 2.1694, "step": 7355500 }, { "epoch": 36.44, "learning_rate": 3.1784461190132924e-05, "loss": 2.1709, "step": 7356000 }, { "epoch": 36.45, "learning_rate": 3.178322260370684e-05, "loss": 2.1522, "step": 7356500 }, { "epoch": 36.45, "learning_rate": 3.178198401728076e-05, "loss": 2.1841, "step": 7357000 }, { "epoch": 36.45, "learning_rate": 3.1780745430854674e-05, "loss": 2.1854, "step": 7357500 }, { "epoch": 36.45, "learning_rate": 3.177950932160144e-05, "loss": 2.2015, "step": 7358000 }, { "epoch": 36.46, "learning_rate": 3.177827073517536e-05, "loss": 2.1727, "step": 7358500 }, { "epoch": 36.46, "learning_rate": 3.177703214874928e-05, "loss": 2.1739, "step": 7359000 }, { "epoch": 36.46, "learning_rate": 3.17757985166689e-05, "loss": 2.1939, "step": 7359500 }, { "epoch": 36.46, "learning_rate": 3.1774559930242815e-05, "loss": 2.1778, "step": 7360000 }, { "epoch": 36.47, "learning_rate": 3.177332134381673e-05, "loss": 2.1905, "step": 7360500 }, { "epoch": 36.47, "learning_rate": 3.177208275739065e-05, "loss": 2.1869, "step": 7361000 }, { "epoch": 36.47, "learning_rate": 3.1770844170964565e-05, "loss": 2.1856, "step": 7361500 }, { "epoch": 36.47, "learning_rate": 3.176960558453848e-05, "loss": 2.1606, "step": 7362000 }, { "epoch": 36.48, "learning_rate": 3.17683669981124e-05, "loss": 2.1803, "step": 7362500 }, { "epoch": 36.48, "learning_rate": 3.1767128411686316e-05, "loss": 2.1836, "step": 7363000 }, { "epoch": 36.48, "learning_rate": 3.176588982526023e-05, "loss": 2.1588, "step": 7363500 }, { "epoch": 36.48, "learning_rate": 3.176465123883414e-05, "loss": 2.1725, "step": 7364000 }, { "epoch": 36.49, "learning_rate": 3.176341512958091e-05, "loss": 2.1961, "step": 7364500 }, { "epoch": 36.49, "learning_rate": 3.176217654315483e-05, "loss": 2.1519, "step": 7365000 }, { "epoch": 36.49, "learning_rate": 3.1760937956728746e-05, "loss": 2.1619, "step": 7365500 }, { "epoch": 36.49, "learning_rate": 3.175969937030266e-05, "loss": 2.1723, "step": 7366000 }, { "epoch": 36.5, "learning_rate": 3.175846078387658e-05, "loss": 2.1882, "step": 7366500 }, { "epoch": 36.5, "learning_rate": 3.17572221974505e-05, "loss": 2.1582, "step": 7367000 }, { "epoch": 36.5, "learning_rate": 3.175598361102441e-05, "loss": 2.171, "step": 7367500 }, { "epoch": 36.5, "learning_rate": 3.1754745024598324e-05, "loss": 2.174, "step": 7368000 }, { "epoch": 36.51, "learning_rate": 3.175350643817224e-05, "loss": 2.1664, "step": 7368500 }, { "epoch": 36.51, "learning_rate": 3.175226785174616e-05, "loss": 2.1808, "step": 7369000 }, { "epoch": 36.51, "learning_rate": 3.1751029265320075e-05, "loss": 2.212, "step": 7369500 }, { "epoch": 36.51, "learning_rate": 3.174979315606685e-05, "loss": 2.1871, "step": 7370000 }, { "epoch": 36.52, "learning_rate": 3.174855456964076e-05, "loss": 2.1889, "step": 7370500 }, { "epoch": 36.52, "learning_rate": 3.174731598321468e-05, "loss": 2.1857, "step": 7371000 }, { "epoch": 36.52, "learning_rate": 3.1746077396788594e-05, "loss": 2.1746, "step": 7371500 }, { "epoch": 36.52, "learning_rate": 3.174483881036251e-05, "loss": 2.1546, "step": 7372000 }, { "epoch": 36.53, "learning_rate": 3.174360022393643e-05, "loss": 2.1701, "step": 7372500 }, { "epoch": 36.53, "learning_rate": 3.1742361637510345e-05, "loss": 2.1688, "step": 7373000 }, { "epoch": 36.53, "learning_rate": 3.174112552825711e-05, "loss": 2.1697, "step": 7373500 }, { "epoch": 36.53, "learning_rate": 3.1739886941831024e-05, "loss": 2.1417, "step": 7374000 }, { "epoch": 36.54, "learning_rate": 3.173864835540494e-05, "loss": 2.1972, "step": 7374500 }, { "epoch": 36.54, "learning_rate": 3.1737412246151716e-05, "loss": 2.1895, "step": 7375000 }, { "epoch": 36.54, "learning_rate": 3.1736173659725633e-05, "loss": 2.1635, "step": 7375500 }, { "epoch": 36.54, "learning_rate": 3.17349375504724e-05, "loss": 2.1726, "step": 7376000 }, { "epoch": 36.55, "learning_rate": 3.173369896404631e-05, "loss": 2.191, "step": 7376500 }, { "epoch": 36.55, "learning_rate": 3.173246037762023e-05, "loss": 2.1821, "step": 7377000 }, { "epoch": 36.55, "learning_rate": 3.1731221791194146e-05, "loss": 2.1846, "step": 7377500 }, { "epoch": 36.55, "learning_rate": 3.172998320476806e-05, "loss": 2.1596, "step": 7378000 }, { "epoch": 36.56, "learning_rate": 3.172874461834198e-05, "loss": 2.1764, "step": 7378500 }, { "epoch": 36.56, "learning_rate": 3.17275060319159e-05, "loss": 2.1806, "step": 7379000 }, { "epoch": 36.56, "learning_rate": 3.1726267445489814e-05, "loss": 2.1604, "step": 7379500 }, { "epoch": 36.56, "learning_rate": 3.1725028859063724e-05, "loss": 2.2086, "step": 7380000 }, { "epoch": 36.57, "learning_rate": 3.172379027263764e-05, "loss": 2.1964, "step": 7380500 }, { "epoch": 36.57, "learning_rate": 3.172255168621156e-05, "loss": 2.1792, "step": 7381000 }, { "epoch": 36.57, "learning_rate": 3.1721313099785475e-05, "loss": 2.17, "step": 7381500 }, { "epoch": 36.57, "learning_rate": 3.172007451335939e-05, "loss": 2.1855, "step": 7382000 }, { "epoch": 36.58, "learning_rate": 3.171883592693331e-05, "loss": 2.2097, "step": 7382500 }, { "epoch": 36.58, "learning_rate": 3.1717597340507226e-05, "loss": 2.1862, "step": 7383000 }, { "epoch": 36.58, "learning_rate": 3.171635875408114e-05, "loss": 2.184, "step": 7383500 }, { "epoch": 36.58, "learning_rate": 3.171512016765506e-05, "loss": 2.1804, "step": 7384000 }, { "epoch": 36.59, "learning_rate": 3.171388405840183e-05, "loss": 2.1422, "step": 7384500 }, { "epoch": 36.59, "learning_rate": 3.17126479491486e-05, "loss": 2.1633, "step": 7385000 }, { "epoch": 36.59, "learning_rate": 3.1711411839895366e-05, "loss": 2.1765, "step": 7385500 }, { "epoch": 36.59, "learning_rate": 3.171017325346928e-05, "loss": 2.1716, "step": 7386000 }, { "epoch": 36.6, "learning_rate": 3.17089346670432e-05, "loss": 2.1797, "step": 7386500 }, { "epoch": 36.6, "learning_rate": 3.170769608061712e-05, "loss": 2.2091, "step": 7387000 }, { "epoch": 36.6, "learning_rate": 3.1706457494191034e-05, "loss": 2.1754, "step": 7387500 }, { "epoch": 36.6, "learning_rate": 3.170521890776495e-05, "loss": 2.1538, "step": 7388000 }, { "epoch": 36.61, "learning_rate": 3.170398032133887e-05, "loss": 2.1559, "step": 7388500 }, { "epoch": 36.61, "learning_rate": 3.170274173491278e-05, "loss": 2.1987, "step": 7389000 }, { "epoch": 36.61, "learning_rate": 3.170150562565955e-05, "loss": 2.194, "step": 7389500 }, { "epoch": 36.61, "learning_rate": 3.1700267039233463e-05, "loss": 2.1804, "step": 7390000 }, { "epoch": 36.62, "learning_rate": 3.169902845280738e-05, "loss": 2.2139, "step": 7390500 }, { "epoch": 36.62, "learning_rate": 3.16977898663813e-05, "loss": 2.1842, "step": 7391000 }, { "epoch": 36.62, "learning_rate": 3.1696553757128066e-05, "loss": 2.1761, "step": 7391500 }, { "epoch": 36.62, "learning_rate": 3.169531517070198e-05, "loss": 2.1856, "step": 7392000 }, { "epoch": 36.63, "learning_rate": 3.16940765842759e-05, "loss": 2.1945, "step": 7392500 }, { "epoch": 36.63, "learning_rate": 3.169283799784982e-05, "loss": 2.1783, "step": 7393000 }, { "epoch": 36.63, "learning_rate": 3.1691599411423734e-05, "loss": 2.182, "step": 7393500 }, { "epoch": 36.63, "learning_rate": 3.169036082499765e-05, "loss": 2.1868, "step": 7394000 }, { "epoch": 36.63, "learning_rate": 3.168912223857157e-05, "loss": 2.1836, "step": 7394500 }, { "epoch": 36.64, "learning_rate": 3.1687886129318336e-05, "loss": 2.1808, "step": 7395000 }, { "epoch": 36.64, "learning_rate": 3.168664754289225e-05, "loss": 2.1812, "step": 7395500 }, { "epoch": 36.64, "learning_rate": 3.168540895646617e-05, "loss": 2.1927, "step": 7396000 }, { "epoch": 36.64, "learning_rate": 3.168417037004008e-05, "loss": 2.2173, "step": 7396500 }, { "epoch": 36.65, "learning_rate": 3.1682931783614e-05, "loss": 2.168, "step": 7397000 }, { "epoch": 36.65, "learning_rate": 3.1681693197187914e-05, "loss": 2.1856, "step": 7397500 }, { "epoch": 36.65, "learning_rate": 3.168045461076183e-05, "loss": 2.1885, "step": 7398000 }, { "epoch": 36.65, "learning_rate": 3.167921602433575e-05, "loss": 2.1744, "step": 7398500 }, { "epoch": 36.66, "learning_rate": 3.167797743790966e-05, "loss": 2.1875, "step": 7399000 }, { "epoch": 36.66, "learning_rate": 3.1676738851483575e-05, "loss": 2.1897, "step": 7399500 }, { "epoch": 36.66, "learning_rate": 3.167550026505749e-05, "loss": 2.1933, "step": 7400000 }, { "epoch": 36.66, "learning_rate": 3.167426415580427e-05, "loss": 2.1792, "step": 7400500 }, { "epoch": 36.67, "learning_rate": 3.1673025569378185e-05, "loss": 2.1766, "step": 7401000 }, { "epoch": 36.67, "learning_rate": 3.1671786982952095e-05, "loss": 2.1659, "step": 7401500 }, { "epoch": 36.67, "learning_rate": 3.167054839652601e-05, "loss": 2.169, "step": 7402000 }, { "epoch": 36.67, "learning_rate": 3.166930981009993e-05, "loss": 2.1943, "step": 7402500 }, { "epoch": 36.68, "learning_rate": 3.1668071223673846e-05, "loss": 2.1795, "step": 7403000 }, { "epoch": 36.68, "learning_rate": 3.166683263724776e-05, "loss": 2.1787, "step": 7403500 }, { "epoch": 36.68, "learning_rate": 3.166559405082168e-05, "loss": 2.1887, "step": 7404000 }, { "epoch": 36.68, "learning_rate": 3.1664355464395596e-05, "loss": 2.1796, "step": 7404500 }, { "epoch": 36.69, "learning_rate": 3.1663119355142365e-05, "loss": 2.1882, "step": 7405000 }, { "epoch": 36.69, "learning_rate": 3.166188076871628e-05, "loss": 2.1677, "step": 7405500 }, { "epoch": 36.69, "learning_rate": 3.166064465946305e-05, "loss": 2.183, "step": 7406000 }, { "epoch": 36.69, "learning_rate": 3.165940607303697e-05, "loss": 2.1747, "step": 7406500 }, { "epoch": 36.7, "learning_rate": 3.1658167486610885e-05, "loss": 2.1711, "step": 7407000 }, { "epoch": 36.7, "learning_rate": 3.16569289001848e-05, "loss": 2.1921, "step": 7407500 }, { "epoch": 36.7, "learning_rate": 3.165569031375871e-05, "loss": 2.2008, "step": 7408000 }, { "epoch": 36.7, "learning_rate": 3.165445172733263e-05, "loss": 2.1975, "step": 7408500 }, { "epoch": 36.71, "learning_rate": 3.1653213140906546e-05, "loss": 2.1755, "step": 7409000 }, { "epoch": 36.71, "learning_rate": 3.1651979508826166e-05, "loss": 2.1676, "step": 7409500 }, { "epoch": 36.71, "learning_rate": 3.165074092240008e-05, "loss": 2.1594, "step": 7410000 }, { "epoch": 36.71, "learning_rate": 3.1649502335974e-05, "loss": 2.1739, "step": 7410500 }, { "epoch": 36.72, "learning_rate": 3.164826374954792e-05, "loss": 2.1926, "step": 7411000 }, { "epoch": 36.72, "learning_rate": 3.1647025163121834e-05, "loss": 2.1763, "step": 7411500 }, { "epoch": 36.72, "learning_rate": 3.164578657669575e-05, "loss": 2.192, "step": 7412000 }, { "epoch": 36.72, "learning_rate": 3.164455046744252e-05, "loss": 2.1917, "step": 7412500 }, { "epoch": 36.73, "learning_rate": 3.164331188101644e-05, "loss": 2.1724, "step": 7413000 }, { "epoch": 36.73, "learning_rate": 3.16420757717632e-05, "loss": 2.2043, "step": 7413500 }, { "epoch": 36.73, "learning_rate": 3.1640837185337116e-05, "loss": 2.1866, "step": 7414000 }, { "epoch": 36.73, "learning_rate": 3.163959859891103e-05, "loss": 2.1808, "step": 7414500 }, { "epoch": 36.74, "learning_rate": 3.163836001248495e-05, "loss": 2.1876, "step": 7415000 }, { "epoch": 36.74, "learning_rate": 3.1637121426058867e-05, "loss": 2.1957, "step": 7415500 }, { "epoch": 36.74, "learning_rate": 3.1635882839632783e-05, "loss": 2.1583, "step": 7416000 }, { "epoch": 36.74, "learning_rate": 3.163464673037955e-05, "loss": 2.1689, "step": 7416500 }, { "epoch": 36.75, "learning_rate": 3.163340814395347e-05, "loss": 2.1737, "step": 7417000 }, { "epoch": 36.75, "learning_rate": 3.1632169557527386e-05, "loss": 2.2015, "step": 7417500 }, { "epoch": 36.75, "learning_rate": 3.16309309711013e-05, "loss": 2.1799, "step": 7418000 }, { "epoch": 36.75, "learning_rate": 3.162969238467522e-05, "loss": 2.1741, "step": 7418500 }, { "epoch": 36.76, "learning_rate": 3.162845379824914e-05, "loss": 2.1589, "step": 7419000 }, { "epoch": 36.76, "learning_rate": 3.1627215211823054e-05, "loss": 2.1929, "step": 7419500 }, { "epoch": 36.76, "learning_rate": 3.162597662539697e-05, "loss": 2.1637, "step": 7420000 }, { "epoch": 36.76, "learning_rate": 3.162473803897089e-05, "loss": 2.1771, "step": 7420500 }, { "epoch": 36.77, "learning_rate": 3.1623499452544805e-05, "loss": 2.193, "step": 7421000 }, { "epoch": 36.77, "learning_rate": 3.162226086611872e-05, "loss": 2.197, "step": 7421500 }, { "epoch": 36.77, "learning_rate": 3.162102227969264e-05, "loss": 2.179, "step": 7422000 }, { "epoch": 36.77, "learning_rate": 3.16197861704394e-05, "loss": 2.1923, "step": 7422500 }, { "epoch": 36.78, "learning_rate": 3.161854758401332e-05, "loss": 2.2144, "step": 7423000 }, { "epoch": 36.78, "learning_rate": 3.1617308997587234e-05, "loss": 2.1794, "step": 7423500 }, { "epoch": 36.78, "learning_rate": 3.161607041116115e-05, "loss": 2.1906, "step": 7424000 }, { "epoch": 36.78, "learning_rate": 3.161483182473507e-05, "loss": 2.177, "step": 7424500 }, { "epoch": 36.79, "learning_rate": 3.1613593238308985e-05, "loss": 2.1639, "step": 7425000 }, { "epoch": 36.79, "learning_rate": 3.16123546518829e-05, "loss": 2.1855, "step": 7425500 }, { "epoch": 36.79, "learning_rate": 3.161111606545682e-05, "loss": 2.1537, "step": 7426000 }, { "epoch": 36.79, "learning_rate": 3.160987747903073e-05, "loss": 2.1746, "step": 7426500 }, { "epoch": 36.8, "learning_rate": 3.1608638892604646e-05, "loss": 2.1806, "step": 7427000 }, { "epoch": 36.8, "learning_rate": 3.160740030617856e-05, "loss": 2.1547, "step": 7427500 }, { "epoch": 36.8, "learning_rate": 3.160616171975248e-05, "loss": 2.1845, "step": 7428000 }, { "epoch": 36.8, "learning_rate": 3.1604925610499256e-05, "loss": 2.1727, "step": 7428500 }, { "epoch": 36.81, "learning_rate": 3.160368702407317e-05, "loss": 2.1838, "step": 7429000 }, { "epoch": 36.81, "learning_rate": 3.160244843764709e-05, "loss": 2.1918, "step": 7429500 }, { "epoch": 36.81, "learning_rate": 3.1601209851221e-05, "loss": 2.2049, "step": 7430000 }, { "epoch": 36.81, "learning_rate": 3.1599971264794916e-05, "loss": 2.1743, "step": 7430500 }, { "epoch": 36.82, "learning_rate": 3.1598735155541685e-05, "loss": 2.1785, "step": 7431000 }, { "epoch": 36.82, "learning_rate": 3.15974965691156e-05, "loss": 2.1674, "step": 7431500 }, { "epoch": 36.82, "learning_rate": 3.159625798268952e-05, "loss": 2.1937, "step": 7432000 }, { "epoch": 36.82, "learning_rate": 3.1595019396263436e-05, "loss": 2.1767, "step": 7432500 }, { "epoch": 36.83, "learning_rate": 3.1593780809837346e-05, "loss": 2.1666, "step": 7433000 }, { "epoch": 36.83, "learning_rate": 3.159254470058412e-05, "loss": 2.1748, "step": 7433500 }, { "epoch": 36.83, "learning_rate": 3.159130611415804e-05, "loss": 2.2004, "step": 7434000 }, { "epoch": 36.83, "learning_rate": 3.1590067527731956e-05, "loss": 2.1619, "step": 7434500 }, { "epoch": 36.84, "learning_rate": 3.158883141847872e-05, "loss": 2.1845, "step": 7435000 }, { "epoch": 36.84, "learning_rate": 3.1587592832052635e-05, "loss": 2.1957, "step": 7435500 }, { "epoch": 36.84, "learning_rate": 3.1586356722799403e-05, "loss": 2.1968, "step": 7436000 }, { "epoch": 36.84, "learning_rate": 3.158511813637332e-05, "loss": 2.1949, "step": 7436500 }, { "epoch": 36.85, "learning_rate": 3.158387954994724e-05, "loss": 2.183, "step": 7437000 }, { "epoch": 36.85, "learning_rate": 3.1582640963521154e-05, "loss": 2.1959, "step": 7437500 }, { "epoch": 36.85, "learning_rate": 3.158140485426792e-05, "loss": 2.1838, "step": 7438000 }, { "epoch": 36.85, "learning_rate": 3.158016874501469e-05, "loss": 2.1668, "step": 7438500 }, { "epoch": 36.86, "learning_rate": 3.157893015858861e-05, "loss": 2.1742, "step": 7439000 }, { "epoch": 36.86, "learning_rate": 3.1577691572162526e-05, "loss": 2.2019, "step": 7439500 }, { "epoch": 36.86, "learning_rate": 3.1576452985736436e-05, "loss": 2.1639, "step": 7440000 }, { "epoch": 36.86, "learning_rate": 3.157521687648321e-05, "loss": 2.1704, "step": 7440500 }, { "epoch": 36.87, "learning_rate": 3.157397829005713e-05, "loss": 2.1903, "step": 7441000 }, { "epoch": 36.87, "learning_rate": 3.1572739703631045e-05, "loss": 2.2107, "step": 7441500 }, { "epoch": 36.87, "learning_rate": 3.157150111720496e-05, "loss": 2.1606, "step": 7442000 }, { "epoch": 36.87, "learning_rate": 3.157026253077888e-05, "loss": 2.1743, "step": 7442500 }, { "epoch": 36.88, "learning_rate": 3.156902394435279e-05, "loss": 2.1895, "step": 7443000 }, { "epoch": 36.88, "learning_rate": 3.1567785357926706e-05, "loss": 2.2057, "step": 7443500 }, { "epoch": 36.88, "learning_rate": 3.156654677150062e-05, "loss": 2.1841, "step": 7444000 }, { "epoch": 36.88, "learning_rate": 3.156530818507454e-05, "loss": 2.1785, "step": 7444500 }, { "epoch": 36.89, "learning_rate": 3.156406959864846e-05, "loss": 2.1737, "step": 7445000 }, { "epoch": 36.89, "learning_rate": 3.1562831012222374e-05, "loss": 2.1713, "step": 7445500 }, { "epoch": 36.89, "learning_rate": 3.156159242579629e-05, "loss": 2.17, "step": 7446000 }, { "epoch": 36.89, "learning_rate": 3.156035383937021e-05, "loss": 2.1862, "step": 7446500 }, { "epoch": 36.9, "learning_rate": 3.1559115252944125e-05, "loss": 2.1914, "step": 7447000 }, { "epoch": 36.9, "learning_rate": 3.1557876666518035e-05, "loss": 2.1914, "step": 7447500 }, { "epoch": 36.9, "learning_rate": 3.155663808009195e-05, "loss": 2.1828, "step": 7448000 }, { "epoch": 36.9, "learning_rate": 3.155539949366587e-05, "loss": 2.1949, "step": 7448500 }, { "epoch": 36.9, "learning_rate": 3.1554160907239786e-05, "loss": 2.1743, "step": 7449000 }, { "epoch": 36.91, "learning_rate": 3.1552927275159406e-05, "loss": 2.1951, "step": 7449500 }, { "epoch": 36.91, "learning_rate": 3.1551691165906175e-05, "loss": 2.2043, "step": 7450000 }, { "epoch": 36.91, "learning_rate": 3.155045257948009e-05, "loss": 2.172, "step": 7450500 }, { "epoch": 36.91, "learning_rate": 3.154921399305401e-05, "loss": 2.1704, "step": 7451000 }, { "epoch": 36.92, "learning_rate": 3.1547975406627926e-05, "loss": 2.1903, "step": 7451500 }, { "epoch": 36.92, "learning_rate": 3.154673682020184e-05, "loss": 2.1893, "step": 7452000 }, { "epoch": 36.92, "learning_rate": 3.154549823377575e-05, "loss": 2.1799, "step": 7452500 }, { "epoch": 36.92, "learning_rate": 3.154425964734967e-05, "loss": 2.1961, "step": 7453000 }, { "epoch": 36.93, "learning_rate": 3.154302106092359e-05, "loss": 2.1879, "step": 7453500 }, { "epoch": 36.93, "learning_rate": 3.1541782474497504e-05, "loss": 2.2118, "step": 7454000 }, { "epoch": 36.93, "learning_rate": 3.154054388807142e-05, "loss": 2.1753, "step": 7454500 }, { "epoch": 36.93, "learning_rate": 3.153930530164534e-05, "loss": 2.1622, "step": 7455000 }, { "epoch": 36.94, "learning_rate": 3.1538066715219255e-05, "loss": 2.1666, "step": 7455500 }, { "epoch": 36.94, "learning_rate": 3.153682812879317e-05, "loss": 2.18, "step": 7456000 }, { "epoch": 36.94, "learning_rate": 3.153558954236709e-05, "loss": 2.2071, "step": 7456500 }, { "epoch": 36.94, "learning_rate": 3.153435343311386e-05, "loss": 2.1606, "step": 7457000 }, { "epoch": 36.95, "learning_rate": 3.1533114846687774e-05, "loss": 2.183, "step": 7457500 }, { "epoch": 36.95, "learning_rate": 3.153187626026169e-05, "loss": 2.176, "step": 7458000 }, { "epoch": 36.95, "learning_rate": 3.153063767383561e-05, "loss": 2.1672, "step": 7458500 }, { "epoch": 36.95, "learning_rate": 3.1529399087409525e-05, "loss": 2.1663, "step": 7459000 }, { "epoch": 36.96, "learning_rate": 3.152816050098344e-05, "loss": 2.2065, "step": 7459500 }, { "epoch": 36.96, "learning_rate": 3.152692191455736e-05, "loss": 2.1735, "step": 7460000 }, { "epoch": 36.96, "learning_rate": 3.1525683328131276e-05, "loss": 2.1947, "step": 7460500 }, { "epoch": 36.96, "learning_rate": 3.152444721887804e-05, "loss": 2.206, "step": 7461000 }, { "epoch": 36.97, "learning_rate": 3.1523211109624807e-05, "loss": 2.1445, "step": 7461500 }, { "epoch": 36.97, "learning_rate": 3.1521975000371575e-05, "loss": 2.1961, "step": 7462000 }, { "epoch": 36.97, "learning_rate": 3.152073641394549e-05, "loss": 2.1584, "step": 7462500 }, { "epoch": 36.97, "learning_rate": 3.151949782751941e-05, "loss": 2.161, "step": 7463000 }, { "epoch": 36.98, "learning_rate": 3.151826171826618e-05, "loss": 2.1464, "step": 7463500 }, { "epoch": 36.98, "learning_rate": 3.1517023131840095e-05, "loss": 2.1763, "step": 7464000 }, { "epoch": 36.98, "learning_rate": 3.151578454541401e-05, "loss": 2.1944, "step": 7464500 }, { "epoch": 36.98, "learning_rate": 3.151454843616078e-05, "loss": 2.179, "step": 7465000 }, { "epoch": 36.99, "learning_rate": 3.15133098497347e-05, "loss": 2.204, "step": 7465500 }, { "epoch": 36.99, "learning_rate": 3.1512071263308615e-05, "loss": 2.179, "step": 7466000 }, { "epoch": 36.99, "learning_rate": 3.151083515405538e-05, "loss": 2.1788, "step": 7466500 }, { "epoch": 36.99, "learning_rate": 3.1509596567629294e-05, "loss": 2.2008, "step": 7467000 }, { "epoch": 37.0, "learning_rate": 3.150835798120321e-05, "loss": 2.168, "step": 7467500 }, { "epoch": 37.0, "learning_rate": 3.150711939477713e-05, "loss": 2.1852, "step": 7468000 }, { "epoch": 37.0, "eval_accuracy": 0.662816636573663, "eval_accuracy_mlm": 0.6197658435400042, "eval_accuracy_nsp": 0.8659980624335677, "eval_loss": 2.305152416229248, "eval_runtime": 145.847, "eval_samples_per_second": 1748.126, "eval_steps_per_second": 72.843, "step": 7468191 }, { "epoch": 37.0, "learning_rate": 3.1505880808351044e-05, "loss": 2.1681, "step": 7468500 }, { "epoch": 37.0, "learning_rate": 3.150464222192496e-05, "loss": 2.1523, "step": 7469000 }, { "epoch": 37.01, "learning_rate": 3.150340363549888e-05, "loss": 2.1706, "step": 7469500 }, { "epoch": 37.01, "learning_rate": 3.1502165049072795e-05, "loss": 2.1522, "step": 7470000 }, { "epoch": 37.01, "learning_rate": 3.150092646264671e-05, "loss": 2.1346, "step": 7470500 }, { "epoch": 37.01, "learning_rate": 3.149968787622063e-05, "loss": 2.1607, "step": 7471000 }, { "epoch": 37.02, "learning_rate": 3.1498449289794546e-05, "loss": 2.1727, "step": 7471500 }, { "epoch": 37.02, "learning_rate": 3.149721070336846e-05, "loss": 2.1578, "step": 7472000 }, { "epoch": 37.02, "learning_rate": 3.149597459411523e-05, "loss": 2.1535, "step": 7472500 }, { "epoch": 37.02, "learning_rate": 3.149473600768915e-05, "loss": 2.1529, "step": 7473000 }, { "epoch": 37.03, "learning_rate": 3.1493497421263066e-05, "loss": 2.1526, "step": 7473500 }, { "epoch": 37.03, "learning_rate": 3.149225883483698e-05, "loss": 2.1306, "step": 7474000 }, { "epoch": 37.03, "learning_rate": 3.1491022725583744e-05, "loss": 2.1446, "step": 7474500 }, { "epoch": 37.03, "learning_rate": 3.148978413915766e-05, "loss": 2.1611, "step": 7475000 }, { "epoch": 37.04, "learning_rate": 3.148854555273158e-05, "loss": 2.1508, "step": 7475500 }, { "epoch": 37.04, "learning_rate": 3.1487306966305495e-05, "loss": 2.1516, "step": 7476000 }, { "epoch": 37.04, "learning_rate": 3.148606837987941e-05, "loss": 2.1576, "step": 7476500 }, { "epoch": 37.04, "learning_rate": 3.148482979345333e-05, "loss": 2.1554, "step": 7477000 }, { "epoch": 37.05, "learning_rate": 3.1483591207027246e-05, "loss": 2.1549, "step": 7477500 }, { "epoch": 37.05, "learning_rate": 3.148235262060116e-05, "loss": 2.137, "step": 7478000 }, { "epoch": 37.05, "learning_rate": 3.148111403417508e-05, "loss": 2.1573, "step": 7478500 }, { "epoch": 37.05, "learning_rate": 3.1479875447749e-05, "loss": 2.1439, "step": 7479000 }, { "epoch": 37.06, "learning_rate": 3.1478636861322914e-05, "loss": 2.1432, "step": 7479500 }, { "epoch": 37.06, "learning_rate": 3.147739827489683e-05, "loss": 2.1633, "step": 7480000 }, { "epoch": 37.06, "learning_rate": 3.14761621656436e-05, "loss": 2.1335, "step": 7480500 }, { "epoch": 37.06, "learning_rate": 3.1474923579217516e-05, "loss": 2.1549, "step": 7481000 }, { "epoch": 37.07, "learning_rate": 3.147368499279143e-05, "loss": 2.1481, "step": 7481500 }, { "epoch": 37.07, "learning_rate": 3.1472446406365344e-05, "loss": 2.144, "step": 7482000 }, { "epoch": 37.07, "learning_rate": 3.147121029711211e-05, "loss": 2.1578, "step": 7482500 }, { "epoch": 37.07, "learning_rate": 3.146997418785888e-05, "loss": 2.1529, "step": 7483000 }, { "epoch": 37.08, "learning_rate": 3.14687356014328e-05, "loss": 2.1599, "step": 7483500 }, { "epoch": 37.08, "learning_rate": 3.1467497015006715e-05, "loss": 2.1403, "step": 7484000 }, { "epoch": 37.08, "learning_rate": 3.146625842858063e-05, "loss": 2.1674, "step": 7484500 }, { "epoch": 37.08, "learning_rate": 3.146501984215455e-05, "loss": 2.1574, "step": 7485000 }, { "epoch": 37.09, "learning_rate": 3.1463781255728466e-05, "loss": 2.1732, "step": 7485500 }, { "epoch": 37.09, "learning_rate": 3.146254266930238e-05, "loss": 2.1428, "step": 7486000 }, { "epoch": 37.09, "learning_rate": 3.14613040828763e-05, "loss": 2.1599, "step": 7486500 }, { "epoch": 37.09, "learning_rate": 3.1460065496450217e-05, "loss": 2.138, "step": 7487000 }, { "epoch": 37.1, "learning_rate": 3.1458826910024133e-05, "loss": 2.1664, "step": 7487500 }, { "epoch": 37.1, "learning_rate": 3.145758832359805e-05, "loss": 2.1581, "step": 7488000 }, { "epoch": 37.1, "learning_rate": 3.145635221434481e-05, "loss": 2.176, "step": 7488500 }, { "epoch": 37.1, "learning_rate": 3.145511362791873e-05, "loss": 2.1708, "step": 7489000 }, { "epoch": 37.11, "learning_rate": 3.1453875041492646e-05, "loss": 2.1769, "step": 7489500 }, { "epoch": 37.11, "learning_rate": 3.145263645506656e-05, "loss": 2.1438, "step": 7490000 }, { "epoch": 37.11, "learning_rate": 3.145139786864048e-05, "loss": 2.1697, "step": 7490500 }, { "epoch": 37.11, "learning_rate": 3.14501592822144e-05, "loss": 2.1765, "step": 7491000 }, { "epoch": 37.12, "learning_rate": 3.1448920695788314e-05, "loss": 2.1288, "step": 7491500 }, { "epoch": 37.12, "learning_rate": 3.144768210936223e-05, "loss": 2.143, "step": 7492000 }, { "epoch": 37.12, "learning_rate": 3.1446446000109e-05, "loss": 2.1439, "step": 7492500 }, { "epoch": 37.12, "learning_rate": 3.144520741368292e-05, "loss": 2.1486, "step": 7493000 }, { "epoch": 37.13, "learning_rate": 3.1443968827256834e-05, "loss": 2.1771, "step": 7493500 }, { "epoch": 37.13, "learning_rate": 3.144273024083075e-05, "loss": 2.1703, "step": 7494000 }, { "epoch": 37.13, "learning_rate": 3.144149165440467e-05, "loss": 2.1512, "step": 7494500 }, { "epoch": 37.13, "learning_rate": 3.1440253067978584e-05, "loss": 2.1605, "step": 7495000 }, { "epoch": 37.14, "learning_rate": 3.1439014481552495e-05, "loss": 2.1528, "step": 7495500 }, { "epoch": 37.14, "learning_rate": 3.143777589512641e-05, "loss": 2.1338, "step": 7496000 }, { "epoch": 37.14, "learning_rate": 3.143653978587318e-05, "loss": 2.1437, "step": 7496500 }, { "epoch": 37.14, "learning_rate": 3.14353011994471e-05, "loss": 2.1349, "step": 7497000 }, { "epoch": 37.15, "learning_rate": 3.1434062613021014e-05, "loss": 2.1594, "step": 7497500 }, { "epoch": 37.15, "learning_rate": 3.143282650376778e-05, "loss": 2.1769, "step": 7498000 }, { "epoch": 37.15, "learning_rate": 3.14315879173417e-05, "loss": 2.1253, "step": 7498500 }, { "epoch": 37.15, "learning_rate": 3.143034933091562e-05, "loss": 2.1595, "step": 7499000 }, { "epoch": 37.16, "learning_rate": 3.1429110744489534e-05, "loss": 2.1514, "step": 7499500 }, { "epoch": 37.16, "learning_rate": 3.142787215806345e-05, "loss": 2.1622, "step": 7500000 }, { "epoch": 37.16, "learning_rate": 3.142663357163737e-05, "loss": 2.1447, "step": 7500500 }, { "epoch": 37.16, "learning_rate": 3.1425394985211285e-05, "loss": 2.1597, "step": 7501000 }, { "epoch": 37.17, "learning_rate": 3.14241563987852e-05, "loss": 2.1814, "step": 7501500 }, { "epoch": 37.17, "learning_rate": 3.142291781235912e-05, "loss": 2.1547, "step": 7502000 }, { "epoch": 37.17, "learning_rate": 3.142168170310588e-05, "loss": 2.1666, "step": 7502500 }, { "epoch": 37.17, "learning_rate": 3.14204431166798e-05, "loss": 2.1768, "step": 7503000 }, { "epoch": 37.17, "learning_rate": 3.1419204530253714e-05, "loss": 2.1721, "step": 7503500 }, { "epoch": 37.18, "learning_rate": 3.141796842100048e-05, "loss": 2.1622, "step": 7504000 }, { "epoch": 37.18, "learning_rate": 3.14167298345744e-05, "loss": 2.1507, "step": 7504500 }, { "epoch": 37.18, "learning_rate": 3.141549372532117e-05, "loss": 2.152, "step": 7505000 }, { "epoch": 37.18, "learning_rate": 3.141425513889508e-05, "loss": 2.1878, "step": 7505500 }, { "epoch": 37.19, "learning_rate": 3.1413016552468996e-05, "loss": 2.1722, "step": 7506000 }, { "epoch": 37.19, "learning_rate": 3.141177796604291e-05, "loss": 2.1489, "step": 7506500 }, { "epoch": 37.19, "learning_rate": 3.141053937961683e-05, "loss": 2.1713, "step": 7507000 }, { "epoch": 37.19, "learning_rate": 3.140930079319075e-05, "loss": 2.1761, "step": 7507500 }, { "epoch": 37.2, "learning_rate": 3.1408062206764664e-05, "loss": 2.1787, "step": 7508000 }, { "epoch": 37.2, "learning_rate": 3.140682362033858e-05, "loss": 2.1452, "step": 7508500 }, { "epoch": 37.2, "learning_rate": 3.140558751108535e-05, "loss": 2.1495, "step": 7509000 }, { "epoch": 37.2, "learning_rate": 3.140435140183212e-05, "loss": 2.165, "step": 7509500 }, { "epoch": 37.21, "learning_rate": 3.1403112815406035e-05, "loss": 2.1504, "step": 7510000 }, { "epoch": 37.21, "learning_rate": 3.140187422897995e-05, "loss": 2.1651, "step": 7510500 }, { "epoch": 37.21, "learning_rate": 3.140063564255387e-05, "loss": 2.1685, "step": 7511000 }, { "epoch": 37.21, "learning_rate": 3.1399397056127786e-05, "loss": 2.199, "step": 7511500 }, { "epoch": 37.22, "learning_rate": 3.13981584697017e-05, "loss": 2.1633, "step": 7512000 }, { "epoch": 37.22, "learning_rate": 3.139691988327561e-05, "loss": 2.1725, "step": 7512500 }, { "epoch": 37.22, "learning_rate": 3.139568129684953e-05, "loss": 2.1644, "step": 7513000 }, { "epoch": 37.22, "learning_rate": 3.139444271042345e-05, "loss": 2.1411, "step": 7513500 }, { "epoch": 37.23, "learning_rate": 3.1393204123997364e-05, "loss": 2.1723, "step": 7514000 }, { "epoch": 37.23, "learning_rate": 3.139196553757128e-05, "loss": 2.1631, "step": 7514500 }, { "epoch": 37.23, "learning_rate": 3.139072942831805e-05, "loss": 2.1702, "step": 7515000 }, { "epoch": 37.23, "learning_rate": 3.1389490841891966e-05, "loss": 2.1552, "step": 7515500 }, { "epoch": 37.24, "learning_rate": 3.138825225546588e-05, "loss": 2.1477, "step": 7516000 }, { "epoch": 37.24, "learning_rate": 3.13870136690398e-05, "loss": 2.142, "step": 7516500 }, { "epoch": 37.24, "learning_rate": 3.138577508261372e-05, "loss": 2.1627, "step": 7517000 }, { "epoch": 37.24, "learning_rate": 3.1384536496187634e-05, "loss": 2.1808, "step": 7517500 }, { "epoch": 37.25, "learning_rate": 3.138329790976155e-05, "loss": 2.1587, "step": 7518000 }, { "epoch": 37.25, "learning_rate": 3.138205932333547e-05, "loss": 2.163, "step": 7518500 }, { "epoch": 37.25, "learning_rate": 3.1380820736909385e-05, "loss": 2.1469, "step": 7519000 }, { "epoch": 37.25, "learning_rate": 3.13795821504833e-05, "loss": 2.1555, "step": 7519500 }, { "epoch": 37.26, "learning_rate": 3.137834356405722e-05, "loss": 2.1745, "step": 7520000 }, { "epoch": 37.26, "learning_rate": 3.1377104977631136e-05, "loss": 2.1535, "step": 7520500 }, { "epoch": 37.26, "learning_rate": 3.1375866391205046e-05, "loss": 2.177, "step": 7521000 }, { "epoch": 37.26, "learning_rate": 3.137462780477896e-05, "loss": 2.1823, "step": 7521500 }, { "epoch": 37.27, "learning_rate": 3.137338921835288e-05, "loss": 2.1531, "step": 7522000 }, { "epoch": 37.27, "learning_rate": 3.1372150631926797e-05, "loss": 2.154, "step": 7522500 }, { "epoch": 37.27, "learning_rate": 3.1370912045500714e-05, "loss": 2.1678, "step": 7523000 }, { "epoch": 37.27, "learning_rate": 3.136967593624748e-05, "loss": 2.1881, "step": 7523500 }, { "epoch": 37.28, "learning_rate": 3.136843734982139e-05, "loss": 2.1854, "step": 7524000 }, { "epoch": 37.28, "learning_rate": 3.136719876339531e-05, "loss": 2.1702, "step": 7524500 }, { "epoch": 37.28, "learning_rate": 3.1365960176969226e-05, "loss": 2.1338, "step": 7525000 }, { "epoch": 37.28, "learning_rate": 3.1364724067716e-05, "loss": 2.178, "step": 7525500 }, { "epoch": 37.29, "learning_rate": 3.1363487958462764e-05, "loss": 2.1832, "step": 7526000 }, { "epoch": 37.29, "learning_rate": 3.136224937203668e-05, "loss": 2.1513, "step": 7526500 }, { "epoch": 37.29, "learning_rate": 3.13610107856106e-05, "loss": 2.1825, "step": 7527000 }, { "epoch": 37.29, "learning_rate": 3.1359772199184515e-05, "loss": 2.1724, "step": 7527500 }, { "epoch": 37.3, "learning_rate": 3.135853361275843e-05, "loss": 2.1605, "step": 7528000 }, { "epoch": 37.3, "learning_rate": 3.135729502633235e-05, "loss": 2.1593, "step": 7528500 }, { "epoch": 37.3, "learning_rate": 3.1356056439906266e-05, "loss": 2.1526, "step": 7529000 }, { "epoch": 37.3, "learning_rate": 3.135481785348018e-05, "loss": 2.1773, "step": 7529500 }, { "epoch": 37.31, "learning_rate": 3.13535792670541e-05, "loss": 2.1671, "step": 7530000 }, { "epoch": 37.31, "learning_rate": 3.135234563497372e-05, "loss": 2.1669, "step": 7530500 }, { "epoch": 37.31, "learning_rate": 3.135110704854764e-05, "loss": 2.1934, "step": 7531000 }, { "epoch": 37.31, "learning_rate": 3.1349868462121554e-05, "loss": 2.1723, "step": 7531500 }, { "epoch": 37.32, "learning_rate": 3.134862987569547e-05, "loss": 2.1526, "step": 7532000 }, { "epoch": 37.32, "learning_rate": 3.134739376644223e-05, "loss": 2.1498, "step": 7532500 }, { "epoch": 37.32, "learning_rate": 3.134615518001615e-05, "loss": 2.1688, "step": 7533000 }, { "epoch": 37.32, "learning_rate": 3.134491659359007e-05, "loss": 2.1782, "step": 7533500 }, { "epoch": 37.33, "learning_rate": 3.1343678007163984e-05, "loss": 2.1479, "step": 7534000 }, { "epoch": 37.33, "learning_rate": 3.13424394207379e-05, "loss": 2.1636, "step": 7534500 }, { "epoch": 37.33, "learning_rate": 3.134120083431182e-05, "loss": 2.1481, "step": 7535000 }, { "epoch": 37.33, "learning_rate": 3.1339962247885734e-05, "loss": 2.1379, "step": 7535500 }, { "epoch": 37.34, "learning_rate": 3.133872366145965e-05, "loss": 2.1667, "step": 7536000 }, { "epoch": 37.34, "learning_rate": 3.133748755220642e-05, "loss": 2.1851, "step": 7536500 }, { "epoch": 37.34, "learning_rate": 3.133624896578034e-05, "loss": 2.1634, "step": 7537000 }, { "epoch": 37.34, "learning_rate": 3.1335010379354254e-05, "loss": 2.1672, "step": 7537500 }, { "epoch": 37.35, "learning_rate": 3.1333774270101016e-05, "loss": 2.1684, "step": 7538000 }, { "epoch": 37.35, "learning_rate": 3.133253568367493e-05, "loss": 2.1728, "step": 7538500 }, { "epoch": 37.35, "learning_rate": 3.133129709724885e-05, "loss": 2.1735, "step": 7539000 }, { "epoch": 37.35, "learning_rate": 3.133005851082277e-05, "loss": 2.1688, "step": 7539500 }, { "epoch": 37.36, "learning_rate": 3.1328819924396684e-05, "loss": 2.1657, "step": 7540000 }, { "epoch": 37.36, "learning_rate": 3.13275813379706e-05, "loss": 2.1685, "step": 7540500 }, { "epoch": 37.36, "learning_rate": 3.132634275154452e-05, "loss": 2.168, "step": 7541000 }, { "epoch": 37.36, "learning_rate": 3.1325104165118435e-05, "loss": 2.1783, "step": 7541500 }, { "epoch": 37.37, "learning_rate": 3.132386557869235e-05, "loss": 2.1726, "step": 7542000 }, { "epoch": 37.37, "learning_rate": 3.132262946943912e-05, "loss": 2.1833, "step": 7542500 }, { "epoch": 37.37, "learning_rate": 3.132139088301304e-05, "loss": 2.1462, "step": 7543000 }, { "epoch": 37.37, "learning_rate": 3.13201547737598e-05, "loss": 2.1548, "step": 7543500 }, { "epoch": 37.38, "learning_rate": 3.1318916187333716e-05, "loss": 2.1871, "step": 7544000 }, { "epoch": 37.38, "learning_rate": 3.131767760090763e-05, "loss": 2.151, "step": 7544500 }, { "epoch": 37.38, "learning_rate": 3.131643901448155e-05, "loss": 2.1504, "step": 7545000 }, { "epoch": 37.38, "learning_rate": 3.131520042805547e-05, "loss": 2.1335, "step": 7545500 }, { "epoch": 37.39, "learning_rate": 3.1313961841629384e-05, "loss": 2.1584, "step": 7546000 }, { "epoch": 37.39, "learning_rate": 3.13127232552033e-05, "loss": 2.1635, "step": 7546500 }, { "epoch": 37.39, "learning_rate": 3.131148714595007e-05, "loss": 2.1373, "step": 7547000 }, { "epoch": 37.39, "learning_rate": 3.1310248559523987e-05, "loss": 2.1691, "step": 7547500 }, { "epoch": 37.4, "learning_rate": 3.1309009973097904e-05, "loss": 2.1757, "step": 7548000 }, { "epoch": 37.4, "learning_rate": 3.130777386384467e-05, "loss": 2.1534, "step": 7548500 }, { "epoch": 37.4, "learning_rate": 3.130653527741859e-05, "loss": 2.1707, "step": 7549000 }, { "epoch": 37.4, "learning_rate": 3.1305296690992506e-05, "loss": 2.1685, "step": 7549500 }, { "epoch": 37.41, "learning_rate": 3.1304058104566416e-05, "loss": 2.1556, "step": 7550000 }, { "epoch": 37.41, "learning_rate": 3.130281951814033e-05, "loss": 2.1833, "step": 7550500 }, { "epoch": 37.41, "learning_rate": 3.130158093171425e-05, "loss": 2.1858, "step": 7551000 }, { "epoch": 37.41, "learning_rate": 3.130034234528817e-05, "loss": 2.1862, "step": 7551500 }, { "epoch": 37.42, "learning_rate": 3.129910623603494e-05, "loss": 2.1598, "step": 7552000 }, { "epoch": 37.42, "learning_rate": 3.129786764960886e-05, "loss": 2.1699, "step": 7552500 }, { "epoch": 37.42, "learning_rate": 3.129663154035563e-05, "loss": 2.1736, "step": 7553000 }, { "epoch": 37.42, "learning_rate": 3.1295392953929545e-05, "loss": 2.162, "step": 7553500 }, { "epoch": 37.43, "learning_rate": 3.1294154367503456e-05, "loss": 2.1583, "step": 7554000 }, { "epoch": 37.43, "learning_rate": 3.129291578107737e-05, "loss": 2.1947, "step": 7554500 }, { "epoch": 37.43, "learning_rate": 3.129167719465129e-05, "loss": 2.1973, "step": 7555000 }, { "epoch": 37.43, "learning_rate": 3.129044108539806e-05, "loss": 2.1579, "step": 7555500 }, { "epoch": 37.44, "learning_rate": 3.1289202498971975e-05, "loss": 2.1586, "step": 7556000 }, { "epoch": 37.44, "learning_rate": 3.128796391254589e-05, "loss": 2.1745, "step": 7556500 }, { "epoch": 37.44, "learning_rate": 3.128672532611981e-05, "loss": 2.1567, "step": 7557000 }, { "epoch": 37.44, "learning_rate": 3.1285486739693726e-05, "loss": 2.1785, "step": 7557500 }, { "epoch": 37.44, "learning_rate": 3.1284250630440495e-05, "loss": 2.1605, "step": 7558000 }, { "epoch": 37.45, "learning_rate": 3.128301204401441e-05, "loss": 2.17, "step": 7558500 }, { "epoch": 37.45, "learning_rate": 3.128177345758833e-05, "loss": 2.1761, "step": 7559000 }, { "epoch": 37.45, "learning_rate": 3.1280534871162246e-05, "loss": 2.1689, "step": 7559500 }, { "epoch": 37.45, "learning_rate": 3.127929628473616e-05, "loss": 2.1641, "step": 7560000 }, { "epoch": 37.46, "learning_rate": 3.1278060175482924e-05, "loss": 2.1337, "step": 7560500 }, { "epoch": 37.46, "learning_rate": 3.127682158905684e-05, "loss": 2.1564, "step": 7561000 }, { "epoch": 37.46, "learning_rate": 3.127558300263076e-05, "loss": 2.1722, "step": 7561500 }, { "epoch": 37.46, "learning_rate": 3.1274344416204675e-05, "loss": 2.1591, "step": 7562000 }, { "epoch": 37.47, "learning_rate": 3.127310582977859e-05, "loss": 2.1634, "step": 7562500 }, { "epoch": 37.47, "learning_rate": 3.127186724335251e-05, "loss": 2.1877, "step": 7563000 }, { "epoch": 37.47, "learning_rate": 3.1270628656926426e-05, "loss": 2.159, "step": 7563500 }, { "epoch": 37.47, "learning_rate": 3.126939007050034e-05, "loss": 2.1643, "step": 7564000 }, { "epoch": 37.48, "learning_rate": 3.126815148407426e-05, "loss": 2.1624, "step": 7564500 }, { "epoch": 37.48, "learning_rate": 3.126691289764818e-05, "loss": 2.1848, "step": 7565000 }, { "epoch": 37.48, "learning_rate": 3.126567431122209e-05, "loss": 2.1678, "step": 7565500 }, { "epoch": 37.48, "learning_rate": 3.1264435724796004e-05, "loss": 2.1883, "step": 7566000 }, { "epoch": 37.49, "learning_rate": 3.126319713836992e-05, "loss": 2.174, "step": 7566500 }, { "epoch": 37.49, "learning_rate": 3.126195855194384e-05, "loss": 2.1423, "step": 7567000 }, { "epoch": 37.49, "learning_rate": 3.1260719965517755e-05, "loss": 2.1437, "step": 7567500 }, { "epoch": 37.49, "learning_rate": 3.1259483856264523e-05, "loss": 2.1632, "step": 7568000 }, { "epoch": 37.5, "learning_rate": 3.125824526983844e-05, "loss": 2.1706, "step": 7568500 }, { "epoch": 37.5, "learning_rate": 3.125700668341236e-05, "loss": 2.139, "step": 7569000 }, { "epoch": 37.5, "learning_rate": 3.1255770574159126e-05, "loss": 2.1831, "step": 7569500 }, { "epoch": 37.5, "learning_rate": 3.125453198773304e-05, "loss": 2.1732, "step": 7570000 }, { "epoch": 37.51, "learning_rate": 3.125329340130696e-05, "loss": 2.1728, "step": 7570500 }, { "epoch": 37.51, "learning_rate": 3.125205481488088e-05, "loss": 2.1557, "step": 7571000 }, { "epoch": 37.51, "learning_rate": 3.125081622845479e-05, "loss": 2.1746, "step": 7571500 }, { "epoch": 37.51, "learning_rate": 3.1249577642028704e-05, "loss": 2.1592, "step": 7572000 }, { "epoch": 37.52, "learning_rate": 3.124833905560262e-05, "loss": 2.1405, "step": 7572500 }, { "epoch": 37.52, "learning_rate": 3.124710542352224e-05, "loss": 2.161, "step": 7573000 }, { "epoch": 37.52, "learning_rate": 3.124586683709616e-05, "loss": 2.1546, "step": 7573500 }, { "epoch": 37.52, "learning_rate": 3.1244628250670076e-05, "loss": 2.161, "step": 7574000 }, { "epoch": 37.53, "learning_rate": 3.124338966424399e-05, "loss": 2.1624, "step": 7574500 }, { "epoch": 37.53, "learning_rate": 3.124215603216361e-05, "loss": 2.1927, "step": 7575000 }, { "epoch": 37.53, "learning_rate": 3.124091744573753e-05, "loss": 2.1766, "step": 7575500 }, { "epoch": 37.53, "learning_rate": 3.123967885931145e-05, "loss": 2.1885, "step": 7576000 }, { "epoch": 37.54, "learning_rate": 3.1238440272885364e-05, "loss": 2.1728, "step": 7576500 }, { "epoch": 37.54, "learning_rate": 3.123720168645928e-05, "loss": 2.1817, "step": 7577000 }, { "epoch": 37.54, "learning_rate": 3.123596310003319e-05, "loss": 2.2075, "step": 7577500 }, { "epoch": 37.54, "learning_rate": 3.123472451360711e-05, "loss": 2.1638, "step": 7578000 }, { "epoch": 37.55, "learning_rate": 3.1233485927181025e-05, "loss": 2.1478, "step": 7578500 }, { "epoch": 37.55, "learning_rate": 3.123224734075494e-05, "loss": 2.1594, "step": 7579000 }, { "epoch": 37.55, "learning_rate": 3.123100875432886e-05, "loss": 2.1663, "step": 7579500 }, { "epoch": 37.55, "learning_rate": 3.1229770167902776e-05, "loss": 2.1883, "step": 7580000 }, { "epoch": 37.56, "learning_rate": 3.122853158147669e-05, "loss": 2.1795, "step": 7580500 }, { "epoch": 37.56, "learning_rate": 3.122729299505061e-05, "loss": 2.1715, "step": 7581000 }, { "epoch": 37.56, "learning_rate": 3.1226054408624526e-05, "loss": 2.1624, "step": 7581500 }, { "epoch": 37.56, "learning_rate": 3.122481582219844e-05, "loss": 2.1937, "step": 7582000 }, { "epoch": 37.57, "learning_rate": 3.122357723577236e-05, "loss": 2.1976, "step": 7582500 }, { "epoch": 37.57, "learning_rate": 3.122233864934628e-05, "loss": 2.1896, "step": 7583000 }, { "epoch": 37.57, "learning_rate": 3.1221100062920194e-05, "loss": 2.1723, "step": 7583500 }, { "epoch": 37.57, "learning_rate": 3.1219861476494104e-05, "loss": 2.171, "step": 7584000 }, { "epoch": 37.58, "learning_rate": 3.121862289006802e-05, "loss": 2.1689, "step": 7584500 }, { "epoch": 37.58, "learning_rate": 3.121738430364194e-05, "loss": 2.1642, "step": 7585000 }, { "epoch": 37.58, "learning_rate": 3.1216148194388714e-05, "loss": 2.1587, "step": 7585500 }, { "epoch": 37.58, "learning_rate": 3.121490960796263e-05, "loss": 2.1621, "step": 7586000 }, { "epoch": 37.59, "learning_rate": 3.121367102153655e-05, "loss": 2.2021, "step": 7586500 }, { "epoch": 37.59, "learning_rate": 3.1212432435110465e-05, "loss": 2.1851, "step": 7587000 }, { "epoch": 37.59, "learning_rate": 3.1211193848684375e-05, "loss": 2.1986, "step": 7587500 }, { "epoch": 37.59, "learning_rate": 3.120995526225829e-05, "loss": 2.1908, "step": 7588000 }, { "epoch": 37.6, "learning_rate": 3.120871667583221e-05, "loss": 2.1592, "step": 7588500 }, { "epoch": 37.6, "learning_rate": 3.1207478089406125e-05, "loss": 2.1781, "step": 7589000 }, { "epoch": 37.6, "learning_rate": 3.1206241980152894e-05, "loss": 2.1659, "step": 7589500 }, { "epoch": 37.6, "learning_rate": 3.120500339372681e-05, "loss": 2.1833, "step": 7590000 }, { "epoch": 37.61, "learning_rate": 3.120376480730072e-05, "loss": 2.1932, "step": 7590500 }, { "epoch": 37.61, "learning_rate": 3.12025286980475e-05, "loss": 2.1681, "step": 7591000 }, { "epoch": 37.61, "learning_rate": 3.1201290111621414e-05, "loss": 2.1914, "step": 7591500 }, { "epoch": 37.61, "learning_rate": 3.120005152519533e-05, "loss": 2.1588, "step": 7592000 }, { "epoch": 37.62, "learning_rate": 3.119881541594209e-05, "loss": 2.1643, "step": 7592500 }, { "epoch": 37.62, "learning_rate": 3.119757682951601e-05, "loss": 2.1474, "step": 7593000 }, { "epoch": 37.62, "learning_rate": 3.119633824308993e-05, "loss": 2.1326, "step": 7593500 }, { "epoch": 37.62, "learning_rate": 3.1195099656663844e-05, "loss": 2.1581, "step": 7594000 }, { "epoch": 37.63, "learning_rate": 3.119386107023776e-05, "loss": 2.1565, "step": 7594500 }, { "epoch": 37.63, "learning_rate": 3.119262496098453e-05, "loss": 2.158, "step": 7595000 }, { "epoch": 37.63, "learning_rate": 3.1191386374558446e-05, "loss": 2.1375, "step": 7595500 }, { "epoch": 37.63, "learning_rate": 3.119014778813236e-05, "loss": 2.1917, "step": 7596000 }, { "epoch": 37.64, "learning_rate": 3.118890920170628e-05, "loss": 2.1699, "step": 7596500 }, { "epoch": 37.64, "learning_rate": 3.11876706152802e-05, "loss": 2.178, "step": 7597000 }, { "epoch": 37.64, "learning_rate": 3.1186434506026966e-05, "loss": 2.1686, "step": 7597500 }, { "epoch": 37.64, "learning_rate": 3.1185195919600876e-05, "loss": 2.1555, "step": 7598000 }, { "epoch": 37.65, "learning_rate": 3.118395733317479e-05, "loss": 2.1557, "step": 7598500 }, { "epoch": 37.65, "learning_rate": 3.118271874674871e-05, "loss": 2.175, "step": 7599000 }, { "epoch": 37.65, "learning_rate": 3.118148016032263e-05, "loss": 2.1777, "step": 7599500 }, { "epoch": 37.65, "learning_rate": 3.1180244051069396e-05, "loss": 2.1642, "step": 7600000 }, { "epoch": 37.66, "learning_rate": 3.117900546464331e-05, "loss": 2.1832, "step": 7600500 }, { "epoch": 37.66, "learning_rate": 3.117776687821723e-05, "loss": 2.1524, "step": 7601000 }, { "epoch": 37.66, "learning_rate": 3.1176528291791146e-05, "loss": 2.175, "step": 7601500 }, { "epoch": 37.66, "learning_rate": 3.117528970536506e-05, "loss": 2.158, "step": 7602000 }, { "epoch": 37.67, "learning_rate": 3.117405111893898e-05, "loss": 2.1643, "step": 7602500 }, { "epoch": 37.67, "learning_rate": 3.117281500968575e-05, "loss": 2.198, "step": 7603000 }, { "epoch": 37.67, "learning_rate": 3.1171576423259666e-05, "loss": 2.1969, "step": 7603500 }, { "epoch": 37.67, "learning_rate": 3.117033783683358e-05, "loss": 2.1576, "step": 7604000 }, { "epoch": 37.68, "learning_rate": 3.116909925040749e-05, "loss": 2.1697, "step": 7604500 }, { "epoch": 37.68, "learning_rate": 3.116786066398141e-05, "loss": 2.1615, "step": 7605000 }, { "epoch": 37.68, "learning_rate": 3.116662207755533e-05, "loss": 2.1891, "step": 7605500 }, { "epoch": 37.68, "learning_rate": 3.1165383491129244e-05, "loss": 2.1839, "step": 7606000 }, { "epoch": 37.69, "learning_rate": 3.116414738187601e-05, "loss": 2.1447, "step": 7606500 }, { "epoch": 37.69, "learning_rate": 3.116290879544993e-05, "loss": 2.178, "step": 7607000 }, { "epoch": 37.69, "learning_rate": 3.1161670209023847e-05, "loss": 2.1625, "step": 7607500 }, { "epoch": 37.69, "learning_rate": 3.1160431622597763e-05, "loss": 2.1604, "step": 7608000 }, { "epoch": 37.7, "learning_rate": 3.115919303617168e-05, "loss": 2.1825, "step": 7608500 }, { "epoch": 37.7, "learning_rate": 3.11579544497456e-05, "loss": 2.1545, "step": 7609000 }, { "epoch": 37.7, "learning_rate": 3.1156718340492366e-05, "loss": 2.1914, "step": 7609500 }, { "epoch": 37.7, "learning_rate": 3.115548223123913e-05, "loss": 2.1585, "step": 7610000 }, { "epoch": 37.71, "learning_rate": 3.1154243644813045e-05, "loss": 2.1652, "step": 7610500 }, { "epoch": 37.71, "learning_rate": 3.115300505838696e-05, "loss": 2.1715, "step": 7611000 }, { "epoch": 37.71, "learning_rate": 3.115176647196088e-05, "loss": 2.1806, "step": 7611500 }, { "epoch": 37.71, "learning_rate": 3.1150527885534796e-05, "loss": 2.1622, "step": 7612000 }, { "epoch": 37.71, "learning_rate": 3.114928929910871e-05, "loss": 2.1724, "step": 7612500 }, { "epoch": 37.72, "learning_rate": 3.114805071268263e-05, "loss": 2.1598, "step": 7613000 }, { "epoch": 37.72, "learning_rate": 3.114681212625655e-05, "loss": 2.1558, "step": 7613500 }, { "epoch": 37.72, "learning_rate": 3.1145573539830464e-05, "loss": 2.1799, "step": 7614000 }, { "epoch": 37.72, "learning_rate": 3.114433495340438e-05, "loss": 2.1604, "step": 7614500 }, { "epoch": 37.73, "learning_rate": 3.11430963669783e-05, "loss": 2.1678, "step": 7615000 }, { "epoch": 37.73, "learning_rate": 3.1141857780552214e-05, "loss": 2.1516, "step": 7615500 }, { "epoch": 37.73, "learning_rate": 3.114062167129898e-05, "loss": 2.1665, "step": 7616000 }, { "epoch": 37.73, "learning_rate": 3.11393830848729e-05, "loss": 2.1556, "step": 7616500 }, { "epoch": 37.74, "learning_rate": 3.113814449844682e-05, "loss": 2.1651, "step": 7617000 }, { "epoch": 37.74, "learning_rate": 3.1136905912020734e-05, "loss": 2.1926, "step": 7617500 }, { "epoch": 37.74, "learning_rate": 3.1135669802767496e-05, "loss": 2.1944, "step": 7618000 }, { "epoch": 37.74, "learning_rate": 3.113443121634141e-05, "loss": 2.1642, "step": 7618500 }, { "epoch": 37.75, "learning_rate": 3.113319262991533e-05, "loss": 2.17, "step": 7619000 }, { "epoch": 37.75, "learning_rate": 3.113195404348925e-05, "loss": 2.168, "step": 7619500 }, { "epoch": 37.75, "learning_rate": 3.1130715457063164e-05, "loss": 2.1791, "step": 7620000 }, { "epoch": 37.75, "learning_rate": 3.112947687063708e-05, "loss": 2.1555, "step": 7620500 }, { "epoch": 37.76, "learning_rate": 3.1128238284211e-05, "loss": 2.1896, "step": 7621000 }, { "epoch": 37.76, "learning_rate": 3.1126999697784914e-05, "loss": 2.184, "step": 7621500 }, { "epoch": 37.76, "learning_rate": 3.112576111135883e-05, "loss": 2.1875, "step": 7622000 }, { "epoch": 37.76, "learning_rate": 3.112452252493275e-05, "loss": 2.1642, "step": 7622500 }, { "epoch": 37.77, "learning_rate": 3.1123283938506665e-05, "loss": 2.1688, "step": 7623000 }, { "epoch": 37.77, "learning_rate": 3.1122047829253434e-05, "loss": 2.1603, "step": 7623500 }, { "epoch": 37.77, "learning_rate": 3.112080924282735e-05, "loss": 2.1725, "step": 7624000 }, { "epoch": 37.77, "learning_rate": 3.111957065640127e-05, "loss": 2.1852, "step": 7624500 }, { "epoch": 37.78, "learning_rate": 3.111833206997518e-05, "loss": 2.1877, "step": 7625000 }, { "epoch": 37.78, "learning_rate": 3.111709596072195e-05, "loss": 2.1658, "step": 7625500 }, { "epoch": 37.78, "learning_rate": 3.1115857374295864e-05, "loss": 2.191, "step": 7626000 }, { "epoch": 37.78, "learning_rate": 3.111461878786978e-05, "loss": 2.1775, "step": 7626500 }, { "epoch": 37.79, "learning_rate": 3.11133802014437e-05, "loss": 2.1898, "step": 7627000 }, { "epoch": 37.79, "learning_rate": 3.1112141615017615e-05, "loss": 2.1753, "step": 7627500 }, { "epoch": 37.79, "learning_rate": 3.111090302859153e-05, "loss": 2.1766, "step": 7628000 }, { "epoch": 37.79, "learning_rate": 3.110966444216545e-05, "loss": 2.153, "step": 7628500 }, { "epoch": 37.8, "learning_rate": 3.110842833291222e-05, "loss": 2.1655, "step": 7629000 }, { "epoch": 37.8, "learning_rate": 3.110719222365898e-05, "loss": 2.1837, "step": 7629500 }, { "epoch": 37.8, "learning_rate": 3.1105956114405755e-05, "loss": 2.1567, "step": 7630000 }, { "epoch": 37.8, "learning_rate": 3.110471752797967e-05, "loss": 2.1809, "step": 7630500 }, { "epoch": 37.81, "learning_rate": 3.110347894155359e-05, "loss": 2.1559, "step": 7631000 }, { "epoch": 37.81, "learning_rate": 3.1102240355127506e-05, "loss": 2.1647, "step": 7631500 }, { "epoch": 37.81, "learning_rate": 3.1101001768701416e-05, "loss": 2.1759, "step": 7632000 }, { "epoch": 37.81, "learning_rate": 3.109976318227533e-05, "loss": 2.1911, "step": 7632500 }, { "epoch": 37.82, "learning_rate": 3.109852459584925e-05, "loss": 2.1902, "step": 7633000 }, { "epoch": 37.82, "learning_rate": 3.1097286009423167e-05, "loss": 2.1445, "step": 7633500 }, { "epoch": 37.82, "learning_rate": 3.1096047422997084e-05, "loss": 2.1595, "step": 7634000 }, { "epoch": 37.82, "learning_rate": 3.1094808836571e-05, "loss": 2.1681, "step": 7634500 }, { "epoch": 37.83, "learning_rate": 3.109357520449062e-05, "loss": 2.1935, "step": 7635000 }, { "epoch": 37.83, "learning_rate": 3.109233661806454e-05, "loss": 2.1732, "step": 7635500 }, { "epoch": 37.83, "learning_rate": 3.1091098031638455e-05, "loss": 2.1912, "step": 7636000 }, { "epoch": 37.83, "learning_rate": 3.108985944521237e-05, "loss": 2.165, "step": 7636500 }, { "epoch": 37.84, "learning_rate": 3.108862085878629e-05, "loss": 2.1409, "step": 7637000 }, { "epoch": 37.84, "learning_rate": 3.1087382272360206e-05, "loss": 2.1897, "step": 7637500 }, { "epoch": 37.84, "learning_rate": 3.1086143685934116e-05, "loss": 2.1517, "step": 7638000 }, { "epoch": 37.84, "learning_rate": 3.108490509950803e-05, "loss": 2.1736, "step": 7638500 }, { "epoch": 37.85, "learning_rate": 3.108366651308195e-05, "loss": 2.1654, "step": 7639000 }, { "epoch": 37.85, "learning_rate": 3.108242792665587e-05, "loss": 2.1516, "step": 7639500 }, { "epoch": 37.85, "learning_rate": 3.1081189340229784e-05, "loss": 2.1426, "step": 7640000 }, { "epoch": 37.85, "learning_rate": 3.107995323097655e-05, "loss": 2.2071, "step": 7640500 }, { "epoch": 37.86, "learning_rate": 3.107871464455047e-05, "loss": 2.1758, "step": 7641000 }, { "epoch": 37.86, "learning_rate": 3.107747605812438e-05, "loss": 2.1858, "step": 7641500 }, { "epoch": 37.86, "learning_rate": 3.1076237471698296e-05, "loss": 2.1854, "step": 7642000 }, { "epoch": 37.86, "learning_rate": 3.1074998885272213e-05, "loss": 2.1974, "step": 7642500 }, { "epoch": 37.87, "learning_rate": 3.107376029884613e-05, "loss": 2.1805, "step": 7643000 }, { "epoch": 37.87, "learning_rate": 3.107252171242005e-05, "loss": 2.1728, "step": 7643500 }, { "epoch": 37.87, "learning_rate": 3.1071283125993964e-05, "loss": 2.1834, "step": 7644000 }, { "epoch": 37.87, "learning_rate": 3.107004453956788e-05, "loss": 2.1629, "step": 7644500 }, { "epoch": 37.88, "learning_rate": 3.106880843031465e-05, "loss": 2.179, "step": 7645000 }, { "epoch": 37.88, "learning_rate": 3.106756984388857e-05, "loss": 2.1791, "step": 7645500 }, { "epoch": 37.88, "learning_rate": 3.1066331257462484e-05, "loss": 2.1528, "step": 7646000 }, { "epoch": 37.88, "learning_rate": 3.10650926710364e-05, "loss": 2.1654, "step": 7646500 }, { "epoch": 37.89, "learning_rate": 3.106385408461032e-05, "loss": 2.1706, "step": 7647000 }, { "epoch": 37.89, "learning_rate": 3.1062615498184235e-05, "loss": 2.1603, "step": 7647500 }, { "epoch": 37.89, "learning_rate": 3.106137691175815e-05, "loss": 2.1772, "step": 7648000 }, { "epoch": 37.89, "learning_rate": 3.106013832533207e-05, "loss": 2.1699, "step": 7648500 }, { "epoch": 37.9, "learning_rate": 3.1058899738905985e-05, "loss": 2.187, "step": 7649000 }, { "epoch": 37.9, "learning_rate": 3.105766362965275e-05, "loss": 2.1842, "step": 7649500 }, { "epoch": 37.9, "learning_rate": 3.105642752039952e-05, "loss": 2.186, "step": 7650000 }, { "epoch": 37.9, "learning_rate": 3.105518893397343e-05, "loss": 2.2046, "step": 7650500 }, { "epoch": 37.91, "learning_rate": 3.105395034754735e-05, "loss": 2.1892, "step": 7651000 }, { "epoch": 37.91, "learning_rate": 3.105271176112127e-05, "loss": 2.1576, "step": 7651500 }, { "epoch": 37.91, "learning_rate": 3.1051473174695184e-05, "loss": 2.1767, "step": 7652000 }, { "epoch": 37.91, "learning_rate": 3.10502345882691e-05, "loss": 2.1738, "step": 7652500 }, { "epoch": 37.92, "learning_rate": 3.104899600184302e-05, "loss": 2.1886, "step": 7653000 }, { "epoch": 37.92, "learning_rate": 3.1047757415416935e-05, "loss": 2.1829, "step": 7653500 }, { "epoch": 37.92, "learning_rate": 3.104651882899085e-05, "loss": 2.145, "step": 7654000 }, { "epoch": 37.92, "learning_rate": 3.104528024256477e-05, "loss": 2.1616, "step": 7654500 }, { "epoch": 37.93, "learning_rate": 3.1044041656138685e-05, "loss": 2.2104, "step": 7655000 }, { "epoch": 37.93, "learning_rate": 3.10428030697126e-05, "loss": 2.167, "step": 7655500 }, { "epoch": 37.93, "learning_rate": 3.1041566960459364e-05, "loss": 2.1776, "step": 7656000 }, { "epoch": 37.93, "learning_rate": 3.104032837403328e-05, "loss": 2.1884, "step": 7656500 }, { "epoch": 37.94, "learning_rate": 3.103909226478005e-05, "loss": 2.1785, "step": 7657000 }, { "epoch": 37.94, "learning_rate": 3.103785367835397e-05, "loss": 2.1838, "step": 7657500 }, { "epoch": 37.94, "learning_rate": 3.1036615091927884e-05, "loss": 2.1869, "step": 7658000 }, { "epoch": 37.94, "learning_rate": 3.10353765055018e-05, "loss": 2.159, "step": 7658500 }, { "epoch": 37.95, "learning_rate": 3.103413791907572e-05, "loss": 2.1483, "step": 7659000 }, { "epoch": 37.95, "learning_rate": 3.1032899332649635e-05, "loss": 2.1951, "step": 7659500 }, { "epoch": 37.95, "learning_rate": 3.103166074622355e-05, "loss": 2.1653, "step": 7660000 }, { "epoch": 37.95, "learning_rate": 3.103042215979747e-05, "loss": 2.163, "step": 7660500 }, { "epoch": 37.96, "learning_rate": 3.1029183573371386e-05, "loss": 2.1676, "step": 7661000 }, { "epoch": 37.96, "learning_rate": 3.10279449869453e-05, "loss": 2.1827, "step": 7661500 }, { "epoch": 37.96, "learning_rate": 3.1026708877692065e-05, "loss": 2.1703, "step": 7662000 }, { "epoch": 37.96, "learning_rate": 3.102547276843884e-05, "loss": 2.2001, "step": 7662500 }, { "epoch": 37.97, "learning_rate": 3.102423418201275e-05, "loss": 2.1585, "step": 7663000 }, { "epoch": 37.97, "learning_rate": 3.102299559558667e-05, "loss": 2.1608, "step": 7663500 }, { "epoch": 37.97, "learning_rate": 3.1021757009160584e-05, "loss": 2.182, "step": 7664000 }, { "epoch": 37.97, "learning_rate": 3.10205184227345e-05, "loss": 2.1963, "step": 7664500 }, { "epoch": 37.98, "learning_rate": 3.101927983630842e-05, "loss": 2.1576, "step": 7665000 }, { "epoch": 37.98, "learning_rate": 3.1018041249882335e-05, "loss": 2.1685, "step": 7665500 }, { "epoch": 37.98, "learning_rate": 3.101680266345625e-05, "loss": 2.1808, "step": 7666000 }, { "epoch": 37.98, "learning_rate": 3.101556407703017e-05, "loss": 2.1778, "step": 7666500 }, { "epoch": 37.98, "learning_rate": 3.101432796777694e-05, "loss": 2.1627, "step": 7667000 }, { "epoch": 37.99, "learning_rate": 3.1013089381350855e-05, "loss": 2.1722, "step": 7667500 }, { "epoch": 37.99, "learning_rate": 3.101185079492477e-05, "loss": 2.1858, "step": 7668000 }, { "epoch": 37.99, "learning_rate": 3.101061220849868e-05, "loss": 2.2135, "step": 7668500 }, { "epoch": 37.99, "learning_rate": 3.100937609924546e-05, "loss": 2.1998, "step": 7669000 }, { "epoch": 38.0, "learning_rate": 3.100813751281937e-05, "loss": 2.1548, "step": 7669500 }, { "epoch": 38.0, "learning_rate": 3.1006898926393284e-05, "loss": 2.1506, "step": 7670000 }, { "epoch": 38.0, "eval_accuracy": 0.6640129221845454, "eval_accuracy_mlm": 0.6211758980997927, "eval_accuracy_nsp": 0.8663432159680576, "eval_loss": 2.2940046787261963, "eval_runtime": 146.024, "eval_samples_per_second": 1746.007, "eval_steps_per_second": 72.755, "step": 7670034 }, { "epoch": 38.0, "learning_rate": 3.10056603399672e-05, "loss": 2.1341, "step": 7670500 }, { "epoch": 38.0, "learning_rate": 3.100442175354112e-05, "loss": 2.1179, "step": 7671000 }, { "epoch": 38.01, "learning_rate": 3.1003183167115035e-05, "loss": 2.1475, "step": 7671500 }, { "epoch": 38.01, "learning_rate": 3.100194458068895e-05, "loss": 2.1367, "step": 7672000 }, { "epoch": 38.01, "learning_rate": 3.100070599426287e-05, "loss": 2.152, "step": 7672500 }, { "epoch": 38.01, "learning_rate": 3.0999467407836786e-05, "loss": 2.1322, "step": 7673000 }, { "epoch": 38.02, "learning_rate": 3.09982288214107e-05, "loss": 2.1336, "step": 7673500 }, { "epoch": 38.02, "learning_rate": 3.099699023498462e-05, "loss": 2.1332, "step": 7674000 }, { "epoch": 38.02, "learning_rate": 3.099575164855854e-05, "loss": 2.1682, "step": 7674500 }, { "epoch": 38.02, "learning_rate": 3.0994515539305305e-05, "loss": 2.1566, "step": 7675000 }, { "epoch": 38.03, "learning_rate": 3.099327943005207e-05, "loss": 2.1353, "step": 7675500 }, { "epoch": 38.03, "learning_rate": 3.099204332079884e-05, "loss": 2.1698, "step": 7676000 }, { "epoch": 38.03, "learning_rate": 3.099080473437276e-05, "loss": 2.1583, "step": 7676500 }, { "epoch": 38.03, "learning_rate": 3.098956614794668e-05, "loss": 2.1478, "step": 7677000 }, { "epoch": 38.04, "learning_rate": 3.0988327561520594e-05, "loss": 2.1403, "step": 7677500 }, { "epoch": 38.04, "learning_rate": 3.098708897509451e-05, "loss": 2.1576, "step": 7678000 }, { "epoch": 38.04, "learning_rate": 3.098585038866842e-05, "loss": 2.1442, "step": 7678500 }, { "epoch": 38.04, "learning_rate": 3.098461180224234e-05, "loss": 2.1502, "step": 7679000 }, { "epoch": 38.05, "learning_rate": 3.0983373215816255e-05, "loss": 2.1695, "step": 7679500 }, { "epoch": 38.05, "learning_rate": 3.098213462939017e-05, "loss": 2.1417, "step": 7680000 }, { "epoch": 38.05, "learning_rate": 3.098089604296409e-05, "loss": 2.1439, "step": 7680500 }, { "epoch": 38.05, "learning_rate": 3.0979657456538006e-05, "loss": 2.1506, "step": 7681000 }, { "epoch": 38.06, "learning_rate": 3.097841887011192e-05, "loss": 2.107, "step": 7681500 }, { "epoch": 38.06, "learning_rate": 3.0977182760858685e-05, "loss": 2.1619, "step": 7682000 }, { "epoch": 38.06, "learning_rate": 3.097594665160546e-05, "loss": 2.1356, "step": 7682500 }, { "epoch": 38.06, "learning_rate": 3.097470806517938e-05, "loss": 2.1428, "step": 7683000 }, { "epoch": 38.07, "learning_rate": 3.0973469478753294e-05, "loss": 2.1517, "step": 7683500 }, { "epoch": 38.07, "learning_rate": 3.097223089232721e-05, "loss": 2.1489, "step": 7684000 }, { "epoch": 38.07, "learning_rate": 3.097099478307397e-05, "loss": 2.1292, "step": 7684500 }, { "epoch": 38.07, "learning_rate": 3.096975619664789e-05, "loss": 2.1504, "step": 7685000 }, { "epoch": 38.08, "learning_rate": 3.096851761022181e-05, "loss": 2.1655, "step": 7685500 }, { "epoch": 38.08, "learning_rate": 3.0967281500968576e-05, "loss": 2.1519, "step": 7686000 }, { "epoch": 38.08, "learning_rate": 3.096604291454249e-05, "loss": 2.1419, "step": 7686500 }, { "epoch": 38.08, "learning_rate": 3.096480432811641e-05, "loss": 2.1692, "step": 7687000 }, { "epoch": 38.09, "learning_rate": 3.0963565741690326e-05, "loss": 2.1659, "step": 7687500 }, { "epoch": 38.09, "learning_rate": 3.096232715526424e-05, "loss": 2.1339, "step": 7688000 }, { "epoch": 38.09, "learning_rate": 3.096108856883816e-05, "loss": 2.1517, "step": 7688500 }, { "epoch": 38.09, "learning_rate": 3.095985245958493e-05, "loss": 2.1371, "step": 7689000 }, { "epoch": 38.1, "learning_rate": 3.095861635033169e-05, "loss": 2.1456, "step": 7689500 }, { "epoch": 38.1, "learning_rate": 3.095737776390561e-05, "loss": 2.1537, "step": 7690000 }, { "epoch": 38.1, "learning_rate": 3.0956139177479525e-05, "loss": 2.167, "step": 7690500 }, { "epoch": 38.1, "learning_rate": 3.095490059105344e-05, "loss": 2.1423, "step": 7691000 }, { "epoch": 38.11, "learning_rate": 3.095366200462736e-05, "loss": 2.1428, "step": 7691500 }, { "epoch": 38.11, "learning_rate": 3.095242589537413e-05, "loss": 2.1569, "step": 7692000 }, { "epoch": 38.11, "learning_rate": 3.0951187308948045e-05, "loss": 2.1359, "step": 7692500 }, { "epoch": 38.11, "learning_rate": 3.094994872252196e-05, "loss": 2.1448, "step": 7693000 }, { "epoch": 38.12, "learning_rate": 3.094871013609588e-05, "loss": 2.1413, "step": 7693500 }, { "epoch": 38.12, "learning_rate": 3.0947471549669795e-05, "loss": 2.1571, "step": 7694000 }, { "epoch": 38.12, "learning_rate": 3.094623296324371e-05, "loss": 2.1341, "step": 7694500 }, { "epoch": 38.12, "learning_rate": 3.094499437681763e-05, "loss": 2.1578, "step": 7695000 }, { "epoch": 38.13, "learning_rate": 3.0943755790391546e-05, "loss": 2.1347, "step": 7695500 }, { "epoch": 38.13, "learning_rate": 3.094251720396546e-05, "loss": 2.1585, "step": 7696000 }, { "epoch": 38.13, "learning_rate": 3.094127861753937e-05, "loss": 2.132, "step": 7696500 }, { "epoch": 38.13, "learning_rate": 3.094004003111329e-05, "loss": 2.1421, "step": 7697000 }, { "epoch": 38.14, "learning_rate": 3.093880144468721e-05, "loss": 2.1641, "step": 7697500 }, { "epoch": 38.14, "learning_rate": 3.0937562858261124e-05, "loss": 2.1336, "step": 7698000 }, { "epoch": 38.14, "learning_rate": 3.093632427183504e-05, "loss": 2.1314, "step": 7698500 }, { "epoch": 38.14, "learning_rate": 3.093508816258181e-05, "loss": 2.1504, "step": 7699000 }, { "epoch": 38.15, "learning_rate": 3.093385205332858e-05, "loss": 2.1328, "step": 7699500 }, { "epoch": 38.15, "learning_rate": 3.0932613466902495e-05, "loss": 2.1288, "step": 7700000 }, { "epoch": 38.15, "learning_rate": 3.093137488047641e-05, "loss": 2.1542, "step": 7700500 }, { "epoch": 38.15, "learning_rate": 3.093013629405033e-05, "loss": 2.1708, "step": 7701000 }, { "epoch": 38.16, "learning_rate": 3.0928897707624246e-05, "loss": 2.1498, "step": 7701500 }, { "epoch": 38.16, "learning_rate": 3.092765912119816e-05, "loss": 2.1544, "step": 7702000 }, { "epoch": 38.16, "learning_rate": 3.0926423011944925e-05, "loss": 2.1696, "step": 7702500 }, { "epoch": 38.16, "learning_rate": 3.09251869026917e-05, "loss": 2.1817, "step": 7703000 }, { "epoch": 38.17, "learning_rate": 3.092394831626562e-05, "loss": 2.1451, "step": 7703500 }, { "epoch": 38.17, "learning_rate": 3.0922709729839535e-05, "loss": 2.1684, "step": 7704000 }, { "epoch": 38.17, "learning_rate": 3.0921471143413445e-05, "loss": 2.1609, "step": 7704500 }, { "epoch": 38.17, "learning_rate": 3.092023255698736e-05, "loss": 2.17, "step": 7705000 }, { "epoch": 38.18, "learning_rate": 3.091899397056128e-05, "loss": 2.1697, "step": 7705500 }, { "epoch": 38.18, "learning_rate": 3.09177603384809e-05, "loss": 2.1148, "step": 7706000 }, { "epoch": 38.18, "learning_rate": 3.0916521752054816e-05, "loss": 2.1489, "step": 7706500 }, { "epoch": 38.18, "learning_rate": 3.091528316562873e-05, "loss": 2.1828, "step": 7707000 }, { "epoch": 38.19, "learning_rate": 3.091404457920265e-05, "loss": 2.1395, "step": 7707500 }, { "epoch": 38.19, "learning_rate": 3.091280599277657e-05, "loss": 2.1432, "step": 7708000 }, { "epoch": 38.19, "learning_rate": 3.0911567406350484e-05, "loss": 2.1308, "step": 7708500 }, { "epoch": 38.19, "learning_rate": 3.09103288199244e-05, "loss": 2.1493, "step": 7709000 }, { "epoch": 38.2, "learning_rate": 3.090909271067117e-05, "loss": 2.1679, "step": 7709500 }, { "epoch": 38.2, "learning_rate": 3.090785412424509e-05, "loss": 2.1403, "step": 7710000 }, { "epoch": 38.2, "learning_rate": 3.0906615537819004e-05, "loss": 2.1312, "step": 7710500 }, { "epoch": 38.2, "learning_rate": 3.0905376951392914e-05, "loss": 2.1532, "step": 7711000 }, { "epoch": 38.21, "learning_rate": 3.090413836496683e-05, "loss": 2.155, "step": 7711500 }, { "epoch": 38.21, "learning_rate": 3.090289977854075e-05, "loss": 2.1598, "step": 7712000 }, { "epoch": 38.21, "learning_rate": 3.0901661192114664e-05, "loss": 2.1614, "step": 7712500 }, { "epoch": 38.21, "learning_rate": 3.090042260568858e-05, "loss": 2.1175, "step": 7713000 }, { "epoch": 38.22, "learning_rate": 3.089918401926249e-05, "loss": 2.1656, "step": 7713500 }, { "epoch": 38.22, "learning_rate": 3.089794543283641e-05, "loss": 2.1467, "step": 7714000 }, { "epoch": 38.22, "learning_rate": 3.0896706846410325e-05, "loss": 2.1399, "step": 7714500 }, { "epoch": 38.22, "learning_rate": 3.089546825998424e-05, "loss": 2.1499, "step": 7715000 }, { "epoch": 38.23, "learning_rate": 3.089422967355816e-05, "loss": 2.1806, "step": 7715500 }, { "epoch": 38.23, "learning_rate": 3.0892991087132076e-05, "loss": 2.1437, "step": 7716000 }, { "epoch": 38.23, "learning_rate": 3.089175250070599e-05, "loss": 2.1507, "step": 7716500 }, { "epoch": 38.23, "learning_rate": 3.089051639145276e-05, "loss": 2.1541, "step": 7717000 }, { "epoch": 38.24, "learning_rate": 3.088928028219954e-05, "loss": 2.1685, "step": 7717500 }, { "epoch": 38.24, "learning_rate": 3.088804169577345e-05, "loss": 2.1489, "step": 7718000 }, { "epoch": 38.24, "learning_rate": 3.0886803109347365e-05, "loss": 2.1454, "step": 7718500 }, { "epoch": 38.24, "learning_rate": 3.088556452292128e-05, "loss": 2.158, "step": 7719000 }, { "epoch": 38.25, "learning_rate": 3.08843259364952e-05, "loss": 2.1734, "step": 7719500 }, { "epoch": 38.25, "learning_rate": 3.0883087350069115e-05, "loss": 2.1772, "step": 7720000 }, { "epoch": 38.25, "learning_rate": 3.0881848763643026e-05, "loss": 2.1496, "step": 7720500 }, { "epoch": 38.25, "learning_rate": 3.088061017721694e-05, "loss": 2.1361, "step": 7721000 }, { "epoch": 38.25, "learning_rate": 3.087937159079086e-05, "loss": 2.1307, "step": 7721500 }, { "epoch": 38.26, "learning_rate": 3.087813795871049e-05, "loss": 2.178, "step": 7722000 }, { "epoch": 38.26, "learning_rate": 3.087690184945725e-05, "loss": 2.1533, "step": 7722500 }, { "epoch": 38.26, "learning_rate": 3.0875663263031166e-05, "loss": 2.1338, "step": 7723000 }, { "epoch": 38.26, "learning_rate": 3.087442467660508e-05, "loss": 2.149, "step": 7723500 }, { "epoch": 38.27, "learning_rate": 3.0873186090179e-05, "loss": 2.1582, "step": 7724000 }, { "epoch": 38.27, "learning_rate": 3.0871947503752917e-05, "loss": 2.1487, "step": 7724500 }, { "epoch": 38.27, "learning_rate": 3.0870708917326834e-05, "loss": 2.1591, "step": 7725000 }, { "epoch": 38.27, "learning_rate": 3.086947033090075e-05, "loss": 2.1655, "step": 7725500 }, { "epoch": 38.28, "learning_rate": 3.086823174447467e-05, "loss": 2.1918, "step": 7726000 }, { "epoch": 38.28, "learning_rate": 3.0866993158048584e-05, "loss": 2.1604, "step": 7726500 }, { "epoch": 38.28, "learning_rate": 3.08657545716225e-05, "loss": 2.1778, "step": 7727000 }, { "epoch": 38.28, "learning_rate": 3.086451598519642e-05, "loss": 2.1669, "step": 7727500 }, { "epoch": 38.29, "learning_rate": 3.086327987594319e-05, "loss": 2.1821, "step": 7728000 }, { "epoch": 38.29, "learning_rate": 3.0862041289517104e-05, "loss": 2.1247, "step": 7728500 }, { "epoch": 38.29, "learning_rate": 3.086080270309102e-05, "loss": 2.1724, "step": 7729000 }, { "epoch": 38.29, "learning_rate": 3.085956411666494e-05, "loss": 2.1415, "step": 7729500 }, { "epoch": 38.3, "learning_rate": 3.0858325530238855e-05, "loss": 2.1568, "step": 7730000 }, { "epoch": 38.3, "learning_rate": 3.085708694381277e-05, "loss": 2.1605, "step": 7730500 }, { "epoch": 38.3, "learning_rate": 3.085584835738669e-05, "loss": 2.1404, "step": 7731000 }, { "epoch": 38.3, "learning_rate": 3.08546097709606e-05, "loss": 2.166, "step": 7731500 }, { "epoch": 38.31, "learning_rate": 3.085337366170737e-05, "loss": 2.1407, "step": 7732000 }, { "epoch": 38.31, "learning_rate": 3.0852135075281284e-05, "loss": 2.151, "step": 7732500 }, { "epoch": 38.31, "learning_rate": 3.08508964888552e-05, "loss": 2.1633, "step": 7733000 }, { "epoch": 38.31, "learning_rate": 3.084965790242912e-05, "loss": 2.1448, "step": 7733500 }, { "epoch": 38.32, "learning_rate": 3.0848419316003035e-05, "loss": 2.1802, "step": 7734000 }, { "epoch": 38.32, "learning_rate": 3.084718072957695e-05, "loss": 2.1672, "step": 7734500 }, { "epoch": 38.32, "learning_rate": 3.084594214315087e-05, "loss": 2.1376, "step": 7735000 }, { "epoch": 38.32, "learning_rate": 3.084470355672478e-05, "loss": 2.1488, "step": 7735500 }, { "epoch": 38.33, "learning_rate": 3.0843464970298696e-05, "loss": 2.1596, "step": 7736000 }, { "epoch": 38.33, "learning_rate": 3.084222638387261e-05, "loss": 2.1616, "step": 7736500 }, { "epoch": 38.33, "learning_rate": 3.084099027461939e-05, "loss": 2.1577, "step": 7737000 }, { "epoch": 38.33, "learning_rate": 3.0839751688193306e-05, "loss": 2.1608, "step": 7737500 }, { "epoch": 38.34, "learning_rate": 3.083851557894007e-05, "loss": 2.1492, "step": 7738000 }, { "epoch": 38.34, "learning_rate": 3.0837276992513985e-05, "loss": 2.1875, "step": 7738500 }, { "epoch": 38.34, "learning_rate": 3.08360384060879e-05, "loss": 2.1617, "step": 7739000 }, { "epoch": 38.34, "learning_rate": 3.083479981966182e-05, "loss": 2.1713, "step": 7739500 }, { "epoch": 38.35, "learning_rate": 3.0833561233235735e-05, "loss": 2.1445, "step": 7740000 }, { "epoch": 38.35, "learning_rate": 3.083232264680965e-05, "loss": 2.1644, "step": 7740500 }, { "epoch": 38.35, "learning_rate": 3.083108406038357e-05, "loss": 2.1407, "step": 7741000 }, { "epoch": 38.35, "learning_rate": 3.082984795113034e-05, "loss": 2.1581, "step": 7741500 }, { "epoch": 38.36, "learning_rate": 3.0828609364704255e-05, "loss": 2.1386, "step": 7742000 }, { "epoch": 38.36, "learning_rate": 3.082737077827817e-05, "loss": 2.1513, "step": 7742500 }, { "epoch": 38.36, "learning_rate": 3.082613219185209e-05, "loss": 2.1501, "step": 7743000 }, { "epoch": 38.36, "learning_rate": 3.082489608259885e-05, "loss": 2.1552, "step": 7743500 }, { "epoch": 38.37, "learning_rate": 3.082365749617277e-05, "loss": 2.1685, "step": 7744000 }, { "epoch": 38.37, "learning_rate": 3.0822418909746685e-05, "loss": 2.1543, "step": 7744500 }, { "epoch": 38.37, "learning_rate": 3.08211803233206e-05, "loss": 2.1393, "step": 7745000 }, { "epoch": 38.37, "learning_rate": 3.081994173689452e-05, "loss": 2.1423, "step": 7745500 }, { "epoch": 38.38, "learning_rate": 3.0818703150468435e-05, "loss": 2.1453, "step": 7746000 }, { "epoch": 38.38, "learning_rate": 3.081746456404235e-05, "loss": 2.1586, "step": 7746500 }, { "epoch": 38.38, "learning_rate": 3.081622597761627e-05, "loss": 2.1634, "step": 7747000 }, { "epoch": 38.38, "learning_rate": 3.0814987391190186e-05, "loss": 2.1584, "step": 7747500 }, { "epoch": 38.39, "learning_rate": 3.0813748804764096e-05, "loss": 2.1648, "step": 7748000 }, { "epoch": 38.39, "learning_rate": 3.081251021833801e-05, "loss": 2.1442, "step": 7748500 }, { "epoch": 38.39, "learning_rate": 3.081127163191193e-05, "loss": 2.1382, "step": 7749000 }, { "epoch": 38.39, "learning_rate": 3.081003304548585e-05, "loss": 2.139, "step": 7749500 }, { "epoch": 38.4, "learning_rate": 3.0808794459059764e-05, "loss": 2.1524, "step": 7750000 }, { "epoch": 38.4, "learning_rate": 3.080755587263368e-05, "loss": 2.1801, "step": 7750500 }, { "epoch": 38.4, "learning_rate": 3.080631976338045e-05, "loss": 2.1802, "step": 7751000 }, { "epoch": 38.4, "learning_rate": 3.080508365412722e-05, "loss": 2.1441, "step": 7751500 }, { "epoch": 38.41, "learning_rate": 3.0803845067701136e-05, "loss": 2.1448, "step": 7752000 }, { "epoch": 38.41, "learning_rate": 3.0802608958447904e-05, "loss": 2.1827, "step": 7752500 }, { "epoch": 38.41, "learning_rate": 3.080137037202182e-05, "loss": 2.1571, "step": 7753000 }, { "epoch": 38.41, "learning_rate": 3.080013178559574e-05, "loss": 2.1433, "step": 7753500 }, { "epoch": 38.42, "learning_rate": 3.0798893199169655e-05, "loss": 2.1795, "step": 7754000 }, { "epoch": 38.42, "learning_rate": 3.079765461274357e-05, "loss": 2.1581, "step": 7754500 }, { "epoch": 38.42, "learning_rate": 3.079641602631749e-05, "loss": 2.1609, "step": 7755000 }, { "epoch": 38.42, "learning_rate": 3.0795177439891406e-05, "loss": 2.1601, "step": 7755500 }, { "epoch": 38.43, "learning_rate": 3.079394133063817e-05, "loss": 2.1734, "step": 7756000 }, { "epoch": 38.43, "learning_rate": 3.0792702744212085e-05, "loss": 2.1364, "step": 7756500 }, { "epoch": 38.43, "learning_rate": 3.0791464157786e-05, "loss": 2.1786, "step": 7757000 }, { "epoch": 38.43, "learning_rate": 3.079022557135992e-05, "loss": 2.174, "step": 7757500 }, { "epoch": 38.44, "learning_rate": 3.0788986984933836e-05, "loss": 2.138, "step": 7758000 }, { "epoch": 38.44, "learning_rate": 3.078774839850775e-05, "loss": 2.1685, "step": 7758500 }, { "epoch": 38.44, "learning_rate": 3.078650981208167e-05, "loss": 2.1682, "step": 7759000 }, { "epoch": 38.44, "learning_rate": 3.0785271225655587e-05, "loss": 2.1384, "step": 7759500 }, { "epoch": 38.45, "learning_rate": 3.0784032639229503e-05, "loss": 2.1512, "step": 7760000 }, { "epoch": 38.45, "learning_rate": 3.0782794052803414e-05, "loss": 2.1479, "step": 7760500 }, { "epoch": 38.45, "learning_rate": 3.078155546637733e-05, "loss": 2.1676, "step": 7761000 }, { "epoch": 38.45, "learning_rate": 3.078031687995125e-05, "loss": 2.1731, "step": 7761500 }, { "epoch": 38.46, "learning_rate": 3.0779078293525164e-05, "loss": 2.1725, "step": 7762000 }, { "epoch": 38.46, "learning_rate": 3.077784218427194e-05, "loss": 2.1648, "step": 7762500 }, { "epoch": 38.46, "learning_rate": 3.07766060750187e-05, "loss": 2.1484, "step": 7763000 }, { "epoch": 38.46, "learning_rate": 3.077536748859262e-05, "loss": 2.1783, "step": 7763500 }, { "epoch": 38.47, "learning_rate": 3.0774128902166536e-05, "loss": 2.1605, "step": 7764000 }, { "epoch": 38.47, "learning_rate": 3.077289031574045e-05, "loss": 2.1725, "step": 7764500 }, { "epoch": 38.47, "learning_rate": 3.077165172931437e-05, "loss": 2.1651, "step": 7765000 }, { "epoch": 38.47, "learning_rate": 3.077041562006114e-05, "loss": 2.1763, "step": 7765500 }, { "epoch": 38.48, "learning_rate": 3.0769177033635055e-05, "loss": 2.146, "step": 7766000 }, { "epoch": 38.48, "learning_rate": 3.076793844720897e-05, "loss": 2.168, "step": 7766500 }, { "epoch": 38.48, "learning_rate": 3.076669986078289e-05, "loss": 2.145, "step": 7767000 }, { "epoch": 38.48, "learning_rate": 3.0765461274356806e-05, "loss": 2.1671, "step": 7767500 }, { "epoch": 38.49, "learning_rate": 3.076422268793072e-05, "loss": 2.1792, "step": 7768000 }, { "epoch": 38.49, "learning_rate": 3.076298410150464e-05, "loss": 2.1563, "step": 7768500 }, { "epoch": 38.49, "learning_rate": 3.076174551507856e-05, "loss": 2.1745, "step": 7769000 }, { "epoch": 38.49, "learning_rate": 3.0760506928652474e-05, "loss": 2.1663, "step": 7769500 }, { "epoch": 38.5, "learning_rate": 3.0759270819399236e-05, "loss": 2.1363, "step": 7770000 }, { "epoch": 38.5, "learning_rate": 3.075803223297315e-05, "loss": 2.1503, "step": 7770500 }, { "epoch": 38.5, "learning_rate": 3.075679364654707e-05, "loss": 2.1608, "step": 7771000 }, { "epoch": 38.5, "learning_rate": 3.075555506012099e-05, "loss": 2.1483, "step": 7771500 }, { "epoch": 38.51, "learning_rate": 3.0754316473694904e-05, "loss": 2.1652, "step": 7772000 }, { "epoch": 38.51, "learning_rate": 3.075307788726882e-05, "loss": 2.148, "step": 7772500 }, { "epoch": 38.51, "learning_rate": 3.075183930084273e-05, "loss": 2.1688, "step": 7773000 }, { "epoch": 38.51, "learning_rate": 3.0750603191589506e-05, "loss": 2.1357, "step": 7773500 }, { "epoch": 38.52, "learning_rate": 3.074936460516342e-05, "loss": 2.1664, "step": 7774000 }, { "epoch": 38.52, "learning_rate": 3.074812849591019e-05, "loss": 2.1626, "step": 7774500 }, { "epoch": 38.52, "learning_rate": 3.074688990948411e-05, "loss": 2.15, "step": 7775000 }, { "epoch": 38.52, "learning_rate": 3.074565132305802e-05, "loss": 2.1828, "step": 7775500 }, { "epoch": 38.52, "learning_rate": 3.0744412736631936e-05, "loss": 2.1476, "step": 7776000 }, { "epoch": 38.53, "learning_rate": 3.0743176627378705e-05, "loss": 2.1792, "step": 7776500 }, { "epoch": 38.53, "learning_rate": 3.074193804095262e-05, "loss": 2.179, "step": 7777000 }, { "epoch": 38.53, "learning_rate": 3.074069945452654e-05, "loss": 2.1365, "step": 7777500 }, { "epoch": 38.53, "learning_rate": 3.0739460868100456e-05, "loss": 2.1565, "step": 7778000 }, { "epoch": 38.54, "learning_rate": 3.073822228167437e-05, "loss": 2.1655, "step": 7778500 }, { "epoch": 38.54, "learning_rate": 3.073698369524829e-05, "loss": 2.1732, "step": 7779000 }, { "epoch": 38.54, "learning_rate": 3.0735745108822206e-05, "loss": 2.1556, "step": 7779500 }, { "epoch": 38.54, "learning_rate": 3.0734506522396123e-05, "loss": 2.1604, "step": 7780000 }, { "epoch": 38.55, "learning_rate": 3.073326793597004e-05, "loss": 2.1461, "step": 7780500 }, { "epoch": 38.55, "learning_rate": 3.073203182671681e-05, "loss": 2.1542, "step": 7781000 }, { "epoch": 38.55, "learning_rate": 3.0730793240290726e-05, "loss": 2.164, "step": 7781500 }, { "epoch": 38.55, "learning_rate": 3.0729554653864636e-05, "loss": 2.1792, "step": 7782000 }, { "epoch": 38.56, "learning_rate": 3.0728321021784264e-05, "loss": 2.1498, "step": 7782500 }, { "epoch": 38.56, "learning_rate": 3.0727082435358174e-05, "loss": 2.1584, "step": 7783000 }, { "epoch": 38.56, "learning_rate": 3.072584384893209e-05, "loss": 2.1797, "step": 7783500 }, { "epoch": 38.56, "learning_rate": 3.072460526250601e-05, "loss": 2.1565, "step": 7784000 }, { "epoch": 38.57, "learning_rate": 3.0723366676079925e-05, "loss": 2.1586, "step": 7784500 }, { "epoch": 38.57, "learning_rate": 3.072212808965384e-05, "loss": 2.1527, "step": 7785000 }, { "epoch": 38.57, "learning_rate": 3.072088950322776e-05, "loss": 2.1582, "step": 7785500 }, { "epoch": 38.57, "learning_rate": 3.0719650916801675e-05, "loss": 2.1819, "step": 7786000 }, { "epoch": 38.58, "learning_rate": 3.071841233037559e-05, "loss": 2.1677, "step": 7786500 }, { "epoch": 38.58, "learning_rate": 3.071717374394951e-05, "loss": 2.1631, "step": 7787000 }, { "epoch": 38.58, "learning_rate": 3.0715935157523426e-05, "loss": 2.153, "step": 7787500 }, { "epoch": 38.58, "learning_rate": 3.071469657109734e-05, "loss": 2.1566, "step": 7788000 }, { "epoch": 38.59, "learning_rate": 3.071345798467126e-05, "loss": 2.1773, "step": 7788500 }, { "epoch": 38.59, "learning_rate": 3.071221939824517e-05, "loss": 2.1527, "step": 7789000 }, { "epoch": 38.59, "learning_rate": 3.071098081181909e-05, "loss": 2.1608, "step": 7789500 }, { "epoch": 38.59, "learning_rate": 3.0709744702565856e-05, "loss": 2.1594, "step": 7790000 }, { "epoch": 38.6, "learning_rate": 3.070850611613977e-05, "loss": 2.1544, "step": 7790500 }, { "epoch": 38.6, "learning_rate": 3.070726752971369e-05, "loss": 2.1647, "step": 7791000 }, { "epoch": 38.6, "learning_rate": 3.070603142046046e-05, "loss": 2.146, "step": 7791500 }, { "epoch": 38.6, "learning_rate": 3.0704792834034376e-05, "loss": 2.1485, "step": 7792000 }, { "epoch": 38.61, "learning_rate": 3.070355424760829e-05, "loss": 2.1703, "step": 7792500 }, { "epoch": 38.61, "learning_rate": 3.070231566118221e-05, "loss": 2.1722, "step": 7793000 }, { "epoch": 38.61, "learning_rate": 3.0701077074756126e-05, "loss": 2.1854, "step": 7793500 }, { "epoch": 38.61, "learning_rate": 3.069983848833004e-05, "loss": 2.1533, "step": 7794000 }, { "epoch": 38.62, "learning_rate": 3.069859990190396e-05, "loss": 2.141, "step": 7794500 }, { "epoch": 38.62, "learning_rate": 3.069736131547788e-05, "loss": 2.1703, "step": 7795000 }, { "epoch": 38.62, "learning_rate": 3.069612520622464e-05, "loss": 2.1582, "step": 7795500 }, { "epoch": 38.62, "learning_rate": 3.0694886619798556e-05, "loss": 2.1393, "step": 7796000 }, { "epoch": 38.63, "learning_rate": 3.0693650510545325e-05, "loss": 2.1568, "step": 7796500 }, { "epoch": 38.63, "learning_rate": 3.069241192411924e-05, "loss": 2.1373, "step": 7797000 }, { "epoch": 38.63, "learning_rate": 3.069117333769316e-05, "loss": 2.1609, "step": 7797500 }, { "epoch": 38.63, "learning_rate": 3.0689934751267076e-05, "loss": 2.1931, "step": 7798000 }, { "epoch": 38.64, "learning_rate": 3.068869616484099e-05, "loss": 2.1741, "step": 7798500 }, { "epoch": 38.64, "learning_rate": 3.068745757841491e-05, "loss": 2.1588, "step": 7799000 }, { "epoch": 38.64, "learning_rate": 3.068622146916167e-05, "loss": 2.1735, "step": 7799500 }, { "epoch": 38.64, "learning_rate": 3.068498288273559e-05, "loss": 2.1634, "step": 7800000 }, { "epoch": 38.65, "learning_rate": 3.0683744296309505e-05, "loss": 2.1464, "step": 7800500 }, { "epoch": 38.65, "learning_rate": 3.068250570988342e-05, "loss": 2.1824, "step": 7801000 }, { "epoch": 38.65, "learning_rate": 3.068126712345734e-05, "loss": 2.1648, "step": 7801500 }, { "epoch": 38.65, "learning_rate": 3.0680028537031256e-05, "loss": 2.1464, "step": 7802000 }, { "epoch": 38.66, "learning_rate": 3.067878995060517e-05, "loss": 2.1653, "step": 7802500 }, { "epoch": 38.66, "learning_rate": 3.067755136417909e-05, "loss": 2.1694, "step": 7803000 }, { "epoch": 38.66, "learning_rate": 3.067631277775301e-05, "loss": 2.1317, "step": 7803500 }, { "epoch": 38.66, "learning_rate": 3.0675076668499776e-05, "loss": 2.166, "step": 7804000 }, { "epoch": 38.67, "learning_rate": 3.067383808207369e-05, "loss": 2.1336, "step": 7804500 }, { "epoch": 38.67, "learning_rate": 3.067259949564761e-05, "loss": 2.1756, "step": 7805000 }, { "epoch": 38.67, "learning_rate": 3.0671360909221527e-05, "loss": 2.1769, "step": 7805500 }, { "epoch": 38.67, "learning_rate": 3.0670122322795443e-05, "loss": 2.168, "step": 7806000 }, { "epoch": 38.68, "learning_rate": 3.066888373636936e-05, "loss": 2.1716, "step": 7806500 }, { "epoch": 38.68, "learning_rate": 3.066764762711612e-05, "loss": 2.1611, "step": 7807000 }, { "epoch": 38.68, "learning_rate": 3.066640904069004e-05, "loss": 2.1616, "step": 7807500 }, { "epoch": 38.68, "learning_rate": 3.0665170454263956e-05, "loss": 2.1478, "step": 7808000 }, { "epoch": 38.69, "learning_rate": 3.066393186783787e-05, "loss": 2.1736, "step": 7808500 }, { "epoch": 38.69, "learning_rate": 3.066269328141179e-05, "loss": 2.1648, "step": 7809000 }, { "epoch": 38.69, "learning_rate": 3.066145469498571e-05, "loss": 2.1752, "step": 7809500 }, { "epoch": 38.69, "learning_rate": 3.0660216108559624e-05, "loss": 2.1323, "step": 7810000 }, { "epoch": 38.7, "learning_rate": 3.065897752213354e-05, "loss": 2.1641, "step": 7810500 }, { "epoch": 38.7, "learning_rate": 3.065773893570746e-05, "loss": 2.2023, "step": 7811000 }, { "epoch": 38.7, "learning_rate": 3.0656500349281375e-05, "loss": 2.1507, "step": 7811500 }, { "epoch": 38.7, "learning_rate": 3.065526176285529e-05, "loss": 2.1431, "step": 7812000 }, { "epoch": 38.71, "learning_rate": 3.065402317642921e-05, "loss": 2.1635, "step": 7812500 }, { "epoch": 38.71, "learning_rate": 3.0652784590003126e-05, "loss": 2.1486, "step": 7813000 }, { "epoch": 38.71, "learning_rate": 3.0651548480749894e-05, "loss": 2.1406, "step": 7813500 }, { "epoch": 38.71, "learning_rate": 3.065030989432381e-05, "loss": 2.1673, "step": 7814000 }, { "epoch": 38.72, "learning_rate": 3.064907130789773e-05, "loss": 2.1746, "step": 7814500 }, { "epoch": 38.72, "learning_rate": 3.0647832721471645e-05, "loss": 2.163, "step": 7815000 }, { "epoch": 38.72, "learning_rate": 3.064659661221841e-05, "loss": 2.1701, "step": 7815500 }, { "epoch": 38.72, "learning_rate": 3.0645358025792324e-05, "loss": 2.1663, "step": 7816000 }, { "epoch": 38.73, "learning_rate": 3.064411943936624e-05, "loss": 2.1682, "step": 7816500 }, { "epoch": 38.73, "learning_rate": 3.064288085294016e-05, "loss": 2.1865, "step": 7817000 }, { "epoch": 38.73, "learning_rate": 3.064164474368693e-05, "loss": 2.1702, "step": 7817500 }, { "epoch": 38.73, "learning_rate": 3.0640406157260844e-05, "loss": 2.1665, "step": 7818000 }, { "epoch": 38.74, "learning_rate": 3.063916757083476e-05, "loss": 2.1762, "step": 7818500 }, { "epoch": 38.74, "learning_rate": 3.063792898440868e-05, "loss": 2.146, "step": 7819000 }, { "epoch": 38.74, "learning_rate": 3.0636690397982595e-05, "loss": 2.1585, "step": 7819500 }, { "epoch": 38.74, "learning_rate": 3.063545181155651e-05, "loss": 2.1604, "step": 7820000 }, { "epoch": 38.75, "learning_rate": 3.063421322513043e-05, "loss": 2.1483, "step": 7820500 }, { "epoch": 38.75, "learning_rate": 3.0632974638704345e-05, "loss": 2.1786, "step": 7821000 }, { "epoch": 38.75, "learning_rate": 3.063173605227826e-05, "loss": 2.1559, "step": 7821500 }, { "epoch": 38.75, "learning_rate": 3.063049746585218e-05, "loss": 2.1845, "step": 7822000 }, { "epoch": 38.76, "learning_rate": 3.062925887942609e-05, "loss": 2.1505, "step": 7822500 }, { "epoch": 38.76, "learning_rate": 3.0628020293000006e-05, "loss": 2.1552, "step": 7823000 }, { "epoch": 38.76, "learning_rate": 3.0626784183746775e-05, "loss": 2.1628, "step": 7823500 }, { "epoch": 38.76, "learning_rate": 3.062554559732069e-05, "loss": 2.154, "step": 7824000 }, { "epoch": 38.77, "learning_rate": 3.062430701089461e-05, "loss": 2.1525, "step": 7824500 }, { "epoch": 38.77, "learning_rate": 3.0623068424468526e-05, "loss": 2.1739, "step": 7825000 }, { "epoch": 38.77, "learning_rate": 3.062182983804244e-05, "loss": 2.1721, "step": 7825500 }, { "epoch": 38.77, "learning_rate": 3.062059125161635e-05, "loss": 2.1654, "step": 7826000 }, { "epoch": 38.78, "learning_rate": 3.061935266519027e-05, "loss": 2.1693, "step": 7826500 }, { "epoch": 38.78, "learning_rate": 3.061811407876419e-05, "loss": 2.1554, "step": 7827000 }, { "epoch": 38.78, "learning_rate": 3.061687796951096e-05, "loss": 2.1482, "step": 7827500 }, { "epoch": 38.78, "learning_rate": 3.061563938308488e-05, "loss": 2.1546, "step": 7828000 }, { "epoch": 38.79, "learning_rate": 3.0614400796658796e-05, "loss": 2.1799, "step": 7828500 }, { "epoch": 38.79, "learning_rate": 3.061316221023271e-05, "loss": 2.1825, "step": 7829000 }, { "epoch": 38.79, "learning_rate": 3.061192857815233e-05, "loss": 2.1486, "step": 7829500 }, { "epoch": 38.79, "learning_rate": 3.0610692468899096e-05, "loss": 2.1515, "step": 7830000 }, { "epoch": 38.8, "learning_rate": 3.060945388247301e-05, "loss": 2.1725, "step": 7830500 }, { "epoch": 38.8, "learning_rate": 3.060821529604693e-05, "loss": 2.1588, "step": 7831000 }, { "epoch": 38.8, "learning_rate": 3.060697670962085e-05, "loss": 2.1636, "step": 7831500 }, { "epoch": 38.8, "learning_rate": 3.0605738123194764e-05, "loss": 2.1585, "step": 7832000 }, { "epoch": 38.8, "learning_rate": 3.0604499536768674e-05, "loss": 2.1466, "step": 7832500 }, { "epoch": 38.81, "learning_rate": 3.060326095034259e-05, "loss": 2.1806, "step": 7833000 }, { "epoch": 38.81, "learning_rate": 3.060202236391651e-05, "loss": 2.1412, "step": 7833500 }, { "epoch": 38.81, "learning_rate": 3.0600783777490425e-05, "loss": 2.1794, "step": 7834000 }, { "epoch": 38.81, "learning_rate": 3.059954519106434e-05, "loss": 2.1508, "step": 7834500 }, { "epoch": 38.82, "learning_rate": 3.059830660463826e-05, "loss": 2.1664, "step": 7835000 }, { "epoch": 38.82, "learning_rate": 3.0597068018212175e-05, "loss": 2.1578, "step": 7835500 }, { "epoch": 38.82, "learning_rate": 3.059582943178609e-05, "loss": 2.1817, "step": 7836000 }, { "epoch": 38.82, "learning_rate": 3.059459084536001e-05, "loss": 2.1615, "step": 7836500 }, { "epoch": 38.83, "learning_rate": 3.059335473610678e-05, "loss": 2.1464, "step": 7837000 }, { "epoch": 38.83, "learning_rate": 3.0592116149680695e-05, "loss": 2.1565, "step": 7837500 }, { "epoch": 38.83, "learning_rate": 3.059087756325461e-05, "loss": 2.1671, "step": 7838000 }, { "epoch": 38.83, "learning_rate": 3.058964145400138e-05, "loss": 2.1436, "step": 7838500 }, { "epoch": 38.84, "learning_rate": 3.05884028675753e-05, "loss": 2.1457, "step": 7839000 }, { "epoch": 38.84, "learning_rate": 3.058716428114921e-05, "loss": 2.1704, "step": 7839500 }, { "epoch": 38.84, "learning_rate": 3.0585925694723125e-05, "loss": 2.1605, "step": 7840000 }, { "epoch": 38.84, "learning_rate": 3.058469206264275e-05, "loss": 2.1886, "step": 7840500 }, { "epoch": 38.85, "learning_rate": 3.058345347621667e-05, "loss": 2.1732, "step": 7841000 }, { "epoch": 38.85, "learning_rate": 3.0582214889790586e-05, "loss": 2.1919, "step": 7841500 }, { "epoch": 38.85, "learning_rate": 3.05809763033645e-05, "loss": 2.1547, "step": 7842000 }, { "epoch": 38.85, "learning_rate": 3.057973771693841e-05, "loss": 2.182, "step": 7842500 }, { "epoch": 38.86, "learning_rate": 3.057849913051233e-05, "loss": 2.1522, "step": 7843000 }, { "epoch": 38.86, "learning_rate": 3.057726054408625e-05, "loss": 2.1648, "step": 7843500 }, { "epoch": 38.86, "learning_rate": 3.0576021957660164e-05, "loss": 2.1581, "step": 7844000 }, { "epoch": 38.86, "learning_rate": 3.057478337123408e-05, "loss": 2.1647, "step": 7844500 }, { "epoch": 38.87, "learning_rate": 3.0573544784808e-05, "loss": 2.1649, "step": 7845000 }, { "epoch": 38.87, "learning_rate": 3.0572306198381915e-05, "loss": 2.149, "step": 7845500 }, { "epoch": 38.87, "learning_rate": 3.0571067611955825e-05, "loss": 2.1535, "step": 7846000 }, { "epoch": 38.87, "learning_rate": 3.056982902552974e-05, "loss": 2.1511, "step": 7846500 }, { "epoch": 38.88, "learning_rate": 3.056859043910366e-05, "loss": 2.1786, "step": 7847000 }, { "epoch": 38.88, "learning_rate": 3.0567351852677576e-05, "loss": 2.1551, "step": 7847500 }, { "epoch": 38.88, "learning_rate": 3.056611326625149e-05, "loss": 2.1863, "step": 7848000 }, { "epoch": 38.88, "learning_rate": 3.056487715699826e-05, "loss": 2.1915, "step": 7848500 }, { "epoch": 38.89, "learning_rate": 3.056364104774503e-05, "loss": 2.163, "step": 7849000 }, { "epoch": 38.89, "learning_rate": 3.056240246131895e-05, "loss": 2.1685, "step": 7849500 }, { "epoch": 38.89, "learning_rate": 3.0561166352065716e-05, "loss": 2.1734, "step": 7850000 }, { "epoch": 38.89, "learning_rate": 3.055992776563963e-05, "loss": 2.1701, "step": 7850500 }, { "epoch": 38.9, "learning_rate": 3.055868917921355e-05, "loss": 2.1444, "step": 7851000 }, { "epoch": 38.9, "learning_rate": 3.055745059278747e-05, "loss": 2.1707, "step": 7851500 }, { "epoch": 38.9, "learning_rate": 3.055621200636138e-05, "loss": 2.1581, "step": 7852000 }, { "epoch": 38.9, "learning_rate": 3.0554973419935294e-05, "loss": 2.1871, "step": 7852500 }, { "epoch": 38.91, "learning_rate": 3.055373483350921e-05, "loss": 2.19, "step": 7853000 }, { "epoch": 38.91, "learning_rate": 3.055249624708313e-05, "loss": 2.1667, "step": 7853500 }, { "epoch": 38.91, "learning_rate": 3.0551257660657044e-05, "loss": 2.1786, "step": 7854000 }, { "epoch": 38.91, "learning_rate": 3.055002155140382e-05, "loss": 2.1762, "step": 7854500 }, { "epoch": 38.92, "learning_rate": 3.054878296497773e-05, "loss": 2.1552, "step": 7855000 }, { "epoch": 38.92, "learning_rate": 3.054754437855165e-05, "loss": 2.1586, "step": 7855500 }, { "epoch": 38.92, "learning_rate": 3.0546305792125564e-05, "loss": 2.1457, "step": 7856000 }, { "epoch": 38.92, "learning_rate": 3.054506968287233e-05, "loss": 2.1658, "step": 7856500 }, { "epoch": 38.93, "learning_rate": 3.054383109644625e-05, "loss": 2.1832, "step": 7857000 }, { "epoch": 38.93, "learning_rate": 3.054259251002017e-05, "loss": 2.1612, "step": 7857500 }, { "epoch": 38.93, "learning_rate": 3.054135392359408e-05, "loss": 2.1709, "step": 7858000 }, { "epoch": 38.93, "learning_rate": 3.0540115337167994e-05, "loss": 2.1588, "step": 7858500 }, { "epoch": 38.94, "learning_rate": 3.053887922791477e-05, "loss": 2.1646, "step": 7859000 }, { "epoch": 38.94, "learning_rate": 3.0537640641488686e-05, "loss": 2.1794, "step": 7859500 }, { "epoch": 38.94, "learning_rate": 3.0536404532235455e-05, "loss": 2.178, "step": 7860000 }, { "epoch": 38.94, "learning_rate": 3.0535165945809365e-05, "loss": 2.1825, "step": 7860500 }, { "epoch": 38.95, "learning_rate": 3.053392735938328e-05, "loss": 2.1545, "step": 7861000 }, { "epoch": 38.95, "learning_rate": 3.05326887729572e-05, "loss": 2.1719, "step": 7861500 }, { "epoch": 38.95, "learning_rate": 3.053145266370397e-05, "loss": 2.1506, "step": 7862000 }, { "epoch": 38.95, "learning_rate": 3.0530214077277885e-05, "loss": 2.1656, "step": 7862500 }, { "epoch": 38.96, "learning_rate": 3.05289754908518e-05, "loss": 2.1701, "step": 7863000 }, { "epoch": 38.96, "learning_rate": 3.052773690442572e-05, "loss": 2.1722, "step": 7863500 }, { "epoch": 38.96, "learning_rate": 3.052650079517249e-05, "loss": 2.1621, "step": 7864000 }, { "epoch": 38.96, "learning_rate": 3.0525262208746404e-05, "loss": 2.1586, "step": 7864500 }, { "epoch": 38.97, "learning_rate": 3.052402362232032e-05, "loss": 2.1747, "step": 7865000 }, { "epoch": 38.97, "learning_rate": 3.052278503589424e-05, "loss": 2.1669, "step": 7865500 }, { "epoch": 38.97, "learning_rate": 3.0521546449468155e-05, "loss": 2.17, "step": 7866000 }, { "epoch": 38.97, "learning_rate": 3.052030786304207e-05, "loss": 2.1544, "step": 7866500 }, { "epoch": 38.98, "learning_rate": 3.051906927661599e-05, "loss": 2.161, "step": 7867000 }, { "epoch": 38.98, "learning_rate": 3.05178306901899e-05, "loss": 2.1717, "step": 7867500 }, { "epoch": 38.98, "learning_rate": 3.0516594580936668e-05, "loss": 2.1653, "step": 7868000 }, { "epoch": 38.98, "learning_rate": 3.0515355994510585e-05, "loss": 2.1722, "step": 7868500 }, { "epoch": 38.99, "learning_rate": 3.0514117408084502e-05, "loss": 2.1728, "step": 7869000 }, { "epoch": 38.99, "learning_rate": 3.051287882165842e-05, "loss": 2.1704, "step": 7869500 }, { "epoch": 38.99, "learning_rate": 3.0511640235232336e-05, "loss": 2.1638, "step": 7870000 }, { "epoch": 38.99, "learning_rate": 3.0510401648806253e-05, "loss": 2.1512, "step": 7870500 }, { "epoch": 39.0, "learning_rate": 3.0509165539553018e-05, "loss": 2.1884, "step": 7871000 }, { "epoch": 39.0, "learning_rate": 3.0507926953126935e-05, "loss": 2.1752, "step": 7871500 }, { "epoch": 39.0, "eval_accuracy": 0.6649884989419301, "eval_accuracy_mlm": 0.6219056881216726, "eval_accuracy_nsp": 0.867931706666562, "eval_loss": 2.296628952026367, "eval_runtime": 146.0071, "eval_samples_per_second": 1746.209, "eval_steps_per_second": 72.764, "step": 7871877 }, { "epoch": 39.0, "learning_rate": 3.0506688366700852e-05, "loss": 2.1657, "step": 7872000 }, { "epoch": 39.0, "learning_rate": 3.050544978027477e-05, "loss": 2.1253, "step": 7872500 }, { "epoch": 39.01, "learning_rate": 3.0504211193848686e-05, "loss": 2.1353, "step": 7873000 }, { "epoch": 39.01, "learning_rate": 3.0502972607422603e-05, "loss": 2.1536, "step": 7873500 }, { "epoch": 39.01, "learning_rate": 3.0501736498169368e-05, "loss": 2.1249, "step": 7874000 }, { "epoch": 39.01, "learning_rate": 3.0500497911743285e-05, "loss": 2.1628, "step": 7874500 }, { "epoch": 39.02, "learning_rate": 3.0499259325317202e-05, "loss": 2.1184, "step": 7875000 }, { "epoch": 39.02, "learning_rate": 3.049802073889112e-05, "loss": 2.1196, "step": 7875500 }, { "epoch": 39.02, "learning_rate": 3.0496782152465036e-05, "loss": 2.1508, "step": 7876000 }, { "epoch": 39.02, "learning_rate": 3.0495546043211805e-05, "loss": 2.1501, "step": 7876500 }, { "epoch": 39.03, "learning_rate": 3.049430745678572e-05, "loss": 2.1245, "step": 7877000 }, { "epoch": 39.03, "learning_rate": 3.0493068870359635e-05, "loss": 2.1496, "step": 7877500 }, { "epoch": 39.03, "learning_rate": 3.0491830283933552e-05, "loss": 2.1452, "step": 7878000 }, { "epoch": 39.03, "learning_rate": 3.049059169750747e-05, "loss": 2.1498, "step": 7878500 }, { "epoch": 39.04, "learning_rate": 3.0489353111081386e-05, "loss": 2.1396, "step": 7879000 }, { "epoch": 39.04, "learning_rate": 3.0488114524655303e-05, "loss": 2.1236, "step": 7879500 }, { "epoch": 39.04, "learning_rate": 3.048687593822922e-05, "loss": 2.1515, "step": 7880000 }, { "epoch": 39.04, "learning_rate": 3.048563982897599e-05, "loss": 2.1484, "step": 7880500 }, { "epoch": 39.05, "learning_rate": 3.0484401242549902e-05, "loss": 2.133, "step": 7881000 }, { "epoch": 39.05, "learning_rate": 3.048316265612382e-05, "loss": 2.1314, "step": 7881500 }, { "epoch": 39.05, "learning_rate": 3.0481926546870588e-05, "loss": 2.1429, "step": 7882000 }, { "epoch": 39.05, "learning_rate": 3.0480687960444505e-05, "loss": 2.1548, "step": 7882500 }, { "epoch": 39.06, "learning_rate": 3.0479451851191277e-05, "loss": 2.1394, "step": 7883000 }, { "epoch": 39.06, "learning_rate": 3.0478213264765194e-05, "loss": 2.1301, "step": 7883500 }, { "epoch": 39.06, "learning_rate": 3.0476974678339104e-05, "loss": 2.1422, "step": 7884000 }, { "epoch": 39.06, "learning_rate": 3.047573609191302e-05, "loss": 2.137, "step": 7884500 }, { "epoch": 39.07, "learning_rate": 3.0474497505486938e-05, "loss": 2.1319, "step": 7885000 }, { "epoch": 39.07, "learning_rate": 3.0473258919060855e-05, "loss": 2.1218, "step": 7885500 }, { "epoch": 39.07, "learning_rate": 3.0472020332634772e-05, "loss": 2.1418, "step": 7886000 }, { "epoch": 39.07, "learning_rate": 3.047078174620869e-05, "loss": 2.1179, "step": 7886500 }, { "epoch": 39.07, "learning_rate": 3.0469543159782606e-05, "loss": 2.133, "step": 7887000 }, { "epoch": 39.08, "learning_rate": 3.046830457335652e-05, "loss": 2.1354, "step": 7887500 }, { "epoch": 39.08, "learning_rate": 3.0467065986930436e-05, "loss": 2.1265, "step": 7888000 }, { "epoch": 39.08, "learning_rate": 3.0465827400504353e-05, "loss": 2.137, "step": 7888500 }, { "epoch": 39.08, "learning_rate": 3.046458881407827e-05, "loss": 2.1538, "step": 7889000 }, { "epoch": 39.09, "learning_rate": 3.0463350227652187e-05, "loss": 2.1145, "step": 7889500 }, { "epoch": 39.09, "learning_rate": 3.0462111641226104e-05, "loss": 2.1411, "step": 7890000 }, { "epoch": 39.09, "learning_rate": 3.046087305480002e-05, "loss": 2.1373, "step": 7890500 }, { "epoch": 39.09, "learning_rate": 3.0459634468373938e-05, "loss": 2.1356, "step": 7891000 }, { "epoch": 39.1, "learning_rate": 3.0458398359120703e-05, "loss": 2.1337, "step": 7891500 }, { "epoch": 39.1, "learning_rate": 3.045715977269462e-05, "loss": 2.1247, "step": 7892000 }, { "epoch": 39.1, "learning_rate": 3.0455921186268537e-05, "loss": 2.1473, "step": 7892500 }, { "epoch": 39.1, "learning_rate": 3.0454682599842454e-05, "loss": 2.1322, "step": 7893000 }, { "epoch": 39.11, "learning_rate": 3.045344401341637e-05, "loss": 2.1385, "step": 7893500 }, { "epoch": 39.11, "learning_rate": 3.0452205426990288e-05, "loss": 2.1519, "step": 7894000 }, { "epoch": 39.11, "learning_rate": 3.0450966840564205e-05, "loss": 2.1409, "step": 7894500 }, { "epoch": 39.11, "learning_rate": 3.044973073131097e-05, "loss": 2.1508, "step": 7895000 }, { "epoch": 39.12, "learning_rate": 3.0448492144884887e-05, "loss": 2.1595, "step": 7895500 }, { "epoch": 39.12, "learning_rate": 3.0447253558458804e-05, "loss": 2.1137, "step": 7896000 }, { "epoch": 39.12, "learning_rate": 3.044601497203272e-05, "loss": 2.1519, "step": 7896500 }, { "epoch": 39.12, "learning_rate": 3.044477886277949e-05, "loss": 2.1222, "step": 7897000 }, { "epoch": 39.13, "learning_rate": 3.0443540276353407e-05, "loss": 2.1545, "step": 7897500 }, { "epoch": 39.13, "learning_rate": 3.044230168992732e-05, "loss": 2.132, "step": 7898000 }, { "epoch": 39.13, "learning_rate": 3.0441063103501237e-05, "loss": 2.1354, "step": 7898500 }, { "epoch": 39.13, "learning_rate": 3.0439826994248006e-05, "loss": 2.1435, "step": 7899000 }, { "epoch": 39.14, "learning_rate": 3.0438588407821923e-05, "loss": 2.1475, "step": 7899500 }, { "epoch": 39.14, "learning_rate": 3.043734982139584e-05, "loss": 2.1216, "step": 7900000 }, { "epoch": 39.14, "learning_rate": 3.0436113712142605e-05, "loss": 2.1333, "step": 7900500 }, { "epoch": 39.14, "learning_rate": 3.0434875125716522e-05, "loss": 2.1707, "step": 7901000 }, { "epoch": 39.15, "learning_rate": 3.043363653929044e-05, "loss": 2.1407, "step": 7901500 }, { "epoch": 39.15, "learning_rate": 3.0432397952864356e-05, "loss": 2.1273, "step": 7902000 }, { "epoch": 39.15, "learning_rate": 3.0431159366438273e-05, "loss": 2.1465, "step": 7902500 }, { "epoch": 39.15, "learning_rate": 3.042992325718504e-05, "loss": 2.1403, "step": 7903000 }, { "epoch": 39.16, "learning_rate": 3.0428684670758955e-05, "loss": 2.1304, "step": 7903500 }, { "epoch": 39.16, "learning_rate": 3.0427446084332872e-05, "loss": 2.1517, "step": 7904000 }, { "epoch": 39.16, "learning_rate": 3.042620749790679e-05, "loss": 2.1458, "step": 7904500 }, { "epoch": 39.16, "learning_rate": 3.0424968911480706e-05, "loss": 2.1455, "step": 7905000 }, { "epoch": 39.17, "learning_rate": 3.0423730325054623e-05, "loss": 2.1511, "step": 7905500 }, { "epoch": 39.17, "learning_rate": 3.042249173862854e-05, "loss": 2.1395, "step": 7906000 }, { "epoch": 39.17, "learning_rate": 3.0421253152202457e-05, "loss": 2.1483, "step": 7906500 }, { "epoch": 39.17, "learning_rate": 3.0420014565776374e-05, "loss": 2.1353, "step": 7907000 }, { "epoch": 39.18, "learning_rate": 3.041877597935029e-05, "loss": 2.1086, "step": 7907500 }, { "epoch": 39.18, "learning_rate": 3.0417537392924204e-05, "loss": 2.1319, "step": 7908000 }, { "epoch": 39.18, "learning_rate": 3.041629880649812e-05, "loss": 2.1362, "step": 7908500 }, { "epoch": 39.18, "learning_rate": 3.041506269724489e-05, "loss": 2.1574, "step": 7909000 }, { "epoch": 39.19, "learning_rate": 3.0413826587991655e-05, "loss": 2.149, "step": 7909500 }, { "epoch": 39.19, "learning_rate": 3.0412588001565572e-05, "loss": 2.1484, "step": 7910000 }, { "epoch": 39.19, "learning_rate": 3.041134941513949e-05, "loss": 2.1333, "step": 7910500 }, { "epoch": 39.19, "learning_rate": 3.0410110828713406e-05, "loss": 2.1348, "step": 7911000 }, { "epoch": 39.2, "learning_rate": 3.0408872242287323e-05, "loss": 2.1438, "step": 7911500 }, { "epoch": 39.2, "learning_rate": 3.040763365586124e-05, "loss": 2.1444, "step": 7912000 }, { "epoch": 39.2, "learning_rate": 3.0406395069435157e-05, "loss": 2.1385, "step": 7912500 }, { "epoch": 39.2, "learning_rate": 3.0405156483009074e-05, "loss": 2.1308, "step": 7913000 }, { "epoch": 39.21, "learning_rate": 3.040391789658299e-05, "loss": 2.1406, "step": 7913500 }, { "epoch": 39.21, "learning_rate": 3.0402679310156908e-05, "loss": 2.1446, "step": 7914000 }, { "epoch": 39.21, "learning_rate": 3.040144072373082e-05, "loss": 2.1441, "step": 7914500 }, { "epoch": 39.21, "learning_rate": 3.040020461447759e-05, "loss": 2.1577, "step": 7915000 }, { "epoch": 39.22, "learning_rate": 3.0398966028051507e-05, "loss": 2.1304, "step": 7915500 }, { "epoch": 39.22, "learning_rate": 3.0397727441625424e-05, "loss": 2.1456, "step": 7916000 }, { "epoch": 39.22, "learning_rate": 3.039648885519934e-05, "loss": 2.1595, "step": 7916500 }, { "epoch": 39.22, "learning_rate": 3.0395252745946106e-05, "loss": 2.1297, "step": 7917000 }, { "epoch": 39.23, "learning_rate": 3.0394014159520023e-05, "loss": 2.1346, "step": 7917500 }, { "epoch": 39.23, "learning_rate": 3.039277557309394e-05, "loss": 2.1546, "step": 7918000 }, { "epoch": 39.23, "learning_rate": 3.0391536986667857e-05, "loss": 2.1356, "step": 7918500 }, { "epoch": 39.23, "learning_rate": 3.0390298400241774e-05, "loss": 2.1417, "step": 7919000 }, { "epoch": 39.24, "learning_rate": 3.038905981381569e-05, "loss": 2.1567, "step": 7919500 }, { "epoch": 39.24, "learning_rate": 3.0387821227389608e-05, "loss": 2.1488, "step": 7920000 }, { "epoch": 39.24, "learning_rate": 3.0386582640963525e-05, "loss": 2.1396, "step": 7920500 }, { "epoch": 39.24, "learning_rate": 3.0385344054537442e-05, "loss": 2.1383, "step": 7921000 }, { "epoch": 39.25, "learning_rate": 3.0384105468111355e-05, "loss": 2.099, "step": 7921500 }, { "epoch": 39.25, "learning_rate": 3.0382866881685272e-05, "loss": 2.141, "step": 7922000 }, { "epoch": 39.25, "learning_rate": 3.038162829525919e-05, "loss": 2.1426, "step": 7922500 }, { "epoch": 39.25, "learning_rate": 3.0380392186005958e-05, "loss": 2.1428, "step": 7923000 }, { "epoch": 39.26, "learning_rate": 3.0379153599579875e-05, "loss": 2.1092, "step": 7923500 }, { "epoch": 39.26, "learning_rate": 3.037791749032664e-05, "loss": 2.1557, "step": 7924000 }, { "epoch": 39.26, "learning_rate": 3.0376678903900557e-05, "loss": 2.1542, "step": 7924500 }, { "epoch": 39.26, "learning_rate": 3.0375440317474474e-05, "loss": 2.1493, "step": 7925000 }, { "epoch": 39.27, "learning_rate": 3.037420173104839e-05, "loss": 2.1659, "step": 7925500 }, { "epoch": 39.27, "learning_rate": 3.0372965621795157e-05, "loss": 2.1458, "step": 7926000 }, { "epoch": 39.27, "learning_rate": 3.037172951254193e-05, "loss": 2.1533, "step": 7926500 }, { "epoch": 39.27, "learning_rate": 3.0370490926115846e-05, "loss": 2.1417, "step": 7927000 }, { "epoch": 39.28, "learning_rate": 3.036925233968976e-05, "loss": 2.1492, "step": 7927500 }, { "epoch": 39.28, "learning_rate": 3.0368013753263673e-05, "loss": 2.1322, "step": 7928000 }, { "epoch": 39.28, "learning_rate": 3.036677516683759e-05, "loss": 2.1395, "step": 7928500 }, { "epoch": 39.28, "learning_rate": 3.0365536580411507e-05, "loss": 2.1302, "step": 7929000 }, { "epoch": 39.29, "learning_rate": 3.0364297993985424e-05, "loss": 2.1257, "step": 7929500 }, { "epoch": 39.29, "learning_rate": 3.036305940755934e-05, "loss": 2.1455, "step": 7930000 }, { "epoch": 39.29, "learning_rate": 3.0361820821133257e-05, "loss": 2.1264, "step": 7930500 }, { "epoch": 39.29, "learning_rate": 3.0360582234707174e-05, "loss": 2.1652, "step": 7931000 }, { "epoch": 39.3, "learning_rate": 3.035934364828109e-05, "loss": 2.1703, "step": 7931500 }, { "epoch": 39.3, "learning_rate": 3.0358105061855008e-05, "loss": 2.1462, "step": 7932000 }, { "epoch": 39.3, "learning_rate": 3.0356866475428925e-05, "loss": 2.1323, "step": 7932500 }, { "epoch": 39.3, "learning_rate": 3.035563036617569e-05, "loss": 2.1556, "step": 7933000 }, { "epoch": 39.31, "learning_rate": 3.0354391779749607e-05, "loss": 2.1391, "step": 7933500 }, { "epoch": 39.31, "learning_rate": 3.0353155670496376e-05, "loss": 2.1458, "step": 7934000 }, { "epoch": 39.31, "learning_rate": 3.0351917084070293e-05, "loss": 2.1295, "step": 7934500 }, { "epoch": 39.31, "learning_rate": 3.0350678497644207e-05, "loss": 2.1523, "step": 7935000 }, { "epoch": 39.32, "learning_rate": 3.0349439911218124e-05, "loss": 2.1252, "step": 7935500 }, { "epoch": 39.32, "learning_rate": 3.0348203801964896e-05, "loss": 2.1702, "step": 7936000 }, { "epoch": 39.32, "learning_rate": 3.0346965215538813e-05, "loss": 2.1224, "step": 7936500 }, { "epoch": 39.32, "learning_rate": 3.034572910628558e-05, "loss": 2.1537, "step": 7937000 }, { "epoch": 39.33, "learning_rate": 3.03444905198595e-05, "loss": 2.1324, "step": 7937500 }, { "epoch": 39.33, "learning_rate": 3.0343251933433415e-05, "loss": 2.1567, "step": 7938000 }, { "epoch": 39.33, "learning_rate": 3.0342013347007332e-05, "loss": 2.1463, "step": 7938500 }, { "epoch": 39.33, "learning_rate": 3.0340774760581246e-05, "loss": 2.1471, "step": 7939000 }, { "epoch": 39.34, "learning_rate": 3.0339538651328015e-05, "loss": 2.1427, "step": 7939500 }, { "epoch": 39.34, "learning_rate": 3.033830006490193e-05, "loss": 2.1476, "step": 7940000 }, { "epoch": 39.34, "learning_rate": 3.033706147847585e-05, "loss": 2.1357, "step": 7940500 }, { "epoch": 39.34, "learning_rate": 3.0335822892049765e-05, "loss": 2.1364, "step": 7941000 }, { "epoch": 39.34, "learning_rate": 3.0334584305623682e-05, "loss": 2.1433, "step": 7941500 }, { "epoch": 39.35, "learning_rate": 3.0333348196370448e-05, "loss": 2.1451, "step": 7942000 }, { "epoch": 39.35, "learning_rate": 3.0332109609944365e-05, "loss": 2.1652, "step": 7942500 }, { "epoch": 39.35, "learning_rate": 3.0330871023518282e-05, "loss": 2.16, "step": 7943000 }, { "epoch": 39.35, "learning_rate": 3.03296324370922e-05, "loss": 2.1619, "step": 7943500 }, { "epoch": 39.36, "learning_rate": 3.0328393850666116e-05, "loss": 2.1616, "step": 7944000 }, { "epoch": 39.36, "learning_rate": 3.032715774141288e-05, "loss": 2.1725, "step": 7944500 }, { "epoch": 39.36, "learning_rate": 3.0325919154986798e-05, "loss": 2.1325, "step": 7945000 }, { "epoch": 39.36, "learning_rate": 3.0324680568560715e-05, "loss": 2.1277, "step": 7945500 }, { "epoch": 39.37, "learning_rate": 3.0323441982134632e-05, "loss": 2.1332, "step": 7946000 }, { "epoch": 39.37, "learning_rate": 3.032220339570855e-05, "loss": 2.1359, "step": 7946500 }, { "epoch": 39.37, "learning_rate": 3.0320967286455314e-05, "loss": 2.1517, "step": 7947000 }, { "epoch": 39.37, "learning_rate": 3.031972870002923e-05, "loss": 2.152, "step": 7947500 }, { "epoch": 39.38, "learning_rate": 3.0318490113603148e-05, "loss": 2.1653, "step": 7948000 }, { "epoch": 39.38, "learning_rate": 3.0317251527177065e-05, "loss": 2.1808, "step": 7948500 }, { "epoch": 39.38, "learning_rate": 3.0316012940750982e-05, "loss": 2.1778, "step": 7949000 }, { "epoch": 39.38, "learning_rate": 3.03147743543249e-05, "loss": 2.1583, "step": 7949500 }, { "epoch": 39.39, "learning_rate": 3.0313535767898816e-05, "loss": 2.1499, "step": 7950000 }, { "epoch": 39.39, "learning_rate": 3.0312297181472733e-05, "loss": 2.1613, "step": 7950500 }, { "epoch": 39.39, "learning_rate": 3.031105859504665e-05, "loss": 2.1439, "step": 7951000 }, { "epoch": 39.39, "learning_rate": 3.0309820008620566e-05, "loss": 2.1565, "step": 7951500 }, { "epoch": 39.4, "learning_rate": 3.0308581422194483e-05, "loss": 2.1586, "step": 7952000 }, { "epoch": 39.4, "learning_rate": 3.0307342835768394e-05, "loss": 2.1902, "step": 7952500 }, { "epoch": 39.4, "learning_rate": 3.030610424934231e-05, "loss": 2.1583, "step": 7953000 }, { "epoch": 39.4, "learning_rate": 3.0304865662916227e-05, "loss": 2.1433, "step": 7953500 }, { "epoch": 39.41, "learning_rate": 3.0303627076490144e-05, "loss": 2.1525, "step": 7954000 }, { "epoch": 39.41, "learning_rate": 3.030238849006406e-05, "loss": 2.1442, "step": 7954500 }, { "epoch": 39.41, "learning_rate": 3.0301149903637975e-05, "loss": 2.1515, "step": 7955000 }, { "epoch": 39.41, "learning_rate": 3.0299911317211892e-05, "loss": 2.146, "step": 7955500 }, { "epoch": 39.42, "learning_rate": 3.029867520795866e-05, "loss": 2.1483, "step": 7956000 }, { "epoch": 39.42, "learning_rate": 3.0297436621532577e-05, "loss": 2.1599, "step": 7956500 }, { "epoch": 39.42, "learning_rate": 3.0296198035106494e-05, "loss": 2.1642, "step": 7957000 }, { "epoch": 39.42, "learning_rate": 3.0294961925853267e-05, "loss": 2.1511, "step": 7957500 }, { "epoch": 39.43, "learning_rate": 3.0293723339427184e-05, "loss": 2.1465, "step": 7958000 }, { "epoch": 39.43, "learning_rate": 3.02924847530011e-05, "loss": 2.157, "step": 7958500 }, { "epoch": 39.43, "learning_rate": 3.0291248643747866e-05, "loss": 2.1562, "step": 7959000 }, { "epoch": 39.43, "learning_rate": 3.0290010057321783e-05, "loss": 2.1536, "step": 7959500 }, { "epoch": 39.44, "learning_rate": 3.02887714708957e-05, "loss": 2.1747, "step": 7960000 }, { "epoch": 39.44, "learning_rate": 3.0287532884469617e-05, "loss": 2.1465, "step": 7960500 }, { "epoch": 39.44, "learning_rate": 3.0286296775216382e-05, "loss": 2.1475, "step": 7961000 }, { "epoch": 39.44, "learning_rate": 3.02850581887903e-05, "loss": 2.1554, "step": 7961500 }, { "epoch": 39.45, "learning_rate": 3.0283819602364216e-05, "loss": 2.1414, "step": 7962000 }, { "epoch": 39.45, "learning_rate": 3.0282581015938133e-05, "loss": 2.1614, "step": 7962500 }, { "epoch": 39.45, "learning_rate": 3.028134242951205e-05, "loss": 2.1447, "step": 7963000 }, { "epoch": 39.45, "learning_rate": 3.0280103843085967e-05, "loss": 2.1288, "step": 7963500 }, { "epoch": 39.46, "learning_rate": 3.0278865256659884e-05, "loss": 2.1451, "step": 7964000 }, { "epoch": 39.46, "learning_rate": 3.02776266702338e-05, "loss": 2.1575, "step": 7964500 }, { "epoch": 39.46, "learning_rate": 3.027638808380771e-05, "loss": 2.1413, "step": 7965000 }, { "epoch": 39.46, "learning_rate": 3.0275149497381628e-05, "loss": 2.1849, "step": 7965500 }, { "epoch": 39.47, "learning_rate": 3.02739133881284e-05, "loss": 2.1532, "step": 7966000 }, { "epoch": 39.47, "learning_rate": 3.0272674801702317e-05, "loss": 2.1684, "step": 7966500 }, { "epoch": 39.47, "learning_rate": 3.0271436215276234e-05, "loss": 2.1787, "step": 7967000 }, { "epoch": 39.47, "learning_rate": 3.027019762885015e-05, "loss": 2.1244, "step": 7967500 }, { "epoch": 39.48, "learning_rate": 3.0268961519596916e-05, "loss": 2.1448, "step": 7968000 }, { "epoch": 39.48, "learning_rate": 3.0267725410343685e-05, "loss": 2.1404, "step": 7968500 }, { "epoch": 39.48, "learning_rate": 3.0266486823917602e-05, "loss": 2.1448, "step": 7969000 }, { "epoch": 39.48, "learning_rate": 3.0265250714664367e-05, "loss": 2.1466, "step": 7969500 }, { "epoch": 39.49, "learning_rate": 3.0264012128238284e-05, "loss": 2.1603, "step": 7970000 }, { "epoch": 39.49, "learning_rate": 3.02627735418122e-05, "loss": 2.1393, "step": 7970500 }, { "epoch": 39.49, "learning_rate": 3.0261534955386118e-05, "loss": 2.1433, "step": 7971000 }, { "epoch": 39.49, "learning_rate": 3.0260296368960035e-05, "loss": 2.1232, "step": 7971500 }, { "epoch": 39.5, "learning_rate": 3.0259057782533952e-05, "loss": 2.1082, "step": 7972000 }, { "epoch": 39.5, "learning_rate": 3.025781919610787e-05, "loss": 2.1548, "step": 7972500 }, { "epoch": 39.5, "learning_rate": 3.0256580609681782e-05, "loss": 2.1618, "step": 7973000 }, { "epoch": 39.5, "learning_rate": 3.02553420232557e-05, "loss": 2.1554, "step": 7973500 }, { "epoch": 39.51, "learning_rate": 3.0254103436829616e-05, "loss": 2.1582, "step": 7974000 }, { "epoch": 39.51, "learning_rate": 3.0252864850403533e-05, "loss": 2.1767, "step": 7974500 }, { "epoch": 39.51, "learning_rate": 3.0251628741150302e-05, "loss": 2.1367, "step": 7975000 }, { "epoch": 39.51, "learning_rate": 3.025039015472422e-05, "loss": 2.151, "step": 7975500 }, { "epoch": 39.52, "learning_rate": 3.0249154045470984e-05, "loss": 2.1421, "step": 7976000 }, { "epoch": 39.52, "learning_rate": 3.02479154590449e-05, "loss": 2.1534, "step": 7976500 }, { "epoch": 39.52, "learning_rate": 3.0246676872618818e-05, "loss": 2.1454, "step": 7977000 }, { "epoch": 39.52, "learning_rate": 3.0245438286192735e-05, "loss": 2.1565, "step": 7977500 }, { "epoch": 39.53, "learning_rate": 3.0244199699766652e-05, "loss": 2.134, "step": 7978000 }, { "epoch": 39.53, "learning_rate": 3.024296111334057e-05, "loss": 2.1578, "step": 7978500 }, { "epoch": 39.53, "learning_rate": 3.0241722526914486e-05, "loss": 2.1315, "step": 7979000 }, { "epoch": 39.53, "learning_rate": 3.02404839404884e-05, "loss": 2.1359, "step": 7979500 }, { "epoch": 39.54, "learning_rate": 3.0239245354062316e-05, "loss": 2.1432, "step": 7980000 }, { "epoch": 39.54, "learning_rate": 3.0238006767636233e-05, "loss": 2.1618, "step": 7980500 }, { "epoch": 39.54, "learning_rate": 3.0236770658383002e-05, "loss": 2.1471, "step": 7981000 }, { "epoch": 39.54, "learning_rate": 3.023553207195692e-05, "loss": 2.1546, "step": 7981500 }, { "epoch": 39.55, "learning_rate": 3.0234293485530836e-05, "loss": 2.1428, "step": 7982000 }, { "epoch": 39.55, "learning_rate": 3.0233054899104753e-05, "loss": 2.1344, "step": 7982500 }, { "epoch": 39.55, "learning_rate": 3.0231816312678666e-05, "loss": 2.1659, "step": 7983000 }, { "epoch": 39.55, "learning_rate": 3.0230577726252583e-05, "loss": 2.156, "step": 7983500 }, { "epoch": 39.56, "learning_rate": 3.02293391398265e-05, "loss": 2.1345, "step": 7984000 }, { "epoch": 39.56, "learning_rate": 3.0228100553400417e-05, "loss": 2.1534, "step": 7984500 }, { "epoch": 39.56, "learning_rate": 3.0226861966974334e-05, "loss": 2.1419, "step": 7985000 }, { "epoch": 39.56, "learning_rate": 3.022562338054825e-05, "loss": 2.1344, "step": 7985500 }, { "epoch": 39.57, "learning_rate": 3.022438727129502e-05, "loss": 2.1341, "step": 7986000 }, { "epoch": 39.57, "learning_rate": 3.0223148684868933e-05, "loss": 2.132, "step": 7986500 }, { "epoch": 39.57, "learning_rate": 3.022191009844285e-05, "loss": 2.1422, "step": 7987000 }, { "epoch": 39.57, "learning_rate": 3.022067398918962e-05, "loss": 2.1957, "step": 7987500 }, { "epoch": 39.58, "learning_rate": 3.0219435402763536e-05, "loss": 2.1574, "step": 7988000 }, { "epoch": 39.58, "learning_rate": 3.0218196816337453e-05, "loss": 2.1652, "step": 7988500 }, { "epoch": 39.58, "learning_rate": 3.021695822991137e-05, "loss": 2.1417, "step": 7989000 }, { "epoch": 39.58, "learning_rate": 3.0215719643485287e-05, "loss": 2.1492, "step": 7989500 }, { "epoch": 39.59, "learning_rate": 3.02144810570592e-05, "loss": 2.1364, "step": 7990000 }, { "epoch": 39.59, "learning_rate": 3.0213242470633117e-05, "loss": 2.1672, "step": 7990500 }, { "epoch": 39.59, "learning_rate": 3.0212003884207034e-05, "loss": 2.1569, "step": 7991000 }, { "epoch": 39.59, "learning_rate": 3.021076529778095e-05, "loss": 2.1703, "step": 7991500 }, { "epoch": 39.6, "learning_rate": 3.0209526711354868e-05, "loss": 2.1459, "step": 7992000 }, { "epoch": 39.6, "learning_rate": 3.0208288124928785e-05, "loss": 2.164, "step": 7992500 }, { "epoch": 39.6, "learning_rate": 3.0207052015675554e-05, "loss": 2.1593, "step": 7993000 }, { "epoch": 39.6, "learning_rate": 3.0205813429249467e-05, "loss": 2.1342, "step": 7993500 }, { "epoch": 39.61, "learning_rate": 3.0204574842823384e-05, "loss": 2.1347, "step": 7994000 }, { "epoch": 39.61, "learning_rate": 3.02033362563973e-05, "loss": 2.1442, "step": 7994500 }, { "epoch": 39.61, "learning_rate": 3.0202097669971218e-05, "loss": 2.1486, "step": 7995000 }, { "epoch": 39.61, "learning_rate": 3.0200859083545135e-05, "loss": 2.1455, "step": 7995500 }, { "epoch": 39.61, "learning_rate": 3.0199620497119052e-05, "loss": 2.166, "step": 7996000 }, { "epoch": 39.62, "learning_rate": 3.0198381910692962e-05, "loss": 2.1218, "step": 7996500 }, { "epoch": 39.62, "learning_rate": 3.0197145801439734e-05, "loss": 2.1713, "step": 7997000 }, { "epoch": 39.62, "learning_rate": 3.019590721501365e-05, "loss": 2.1567, "step": 7997500 }, { "epoch": 39.62, "learning_rate": 3.0194668628587568e-05, "loss": 2.132, "step": 7998000 }, { "epoch": 39.63, "learning_rate": 3.0193430042161485e-05, "loss": 2.1449, "step": 7998500 }, { "epoch": 39.63, "learning_rate": 3.0192191455735402e-05, "loss": 2.1452, "step": 7999000 }, { "epoch": 39.63, "learning_rate": 3.0190952869309312e-05, "loss": 2.1455, "step": 7999500 }, { "epoch": 39.63, "learning_rate": 3.018971428288323e-05, "loss": 2.1394, "step": 8000000 }, { "epoch": 39.64, "learning_rate": 3.0188475696457146e-05, "loss": 2.1418, "step": 8000500 }, { "epoch": 39.64, "learning_rate": 3.0187237110031063e-05, "loss": 2.1492, "step": 8001000 }, { "epoch": 39.64, "learning_rate": 3.0186001000777835e-05, "loss": 2.1505, "step": 8001500 }, { "epoch": 39.64, "learning_rate": 3.0184762414351752e-05, "loss": 2.1721, "step": 8002000 }, { "epoch": 39.65, "learning_rate": 3.0183523827925662e-05, "loss": 2.1447, "step": 8002500 }, { "epoch": 39.65, "learning_rate": 3.0182290195845286e-05, "loss": 2.1685, "step": 8003000 }, { "epoch": 39.65, "learning_rate": 3.0181051609419203e-05, "loss": 2.1416, "step": 8003500 }, { "epoch": 39.65, "learning_rate": 3.017981302299312e-05, "loss": 2.1407, "step": 8004000 }, { "epoch": 39.66, "learning_rate": 3.0178574436567037e-05, "loss": 2.1782, "step": 8004500 }, { "epoch": 39.66, "learning_rate": 3.0177335850140954e-05, "loss": 2.143, "step": 8005000 }, { "epoch": 39.66, "learning_rate": 3.017609726371487e-05, "loss": 2.1823, "step": 8005500 }, { "epoch": 39.66, "learning_rate": 3.0174861154461636e-05, "loss": 2.1475, "step": 8006000 }, { "epoch": 39.67, "learning_rate": 3.0173622568035553e-05, "loss": 2.1581, "step": 8006500 }, { "epoch": 39.67, "learning_rate": 3.017238645878232e-05, "loss": 2.1574, "step": 8007000 }, { "epoch": 39.67, "learning_rate": 3.0171147872356236e-05, "loss": 2.1332, "step": 8007500 }, { "epoch": 39.67, "learning_rate": 3.0169909285930153e-05, "loss": 2.1649, "step": 8008000 }, { "epoch": 39.68, "learning_rate": 3.016867069950407e-05, "loss": 2.1523, "step": 8008500 }, { "epoch": 39.68, "learning_rate": 3.0167432113077986e-05, "loss": 2.1674, "step": 8009000 }, { "epoch": 39.68, "learning_rate": 3.0166193526651903e-05, "loss": 2.1503, "step": 8009500 }, { "epoch": 39.68, "learning_rate": 3.016495494022582e-05, "loss": 2.1785, "step": 8010000 }, { "epoch": 39.69, "learning_rate": 3.0163716353799737e-05, "loss": 2.1555, "step": 8010500 }, { "epoch": 39.69, "learning_rate": 3.0162477767373654e-05, "loss": 2.1361, "step": 8011000 }, { "epoch": 39.69, "learning_rate": 3.016123918094757e-05, "loss": 2.1458, "step": 8011500 }, { "epoch": 39.69, "learning_rate": 3.0160000594521488e-05, "loss": 2.1399, "step": 8012000 }, { "epoch": 39.7, "learning_rate": 3.0158762008095405e-05, "loss": 2.1611, "step": 8012500 }, { "epoch": 39.7, "learning_rate": 3.0157523421669322e-05, "loss": 2.1584, "step": 8013000 }, { "epoch": 39.7, "learning_rate": 3.0156287312416087e-05, "loss": 2.1321, "step": 8013500 }, { "epoch": 39.7, "learning_rate": 3.0155048725990004e-05, "loss": 2.1513, "step": 8014000 }, { "epoch": 39.71, "learning_rate": 3.015381013956392e-05, "loss": 2.1694, "step": 8014500 }, { "epoch": 39.71, "learning_rate": 3.0152571553137838e-05, "loss": 2.1494, "step": 8015000 }, { "epoch": 39.71, "learning_rate": 3.0151332966711755e-05, "loss": 2.1445, "step": 8015500 }, { "epoch": 39.71, "learning_rate": 3.0150094380285672e-05, "loss": 2.1644, "step": 8016000 }, { "epoch": 39.72, "learning_rate": 3.0148858271032437e-05, "loss": 2.1543, "step": 8016500 }, { "epoch": 39.72, "learning_rate": 3.0147619684606354e-05, "loss": 2.1643, "step": 8017000 }, { "epoch": 39.72, "learning_rate": 3.014638109818027e-05, "loss": 2.1538, "step": 8017500 }, { "epoch": 39.72, "learning_rate": 3.0145142511754188e-05, "loss": 2.1518, "step": 8018000 }, { "epoch": 39.73, "learning_rate": 3.0143903925328105e-05, "loss": 2.1669, "step": 8018500 }, { "epoch": 39.73, "learning_rate": 3.0142665338902022e-05, "loss": 2.1774, "step": 8019000 }, { "epoch": 39.73, "learning_rate": 3.014142675247594e-05, "loss": 2.1471, "step": 8019500 }, { "epoch": 39.73, "learning_rate": 3.0140188166049856e-05, "loss": 2.1726, "step": 8020000 }, { "epoch": 39.74, "learning_rate": 3.013895205679662e-05, "loss": 2.1384, "step": 8020500 }, { "epoch": 39.74, "learning_rate": 3.0137713470370538e-05, "loss": 2.1756, "step": 8021000 }, { "epoch": 39.74, "learning_rate": 3.0136474883944455e-05, "loss": 2.1773, "step": 8021500 }, { "epoch": 39.74, "learning_rate": 3.0135241251864072e-05, "loss": 2.1633, "step": 8022000 }, { "epoch": 39.75, "learning_rate": 3.013400266543799e-05, "loss": 2.1297, "step": 8022500 }, { "epoch": 39.75, "learning_rate": 3.013276655618476e-05, "loss": 2.1697, "step": 8023000 }, { "epoch": 39.75, "learning_rate": 3.013152796975868e-05, "loss": 2.1987, "step": 8023500 }, { "epoch": 39.75, "learning_rate": 3.0130289383332595e-05, "loss": 2.1599, "step": 8024000 }, { "epoch": 39.76, "learning_rate": 3.012905327407936e-05, "loss": 2.1246, "step": 8024500 }, { "epoch": 39.76, "learning_rate": 3.0127814687653278e-05, "loss": 2.1334, "step": 8025000 }, { "epoch": 39.76, "learning_rate": 3.0126576101227195e-05, "loss": 2.1671, "step": 8025500 }, { "epoch": 39.76, "learning_rate": 3.012533751480111e-05, "loss": 2.1581, "step": 8026000 }, { "epoch": 39.77, "learning_rate": 3.012409892837503e-05, "loss": 2.1355, "step": 8026500 }, { "epoch": 39.77, "learning_rate": 3.0122860341948945e-05, "loss": 2.153, "step": 8027000 }, { "epoch": 39.77, "learning_rate": 3.0121621755522862e-05, "loss": 2.1714, "step": 8027500 }, { "epoch": 39.77, "learning_rate": 3.0120383169096773e-05, "loss": 2.1358, "step": 8028000 }, { "epoch": 39.78, "learning_rate": 3.011914458267069e-05, "loss": 2.1435, "step": 8028500 }, { "epoch": 39.78, "learning_rate": 3.0117905996244606e-05, "loss": 2.1649, "step": 8029000 }, { "epoch": 39.78, "learning_rate": 3.0116667409818523e-05, "loss": 2.1489, "step": 8029500 }, { "epoch": 39.78, "learning_rate": 3.011542882339244e-05, "loss": 2.1574, "step": 8030000 }, { "epoch": 39.79, "learning_rate": 3.0114190236966354e-05, "loss": 2.1635, "step": 8030500 }, { "epoch": 39.79, "learning_rate": 3.011295412771313e-05, "loss": 2.1656, "step": 8031000 }, { "epoch": 39.79, "learning_rate": 3.011171554128704e-05, "loss": 2.1903, "step": 8031500 }, { "epoch": 39.79, "learning_rate": 3.0110476954860956e-05, "loss": 2.1556, "step": 8032000 }, { "epoch": 39.8, "learning_rate": 3.0109238368434873e-05, "loss": 2.145, "step": 8032500 }, { "epoch": 39.8, "learning_rate": 3.0108002259181646e-05, "loss": 2.1641, "step": 8033000 }, { "epoch": 39.8, "learning_rate": 3.0106763672755563e-05, "loss": 2.1497, "step": 8033500 }, { "epoch": 39.8, "learning_rate": 3.010552508632948e-05, "loss": 2.1683, "step": 8034000 }, { "epoch": 39.81, "learning_rate": 3.010428649990339e-05, "loss": 2.1542, "step": 8034500 }, { "epoch": 39.81, "learning_rate": 3.0103047913477307e-05, "loss": 2.1433, "step": 8035000 }, { "epoch": 39.81, "learning_rate": 3.0101809327051223e-05, "loss": 2.1602, "step": 8035500 }, { "epoch": 39.81, "learning_rate": 3.010057074062514e-05, "loss": 2.1568, "step": 8036000 }, { "epoch": 39.82, "learning_rate": 3.0099332154199057e-05, "loss": 2.1549, "step": 8036500 }, { "epoch": 39.82, "learning_rate": 3.009809356777297e-05, "loss": 2.1468, "step": 8037000 }, { "epoch": 39.82, "learning_rate": 3.0096859935692595e-05, "loss": 2.1577, "step": 8037500 }, { "epoch": 39.82, "learning_rate": 3.009562382643936e-05, "loss": 2.1663, "step": 8038000 }, { "epoch": 39.83, "learning_rate": 3.0094385240013277e-05, "loss": 2.1725, "step": 8038500 }, { "epoch": 39.83, "learning_rate": 3.0093146653587194e-05, "loss": 2.1502, "step": 8039000 }, { "epoch": 39.83, "learning_rate": 3.009190806716111e-05, "loss": 2.1691, "step": 8039500 }, { "epoch": 39.83, "learning_rate": 3.0090669480735028e-05, "loss": 2.1382, "step": 8040000 }, { "epoch": 39.84, "learning_rate": 3.0089430894308945e-05, "loss": 2.1767, "step": 8040500 }, { "epoch": 39.84, "learning_rate": 3.0088192307882862e-05, "loss": 2.1753, "step": 8041000 }, { "epoch": 39.84, "learning_rate": 3.008695372145678e-05, "loss": 2.161, "step": 8041500 }, { "epoch": 39.84, "learning_rate": 3.0085715135030696e-05, "loss": 2.1657, "step": 8042000 }, { "epoch": 39.85, "learning_rate": 3.0084476548604613e-05, "loss": 2.1477, "step": 8042500 }, { "epoch": 39.85, "learning_rate": 3.008323796217853e-05, "loss": 2.1735, "step": 8043000 }, { "epoch": 39.85, "learning_rate": 3.0081999375752447e-05, "loss": 2.1239, "step": 8043500 }, { "epoch": 39.85, "learning_rate": 3.0080760789326357e-05, "loss": 2.1411, "step": 8044000 }, { "epoch": 39.86, "learning_rate": 3.0079522202900274e-05, "loss": 2.1439, "step": 8044500 }, { "epoch": 39.86, "learning_rate": 3.0078288570819894e-05, "loss": 2.1604, "step": 8045000 }, { "epoch": 39.86, "learning_rate": 3.007704998439381e-05, "loss": 2.1593, "step": 8045500 }, { "epoch": 39.86, "learning_rate": 3.0075811397967728e-05, "loss": 2.1567, "step": 8046000 }, { "epoch": 39.87, "learning_rate": 3.0074572811541645e-05, "loss": 2.1343, "step": 8046500 }, { "epoch": 39.87, "learning_rate": 3.0073336702288414e-05, "loss": 2.1458, "step": 8047000 }, { "epoch": 39.87, "learning_rate": 3.007209811586233e-05, "loss": 2.1592, "step": 8047500 }, { "epoch": 39.87, "learning_rate": 3.0070859529436244e-05, "loss": 2.1504, "step": 8048000 }, { "epoch": 39.88, "learning_rate": 3.006962094301016e-05, "loss": 2.1612, "step": 8048500 }, { "epoch": 39.88, "learning_rate": 3.0068382356584078e-05, "loss": 2.1423, "step": 8049000 }, { "epoch": 39.88, "learning_rate": 3.0067143770157995e-05, "loss": 2.1497, "step": 8049500 }, { "epoch": 39.88, "learning_rate": 3.0065907660904764e-05, "loss": 2.1624, "step": 8050000 }, { "epoch": 39.88, "learning_rate": 3.006466907447868e-05, "loss": 2.1358, "step": 8050500 }, { "epoch": 39.89, "learning_rate": 3.0063430488052598e-05, "loss": 2.161, "step": 8051000 }, { "epoch": 39.89, "learning_rate": 3.006219190162651e-05, "loss": 2.1392, "step": 8051500 }, { "epoch": 39.89, "learning_rate": 3.006095331520043e-05, "loss": 2.1358, "step": 8052000 }, { "epoch": 39.89, "learning_rate": 3.0059714728774345e-05, "loss": 2.1436, "step": 8052500 }, { "epoch": 39.9, "learning_rate": 3.0058476142348262e-05, "loss": 2.1518, "step": 8053000 }, { "epoch": 39.9, "learning_rate": 3.005723755592218e-05, "loss": 2.1306, "step": 8053500 }, { "epoch": 39.9, "learning_rate": 3.0055998969496096e-05, "loss": 2.1684, "step": 8054000 }, { "epoch": 39.9, "learning_rate": 3.0054762860242865e-05, "loss": 2.1548, "step": 8054500 }, { "epoch": 39.91, "learning_rate": 3.005352427381678e-05, "loss": 2.1863, "step": 8055000 }, { "epoch": 39.91, "learning_rate": 3.0052285687390695e-05, "loss": 2.1506, "step": 8055500 }, { "epoch": 39.91, "learning_rate": 3.0051047100964612e-05, "loss": 2.1482, "step": 8056000 }, { "epoch": 39.91, "learning_rate": 3.004980851453853e-05, "loss": 2.1735, "step": 8056500 }, { "epoch": 39.92, "learning_rate": 3.0048569928112446e-05, "loss": 2.1738, "step": 8057000 }, { "epoch": 39.92, "learning_rate": 3.0047331341686363e-05, "loss": 2.1549, "step": 8057500 }, { "epoch": 39.92, "learning_rate": 3.004609275526028e-05, "loss": 2.1322, "step": 8058000 }, { "epoch": 39.92, "learning_rate": 3.0044854168834197e-05, "loss": 2.1524, "step": 8058500 }, { "epoch": 39.93, "learning_rate": 3.0043615582408114e-05, "loss": 2.141, "step": 8059000 }, { "epoch": 39.93, "learning_rate": 3.0042376995982024e-05, "loss": 2.1674, "step": 8059500 }, { "epoch": 39.93, "learning_rate": 3.004113840955594e-05, "loss": 2.1547, "step": 8060000 }, { "epoch": 39.93, "learning_rate": 3.0039899823129858e-05, "loss": 2.1545, "step": 8060500 }, { "epoch": 39.94, "learning_rate": 3.003866371387663e-05, "loss": 2.166, "step": 8061000 }, { "epoch": 39.94, "learning_rate": 3.0037425127450547e-05, "loss": 2.1595, "step": 8061500 }, { "epoch": 39.94, "learning_rate": 3.0036186541024464e-05, "loss": 2.1371, "step": 8062000 }, { "epoch": 39.94, "learning_rate": 3.0034947954598374e-05, "loss": 2.1685, "step": 8062500 }, { "epoch": 39.95, "learning_rate": 3.0033711845345146e-05, "loss": 2.1529, "step": 8063000 }, { "epoch": 39.95, "learning_rate": 3.0032475736091915e-05, "loss": 2.1464, "step": 8063500 }, { "epoch": 39.95, "learning_rate": 3.0031237149665832e-05, "loss": 2.1639, "step": 8064000 }, { "epoch": 39.95, "learning_rate": 3.0030001040412597e-05, "loss": 2.1509, "step": 8064500 }, { "epoch": 39.96, "learning_rate": 3.0028762453986514e-05, "loss": 2.1675, "step": 8065000 }, { "epoch": 39.96, "learning_rate": 3.002752386756043e-05, "loss": 2.1495, "step": 8065500 }, { "epoch": 39.96, "learning_rate": 3.0026285281134348e-05, "loss": 2.1633, "step": 8066000 }, { "epoch": 39.96, "learning_rate": 3.0025046694708265e-05, "loss": 2.1319, "step": 8066500 }, { "epoch": 39.97, "learning_rate": 3.0023808108282182e-05, "loss": 2.1646, "step": 8067000 }, { "epoch": 39.97, "learning_rate": 3.00225695218561e-05, "loss": 2.1526, "step": 8067500 }, { "epoch": 39.97, "learning_rate": 3.0021330935430016e-05, "loss": 2.1123, "step": 8068000 }, { "epoch": 39.97, "learning_rate": 3.002009482617678e-05, "loss": 2.1465, "step": 8068500 }, { "epoch": 39.98, "learning_rate": 3.0018858716923553e-05, "loss": 2.1841, "step": 8069000 }, { "epoch": 39.98, "learning_rate": 3.0017620130497464e-05, "loss": 2.1716, "step": 8069500 }, { "epoch": 39.98, "learning_rate": 3.001638154407138e-05, "loss": 2.17, "step": 8070000 }, { "epoch": 39.98, "learning_rate": 3.0015142957645298e-05, "loss": 2.1723, "step": 8070500 }, { "epoch": 39.99, "learning_rate": 3.0013904371219214e-05, "loss": 2.1409, "step": 8071000 }, { "epoch": 39.99, "learning_rate": 3.001266578479313e-05, "loss": 2.1666, "step": 8071500 }, { "epoch": 39.99, "learning_rate": 3.0011427198367048e-05, "loss": 2.1527, "step": 8072000 }, { "epoch": 39.99, "learning_rate": 3.0010188611940965e-05, "loss": 2.1557, "step": 8072500 }, { "epoch": 40.0, "learning_rate": 3.0008950025514882e-05, "loss": 2.1458, "step": 8073000 }, { "epoch": 40.0, "learning_rate": 3.00077114390888e-05, "loss": 2.1418, "step": 8073500 }, { "epoch": 40.0, "eval_accuracy": 0.6648354250857901, "eval_accuracy_mlm": 0.6219327105976565, "eval_accuracy_nsp": 0.8670649006310819, "eval_loss": 2.2923583984375, "eval_runtime": 146.917, "eval_samples_per_second": 1735.395, "eval_steps_per_second": 72.313, "step": 8073720 }, { "epoch": 40.0, "learning_rate": 3.0006472852662716e-05, "loss": 2.1342, "step": 8074000 }, { "epoch": 40.0, "learning_rate": 3.0005234266236633e-05, "loss": 2.1227, "step": 8074500 }, { "epoch": 40.01, "learning_rate": 3.0003995679810546e-05, "loss": 2.1248, "step": 8075000 }, { "epoch": 40.01, "learning_rate": 3.0002759570557315e-05, "loss": 2.1363, "step": 8075500 }, { "epoch": 40.01, "learning_rate": 3.000152346130408e-05, "loss": 2.1442, "step": 8076000 }, { "epoch": 40.01, "learning_rate": 3.0000284874877998e-05, "loss": 2.1229, "step": 8076500 }, { "epoch": 40.02, "learning_rate": 2.9999046288451915e-05, "loss": 2.1532, "step": 8077000 }, { "epoch": 40.02, "learning_rate": 2.999780770202583e-05, "loss": 2.1384, "step": 8077500 }, { "epoch": 40.02, "learning_rate": 2.999656911559975e-05, "loss": 2.13, "step": 8078000 }, { "epoch": 40.02, "learning_rate": 2.999533300634652e-05, "loss": 2.1004, "step": 8078500 }, { "epoch": 40.03, "learning_rate": 2.999409441992043e-05, "loss": 2.134, "step": 8079000 }, { "epoch": 40.03, "learning_rate": 2.9992855833494348e-05, "loss": 2.1205, "step": 8079500 }, { "epoch": 40.03, "learning_rate": 2.9991617247068265e-05, "loss": 2.1459, "step": 8080000 }, { "epoch": 40.03, "learning_rate": 2.999037866064218e-05, "loss": 2.1614, "step": 8080500 }, { "epoch": 40.04, "learning_rate": 2.99891400742161e-05, "loss": 2.129, "step": 8081000 }, { "epoch": 40.04, "learning_rate": 2.9987901487790015e-05, "loss": 2.1302, "step": 8081500 }, { "epoch": 40.04, "learning_rate": 2.9986662901363932e-05, "loss": 2.1194, "step": 8082000 }, { "epoch": 40.04, "learning_rate": 2.998542431493785e-05, "loss": 2.1316, "step": 8082500 }, { "epoch": 40.05, "learning_rate": 2.9984185728511766e-05, "loss": 2.1234, "step": 8083000 }, { "epoch": 40.05, "learning_rate": 2.998294961925853e-05, "loss": 2.1373, "step": 8083500 }, { "epoch": 40.05, "learning_rate": 2.998171103283245e-05, "loss": 2.1313, "step": 8084000 }, { "epoch": 40.05, "learning_rate": 2.9980472446406365e-05, "loss": 2.1274, "step": 8084500 }, { "epoch": 40.06, "learning_rate": 2.9979233859980282e-05, "loss": 2.1258, "step": 8085000 }, { "epoch": 40.06, "learning_rate": 2.99779952735542e-05, "loss": 2.1333, "step": 8085500 }, { "epoch": 40.06, "learning_rate": 2.9976759164300965e-05, "loss": 2.1593, "step": 8086000 }, { "epoch": 40.06, "learning_rate": 2.997552057787488e-05, "loss": 2.1003, "step": 8086500 }, { "epoch": 40.07, "learning_rate": 2.99742819914488e-05, "loss": 2.1457, "step": 8087000 }, { "epoch": 40.07, "learning_rate": 2.9973043405022716e-05, "loss": 2.1141, "step": 8087500 }, { "epoch": 40.07, "learning_rate": 2.9971804818596632e-05, "loss": 2.1126, "step": 8088000 }, { "epoch": 40.07, "learning_rate": 2.997056623217055e-05, "loss": 2.1274, "step": 8088500 }, { "epoch": 40.08, "learning_rate": 2.9969327645744466e-05, "loss": 2.1438, "step": 8089000 }, { "epoch": 40.08, "learning_rate": 2.9968089059318383e-05, "loss": 2.1173, "step": 8089500 }, { "epoch": 40.08, "learning_rate": 2.99668504728923e-05, "loss": 2.1389, "step": 8090000 }, { "epoch": 40.08, "learning_rate": 2.9965614363639066e-05, "loss": 2.1184, "step": 8090500 }, { "epoch": 40.09, "learning_rate": 2.9964375777212983e-05, "loss": 2.1613, "step": 8091000 }, { "epoch": 40.09, "learning_rate": 2.99631371907869e-05, "loss": 2.1322, "step": 8091500 }, { "epoch": 40.09, "learning_rate": 2.9961903558706524e-05, "loss": 2.1343, "step": 8092000 }, { "epoch": 40.09, "learning_rate": 2.996066497228044e-05, "loss": 2.1082, "step": 8092500 }, { "epoch": 40.1, "learning_rate": 2.9959426385854354e-05, "loss": 2.0986, "step": 8093000 }, { "epoch": 40.1, "learning_rate": 2.9958190276601123e-05, "loss": 2.1276, "step": 8093500 }, { "epoch": 40.1, "learning_rate": 2.995695169017504e-05, "loss": 2.1186, "step": 8094000 }, { "epoch": 40.1, "learning_rate": 2.9955713103748957e-05, "loss": 2.15, "step": 8094500 }, { "epoch": 40.11, "learning_rate": 2.9954474517322874e-05, "loss": 2.1204, "step": 8095000 }, { "epoch": 40.11, "learning_rate": 2.995323593089679e-05, "loss": 2.1353, "step": 8095500 }, { "epoch": 40.11, "learning_rate": 2.9951997344470707e-05, "loss": 2.1316, "step": 8096000 }, { "epoch": 40.11, "learning_rate": 2.995075875804462e-05, "loss": 2.1207, "step": 8096500 }, { "epoch": 40.12, "learning_rate": 2.9949520171618538e-05, "loss": 2.1492, "step": 8097000 }, { "epoch": 40.12, "learning_rate": 2.994828158519245e-05, "loss": 2.1395, "step": 8097500 }, { "epoch": 40.12, "learning_rate": 2.994704299876637e-05, "loss": 2.1136, "step": 8098000 }, { "epoch": 40.12, "learning_rate": 2.9945804412340285e-05, "loss": 2.1247, "step": 8098500 }, { "epoch": 40.13, "learning_rate": 2.9944568303087058e-05, "loss": 2.1372, "step": 8099000 }, { "epoch": 40.13, "learning_rate": 2.9943332193833823e-05, "loss": 2.1474, "step": 8099500 }, { "epoch": 40.13, "learning_rate": 2.994209360740774e-05, "loss": 2.1378, "step": 8100000 }, { "epoch": 40.13, "learning_rate": 2.9940855020981657e-05, "loss": 2.1532, "step": 8100500 }, { "epoch": 40.14, "learning_rate": 2.9939616434555574e-05, "loss": 2.1475, "step": 8101000 }, { "epoch": 40.14, "learning_rate": 2.993837784812949e-05, "loss": 2.1217, "step": 8101500 }, { "epoch": 40.14, "learning_rate": 2.9937139261703408e-05, "loss": 2.1103, "step": 8102000 }, { "epoch": 40.14, "learning_rate": 2.9935900675277324e-05, "loss": 2.1138, "step": 8102500 }, { "epoch": 40.15, "learning_rate": 2.9934662088851238e-05, "loss": 2.1503, "step": 8103000 }, { "epoch": 40.15, "learning_rate": 2.9933423502425155e-05, "loss": 2.1445, "step": 8103500 }, { "epoch": 40.15, "learning_rate": 2.993218491599907e-05, "loss": 2.1325, "step": 8104000 }, { "epoch": 40.15, "learning_rate": 2.9930946329572985e-05, "loss": 2.1285, "step": 8104500 }, { "epoch": 40.15, "learning_rate": 2.9929707743146902e-05, "loss": 2.1121, "step": 8105000 }, { "epoch": 40.16, "learning_rate": 2.9928469156720816e-05, "loss": 2.1058, "step": 8105500 }, { "epoch": 40.16, "learning_rate": 2.9927230570294733e-05, "loss": 2.1334, "step": 8106000 }, { "epoch": 40.16, "learning_rate": 2.992599198386865e-05, "loss": 2.1259, "step": 8106500 }, { "epoch": 40.16, "learning_rate": 2.9924753397442567e-05, "loss": 2.1388, "step": 8107000 }, { "epoch": 40.17, "learning_rate": 2.9923514811016484e-05, "loss": 2.1493, "step": 8107500 }, { "epoch": 40.17, "learning_rate": 2.9922278701763252e-05, "loss": 2.1049, "step": 8108000 }, { "epoch": 40.17, "learning_rate": 2.992104011533717e-05, "loss": 2.0983, "step": 8108500 }, { "epoch": 40.17, "learning_rate": 2.9919801528911083e-05, "loss": 2.1216, "step": 8109000 }, { "epoch": 40.18, "learning_rate": 2.9918562942485e-05, "loss": 2.0751, "step": 8109500 }, { "epoch": 40.18, "learning_rate": 2.9917324356058917e-05, "loss": 2.1298, "step": 8110000 }, { "epoch": 40.18, "learning_rate": 2.9916088246805686e-05, "loss": 2.1462, "step": 8110500 }, { "epoch": 40.18, "learning_rate": 2.9914849660379602e-05, "loss": 2.1068, "step": 8111000 }, { "epoch": 40.19, "learning_rate": 2.9913613551126375e-05, "loss": 2.1508, "step": 8111500 }, { "epoch": 40.19, "learning_rate": 2.991237496470029e-05, "loss": 2.1505, "step": 8112000 }, { "epoch": 40.19, "learning_rate": 2.991113637827421e-05, "loss": 2.1378, "step": 8112500 }, { "epoch": 40.19, "learning_rate": 2.990989779184812e-05, "loss": 2.149, "step": 8113000 }, { "epoch": 40.2, "learning_rate": 2.9908659205422036e-05, "loss": 2.1122, "step": 8113500 }, { "epoch": 40.2, "learning_rate": 2.9907420618995953e-05, "loss": 2.1375, "step": 8114000 }, { "epoch": 40.2, "learning_rate": 2.990618203256987e-05, "loss": 2.1135, "step": 8114500 }, { "epoch": 40.2, "learning_rate": 2.990494592331664e-05, "loss": 2.1219, "step": 8115000 }, { "epoch": 40.21, "learning_rate": 2.990370733689056e-05, "loss": 2.1243, "step": 8115500 }, { "epoch": 40.21, "learning_rate": 2.9902468750464476e-05, "loss": 2.1451, "step": 8116000 }, { "epoch": 40.21, "learning_rate": 2.9901230164038386e-05, "loss": 2.1357, "step": 8116500 }, { "epoch": 40.21, "learning_rate": 2.9899991577612303e-05, "loss": 2.1385, "step": 8117000 }, { "epoch": 40.22, "learning_rate": 2.989875299118622e-05, "loss": 2.1459, "step": 8117500 }, { "epoch": 40.22, "learning_rate": 2.9897514404760136e-05, "loss": 2.1412, "step": 8118000 }, { "epoch": 40.22, "learning_rate": 2.9896275818334053e-05, "loss": 2.1466, "step": 8118500 }, { "epoch": 40.22, "learning_rate": 2.9895037231907967e-05, "loss": 2.136, "step": 8119000 }, { "epoch": 40.23, "learning_rate": 2.9893798645481884e-05, "loss": 2.135, "step": 8119500 }, { "epoch": 40.23, "learning_rate": 2.9892562536228653e-05, "loss": 2.1466, "step": 8120000 }, { "epoch": 40.23, "learning_rate": 2.989132394980257e-05, "loss": 2.1594, "step": 8120500 }, { "epoch": 40.23, "learning_rate": 2.9890085363376487e-05, "loss": 2.1325, "step": 8121000 }, { "epoch": 40.24, "learning_rate": 2.988884925412326e-05, "loss": 2.1401, "step": 8121500 }, { "epoch": 40.24, "learning_rate": 2.9887610667697176e-05, "loss": 2.1444, "step": 8122000 }, { "epoch": 40.24, "learning_rate": 2.9886372081271086e-05, "loss": 2.1417, "step": 8122500 }, { "epoch": 40.24, "learning_rate": 2.9885133494845003e-05, "loss": 2.1485, "step": 8123000 }, { "epoch": 40.25, "learning_rate": 2.988389490841892e-05, "loss": 2.1554, "step": 8123500 }, { "epoch": 40.25, "learning_rate": 2.988266127633854e-05, "loss": 2.1377, "step": 8124000 }, { "epoch": 40.25, "learning_rate": 2.9881422689912457e-05, "loss": 2.1687, "step": 8124500 }, { "epoch": 40.25, "learning_rate": 2.9880186580659226e-05, "loss": 2.215, "step": 8125000 }, { "epoch": 40.26, "learning_rate": 2.9878947994233143e-05, "loss": 2.1785, "step": 8125500 }, { "epoch": 40.26, "learning_rate": 2.9877711884979915e-05, "loss": 2.1527, "step": 8126000 }, { "epoch": 40.26, "learning_rate": 2.9876473298553825e-05, "loss": 2.1615, "step": 8126500 }, { "epoch": 40.26, "learning_rate": 2.9875234712127742e-05, "loss": 2.1626, "step": 8127000 }, { "epoch": 40.27, "learning_rate": 2.987399612570166e-05, "loss": 2.1302, "step": 8127500 }, { "epoch": 40.27, "learning_rate": 2.9872757539275576e-05, "loss": 2.1343, "step": 8128000 }, { "epoch": 40.27, "learning_rate": 2.9871518952849493e-05, "loss": 2.1382, "step": 8128500 }, { "epoch": 40.27, "learning_rate": 2.987028036642341e-05, "loss": 2.1504, "step": 8129000 }, { "epoch": 40.28, "learning_rate": 2.9869041779997327e-05, "loss": 2.1479, "step": 8129500 }, { "epoch": 40.28, "learning_rate": 2.986780319357124e-05, "loss": 2.1477, "step": 8130000 }, { "epoch": 40.28, "learning_rate": 2.9866564607145157e-05, "loss": 2.1263, "step": 8130500 }, { "epoch": 40.28, "learning_rate": 2.9865326020719074e-05, "loss": 2.1442, "step": 8131000 }, { "epoch": 40.29, "learning_rate": 2.986408743429299e-05, "loss": 2.1558, "step": 8131500 }, { "epoch": 40.29, "learning_rate": 2.9862848847866908e-05, "loss": 2.1284, "step": 8132000 }, { "epoch": 40.29, "learning_rate": 2.9861610261440825e-05, "loss": 2.1304, "step": 8132500 }, { "epoch": 40.29, "learning_rate": 2.9860371675014742e-05, "loss": 2.1206, "step": 8133000 }, { "epoch": 40.3, "learning_rate": 2.985913308858866e-05, "loss": 2.1063, "step": 8133500 }, { "epoch": 40.3, "learning_rate": 2.9857894502162576e-05, "loss": 2.1265, "step": 8134000 }, { "epoch": 40.3, "learning_rate": 2.9856660870082193e-05, "loss": 2.1442, "step": 8134500 }, { "epoch": 40.3, "learning_rate": 2.9855427238001814e-05, "loss": 2.1301, "step": 8135000 }, { "epoch": 40.31, "learning_rate": 2.9854191128748583e-05, "loss": 2.1717, "step": 8135500 }, { "epoch": 40.31, "learning_rate": 2.98529525423225e-05, "loss": 2.1429, "step": 8136000 }, { "epoch": 40.31, "learning_rate": 2.9851713955896417e-05, "loss": 2.1513, "step": 8136500 }, { "epoch": 40.31, "learning_rate": 2.9850475369470333e-05, "loss": 2.118, "step": 8137000 }, { "epoch": 40.32, "learning_rate": 2.984923678304425e-05, "loss": 2.1346, "step": 8137500 }, { "epoch": 40.32, "learning_rate": 2.9847998196618164e-05, "loss": 2.1257, "step": 8138000 }, { "epoch": 40.32, "learning_rate": 2.984675961019208e-05, "loss": 2.1541, "step": 8138500 }, { "epoch": 40.32, "learning_rate": 2.9845521023765998e-05, "loss": 2.1451, "step": 8139000 }, { "epoch": 40.33, "learning_rate": 2.9844282437339915e-05, "loss": 2.1555, "step": 8139500 }, { "epoch": 40.33, "learning_rate": 2.984304385091383e-05, "loss": 2.1503, "step": 8140000 }, { "epoch": 40.33, "learning_rate": 2.984180526448775e-05, "loss": 2.1411, "step": 8140500 }, { "epoch": 40.33, "learning_rate": 2.9840566678061666e-05, "loss": 2.1364, "step": 8141000 }, { "epoch": 40.34, "learning_rate": 2.983933056880843e-05, "loss": 2.1253, "step": 8141500 }, { "epoch": 40.34, "learning_rate": 2.9838091982382348e-05, "loss": 2.1602, "step": 8142000 }, { "epoch": 40.34, "learning_rate": 2.9836853395956265e-05, "loss": 2.1643, "step": 8142500 }, { "epoch": 40.34, "learning_rate": 2.9835614809530182e-05, "loss": 2.1554, "step": 8143000 }, { "epoch": 40.35, "learning_rate": 2.98343762231041e-05, "loss": 2.1505, "step": 8143500 }, { "epoch": 40.35, "learning_rate": 2.9833137636678016e-05, "loss": 2.15, "step": 8144000 }, { "epoch": 40.35, "learning_rate": 2.9831899050251933e-05, "loss": 2.1437, "step": 8144500 }, { "epoch": 40.35, "learning_rate": 2.9830660463825843e-05, "loss": 2.1637, "step": 8145000 }, { "epoch": 40.36, "learning_rate": 2.982942187739976e-05, "loss": 2.14, "step": 8145500 }, { "epoch": 40.36, "learning_rate": 2.9828183290973677e-05, "loss": 2.1371, "step": 8146000 }, { "epoch": 40.36, "learning_rate": 2.982694718172045e-05, "loss": 2.1282, "step": 8146500 }, { "epoch": 40.36, "learning_rate": 2.9825708595294366e-05, "loss": 2.1387, "step": 8147000 }, { "epoch": 40.37, "learning_rate": 2.9824470008868283e-05, "loss": 2.1435, "step": 8147500 }, { "epoch": 40.37, "learning_rate": 2.98232314224422e-05, "loss": 2.1502, "step": 8148000 }, { "epoch": 40.37, "learning_rate": 2.982199283601611e-05, "loss": 2.153, "step": 8148500 }, { "epoch": 40.37, "learning_rate": 2.9820754249590027e-05, "loss": 2.1335, "step": 8149000 }, { "epoch": 40.38, "learning_rate": 2.9819515663163944e-05, "loss": 2.1548, "step": 8149500 }, { "epoch": 40.38, "learning_rate": 2.981827707673786e-05, "loss": 2.1659, "step": 8150000 }, { "epoch": 40.38, "learning_rate": 2.9817038490311777e-05, "loss": 2.1311, "step": 8150500 }, { "epoch": 40.38, "learning_rate": 2.9815799903885694e-05, "loss": 2.136, "step": 8151000 }, { "epoch": 40.39, "learning_rate": 2.981456379463246e-05, "loss": 2.149, "step": 8151500 }, { "epoch": 40.39, "learning_rate": 2.9813327685379232e-05, "loss": 2.1433, "step": 8152000 }, { "epoch": 40.39, "learning_rate": 2.981208909895315e-05, "loss": 2.1379, "step": 8152500 }, { "epoch": 40.39, "learning_rate": 2.9810852989699918e-05, "loss": 2.1536, "step": 8153000 }, { "epoch": 40.4, "learning_rate": 2.9809614403273835e-05, "loss": 2.1314, "step": 8153500 }, { "epoch": 40.4, "learning_rate": 2.980837581684775e-05, "loss": 2.1322, "step": 8154000 }, { "epoch": 40.4, "learning_rate": 2.9807137230421665e-05, "loss": 2.1628, "step": 8154500 }, { "epoch": 40.4, "learning_rate": 2.9805898643995582e-05, "loss": 2.1503, "step": 8155000 }, { "epoch": 40.41, "learning_rate": 2.98046600575695e-05, "loss": 2.1587, "step": 8155500 }, { "epoch": 40.41, "learning_rate": 2.9803421471143416e-05, "loss": 2.1236, "step": 8156000 }, { "epoch": 40.41, "learning_rate": 2.9802182884717333e-05, "loss": 2.1312, "step": 8156500 }, { "epoch": 40.41, "learning_rate": 2.980094429829125e-05, "loss": 2.169, "step": 8157000 }, { "epoch": 40.42, "learning_rate": 2.979970571186516e-05, "loss": 2.1623, "step": 8157500 }, { "epoch": 40.42, "learning_rate": 2.9798467125439077e-05, "loss": 2.1281, "step": 8158000 }, { "epoch": 40.42, "learning_rate": 2.9797228539012994e-05, "loss": 2.1154, "step": 8158500 }, { "epoch": 40.42, "learning_rate": 2.979598995258691e-05, "loss": 2.1451, "step": 8159000 }, { "epoch": 40.42, "learning_rate": 2.9794751366160828e-05, "loss": 2.1587, "step": 8159500 }, { "epoch": 40.43, "learning_rate": 2.9793512779734744e-05, "loss": 2.1318, "step": 8160000 }, { "epoch": 40.43, "learning_rate": 2.979227419330866e-05, "loss": 2.1348, "step": 8160500 }, { "epoch": 40.43, "learning_rate": 2.979103560688258e-05, "loss": 2.1497, "step": 8161000 }, { "epoch": 40.43, "learning_rate": 2.9789797020456495e-05, "loss": 2.1634, "step": 8161500 }, { "epoch": 40.44, "learning_rate": 2.9788558434030412e-05, "loss": 2.1154, "step": 8162000 }, { "epoch": 40.44, "learning_rate": 2.9787322324777178e-05, "loss": 2.1517, "step": 8162500 }, { "epoch": 40.44, "learning_rate": 2.978608621552395e-05, "loss": 2.1431, "step": 8163000 }, { "epoch": 40.44, "learning_rate": 2.9784852583443567e-05, "loss": 2.1595, "step": 8163500 }, { "epoch": 40.45, "learning_rate": 2.9783613997017484e-05, "loss": 2.165, "step": 8164000 }, { "epoch": 40.45, "learning_rate": 2.97823754105914e-05, "loss": 2.1401, "step": 8164500 }, { "epoch": 40.45, "learning_rate": 2.9781136824165318e-05, "loss": 2.1622, "step": 8165000 }, { "epoch": 40.45, "learning_rate": 2.9779898237739235e-05, "loss": 2.1476, "step": 8165500 }, { "epoch": 40.46, "learning_rate": 2.9778659651313152e-05, "loss": 2.1271, "step": 8166000 }, { "epoch": 40.46, "learning_rate": 2.977742106488707e-05, "loss": 2.1533, "step": 8166500 }, { "epoch": 40.46, "learning_rate": 2.9776182478460986e-05, "loss": 2.13, "step": 8167000 }, { "epoch": 40.46, "learning_rate": 2.977494636920775e-05, "loss": 2.1355, "step": 8167500 }, { "epoch": 40.47, "learning_rate": 2.9773707782781668e-05, "loss": 2.1572, "step": 8168000 }, { "epoch": 40.47, "learning_rate": 2.9772469196355585e-05, "loss": 2.1508, "step": 8168500 }, { "epoch": 40.47, "learning_rate": 2.9771230609929502e-05, "loss": 2.1521, "step": 8169000 }, { "epoch": 40.47, "learning_rate": 2.976999202350342e-05, "loss": 2.1509, "step": 8169500 }, { "epoch": 40.48, "learning_rate": 2.9768753437077336e-05, "loss": 2.1451, "step": 8170000 }, { "epoch": 40.48, "learning_rate": 2.9767514850651253e-05, "loss": 2.1555, "step": 8170500 }, { "epoch": 40.48, "learning_rate": 2.976627626422517e-05, "loss": 2.1105, "step": 8171000 }, { "epoch": 40.48, "learning_rate": 2.9765037677799083e-05, "loss": 2.1396, "step": 8171500 }, { "epoch": 40.49, "learning_rate": 2.9763801568545852e-05, "loss": 2.1211, "step": 8172000 }, { "epoch": 40.49, "learning_rate": 2.976256298211977e-05, "loss": 2.1426, "step": 8172500 }, { "epoch": 40.49, "learning_rate": 2.9761324395693686e-05, "loss": 2.1305, "step": 8173000 }, { "epoch": 40.49, "learning_rate": 2.9760085809267603e-05, "loss": 2.1449, "step": 8173500 }, { "epoch": 40.5, "learning_rate": 2.975884722284152e-05, "loss": 2.1658, "step": 8174000 }, { "epoch": 40.5, "learning_rate": 2.9757608636415437e-05, "loss": 2.1255, "step": 8174500 }, { "epoch": 40.5, "learning_rate": 2.975637004998935e-05, "loss": 2.1613, "step": 8175000 }, { "epoch": 40.5, "learning_rate": 2.9755131463563267e-05, "loss": 2.1441, "step": 8175500 }, { "epoch": 40.51, "learning_rate": 2.9753895354310036e-05, "loss": 2.1353, "step": 8176000 }, { "epoch": 40.51, "learning_rate": 2.9752656767883953e-05, "loss": 2.1568, "step": 8176500 }, { "epoch": 40.51, "learning_rate": 2.975141818145787e-05, "loss": 2.1289, "step": 8177000 }, { "epoch": 40.51, "learning_rate": 2.9750179595031787e-05, "loss": 2.1479, "step": 8177500 }, { "epoch": 40.52, "learning_rate": 2.9748943485778552e-05, "loss": 2.1135, "step": 8178000 }, { "epoch": 40.52, "learning_rate": 2.9747707376525317e-05, "loss": 2.1282, "step": 8178500 }, { "epoch": 40.52, "learning_rate": 2.974647374444494e-05, "loss": 2.136, "step": 8179000 }, { "epoch": 40.52, "learning_rate": 2.974523515801886e-05, "loss": 2.136, "step": 8179500 }, { "epoch": 40.53, "learning_rate": 2.9743996571592775e-05, "loss": 2.1272, "step": 8180000 }, { "epoch": 40.53, "learning_rate": 2.9742757985166692e-05, "loss": 2.1207, "step": 8180500 }, { "epoch": 40.53, "learning_rate": 2.974151939874061e-05, "loss": 2.159, "step": 8181000 }, { "epoch": 40.53, "learning_rate": 2.9740280812314526e-05, "loss": 2.1289, "step": 8181500 }, { "epoch": 40.54, "learning_rate": 2.9739042225888443e-05, "loss": 2.1468, "step": 8182000 }, { "epoch": 40.54, "learning_rate": 2.9737803639462357e-05, "loss": 2.1327, "step": 8182500 }, { "epoch": 40.54, "learning_rate": 2.9736565053036274e-05, "loss": 2.1359, "step": 8183000 }, { "epoch": 40.54, "learning_rate": 2.9735326466610187e-05, "loss": 2.1308, "step": 8183500 }, { "epoch": 40.55, "learning_rate": 2.9734087880184104e-05, "loss": 2.1634, "step": 8184000 }, { "epoch": 40.55, "learning_rate": 2.973284929375802e-05, "loss": 2.1361, "step": 8184500 }, { "epoch": 40.55, "learning_rate": 2.9731610707331934e-05, "loss": 2.1333, "step": 8185000 }, { "epoch": 40.55, "learning_rate": 2.973037212090585e-05, "loss": 2.1317, "step": 8185500 }, { "epoch": 40.56, "learning_rate": 2.972913353447977e-05, "loss": 2.1209, "step": 8186000 }, { "epoch": 40.56, "learning_rate": 2.9727894948053685e-05, "loss": 2.1398, "step": 8186500 }, { "epoch": 40.56, "learning_rate": 2.9726656361627602e-05, "loss": 2.1439, "step": 8187000 }, { "epoch": 40.56, "learning_rate": 2.972541777520152e-05, "loss": 2.1254, "step": 8187500 }, { "epoch": 40.57, "learning_rate": 2.9724181665948288e-05, "loss": 2.1654, "step": 8188000 }, { "epoch": 40.57, "learning_rate": 2.972294555669506e-05, "loss": 2.1444, "step": 8188500 }, { "epoch": 40.57, "learning_rate": 2.9721709447441826e-05, "loss": 2.139, "step": 8189000 }, { "epoch": 40.57, "learning_rate": 2.9720470861015742e-05, "loss": 2.1315, "step": 8189500 }, { "epoch": 40.58, "learning_rate": 2.971923227458966e-05, "loss": 2.1444, "step": 8190000 }, { "epoch": 40.58, "learning_rate": 2.9717993688163576e-05, "loss": 2.1465, "step": 8190500 }, { "epoch": 40.58, "learning_rate": 2.9716755101737493e-05, "loss": 2.1152, "step": 8191000 }, { "epoch": 40.58, "learning_rate": 2.971551651531141e-05, "loss": 2.1569, "step": 8191500 }, { "epoch": 40.59, "learning_rate": 2.9714277928885327e-05, "loss": 2.1352, "step": 8192000 }, { "epoch": 40.59, "learning_rate": 2.9713039342459237e-05, "loss": 2.1341, "step": 8192500 }, { "epoch": 40.59, "learning_rate": 2.9711800756033154e-05, "loss": 2.1265, "step": 8193000 }, { "epoch": 40.59, "learning_rate": 2.9710564646779926e-05, "loss": 2.1616, "step": 8193500 }, { "epoch": 40.6, "learning_rate": 2.9709326060353843e-05, "loss": 2.1456, "step": 8194000 }, { "epoch": 40.6, "learning_rate": 2.970808747392776e-05, "loss": 2.1449, "step": 8194500 }, { "epoch": 40.6, "learning_rate": 2.9706848887501677e-05, "loss": 2.1651, "step": 8195000 }, { "epoch": 40.6, "learning_rate": 2.9705610301075594e-05, "loss": 2.1391, "step": 8195500 }, { "epoch": 40.61, "learning_rate": 2.970437419182236e-05, "loss": 2.1123, "step": 8196000 }, { "epoch": 40.61, "learning_rate": 2.9703135605396276e-05, "loss": 2.1382, "step": 8196500 }, { "epoch": 40.61, "learning_rate": 2.9701897018970193e-05, "loss": 2.1334, "step": 8197000 }, { "epoch": 40.61, "learning_rate": 2.970066090971696e-05, "loss": 2.1495, "step": 8197500 }, { "epoch": 40.62, "learning_rate": 2.9699422323290876e-05, "loss": 2.1516, "step": 8198000 }, { "epoch": 40.62, "learning_rate": 2.9698183736864793e-05, "loss": 2.1425, "step": 8198500 }, { "epoch": 40.62, "learning_rate": 2.969694515043871e-05, "loss": 2.1172, "step": 8199000 }, { "epoch": 40.62, "learning_rate": 2.9695706564012627e-05, "loss": 2.1628, "step": 8199500 }, { "epoch": 40.63, "learning_rate": 2.9694467977586543e-05, "loss": 2.1605, "step": 8200000 }, { "epoch": 40.63, "learning_rate": 2.969322939116046e-05, "loss": 2.1481, "step": 8200500 }, { "epoch": 40.63, "learning_rate": 2.9691990804734377e-05, "loss": 2.1267, "step": 8201000 }, { "epoch": 40.63, "learning_rate": 2.9690752218308294e-05, "loss": 2.1422, "step": 8201500 }, { "epoch": 40.64, "learning_rate": 2.9689513631882204e-05, "loss": 2.1388, "step": 8202000 }, { "epoch": 40.64, "learning_rate": 2.968827504545612e-05, "loss": 2.1116, "step": 8202500 }, { "epoch": 40.64, "learning_rate": 2.9687036459030038e-05, "loss": 2.1248, "step": 8203000 }, { "epoch": 40.64, "learning_rate": 2.9685797872603955e-05, "loss": 2.1602, "step": 8203500 }, { "epoch": 40.65, "learning_rate": 2.9684559286177872e-05, "loss": 2.1441, "step": 8204000 }, { "epoch": 40.65, "learning_rate": 2.9683323176924644e-05, "loss": 2.14, "step": 8204500 }, { "epoch": 40.65, "learning_rate": 2.9682084590498554e-05, "loss": 2.13, "step": 8205000 }, { "epoch": 40.65, "learning_rate": 2.968084600407247e-05, "loss": 2.1499, "step": 8205500 }, { "epoch": 40.66, "learning_rate": 2.967960741764639e-05, "loss": 2.1588, "step": 8206000 }, { "epoch": 40.66, "learning_rate": 2.9678368831220305e-05, "loss": 2.1316, "step": 8206500 }, { "epoch": 40.66, "learning_rate": 2.9677130244794222e-05, "loss": 2.1634, "step": 8207000 }, { "epoch": 40.66, "learning_rate": 2.967589165836814e-05, "loss": 2.1469, "step": 8207500 }, { "epoch": 40.67, "learning_rate": 2.9674653071942056e-05, "loss": 2.148, "step": 8208000 }, { "epoch": 40.67, "learning_rate": 2.967341448551597e-05, "loss": 2.156, "step": 8208500 }, { "epoch": 40.67, "learning_rate": 2.967217837626274e-05, "loss": 2.1469, "step": 8209000 }, { "epoch": 40.67, "learning_rate": 2.9670939789836655e-05, "loss": 2.1475, "step": 8209500 }, { "epoch": 40.68, "learning_rate": 2.9669703680583427e-05, "loss": 2.1454, "step": 8210000 }, { "epoch": 40.68, "learning_rate": 2.9668467571330193e-05, "loss": 2.1399, "step": 8210500 }, { "epoch": 40.68, "learning_rate": 2.966722898490411e-05, "loss": 2.1576, "step": 8211000 }, { "epoch": 40.68, "learning_rate": 2.966599287565088e-05, "loss": 2.134, "step": 8211500 }, { "epoch": 40.69, "learning_rate": 2.9664754289224796e-05, "loss": 2.1378, "step": 8212000 }, { "epoch": 40.69, "learning_rate": 2.9663515702798713e-05, "loss": 2.1026, "step": 8212500 }, { "epoch": 40.69, "learning_rate": 2.9662277116372626e-05, "loss": 2.1388, "step": 8213000 }, { "epoch": 40.69, "learning_rate": 2.9661038529946543e-05, "loss": 2.1206, "step": 8213500 }, { "epoch": 40.69, "learning_rate": 2.965979994352046e-05, "loss": 2.1255, "step": 8214000 }, { "epoch": 40.7, "learning_rate": 2.9658561357094377e-05, "loss": 2.1373, "step": 8214500 }, { "epoch": 40.7, "learning_rate": 2.9657322770668294e-05, "loss": 2.1324, "step": 8215000 }, { "epoch": 40.7, "learning_rate": 2.965608418424221e-05, "loss": 2.1274, "step": 8215500 }, { "epoch": 40.7, "learning_rate": 2.9654845597816128e-05, "loss": 2.1553, "step": 8216000 }, { "epoch": 40.71, "learning_rate": 2.9653609488562893e-05, "loss": 2.1646, "step": 8216500 }, { "epoch": 40.71, "learning_rate": 2.965237090213681e-05, "loss": 2.1816, "step": 8217000 }, { "epoch": 40.71, "learning_rate": 2.9651132315710727e-05, "loss": 2.1453, "step": 8217500 }, { "epoch": 40.71, "learning_rate": 2.9649893729284644e-05, "loss": 2.1531, "step": 8218000 }, { "epoch": 40.72, "learning_rate": 2.964865514285856e-05, "loss": 2.1362, "step": 8218500 }, { "epoch": 40.72, "learning_rate": 2.964741903360533e-05, "loss": 2.1224, "step": 8219000 }, { "epoch": 40.72, "learning_rate": 2.9646180447179246e-05, "loss": 2.1708, "step": 8219500 }, { "epoch": 40.72, "learning_rate": 2.964494186075316e-05, "loss": 2.1652, "step": 8220000 }, { "epoch": 40.73, "learning_rate": 2.9643703274327077e-05, "loss": 2.1282, "step": 8220500 }, { "epoch": 40.73, "learning_rate": 2.9642464687900994e-05, "loss": 2.1318, "step": 8221000 }, { "epoch": 40.73, "learning_rate": 2.964122610147491e-05, "loss": 2.1681, "step": 8221500 }, { "epoch": 40.73, "learning_rate": 2.9639987515048828e-05, "loss": 2.2041, "step": 8222000 }, { "epoch": 40.74, "learning_rate": 2.9638748928622745e-05, "loss": 2.1432, "step": 8222500 }, { "epoch": 40.74, "learning_rate": 2.963751034219666e-05, "loss": 2.1502, "step": 8223000 }, { "epoch": 40.74, "learning_rate": 2.963627175577058e-05, "loss": 2.1343, "step": 8223500 }, { "epoch": 40.74, "learning_rate": 2.9635035646517344e-05, "loss": 2.1548, "step": 8224000 }, { "epoch": 40.75, "learning_rate": 2.9633799537264113e-05, "loss": 2.1592, "step": 8224500 }, { "epoch": 40.75, "learning_rate": 2.963256095083803e-05, "loss": 2.1304, "step": 8225000 }, { "epoch": 40.75, "learning_rate": 2.9631324841584795e-05, "loss": 2.1808, "step": 8225500 }, { "epoch": 40.75, "learning_rate": 2.9630086255158712e-05, "loss": 2.1154, "step": 8226000 }, { "epoch": 40.76, "learning_rate": 2.962884766873263e-05, "loss": 2.1518, "step": 8226500 }, { "epoch": 40.76, "learning_rate": 2.9627609082306546e-05, "loss": 2.1472, "step": 8227000 }, { "epoch": 40.76, "learning_rate": 2.9626370495880463e-05, "loss": 2.1474, "step": 8227500 }, { "epoch": 40.76, "learning_rate": 2.962513190945438e-05, "loss": 2.1526, "step": 8228000 }, { "epoch": 40.77, "learning_rate": 2.9623895800201145e-05, "loss": 2.1285, "step": 8228500 }, { "epoch": 40.77, "learning_rate": 2.9622657213775062e-05, "loss": 2.1489, "step": 8229000 }, { "epoch": 40.77, "learning_rate": 2.962141862734898e-05, "loss": 2.1787, "step": 8229500 }, { "epoch": 40.77, "learning_rate": 2.9620180040922896e-05, "loss": 2.1508, "step": 8230000 }, { "epoch": 40.78, "learning_rate": 2.9618941454496813e-05, "loss": 2.1339, "step": 8230500 }, { "epoch": 40.78, "learning_rate": 2.961770286807073e-05, "loss": 2.1528, "step": 8231000 }, { "epoch": 40.78, "learning_rate": 2.9616466758817495e-05, "loss": 2.1211, "step": 8231500 }, { "epoch": 40.78, "learning_rate": 2.9615228172391412e-05, "loss": 2.1352, "step": 8232000 }, { "epoch": 40.79, "learning_rate": 2.961398958596533e-05, "loss": 2.1341, "step": 8232500 }, { "epoch": 40.79, "learning_rate": 2.9612750999539246e-05, "loss": 2.1666, "step": 8233000 }, { "epoch": 40.79, "learning_rate": 2.9611512413113163e-05, "loss": 2.1458, "step": 8233500 }, { "epoch": 40.79, "learning_rate": 2.961027382668708e-05, "loss": 2.1408, "step": 8234000 }, { "epoch": 40.8, "learning_rate": 2.9609035240260997e-05, "loss": 2.1511, "step": 8234500 }, { "epoch": 40.8, "learning_rate": 2.9607796653834914e-05, "loss": 2.1705, "step": 8235000 }, { "epoch": 40.8, "learning_rate": 2.960655806740883e-05, "loss": 2.1223, "step": 8235500 }, { "epoch": 40.8, "learning_rate": 2.9605319480982748e-05, "loss": 2.1659, "step": 8236000 }, { "epoch": 40.81, "learning_rate": 2.960408089455666e-05, "loss": 2.1462, "step": 8236500 }, { "epoch": 40.81, "learning_rate": 2.9602842308130578e-05, "loss": 2.1357, "step": 8237000 }, { "epoch": 40.81, "learning_rate": 2.9601603721704495e-05, "loss": 2.1573, "step": 8237500 }, { "epoch": 40.81, "learning_rate": 2.9600367612451264e-05, "loss": 2.1558, "step": 8238000 }, { "epoch": 40.82, "learning_rate": 2.959913150319803e-05, "loss": 2.1621, "step": 8238500 }, { "epoch": 40.82, "learning_rate": 2.95978953939448e-05, "loss": 2.1501, "step": 8239000 }, { "epoch": 40.82, "learning_rate": 2.959665680751872e-05, "loss": 2.1453, "step": 8239500 }, { "epoch": 40.82, "learning_rate": 2.959541822109263e-05, "loss": 2.1669, "step": 8240000 }, { "epoch": 40.83, "learning_rate": 2.9594179634666545e-05, "loss": 2.147, "step": 8240500 }, { "epoch": 40.83, "learning_rate": 2.9592941048240462e-05, "loss": 2.137, "step": 8241000 }, { "epoch": 40.83, "learning_rate": 2.959170246181438e-05, "loss": 2.1562, "step": 8241500 }, { "epoch": 40.83, "learning_rate": 2.9590463875388296e-05, "loss": 2.1316, "step": 8242000 }, { "epoch": 40.84, "learning_rate": 2.9589225288962213e-05, "loss": 2.1594, "step": 8242500 }, { "epoch": 40.84, "learning_rate": 2.958798670253613e-05, "loss": 2.1823, "step": 8243000 }, { "epoch": 40.84, "learning_rate": 2.9586748116110047e-05, "loss": 2.1473, "step": 8243500 }, { "epoch": 40.84, "learning_rate": 2.9585509529683964e-05, "loss": 2.1704, "step": 8244000 }, { "epoch": 40.85, "learning_rate": 2.958427094325788e-05, "loss": 2.1192, "step": 8244500 }, { "epoch": 40.85, "learning_rate": 2.9583032356831798e-05, "loss": 2.1319, "step": 8245000 }, { "epoch": 40.85, "learning_rate": 2.9581793770405715e-05, "loss": 2.1212, "step": 8245500 }, { "epoch": 40.85, "learning_rate": 2.958055518397963e-05, "loss": 2.1501, "step": 8246000 }, { "epoch": 40.86, "learning_rate": 2.957932155189925e-05, "loss": 2.1414, "step": 8246500 }, { "epoch": 40.86, "learning_rate": 2.9578082965473162e-05, "loss": 2.1391, "step": 8247000 }, { "epoch": 40.86, "learning_rate": 2.9576846856219935e-05, "loss": 2.1557, "step": 8247500 }, { "epoch": 40.86, "learning_rate": 2.957560826979385e-05, "loss": 2.132, "step": 8248000 }, { "epoch": 40.87, "learning_rate": 2.957437216054062e-05, "loss": 2.1542, "step": 8248500 }, { "epoch": 40.87, "learning_rate": 2.9573133574114537e-05, "loss": 2.1584, "step": 8249000 }, { "epoch": 40.87, "learning_rate": 2.9571894987688454e-05, "loss": 2.1358, "step": 8249500 }, { "epoch": 40.87, "learning_rate": 2.957065640126237e-05, "loss": 2.15, "step": 8250000 }, { "epoch": 40.88, "learning_rate": 2.9569417814836288e-05, "loss": 2.1911, "step": 8250500 }, { "epoch": 40.88, "learning_rate": 2.95681792284102e-05, "loss": 2.1346, "step": 8251000 }, { "epoch": 40.88, "learning_rate": 2.956694064198412e-05, "loss": 2.1544, "step": 8251500 }, { "epoch": 40.88, "learning_rate": 2.9565702055558036e-05, "loss": 2.1412, "step": 8252000 }, { "epoch": 40.89, "learning_rate": 2.956446346913195e-05, "loss": 2.1766, "step": 8252500 }, { "epoch": 40.89, "learning_rate": 2.956322735987872e-05, "loss": 2.1383, "step": 8253000 }, { "epoch": 40.89, "learning_rate": 2.9561988773452638e-05, "loss": 2.1539, "step": 8253500 }, { "epoch": 40.89, "learning_rate": 2.9560750187026555e-05, "loss": 2.1565, "step": 8254000 }, { "epoch": 40.9, "learning_rate": 2.955951160060047e-05, "loss": 2.1515, "step": 8254500 }, { "epoch": 40.9, "learning_rate": 2.9558275491347237e-05, "loss": 2.1533, "step": 8255000 }, { "epoch": 40.9, "learning_rate": 2.9557036904921154e-05, "loss": 2.1598, "step": 8255500 }, { "epoch": 40.9, "learning_rate": 2.955579831849507e-05, "loss": 2.1603, "step": 8256000 }, { "epoch": 40.91, "learning_rate": 2.9554559732068988e-05, "loss": 2.1545, "step": 8256500 }, { "epoch": 40.91, "learning_rate": 2.9553321145642905e-05, "loss": 2.1765, "step": 8257000 }, { "epoch": 40.91, "learning_rate": 2.9552082559216822e-05, "loss": 2.1444, "step": 8257500 }, { "epoch": 40.91, "learning_rate": 2.9550843972790736e-05, "loss": 2.1319, "step": 8258000 }, { "epoch": 40.92, "learning_rate": 2.9549605386364653e-05, "loss": 2.1509, "step": 8258500 }, { "epoch": 40.92, "learning_rate": 2.9548366799938566e-05, "loss": 2.1569, "step": 8259000 }, { "epoch": 40.92, "learning_rate": 2.9547128213512483e-05, "loss": 2.1474, "step": 8259500 }, { "epoch": 40.92, "learning_rate": 2.95458896270864e-05, "loss": 2.1596, "step": 8260000 }, { "epoch": 40.93, "learning_rate": 2.9544651040660314e-05, "loss": 2.1492, "step": 8260500 }, { "epoch": 40.93, "learning_rate": 2.954341245423423e-05, "loss": 2.1461, "step": 8261000 }, { "epoch": 40.93, "learning_rate": 2.9542173867808147e-05, "loss": 2.1755, "step": 8261500 }, { "epoch": 40.93, "learning_rate": 2.9540935281382064e-05, "loss": 2.1641, "step": 8262000 }, { "epoch": 40.94, "learning_rate": 2.953969669495598e-05, "loss": 2.1256, "step": 8262500 }, { "epoch": 40.94, "learning_rate": 2.953846058570275e-05, "loss": 2.1586, "step": 8263000 }, { "epoch": 40.94, "learning_rate": 2.9537221999276664e-05, "loss": 2.1268, "step": 8263500 }, { "epoch": 40.94, "learning_rate": 2.953598341285058e-05, "loss": 2.1527, "step": 8264000 }, { "epoch": 40.95, "learning_rate": 2.9534744826424497e-05, "loss": 2.1534, "step": 8264500 }, { "epoch": 40.95, "learning_rate": 2.9533508717171266e-05, "loss": 2.163, "step": 8265000 }, { "epoch": 40.95, "learning_rate": 2.9532270130745183e-05, "loss": 2.1763, "step": 8265500 }, { "epoch": 40.95, "learning_rate": 2.95310315443191e-05, "loss": 2.1647, "step": 8266000 }, { "epoch": 40.96, "learning_rate": 2.9529792957893017e-05, "loss": 2.1395, "step": 8266500 }, { "epoch": 40.96, "learning_rate": 2.952855437146693e-05, "loss": 2.1333, "step": 8267000 }, { "epoch": 40.96, "learning_rate": 2.9527318262213706e-05, "loss": 2.134, "step": 8267500 }, { "epoch": 40.96, "learning_rate": 2.952607967578762e-05, "loss": 2.1343, "step": 8268000 }, { "epoch": 40.97, "learning_rate": 2.9524841089361533e-05, "loss": 2.1466, "step": 8268500 }, { "epoch": 40.97, "learning_rate": 2.9523607457281154e-05, "loss": 2.1499, "step": 8269000 }, { "epoch": 40.97, "learning_rate": 2.952236887085507e-05, "loss": 2.1687, "step": 8269500 }, { "epoch": 40.97, "learning_rate": 2.9521130284428988e-05, "loss": 2.1647, "step": 8270000 }, { "epoch": 40.97, "learning_rate": 2.9519891698002905e-05, "loss": 2.1593, "step": 8270500 }, { "epoch": 40.98, "learning_rate": 2.951865311157682e-05, "loss": 2.1457, "step": 8271000 }, { "epoch": 40.98, "learning_rate": 2.951741452515074e-05, "loss": 2.16, "step": 8271500 }, { "epoch": 40.98, "learning_rate": 2.9516175938724655e-05, "loss": 2.1421, "step": 8272000 }, { "epoch": 40.98, "learning_rate": 2.9514937352298572e-05, "loss": 2.158, "step": 8272500 }, { "epoch": 40.99, "learning_rate": 2.951369876587249e-05, "loss": 2.1384, "step": 8273000 }, { "epoch": 40.99, "learning_rate": 2.9512460179446406e-05, "loss": 2.165, "step": 8273500 }, { "epoch": 40.99, "learning_rate": 2.9511221593020323e-05, "loss": 2.1627, "step": 8274000 }, { "epoch": 40.99, "learning_rate": 2.9509983006594233e-05, "loss": 2.1509, "step": 8274500 }, { "epoch": 41.0, "learning_rate": 2.950874442016815e-05, "loss": 2.1459, "step": 8275000 }, { "epoch": 41.0, "learning_rate": 2.950751078808777e-05, "loss": 2.152, "step": 8275500 }, { "epoch": 41.0, "eval_accuracy": 0.6644707547688418, "eval_accuracy_mlm": 0.6214229360312783, "eval_accuracy_nsp": 0.867135500217682, "eval_loss": 2.302288055419922, "eval_runtime": 146.7937, "eval_samples_per_second": 1736.852, "eval_steps_per_second": 72.374, "step": 8275563 }, { "epoch": 41.0, "learning_rate": 2.9506272201661688e-05, "loss": 2.1229, "step": 8276000 }, { "epoch": 41.0, "learning_rate": 2.9505033615235605e-05, "loss": 2.132, "step": 8276500 }, { "epoch": 41.01, "learning_rate": 2.9503795028809522e-05, "loss": 2.1164, "step": 8277000 }, { "epoch": 41.01, "learning_rate": 2.950255644238344e-05, "loss": 2.1181, "step": 8277500 }, { "epoch": 41.01, "learning_rate": 2.9501317855957356e-05, "loss": 2.101, "step": 8278000 }, { "epoch": 41.01, "learning_rate": 2.9500079269531273e-05, "loss": 2.1239, "step": 8278500 }, { "epoch": 41.02, "learning_rate": 2.949884068310519e-05, "loss": 2.118, "step": 8279000 }, { "epoch": 41.02, "learning_rate": 2.9497602096679106e-05, "loss": 2.1109, "step": 8279500 }, { "epoch": 41.02, "learning_rate": 2.9496365987425872e-05, "loss": 2.1189, "step": 8280000 }, { "epoch": 41.02, "learning_rate": 2.949512740099979e-05, "loss": 2.1079, "step": 8280500 }, { "epoch": 41.03, "learning_rate": 2.9493888814573706e-05, "loss": 2.1173, "step": 8281000 }, { "epoch": 41.03, "learning_rate": 2.9492650228147623e-05, "loss": 2.1281, "step": 8281500 }, { "epoch": 41.03, "learning_rate": 2.9491414118894388e-05, "loss": 2.118, "step": 8282000 }, { "epoch": 41.03, "learning_rate": 2.9490175532468305e-05, "loss": 2.1282, "step": 8282500 }, { "epoch": 41.04, "learning_rate": 2.9488936946042222e-05, "loss": 2.1413, "step": 8283000 }, { "epoch": 41.04, "learning_rate": 2.948769835961614e-05, "loss": 2.1148, "step": 8283500 }, { "epoch": 41.04, "learning_rate": 2.9486459773190056e-05, "loss": 2.1337, "step": 8284000 }, { "epoch": 41.04, "learning_rate": 2.9485223663936825e-05, "loss": 2.14, "step": 8284500 }, { "epoch": 41.05, "learning_rate": 2.9483985077510738e-05, "loss": 2.1158, "step": 8285000 }, { "epoch": 41.05, "learning_rate": 2.9482746491084655e-05, "loss": 2.1164, "step": 8285500 }, { "epoch": 41.05, "learning_rate": 2.9481507904658572e-05, "loss": 2.1358, "step": 8286000 }, { "epoch": 41.05, "learning_rate": 2.948026931823249e-05, "loss": 2.1007, "step": 8286500 }, { "epoch": 41.06, "learning_rate": 2.9479033208979258e-05, "loss": 2.102, "step": 8287000 }, { "epoch": 41.06, "learning_rate": 2.9477794622553175e-05, "loss": 2.1005, "step": 8287500 }, { "epoch": 41.06, "learning_rate": 2.9476556036127088e-05, "loss": 2.1461, "step": 8288000 }, { "epoch": 41.06, "learning_rate": 2.9475317449701005e-05, "loss": 2.0995, "step": 8288500 }, { "epoch": 41.07, "learning_rate": 2.9474078863274922e-05, "loss": 2.1221, "step": 8289000 }, { "epoch": 41.07, "learning_rate": 2.947284027684884e-05, "loss": 2.1063, "step": 8289500 }, { "epoch": 41.07, "learning_rate": 2.9471601690422756e-05, "loss": 2.1314, "step": 8290000 }, { "epoch": 41.07, "learning_rate": 2.9470363103996673e-05, "loss": 2.1368, "step": 8290500 }, { "epoch": 41.08, "learning_rate": 2.946912699474344e-05, "loss": 2.1085, "step": 8291000 }, { "epoch": 41.08, "learning_rate": 2.9467888408317355e-05, "loss": 2.1352, "step": 8291500 }, { "epoch": 41.08, "learning_rate": 2.9466649821891272e-05, "loss": 2.1063, "step": 8292000 }, { "epoch": 41.08, "learning_rate": 2.946541123546519e-05, "loss": 2.1187, "step": 8292500 }, { "epoch": 41.09, "learning_rate": 2.9464172649039106e-05, "loss": 2.1295, "step": 8293000 }, { "epoch": 41.09, "learning_rate": 2.9462934062613023e-05, "loss": 2.0973, "step": 8293500 }, { "epoch": 41.09, "learning_rate": 2.946169547618694e-05, "loss": 2.1179, "step": 8294000 }, { "epoch": 41.09, "learning_rate": 2.9460456889760857e-05, "loss": 2.1424, "step": 8294500 }, { "epoch": 41.1, "learning_rate": 2.9459218303334774e-05, "loss": 2.1093, "step": 8295000 }, { "epoch": 41.1, "learning_rate": 2.945797971690869e-05, "loss": 2.1237, "step": 8295500 }, { "epoch": 41.1, "learning_rate": 2.9456743607655456e-05, "loss": 2.1329, "step": 8296000 }, { "epoch": 41.1, "learning_rate": 2.9455507498402225e-05, "loss": 2.1317, "step": 8296500 }, { "epoch": 41.11, "learning_rate": 2.9454268911976142e-05, "loss": 2.112, "step": 8297000 }, { "epoch": 41.11, "learning_rate": 2.945303032555006e-05, "loss": 2.1019, "step": 8297500 }, { "epoch": 41.11, "learning_rate": 2.9451791739123976e-05, "loss": 2.1156, "step": 8298000 }, { "epoch": 41.11, "learning_rate": 2.945055315269789e-05, "loss": 2.1268, "step": 8298500 }, { "epoch": 41.12, "learning_rate": 2.9449314566271806e-05, "loss": 2.1263, "step": 8299000 }, { "epoch": 41.12, "learning_rate": 2.9448078457018575e-05, "loss": 2.1061, "step": 8299500 }, { "epoch": 41.12, "learning_rate": 2.9446839870592492e-05, "loss": 2.1532, "step": 8300000 }, { "epoch": 41.12, "learning_rate": 2.944560128416641e-05, "loss": 2.1156, "step": 8300500 }, { "epoch": 41.13, "learning_rate": 2.9444362697740326e-05, "loss": 2.1254, "step": 8301000 }, { "epoch": 41.13, "learning_rate": 2.944312411131424e-05, "loss": 2.1345, "step": 8301500 }, { "epoch": 41.13, "learning_rate": 2.9441885524888156e-05, "loss": 2.1294, "step": 8302000 }, { "epoch": 41.13, "learning_rate": 2.9440649415634925e-05, "loss": 2.1545, "step": 8302500 }, { "epoch": 41.14, "learning_rate": 2.9439410829208842e-05, "loss": 2.1175, "step": 8303000 }, { "epoch": 41.14, "learning_rate": 2.943817224278276e-05, "loss": 2.1143, "step": 8303500 }, { "epoch": 41.14, "learning_rate": 2.9436933656356676e-05, "loss": 2.103, "step": 8304000 }, { "epoch": 41.14, "learning_rate": 2.9435695069930593e-05, "loss": 2.1272, "step": 8304500 }, { "epoch": 41.15, "learning_rate": 2.9434456483504506e-05, "loss": 2.1076, "step": 8305000 }, { "epoch": 41.15, "learning_rate": 2.9433217897078423e-05, "loss": 2.1238, "step": 8305500 }, { "epoch": 41.15, "learning_rate": 2.943197931065234e-05, "loss": 2.104, "step": 8306000 }, { "epoch": 41.15, "learning_rate": 2.9430740724226257e-05, "loss": 2.1289, "step": 8306500 }, { "epoch": 41.16, "learning_rate": 2.9429502137800174e-05, "loss": 2.1363, "step": 8307000 }, { "epoch": 41.16, "learning_rate": 2.942826355137409e-05, "loss": 2.1162, "step": 8307500 }, { "epoch": 41.16, "learning_rate": 2.9427024964948008e-05, "loss": 2.1305, "step": 8308000 }, { "epoch": 41.16, "learning_rate": 2.9425786378521925e-05, "loss": 2.1258, "step": 8308500 }, { "epoch": 41.17, "learning_rate": 2.9424547792095835e-05, "loss": 2.1138, "step": 8309000 }, { "epoch": 41.17, "learning_rate": 2.9423311682842607e-05, "loss": 2.1353, "step": 8309500 }, { "epoch": 41.17, "learning_rate": 2.9422075573589376e-05, "loss": 2.1382, "step": 8310000 }, { "epoch": 41.17, "learning_rate": 2.9420836987163293e-05, "loss": 2.1216, "step": 8310500 }, { "epoch": 41.18, "learning_rate": 2.941959840073721e-05, "loss": 2.1563, "step": 8311000 }, { "epoch": 41.18, "learning_rate": 2.9418359814311127e-05, "loss": 2.1517, "step": 8311500 }, { "epoch": 41.18, "learning_rate": 2.941712122788504e-05, "loss": 2.1114, "step": 8312000 }, { "epoch": 41.18, "learning_rate": 2.941588511863181e-05, "loss": 2.1274, "step": 8312500 }, { "epoch": 41.19, "learning_rate": 2.9414646532205726e-05, "loss": 2.1403, "step": 8313000 }, { "epoch": 41.19, "learning_rate": 2.941341042295249e-05, "loss": 2.0904, "step": 8313500 }, { "epoch": 41.19, "learning_rate": 2.9412171836526408e-05, "loss": 2.1162, "step": 8314000 }, { "epoch": 41.19, "learning_rate": 2.9410933250100325e-05, "loss": 2.1413, "step": 8314500 }, { "epoch": 41.2, "learning_rate": 2.9409697140847097e-05, "loss": 2.1158, "step": 8315000 }, { "epoch": 41.2, "learning_rate": 2.9408458554421014e-05, "loss": 2.1269, "step": 8315500 }, { "epoch": 41.2, "learning_rate": 2.9407219967994924e-05, "loss": 2.1183, "step": 8316000 }, { "epoch": 41.2, "learning_rate": 2.940598138156884e-05, "loss": 2.1098, "step": 8316500 }, { "epoch": 41.21, "learning_rate": 2.9404742795142758e-05, "loss": 2.139, "step": 8317000 }, { "epoch": 41.21, "learning_rate": 2.9403504208716675e-05, "loss": 2.1271, "step": 8317500 }, { "epoch": 41.21, "learning_rate": 2.9402268099463447e-05, "loss": 2.1363, "step": 8318000 }, { "epoch": 41.21, "learning_rate": 2.9401029513037364e-05, "loss": 2.1293, "step": 8318500 }, { "epoch": 41.22, "learning_rate": 2.9399790926611274e-05, "loss": 2.1608, "step": 8319000 }, { "epoch": 41.22, "learning_rate": 2.939855234018519e-05, "loss": 2.1401, "step": 8319500 }, { "epoch": 41.22, "learning_rate": 2.939731375375911e-05, "loss": 2.1279, "step": 8320000 }, { "epoch": 41.22, "learning_rate": 2.9396075167333025e-05, "loss": 2.1236, "step": 8320500 }, { "epoch": 41.23, "learning_rate": 2.9394836580906942e-05, "loss": 2.1424, "step": 8321000 }, { "epoch": 41.23, "learning_rate": 2.939359799448086e-05, "loss": 2.1305, "step": 8321500 }, { "epoch": 41.23, "learning_rate": 2.9392359408054776e-05, "loss": 2.1008, "step": 8322000 }, { "epoch": 41.23, "learning_rate": 2.9391120821628693e-05, "loss": 2.1429, "step": 8322500 }, { "epoch": 41.24, "learning_rate": 2.938988223520261e-05, "loss": 2.1131, "step": 8323000 }, { "epoch": 41.24, "learning_rate": 2.9388643648776527e-05, "loss": 2.1256, "step": 8323500 }, { "epoch": 41.24, "learning_rate": 2.9387405062350444e-05, "loss": 2.1325, "step": 8324000 }, { "epoch": 41.24, "learning_rate": 2.938616647592436e-05, "loss": 2.1284, "step": 8324500 }, { "epoch": 41.24, "learning_rate": 2.9384927889498278e-05, "loss": 2.1152, "step": 8325000 }, { "epoch": 41.25, "learning_rate": 2.938368930307219e-05, "loss": 2.1257, "step": 8325500 }, { "epoch": 41.25, "learning_rate": 2.9382450716646108e-05, "loss": 2.1267, "step": 8326000 }, { "epoch": 41.25, "learning_rate": 2.9381214607392877e-05, "loss": 2.1501, "step": 8326500 }, { "epoch": 41.25, "learning_rate": 2.9379976020966794e-05, "loss": 2.1381, "step": 8327000 }, { "epoch": 41.26, "learning_rate": 2.937873743454071e-05, "loss": 2.1209, "step": 8327500 }, { "epoch": 41.26, "learning_rate": 2.9377498848114628e-05, "loss": 2.1089, "step": 8328000 }, { "epoch": 41.26, "learning_rate": 2.9376262738861393e-05, "loss": 2.1425, "step": 8328500 }, { "epoch": 41.26, "learning_rate": 2.937502662960816e-05, "loss": 2.1217, "step": 8329000 }, { "epoch": 41.27, "learning_rate": 2.937379052035493e-05, "loss": 2.1259, "step": 8329500 }, { "epoch": 41.27, "learning_rate": 2.9372551933928848e-05, "loss": 2.1471, "step": 8330000 }, { "epoch": 41.27, "learning_rate": 2.9371313347502765e-05, "loss": 2.13, "step": 8330500 }, { "epoch": 41.27, "learning_rate": 2.937007476107668e-05, "loss": 2.1325, "step": 8331000 }, { "epoch": 41.28, "learning_rate": 2.9368836174650595e-05, "loss": 2.1263, "step": 8331500 }, { "epoch": 41.28, "learning_rate": 2.9367600065397367e-05, "loss": 2.1302, "step": 8332000 }, { "epoch": 41.28, "learning_rate": 2.9366361478971284e-05, "loss": 2.1305, "step": 8332500 }, { "epoch": 41.28, "learning_rate": 2.9365122892545198e-05, "loss": 2.1162, "step": 8333000 }, { "epoch": 41.29, "learning_rate": 2.9363884306119115e-05, "loss": 2.1277, "step": 8333500 }, { "epoch": 41.29, "learning_rate": 2.936264571969303e-05, "loss": 2.1113, "step": 8334000 }, { "epoch": 41.29, "learning_rate": 2.9361407133266945e-05, "loss": 2.1347, "step": 8334500 }, { "epoch": 41.29, "learning_rate": 2.9360168546840862e-05, "loss": 2.131, "step": 8335000 }, { "epoch": 41.3, "learning_rate": 2.9358929960414776e-05, "loss": 2.1207, "step": 8335500 }, { "epoch": 41.3, "learning_rate": 2.9357691373988693e-05, "loss": 2.1434, "step": 8336000 }, { "epoch": 41.3, "learning_rate": 2.9356455264735465e-05, "loss": 2.1239, "step": 8336500 }, { "epoch": 41.3, "learning_rate": 2.935521667830938e-05, "loss": 2.1484, "step": 8337000 }, { "epoch": 41.31, "learning_rate": 2.93539780918833e-05, "loss": 2.1227, "step": 8337500 }, { "epoch": 41.31, "learning_rate": 2.9352739505457212e-05, "loss": 2.1109, "step": 8338000 }, { "epoch": 41.31, "learning_rate": 2.9351503396203984e-05, "loss": 2.1401, "step": 8338500 }, { "epoch": 41.31, "learning_rate": 2.935026728695075e-05, "loss": 2.1377, "step": 8339000 }, { "epoch": 41.32, "learning_rate": 2.9349028700524667e-05, "loss": 2.1245, "step": 8339500 }, { "epoch": 41.32, "learning_rate": 2.9347790114098584e-05, "loss": 2.1135, "step": 8340000 }, { "epoch": 41.32, "learning_rate": 2.934655400484535e-05, "loss": 2.1513, "step": 8340500 }, { "epoch": 41.32, "learning_rate": 2.9345315418419266e-05, "loss": 2.1504, "step": 8341000 }, { "epoch": 41.33, "learning_rate": 2.9344076831993183e-05, "loss": 2.134, "step": 8341500 }, { "epoch": 41.33, "learning_rate": 2.93428382455671e-05, "loss": 2.0967, "step": 8342000 }, { "epoch": 41.33, "learning_rate": 2.9341599659141017e-05, "loss": 2.1226, "step": 8342500 }, { "epoch": 41.33, "learning_rate": 2.9340361072714934e-05, "loss": 2.1373, "step": 8343000 }, { "epoch": 41.34, "learning_rate": 2.933912248628885e-05, "loss": 2.1211, "step": 8343500 }, { "epoch": 41.34, "learning_rate": 2.9337883899862768e-05, "loss": 2.1376, "step": 8344000 }, { "epoch": 41.34, "learning_rate": 2.9336645313436684e-05, "loss": 2.1132, "step": 8344500 }, { "epoch": 41.34, "learning_rate": 2.933540920418345e-05, "loss": 2.1331, "step": 8345000 }, { "epoch": 41.35, "learning_rate": 2.933417309493022e-05, "loss": 2.1236, "step": 8345500 }, { "epoch": 41.35, "learning_rate": 2.9332934508504136e-05, "loss": 2.1284, "step": 8346000 }, { "epoch": 41.35, "learning_rate": 2.933169592207805e-05, "loss": 2.1453, "step": 8346500 }, { "epoch": 41.35, "learning_rate": 2.9330457335651966e-05, "loss": 2.1459, "step": 8347000 }, { "epoch": 41.36, "learning_rate": 2.9329218749225883e-05, "loss": 2.1144, "step": 8347500 }, { "epoch": 41.36, "learning_rate": 2.93279801627998e-05, "loss": 2.1058, "step": 8348000 }, { "epoch": 41.36, "learning_rate": 2.9326741576373717e-05, "loss": 2.1198, "step": 8348500 }, { "epoch": 41.36, "learning_rate": 2.9325502989947634e-05, "loss": 2.1258, "step": 8349000 }, { "epoch": 41.37, "learning_rate": 2.932426440352155e-05, "loss": 2.1521, "step": 8349500 }, { "epoch": 41.37, "learning_rate": 2.9323025817095468e-05, "loss": 2.1342, "step": 8350000 }, { "epoch": 41.37, "learning_rate": 2.9321787230669385e-05, "loss": 2.1383, "step": 8350500 }, { "epoch": 41.37, "learning_rate": 2.93205486442433e-05, "loss": 2.1237, "step": 8351000 }, { "epoch": 41.38, "learning_rate": 2.931931005781722e-05, "loss": 2.1204, "step": 8351500 }, { "epoch": 41.38, "learning_rate": 2.9318071471391135e-05, "loss": 2.1337, "step": 8352000 }, { "epoch": 41.38, "learning_rate": 2.9316832884965052e-05, "loss": 2.1367, "step": 8352500 }, { "epoch": 41.38, "learning_rate": 2.931559429853897e-05, "loss": 2.1263, "step": 8353000 }, { "epoch": 41.39, "learning_rate": 2.931435571211288e-05, "loss": 2.1214, "step": 8353500 }, { "epoch": 41.39, "learning_rate": 2.9313117125686796e-05, "loss": 2.143, "step": 8354000 }, { "epoch": 41.39, "learning_rate": 2.9311878539260713e-05, "loss": 2.1395, "step": 8354500 }, { "epoch": 41.39, "learning_rate": 2.931063995283463e-05, "loss": 2.1241, "step": 8355000 }, { "epoch": 41.4, "learning_rate": 2.9309403843581402e-05, "loss": 2.129, "step": 8355500 }, { "epoch": 41.4, "learning_rate": 2.930816525715532e-05, "loss": 2.1155, "step": 8356000 }, { "epoch": 41.4, "learning_rate": 2.930692667072923e-05, "loss": 2.1089, "step": 8356500 }, { "epoch": 41.4, "learning_rate": 2.930569303864885e-05, "loss": 2.1339, "step": 8357000 }, { "epoch": 41.41, "learning_rate": 2.9304454452222767e-05, "loss": 2.1347, "step": 8357500 }, { "epoch": 41.41, "learning_rate": 2.9303215865796684e-05, "loss": 2.129, "step": 8358000 }, { "epoch": 41.41, "learning_rate": 2.93019772793706e-05, "loss": 2.1181, "step": 8358500 }, { "epoch": 41.41, "learning_rate": 2.9300738692944518e-05, "loss": 2.1473, "step": 8359000 }, { "epoch": 41.42, "learning_rate": 2.9299500106518435e-05, "loss": 2.124, "step": 8359500 }, { "epoch": 41.42, "learning_rate": 2.929826152009235e-05, "loss": 2.1402, "step": 8360000 }, { "epoch": 41.42, "learning_rate": 2.929702293366627e-05, "loss": 2.1212, "step": 8360500 }, { "epoch": 41.42, "learning_rate": 2.9295786824413034e-05, "loss": 2.1264, "step": 8361000 }, { "epoch": 41.43, "learning_rate": 2.929454823798695e-05, "loss": 2.1331, "step": 8361500 }, { "epoch": 41.43, "learning_rate": 2.9293309651560868e-05, "loss": 2.1452, "step": 8362000 }, { "epoch": 41.43, "learning_rate": 2.9292071065134785e-05, "loss": 2.1413, "step": 8362500 }, { "epoch": 41.43, "learning_rate": 2.9290832478708702e-05, "loss": 2.1448, "step": 8363000 }, { "epoch": 41.44, "learning_rate": 2.928959389228262e-05, "loss": 2.1217, "step": 8363500 }, { "epoch": 41.44, "learning_rate": 2.9288355305856536e-05, "loss": 2.1019, "step": 8364000 }, { "epoch": 41.44, "learning_rate": 2.9287116719430453e-05, "loss": 2.1244, "step": 8364500 }, { "epoch": 41.44, "learning_rate": 2.9285880610177218e-05, "loss": 2.1364, "step": 8365000 }, { "epoch": 41.45, "learning_rate": 2.9284642023751135e-05, "loss": 2.1196, "step": 8365500 }, { "epoch": 41.45, "learning_rate": 2.9283403437325052e-05, "loss": 2.1368, "step": 8366000 }, { "epoch": 41.45, "learning_rate": 2.928216485089897e-05, "loss": 2.1499, "step": 8366500 }, { "epoch": 41.45, "learning_rate": 2.9280926264472886e-05, "loss": 2.1613, "step": 8367000 }, { "epoch": 41.46, "learning_rate": 2.9279687678046803e-05, "loss": 2.1246, "step": 8367500 }, { "epoch": 41.46, "learning_rate": 2.9278451568793568e-05, "loss": 2.1223, "step": 8368000 }, { "epoch": 41.46, "learning_rate": 2.9277215459540337e-05, "loss": 2.1158, "step": 8368500 }, { "epoch": 41.46, "learning_rate": 2.9275976873114254e-05, "loss": 2.1163, "step": 8369000 }, { "epoch": 41.47, "learning_rate": 2.927473828668817e-05, "loss": 2.129, "step": 8369500 }, { "epoch": 41.47, "learning_rate": 2.9273499700262084e-05, "loss": 2.1304, "step": 8370000 }, { "epoch": 41.47, "learning_rate": 2.9272261113836e-05, "loss": 2.14, "step": 8370500 }, { "epoch": 41.47, "learning_rate": 2.9271022527409918e-05, "loss": 2.119, "step": 8371000 }, { "epoch": 41.48, "learning_rate": 2.9269783940983835e-05, "loss": 2.143, "step": 8371500 }, { "epoch": 41.48, "learning_rate": 2.9268545354557752e-05, "loss": 2.1197, "step": 8372000 }, { "epoch": 41.48, "learning_rate": 2.926730676813167e-05, "loss": 2.1248, "step": 8372500 }, { "epoch": 41.48, "learning_rate": 2.9266068181705586e-05, "loss": 2.1274, "step": 8373000 }, { "epoch": 41.49, "learning_rate": 2.9264829595279503e-05, "loss": 2.107, "step": 8373500 }, { "epoch": 41.49, "learning_rate": 2.926359100885342e-05, "loss": 2.1345, "step": 8374000 }, { "epoch": 41.49, "learning_rate": 2.9262352422427337e-05, "loss": 2.1298, "step": 8374500 }, { "epoch": 41.49, "learning_rate": 2.9261116313174102e-05, "loss": 2.131, "step": 8375000 }, { "epoch": 41.5, "learning_rate": 2.925987772674802e-05, "loss": 2.136, "step": 8375500 }, { "epoch": 41.5, "learning_rate": 2.9258639140321936e-05, "loss": 2.1615, "step": 8376000 }, { "epoch": 41.5, "learning_rate": 2.9257400553895853e-05, "loss": 2.1242, "step": 8376500 }, { "epoch": 41.5, "learning_rate": 2.925616196746977e-05, "loss": 2.1289, "step": 8377000 }, { "epoch": 41.51, "learning_rate": 2.9254925858216535e-05, "loss": 2.1672, "step": 8377500 }, { "epoch": 41.51, "learning_rate": 2.9253687271790452e-05, "loss": 2.1322, "step": 8378000 }, { "epoch": 41.51, "learning_rate": 2.925244868536437e-05, "loss": 2.1499, "step": 8378500 }, { "epoch": 41.51, "learning_rate": 2.9251212576111138e-05, "loss": 2.1332, "step": 8379000 }, { "epoch": 41.51, "learning_rate": 2.9249973989685055e-05, "loss": 2.1311, "step": 8379500 }, { "epoch": 41.52, "learning_rate": 2.924873540325897e-05, "loss": 2.1508, "step": 8380000 }, { "epoch": 41.52, "learning_rate": 2.9247496816832885e-05, "loss": 2.1108, "step": 8380500 }, { "epoch": 41.52, "learning_rate": 2.9246258230406802e-05, "loss": 2.1235, "step": 8381000 }, { "epoch": 41.52, "learning_rate": 2.924501964398072e-05, "loss": 2.0973, "step": 8381500 }, { "epoch": 41.53, "learning_rate": 2.9243781057554636e-05, "loss": 2.129, "step": 8382000 }, { "epoch": 41.53, "learning_rate": 2.9242542471128553e-05, "loss": 2.1297, "step": 8382500 }, { "epoch": 41.53, "learning_rate": 2.924130388470247e-05, "loss": 2.1159, "step": 8383000 }, { "epoch": 41.53, "learning_rate": 2.9240065298276387e-05, "loss": 2.1705, "step": 8383500 }, { "epoch": 41.54, "learning_rate": 2.9238826711850304e-05, "loss": 2.1245, "step": 8384000 }, { "epoch": 41.54, "learning_rate": 2.9237588125424214e-05, "loss": 2.162, "step": 8384500 }, { "epoch": 41.54, "learning_rate": 2.9236352016170986e-05, "loss": 2.1449, "step": 8385000 }, { "epoch": 41.54, "learning_rate": 2.9235113429744903e-05, "loss": 2.1533, "step": 8385500 }, { "epoch": 41.55, "learning_rate": 2.923387484331882e-05, "loss": 2.1242, "step": 8386000 }, { "epoch": 41.55, "learning_rate": 2.9232636256892737e-05, "loss": 2.1126, "step": 8386500 }, { "epoch": 41.55, "learning_rate": 2.9231402624812354e-05, "loss": 2.1357, "step": 8387000 }, { "epoch": 41.55, "learning_rate": 2.923016403838627e-05, "loss": 2.1192, "step": 8387500 }, { "epoch": 41.56, "learning_rate": 2.9228925451960188e-05, "loss": 2.1454, "step": 8388000 }, { "epoch": 41.56, "learning_rate": 2.9227686865534105e-05, "loss": 2.1328, "step": 8388500 }, { "epoch": 41.56, "learning_rate": 2.9226448279108022e-05, "loss": 2.1203, "step": 8389000 }, { "epoch": 41.56, "learning_rate": 2.9225212169854787e-05, "loss": 2.1288, "step": 8389500 }, { "epoch": 41.57, "learning_rate": 2.922397606060156e-05, "loss": 2.1206, "step": 8390000 }, { "epoch": 41.57, "learning_rate": 2.9222737474175476e-05, "loss": 2.1223, "step": 8390500 }, { "epoch": 41.57, "learning_rate": 2.9221498887749393e-05, "loss": 2.1522, "step": 8391000 }, { "epoch": 41.57, "learning_rate": 2.9220260301323303e-05, "loss": 2.1204, "step": 8391500 }, { "epoch": 41.58, "learning_rate": 2.9219024192070076e-05, "loss": 2.1508, "step": 8392000 }, { "epoch": 41.58, "learning_rate": 2.9217785605643993e-05, "loss": 2.1591, "step": 8392500 }, { "epoch": 41.58, "learning_rate": 2.921654701921791e-05, "loss": 2.1387, "step": 8393000 }, { "epoch": 41.58, "learning_rate": 2.9215308432791826e-05, "loss": 2.1316, "step": 8393500 }, { "epoch": 41.59, "learning_rate": 2.9214069846365743e-05, "loss": 2.1476, "step": 8394000 }, { "epoch": 41.59, "learning_rate": 2.9212831259939654e-05, "loss": 2.1287, "step": 8394500 }, { "epoch": 41.59, "learning_rate": 2.921159267351357e-05, "loss": 2.1216, "step": 8395000 }, { "epoch": 41.59, "learning_rate": 2.9210356564260343e-05, "loss": 2.1125, "step": 8395500 }, { "epoch": 41.6, "learning_rate": 2.920911797783426e-05, "loss": 2.144, "step": 8396000 }, { "epoch": 41.6, "learning_rate": 2.9207879391408177e-05, "loss": 2.1285, "step": 8396500 }, { "epoch": 41.6, "learning_rate": 2.9206640804982093e-05, "loss": 2.1291, "step": 8397000 }, { "epoch": 41.6, "learning_rate": 2.920540221855601e-05, "loss": 2.1494, "step": 8397500 }, { "epoch": 41.61, "learning_rate": 2.920416363212992e-05, "loss": 2.1383, "step": 8398000 }, { "epoch": 41.61, "learning_rate": 2.9202927522876693e-05, "loss": 2.1262, "step": 8398500 }, { "epoch": 41.61, "learning_rate": 2.920168893645061e-05, "loss": 2.1102, "step": 8399000 }, { "epoch": 41.61, "learning_rate": 2.9200450350024527e-05, "loss": 2.1289, "step": 8399500 }, { "epoch": 41.62, "learning_rate": 2.9199211763598443e-05, "loss": 2.1496, "step": 8400000 }, { "epoch": 41.62, "learning_rate": 2.919797317717236e-05, "loss": 2.1261, "step": 8400500 }, { "epoch": 41.62, "learning_rate": 2.919673459074627e-05, "loss": 2.1324, "step": 8401000 }, { "epoch": 41.62, "learning_rate": 2.9195496004320187e-05, "loss": 2.1366, "step": 8401500 }, { "epoch": 41.63, "learning_rate": 2.9194257417894104e-05, "loss": 2.1405, "step": 8402000 }, { "epoch": 41.63, "learning_rate": 2.919302378581373e-05, "loss": 2.1709, "step": 8402500 }, { "epoch": 41.63, "learning_rate": 2.9191787676560494e-05, "loss": 2.1422, "step": 8403000 }, { "epoch": 41.63, "learning_rate": 2.919054909013441e-05, "loss": 2.1486, "step": 8403500 }, { "epoch": 41.64, "learning_rate": 2.9189312980881183e-05, "loss": 2.1565, "step": 8404000 }, { "epoch": 41.64, "learning_rate": 2.9188074394455097e-05, "loss": 2.1408, "step": 8404500 }, { "epoch": 41.64, "learning_rate": 2.918683580802901e-05, "loss": 2.1283, "step": 8405000 }, { "epoch": 41.64, "learning_rate": 2.9185597221602927e-05, "loss": 2.1385, "step": 8405500 }, { "epoch": 41.65, "learning_rate": 2.9184358635176844e-05, "loss": 2.1252, "step": 8406000 }, { "epoch": 41.65, "learning_rate": 2.918312004875076e-05, "loss": 2.1343, "step": 8406500 }, { "epoch": 41.65, "learning_rate": 2.9181881462324678e-05, "loss": 2.1697, "step": 8407000 }, { "epoch": 41.65, "learning_rate": 2.9180642875898595e-05, "loss": 2.1211, "step": 8407500 }, { "epoch": 41.66, "learning_rate": 2.9179404289472512e-05, "loss": 2.153, "step": 8408000 }, { "epoch": 41.66, "learning_rate": 2.917816570304643e-05, "loss": 2.1579, "step": 8408500 }, { "epoch": 41.66, "learning_rate": 2.9176927116620346e-05, "loss": 2.155, "step": 8409000 }, { "epoch": 41.66, "learning_rate": 2.9175688530194262e-05, "loss": 2.156, "step": 8409500 }, { "epoch": 41.67, "learning_rate": 2.917444994376818e-05, "loss": 2.1369, "step": 8410000 }, { "epoch": 41.67, "learning_rate": 2.9173213834514945e-05, "loss": 2.1322, "step": 8410500 }, { "epoch": 41.67, "learning_rate": 2.9171975248088862e-05, "loss": 2.1407, "step": 8411000 }, { "epoch": 41.67, "learning_rate": 2.9170739138835627e-05, "loss": 2.1455, "step": 8411500 }, { "epoch": 41.68, "learning_rate": 2.9169500552409544e-05, "loss": 2.1428, "step": 8412000 }, { "epoch": 41.68, "learning_rate": 2.916826196598346e-05, "loss": 2.1273, "step": 8412500 }, { "epoch": 41.68, "learning_rate": 2.9167023379557378e-05, "loss": 2.1484, "step": 8413000 }, { "epoch": 41.68, "learning_rate": 2.916578727030415e-05, "loss": 2.1207, "step": 8413500 }, { "epoch": 41.69, "learning_rate": 2.9164548683878064e-05, "loss": 2.1529, "step": 8414000 }, { "epoch": 41.69, "learning_rate": 2.916331009745198e-05, "loss": 2.1485, "step": 8414500 }, { "epoch": 41.69, "learning_rate": 2.9162071511025894e-05, "loss": 2.1382, "step": 8415000 }, { "epoch": 41.69, "learning_rate": 2.916083292459981e-05, "loss": 2.1455, "step": 8415500 }, { "epoch": 41.7, "learning_rate": 2.9159596815346583e-05, "loss": 2.1401, "step": 8416000 }, { "epoch": 41.7, "learning_rate": 2.91583582289205e-05, "loss": 2.1394, "step": 8416500 }, { "epoch": 41.7, "learning_rate": 2.9157119642494414e-05, "loss": 2.1504, "step": 8417000 }, { "epoch": 41.7, "learning_rate": 2.915588105606833e-05, "loss": 2.1211, "step": 8417500 }, { "epoch": 41.71, "learning_rate": 2.9154642469642248e-05, "loss": 2.1529, "step": 8418000 }, { "epoch": 41.71, "learning_rate": 2.915340388321616e-05, "loss": 2.1283, "step": 8418500 }, { "epoch": 41.71, "learning_rate": 2.9152165296790078e-05, "loss": 2.1441, "step": 8419000 }, { "epoch": 41.71, "learning_rate": 2.9150926710363995e-05, "loss": 2.1296, "step": 8419500 }, { "epoch": 41.72, "learning_rate": 2.9149688123937912e-05, "loss": 2.1416, "step": 8420000 }, { "epoch": 41.72, "learning_rate": 2.914844953751183e-05, "loss": 2.1093, "step": 8420500 }, { "epoch": 41.72, "learning_rate": 2.9147210951085746e-05, "loss": 2.1066, "step": 8421000 }, { "epoch": 41.72, "learning_rate": 2.9145972364659663e-05, "loss": 2.1342, "step": 8421500 }, { "epoch": 41.73, "learning_rate": 2.9144736255406428e-05, "loss": 2.1271, "step": 8422000 }, { "epoch": 41.73, "learning_rate": 2.91435001461532e-05, "loss": 2.1558, "step": 8422500 }, { "epoch": 41.73, "learning_rate": 2.9142261559727117e-05, "loss": 2.1436, "step": 8423000 }, { "epoch": 41.73, "learning_rate": 2.914102297330103e-05, "loss": 2.142, "step": 8423500 }, { "epoch": 41.74, "learning_rate": 2.9139784386874948e-05, "loss": 2.1283, "step": 8424000 }, { "epoch": 41.74, "learning_rate": 2.913854827762172e-05, "loss": 2.1923, "step": 8424500 }, { "epoch": 41.74, "learning_rate": 2.9137309691195637e-05, "loss": 2.1207, "step": 8425000 }, { "epoch": 41.74, "learning_rate": 2.9136071104769554e-05, "loss": 2.1158, "step": 8425500 }, { "epoch": 41.75, "learning_rate": 2.9134832518343467e-05, "loss": 2.1432, "step": 8426000 }, { "epoch": 41.75, "learning_rate": 2.913359393191738e-05, "loss": 2.1343, "step": 8426500 }, { "epoch": 41.75, "learning_rate": 2.9132355345491298e-05, "loss": 2.1268, "step": 8427000 }, { "epoch": 41.75, "learning_rate": 2.9131116759065215e-05, "loss": 2.1485, "step": 8427500 }, { "epoch": 41.76, "learning_rate": 2.912987817263913e-05, "loss": 2.1306, "step": 8428000 }, { "epoch": 41.76, "learning_rate": 2.9128639586213045e-05, "loss": 2.1375, "step": 8428500 }, { "epoch": 41.76, "learning_rate": 2.912740347695982e-05, "loss": 2.1313, "step": 8429000 }, { "epoch": 41.76, "learning_rate": 2.912616489053373e-05, "loss": 2.1361, "step": 8429500 }, { "epoch": 41.77, "learning_rate": 2.9124926304107648e-05, "loss": 2.1361, "step": 8430000 }, { "epoch": 41.77, "learning_rate": 2.9123687717681565e-05, "loss": 2.1307, "step": 8430500 }, { "epoch": 41.77, "learning_rate": 2.9122449131255482e-05, "loss": 2.1288, "step": 8431000 }, { "epoch": 41.77, "learning_rate": 2.91212105448294e-05, "loss": 2.1391, "step": 8431500 }, { "epoch": 41.78, "learning_rate": 2.9119971958403312e-05, "loss": 2.153, "step": 8432000 }, { "epoch": 41.78, "learning_rate": 2.911873337197723e-05, "loss": 2.1335, "step": 8432500 }, { "epoch": 41.78, "learning_rate": 2.9117494785551146e-05, "loss": 2.1287, "step": 8433000 }, { "epoch": 41.78, "learning_rate": 2.9116258676297915e-05, "loss": 2.1767, "step": 8433500 }, { "epoch": 41.78, "learning_rate": 2.9115020089871832e-05, "loss": 2.1246, "step": 8434000 }, { "epoch": 41.79, "learning_rate": 2.911378150344575e-05, "loss": 2.1293, "step": 8434500 }, { "epoch": 41.79, "learning_rate": 2.911254787136537e-05, "loss": 2.1431, "step": 8435000 }, { "epoch": 41.79, "learning_rate": 2.9111311762112138e-05, "loss": 2.1408, "step": 8435500 }, { "epoch": 41.79, "learning_rate": 2.9110073175686052e-05, "loss": 2.1463, "step": 8436000 }, { "epoch": 41.8, "learning_rate": 2.910883706643282e-05, "loss": 2.1463, "step": 8436500 }, { "epoch": 41.8, "learning_rate": 2.9107598480006737e-05, "loss": 2.1399, "step": 8437000 }, { "epoch": 41.8, "learning_rate": 2.9106359893580654e-05, "loss": 2.1302, "step": 8437500 }, { "epoch": 41.8, "learning_rate": 2.910512130715457e-05, "loss": 2.1603, "step": 8438000 }, { "epoch": 41.81, "learning_rate": 2.9103882720728488e-05, "loss": 2.1519, "step": 8438500 }, { "epoch": 41.81, "learning_rate": 2.9102644134302405e-05, "loss": 2.1522, "step": 8439000 }, { "epoch": 41.81, "learning_rate": 2.910140554787632e-05, "loss": 2.1143, "step": 8439500 }, { "epoch": 41.81, "learning_rate": 2.9100166961450236e-05, "loss": 2.1548, "step": 8440000 }, { "epoch": 41.82, "learning_rate": 2.9098928375024153e-05, "loss": 2.1282, "step": 8440500 }, { "epoch": 41.82, "learning_rate": 2.909768978859807e-05, "loss": 2.1248, "step": 8441000 }, { "epoch": 41.82, "learning_rate": 2.9096451202171986e-05, "loss": 2.1526, "step": 8441500 }, { "epoch": 41.82, "learning_rate": 2.9095212615745903e-05, "loss": 2.1415, "step": 8442000 }, { "epoch": 41.83, "learning_rate": 2.9093976506492672e-05, "loss": 2.1274, "step": 8442500 }, { "epoch": 41.83, "learning_rate": 2.9092737920066586e-05, "loss": 2.1211, "step": 8443000 }, { "epoch": 41.83, "learning_rate": 2.9091499333640503e-05, "loss": 2.1249, "step": 8443500 }, { "epoch": 41.83, "learning_rate": 2.909026074721442e-05, "loss": 2.1274, "step": 8444000 }, { "epoch": 41.84, "learning_rate": 2.9089022160788337e-05, "loss": 2.1472, "step": 8444500 }, { "epoch": 41.84, "learning_rate": 2.9087783574362253e-05, "loss": 2.1421, "step": 8445000 }, { "epoch": 41.84, "learning_rate": 2.908654498793617e-05, "loss": 2.1551, "step": 8445500 }, { "epoch": 41.84, "learning_rate": 2.9085306401510087e-05, "loss": 2.1316, "step": 8446000 }, { "epoch": 41.85, "learning_rate": 2.9084070292256853e-05, "loss": 2.1215, "step": 8446500 }, { "epoch": 41.85, "learning_rate": 2.908283170583077e-05, "loss": 2.1285, "step": 8447000 }, { "epoch": 41.85, "learning_rate": 2.9081593119404687e-05, "loss": 2.1392, "step": 8447500 }, { "epoch": 41.85, "learning_rate": 2.9080357010151455e-05, "loss": 2.1483, "step": 8448000 }, { "epoch": 41.86, "learning_rate": 2.9079118423725372e-05, "loss": 2.1226, "step": 8448500 }, { "epoch": 41.86, "learning_rate": 2.907787983729929e-05, "loss": 2.1187, "step": 8449000 }, { "epoch": 41.86, "learning_rate": 2.9076643728046055e-05, "loss": 2.1463, "step": 8449500 }, { "epoch": 41.86, "learning_rate": 2.907540514161997e-05, "loss": 2.1373, "step": 8450000 }, { "epoch": 41.87, "learning_rate": 2.907416655519389e-05, "loss": 2.1323, "step": 8450500 }, { "epoch": 41.87, "learning_rate": 2.9072927968767805e-05, "loss": 2.1525, "step": 8451000 }, { "epoch": 41.87, "learning_rate": 2.9071691859514578e-05, "loss": 2.1387, "step": 8451500 }, { "epoch": 41.87, "learning_rate": 2.9070453273088488e-05, "loss": 2.1399, "step": 8452000 }, { "epoch": 41.88, "learning_rate": 2.9069214686662405e-05, "loss": 2.168, "step": 8452500 }, { "epoch": 41.88, "learning_rate": 2.906797610023632e-05, "loss": 2.1646, "step": 8453000 }, { "epoch": 41.88, "learning_rate": 2.906673751381024e-05, "loss": 2.1124, "step": 8453500 }, { "epoch": 41.88, "learning_rate": 2.9065498927384156e-05, "loss": 2.1099, "step": 8454000 }, { "epoch": 41.89, "learning_rate": 2.9064260340958072e-05, "loss": 2.1233, "step": 8454500 }, { "epoch": 41.89, "learning_rate": 2.906302175453199e-05, "loss": 2.1602, "step": 8455000 }, { "epoch": 41.89, "learning_rate": 2.9061783168105906e-05, "loss": 2.125, "step": 8455500 }, { "epoch": 41.89, "learning_rate": 2.9060544581679823e-05, "loss": 2.1409, "step": 8456000 }, { "epoch": 41.9, "learning_rate": 2.9059305995253737e-05, "loss": 2.1427, "step": 8456500 }, { "epoch": 41.9, "learning_rate": 2.9058067408827654e-05, "loss": 2.1436, "step": 8457000 }, { "epoch": 41.9, "learning_rate": 2.905682882240157e-05, "loss": 2.1507, "step": 8457500 }, { "epoch": 41.9, "learning_rate": 2.9055590235975488e-05, "loss": 2.1475, "step": 8458000 }, { "epoch": 41.91, "learning_rate": 2.9054354126722256e-05, "loss": 2.134, "step": 8458500 }, { "epoch": 41.91, "learning_rate": 2.9053118017469022e-05, "loss": 2.1492, "step": 8459000 }, { "epoch": 41.91, "learning_rate": 2.905187943104294e-05, "loss": 2.1508, "step": 8459500 }, { "epoch": 41.91, "learning_rate": 2.9050640844616856e-05, "loss": 2.1363, "step": 8460000 }, { "epoch": 41.92, "learning_rate": 2.9049402258190773e-05, "loss": 2.1375, "step": 8460500 }, { "epoch": 41.92, "learning_rate": 2.9048166148937545e-05, "loss": 2.1281, "step": 8461000 }, { "epoch": 41.92, "learning_rate": 2.9046927562511455e-05, "loss": 2.1504, "step": 8461500 }, { "epoch": 41.92, "learning_rate": 2.9045691453258227e-05, "loss": 2.1415, "step": 8462000 }, { "epoch": 41.93, "learning_rate": 2.9044452866832144e-05, "loss": 2.1507, "step": 8462500 }, { "epoch": 41.93, "learning_rate": 2.904321428040606e-05, "loss": 2.145, "step": 8463000 }, { "epoch": 41.93, "learning_rate": 2.9041975693979978e-05, "loss": 2.1367, "step": 8463500 }, { "epoch": 41.93, "learning_rate": 2.9040737107553895e-05, "loss": 2.1299, "step": 8464000 }, { "epoch": 41.94, "learning_rate": 2.9039498521127805e-05, "loss": 2.1509, "step": 8464500 }, { "epoch": 41.94, "learning_rate": 2.9038259934701722e-05, "loss": 2.1192, "step": 8465000 }, { "epoch": 41.94, "learning_rate": 2.903702134827564e-05, "loss": 2.1491, "step": 8465500 }, { "epoch": 41.94, "learning_rate": 2.9035782761849556e-05, "loss": 2.1115, "step": 8466000 }, { "epoch": 41.95, "learning_rate": 2.9034546652596328e-05, "loss": 2.1329, "step": 8466500 }, { "epoch": 41.95, "learning_rate": 2.9033308066170245e-05, "loss": 2.1451, "step": 8467000 }, { "epoch": 41.95, "learning_rate": 2.9032069479744162e-05, "loss": 2.1353, "step": 8467500 }, { "epoch": 41.95, "learning_rate": 2.9030830893318072e-05, "loss": 2.1526, "step": 8468000 }, { "epoch": 41.96, "learning_rate": 2.902959230689199e-05, "loss": 2.1579, "step": 8468500 }, { "epoch": 41.96, "learning_rate": 2.9028353720465906e-05, "loss": 2.1244, "step": 8469000 }, { "epoch": 41.96, "learning_rate": 2.9027115134039823e-05, "loss": 2.118, "step": 8469500 }, { "epoch": 41.96, "learning_rate": 2.902587654761374e-05, "loss": 2.1435, "step": 8470000 }, { "epoch": 41.97, "learning_rate": 2.9024640438360512e-05, "loss": 2.1342, "step": 8470500 }, { "epoch": 41.97, "learning_rate": 2.9023401851934422e-05, "loss": 2.1599, "step": 8471000 }, { "epoch": 41.97, "learning_rate": 2.902216326550834e-05, "loss": 2.1317, "step": 8471500 }, { "epoch": 41.97, "learning_rate": 2.902092715625511e-05, "loss": 2.1549, "step": 8472000 }, { "epoch": 41.98, "learning_rate": 2.9019688569829028e-05, "loss": 2.1565, "step": 8472500 }, { "epoch": 41.98, "learning_rate": 2.9018449983402945e-05, "loss": 2.1443, "step": 8473000 }, { "epoch": 41.98, "learning_rate": 2.9017211396976862e-05, "loss": 2.1285, "step": 8473500 }, { "epoch": 41.98, "learning_rate": 2.9015972810550772e-05, "loss": 2.1361, "step": 8474000 }, { "epoch": 41.99, "learning_rate": 2.901473422412469e-05, "loss": 2.1618, "step": 8474500 }, { "epoch": 41.99, "learning_rate": 2.9013495637698606e-05, "loss": 2.1443, "step": 8475000 }, { "epoch": 41.99, "learning_rate": 2.9012257051272523e-05, "loss": 2.1275, "step": 8475500 }, { "epoch": 41.99, "learning_rate": 2.901101846484644e-05, "loss": 2.1537, "step": 8476000 }, { "epoch": 42.0, "learning_rate": 2.9009782355593212e-05, "loss": 2.1263, "step": 8476500 }, { "epoch": 42.0, "learning_rate": 2.9008543769167122e-05, "loss": 2.1474, "step": 8477000 }, { "epoch": 42.0, "eval_accuracy": 0.6659192271443328, "eval_accuracy_mlm": 0.6233416215883688, "eval_accuracy_nsp": 0.8668021132809589, "eval_loss": 2.2915704250335693, "eval_runtime": 147.2318, "eval_samples_per_second": 1731.684, "eval_steps_per_second": 72.158, "step": 8477406 }, { "epoch": 42.0, "learning_rate": 2.9007307659913894e-05, "loss": 2.1259, "step": 8477500 }, { "epoch": 42.0, "learning_rate": 2.900606907348781e-05, "loss": 2.1178, "step": 8478000 }, { "epoch": 42.01, "learning_rate": 2.9004830487061728e-05, "loss": 2.1113, "step": 8478500 }, { "epoch": 42.01, "learning_rate": 2.9003591900635645e-05, "loss": 2.1324, "step": 8479000 }, { "epoch": 42.01, "learning_rate": 2.9002353314209562e-05, "loss": 2.1038, "step": 8479500 }, { "epoch": 42.01, "learning_rate": 2.900111472778348e-05, "loss": 2.0947, "step": 8480000 }, { "epoch": 42.02, "learning_rate": 2.899987614135739e-05, "loss": 2.1054, "step": 8480500 }, { "epoch": 42.02, "learning_rate": 2.8998637554931306e-05, "loss": 2.1242, "step": 8481000 }, { "epoch": 42.02, "learning_rate": 2.8997398968505223e-05, "loss": 2.1079, "step": 8481500 }, { "epoch": 42.02, "learning_rate": 2.899616038207914e-05, "loss": 2.1011, "step": 8482000 }, { "epoch": 42.03, "learning_rate": 2.8994921795653057e-05, "loss": 2.1207, "step": 8482500 }, { "epoch": 42.03, "learning_rate": 2.8993683209226974e-05, "loss": 2.1272, "step": 8483000 }, { "epoch": 42.03, "learning_rate": 2.899244462280089e-05, "loss": 2.1227, "step": 8483500 }, { "epoch": 42.03, "learning_rate": 2.8991208513547656e-05, "loss": 2.1003, "step": 8484000 }, { "epoch": 42.04, "learning_rate": 2.8989969927121573e-05, "loss": 2.0905, "step": 8484500 }, { "epoch": 42.04, "learning_rate": 2.898873134069549e-05, "loss": 2.1036, "step": 8485000 }, { "epoch": 42.04, "learning_rate": 2.8987492754269407e-05, "loss": 2.1024, "step": 8485500 }, { "epoch": 42.04, "learning_rate": 2.898625664501618e-05, "loss": 2.1125, "step": 8486000 }, { "epoch": 42.05, "learning_rate": 2.8985020535762948e-05, "loss": 2.1016, "step": 8486500 }, { "epoch": 42.05, "learning_rate": 2.8983781949336865e-05, "loss": 2.1164, "step": 8487000 }, { "epoch": 42.05, "learning_rate": 2.898254336291078e-05, "loss": 2.0769, "step": 8487500 }, { "epoch": 42.05, "learning_rate": 2.8981307253657547e-05, "loss": 2.1303, "step": 8488000 }, { "epoch": 42.05, "learning_rate": 2.8980068667231464e-05, "loss": 2.1097, "step": 8488500 }, { "epoch": 42.06, "learning_rate": 2.897883008080538e-05, "loss": 2.1163, "step": 8489000 }, { "epoch": 42.06, "learning_rate": 2.8977591494379298e-05, "loss": 2.1295, "step": 8489500 }, { "epoch": 42.06, "learning_rate": 2.8976352907953215e-05, "loss": 2.1158, "step": 8490000 }, { "epoch": 42.06, "learning_rate": 2.8975114321527132e-05, "loss": 2.1081, "step": 8490500 }, { "epoch": 42.07, "learning_rate": 2.8973875735101045e-05, "loss": 2.1096, "step": 8491000 }, { "epoch": 42.07, "learning_rate": 2.8972637148674962e-05, "loss": 2.1238, "step": 8491500 }, { "epoch": 42.07, "learning_rate": 2.897139856224888e-05, "loss": 2.1206, "step": 8492000 }, { "epoch": 42.07, "learning_rate": 2.8970159975822796e-05, "loss": 2.1224, "step": 8492500 }, { "epoch": 42.08, "learning_rate": 2.896892138939671e-05, "loss": 2.1358, "step": 8493000 }, { "epoch": 42.08, "learning_rate": 2.8967682802970623e-05, "loss": 2.1078, "step": 8493500 }, { "epoch": 42.08, "learning_rate": 2.896644421654454e-05, "loss": 2.1236, "step": 8494000 }, { "epoch": 42.08, "learning_rate": 2.8965208107291312e-05, "loss": 2.0958, "step": 8494500 }, { "epoch": 42.09, "learning_rate": 2.896396952086523e-05, "loss": 2.1156, "step": 8495000 }, { "epoch": 42.09, "learning_rate": 2.8962730934439146e-05, "loss": 2.1164, "step": 8495500 }, { "epoch": 42.09, "learning_rate": 2.896149234801306e-05, "loss": 2.1109, "step": 8496000 }, { "epoch": 42.09, "learning_rate": 2.8960253761586977e-05, "loss": 2.1287, "step": 8496500 }, { "epoch": 42.1, "learning_rate": 2.895901765233375e-05, "loss": 2.1049, "step": 8497000 }, { "epoch": 42.1, "learning_rate": 2.8957779065907666e-05, "loss": 2.1247, "step": 8497500 }, { "epoch": 42.1, "learning_rate": 2.895654047948158e-05, "loss": 2.1018, "step": 8498000 }, { "epoch": 42.1, "learning_rate": 2.8955301893055496e-05, "loss": 2.113, "step": 8498500 }, { "epoch": 42.11, "learning_rate": 2.895406330662941e-05, "loss": 2.0945, "step": 8499000 }, { "epoch": 42.11, "learning_rate": 2.8952827197376182e-05, "loss": 2.1102, "step": 8499500 }, { "epoch": 42.11, "learning_rate": 2.89515886109501e-05, "loss": 2.1103, "step": 8500000 }, { "epoch": 42.11, "learning_rate": 2.8950352501696864e-05, "loss": 2.1171, "step": 8500500 }, { "epoch": 42.12, "learning_rate": 2.894911391527078e-05, "loss": 2.1296, "step": 8501000 }, { "epoch": 42.12, "learning_rate": 2.8947875328844698e-05, "loss": 2.0903, "step": 8501500 }, { "epoch": 42.12, "learning_rate": 2.8946636742418615e-05, "loss": 2.1263, "step": 8502000 }, { "epoch": 42.12, "learning_rate": 2.8945398155992532e-05, "loss": 2.1154, "step": 8502500 }, { "epoch": 42.13, "learning_rate": 2.894415956956645e-05, "loss": 2.1165, "step": 8503000 }, { "epoch": 42.13, "learning_rate": 2.8942920983140366e-05, "loss": 2.1031, "step": 8503500 }, { "epoch": 42.13, "learning_rate": 2.8941682396714283e-05, "loss": 2.1163, "step": 8504000 }, { "epoch": 42.13, "learning_rate": 2.8940443810288196e-05, "loss": 2.1077, "step": 8504500 }, { "epoch": 42.14, "learning_rate": 2.8939205223862113e-05, "loss": 2.1178, "step": 8505000 }, { "epoch": 42.14, "learning_rate": 2.8937969114608882e-05, "loss": 2.107, "step": 8505500 }, { "epoch": 42.14, "learning_rate": 2.89367305281828e-05, "loss": 2.1164, "step": 8506000 }, { "epoch": 42.14, "learning_rate": 2.8935491941756716e-05, "loss": 2.1103, "step": 8506500 }, { "epoch": 42.15, "learning_rate": 2.8934253355330633e-05, "loss": 2.1336, "step": 8507000 }, { "epoch": 42.15, "learning_rate": 2.893301476890455e-05, "loss": 2.1166, "step": 8507500 }, { "epoch": 42.15, "learning_rate": 2.8931776182478463e-05, "loss": 2.1007, "step": 8508000 }, { "epoch": 42.15, "learning_rate": 2.8930537596052377e-05, "loss": 2.1209, "step": 8508500 }, { "epoch": 42.16, "learning_rate": 2.8929299009626294e-05, "loss": 2.1221, "step": 8509000 }, { "epoch": 42.16, "learning_rate": 2.892806042320021e-05, "loss": 2.1299, "step": 8509500 }, { "epoch": 42.16, "learning_rate": 2.892682679111983e-05, "loss": 2.1295, "step": 8510000 }, { "epoch": 42.16, "learning_rate": 2.892558820469375e-05, "loss": 2.1347, "step": 8510500 }, { "epoch": 42.17, "learning_rate": 2.8924349618267665e-05, "loss": 2.1172, "step": 8511000 }, { "epoch": 42.17, "learning_rate": 2.8923111031841582e-05, "loss": 2.1301, "step": 8511500 }, { "epoch": 42.17, "learning_rate": 2.8921877399761206e-05, "loss": 2.1204, "step": 8512000 }, { "epoch": 42.17, "learning_rate": 2.8920638813335117e-05, "loss": 2.1335, "step": 8512500 }, { "epoch": 42.18, "learning_rate": 2.8919400226909033e-05, "loss": 2.133, "step": 8513000 }, { "epoch": 42.18, "learning_rate": 2.891816164048295e-05, "loss": 2.124, "step": 8513500 }, { "epoch": 42.18, "learning_rate": 2.8916923054056867e-05, "loss": 2.1245, "step": 8514000 }, { "epoch": 42.18, "learning_rate": 2.8915684467630784e-05, "loss": 2.1043, "step": 8514500 }, { "epoch": 42.19, "learning_rate": 2.8914445881204698e-05, "loss": 2.1146, "step": 8515000 }, { "epoch": 42.19, "learning_rate": 2.8913207294778615e-05, "loss": 2.1036, "step": 8515500 }, { "epoch": 42.19, "learning_rate": 2.891196870835253e-05, "loss": 2.1213, "step": 8516000 }, { "epoch": 42.19, "learning_rate": 2.891073012192645e-05, "loss": 2.1207, "step": 8516500 }, { "epoch": 42.2, "learning_rate": 2.8909491535500365e-05, "loss": 2.098, "step": 8517000 }, { "epoch": 42.2, "learning_rate": 2.8908252949074282e-05, "loss": 2.1246, "step": 8517500 }, { "epoch": 42.2, "learning_rate": 2.89070143626482e-05, "loss": 2.1268, "step": 8518000 }, { "epoch": 42.2, "learning_rate": 2.8905775776222116e-05, "loss": 2.1292, "step": 8518500 }, { "epoch": 42.21, "learning_rate": 2.8904537189796033e-05, "loss": 2.1361, "step": 8519000 }, { "epoch": 42.21, "learning_rate": 2.890329860336995e-05, "loss": 2.1077, "step": 8519500 }, { "epoch": 42.21, "learning_rate": 2.8902062494116716e-05, "loss": 2.1086, "step": 8520000 }, { "epoch": 42.21, "learning_rate": 2.8900823907690632e-05, "loss": 2.1159, "step": 8520500 }, { "epoch": 42.22, "learning_rate": 2.889958532126455e-05, "loss": 2.1365, "step": 8521000 }, { "epoch": 42.22, "learning_rate": 2.8898346734838466e-05, "loss": 2.1401, "step": 8521500 }, { "epoch": 42.22, "learning_rate": 2.8897110625585232e-05, "loss": 2.14, "step": 8522000 }, { "epoch": 42.22, "learning_rate": 2.889587203915915e-05, "loss": 2.1256, "step": 8522500 }, { "epoch": 42.23, "learning_rate": 2.8894633452733066e-05, "loss": 2.1372, "step": 8523000 }, { "epoch": 42.23, "learning_rate": 2.8893394866306983e-05, "loss": 2.106, "step": 8523500 }, { "epoch": 42.23, "learning_rate": 2.88921562798809e-05, "loss": 2.1198, "step": 8524000 }, { "epoch": 42.23, "learning_rate": 2.8890917693454816e-05, "loss": 2.116, "step": 8524500 }, { "epoch": 42.24, "learning_rate": 2.8889679107028733e-05, "loss": 2.1323, "step": 8525000 }, { "epoch": 42.24, "learning_rate": 2.888844052060265e-05, "loss": 2.1092, "step": 8525500 }, { "epoch": 42.24, "learning_rate": 2.8887201934176567e-05, "loss": 2.1306, "step": 8526000 }, { "epoch": 42.24, "learning_rate": 2.8885963347750484e-05, "loss": 2.1348, "step": 8526500 }, { "epoch": 42.25, "learning_rate": 2.8884724761324394e-05, "loss": 2.1008, "step": 8527000 }, { "epoch": 42.25, "learning_rate": 2.8883488652071166e-05, "loss": 2.1155, "step": 8527500 }, { "epoch": 42.25, "learning_rate": 2.8882250065645083e-05, "loss": 2.1222, "step": 8528000 }, { "epoch": 42.25, "learning_rate": 2.8881011479219e-05, "loss": 2.1194, "step": 8528500 }, { "epoch": 42.26, "learning_rate": 2.8879775369965766e-05, "loss": 2.1171, "step": 8529000 }, { "epoch": 42.26, "learning_rate": 2.8878536783539683e-05, "loss": 2.124, "step": 8529500 }, { "epoch": 42.26, "learning_rate": 2.88772981971136e-05, "loss": 2.1497, "step": 8530000 }, { "epoch": 42.26, "learning_rate": 2.8876059610687517e-05, "loss": 2.1213, "step": 8530500 }, { "epoch": 42.27, "learning_rate": 2.8874821024261433e-05, "loss": 2.1205, "step": 8531000 }, { "epoch": 42.27, "learning_rate": 2.887358243783535e-05, "loss": 2.122, "step": 8531500 }, { "epoch": 42.27, "learning_rate": 2.8872343851409267e-05, "loss": 2.1159, "step": 8532000 }, { "epoch": 42.27, "learning_rate": 2.8871105264983184e-05, "loss": 2.1243, "step": 8532500 }, { "epoch": 42.28, "learning_rate": 2.88698666785571e-05, "loss": 2.137, "step": 8533000 }, { "epoch": 42.28, "learning_rate": 2.886862809213101e-05, "loss": 2.1156, "step": 8533500 }, { "epoch": 42.28, "learning_rate": 2.8867391982877784e-05, "loss": 2.118, "step": 8534000 }, { "epoch": 42.28, "learning_rate": 2.88661533964517e-05, "loss": 2.1494, "step": 8534500 }, { "epoch": 42.29, "learning_rate": 2.8864914810025617e-05, "loss": 2.1224, "step": 8535000 }, { "epoch": 42.29, "learning_rate": 2.8863676223599534e-05, "loss": 2.0861, "step": 8535500 }, { "epoch": 42.29, "learning_rate": 2.886243763717345e-05, "loss": 2.0973, "step": 8536000 }, { "epoch": 42.29, "learning_rate": 2.8861201527920217e-05, "loss": 2.1164, "step": 8536500 }, { "epoch": 42.3, "learning_rate": 2.8859965418666985e-05, "loss": 2.1171, "step": 8537000 }, { "epoch": 42.3, "learning_rate": 2.8858726832240902e-05, "loss": 2.0964, "step": 8537500 }, { "epoch": 42.3, "learning_rate": 2.885748824581482e-05, "loss": 2.1539, "step": 8538000 }, { "epoch": 42.3, "learning_rate": 2.8856249659388733e-05, "loss": 2.1208, "step": 8538500 }, { "epoch": 42.31, "learning_rate": 2.88550135501355e-05, "loss": 2.1254, "step": 8539000 }, { "epoch": 42.31, "learning_rate": 2.885377496370942e-05, "loss": 2.1377, "step": 8539500 }, { "epoch": 42.31, "learning_rate": 2.8852536377283336e-05, "loss": 2.1413, "step": 8540000 }, { "epoch": 42.31, "learning_rate": 2.8851297790857252e-05, "loss": 2.1186, "step": 8540500 }, { "epoch": 42.32, "learning_rate": 2.8850064158776873e-05, "loss": 2.1397, "step": 8541000 }, { "epoch": 42.32, "learning_rate": 2.884882557235079e-05, "loss": 2.1496, "step": 8541500 }, { "epoch": 42.32, "learning_rate": 2.8847586985924707e-05, "loss": 2.1121, "step": 8542000 }, { "epoch": 42.32, "learning_rate": 2.8846348399498624e-05, "loss": 2.1495, "step": 8542500 }, { "epoch": 42.32, "learning_rate": 2.884510981307254e-05, "loss": 2.1494, "step": 8543000 }, { "epoch": 42.33, "learning_rate": 2.884387122664645e-05, "loss": 2.1142, "step": 8543500 }, { "epoch": 42.33, "learning_rate": 2.8842632640220368e-05, "loss": 2.1129, "step": 8544000 }, { "epoch": 42.33, "learning_rate": 2.8841394053794285e-05, "loss": 2.1168, "step": 8544500 }, { "epoch": 42.33, "learning_rate": 2.8840155467368202e-05, "loss": 2.0973, "step": 8545000 }, { "epoch": 42.34, "learning_rate": 2.883891688094212e-05, "loss": 2.1386, "step": 8545500 }, { "epoch": 42.34, "learning_rate": 2.8837678294516036e-05, "loss": 2.1181, "step": 8546000 }, { "epoch": 42.34, "learning_rate": 2.8836439708089953e-05, "loss": 2.1262, "step": 8546500 }, { "epoch": 42.34, "learning_rate": 2.883520112166387e-05, "loss": 2.1434, "step": 8547000 }, { "epoch": 42.35, "learning_rate": 2.8833962535237786e-05, "loss": 2.1227, "step": 8547500 }, { "epoch": 42.35, "learning_rate": 2.8832723948811703e-05, "loss": 2.1022, "step": 8548000 }, { "epoch": 42.35, "learning_rate": 2.883148783955847e-05, "loss": 2.1319, "step": 8548500 }, { "epoch": 42.35, "learning_rate": 2.8830249253132386e-05, "loss": 2.1336, "step": 8549000 }, { "epoch": 42.36, "learning_rate": 2.8829010666706303e-05, "loss": 2.1172, "step": 8549500 }, { "epoch": 42.36, "learning_rate": 2.882777208028022e-05, "loss": 2.1306, "step": 8550000 }, { "epoch": 42.36, "learning_rate": 2.8826533493854136e-05, "loss": 2.1309, "step": 8550500 }, { "epoch": 42.36, "learning_rate": 2.8825294907428053e-05, "loss": 2.1159, "step": 8551000 }, { "epoch": 42.37, "learning_rate": 2.882405632100197e-05, "loss": 2.1363, "step": 8551500 }, { "epoch": 42.37, "learning_rate": 2.8822820211748736e-05, "loss": 2.1015, "step": 8552000 }, { "epoch": 42.37, "learning_rate": 2.8821581625322653e-05, "loss": 2.1308, "step": 8552500 }, { "epoch": 42.37, "learning_rate": 2.882034303889657e-05, "loss": 2.1325, "step": 8553000 }, { "epoch": 42.38, "learning_rate": 2.8819104452470487e-05, "loss": 2.1544, "step": 8553500 }, { "epoch": 42.38, "learning_rate": 2.8817865866044403e-05, "loss": 2.1116, "step": 8554000 }, { "epoch": 42.38, "learning_rate": 2.881662727961832e-05, "loss": 2.1356, "step": 8554500 }, { "epoch": 42.38, "learning_rate": 2.8815388693192237e-05, "loss": 2.136, "step": 8555000 }, { "epoch": 42.39, "learning_rate": 2.8814152583939003e-05, "loss": 2.1433, "step": 8555500 }, { "epoch": 42.39, "learning_rate": 2.881291399751292e-05, "loss": 2.1175, "step": 8556000 }, { "epoch": 42.39, "learning_rate": 2.8811675411086837e-05, "loss": 2.1325, "step": 8556500 }, { "epoch": 42.39, "learning_rate": 2.8810439301833602e-05, "loss": 2.104, "step": 8557000 }, { "epoch": 42.4, "learning_rate": 2.880920071540752e-05, "loss": 2.121, "step": 8557500 }, { "epoch": 42.4, "learning_rate": 2.8807962128981436e-05, "loss": 2.1491, "step": 8558000 }, { "epoch": 42.4, "learning_rate": 2.8806723542555353e-05, "loss": 2.118, "step": 8558500 }, { "epoch": 42.4, "learning_rate": 2.8805487433302118e-05, "loss": 2.1308, "step": 8559000 }, { "epoch": 42.41, "learning_rate": 2.8804248846876035e-05, "loss": 2.1325, "step": 8559500 }, { "epoch": 42.41, "learning_rate": 2.8803010260449952e-05, "loss": 2.1099, "step": 8560000 }, { "epoch": 42.41, "learning_rate": 2.880177167402387e-05, "loss": 2.1355, "step": 8560500 }, { "epoch": 42.41, "learning_rate": 2.8800533087597786e-05, "loss": 2.1114, "step": 8561000 }, { "epoch": 42.42, "learning_rate": 2.8799296978344558e-05, "loss": 2.1126, "step": 8561500 }, { "epoch": 42.42, "learning_rate": 2.8798058391918475e-05, "loss": 2.1395, "step": 8562000 }, { "epoch": 42.42, "learning_rate": 2.8796819805492385e-05, "loss": 2.1303, "step": 8562500 }, { "epoch": 42.42, "learning_rate": 2.8795581219066302e-05, "loss": 2.1358, "step": 8563000 }, { "epoch": 42.43, "learning_rate": 2.879434263264022e-05, "loss": 2.0967, "step": 8563500 }, { "epoch": 42.43, "learning_rate": 2.8793104046214136e-05, "loss": 2.1334, "step": 8564000 }, { "epoch": 42.43, "learning_rate": 2.8791867936960908e-05, "loss": 2.1188, "step": 8564500 }, { "epoch": 42.43, "learning_rate": 2.8790629350534825e-05, "loss": 2.1181, "step": 8565000 }, { "epoch": 42.44, "learning_rate": 2.8789390764108735e-05, "loss": 2.1396, "step": 8565500 }, { "epoch": 42.44, "learning_rate": 2.8788152177682652e-05, "loss": 2.1146, "step": 8566000 }, { "epoch": 42.44, "learning_rate": 2.878691359125657e-05, "loss": 2.0896, "step": 8566500 }, { "epoch": 42.44, "learning_rate": 2.8785675004830486e-05, "loss": 2.1405, "step": 8567000 }, { "epoch": 42.45, "learning_rate": 2.8784436418404403e-05, "loss": 2.1434, "step": 8567500 }, { "epoch": 42.45, "learning_rate": 2.8783200309151175e-05, "loss": 2.1075, "step": 8568000 }, { "epoch": 42.45, "learning_rate": 2.878196172272509e-05, "loss": 2.1153, "step": 8568500 }, { "epoch": 42.45, "learning_rate": 2.8780723136299002e-05, "loss": 2.1159, "step": 8569000 }, { "epoch": 42.46, "learning_rate": 2.877948454987292e-05, "loss": 2.1308, "step": 8569500 }, { "epoch": 42.46, "learning_rate": 2.8778245963446836e-05, "loss": 2.1051, "step": 8570000 }, { "epoch": 42.46, "learning_rate": 2.8777007377020753e-05, "loss": 2.1257, "step": 8570500 }, { "epoch": 42.46, "learning_rate": 2.877576879059467e-05, "loss": 2.1345, "step": 8571000 }, { "epoch": 42.47, "learning_rate": 2.8774530204168587e-05, "loss": 2.1192, "step": 8571500 }, { "epoch": 42.47, "learning_rate": 2.8773291617742504e-05, "loss": 2.1239, "step": 8572000 }, { "epoch": 42.47, "learning_rate": 2.877205303131642e-05, "loss": 2.1406, "step": 8572500 }, { "epoch": 42.47, "learning_rate": 2.8770816922063186e-05, "loss": 2.1468, "step": 8573000 }, { "epoch": 42.48, "learning_rate": 2.8769578335637103e-05, "loss": 2.1189, "step": 8573500 }, { "epoch": 42.48, "learning_rate": 2.876833974921102e-05, "loss": 2.1004, "step": 8574000 }, { "epoch": 42.48, "learning_rate": 2.8767101162784937e-05, "loss": 2.1246, "step": 8574500 }, { "epoch": 42.48, "learning_rate": 2.8765862576358854e-05, "loss": 2.1231, "step": 8575000 }, { "epoch": 42.49, "learning_rate": 2.876462398993277e-05, "loss": 2.1195, "step": 8575500 }, { "epoch": 42.49, "learning_rate": 2.8763387880679536e-05, "loss": 2.1332, "step": 8576000 }, { "epoch": 42.49, "learning_rate": 2.8762149294253453e-05, "loss": 2.141, "step": 8576500 }, { "epoch": 42.49, "learning_rate": 2.876091070782737e-05, "loss": 2.1127, "step": 8577000 }, { "epoch": 42.5, "learning_rate": 2.8759674598574142e-05, "loss": 2.1526, "step": 8577500 }, { "epoch": 42.5, "learning_rate": 2.8758436012148056e-05, "loss": 2.1372, "step": 8578000 }, { "epoch": 42.5, "learning_rate": 2.8757197425721973e-05, "loss": 2.1069, "step": 8578500 }, { "epoch": 42.5, "learning_rate": 2.8755958839295886e-05, "loss": 2.1126, "step": 8579000 }, { "epoch": 42.51, "learning_rate": 2.8754720252869803e-05, "loss": 2.1621, "step": 8579500 }, { "epoch": 42.51, "learning_rate": 2.875348166644372e-05, "loss": 2.1176, "step": 8580000 }, { "epoch": 42.51, "learning_rate": 2.8752243080017637e-05, "loss": 2.1292, "step": 8580500 }, { "epoch": 42.51, "learning_rate": 2.8751004493591554e-05, "loss": 2.1247, "step": 8581000 }, { "epoch": 42.52, "learning_rate": 2.874976590716547e-05, "loss": 2.1173, "step": 8581500 }, { "epoch": 42.52, "learning_rate": 2.874852979791224e-05, "loss": 2.0947, "step": 8582000 }, { "epoch": 42.52, "learning_rate": 2.8747293688659012e-05, "loss": 2.1313, "step": 8582500 }, { "epoch": 42.52, "learning_rate": 2.8746055102232926e-05, "loss": 2.1331, "step": 8583000 }, { "epoch": 42.53, "learning_rate": 2.8744816515806842e-05, "loss": 2.1298, "step": 8583500 }, { "epoch": 42.53, "learning_rate": 2.8743577929380756e-05, "loss": 2.1246, "step": 8584000 }, { "epoch": 42.53, "learning_rate": 2.8742339342954673e-05, "loss": 2.1233, "step": 8584500 }, { "epoch": 42.53, "learning_rate": 2.874110075652859e-05, "loss": 2.1194, "step": 8585000 }, { "epoch": 42.54, "learning_rate": 2.8739862170102503e-05, "loss": 2.1575, "step": 8585500 }, { "epoch": 42.54, "learning_rate": 2.873862358367642e-05, "loss": 2.1085, "step": 8586000 }, { "epoch": 42.54, "learning_rate": 2.8737384997250337e-05, "loss": 2.1236, "step": 8586500 }, { "epoch": 42.54, "learning_rate": 2.873614888799711e-05, "loss": 2.1339, "step": 8587000 }, { "epoch": 42.55, "learning_rate": 2.8734910301571023e-05, "loss": 2.1108, "step": 8587500 }, { "epoch": 42.55, "learning_rate": 2.8733674192317795e-05, "loss": 2.1276, "step": 8588000 }, { "epoch": 42.55, "learning_rate": 2.8732435605891712e-05, "loss": 2.1172, "step": 8588500 }, { "epoch": 42.55, "learning_rate": 2.873119701946563e-05, "loss": 2.1299, "step": 8589000 }, { "epoch": 42.56, "learning_rate": 2.8729958433039546e-05, "loss": 2.1372, "step": 8589500 }, { "epoch": 42.56, "learning_rate": 2.872871984661346e-05, "loss": 2.1174, "step": 8590000 }, { "epoch": 42.56, "learning_rate": 2.8727481260187373e-05, "loss": 2.1332, "step": 8590500 }, { "epoch": 42.56, "learning_rate": 2.872624267376129e-05, "loss": 2.1193, "step": 8591000 }, { "epoch": 42.57, "learning_rate": 2.8725004087335207e-05, "loss": 2.1312, "step": 8591500 }, { "epoch": 42.57, "learning_rate": 2.8723765500909124e-05, "loss": 2.1248, "step": 8592000 }, { "epoch": 42.57, "learning_rate": 2.8722526914483037e-05, "loss": 2.1319, "step": 8592500 }, { "epoch": 42.57, "learning_rate": 2.8721288328056954e-05, "loss": 2.1396, "step": 8593000 }, { "epoch": 42.58, "learning_rate": 2.872004974163087e-05, "loss": 2.1237, "step": 8593500 }, { "epoch": 42.58, "learning_rate": 2.871881363237764e-05, "loss": 2.1069, "step": 8594000 }, { "epoch": 42.58, "learning_rate": 2.8717575045951557e-05, "loss": 2.1269, "step": 8594500 }, { "epoch": 42.58, "learning_rate": 2.8716336459525474e-05, "loss": 2.1389, "step": 8595000 }, { "epoch": 42.59, "learning_rate": 2.871509787309939e-05, "loss": 2.1341, "step": 8595500 }, { "epoch": 42.59, "learning_rate": 2.8713859286673304e-05, "loss": 2.1294, "step": 8596000 }, { "epoch": 42.59, "learning_rate": 2.871262070024722e-05, "loss": 2.1278, "step": 8596500 }, { "epoch": 42.59, "learning_rate": 2.871138459099399e-05, "loss": 2.1082, "step": 8597000 }, { "epoch": 42.59, "learning_rate": 2.8710146004567907e-05, "loss": 2.1131, "step": 8597500 }, { "epoch": 42.6, "learning_rate": 2.8708907418141824e-05, "loss": 2.1386, "step": 8598000 }, { "epoch": 42.6, "learning_rate": 2.870766883171574e-05, "loss": 2.1369, "step": 8598500 }, { "epoch": 42.6, "learning_rate": 2.8706430245289654e-05, "loss": 2.1201, "step": 8599000 }, { "epoch": 42.6, "learning_rate": 2.870519165886357e-05, "loss": 2.1045, "step": 8599500 }, { "epoch": 42.61, "learning_rate": 2.8703953072437488e-05, "loss": 2.1404, "step": 8600000 }, { "epoch": 42.61, "learning_rate": 2.8702716963184257e-05, "loss": 2.1284, "step": 8600500 }, { "epoch": 42.61, "learning_rate": 2.8701478376758174e-05, "loss": 2.1366, "step": 8601000 }, { "epoch": 42.61, "learning_rate": 2.8700242267504946e-05, "loss": 2.1208, "step": 8601500 }, { "epoch": 42.62, "learning_rate": 2.8699003681078863e-05, "loss": 2.1236, "step": 8602000 }, { "epoch": 42.62, "learning_rate": 2.869776509465278e-05, "loss": 2.1545, "step": 8602500 }, { "epoch": 42.62, "learning_rate": 2.869652650822669e-05, "loss": 2.1495, "step": 8603000 }, { "epoch": 42.62, "learning_rate": 2.8695287921800607e-05, "loss": 2.1263, "step": 8603500 }, { "epoch": 42.63, "learning_rate": 2.8694049335374524e-05, "loss": 2.1208, "step": 8604000 }, { "epoch": 42.63, "learning_rate": 2.869281074894844e-05, "loss": 2.1358, "step": 8604500 }, { "epoch": 42.63, "learning_rate": 2.8691574639695213e-05, "loss": 2.0924, "step": 8605000 }, { "epoch": 42.63, "learning_rate": 2.869033853044198e-05, "loss": 2.135, "step": 8605500 }, { "epoch": 42.64, "learning_rate": 2.8689099944015896e-05, "loss": 2.1454, "step": 8606000 }, { "epoch": 42.64, "learning_rate": 2.8687861357589812e-05, "loss": 2.1146, "step": 8606500 }, { "epoch": 42.64, "learning_rate": 2.868662277116373e-05, "loss": 2.1221, "step": 8607000 }, { "epoch": 42.64, "learning_rate": 2.8685386661910495e-05, "loss": 2.104, "step": 8607500 }, { "epoch": 42.65, "learning_rate": 2.8684148075484412e-05, "loss": 2.1102, "step": 8608000 }, { "epoch": 42.65, "learning_rate": 2.868290948905833e-05, "loss": 2.1518, "step": 8608500 }, { "epoch": 42.65, "learning_rate": 2.8681673379805097e-05, "loss": 2.1543, "step": 8609000 }, { "epoch": 42.65, "learning_rate": 2.8680434793379014e-05, "loss": 2.1305, "step": 8609500 }, { "epoch": 42.66, "learning_rate": 2.8679196206952928e-05, "loss": 2.1472, "step": 8610000 }, { "epoch": 42.66, "learning_rate": 2.8677957620526845e-05, "loss": 2.1341, "step": 8610500 }, { "epoch": 42.66, "learning_rate": 2.8676719034100762e-05, "loss": 2.1334, "step": 8611000 }, { "epoch": 42.66, "learning_rate": 2.867548044767468e-05, "loss": 2.1372, "step": 8611500 }, { "epoch": 42.67, "learning_rate": 2.8674241861248596e-05, "loss": 2.1199, "step": 8612000 }, { "epoch": 42.67, "learning_rate": 2.8673003274822513e-05, "loss": 2.1294, "step": 8612500 }, { "epoch": 42.67, "learning_rate": 2.867176468839643e-05, "loss": 2.1249, "step": 8613000 }, { "epoch": 42.67, "learning_rate": 2.8670526101970346e-05, "loss": 2.1314, "step": 8613500 }, { "epoch": 42.68, "learning_rate": 2.8669289992717112e-05, "loss": 2.127, "step": 8614000 }, { "epoch": 42.68, "learning_rate": 2.866805140629103e-05, "loss": 2.1192, "step": 8614500 }, { "epoch": 42.68, "learning_rate": 2.8666812819864946e-05, "loss": 2.127, "step": 8615000 }, { "epoch": 42.68, "learning_rate": 2.8665574233438863e-05, "loss": 2.1457, "step": 8615500 }, { "epoch": 42.69, "learning_rate": 2.866433564701278e-05, "loss": 2.1282, "step": 8616000 }, { "epoch": 42.69, "learning_rate": 2.8663097060586697e-05, "loss": 2.1231, "step": 8616500 }, { "epoch": 42.69, "learning_rate": 2.8661858474160613e-05, "loss": 2.1313, "step": 8617000 }, { "epoch": 42.69, "learning_rate": 2.866061988773453e-05, "loss": 2.1432, "step": 8617500 }, { "epoch": 42.7, "learning_rate": 2.8659381301308447e-05, "loss": 2.1103, "step": 8618000 }, { "epoch": 42.7, "learning_rate": 2.8658145192055213e-05, "loss": 2.1329, "step": 8618500 }, { "epoch": 42.7, "learning_rate": 2.865690660562913e-05, "loss": 2.1254, "step": 8619000 }, { "epoch": 42.7, "learning_rate": 2.8655668019203047e-05, "loss": 2.1356, "step": 8619500 }, { "epoch": 42.71, "learning_rate": 2.8654429432776963e-05, "loss": 2.1149, "step": 8620000 }, { "epoch": 42.71, "learning_rate": 2.865319084635088e-05, "loss": 2.1152, "step": 8620500 }, { "epoch": 42.71, "learning_rate": 2.8651952259924797e-05, "loss": 2.1128, "step": 8621000 }, { "epoch": 42.71, "learning_rate": 2.8650713673498708e-05, "loss": 2.1439, "step": 8621500 }, { "epoch": 42.72, "learning_rate": 2.8649475087072624e-05, "loss": 2.1147, "step": 8622000 }, { "epoch": 42.72, "learning_rate": 2.864823650064654e-05, "loss": 2.123, "step": 8622500 }, { "epoch": 42.72, "learning_rate": 2.8646997914220458e-05, "loss": 2.1232, "step": 8623000 }, { "epoch": 42.72, "learning_rate": 2.864576180496723e-05, "loss": 2.1005, "step": 8623500 }, { "epoch": 42.73, "learning_rate": 2.8644523218541147e-05, "loss": 2.1005, "step": 8624000 }, { "epoch": 42.73, "learning_rate": 2.8643284632115064e-05, "loss": 2.121, "step": 8624500 }, { "epoch": 42.73, "learning_rate": 2.8642046045688974e-05, "loss": 2.1181, "step": 8625000 }, { "epoch": 42.73, "learning_rate": 2.864080745926289e-05, "loss": 2.1424, "step": 8625500 }, { "epoch": 42.74, "learning_rate": 2.8639571350009664e-05, "loss": 2.1319, "step": 8626000 }, { "epoch": 42.74, "learning_rate": 2.863833276358358e-05, "loss": 2.1825, "step": 8626500 }, { "epoch": 42.74, "learning_rate": 2.8637094177157497e-05, "loss": 2.114, "step": 8627000 }, { "epoch": 42.74, "learning_rate": 2.8635855590731414e-05, "loss": 2.1324, "step": 8627500 }, { "epoch": 42.75, "learning_rate": 2.8634617004305325e-05, "loss": 2.138, "step": 8628000 }, { "epoch": 42.75, "learning_rate": 2.863337841787924e-05, "loss": 2.1419, "step": 8628500 }, { "epoch": 42.75, "learning_rate": 2.863213983145316e-05, "loss": 2.1312, "step": 8629000 }, { "epoch": 42.75, "learning_rate": 2.8630901245027075e-05, "loss": 2.1188, "step": 8629500 }, { "epoch": 42.76, "learning_rate": 2.8629662658600992e-05, "loss": 2.1455, "step": 8630000 }, { "epoch": 42.76, "learning_rate": 2.862842407217491e-05, "loss": 2.1405, "step": 8630500 }, { "epoch": 42.76, "learning_rate": 2.8627185485748826e-05, "loss": 2.118, "step": 8631000 }, { "epoch": 42.76, "learning_rate": 2.8625951853668447e-05, "loss": 2.1245, "step": 8631500 }, { "epoch": 42.77, "learning_rate": 2.8624713267242364e-05, "loss": 2.1091, "step": 8632000 }, { "epoch": 42.77, "learning_rate": 2.862347468081628e-05, "loss": 2.1223, "step": 8632500 }, { "epoch": 42.77, "learning_rate": 2.862223857156305e-05, "loss": 2.1463, "step": 8633000 }, { "epoch": 42.77, "learning_rate": 2.8620999985136966e-05, "loss": 2.1347, "step": 8633500 }, { "epoch": 42.78, "learning_rate": 2.861976139871088e-05, "loss": 2.1087, "step": 8634000 }, { "epoch": 42.78, "learning_rate": 2.8618522812284797e-05, "loss": 2.1411, "step": 8634500 }, { "epoch": 42.78, "learning_rate": 2.8617284225858714e-05, "loss": 2.1289, "step": 8635000 }, { "epoch": 42.78, "learning_rate": 2.861604563943263e-05, "loss": 2.1067, "step": 8635500 }, { "epoch": 42.79, "learning_rate": 2.86148095301794e-05, "loss": 2.1431, "step": 8636000 }, { "epoch": 42.79, "learning_rate": 2.8613570943753316e-05, "loss": 2.1144, "step": 8636500 }, { "epoch": 42.79, "learning_rate": 2.8612332357327233e-05, "loss": 2.1461, "step": 8637000 }, { "epoch": 42.79, "learning_rate": 2.8611093770901147e-05, "loss": 2.1379, "step": 8637500 }, { "epoch": 42.8, "learning_rate": 2.8609855184475064e-05, "loss": 2.151, "step": 8638000 }, { "epoch": 42.8, "learning_rate": 2.860861659804898e-05, "loss": 2.1394, "step": 8638500 }, { "epoch": 42.8, "learning_rate": 2.8607378011622898e-05, "loss": 2.1212, "step": 8639000 }, { "epoch": 42.8, "learning_rate": 2.8606139425196815e-05, "loss": 2.1311, "step": 8639500 }, { "epoch": 42.81, "learning_rate": 2.860490083877073e-05, "loss": 2.1322, "step": 8640000 }, { "epoch": 42.81, "learning_rate": 2.8603662252344642e-05, "loss": 2.1108, "step": 8640500 }, { "epoch": 42.81, "learning_rate": 2.860242366591856e-05, "loss": 2.1548, "step": 8641000 }, { "epoch": 42.81, "learning_rate": 2.860118755666533e-05, "loss": 2.1061, "step": 8641500 }, { "epoch": 42.82, "learning_rate": 2.85999514474121e-05, "loss": 2.1268, "step": 8642000 }, { "epoch": 42.82, "learning_rate": 2.8598712860986017e-05, "loss": 2.1211, "step": 8642500 }, { "epoch": 42.82, "learning_rate": 2.8597474274559934e-05, "loss": 2.1041, "step": 8643000 }, { "epoch": 42.82, "learning_rate": 2.859623568813385e-05, "loss": 2.1365, "step": 8643500 }, { "epoch": 42.83, "learning_rate": 2.8594997101707764e-05, "loss": 2.1567, "step": 8644000 }, { "epoch": 42.83, "learning_rate": 2.859375851528168e-05, "loss": 2.145, "step": 8644500 }, { "epoch": 42.83, "learning_rate": 2.8592519928855598e-05, "loss": 2.1434, "step": 8645000 }, { "epoch": 42.83, "learning_rate": 2.8591281342429515e-05, "loss": 2.1486, "step": 8645500 }, { "epoch": 42.84, "learning_rate": 2.8590042756003432e-05, "loss": 2.129, "step": 8646000 }, { "epoch": 42.84, "learning_rate": 2.8588804169577342e-05, "loss": 2.1337, "step": 8646500 }, { "epoch": 42.84, "learning_rate": 2.858756558315126e-05, "loss": 2.1709, "step": 8647000 }, { "epoch": 42.84, "learning_rate": 2.8586326996725176e-05, "loss": 2.135, "step": 8647500 }, { "epoch": 42.85, "learning_rate": 2.8585090887471948e-05, "loss": 2.131, "step": 8648000 }, { "epoch": 42.85, "learning_rate": 2.8583854778218717e-05, "loss": 2.1352, "step": 8648500 }, { "epoch": 42.85, "learning_rate": 2.8582616191792634e-05, "loss": 2.1444, "step": 8649000 }, { "epoch": 42.85, "learning_rate": 2.858137760536655e-05, "loss": 2.1084, "step": 8649500 }, { "epoch": 42.86, "learning_rate": 2.8580139018940468e-05, "loss": 2.1372, "step": 8650000 }, { "epoch": 42.86, "learning_rate": 2.8578902909687233e-05, "loss": 2.1479, "step": 8650500 }, { "epoch": 42.86, "learning_rate": 2.857766432326115e-05, "loss": 2.1465, "step": 8651000 }, { "epoch": 42.86, "learning_rate": 2.8576425736835067e-05, "loss": 2.1263, "step": 8651500 }, { "epoch": 42.86, "learning_rate": 2.8575187150408984e-05, "loss": 2.1198, "step": 8652000 }, { "epoch": 42.87, "learning_rate": 2.85739485639829e-05, "loss": 2.1579, "step": 8652500 }, { "epoch": 42.87, "learning_rate": 2.8572709977556818e-05, "loss": 2.134, "step": 8653000 }, { "epoch": 42.87, "learning_rate": 2.8571471391130734e-05, "loss": 2.1276, "step": 8653500 }, { "epoch": 42.87, "learning_rate": 2.8570232804704648e-05, "loss": 2.1261, "step": 8654000 }, { "epoch": 42.88, "learning_rate": 2.8568999172624265e-05, "loss": 2.1436, "step": 8654500 }, { "epoch": 42.88, "learning_rate": 2.8567760586198182e-05, "loss": 2.1349, "step": 8655000 }, { "epoch": 42.88, "learning_rate": 2.85665219997721e-05, "loss": 2.1164, "step": 8655500 }, { "epoch": 42.88, "learning_rate": 2.8565283413346016e-05, "loss": 2.1538, "step": 8656000 }, { "epoch": 42.89, "learning_rate": 2.8564044826919933e-05, "loss": 2.1251, "step": 8656500 }, { "epoch": 42.89, "learning_rate": 2.856280624049385e-05, "loss": 2.1434, "step": 8657000 }, { "epoch": 42.89, "learning_rate": 2.8561567654067767e-05, "loss": 2.1499, "step": 8657500 }, { "epoch": 42.89, "learning_rate": 2.8560329067641684e-05, "loss": 2.1255, "step": 8658000 }, { "epoch": 42.9, "learning_rate": 2.85590904812156e-05, "loss": 2.1344, "step": 8658500 }, { "epoch": 42.9, "learning_rate": 2.8557851894789518e-05, "loss": 2.136, "step": 8659000 }, { "epoch": 42.9, "learning_rate": 2.8556613308363435e-05, "loss": 2.1386, "step": 8659500 }, { "epoch": 42.9, "learning_rate": 2.855537472193735e-05, "loss": 2.1304, "step": 8660000 }, { "epoch": 42.91, "learning_rate": 2.855413613551127e-05, "loss": 2.1195, "step": 8660500 }, { "epoch": 42.91, "learning_rate": 2.8552897549085182e-05, "loss": 2.1197, "step": 8661000 }, { "epoch": 42.91, "learning_rate": 2.855166143983195e-05, "loss": 2.1501, "step": 8661500 }, { "epoch": 42.91, "learning_rate": 2.8550422853405868e-05, "loss": 2.1355, "step": 8662000 }, { "epoch": 42.92, "learning_rate": 2.8549184266979785e-05, "loss": 2.1433, "step": 8662500 }, { "epoch": 42.92, "learning_rate": 2.85479456805537e-05, "loss": 2.1488, "step": 8663000 }, { "epoch": 42.92, "learning_rate": 2.854670709412762e-05, "loss": 2.1264, "step": 8663500 }, { "epoch": 42.92, "learning_rate": 2.8545468507701535e-05, "loss": 2.1147, "step": 8664000 }, { "epoch": 42.93, "learning_rate": 2.854422992127545e-05, "loss": 2.1191, "step": 8664500 }, { "epoch": 42.93, "learning_rate": 2.8542993812022218e-05, "loss": 2.1345, "step": 8665000 }, { "epoch": 42.93, "learning_rate": 2.8541755225596135e-05, "loss": 2.1173, "step": 8665500 }, { "epoch": 42.93, "learning_rate": 2.85405191163429e-05, "loss": 2.1262, "step": 8666000 }, { "epoch": 42.94, "learning_rate": 2.8539280529916817e-05, "loss": 2.1277, "step": 8666500 }, { "epoch": 42.94, "learning_rate": 2.8538041943490734e-05, "loss": 2.1479, "step": 8667000 }, { "epoch": 42.94, "learning_rate": 2.853680335706465e-05, "loss": 2.1505, "step": 8667500 }, { "epoch": 42.94, "learning_rate": 2.8535564770638568e-05, "loss": 2.1542, "step": 8668000 }, { "epoch": 42.95, "learning_rate": 2.8534326184212485e-05, "loss": 2.1316, "step": 8668500 }, { "epoch": 42.95, "learning_rate": 2.8533087597786402e-05, "loss": 2.143, "step": 8669000 }, { "epoch": 42.95, "learning_rate": 2.8531851488533167e-05, "loss": 2.119, "step": 8669500 }, { "epoch": 42.95, "learning_rate": 2.8530612902107084e-05, "loss": 2.1266, "step": 8670000 }, { "epoch": 42.96, "learning_rate": 2.8529374315681e-05, "loss": 2.1482, "step": 8670500 }, { "epoch": 42.96, "learning_rate": 2.8528135729254918e-05, "loss": 2.1332, "step": 8671000 }, { "epoch": 42.96, "learning_rate": 2.8526897142828835e-05, "loss": 2.1188, "step": 8671500 }, { "epoch": 42.96, "learning_rate": 2.85256610335756e-05, "loss": 2.1215, "step": 8672000 }, { "epoch": 42.97, "learning_rate": 2.8524422447149517e-05, "loss": 2.1093, "step": 8672500 }, { "epoch": 42.97, "learning_rate": 2.8523183860723434e-05, "loss": 2.1077, "step": 8673000 }, { "epoch": 42.97, "learning_rate": 2.852194527429735e-05, "loss": 2.1513, "step": 8673500 }, { "epoch": 42.97, "learning_rate": 2.8520706687871268e-05, "loss": 2.1571, "step": 8674000 }, { "epoch": 42.98, "learning_rate": 2.8519468101445185e-05, "loss": 2.1293, "step": 8674500 }, { "epoch": 42.98, "learning_rate": 2.8518229515019102e-05, "loss": 2.111, "step": 8675000 }, { "epoch": 42.98, "learning_rate": 2.8516993405765867e-05, "loss": 2.1368, "step": 8675500 }, { "epoch": 42.98, "learning_rate": 2.8515757296512636e-05, "loss": 2.1595, "step": 8676000 }, { "epoch": 42.99, "learning_rate": 2.8514518710086553e-05, "loss": 2.1252, "step": 8676500 }, { "epoch": 42.99, "learning_rate": 2.851328012366047e-05, "loss": 2.1213, "step": 8677000 }, { "epoch": 42.99, "learning_rate": 2.851204649158009e-05, "loss": 2.1239, "step": 8677500 }, { "epoch": 42.99, "learning_rate": 2.8510807905154008e-05, "loss": 2.1409, "step": 8678000 }, { "epoch": 43.0, "learning_rate": 2.8509569318727924e-05, "loss": 2.1415, "step": 8678500 }, { "epoch": 43.0, "learning_rate": 2.850833073230184e-05, "loss": 2.1306, "step": 8679000 }, { "epoch": 43.0, "eval_accuracy": 0.6655919954007268, "eval_accuracy_mlm": 0.623243734234915, "eval_accuracy_nsp": 0.8656411423013112, "eval_loss": 2.30618953704834, "eval_runtime": 147.1655, "eval_samples_per_second": 1732.465, "eval_steps_per_second": 72.191, "step": 8679249 }, { "epoch": 43.0, "learning_rate": 2.850709214587576e-05, "loss": 2.1107, "step": 8679500 }, { "epoch": 43.0, "learning_rate": 2.8505853559449675e-05, "loss": 2.0867, "step": 8680000 }, { "epoch": 43.01, "learning_rate": 2.8504614973023592e-05, "loss": 2.1221, "step": 8680500 }, { "epoch": 43.01, "learning_rate": 2.850337638659751e-05, "loss": 2.123, "step": 8681000 }, { "epoch": 43.01, "learning_rate": 2.850213780017142e-05, "loss": 2.1079, "step": 8681500 }, { "epoch": 43.01, "learning_rate": 2.8500899213745336e-05, "loss": 2.1028, "step": 8682000 }, { "epoch": 43.02, "learning_rate": 2.8499660627319253e-05, "loss": 2.0994, "step": 8682500 }, { "epoch": 43.02, "learning_rate": 2.849842204089317e-05, "loss": 2.0957, "step": 8683000 }, { "epoch": 43.02, "learning_rate": 2.8497183454467087e-05, "loss": 2.0947, "step": 8683500 }, { "epoch": 43.02, "learning_rate": 2.849594734521386e-05, "loss": 2.0858, "step": 8684000 }, { "epoch": 43.03, "learning_rate": 2.8494708758787776e-05, "loss": 2.0903, "step": 8684500 }, { "epoch": 43.03, "learning_rate": 2.8493470172361686e-05, "loss": 2.0908, "step": 8685000 }, { "epoch": 43.03, "learning_rate": 2.8492231585935603e-05, "loss": 2.1289, "step": 8685500 }, { "epoch": 43.03, "learning_rate": 2.849099299950952e-05, "loss": 2.1028, "step": 8686000 }, { "epoch": 43.04, "learning_rate": 2.8489754413083437e-05, "loss": 2.1133, "step": 8686500 }, { "epoch": 43.04, "learning_rate": 2.8488515826657354e-05, "loss": 2.1215, "step": 8687000 }, { "epoch": 43.04, "learning_rate": 2.848727724023127e-05, "loss": 2.0937, "step": 8687500 }, { "epoch": 43.04, "learning_rate": 2.8486038653805184e-05, "loss": 2.1178, "step": 8688000 }, { "epoch": 43.05, "learning_rate": 2.84848000673791e-05, "loss": 2.1084, "step": 8688500 }, { "epoch": 43.05, "learning_rate": 2.848356148095302e-05, "loss": 2.0933, "step": 8689000 }, { "epoch": 43.05, "learning_rate": 2.8482322894526935e-05, "loss": 2.1121, "step": 8689500 }, { "epoch": 43.05, "learning_rate": 2.8481084308100852e-05, "loss": 2.0959, "step": 8690000 }, { "epoch": 43.06, "learning_rate": 2.847984572167477e-05, "loss": 2.083, "step": 8690500 }, { "epoch": 43.06, "learning_rate": 2.8478609612421538e-05, "loss": 2.1036, "step": 8691000 }, { "epoch": 43.06, "learning_rate": 2.847737102599545e-05, "loss": 2.1072, "step": 8691500 }, { "epoch": 43.06, "learning_rate": 2.847613243956937e-05, "loss": 2.0977, "step": 8692000 }, { "epoch": 43.07, "learning_rate": 2.8474893853143285e-05, "loss": 2.1215, "step": 8692500 }, { "epoch": 43.07, "learning_rate": 2.8473655266717202e-05, "loss": 2.0817, "step": 8693000 }, { "epoch": 43.07, "learning_rate": 2.847241668029112e-05, "loss": 2.116, "step": 8693500 }, { "epoch": 43.07, "learning_rate": 2.8471178093865036e-05, "loss": 2.1169, "step": 8694000 }, { "epoch": 43.08, "learning_rate": 2.84699419846118e-05, "loss": 2.0949, "step": 8694500 }, { "epoch": 43.08, "learning_rate": 2.846870339818572e-05, "loss": 2.1083, "step": 8695000 }, { "epoch": 43.08, "learning_rate": 2.8467464811759635e-05, "loss": 2.1042, "step": 8695500 }, { "epoch": 43.08, "learning_rate": 2.8466228702506404e-05, "loss": 2.1103, "step": 8696000 }, { "epoch": 43.09, "learning_rate": 2.846499011608032e-05, "loss": 2.0968, "step": 8696500 }, { "epoch": 43.09, "learning_rate": 2.8463751529654238e-05, "loss": 2.104, "step": 8697000 }, { "epoch": 43.09, "learning_rate": 2.8462512943228155e-05, "loss": 2.1138, "step": 8697500 }, { "epoch": 43.09, "learning_rate": 2.846127683397492e-05, "loss": 2.1136, "step": 8698000 }, { "epoch": 43.1, "learning_rate": 2.8460038247548837e-05, "loss": 2.1161, "step": 8698500 }, { "epoch": 43.1, "learning_rate": 2.8458799661122754e-05, "loss": 2.0974, "step": 8699000 }, { "epoch": 43.1, "learning_rate": 2.845756107469667e-05, "loss": 2.1055, "step": 8699500 }, { "epoch": 43.1, "learning_rate": 2.8456322488270588e-05, "loss": 2.0906, "step": 8700000 }, { "epoch": 43.11, "learning_rate": 2.8455083901844505e-05, "loss": 2.1332, "step": 8700500 }, { "epoch": 43.11, "learning_rate": 2.8453845315418422e-05, "loss": 2.1165, "step": 8701000 }, { "epoch": 43.11, "learning_rate": 2.8452609206165187e-05, "loss": 2.1094, "step": 8701500 }, { "epoch": 43.11, "learning_rate": 2.8451370619739104e-05, "loss": 2.0983, "step": 8702000 }, { "epoch": 43.12, "learning_rate": 2.845013203331302e-05, "loss": 2.1005, "step": 8702500 }, { "epoch": 43.12, "learning_rate": 2.8448895924059793e-05, "loss": 2.1028, "step": 8703000 }, { "epoch": 43.12, "learning_rate": 2.8447657337633704e-05, "loss": 2.0953, "step": 8703500 }, { "epoch": 43.12, "learning_rate": 2.844641875120762e-05, "loss": 2.0915, "step": 8704000 }, { "epoch": 43.13, "learning_rate": 2.8445180164781537e-05, "loss": 2.1101, "step": 8704500 }, { "epoch": 43.13, "learning_rate": 2.8443941578355454e-05, "loss": 2.0977, "step": 8705000 }, { "epoch": 43.13, "learning_rate": 2.8442705469102227e-05, "loss": 2.0997, "step": 8705500 }, { "epoch": 43.13, "learning_rate": 2.8441466882676143e-05, "loss": 2.1118, "step": 8706000 }, { "epoch": 43.14, "learning_rate": 2.8440228296250054e-05, "loss": 2.1198, "step": 8706500 }, { "epoch": 43.14, "learning_rate": 2.843898970982397e-05, "loss": 2.0858, "step": 8707000 }, { "epoch": 43.14, "learning_rate": 2.8437751123397887e-05, "loss": 2.1261, "step": 8707500 }, { "epoch": 43.14, "learning_rate": 2.8436512536971804e-05, "loss": 2.107, "step": 8708000 }, { "epoch": 43.14, "learning_rate": 2.8435276427718577e-05, "loss": 2.0923, "step": 8708500 }, { "epoch": 43.15, "learning_rate": 2.8434037841292494e-05, "loss": 2.0999, "step": 8709000 }, { "epoch": 43.15, "learning_rate": 2.843279925486641e-05, "loss": 2.1207, "step": 8709500 }, { "epoch": 43.15, "learning_rate": 2.843156066844032e-05, "loss": 2.123, "step": 8710000 }, { "epoch": 43.15, "learning_rate": 2.8430322082014238e-05, "loss": 2.1192, "step": 8710500 }, { "epoch": 43.16, "learning_rate": 2.8429083495588154e-05, "loss": 2.1017, "step": 8711000 }, { "epoch": 43.16, "learning_rate": 2.842784490916207e-05, "loss": 2.0896, "step": 8711500 }, { "epoch": 43.16, "learning_rate": 2.842660632273599e-05, "loss": 2.1281, "step": 8712000 }, { "epoch": 43.16, "learning_rate": 2.8425367736309905e-05, "loss": 2.099, "step": 8712500 }, { "epoch": 43.17, "learning_rate": 2.8424129149883822e-05, "loss": 2.1073, "step": 8713000 }, { "epoch": 43.17, "learning_rate": 2.842289056345774e-05, "loss": 2.1012, "step": 8713500 }, { "epoch": 43.17, "learning_rate": 2.8421654454204505e-05, "loss": 2.1032, "step": 8714000 }, { "epoch": 43.17, "learning_rate": 2.842041586777842e-05, "loss": 2.0934, "step": 8714500 }, { "epoch": 43.18, "learning_rate": 2.841917728135234e-05, "loss": 2.1352, "step": 8715000 }, { "epoch": 43.18, "learning_rate": 2.8417938694926255e-05, "loss": 2.1075, "step": 8715500 }, { "epoch": 43.18, "learning_rate": 2.8416700108500172e-05, "loss": 2.0876, "step": 8716000 }, { "epoch": 43.18, "learning_rate": 2.841546152207409e-05, "loss": 2.1082, "step": 8716500 }, { "epoch": 43.19, "learning_rate": 2.8414222935648006e-05, "loss": 2.1093, "step": 8717000 }, { "epoch": 43.19, "learning_rate": 2.8412984349221923e-05, "loss": 2.1274, "step": 8717500 }, { "epoch": 43.19, "learning_rate": 2.841174576279584e-05, "loss": 2.0848, "step": 8718000 }, { "epoch": 43.19, "learning_rate": 2.8410507176369754e-05, "loss": 2.1254, "step": 8718500 }, { "epoch": 43.2, "learning_rate": 2.840926858994367e-05, "loss": 2.0959, "step": 8719000 }, { "epoch": 43.2, "learning_rate": 2.840803248069044e-05, "loss": 2.0939, "step": 8719500 }, { "epoch": 43.2, "learning_rate": 2.8406796371437205e-05, "loss": 2.1351, "step": 8720000 }, { "epoch": 43.2, "learning_rate": 2.840555778501112e-05, "loss": 2.1108, "step": 8720500 }, { "epoch": 43.21, "learning_rate": 2.840431919858504e-05, "loss": 2.1192, "step": 8721000 }, { "epoch": 43.21, "learning_rate": 2.8403080612158955e-05, "loss": 2.101, "step": 8721500 }, { "epoch": 43.21, "learning_rate": 2.8401842025732872e-05, "loss": 2.1058, "step": 8722000 }, { "epoch": 43.21, "learning_rate": 2.840060343930679e-05, "loss": 2.1222, "step": 8722500 }, { "epoch": 43.22, "learning_rate": 2.8399364852880706e-05, "loss": 2.142, "step": 8723000 }, { "epoch": 43.22, "learning_rate": 2.8398126266454623e-05, "loss": 2.0938, "step": 8723500 }, { "epoch": 43.22, "learning_rate": 2.839688768002854e-05, "loss": 2.106, "step": 8724000 }, { "epoch": 43.22, "learning_rate": 2.8395649093602457e-05, "loss": 2.1296, "step": 8724500 }, { "epoch": 43.23, "learning_rate": 2.839441050717637e-05, "loss": 2.0835, "step": 8725000 }, { "epoch": 43.23, "learning_rate": 2.8393171920750287e-05, "loss": 2.1179, "step": 8725500 }, { "epoch": 43.23, "learning_rate": 2.8391933334324204e-05, "loss": 2.1362, "step": 8726000 }, { "epoch": 43.23, "learning_rate": 2.8390697225070973e-05, "loss": 2.1088, "step": 8726500 }, { "epoch": 43.24, "learning_rate": 2.838945863864489e-05, "loss": 2.1019, "step": 8727000 }, { "epoch": 43.24, "learning_rate": 2.8388220052218807e-05, "loss": 2.1193, "step": 8727500 }, { "epoch": 43.24, "learning_rate": 2.8386981465792724e-05, "loss": 2.0857, "step": 8728000 }, { "epoch": 43.24, "learning_rate": 2.838574535653949e-05, "loss": 2.1247, "step": 8728500 }, { "epoch": 43.25, "learning_rate": 2.8384509247286255e-05, "loss": 2.1272, "step": 8729000 }, { "epoch": 43.25, "learning_rate": 2.8383270660860172e-05, "loss": 2.1252, "step": 8729500 }, { "epoch": 43.25, "learning_rate": 2.838203207443409e-05, "loss": 2.1155, "step": 8730000 }, { "epoch": 43.25, "learning_rate": 2.8380793488008006e-05, "loss": 2.1068, "step": 8730500 }, { "epoch": 43.26, "learning_rate": 2.837955985592763e-05, "loss": 2.1051, "step": 8731000 }, { "epoch": 43.26, "learning_rate": 2.8378321269501547e-05, "loss": 2.1113, "step": 8731500 }, { "epoch": 43.26, "learning_rate": 2.8377082683075464e-05, "loss": 2.1382, "step": 8732000 }, { "epoch": 43.26, "learning_rate": 2.837584409664938e-05, "loss": 2.1073, "step": 8732500 }, { "epoch": 43.27, "learning_rate": 2.8374605510223294e-05, "loss": 2.1024, "step": 8733000 }, { "epoch": 43.27, "learning_rate": 2.837336692379721e-05, "loss": 2.1154, "step": 8733500 }, { "epoch": 43.27, "learning_rate": 2.8372128337371128e-05, "loss": 2.101, "step": 8734000 }, { "epoch": 43.27, "learning_rate": 2.8370889750945045e-05, "loss": 2.1325, "step": 8734500 }, { "epoch": 43.28, "learning_rate": 2.8369651164518955e-05, "loss": 2.0983, "step": 8735000 }, { "epoch": 43.28, "learning_rate": 2.8368412578092872e-05, "loss": 2.0827, "step": 8735500 }, { "epoch": 43.28, "learning_rate": 2.836717399166679e-05, "loss": 2.1127, "step": 8736000 }, { "epoch": 43.28, "learning_rate": 2.8365935405240706e-05, "loss": 2.13, "step": 8736500 }, { "epoch": 43.29, "learning_rate": 2.8364696818814623e-05, "loss": 2.1073, "step": 8737000 }, { "epoch": 43.29, "learning_rate": 2.8363460709561395e-05, "loss": 2.1082, "step": 8737500 }, { "epoch": 43.29, "learning_rate": 2.836222212313531e-05, "loss": 2.1155, "step": 8738000 }, { "epoch": 43.29, "learning_rate": 2.836098601388208e-05, "loss": 2.1179, "step": 8738500 }, { "epoch": 43.3, "learning_rate": 2.8359749904628846e-05, "loss": 2.1095, "step": 8739000 }, { "epoch": 43.3, "learning_rate": 2.8358511318202763e-05, "loss": 2.1127, "step": 8739500 }, { "epoch": 43.3, "learning_rate": 2.835727520894953e-05, "loss": 2.1142, "step": 8740000 }, { "epoch": 43.3, "learning_rate": 2.8356036622523445e-05, "loss": 2.1295, "step": 8740500 }, { "epoch": 43.31, "learning_rate": 2.8354798036097362e-05, "loss": 2.1051, "step": 8741000 }, { "epoch": 43.31, "learning_rate": 2.835355944967128e-05, "loss": 2.1235, "step": 8741500 }, { "epoch": 43.31, "learning_rate": 2.8352320863245196e-05, "loss": 2.0949, "step": 8742000 }, { "epoch": 43.31, "learning_rate": 2.8351082276819113e-05, "loss": 2.1109, "step": 8742500 }, { "epoch": 43.32, "learning_rate": 2.834984369039303e-05, "loss": 2.0954, "step": 8743000 }, { "epoch": 43.32, "learning_rate": 2.8348605103966947e-05, "loss": 2.1022, "step": 8743500 }, { "epoch": 43.32, "learning_rate": 2.8347366517540864e-05, "loss": 2.1102, "step": 8744000 }, { "epoch": 43.32, "learning_rate": 2.834612793111478e-05, "loss": 2.1381, "step": 8744500 }, { "epoch": 43.33, "learning_rate": 2.8344889344688698e-05, "loss": 2.1247, "step": 8745000 }, { "epoch": 43.33, "learning_rate": 2.8343650758262615e-05, "loss": 2.1257, "step": 8745500 }, { "epoch": 43.33, "learning_rate": 2.834241217183653e-05, "loss": 2.1104, "step": 8746000 }, { "epoch": 43.33, "learning_rate": 2.8341176062583297e-05, "loss": 2.0999, "step": 8746500 }, { "epoch": 43.34, "learning_rate": 2.8339937476157214e-05, "loss": 2.1183, "step": 8747000 }, { "epoch": 43.34, "learning_rate": 2.833869888973113e-05, "loss": 2.1062, "step": 8747500 }, { "epoch": 43.34, "learning_rate": 2.8337462780477896e-05, "loss": 2.0867, "step": 8748000 }, { "epoch": 43.34, "learning_rate": 2.8336226671224665e-05, "loss": 2.1186, "step": 8748500 }, { "epoch": 43.35, "learning_rate": 2.8334988084798582e-05, "loss": 2.1264, "step": 8749000 }, { "epoch": 43.35, "learning_rate": 2.8333749498372496e-05, "loss": 2.1278, "step": 8749500 }, { "epoch": 43.35, "learning_rate": 2.8332510911946412e-05, "loss": 2.1108, "step": 8750000 }, { "epoch": 43.35, "learning_rate": 2.833127232552033e-05, "loss": 2.1346, "step": 8750500 }, { "epoch": 43.36, "learning_rate": 2.8330033739094246e-05, "loss": 2.1237, "step": 8751000 }, { "epoch": 43.36, "learning_rate": 2.8328795152668163e-05, "loss": 2.1191, "step": 8751500 }, { "epoch": 43.36, "learning_rate": 2.8327559043414932e-05, "loss": 2.131, "step": 8752000 }, { "epoch": 43.36, "learning_rate": 2.832632045698885e-05, "loss": 2.1324, "step": 8752500 }, { "epoch": 43.37, "learning_rate": 2.8325081870562763e-05, "loss": 2.1187, "step": 8753000 }, { "epoch": 43.37, "learning_rate": 2.832384328413668e-05, "loss": 2.123, "step": 8753500 }, { "epoch": 43.37, "learning_rate": 2.8322604697710596e-05, "loss": 2.1216, "step": 8754000 }, { "epoch": 43.37, "learning_rate": 2.8321366111284513e-05, "loss": 2.0982, "step": 8754500 }, { "epoch": 43.38, "learning_rate": 2.832012752485843e-05, "loss": 2.1309, "step": 8755000 }, { "epoch": 43.38, "learning_rate": 2.83188914156052e-05, "loss": 2.1336, "step": 8755500 }, { "epoch": 43.38, "learning_rate": 2.8317652829179116e-05, "loss": 2.1392, "step": 8756000 }, { "epoch": 43.38, "learning_rate": 2.831641424275303e-05, "loss": 2.1307, "step": 8756500 }, { "epoch": 43.39, "learning_rate": 2.8315178133499805e-05, "loss": 2.1147, "step": 8757000 }, { "epoch": 43.39, "learning_rate": 2.831394202424657e-05, "loss": 2.1303, "step": 8757500 }, { "epoch": 43.39, "learning_rate": 2.8312703437820487e-05, "loss": 2.1162, "step": 8758000 }, { "epoch": 43.39, "learning_rate": 2.8311464851394404e-05, "loss": 2.1162, "step": 8758500 }, { "epoch": 43.4, "learning_rate": 2.831022626496832e-05, "loss": 2.1109, "step": 8759000 }, { "epoch": 43.4, "learning_rate": 2.8308987678542238e-05, "loss": 2.1143, "step": 8759500 }, { "epoch": 43.4, "learning_rate": 2.8307749092116155e-05, "loss": 2.1373, "step": 8760000 }, { "epoch": 43.4, "learning_rate": 2.8306510505690065e-05, "loss": 2.1146, "step": 8760500 }, { "epoch": 43.41, "learning_rate": 2.8305271919263982e-05, "loss": 2.1277, "step": 8761000 }, { "epoch": 43.41, "learning_rate": 2.83040333328379e-05, "loss": 2.1247, "step": 8761500 }, { "epoch": 43.41, "learning_rate": 2.8302794746411816e-05, "loss": 2.1268, "step": 8762000 }, { "epoch": 43.41, "learning_rate": 2.8301556159985733e-05, "loss": 2.1378, "step": 8762500 }, { "epoch": 43.41, "learning_rate": 2.8300317573559647e-05, "loss": 2.0996, "step": 8763000 }, { "epoch": 43.42, "learning_rate": 2.8299078987133563e-05, "loss": 2.1368, "step": 8763500 }, { "epoch": 43.42, "learning_rate": 2.829784040070748e-05, "loss": 2.1326, "step": 8764000 }, { "epoch": 43.42, "learning_rate": 2.8296601814281397e-05, "loss": 2.1405, "step": 8764500 }, { "epoch": 43.42, "learning_rate": 2.8295363227855314e-05, "loss": 2.0942, "step": 8765000 }, { "epoch": 43.43, "learning_rate": 2.829412464142923e-05, "loss": 2.1299, "step": 8765500 }, { "epoch": 43.43, "learning_rate": 2.8292886055003148e-05, "loss": 2.1189, "step": 8766000 }, { "epoch": 43.43, "learning_rate": 2.8291647468577065e-05, "loss": 2.1303, "step": 8766500 }, { "epoch": 43.43, "learning_rate": 2.8290408882150982e-05, "loss": 2.1085, "step": 8767000 }, { "epoch": 43.44, "learning_rate": 2.8289172772897747e-05, "loss": 2.0894, "step": 8767500 }, { "epoch": 43.44, "learning_rate": 2.828793914081737e-05, "loss": 2.1231, "step": 8768000 }, { "epoch": 43.44, "learning_rate": 2.828670055439129e-05, "loss": 2.1233, "step": 8768500 }, { "epoch": 43.44, "learning_rate": 2.8285461967965205e-05, "loss": 2.1221, "step": 8769000 }, { "epoch": 43.45, "learning_rate": 2.828422585871197e-05, "loss": 2.112, "step": 8769500 }, { "epoch": 43.45, "learning_rate": 2.8282987272285888e-05, "loss": 2.1156, "step": 8770000 }, { "epoch": 43.45, "learning_rate": 2.8281748685859805e-05, "loss": 2.1119, "step": 8770500 }, { "epoch": 43.45, "learning_rate": 2.828051009943372e-05, "loss": 2.1314, "step": 8771000 }, { "epoch": 43.46, "learning_rate": 2.827927151300764e-05, "loss": 2.1069, "step": 8771500 }, { "epoch": 43.46, "learning_rate": 2.8278032926581555e-05, "loss": 2.1145, "step": 8772000 }, { "epoch": 43.46, "learning_rate": 2.8276794340155472e-05, "loss": 2.1268, "step": 8772500 }, { "epoch": 43.46, "learning_rate": 2.8275558230902238e-05, "loss": 2.1001, "step": 8773000 }, { "epoch": 43.47, "learning_rate": 2.8274319644476155e-05, "loss": 2.1178, "step": 8773500 }, { "epoch": 43.47, "learning_rate": 2.827308105805007e-05, "loss": 2.1348, "step": 8774000 }, { "epoch": 43.47, "learning_rate": 2.827184247162399e-05, "loss": 2.1359, "step": 8774500 }, { "epoch": 43.47, "learning_rate": 2.8270603885197905e-05, "loss": 2.0966, "step": 8775000 }, { "epoch": 43.48, "learning_rate": 2.8269365298771822e-05, "loss": 2.1105, "step": 8775500 }, { "epoch": 43.48, "learning_rate": 2.8268126712345733e-05, "loss": 2.1308, "step": 8776000 }, { "epoch": 43.48, "learning_rate": 2.826688812591965e-05, "loss": 2.1004, "step": 8776500 }, { "epoch": 43.48, "learning_rate": 2.8265649539493566e-05, "loss": 2.1251, "step": 8777000 }, { "epoch": 43.49, "learning_rate": 2.8264410953067483e-05, "loss": 2.0989, "step": 8777500 }, { "epoch": 43.49, "learning_rate": 2.82631723666414e-05, "loss": 2.121, "step": 8778000 }, { "epoch": 43.49, "learning_rate": 2.8261933780215317e-05, "loss": 2.1277, "step": 8778500 }, { "epoch": 43.49, "learning_rate": 2.8260695193789234e-05, "loss": 2.1377, "step": 8779000 }, { "epoch": 43.5, "learning_rate": 2.825945660736315e-05, "loss": 2.1043, "step": 8779500 }, { "epoch": 43.5, "learning_rate": 2.8258218020937065e-05, "loss": 2.1091, "step": 8780000 }, { "epoch": 43.5, "learning_rate": 2.8256981911683833e-05, "loss": 2.1088, "step": 8780500 }, { "epoch": 43.5, "learning_rate": 2.8255745802430606e-05, "loss": 2.1155, "step": 8781000 }, { "epoch": 43.51, "learning_rate": 2.8254507216004523e-05, "loss": 2.111, "step": 8781500 }, { "epoch": 43.51, "learning_rate": 2.825326862957844e-05, "loss": 2.115, "step": 8782000 }, { "epoch": 43.51, "learning_rate": 2.825203004315235e-05, "loss": 2.131, "step": 8782500 }, { "epoch": 43.51, "learning_rate": 2.8250791456726267e-05, "loss": 2.0964, "step": 8783000 }, { "epoch": 43.52, "learning_rate": 2.824955534747304e-05, "loss": 2.1355, "step": 8783500 }, { "epoch": 43.52, "learning_rate": 2.8248316761046956e-05, "loss": 2.1345, "step": 8784000 }, { "epoch": 43.52, "learning_rate": 2.8247078174620873e-05, "loss": 2.1252, "step": 8784500 }, { "epoch": 43.52, "learning_rate": 2.824583958819479e-05, "loss": 2.1398, "step": 8785000 }, { "epoch": 43.53, "learning_rate": 2.82446010017687e-05, "loss": 2.1106, "step": 8785500 }, { "epoch": 43.53, "learning_rate": 2.8243362415342617e-05, "loss": 2.1239, "step": 8786000 }, { "epoch": 43.53, "learning_rate": 2.8242123828916534e-05, "loss": 2.1328, "step": 8786500 }, { "epoch": 43.53, "learning_rate": 2.824088524249045e-05, "loss": 2.1385, "step": 8787000 }, { "epoch": 43.54, "learning_rate": 2.8239649133237223e-05, "loss": 2.105, "step": 8787500 }, { "epoch": 43.54, "learning_rate": 2.823841054681114e-05, "loss": 2.1009, "step": 8788000 }, { "epoch": 43.54, "learning_rate": 2.823717196038505e-05, "loss": 2.1018, "step": 8788500 }, { "epoch": 43.54, "learning_rate": 2.8235933373958967e-05, "loss": 2.1246, "step": 8789000 }, { "epoch": 43.55, "learning_rate": 2.8234694787532884e-05, "loss": 2.1262, "step": 8789500 }, { "epoch": 43.55, "learning_rate": 2.82334562011068e-05, "loss": 2.1381, "step": 8790000 }, { "epoch": 43.55, "learning_rate": 2.8232217614680717e-05, "loss": 2.1131, "step": 8790500 }, { "epoch": 43.55, "learning_rate": 2.8230979028254634e-05, "loss": 2.117, "step": 8791000 }, { "epoch": 43.56, "learning_rate": 2.822974044182855e-05, "loss": 2.0885, "step": 8791500 }, { "epoch": 43.56, "learning_rate": 2.8228501855402468e-05, "loss": 2.12, "step": 8792000 }, { "epoch": 43.56, "learning_rate": 2.8227263268976385e-05, "loss": 2.1108, "step": 8792500 }, { "epoch": 43.56, "learning_rate": 2.8226024682550302e-05, "loss": 2.1109, "step": 8793000 }, { "epoch": 43.57, "learning_rate": 2.8224786096124216e-05, "loss": 2.1024, "step": 8793500 }, { "epoch": 43.57, "learning_rate": 2.8223549986870984e-05, "loss": 2.1193, "step": 8794000 }, { "epoch": 43.57, "learning_rate": 2.82223114004449e-05, "loss": 2.1115, "step": 8794500 }, { "epoch": 43.57, "learning_rate": 2.8221072814018818e-05, "loss": 2.0942, "step": 8795000 }, { "epoch": 43.58, "learning_rate": 2.8219834227592735e-05, "loss": 2.1081, "step": 8795500 }, { "epoch": 43.58, "learning_rate": 2.8218595641166652e-05, "loss": 2.1105, "step": 8796000 }, { "epoch": 43.58, "learning_rate": 2.8217359531913418e-05, "loss": 2.1334, "step": 8796500 }, { "epoch": 43.58, "learning_rate": 2.8216120945487334e-05, "loss": 2.119, "step": 8797000 }, { "epoch": 43.59, "learning_rate": 2.821488235906125e-05, "loss": 2.1098, "step": 8797500 }, { "epoch": 43.59, "learning_rate": 2.821364377263517e-05, "loss": 2.1066, "step": 8798000 }, { "epoch": 43.59, "learning_rate": 2.8212405186209085e-05, "loss": 2.1088, "step": 8798500 }, { "epoch": 43.59, "learning_rate": 2.821116907695585e-05, "loss": 2.1299, "step": 8799000 }, { "epoch": 43.6, "learning_rate": 2.8209930490529768e-05, "loss": 2.1203, "step": 8799500 }, { "epoch": 43.6, "learning_rate": 2.8208691904103685e-05, "loss": 2.1143, "step": 8800000 }, { "epoch": 43.6, "learning_rate": 2.82074533176776e-05, "loss": 2.1093, "step": 8800500 }, { "epoch": 43.6, "learning_rate": 2.8206217208424367e-05, "loss": 2.1253, "step": 8801000 }, { "epoch": 43.61, "learning_rate": 2.820498109917114e-05, "loss": 2.1242, "step": 8801500 }, { "epoch": 43.61, "learning_rate": 2.8203744989917908e-05, "loss": 2.1098, "step": 8802000 }, { "epoch": 43.61, "learning_rate": 2.8202506403491825e-05, "loss": 2.1012, "step": 8802500 }, { "epoch": 43.61, "learning_rate": 2.8201267817065742e-05, "loss": 2.1088, "step": 8803000 }, { "epoch": 43.62, "learning_rate": 2.820002923063966e-05, "loss": 2.1408, "step": 8803500 }, { "epoch": 43.62, "learning_rate": 2.8198790644213576e-05, "loss": 2.1383, "step": 8804000 }, { "epoch": 43.62, "learning_rate": 2.8197557012133196e-05, "loss": 2.1175, "step": 8804500 }, { "epoch": 43.62, "learning_rate": 2.8196320902879965e-05, "loss": 2.1248, "step": 8805000 }, { "epoch": 43.63, "learning_rate": 2.819508231645388e-05, "loss": 2.1128, "step": 8805500 }, { "epoch": 43.63, "learning_rate": 2.8193843730027796e-05, "loss": 2.1234, "step": 8806000 }, { "epoch": 43.63, "learning_rate": 2.8192605143601713e-05, "loss": 2.1089, "step": 8806500 }, { "epoch": 43.63, "learning_rate": 2.819136655717563e-05, "loss": 2.1293, "step": 8807000 }, { "epoch": 43.64, "learning_rate": 2.8190127970749546e-05, "loss": 2.1184, "step": 8807500 }, { "epoch": 43.64, "learning_rate": 2.8188891861496315e-05, "loss": 2.1245, "step": 8808000 }, { "epoch": 43.64, "learning_rate": 2.8187653275070232e-05, "loss": 2.1373, "step": 8808500 }, { "epoch": 43.64, "learning_rate": 2.8186414688644146e-05, "loss": 2.1206, "step": 8809000 }, { "epoch": 43.65, "learning_rate": 2.8185176102218063e-05, "loss": 2.1415, "step": 8809500 }, { "epoch": 43.65, "learning_rate": 2.818393999296483e-05, "loss": 2.1149, "step": 8810000 }, { "epoch": 43.65, "learning_rate": 2.8182701406538748e-05, "loss": 2.1457, "step": 8810500 }, { "epoch": 43.65, "learning_rate": 2.8181462820112665e-05, "loss": 2.1173, "step": 8811000 }, { "epoch": 43.66, "learning_rate": 2.8180224233686582e-05, "loss": 2.1302, "step": 8811500 }, { "epoch": 43.66, "learning_rate": 2.8178985647260496e-05, "loss": 2.1259, "step": 8812000 }, { "epoch": 43.66, "learning_rate": 2.8177747060834413e-05, "loss": 2.1096, "step": 8812500 }, { "epoch": 43.66, "learning_rate": 2.817650847440833e-05, "loss": 2.1309, "step": 8813000 }, { "epoch": 43.67, "learning_rate": 2.8175269887982246e-05, "loss": 2.1177, "step": 8813500 }, { "epoch": 43.67, "learning_rate": 2.8174031301556163e-05, "loss": 2.1702, "step": 8814000 }, { "epoch": 43.67, "learning_rate": 2.8172792715130074e-05, "loss": 2.1165, "step": 8814500 }, { "epoch": 43.67, "learning_rate": 2.817155412870399e-05, "loss": 2.1257, "step": 8815000 }, { "epoch": 43.68, "learning_rate": 2.8170315542277907e-05, "loss": 2.1002, "step": 8815500 }, { "epoch": 43.68, "learning_rate": 2.8169076955851824e-05, "loss": 2.1275, "step": 8816000 }, { "epoch": 43.68, "learning_rate": 2.816783836942574e-05, "loss": 2.1112, "step": 8816500 }, { "epoch": 43.68, "learning_rate": 2.8166599782999658e-05, "loss": 2.1238, "step": 8817000 }, { "epoch": 43.68, "learning_rate": 2.8165361196573575e-05, "loss": 2.1407, "step": 8817500 }, { "epoch": 43.69, "learning_rate": 2.8164122610147492e-05, "loss": 2.1057, "step": 8818000 }, { "epoch": 43.69, "learning_rate": 2.816288402372141e-05, "loss": 2.0883, "step": 8818500 }, { "epoch": 43.69, "learning_rate": 2.8161647914468174e-05, "loss": 2.1141, "step": 8819000 }, { "epoch": 43.69, "learning_rate": 2.816040932804209e-05, "loss": 2.1209, "step": 8819500 }, { "epoch": 43.7, "learning_rate": 2.8159170741616008e-05, "loss": 2.1243, "step": 8820000 }, { "epoch": 43.7, "learning_rate": 2.8157932155189925e-05, "loss": 2.1042, "step": 8820500 }, { "epoch": 43.7, "learning_rate": 2.8156693568763842e-05, "loss": 2.1136, "step": 8821000 }, { "epoch": 43.7, "learning_rate": 2.815545498233776e-05, "loss": 2.1345, "step": 8821500 }, { "epoch": 43.71, "learning_rate": 2.8154216395911676e-05, "loss": 2.1465, "step": 8822000 }, { "epoch": 43.71, "learning_rate": 2.8152977809485593e-05, "loss": 2.1325, "step": 8822500 }, { "epoch": 43.71, "learning_rate": 2.815174170023236e-05, "loss": 2.1196, "step": 8823000 }, { "epoch": 43.71, "learning_rate": 2.8150503113806275e-05, "loss": 2.1262, "step": 8823500 }, { "epoch": 43.72, "learning_rate": 2.8149264527380192e-05, "loss": 2.125, "step": 8824000 }, { "epoch": 43.72, "learning_rate": 2.814802594095411e-05, "loss": 2.1409, "step": 8824500 }, { "epoch": 43.72, "learning_rate": 2.8146787354528026e-05, "loss": 2.1335, "step": 8825000 }, { "epoch": 43.72, "learning_rate": 2.814555372244765e-05, "loss": 2.1406, "step": 8825500 }, { "epoch": 43.73, "learning_rate": 2.8144315136021564e-05, "loss": 2.1329, "step": 8826000 }, { "epoch": 43.73, "learning_rate": 2.814307654959548e-05, "loss": 2.0975, "step": 8826500 }, { "epoch": 43.73, "learning_rate": 2.8141837963169394e-05, "loss": 2.1122, "step": 8827000 }, { "epoch": 43.73, "learning_rate": 2.814059937674331e-05, "loss": 2.1262, "step": 8827500 }, { "epoch": 43.74, "learning_rate": 2.8139360790317225e-05, "loss": 2.1228, "step": 8828000 }, { "epoch": 43.74, "learning_rate": 2.813812220389114e-05, "loss": 2.1297, "step": 8828500 }, { "epoch": 43.74, "learning_rate": 2.813688361746506e-05, "loss": 2.1271, "step": 8829000 }, { "epoch": 43.74, "learning_rate": 2.8135645031038975e-05, "loss": 2.1328, "step": 8829500 }, { "epoch": 43.75, "learning_rate": 2.8134408921785744e-05, "loss": 2.1116, "step": 8830000 }, { "epoch": 43.75, "learning_rate": 2.8133172812532516e-05, "loss": 2.121, "step": 8830500 }, { "epoch": 43.75, "learning_rate": 2.8131934226106433e-05, "loss": 2.1313, "step": 8831000 }, { "epoch": 43.75, "learning_rate": 2.813069563968035e-05, "loss": 2.1402, "step": 8831500 }, { "epoch": 43.76, "learning_rate": 2.8129457053254267e-05, "loss": 2.1069, "step": 8832000 }, { "epoch": 43.76, "learning_rate": 2.812821846682818e-05, "loss": 2.1509, "step": 8832500 }, { "epoch": 43.76, "learning_rate": 2.8126979880402094e-05, "loss": 2.1139, "step": 8833000 }, { "epoch": 43.76, "learning_rate": 2.812574129397601e-05, "loss": 2.0996, "step": 8833500 }, { "epoch": 43.77, "learning_rate": 2.8124502707549928e-05, "loss": 2.1165, "step": 8834000 }, { "epoch": 43.77, "learning_rate": 2.8123264121123845e-05, "loss": 2.1177, "step": 8834500 }, { "epoch": 43.77, "learning_rate": 2.812202553469776e-05, "loss": 2.1377, "step": 8835000 }, { "epoch": 43.77, "learning_rate": 2.8120786948271676e-05, "loss": 2.0974, "step": 8835500 }, { "epoch": 43.78, "learning_rate": 2.8119548361845592e-05, "loss": 2.1173, "step": 8836000 }, { "epoch": 43.78, "learning_rate": 2.811830977541951e-05, "loss": 2.11, "step": 8836500 }, { "epoch": 43.78, "learning_rate": 2.8117071188993426e-05, "loss": 2.1149, "step": 8837000 }, { "epoch": 43.78, "learning_rate": 2.8115832602567343e-05, "loss": 2.1418, "step": 8837500 }, { "epoch": 43.79, "learning_rate": 2.811459401614126e-05, "loss": 2.1078, "step": 8838000 }, { "epoch": 43.79, "learning_rate": 2.8113355429715177e-05, "loss": 2.1058, "step": 8838500 }, { "epoch": 43.79, "learning_rate": 2.8112119320461942e-05, "loss": 2.1087, "step": 8839000 }, { "epoch": 43.79, "learning_rate": 2.811088073403586e-05, "loss": 2.1172, "step": 8839500 }, { "epoch": 43.8, "learning_rate": 2.8109644624782628e-05, "loss": 2.1135, "step": 8840000 }, { "epoch": 43.8, "learning_rate": 2.8108406038356545e-05, "loss": 2.126, "step": 8840500 }, { "epoch": 43.8, "learning_rate": 2.8107169929103317e-05, "loss": 2.1236, "step": 8841000 }, { "epoch": 43.8, "learning_rate": 2.8105931342677234e-05, "loss": 2.1356, "step": 8841500 }, { "epoch": 43.81, "learning_rate": 2.810469275625115e-05, "loss": 2.1353, "step": 8842000 }, { "epoch": 43.81, "learning_rate": 2.8103456646997917e-05, "loss": 2.1111, "step": 8842500 }, { "epoch": 43.81, "learning_rate": 2.8102218060571834e-05, "loss": 2.1165, "step": 8843000 }, { "epoch": 43.81, "learning_rate": 2.810097947414575e-05, "loss": 2.1115, "step": 8843500 }, { "epoch": 43.82, "learning_rate": 2.8099743364892516e-05, "loss": 2.1156, "step": 8844000 }, { "epoch": 43.82, "learning_rate": 2.8098504778466433e-05, "loss": 2.1067, "step": 8844500 }, { "epoch": 43.82, "learning_rate": 2.809726619204035e-05, "loss": 2.1218, "step": 8845000 }, { "epoch": 43.82, "learning_rate": 2.8096027605614267e-05, "loss": 2.0814, "step": 8845500 }, { "epoch": 43.83, "learning_rate": 2.8094789019188184e-05, "loss": 2.1264, "step": 8846000 }, { "epoch": 43.83, "learning_rate": 2.80935504327621e-05, "loss": 2.1089, "step": 8846500 }, { "epoch": 43.83, "learning_rate": 2.8092311846336017e-05, "loss": 2.1187, "step": 8847000 }, { "epoch": 43.83, "learning_rate": 2.8091073259909934e-05, "loss": 2.1135, "step": 8847500 }, { "epoch": 43.84, "learning_rate": 2.808983467348385e-05, "loss": 2.1256, "step": 8848000 }, { "epoch": 43.84, "learning_rate": 2.808859608705776e-05, "loss": 2.1309, "step": 8848500 }, { "epoch": 43.84, "learning_rate": 2.8087359977804534e-05, "loss": 2.0909, "step": 8849000 }, { "epoch": 43.84, "learning_rate": 2.80861238685513e-05, "loss": 2.1035, "step": 8849500 }, { "epoch": 43.85, "learning_rate": 2.8084885282125216e-05, "loss": 2.1215, "step": 8850000 }, { "epoch": 43.85, "learning_rate": 2.8083649172871985e-05, "loss": 2.1383, "step": 8850500 }, { "epoch": 43.85, "learning_rate": 2.8082410586445902e-05, "loss": 2.117, "step": 8851000 }, { "epoch": 43.85, "learning_rate": 2.808117200001982e-05, "loss": 2.1252, "step": 8851500 }, { "epoch": 43.86, "learning_rate": 2.8079933413593736e-05, "loss": 2.14, "step": 8852000 }, { "epoch": 43.86, "learning_rate": 2.8078694827167653e-05, "loss": 2.1174, "step": 8852500 }, { "epoch": 43.86, "learning_rate": 2.8077456240741566e-05, "loss": 2.1335, "step": 8853000 }, { "epoch": 43.86, "learning_rate": 2.8076217654315483e-05, "loss": 2.1185, "step": 8853500 }, { "epoch": 43.87, "learning_rate": 2.80749790678894e-05, "loss": 2.1093, "step": 8854000 }, { "epoch": 43.87, "learning_rate": 2.8073740481463317e-05, "loss": 2.1202, "step": 8854500 }, { "epoch": 43.87, "learning_rate": 2.8072501895037234e-05, "loss": 2.1174, "step": 8855000 }, { "epoch": 43.87, "learning_rate": 2.807126330861115e-05, "loss": 2.115, "step": 8855500 }, { "epoch": 43.88, "learning_rate": 2.8070027199357916e-05, "loss": 2.1183, "step": 8856000 }, { "epoch": 43.88, "learning_rate": 2.8068788612931833e-05, "loss": 2.1195, "step": 8856500 }, { "epoch": 43.88, "learning_rate": 2.806755002650575e-05, "loss": 2.1094, "step": 8857000 }, { "epoch": 43.88, "learning_rate": 2.8066311440079667e-05, "loss": 2.1056, "step": 8857500 }, { "epoch": 43.89, "learning_rate": 2.8065072853653584e-05, "loss": 2.1312, "step": 8858000 }, { "epoch": 43.89, "learning_rate": 2.80638342672275e-05, "loss": 2.1449, "step": 8858500 }, { "epoch": 43.89, "learning_rate": 2.8062595680801418e-05, "loss": 2.1343, "step": 8859000 }, { "epoch": 43.89, "learning_rate": 2.8061357094375335e-05, "loss": 2.1093, "step": 8859500 }, { "epoch": 43.9, "learning_rate": 2.806011850794925e-05, "loss": 2.1058, "step": 8860000 }, { "epoch": 43.9, "learning_rate": 2.805887992152317e-05, "loss": 2.1467, "step": 8860500 }, { "epoch": 43.9, "learning_rate": 2.805764133509708e-05, "loss": 2.1084, "step": 8861000 }, { "epoch": 43.9, "learning_rate": 2.8056402748670996e-05, "loss": 2.0907, "step": 8861500 }, { "epoch": 43.91, "learning_rate": 2.8055164162244913e-05, "loss": 2.1291, "step": 8862000 }, { "epoch": 43.91, "learning_rate": 2.805392557581883e-05, "loss": 2.0913, "step": 8862500 }, { "epoch": 43.91, "learning_rate": 2.8052686989392746e-05, "loss": 2.1247, "step": 8863000 }, { "epoch": 43.91, "learning_rate": 2.8051448402966663e-05, "loss": 2.1092, "step": 8863500 }, { "epoch": 43.92, "learning_rate": 2.805020981654058e-05, "loss": 2.1302, "step": 8864000 }, { "epoch": 43.92, "learning_rate": 2.8048971230114497e-05, "loss": 2.1172, "step": 8864500 }, { "epoch": 43.92, "learning_rate": 2.8047737598034118e-05, "loss": 2.1171, "step": 8865000 }, { "epoch": 43.92, "learning_rate": 2.8046499011608035e-05, "loss": 2.1134, "step": 8865500 }, { "epoch": 43.93, "learning_rate": 2.8045260425181952e-05, "loss": 2.1403, "step": 8866000 }, { "epoch": 43.93, "learning_rate": 2.804402183875587e-05, "loss": 2.1179, "step": 8866500 }, { "epoch": 43.93, "learning_rate": 2.8042783252329786e-05, "loss": 2.1209, "step": 8867000 }, { "epoch": 43.93, "learning_rate": 2.8041549620249403e-05, "loss": 2.1357, "step": 8867500 }, { "epoch": 43.94, "learning_rate": 2.804031103382332e-05, "loss": 2.1647, "step": 8868000 }, { "epoch": 43.94, "learning_rate": 2.8039072447397237e-05, "loss": 2.1046, "step": 8868500 }, { "epoch": 43.94, "learning_rate": 2.8037833860971154e-05, "loss": 2.1093, "step": 8869000 }, { "epoch": 43.94, "learning_rate": 2.8036595274545067e-05, "loss": 2.122, "step": 8869500 }, { "epoch": 43.95, "learning_rate": 2.8035356688118984e-05, "loss": 2.1071, "step": 8870000 }, { "epoch": 43.95, "learning_rate": 2.8034120578865753e-05, "loss": 2.1453, "step": 8870500 }, { "epoch": 43.95, "learning_rate": 2.803288199243967e-05, "loss": 2.1184, "step": 8871000 }, { "epoch": 43.95, "learning_rate": 2.8031643406013587e-05, "loss": 2.1027, "step": 8871500 }, { "epoch": 43.95, "learning_rate": 2.8030404819587504e-05, "loss": 2.1113, "step": 8872000 }, { "epoch": 43.96, "learning_rate": 2.802916623316142e-05, "loss": 2.1343, "step": 8872500 }, { "epoch": 43.96, "learning_rate": 2.8027927646735334e-05, "loss": 2.091, "step": 8873000 }, { "epoch": 43.96, "learning_rate": 2.802668906030925e-05, "loss": 2.1072, "step": 8873500 }, { "epoch": 43.96, "learning_rate": 2.8025450473883168e-05, "loss": 2.1306, "step": 8874000 }, { "epoch": 43.97, "learning_rate": 2.8024211887457085e-05, "loss": 2.1418, "step": 8874500 }, { "epoch": 43.97, "learning_rate": 2.8022978255376702e-05, "loss": 2.1437, "step": 8875000 }, { "epoch": 43.97, "learning_rate": 2.8021742146123474e-05, "loss": 2.137, "step": 8875500 }, { "epoch": 43.97, "learning_rate": 2.802050355969739e-05, "loss": 2.1208, "step": 8876000 }, { "epoch": 43.98, "learning_rate": 2.801926497327131e-05, "loss": 2.1408, "step": 8876500 }, { "epoch": 43.98, "learning_rate": 2.8018028864018077e-05, "loss": 2.1212, "step": 8877000 }, { "epoch": 43.98, "learning_rate": 2.801679027759199e-05, "loss": 2.1392, "step": 8877500 }, { "epoch": 43.98, "learning_rate": 2.8015551691165908e-05, "loss": 2.1268, "step": 8878000 }, { "epoch": 43.99, "learning_rate": 2.8014313104739825e-05, "loss": 2.0959, "step": 8878500 }, { "epoch": 43.99, "learning_rate": 2.801307451831374e-05, "loss": 2.1253, "step": 8879000 }, { "epoch": 43.99, "learning_rate": 2.801183593188766e-05, "loss": 2.1358, "step": 8879500 }, { "epoch": 43.99, "learning_rate": 2.8010597345461575e-05, "loss": 2.1286, "step": 8880000 }, { "epoch": 44.0, "learning_rate": 2.8009358759035485e-05, "loss": 2.116, "step": 8880500 }, { "epoch": 44.0, "learning_rate": 2.8008122649782258e-05, "loss": 2.1385, "step": 8881000 }, { "epoch": 44.0, "eval_accuracy": 0.6667565139263253, "eval_accuracy_mlm": 0.6246655434301052, "eval_accuracy_nsp": 0.8653705105526771, "eval_loss": 2.29248309135437, "eval_runtime": 147.1117, "eval_samples_per_second": 1733.098, "eval_steps_per_second": 72.217, "step": 8881092 }, { "epoch": 44.0, "learning_rate": 2.8006884063356175e-05, "loss": 2.0963, "step": 8881500 }, { "epoch": 44.0, "learning_rate": 2.800564547693009e-05, "loss": 2.113, "step": 8882000 }, { "epoch": 44.01, "learning_rate": 2.800440689050401e-05, "loss": 2.1107, "step": 8882500 }, { "epoch": 44.01, "learning_rate": 2.8003168304077925e-05, "loss": 2.0895, "step": 8883000 }, { "epoch": 44.01, "learning_rate": 2.8001929717651836e-05, "loss": 2.1016, "step": 8883500 }, { "epoch": 44.01, "learning_rate": 2.8000691131225752e-05, "loss": 2.112, "step": 8884000 }, { "epoch": 44.02, "learning_rate": 2.799945254479967e-05, "loss": 2.1053, "step": 8884500 }, { "epoch": 44.02, "learning_rate": 2.799821643554644e-05, "loss": 2.0962, "step": 8885000 }, { "epoch": 44.02, "learning_rate": 2.799697784912036e-05, "loss": 2.0915, "step": 8885500 }, { "epoch": 44.02, "learning_rate": 2.7995739262694275e-05, "loss": 2.0952, "step": 8886000 }, { "epoch": 44.03, "learning_rate": 2.7994500676268192e-05, "loss": 2.0874, "step": 8886500 }, { "epoch": 44.03, "learning_rate": 2.7993262089842103e-05, "loss": 2.1096, "step": 8887000 }, { "epoch": 44.03, "learning_rate": 2.7992025980588875e-05, "loss": 2.089, "step": 8887500 }, { "epoch": 44.03, "learning_rate": 2.799078739416279e-05, "loss": 2.1143, "step": 8888000 }, { "epoch": 44.04, "learning_rate": 2.798954880773671e-05, "loss": 2.1132, "step": 8888500 }, { "epoch": 44.04, "learning_rate": 2.7988310221310626e-05, "loss": 2.1194, "step": 8889000 }, { "epoch": 44.04, "learning_rate": 2.7987071634884542e-05, "loss": 2.1018, "step": 8889500 }, { "epoch": 44.04, "learning_rate": 2.7985833048458453e-05, "loss": 2.1041, "step": 8890000 }, { "epoch": 44.05, "learning_rate": 2.798459446203237e-05, "loss": 2.08, "step": 8890500 }, { "epoch": 44.05, "learning_rate": 2.7983355875606286e-05, "loss": 2.0924, "step": 8891000 }, { "epoch": 44.05, "learning_rate": 2.7982117289180203e-05, "loss": 2.1105, "step": 8891500 }, { "epoch": 44.05, "learning_rate": 2.7980883657099827e-05, "loss": 2.1141, "step": 8892000 }, { "epoch": 44.06, "learning_rate": 2.7979647547846593e-05, "loss": 2.1525, "step": 8892500 }, { "epoch": 44.06, "learning_rate": 2.797840896142051e-05, "loss": 2.1343, "step": 8893000 }, { "epoch": 44.06, "learning_rate": 2.7977170374994427e-05, "loss": 2.1328, "step": 8893500 }, { "epoch": 44.06, "learning_rate": 2.7975931788568344e-05, "loss": 2.1065, "step": 8894000 }, { "epoch": 44.07, "learning_rate": 2.797469320214226e-05, "loss": 2.1252, "step": 8894500 }, { "epoch": 44.07, "learning_rate": 2.7973454615716178e-05, "loss": 2.1185, "step": 8895000 }, { "epoch": 44.07, "learning_rate": 2.7972216029290094e-05, "loss": 2.1152, "step": 8895500 }, { "epoch": 44.07, "learning_rate": 2.797097744286401e-05, "loss": 2.1088, "step": 8896000 }, { "epoch": 44.08, "learning_rate": 2.7969741333610777e-05, "loss": 2.1005, "step": 8896500 }, { "epoch": 44.08, "learning_rate": 2.7968505224357546e-05, "loss": 2.0879, "step": 8897000 }, { "epoch": 44.08, "learning_rate": 2.796726663793146e-05, "loss": 2.1223, "step": 8897500 }, { "epoch": 44.08, "learning_rate": 2.7966030528678235e-05, "loss": 2.0772, "step": 8898000 }, { "epoch": 44.09, "learning_rate": 2.7964791942252148e-05, "loss": 2.0842, "step": 8898500 }, { "epoch": 44.09, "learning_rate": 2.7963553355826065e-05, "loss": 2.1105, "step": 8899000 }, { "epoch": 44.09, "learning_rate": 2.7962314769399982e-05, "loss": 2.1018, "step": 8899500 }, { "epoch": 44.09, "learning_rate": 2.796107866014675e-05, "loss": 2.112, "step": 8900000 }, { "epoch": 44.1, "learning_rate": 2.7959840073720668e-05, "loss": 2.1071, "step": 8900500 }, { "epoch": 44.1, "learning_rate": 2.7958601487294585e-05, "loss": 2.104, "step": 8901000 }, { "epoch": 44.1, "learning_rate": 2.7957362900868502e-05, "loss": 2.1063, "step": 8901500 }, { "epoch": 44.1, "learning_rate": 2.7956126791615267e-05, "loss": 2.1168, "step": 8902000 }, { "epoch": 44.11, "learning_rate": 2.7954888205189184e-05, "loss": 2.0956, "step": 8902500 }, { "epoch": 44.11, "learning_rate": 2.79536496187631e-05, "loss": 2.1305, "step": 8903000 }, { "epoch": 44.11, "learning_rate": 2.7952411032337018e-05, "loss": 2.0999, "step": 8903500 }, { "epoch": 44.11, "learning_rate": 2.7951174923083783e-05, "loss": 2.0952, "step": 8904000 }, { "epoch": 44.12, "learning_rate": 2.79499363366577e-05, "loss": 2.0965, "step": 8904500 }, { "epoch": 44.12, "learning_rate": 2.7948697750231617e-05, "loss": 2.1042, "step": 8905000 }, { "epoch": 44.12, "learning_rate": 2.7947459163805534e-05, "loss": 2.0905, "step": 8905500 }, { "epoch": 44.12, "learning_rate": 2.794622057737945e-05, "loss": 2.1099, "step": 8906000 }, { "epoch": 44.13, "learning_rate": 2.7944981990953368e-05, "loss": 2.1102, "step": 8906500 }, { "epoch": 44.13, "learning_rate": 2.7943743404527285e-05, "loss": 2.1196, "step": 8907000 }, { "epoch": 44.13, "learning_rate": 2.7942504818101202e-05, "loss": 2.0998, "step": 8907500 }, { "epoch": 44.13, "learning_rate": 2.794126623167512e-05, "loss": 2.0874, "step": 8908000 }, { "epoch": 44.14, "learning_rate": 2.7940027645249032e-05, "loss": 2.125, "step": 8908500 }, { "epoch": 44.14, "learning_rate": 2.793878905882295e-05, "loss": 2.1002, "step": 8909000 }, { "epoch": 44.14, "learning_rate": 2.7937550472396863e-05, "loss": 2.1085, "step": 8909500 }, { "epoch": 44.14, "learning_rate": 2.793631188597078e-05, "loss": 2.1105, "step": 8910000 }, { "epoch": 44.15, "learning_rate": 2.7935073299544697e-05, "loss": 2.1256, "step": 8910500 }, { "epoch": 44.15, "learning_rate": 2.793383471311861e-05, "loss": 2.1022, "step": 8911000 }, { "epoch": 44.15, "learning_rate": 2.7932596126692527e-05, "loss": 2.1228, "step": 8911500 }, { "epoch": 44.15, "learning_rate": 2.7931357540266444e-05, "loss": 2.1061, "step": 8912000 }, { "epoch": 44.16, "learning_rate": 2.793011895384036e-05, "loss": 2.0964, "step": 8912500 }, { "epoch": 44.16, "learning_rate": 2.7928880367414278e-05, "loss": 2.1265, "step": 8913000 }, { "epoch": 44.16, "learning_rate": 2.7927641780988195e-05, "loss": 2.1298, "step": 8913500 }, { "epoch": 44.16, "learning_rate": 2.7926405671734964e-05, "loss": 2.0993, "step": 8914000 }, { "epoch": 44.17, "learning_rate": 2.7925167085308877e-05, "loss": 2.1061, "step": 8914500 }, { "epoch": 44.17, "learning_rate": 2.7923928498882794e-05, "loss": 2.1262, "step": 8915000 }, { "epoch": 44.17, "learning_rate": 2.792268991245671e-05, "loss": 2.0951, "step": 8915500 }, { "epoch": 44.17, "learning_rate": 2.7921451326030628e-05, "loss": 2.105, "step": 8916000 }, { "epoch": 44.18, "learning_rate": 2.7920215216777397e-05, "loss": 2.1053, "step": 8916500 }, { "epoch": 44.18, "learning_rate": 2.7918976630351314e-05, "loss": 2.1112, "step": 8917000 }, { "epoch": 44.18, "learning_rate": 2.7917740521098086e-05, "loss": 2.0988, "step": 8917500 }, { "epoch": 44.18, "learning_rate": 2.7916501934672003e-05, "loss": 2.097, "step": 8918000 }, { "epoch": 44.19, "learning_rate": 2.7915263348245913e-05, "loss": 2.12, "step": 8918500 }, { "epoch": 44.19, "learning_rate": 2.791402476181983e-05, "loss": 2.0885, "step": 8919000 }, { "epoch": 44.19, "learning_rate": 2.7912786175393747e-05, "loss": 2.1153, "step": 8919500 }, { "epoch": 44.19, "learning_rate": 2.7911547588967664e-05, "loss": 2.1124, "step": 8920000 }, { "epoch": 44.2, "learning_rate": 2.791030900254158e-05, "loss": 2.0934, "step": 8920500 }, { "epoch": 44.2, "learning_rate": 2.7909070416115494e-05, "loss": 2.1255, "step": 8921000 }, { "epoch": 44.2, "learning_rate": 2.790783430686227e-05, "loss": 2.0978, "step": 8921500 }, { "epoch": 44.2, "learning_rate": 2.7906598197609035e-05, "loss": 2.1198, "step": 8922000 }, { "epoch": 44.21, "learning_rate": 2.7905359611182952e-05, "loss": 2.0831, "step": 8922500 }, { "epoch": 44.21, "learning_rate": 2.790412102475687e-05, "loss": 2.1059, "step": 8923000 }, { "epoch": 44.21, "learning_rate": 2.7902882438330786e-05, "loss": 2.097, "step": 8923500 }, { "epoch": 44.21, "learning_rate": 2.7901643851904703e-05, "loss": 2.0953, "step": 8924000 }, { "epoch": 44.22, "learning_rate": 2.790040526547862e-05, "loss": 2.1006, "step": 8924500 }, { "epoch": 44.22, "learning_rate": 2.789916667905253e-05, "loss": 2.0946, "step": 8925000 }, { "epoch": 44.22, "learning_rate": 2.7897928092626447e-05, "loss": 2.104, "step": 8925500 }, { "epoch": 44.22, "learning_rate": 2.7896689506200364e-05, "loss": 2.0891, "step": 8926000 }, { "epoch": 44.22, "learning_rate": 2.789545091977428e-05, "loss": 2.1023, "step": 8926500 }, { "epoch": 44.23, "learning_rate": 2.7894214810521053e-05, "loss": 2.1098, "step": 8927000 }, { "epoch": 44.23, "learning_rate": 2.789297622409497e-05, "loss": 2.0844, "step": 8927500 }, { "epoch": 44.23, "learning_rate": 2.789173763766888e-05, "loss": 2.0866, "step": 8928000 }, { "epoch": 44.23, "learning_rate": 2.7890501528415652e-05, "loss": 2.1093, "step": 8928500 }, { "epoch": 44.24, "learning_rate": 2.788926294198957e-05, "loss": 2.0934, "step": 8929000 }, { "epoch": 44.24, "learning_rate": 2.7888024355563486e-05, "loss": 2.0882, "step": 8929500 }, { "epoch": 44.24, "learning_rate": 2.7886785769137403e-05, "loss": 2.1162, "step": 8930000 }, { "epoch": 44.24, "learning_rate": 2.788554718271132e-05, "loss": 2.0998, "step": 8930500 }, { "epoch": 44.25, "learning_rate": 2.788430859628523e-05, "loss": 2.1047, "step": 8931000 }, { "epoch": 44.25, "learning_rate": 2.7883070009859147e-05, "loss": 2.0907, "step": 8931500 }, { "epoch": 44.25, "learning_rate": 2.7881831423433064e-05, "loss": 2.1269, "step": 8932000 }, { "epoch": 44.25, "learning_rate": 2.788059283700698e-05, "loss": 2.1206, "step": 8932500 }, { "epoch": 44.26, "learning_rate": 2.7879354250580898e-05, "loss": 2.0827, "step": 8933000 }, { "epoch": 44.26, "learning_rate": 2.787811814132767e-05, "loss": 2.1141, "step": 8933500 }, { "epoch": 44.26, "learning_rate": 2.7876882032074435e-05, "loss": 2.1163, "step": 8934000 }, { "epoch": 44.26, "learning_rate": 2.7875645922821204e-05, "loss": 2.1116, "step": 8934500 }, { "epoch": 44.27, "learning_rate": 2.787440733639512e-05, "loss": 2.1141, "step": 8935000 }, { "epoch": 44.27, "learning_rate": 2.7873173704314742e-05, "loss": 2.1165, "step": 8935500 }, { "epoch": 44.27, "learning_rate": 2.787193511788866e-05, "loss": 2.1061, "step": 8936000 }, { "epoch": 44.27, "learning_rate": 2.7870696531462576e-05, "loss": 2.1144, "step": 8936500 }, { "epoch": 44.28, "learning_rate": 2.7869457945036493e-05, "loss": 2.0753, "step": 8937000 }, { "epoch": 44.28, "learning_rate": 2.786821935861041e-05, "loss": 2.1, "step": 8937500 }, { "epoch": 44.28, "learning_rate": 2.786698077218432e-05, "loss": 2.0888, "step": 8938000 }, { "epoch": 44.28, "learning_rate": 2.7865742185758237e-05, "loss": 2.1068, "step": 8938500 }, { "epoch": 44.29, "learning_rate": 2.7864503599332154e-05, "loss": 2.1095, "step": 8939000 }, { "epoch": 44.29, "learning_rate": 2.786326501290607e-05, "loss": 2.0953, "step": 8939500 }, { "epoch": 44.29, "learning_rate": 2.7862026426479987e-05, "loss": 2.1206, "step": 8940000 }, { "epoch": 44.29, "learning_rate": 2.7860787840053904e-05, "loss": 2.1039, "step": 8940500 }, { "epoch": 44.3, "learning_rate": 2.785954925362782e-05, "loss": 2.0986, "step": 8941000 }, { "epoch": 44.3, "learning_rate": 2.7858310667201738e-05, "loss": 2.1086, "step": 8941500 }, { "epoch": 44.3, "learning_rate": 2.7857072080775655e-05, "loss": 2.1257, "step": 8942000 }, { "epoch": 44.3, "learning_rate": 2.785583349434957e-05, "loss": 2.1029, "step": 8942500 }, { "epoch": 44.31, "learning_rate": 2.7854594907923486e-05, "loss": 2.1003, "step": 8943000 }, { "epoch": 44.31, "learning_rate": 2.7853356321497403e-05, "loss": 2.1103, "step": 8943500 }, { "epoch": 44.31, "learning_rate": 2.785211773507132e-05, "loss": 2.0955, "step": 8944000 }, { "epoch": 44.31, "learning_rate": 2.7850879148645236e-05, "loss": 2.1102, "step": 8944500 }, { "epoch": 44.32, "learning_rate": 2.7849640562219153e-05, "loss": 2.102, "step": 8945000 }, { "epoch": 44.32, "learning_rate": 2.7848404452965922e-05, "loss": 2.1145, "step": 8945500 }, { "epoch": 44.32, "learning_rate": 2.7847165866539836e-05, "loss": 2.1045, "step": 8946000 }, { "epoch": 44.32, "learning_rate": 2.7845927280113753e-05, "loss": 2.1279, "step": 8946500 }, { "epoch": 44.33, "learning_rate": 2.784469117086052e-05, "loss": 2.1004, "step": 8947000 }, { "epoch": 44.33, "learning_rate": 2.784345258443444e-05, "loss": 2.1049, "step": 8947500 }, { "epoch": 44.33, "learning_rate": 2.7842213998008355e-05, "loss": 2.1137, "step": 8948000 }, { "epoch": 44.33, "learning_rate": 2.7840975411582272e-05, "loss": 2.1081, "step": 8948500 }, { "epoch": 44.34, "learning_rate": 2.7839736825156186e-05, "loss": 2.121, "step": 8949000 }, { "epoch": 44.34, "learning_rate": 2.7838498238730103e-05, "loss": 2.142, "step": 8949500 }, { "epoch": 44.34, "learning_rate": 2.783725965230402e-05, "loss": 2.0863, "step": 8950000 }, { "epoch": 44.34, "learning_rate": 2.7836021065877937e-05, "loss": 2.1133, "step": 8950500 }, { "epoch": 44.35, "learning_rate": 2.7834782479451853e-05, "loss": 2.0929, "step": 8951000 }, { "epoch": 44.35, "learning_rate": 2.7833546370198622e-05, "loss": 2.1138, "step": 8951500 }, { "epoch": 44.35, "learning_rate": 2.783230778377254e-05, "loss": 2.0983, "step": 8952000 }, { "epoch": 44.35, "learning_rate": 2.7831069197346453e-05, "loss": 2.1417, "step": 8952500 }, { "epoch": 44.36, "learning_rate": 2.782983061092037e-05, "loss": 2.098, "step": 8953000 }, { "epoch": 44.36, "learning_rate": 2.7828592024494287e-05, "loss": 2.1109, "step": 8953500 }, { "epoch": 44.36, "learning_rate": 2.7827353438068204e-05, "loss": 2.0869, "step": 8954000 }, { "epoch": 44.36, "learning_rate": 2.782611485164212e-05, "loss": 2.1014, "step": 8954500 }, { "epoch": 44.37, "learning_rate": 2.7824876265216037e-05, "loss": 2.0948, "step": 8955000 }, { "epoch": 44.37, "learning_rate": 2.7823637678789954e-05, "loss": 2.1088, "step": 8955500 }, { "epoch": 44.37, "learning_rate": 2.782240156953672e-05, "loss": 2.1049, "step": 8956000 }, { "epoch": 44.37, "learning_rate": 2.782116546028349e-05, "loss": 2.1204, "step": 8956500 }, { "epoch": 44.38, "learning_rate": 2.7819926873857405e-05, "loss": 2.1134, "step": 8957000 }, { "epoch": 44.38, "learning_rate": 2.781869076460417e-05, "loss": 2.1118, "step": 8957500 }, { "epoch": 44.38, "learning_rate": 2.7817452178178088e-05, "loss": 2.1116, "step": 8958000 }, { "epoch": 44.38, "learning_rate": 2.7816213591752005e-05, "loss": 2.1117, "step": 8958500 }, { "epoch": 44.39, "learning_rate": 2.7814975005325922e-05, "loss": 2.0866, "step": 8959000 }, { "epoch": 44.39, "learning_rate": 2.781373641889984e-05, "loss": 2.0915, "step": 8959500 }, { "epoch": 44.39, "learning_rate": 2.7812497832473756e-05, "loss": 2.1217, "step": 8960000 }, { "epoch": 44.39, "learning_rate": 2.7811259246047672e-05, "loss": 2.0868, "step": 8960500 }, { "epoch": 44.4, "learning_rate": 2.781002065962159e-05, "loss": 2.0901, "step": 8961000 }, { "epoch": 44.4, "learning_rate": 2.7808782073195506e-05, "loss": 2.0989, "step": 8961500 }, { "epoch": 44.4, "learning_rate": 2.7807543486769423e-05, "loss": 2.0888, "step": 8962000 }, { "epoch": 44.4, "learning_rate": 2.780630737751619e-05, "loss": 2.1072, "step": 8962500 }, { "epoch": 44.41, "learning_rate": 2.7805068791090106e-05, "loss": 2.0992, "step": 8963000 }, { "epoch": 44.41, "learning_rate": 2.7803830204664023e-05, "loss": 2.1266, "step": 8963500 }, { "epoch": 44.41, "learning_rate": 2.780259161823794e-05, "loss": 2.0912, "step": 8964000 }, { "epoch": 44.41, "learning_rate": 2.7801355508984705e-05, "loss": 2.1155, "step": 8964500 }, { "epoch": 44.42, "learning_rate": 2.7800116922558622e-05, "loss": 2.1248, "step": 8965000 }, { "epoch": 44.42, "learning_rate": 2.779887833613254e-05, "loss": 2.1074, "step": 8965500 }, { "epoch": 44.42, "learning_rate": 2.7797639749706456e-05, "loss": 2.1082, "step": 8966000 }, { "epoch": 44.42, "learning_rate": 2.7796401163280373e-05, "loss": 2.097, "step": 8966500 }, { "epoch": 44.43, "learning_rate": 2.779516257685429e-05, "loss": 2.1098, "step": 8967000 }, { "epoch": 44.43, "learning_rate": 2.7793923990428206e-05, "loss": 2.0942, "step": 8967500 }, { "epoch": 44.43, "learning_rate": 2.7792685404002123e-05, "loss": 2.0971, "step": 8968000 }, { "epoch": 44.43, "learning_rate": 2.779144681757604e-05, "loss": 2.1015, "step": 8968500 }, { "epoch": 44.44, "learning_rate": 2.7790210708322806e-05, "loss": 2.0923, "step": 8969000 }, { "epoch": 44.44, "learning_rate": 2.7788972121896723e-05, "loss": 2.0941, "step": 8969500 }, { "epoch": 44.44, "learning_rate": 2.778773353547064e-05, "loss": 2.0986, "step": 8970000 }, { "epoch": 44.44, "learning_rate": 2.7786494949044557e-05, "loss": 2.114, "step": 8970500 }, { "epoch": 44.45, "learning_rate": 2.7785258839791322e-05, "loss": 2.1255, "step": 8971000 }, { "epoch": 44.45, "learning_rate": 2.778402025336524e-05, "loss": 2.1231, "step": 8971500 }, { "epoch": 44.45, "learning_rate": 2.7782781666939156e-05, "loss": 2.107, "step": 8972000 }, { "epoch": 44.45, "learning_rate": 2.7781543080513073e-05, "loss": 2.1082, "step": 8972500 }, { "epoch": 44.46, "learning_rate": 2.778030449408699e-05, "loss": 2.0939, "step": 8973000 }, { "epoch": 44.46, "learning_rate": 2.7779065907660907e-05, "loss": 2.1224, "step": 8973500 }, { "epoch": 44.46, "learning_rate": 2.7777827321234824e-05, "loss": 2.1173, "step": 8974000 }, { "epoch": 44.46, "learning_rate": 2.777658873480874e-05, "loss": 2.1177, "step": 8974500 }, { "epoch": 44.47, "learning_rate": 2.7775350148382657e-05, "loss": 2.1266, "step": 8975000 }, { "epoch": 44.47, "learning_rate": 2.777411899347513e-05, "loss": 2.1223, "step": 8975500 }, { "epoch": 44.47, "learning_rate": 2.7772880407049047e-05, "loss": 2.1195, "step": 8976000 }, { "epoch": 44.47, "learning_rate": 2.7771641820622964e-05, "loss": 2.1371, "step": 8976500 }, { "epoch": 44.48, "learning_rate": 2.7770403234196877e-05, "loss": 2.1325, "step": 8977000 }, { "epoch": 44.48, "learning_rate": 2.7769164647770794e-05, "loss": 2.1208, "step": 8977500 }, { "epoch": 44.48, "learning_rate": 2.776792606134471e-05, "loss": 2.1273, "step": 8978000 }, { "epoch": 44.48, "learning_rate": 2.7766687474918625e-05, "loss": 2.1031, "step": 8978500 }, { "epoch": 44.49, "learning_rate": 2.776544888849254e-05, "loss": 2.1193, "step": 8979000 }, { "epoch": 44.49, "learning_rate": 2.7764212779239314e-05, "loss": 2.1072, "step": 8979500 }, { "epoch": 44.49, "learning_rate": 2.776297419281323e-05, "loss": 2.1215, "step": 8980000 }, { "epoch": 44.49, "learning_rate": 2.7761735606387144e-05, "loss": 2.1075, "step": 8980500 }, { "epoch": 44.49, "learning_rate": 2.7760499497133913e-05, "loss": 2.1142, "step": 8981000 }, { "epoch": 44.5, "learning_rate": 2.775926091070783e-05, "loss": 2.1164, "step": 8981500 }, { "epoch": 44.5, "learning_rate": 2.7758022324281747e-05, "loss": 2.1103, "step": 8982000 }, { "epoch": 44.5, "learning_rate": 2.7756783737855664e-05, "loss": 2.1103, "step": 8982500 }, { "epoch": 44.5, "learning_rate": 2.775554515142958e-05, "loss": 2.0884, "step": 8983000 }, { "epoch": 44.51, "learning_rate": 2.7754306565003498e-05, "loss": 2.0968, "step": 8983500 }, { "epoch": 44.51, "learning_rate": 2.775306797857741e-05, "loss": 2.0982, "step": 8984000 }, { "epoch": 44.51, "learning_rate": 2.7751829392151328e-05, "loss": 2.1189, "step": 8984500 }, { "epoch": 44.51, "learning_rate": 2.7750593282898097e-05, "loss": 2.1106, "step": 8985000 }, { "epoch": 44.52, "learning_rate": 2.7749354696472014e-05, "loss": 2.1359, "step": 8985500 }, { "epoch": 44.52, "learning_rate": 2.774811611004593e-05, "loss": 2.0977, "step": 8986000 }, { "epoch": 44.52, "learning_rate": 2.7746877523619848e-05, "loss": 2.106, "step": 8986500 }, { "epoch": 44.52, "learning_rate": 2.774563893719376e-05, "loss": 2.0944, "step": 8987000 }, { "epoch": 44.53, "learning_rate": 2.774440035076768e-05, "loss": 2.1188, "step": 8987500 }, { "epoch": 44.53, "learning_rate": 2.7743161764341592e-05, "loss": 2.1167, "step": 8988000 }, { "epoch": 44.53, "learning_rate": 2.774192317791551e-05, "loss": 2.0897, "step": 8988500 }, { "epoch": 44.53, "learning_rate": 2.774068706866228e-05, "loss": 2.1184, "step": 8989000 }, { "epoch": 44.54, "learning_rate": 2.7739450959409046e-05, "loss": 2.1342, "step": 8989500 }, { "epoch": 44.54, "learning_rate": 2.7738212372982963e-05, "loss": 2.0856, "step": 8990000 }, { "epoch": 44.54, "learning_rate": 2.773697378655688e-05, "loss": 2.1062, "step": 8990500 }, { "epoch": 44.54, "learning_rate": 2.7735737677303646e-05, "loss": 2.107, "step": 8991000 }, { "epoch": 44.55, "learning_rate": 2.7734499090877563e-05, "loss": 2.1191, "step": 8991500 }, { "epoch": 44.55, "learning_rate": 2.773326050445148e-05, "loss": 2.1289, "step": 8992000 }, { "epoch": 44.55, "learning_rate": 2.7732021918025396e-05, "loss": 2.1039, "step": 8992500 }, { "epoch": 44.55, "learning_rate": 2.7730783331599313e-05, "loss": 2.1001, "step": 8993000 }, { "epoch": 44.56, "learning_rate": 2.772954474517323e-05, "loss": 2.1139, "step": 8993500 }, { "epoch": 44.56, "learning_rate": 2.7728306158747147e-05, "loss": 2.1044, "step": 8994000 }, { "epoch": 44.56, "learning_rate": 2.7727067572321064e-05, "loss": 2.1177, "step": 8994500 }, { "epoch": 44.56, "learning_rate": 2.772583146306783e-05, "loss": 2.1211, "step": 8995000 }, { "epoch": 44.57, "learning_rate": 2.7724592876641747e-05, "loss": 2.1126, "step": 8995500 }, { "epoch": 44.57, "learning_rate": 2.7723354290215663e-05, "loss": 2.1232, "step": 8996000 }, { "epoch": 44.57, "learning_rate": 2.772211570378958e-05, "loss": 2.0966, "step": 8996500 }, { "epoch": 44.57, "learning_rate": 2.7720877117363497e-05, "loss": 2.1319, "step": 8997000 }, { "epoch": 44.58, "learning_rate": 2.7719638530937414e-05, "loss": 2.0995, "step": 8997500 }, { "epoch": 44.58, "learning_rate": 2.771839994451133e-05, "loss": 2.0911, "step": 8998000 }, { "epoch": 44.58, "learning_rate": 2.7717161358085248e-05, "loss": 2.1106, "step": 8998500 }, { "epoch": 44.58, "learning_rate": 2.7715922771659165e-05, "loss": 2.1346, "step": 8999000 }, { "epoch": 44.59, "learning_rate": 2.7714684185233082e-05, "loss": 2.1099, "step": 8999500 }, { "epoch": 44.59, "learning_rate": 2.7713445598807e-05, "loss": 2.1613, "step": 9000000 }, { "epoch": 44.59, "learning_rate": 2.771220701238091e-05, "loss": 2.11, "step": 9000500 }, { "epoch": 44.59, "learning_rate": 2.7710968425954826e-05, "loss": 2.1206, "step": 9001000 }, { "epoch": 44.6, "learning_rate": 2.7709729839528743e-05, "loss": 2.083, "step": 9001500 }, { "epoch": 44.6, "learning_rate": 2.7708493730275515e-05, "loss": 2.1203, "step": 9002000 }, { "epoch": 44.6, "learning_rate": 2.7707255143849432e-05, "loss": 2.095, "step": 9002500 }, { "epoch": 44.6, "learning_rate": 2.770601655742335e-05, "loss": 2.1107, "step": 9003000 }, { "epoch": 44.61, "learning_rate": 2.770477797099726e-05, "loss": 2.0977, "step": 9003500 }, { "epoch": 44.61, "learning_rate": 2.7703539384571176e-05, "loss": 2.1077, "step": 9004000 }, { "epoch": 44.61, "learning_rate": 2.7702300798145093e-05, "loss": 2.1122, "step": 9004500 }, { "epoch": 44.61, "learning_rate": 2.7701064688891865e-05, "loss": 2.1252, "step": 9005000 }, { "epoch": 44.62, "learning_rate": 2.7699826102465782e-05, "loss": 2.1124, "step": 9005500 }, { "epoch": 44.62, "learning_rate": 2.76985875160397e-05, "loss": 2.1419, "step": 9006000 }, { "epoch": 44.62, "learning_rate": 2.7697348929613616e-05, "loss": 2.1044, "step": 9006500 }, { "epoch": 44.62, "learning_rate": 2.7696110343187526e-05, "loss": 2.1144, "step": 9007000 }, { "epoch": 44.63, "learning_rate": 2.7694871756761443e-05, "loss": 2.1108, "step": 9007500 }, { "epoch": 44.63, "learning_rate": 2.769363317033536e-05, "loss": 2.0846, "step": 9008000 }, { "epoch": 44.63, "learning_rate": 2.7692394583909277e-05, "loss": 2.1029, "step": 9008500 }, { "epoch": 44.63, "learning_rate": 2.7691155997483194e-05, "loss": 2.1199, "step": 9009000 }, { "epoch": 44.64, "learning_rate": 2.768991741105711e-05, "loss": 2.1373, "step": 9009500 }, { "epoch": 44.64, "learning_rate": 2.7688678824631024e-05, "loss": 2.1234, "step": 9010000 }, { "epoch": 44.64, "learning_rate": 2.768744023820494e-05, "loss": 2.1188, "step": 9010500 }, { "epoch": 44.64, "learning_rate": 2.7686201651778858e-05, "loss": 2.1174, "step": 9011000 }, { "epoch": 44.65, "learning_rate": 2.7684965542525627e-05, "loss": 2.1069, "step": 9011500 }, { "epoch": 44.65, "learning_rate": 2.76837294332724e-05, "loss": 2.1079, "step": 9012000 }, { "epoch": 44.65, "learning_rate": 2.7682490846846316e-05, "loss": 2.1169, "step": 9012500 }, { "epoch": 44.65, "learning_rate": 2.7681252260420226e-05, "loss": 2.1052, "step": 9013000 }, { "epoch": 44.66, "learning_rate": 2.7680013673994143e-05, "loss": 2.12, "step": 9013500 }, { "epoch": 44.66, "learning_rate": 2.7678777564740915e-05, "loss": 2.1092, "step": 9014000 }, { "epoch": 44.66, "learning_rate": 2.7677538978314832e-05, "loss": 2.109, "step": 9014500 }, { "epoch": 44.66, "learning_rate": 2.767630039188875e-05, "loss": 2.1162, "step": 9015000 }, { "epoch": 44.67, "learning_rate": 2.7675061805462666e-05, "loss": 2.1137, "step": 9015500 }, { "epoch": 44.67, "learning_rate": 2.7673823219036576e-05, "loss": 2.1264, "step": 9016000 }, { "epoch": 44.67, "learning_rate": 2.767258710978335e-05, "loss": 2.0888, "step": 9016500 }, { "epoch": 44.67, "learning_rate": 2.7671348523357265e-05, "loss": 2.1212, "step": 9017000 }, { "epoch": 44.68, "learning_rate": 2.7670109936931182e-05, "loss": 2.0983, "step": 9017500 }, { "epoch": 44.68, "learning_rate": 2.76688763048508e-05, "loss": 2.0997, "step": 9018000 }, { "epoch": 44.68, "learning_rate": 2.7667637718424717e-05, "loss": 2.1083, "step": 9018500 }, { "epoch": 44.68, "learning_rate": 2.7666399131998633e-05, "loss": 2.1143, "step": 9019000 }, { "epoch": 44.69, "learning_rate": 2.766516054557255e-05, "loss": 2.1251, "step": 9019500 }, { "epoch": 44.69, "learning_rate": 2.7663921959146467e-05, "loss": 2.1045, "step": 9020000 }, { "epoch": 44.69, "learning_rate": 2.7662683372720384e-05, "loss": 2.1194, "step": 9020500 }, { "epoch": 44.69, "learning_rate": 2.766144726346715e-05, "loss": 2.109, "step": 9021000 }, { "epoch": 44.7, "learning_rate": 2.7660208677041067e-05, "loss": 2.1203, "step": 9021500 }, { "epoch": 44.7, "learning_rate": 2.7658970090614984e-05, "loss": 2.1122, "step": 9022000 }, { "epoch": 44.7, "learning_rate": 2.76577315041889e-05, "loss": 2.1285, "step": 9022500 }, { "epoch": 44.7, "learning_rate": 2.7656492917762817e-05, "loss": 2.1163, "step": 9023000 }, { "epoch": 44.71, "learning_rate": 2.7655254331336734e-05, "loss": 2.1161, "step": 9023500 }, { "epoch": 44.71, "learning_rate": 2.765401574491065e-05, "loss": 2.1034, "step": 9024000 }, { "epoch": 44.71, "learning_rate": 2.7652777158484565e-05, "loss": 2.0967, "step": 9024500 }, { "epoch": 44.71, "learning_rate": 2.7651538572058482e-05, "loss": 2.0903, "step": 9025000 }, { "epoch": 44.72, "learning_rate": 2.76502999856324e-05, "loss": 2.118, "step": 9025500 }, { "epoch": 44.72, "learning_rate": 2.7649061399206316e-05, "loss": 2.1258, "step": 9026000 }, { "epoch": 44.72, "learning_rate": 2.7647822812780233e-05, "loss": 2.0955, "step": 9026500 }, { "epoch": 44.72, "learning_rate": 2.764658422635415e-05, "loss": 2.0933, "step": 9027000 }, { "epoch": 44.73, "learning_rate": 2.7645345639928066e-05, "loss": 2.1276, "step": 9027500 }, { "epoch": 44.73, "learning_rate": 2.7644109530674832e-05, "loss": 2.1102, "step": 9028000 }, { "epoch": 44.73, "learning_rate": 2.764287094424875e-05, "loss": 2.1092, "step": 9028500 }, { "epoch": 44.73, "learning_rate": 2.7641634834995518e-05, "loss": 2.1048, "step": 9029000 }, { "epoch": 44.74, "learning_rate": 2.7640401202915138e-05, "loss": 2.1192, "step": 9029500 }, { "epoch": 44.74, "learning_rate": 2.7639162616489055e-05, "loss": 2.1125, "step": 9030000 }, { "epoch": 44.74, "learning_rate": 2.7637924030062972e-05, "loss": 2.0913, "step": 9030500 }, { "epoch": 44.74, "learning_rate": 2.763668544363689e-05, "loss": 2.084, "step": 9031000 }, { "epoch": 44.75, "learning_rate": 2.7635446857210806e-05, "loss": 2.1214, "step": 9031500 }, { "epoch": 44.75, "learning_rate": 2.7634208270784723e-05, "loss": 2.1286, "step": 9032000 }, { "epoch": 44.75, "learning_rate": 2.7632969684358633e-05, "loss": 2.1174, "step": 9032500 }, { "epoch": 44.75, "learning_rate": 2.763173109793255e-05, "loss": 2.1235, "step": 9033000 }, { "epoch": 44.76, "learning_rate": 2.7630492511506467e-05, "loss": 2.107, "step": 9033500 }, { "epoch": 44.76, "learning_rate": 2.7629253925080384e-05, "loss": 2.1255, "step": 9034000 }, { "epoch": 44.76, "learning_rate": 2.76280153386543e-05, "loss": 2.1012, "step": 9034500 }, { "epoch": 44.76, "learning_rate": 2.7626776752228218e-05, "loss": 2.1234, "step": 9035000 }, { "epoch": 44.76, "learning_rate": 2.7625538165802135e-05, "loss": 2.1093, "step": 9035500 }, { "epoch": 44.77, "learning_rate": 2.762429957937605e-05, "loss": 2.0998, "step": 9036000 }, { "epoch": 44.77, "learning_rate": 2.762306099294997e-05, "loss": 2.1237, "step": 9036500 }, { "epoch": 44.77, "learning_rate": 2.7621824883696734e-05, "loss": 2.14, "step": 9037000 }, { "epoch": 44.77, "learning_rate": 2.762058629727065e-05, "loss": 2.1177, "step": 9037500 }, { "epoch": 44.78, "learning_rate": 2.7619347710844568e-05, "loss": 2.1155, "step": 9038000 }, { "epoch": 44.78, "learning_rate": 2.7618109124418485e-05, "loss": 2.1032, "step": 9038500 }, { "epoch": 44.78, "learning_rate": 2.76168705379924e-05, "loss": 2.1124, "step": 9039000 }, { "epoch": 44.78, "learning_rate": 2.7615634428739167e-05, "loss": 2.1459, "step": 9039500 }, { "epoch": 44.79, "learning_rate": 2.7614395842313084e-05, "loss": 2.0993, "step": 9040000 }, { "epoch": 44.79, "learning_rate": 2.7613157255887e-05, "loss": 2.1161, "step": 9040500 }, { "epoch": 44.79, "learning_rate": 2.7611918669460918e-05, "loss": 2.1249, "step": 9041000 }, { "epoch": 44.79, "learning_rate": 2.7610680083034835e-05, "loss": 2.1313, "step": 9041500 }, { "epoch": 44.8, "learning_rate": 2.760944149660875e-05, "loss": 2.1037, "step": 9042000 }, { "epoch": 44.8, "learning_rate": 2.760820291018267e-05, "loss": 2.1111, "step": 9042500 }, { "epoch": 44.8, "learning_rate": 2.7606964323756585e-05, "loss": 2.0971, "step": 9043000 }, { "epoch": 44.8, "learning_rate": 2.760572821450335e-05, "loss": 2.1185, "step": 9043500 }, { "epoch": 44.81, "learning_rate": 2.7604489628077268e-05, "loss": 2.1009, "step": 9044000 }, { "epoch": 44.81, "learning_rate": 2.7603251041651185e-05, "loss": 2.1266, "step": 9044500 }, { "epoch": 44.81, "learning_rate": 2.76020124552251e-05, "loss": 2.1393, "step": 9045000 }, { "epoch": 44.81, "learning_rate": 2.760077386879902e-05, "loss": 2.0865, "step": 9045500 }, { "epoch": 44.82, "learning_rate": 2.7599537759545784e-05, "loss": 2.1322, "step": 9046000 }, { "epoch": 44.82, "learning_rate": 2.75982991731197e-05, "loss": 2.1287, "step": 9046500 }, { "epoch": 44.82, "learning_rate": 2.7597060586693618e-05, "loss": 2.1061, "step": 9047000 }, { "epoch": 44.82, "learning_rate": 2.7595822000267535e-05, "loss": 2.1187, "step": 9047500 }, { "epoch": 44.83, "learning_rate": 2.7594583413841452e-05, "loss": 2.1208, "step": 9048000 }, { "epoch": 44.83, "learning_rate": 2.7593347304588217e-05, "loss": 2.1476, "step": 9048500 }, { "epoch": 44.83, "learning_rate": 2.7592108718162134e-05, "loss": 2.1176, "step": 9049000 }, { "epoch": 44.83, "learning_rate": 2.759087013173605e-05, "loss": 2.1153, "step": 9049500 }, { "epoch": 44.84, "learning_rate": 2.7589631545309968e-05, "loss": 2.1249, "step": 9050000 }, { "epoch": 44.84, "learning_rate": 2.7588392958883885e-05, "loss": 2.1154, "step": 9050500 }, { "epoch": 44.84, "learning_rate": 2.7587154372457802e-05, "loss": 2.1202, "step": 9051000 }, { "epoch": 44.84, "learning_rate": 2.758591578603172e-05, "loss": 2.1199, "step": 9051500 }, { "epoch": 44.85, "learning_rate": 2.7584677199605636e-05, "loss": 2.1191, "step": 9052000 }, { "epoch": 44.85, "learning_rate": 2.7583438613179553e-05, "loss": 2.1325, "step": 9052500 }, { "epoch": 44.85, "learning_rate": 2.758220002675347e-05, "loss": 2.1087, "step": 9053000 }, { "epoch": 44.85, "learning_rate": 2.7580963917500235e-05, "loss": 2.1095, "step": 9053500 }, { "epoch": 44.86, "learning_rate": 2.7579725331074152e-05, "loss": 2.1156, "step": 9054000 }, { "epoch": 44.86, "learning_rate": 2.757848674464807e-05, "loss": 2.1001, "step": 9054500 }, { "epoch": 44.86, "learning_rate": 2.7577248158221986e-05, "loss": 2.1278, "step": 9055000 }, { "epoch": 44.86, "learning_rate": 2.7576009571795903e-05, "loss": 2.118, "step": 9055500 }, { "epoch": 44.87, "learning_rate": 2.757477098536982e-05, "loss": 2.0871, "step": 9056000 }, { "epoch": 44.87, "learning_rate": 2.7573534876116585e-05, "loss": 2.1298, "step": 9056500 }, { "epoch": 44.87, "learning_rate": 2.7572298766863357e-05, "loss": 2.12, "step": 9057000 }, { "epoch": 44.87, "learning_rate": 2.757106018043727e-05, "loss": 2.0991, "step": 9057500 }, { "epoch": 44.88, "learning_rate": 2.7569821594011184e-05, "loss": 2.1159, "step": 9058000 }, { "epoch": 44.88, "learning_rate": 2.75685830075851e-05, "loss": 2.122, "step": 9058500 }, { "epoch": 44.88, "learning_rate": 2.7567344421159018e-05, "loss": 2.123, "step": 9059000 }, { "epoch": 44.88, "learning_rate": 2.7566105834732935e-05, "loss": 2.1192, "step": 9059500 }, { "epoch": 44.89, "learning_rate": 2.7564869725479707e-05, "loss": 2.1548, "step": 9060000 }, { "epoch": 44.89, "learning_rate": 2.756363113905362e-05, "loss": 2.1214, "step": 9060500 }, { "epoch": 44.89, "learning_rate": 2.7562392552627538e-05, "loss": 2.1256, "step": 9061000 }, { "epoch": 44.89, "learning_rate": 2.756115396620145e-05, "loss": 2.1136, "step": 9061500 }, { "epoch": 44.9, "learning_rate": 2.7559915379775368e-05, "loss": 2.1106, "step": 9062000 }, { "epoch": 44.9, "learning_rate": 2.7558676793349285e-05, "loss": 2.0866, "step": 9062500 }, { "epoch": 44.9, "learning_rate": 2.7557440684096057e-05, "loss": 2.0822, "step": 9063000 }, { "epoch": 44.9, "learning_rate": 2.7556204574842826e-05, "loss": 2.1299, "step": 9063500 }, { "epoch": 44.91, "learning_rate": 2.7554965988416743e-05, "loss": 2.1029, "step": 9064000 }, { "epoch": 44.91, "learning_rate": 2.755372740199066e-05, "loss": 2.1183, "step": 9064500 }, { "epoch": 44.91, "learning_rate": 2.7552488815564577e-05, "loss": 2.1512, "step": 9065000 }, { "epoch": 44.91, "learning_rate": 2.7551250229138494e-05, "loss": 2.153, "step": 9065500 }, { "epoch": 44.92, "learning_rate": 2.7550011642712407e-05, "loss": 2.1564, "step": 9066000 }, { "epoch": 44.92, "learning_rate": 2.7548773056286324e-05, "loss": 2.096, "step": 9066500 }, { "epoch": 44.92, "learning_rate": 2.7547534469860238e-05, "loss": 2.1422, "step": 9067000 }, { "epoch": 44.92, "learning_rate": 2.754629836060701e-05, "loss": 2.1195, "step": 9067500 }, { "epoch": 44.93, "learning_rate": 2.7545059774180927e-05, "loss": 2.1149, "step": 9068000 }, { "epoch": 44.93, "learning_rate": 2.7543821187754844e-05, "loss": 2.1111, "step": 9068500 }, { "epoch": 44.93, "learning_rate": 2.7542582601328757e-05, "loss": 2.1015, "step": 9069000 }, { "epoch": 44.93, "learning_rate": 2.7541344014902674e-05, "loss": 2.1409, "step": 9069500 }, { "epoch": 44.94, "learning_rate": 2.7540105428476588e-05, "loss": 2.1049, "step": 9070000 }, { "epoch": 44.94, "learning_rate": 2.753886931922336e-05, "loss": 2.1084, "step": 9070500 }, { "epoch": 44.94, "learning_rate": 2.7537630732797277e-05, "loss": 2.1125, "step": 9071000 }, { "epoch": 44.94, "learning_rate": 2.7536392146371194e-05, "loss": 2.1202, "step": 9071500 }, { "epoch": 44.95, "learning_rate": 2.753515355994511e-05, "loss": 2.1242, "step": 9072000 }, { "epoch": 44.95, "learning_rate": 2.7533914973519024e-05, "loss": 2.0862, "step": 9072500 }, { "epoch": 44.95, "learning_rate": 2.7532676387092938e-05, "loss": 2.1114, "step": 9073000 }, { "epoch": 44.95, "learning_rate": 2.7531437800666855e-05, "loss": 2.0964, "step": 9073500 }, { "epoch": 44.96, "learning_rate": 2.7530201691413627e-05, "loss": 2.1382, "step": 9074000 }, { "epoch": 44.96, "learning_rate": 2.7528963104987544e-05, "loss": 2.1032, "step": 9074500 }, { "epoch": 44.96, "learning_rate": 2.752772451856146e-05, "loss": 2.1171, "step": 9075000 }, { "epoch": 44.96, "learning_rate": 2.7526485932135378e-05, "loss": 2.1046, "step": 9075500 }, { "epoch": 44.97, "learning_rate": 2.752524734570929e-05, "loss": 2.1058, "step": 9076000 }, { "epoch": 44.97, "learning_rate": 2.7524008759283205e-05, "loss": 2.1149, "step": 9076500 }, { "epoch": 44.97, "learning_rate": 2.7522770172857122e-05, "loss": 2.1169, "step": 9077000 }, { "epoch": 44.97, "learning_rate": 2.752153158643104e-05, "loss": 2.1221, "step": 9077500 }, { "epoch": 44.98, "learning_rate": 2.7520293000004956e-05, "loss": 2.1051, "step": 9078000 }, { "epoch": 44.98, "learning_rate": 2.751905441357887e-05, "loss": 2.0887, "step": 9078500 }, { "epoch": 44.98, "learning_rate": 2.7517815827152786e-05, "loss": 2.1128, "step": 9079000 }, { "epoch": 44.98, "learning_rate": 2.7516577240726703e-05, "loss": 2.103, "step": 9079500 }, { "epoch": 44.99, "learning_rate": 2.751533865430062e-05, "loss": 2.1061, "step": 9080000 }, { "epoch": 44.99, "learning_rate": 2.7514100067874537e-05, "loss": 2.1283, "step": 9080500 }, { "epoch": 44.99, "learning_rate": 2.7512863958621306e-05, "loss": 2.1259, "step": 9081000 }, { "epoch": 44.99, "learning_rate": 2.7511625372195223e-05, "loss": 2.1105, "step": 9081500 }, { "epoch": 45.0, "learning_rate": 2.7510386785769136e-05, "loss": 2.1176, "step": 9082000 }, { "epoch": 45.0, "learning_rate": 2.7509148199343053e-05, "loss": 2.1143, "step": 9082500 }, { "epoch": 45.0, "eval_accuracy": 0.6670554463923071, "eval_accuracy_mlm": 0.6247866069401701, "eval_accuracy_nsp": 0.8664295043516801, "eval_loss": 2.289098024368286, "eval_runtime": 147.1684, "eval_samples_per_second": 1732.43, "eval_steps_per_second": 72.189, "step": 9082935 }, { "epoch": 45.0, "learning_rate": 2.750790961291697e-05, "loss": 2.1107, "step": 9083000 }, { "epoch": 45.0, "learning_rate": 2.7506671026490887e-05, "loss": 2.0847, "step": 9083500 }, { "epoch": 45.01, "learning_rate": 2.7505434917237656e-05, "loss": 2.1076, "step": 9084000 }, { "epoch": 45.01, "learning_rate": 2.7504196330811573e-05, "loss": 2.0905, "step": 9084500 }, { "epoch": 45.01, "learning_rate": 2.7502957744385486e-05, "loss": 2.0871, "step": 9085000 }, { "epoch": 45.01, "learning_rate": 2.7501721635132255e-05, "loss": 2.0687, "step": 9085500 }, { "epoch": 45.02, "learning_rate": 2.7500485525879027e-05, "loss": 2.1173, "step": 9086000 }, { "epoch": 45.02, "learning_rate": 2.7499246939452944e-05, "loss": 2.0988, "step": 9086500 }, { "epoch": 45.02, "learning_rate": 2.749800835302686e-05, "loss": 2.0864, "step": 9087000 }, { "epoch": 45.02, "learning_rate": 2.7496769766600778e-05, "loss": 2.0861, "step": 9087500 }, { "epoch": 45.03, "learning_rate": 2.7495533657347544e-05, "loss": 2.1088, "step": 9088000 }, { "epoch": 45.03, "learning_rate": 2.749429507092146e-05, "loss": 2.1049, "step": 9088500 }, { "epoch": 45.03, "learning_rate": 2.7493056484495377e-05, "loss": 2.0979, "step": 9089000 }, { "epoch": 45.03, "learning_rate": 2.7491817898069294e-05, "loss": 2.0745, "step": 9089500 }, { "epoch": 45.04, "learning_rate": 2.749057931164321e-05, "loss": 2.1003, "step": 9090000 }, { "epoch": 45.04, "learning_rate": 2.7489340725217128e-05, "loss": 2.0938, "step": 9090500 }, { "epoch": 45.04, "learning_rate": 2.7488102138791045e-05, "loss": 2.0827, "step": 9091000 }, { "epoch": 45.04, "learning_rate": 2.7486863552364962e-05, "loss": 2.0945, "step": 9091500 }, { "epoch": 45.04, "learning_rate": 2.7485627443111727e-05, "loss": 2.0793, "step": 9092000 }, { "epoch": 45.05, "learning_rate": 2.7484391333858496e-05, "loss": 2.0929, "step": 9092500 }, { "epoch": 45.05, "learning_rate": 2.748315274743241e-05, "loss": 2.0907, "step": 9093000 }, { "epoch": 45.05, "learning_rate": 2.7481914161006327e-05, "loss": 2.0754, "step": 9093500 }, { "epoch": 45.05, "learning_rate": 2.7480675574580244e-05, "loss": 2.088, "step": 9094000 }, { "epoch": 45.06, "learning_rate": 2.747943698815416e-05, "loss": 2.0843, "step": 9094500 }, { "epoch": 45.06, "learning_rate": 2.7478198401728078e-05, "loss": 2.0902, "step": 9095000 }, { "epoch": 45.06, "learning_rate": 2.7476959815301994e-05, "loss": 2.089, "step": 9095500 }, { "epoch": 45.06, "learning_rate": 2.747572370604876e-05, "loss": 2.0868, "step": 9096000 }, { "epoch": 45.07, "learning_rate": 2.7474485119622677e-05, "loss": 2.0798, "step": 9096500 }, { "epoch": 45.07, "learning_rate": 2.7473249010369446e-05, "loss": 2.1043, "step": 9097000 }, { "epoch": 45.07, "learning_rate": 2.7472010423943363e-05, "loss": 2.0809, "step": 9097500 }, { "epoch": 45.07, "learning_rate": 2.747077183751728e-05, "loss": 2.0855, "step": 9098000 }, { "epoch": 45.08, "learning_rate": 2.7469533251091196e-05, "loss": 2.0763, "step": 9098500 }, { "epoch": 45.08, "learning_rate": 2.7468294664665113e-05, "loss": 2.1094, "step": 9099000 }, { "epoch": 45.08, "learning_rate": 2.7467056078239027e-05, "loss": 2.1065, "step": 9099500 }, { "epoch": 45.08, "learning_rate": 2.7465817491812944e-05, "loss": 2.0974, "step": 9100000 }, { "epoch": 45.09, "learning_rate": 2.746457890538686e-05, "loss": 2.1037, "step": 9100500 }, { "epoch": 45.09, "learning_rate": 2.7463340318960778e-05, "loss": 2.0767, "step": 9101000 }, { "epoch": 45.09, "learning_rate": 2.7462101732534695e-05, "loss": 2.0673, "step": 9101500 }, { "epoch": 45.09, "learning_rate": 2.746086314610861e-05, "loss": 2.1057, "step": 9102000 }, { "epoch": 45.1, "learning_rate": 2.745962455968253e-05, "loss": 2.0986, "step": 9102500 }, { "epoch": 45.1, "learning_rate": 2.7458385973256445e-05, "loss": 2.1021, "step": 9103000 }, { "epoch": 45.1, "learning_rate": 2.745714986400321e-05, "loss": 2.0688, "step": 9103500 }, { "epoch": 45.1, "learning_rate": 2.7455911277577128e-05, "loss": 2.0742, "step": 9104000 }, { "epoch": 45.11, "learning_rate": 2.7454672691151045e-05, "loss": 2.0974, "step": 9104500 }, { "epoch": 45.11, "learning_rate": 2.745343410472496e-05, "loss": 2.0858, "step": 9105000 }, { "epoch": 45.11, "learning_rate": 2.745219551829888e-05, "loss": 2.0973, "step": 9105500 }, { "epoch": 45.11, "learning_rate": 2.7450959409045647e-05, "loss": 2.1047, "step": 9106000 }, { "epoch": 45.12, "learning_rate": 2.744972082261956e-05, "loss": 2.0872, "step": 9106500 }, { "epoch": 45.12, "learning_rate": 2.7448482236193478e-05, "loss": 2.1024, "step": 9107000 }, { "epoch": 45.12, "learning_rate": 2.7447243649767395e-05, "loss": 2.0964, "step": 9107500 }, { "epoch": 45.12, "learning_rate": 2.744600506334131e-05, "loss": 2.0978, "step": 9108000 }, { "epoch": 45.13, "learning_rate": 2.744476647691523e-05, "loss": 2.1001, "step": 9108500 }, { "epoch": 45.13, "learning_rate": 2.7443527890489146e-05, "loss": 2.0808, "step": 9109000 }, { "epoch": 45.13, "learning_rate": 2.7442289304063062e-05, "loss": 2.0895, "step": 9109500 }, { "epoch": 45.13, "learning_rate": 2.744105071763698e-05, "loss": 2.0935, "step": 9110000 }, { "epoch": 45.14, "learning_rate": 2.7439814608383745e-05, "loss": 2.0767, "step": 9110500 }, { "epoch": 45.14, "learning_rate": 2.7438576021957662e-05, "loss": 2.0879, "step": 9111000 }, { "epoch": 45.14, "learning_rate": 2.743733991270443e-05, "loss": 2.0817, "step": 9111500 }, { "epoch": 45.14, "learning_rate": 2.7436101326278347e-05, "loss": 2.0718, "step": 9112000 }, { "epoch": 45.15, "learning_rate": 2.7434865217025113e-05, "loss": 2.0816, "step": 9112500 }, { "epoch": 45.15, "learning_rate": 2.743362663059903e-05, "loss": 2.0966, "step": 9113000 }, { "epoch": 45.15, "learning_rate": 2.7432388044172947e-05, "loss": 2.0906, "step": 9113500 }, { "epoch": 45.15, "learning_rate": 2.7431149457746864e-05, "loss": 2.095, "step": 9114000 }, { "epoch": 45.16, "learning_rate": 2.742991087132078e-05, "loss": 2.0866, "step": 9114500 }, { "epoch": 45.16, "learning_rate": 2.7428672284894698e-05, "loss": 2.1048, "step": 9115000 }, { "epoch": 45.16, "learning_rate": 2.7427433698468614e-05, "loss": 2.1264, "step": 9115500 }, { "epoch": 45.16, "learning_rate": 2.742619511204253e-05, "loss": 2.0726, "step": 9116000 }, { "epoch": 45.17, "learning_rate": 2.7424956525616445e-05, "loss": 2.0857, "step": 9116500 }, { "epoch": 45.17, "learning_rate": 2.7423717939190362e-05, "loss": 2.0893, "step": 9117000 }, { "epoch": 45.17, "learning_rate": 2.742247935276428e-05, "loss": 2.088, "step": 9117500 }, { "epoch": 45.17, "learning_rate": 2.7421240766338196e-05, "loss": 2.098, "step": 9118000 }, { "epoch": 45.18, "learning_rate": 2.7420002179912113e-05, "loss": 2.1108, "step": 9118500 }, { "epoch": 45.18, "learning_rate": 2.741876607065888e-05, "loss": 2.0826, "step": 9119000 }, { "epoch": 45.18, "learning_rate": 2.74175274842328e-05, "loss": 2.0948, "step": 9119500 }, { "epoch": 45.18, "learning_rate": 2.7416288897806712e-05, "loss": 2.0947, "step": 9120000 }, { "epoch": 45.19, "learning_rate": 2.741505031138063e-05, "loss": 2.1092, "step": 9120500 }, { "epoch": 45.19, "learning_rate": 2.7413811724954546e-05, "loss": 2.0885, "step": 9121000 }, { "epoch": 45.19, "learning_rate": 2.7412573138528463e-05, "loss": 2.0785, "step": 9121500 }, { "epoch": 45.19, "learning_rate": 2.741133455210238e-05, "loss": 2.1145, "step": 9122000 }, { "epoch": 45.2, "learning_rate": 2.7410095965676297e-05, "loss": 2.0784, "step": 9122500 }, { "epoch": 45.2, "learning_rate": 2.7408857379250207e-05, "loss": 2.0871, "step": 9123000 }, { "epoch": 45.2, "learning_rate": 2.7407618792824124e-05, "loss": 2.1132, "step": 9123500 }, { "epoch": 45.2, "learning_rate": 2.7406382683570896e-05, "loss": 2.099, "step": 9124000 }, { "epoch": 45.21, "learning_rate": 2.7405146574317665e-05, "loss": 2.101, "step": 9124500 }, { "epoch": 45.21, "learning_rate": 2.740391046506443e-05, "loss": 2.0825, "step": 9125000 }, { "epoch": 45.21, "learning_rate": 2.7402671878638347e-05, "loss": 2.1052, "step": 9125500 }, { "epoch": 45.21, "learning_rate": 2.7401433292212264e-05, "loss": 2.0996, "step": 9126000 }, { "epoch": 45.22, "learning_rate": 2.740019470578618e-05, "loss": 2.1125, "step": 9126500 }, { "epoch": 45.22, "learning_rate": 2.7398956119360098e-05, "loss": 2.0918, "step": 9127000 }, { "epoch": 45.22, "learning_rate": 2.7397717532934015e-05, "loss": 2.0942, "step": 9127500 }, { "epoch": 45.22, "learning_rate": 2.739647894650793e-05, "loss": 2.0624, "step": 9128000 }, { "epoch": 45.23, "learning_rate": 2.7395242837254697e-05, "loss": 2.1073, "step": 9128500 }, { "epoch": 45.23, "learning_rate": 2.739400672800147e-05, "loss": 2.1042, "step": 9129000 }, { "epoch": 45.23, "learning_rate": 2.7392768141575386e-05, "loss": 2.0746, "step": 9129500 }, { "epoch": 45.23, "learning_rate": 2.7391529555149296e-05, "loss": 2.104, "step": 9130000 }, { "epoch": 45.24, "learning_rate": 2.7390290968723213e-05, "loss": 2.1052, "step": 9130500 }, { "epoch": 45.24, "learning_rate": 2.738905238229713e-05, "loss": 2.1162, "step": 9131000 }, { "epoch": 45.24, "learning_rate": 2.7387813795871047e-05, "loss": 2.0666, "step": 9131500 }, { "epoch": 45.24, "learning_rate": 2.7386575209444964e-05, "loss": 2.0927, "step": 9132000 }, { "epoch": 45.25, "learning_rate": 2.7385339100191736e-05, "loss": 2.088, "step": 9132500 }, { "epoch": 45.25, "learning_rate": 2.738410051376565e-05, "loss": 2.0805, "step": 9133000 }, { "epoch": 45.25, "learning_rate": 2.7382861927339563e-05, "loss": 2.1112, "step": 9133500 }, { "epoch": 45.25, "learning_rate": 2.738162334091348e-05, "loss": 2.0874, "step": 9134000 }, { "epoch": 45.26, "learning_rate": 2.7380384754487397e-05, "loss": 2.1034, "step": 9134500 }, { "epoch": 45.26, "learning_rate": 2.7379146168061314e-05, "loss": 2.0935, "step": 9135000 }, { "epoch": 45.26, "learning_rate": 2.737790758163523e-05, "loss": 2.1349, "step": 9135500 }, { "epoch": 45.26, "learning_rate": 2.7376668995209148e-05, "loss": 2.093, "step": 9136000 }, { "epoch": 45.27, "learning_rate": 2.7375430408783065e-05, "loss": 2.129, "step": 9136500 }, { "epoch": 45.27, "learning_rate": 2.737419429952983e-05, "loss": 2.0845, "step": 9137000 }, { "epoch": 45.27, "learning_rate": 2.7372955713103747e-05, "loss": 2.1246, "step": 9137500 }, { "epoch": 45.27, "learning_rate": 2.7371717126677664e-05, "loss": 2.1012, "step": 9138000 }, { "epoch": 45.28, "learning_rate": 2.737047854025158e-05, "loss": 2.0962, "step": 9138500 }, { "epoch": 45.28, "learning_rate": 2.7369239953825498e-05, "loss": 2.1106, "step": 9139000 }, { "epoch": 45.28, "learning_rate": 2.7368003844572267e-05, "loss": 2.0804, "step": 9139500 }, { "epoch": 45.28, "learning_rate": 2.736676525814618e-05, "loss": 2.102, "step": 9140000 }, { "epoch": 45.29, "learning_rate": 2.7365529148892956e-05, "loss": 2.0923, "step": 9140500 }, { "epoch": 45.29, "learning_rate": 2.736429056246687e-05, "loss": 2.1053, "step": 9141000 }, { "epoch": 45.29, "learning_rate": 2.7363051976040786e-05, "loss": 2.1046, "step": 9141500 }, { "epoch": 45.29, "learning_rate": 2.7361813389614703e-05, "loss": 2.0846, "step": 9142000 }, { "epoch": 45.3, "learning_rate": 2.7360574803188617e-05, "loss": 2.1144, "step": 9142500 }, { "epoch": 45.3, "learning_rate": 2.7359336216762534e-05, "loss": 2.0891, "step": 9143000 }, { "epoch": 45.3, "learning_rate": 2.7358097630336447e-05, "loss": 2.1033, "step": 9143500 }, { "epoch": 45.3, "learning_rate": 2.7356859043910364e-05, "loss": 2.0906, "step": 9144000 }, { "epoch": 45.31, "learning_rate": 2.735562045748428e-05, "loss": 2.0798, "step": 9144500 }, { "epoch": 45.31, "learning_rate": 2.7354381871058198e-05, "loss": 2.1057, "step": 9145000 }, { "epoch": 45.31, "learning_rate": 2.7353143284632115e-05, "loss": 2.1104, "step": 9145500 }, { "epoch": 45.31, "learning_rate": 2.7351904698206032e-05, "loss": 2.115, "step": 9146000 }, { "epoch": 45.31, "learning_rate": 2.735066611177995e-05, "loss": 2.1129, "step": 9146500 }, { "epoch": 45.32, "learning_rate": 2.7349430002526714e-05, "loss": 2.1199, "step": 9147000 }, { "epoch": 45.32, "learning_rate": 2.734819637044634e-05, "loss": 2.1016, "step": 9147500 }, { "epoch": 45.32, "learning_rate": 2.7346957784020255e-05, "loss": 2.0922, "step": 9148000 }, { "epoch": 45.32, "learning_rate": 2.7345719197594172e-05, "loss": 2.0949, "step": 9148500 }, { "epoch": 45.33, "learning_rate": 2.734448061116809e-05, "loss": 2.1008, "step": 9149000 }, { "epoch": 45.33, "learning_rate": 2.7343242024742006e-05, "loss": 2.07, "step": 9149500 }, { "epoch": 45.33, "learning_rate": 2.7342003438315923e-05, "loss": 2.1024, "step": 9150000 }, { "epoch": 45.33, "learning_rate": 2.734076485188984e-05, "loss": 2.1027, "step": 9150500 }, { "epoch": 45.34, "learning_rate": 2.7339526265463754e-05, "loss": 2.1263, "step": 9151000 }, { "epoch": 45.34, "learning_rate": 2.733828767903767e-05, "loss": 2.106, "step": 9151500 }, { "epoch": 45.34, "learning_rate": 2.7337049092611584e-05, "loss": 2.1239, "step": 9152000 }, { "epoch": 45.34, "learning_rate": 2.73358105061855e-05, "loss": 2.0875, "step": 9152500 }, { "epoch": 45.35, "learning_rate": 2.7334571919759418e-05, "loss": 2.1106, "step": 9153000 }, { "epoch": 45.35, "learning_rate": 2.733333333333333e-05, "loss": 2.0944, "step": 9153500 }, { "epoch": 45.35, "learning_rate": 2.7332097224080107e-05, "loss": 2.091, "step": 9154000 }, { "epoch": 45.35, "learning_rate": 2.7330861114826872e-05, "loss": 2.0873, "step": 9154500 }, { "epoch": 45.36, "learning_rate": 2.7329625005573638e-05, "loss": 2.0944, "step": 9155000 }, { "epoch": 45.36, "learning_rate": 2.7328386419147555e-05, "loss": 2.1005, "step": 9155500 }, { "epoch": 45.36, "learning_rate": 2.732714783272147e-05, "loss": 2.0946, "step": 9156000 }, { "epoch": 45.36, "learning_rate": 2.732590924629539e-05, "loss": 2.1087, "step": 9156500 }, { "epoch": 45.37, "learning_rate": 2.7324670659869306e-05, "loss": 2.1175, "step": 9157000 }, { "epoch": 45.37, "learning_rate": 2.7323432073443222e-05, "loss": 2.1181, "step": 9157500 }, { "epoch": 45.37, "learning_rate": 2.732219348701714e-05, "loss": 2.0757, "step": 9158000 }, { "epoch": 45.37, "learning_rate": 2.7320954900591056e-05, "loss": 2.088, "step": 9158500 }, { "epoch": 45.38, "learning_rate": 2.7319716314164973e-05, "loss": 2.1202, "step": 9159000 }, { "epoch": 45.38, "learning_rate": 2.731847772773889e-05, "loss": 2.1008, "step": 9159500 }, { "epoch": 45.38, "learning_rate": 2.7317239141312807e-05, "loss": 2.1181, "step": 9160000 }, { "epoch": 45.38, "learning_rate": 2.7316000554886724e-05, "loss": 2.1114, "step": 9160500 }, { "epoch": 45.39, "learning_rate": 2.731476196846064e-05, "loss": 2.0788, "step": 9161000 }, { "epoch": 45.39, "learning_rate": 2.7313525859207406e-05, "loss": 2.117, "step": 9161500 }, { "epoch": 45.39, "learning_rate": 2.7312287272781323e-05, "loss": 2.0839, "step": 9162000 }, { "epoch": 45.39, "learning_rate": 2.731104868635524e-05, "loss": 2.0938, "step": 9162500 }, { "epoch": 45.4, "learning_rate": 2.7309812577102006e-05, "loss": 2.1036, "step": 9163000 }, { "epoch": 45.4, "learning_rate": 2.7308573990675923e-05, "loss": 2.1052, "step": 9163500 }, { "epoch": 45.4, "learning_rate": 2.730733788142269e-05, "loss": 2.0986, "step": 9164000 }, { "epoch": 45.4, "learning_rate": 2.7306099294996605e-05, "loss": 2.0875, "step": 9164500 }, { "epoch": 45.41, "learning_rate": 2.7304860708570522e-05, "loss": 2.1072, "step": 9165000 }, { "epoch": 45.41, "learning_rate": 2.730362212214444e-05, "loss": 2.1085, "step": 9165500 }, { "epoch": 45.41, "learning_rate": 2.7302383535718356e-05, "loss": 2.1042, "step": 9166000 }, { "epoch": 45.41, "learning_rate": 2.7301144949292273e-05, "loss": 2.0988, "step": 9166500 }, { "epoch": 45.42, "learning_rate": 2.729990884003904e-05, "loss": 2.1144, "step": 9167000 }, { "epoch": 45.42, "learning_rate": 2.729867025361296e-05, "loss": 2.1032, "step": 9167500 }, { "epoch": 45.42, "learning_rate": 2.7297431667186872e-05, "loss": 2.1017, "step": 9168000 }, { "epoch": 45.42, "learning_rate": 2.729619308076079e-05, "loss": 2.1181, "step": 9168500 }, { "epoch": 45.43, "learning_rate": 2.7294956971507558e-05, "loss": 2.1033, "step": 9169000 }, { "epoch": 45.43, "learning_rate": 2.7293718385081475e-05, "loss": 2.0802, "step": 9169500 }, { "epoch": 45.43, "learning_rate": 2.7292482275828247e-05, "loss": 2.0992, "step": 9170000 }, { "epoch": 45.43, "learning_rate": 2.7291243689402164e-05, "loss": 2.0909, "step": 9170500 }, { "epoch": 45.44, "learning_rate": 2.729000510297608e-05, "loss": 2.0871, "step": 9171000 }, { "epoch": 45.44, "learning_rate": 2.728876651654999e-05, "loss": 2.0837, "step": 9171500 }, { "epoch": 45.44, "learning_rate": 2.7287527930123908e-05, "loss": 2.0966, "step": 9172000 }, { "epoch": 45.44, "learning_rate": 2.7286289343697825e-05, "loss": 2.1088, "step": 9172500 }, { "epoch": 45.45, "learning_rate": 2.728505075727174e-05, "loss": 2.0832, "step": 9173000 }, { "epoch": 45.45, "learning_rate": 2.728381217084566e-05, "loss": 2.0958, "step": 9173500 }, { "epoch": 45.45, "learning_rate": 2.7282573584419575e-05, "loss": 2.1003, "step": 9174000 }, { "epoch": 45.45, "learning_rate": 2.7281334997993492e-05, "loss": 2.1191, "step": 9174500 }, { "epoch": 45.46, "learning_rate": 2.7280096411567406e-05, "loss": 2.1247, "step": 9175000 }, { "epoch": 45.46, "learning_rate": 2.7278857825141323e-05, "loss": 2.0827, "step": 9175500 }, { "epoch": 45.46, "learning_rate": 2.727761923871524e-05, "loss": 2.1138, "step": 9176000 }, { "epoch": 45.46, "learning_rate": 2.7276380652289157e-05, "loss": 2.1086, "step": 9176500 }, { "epoch": 45.47, "learning_rate": 2.7275142065863074e-05, "loss": 2.1047, "step": 9177000 }, { "epoch": 45.47, "learning_rate": 2.727390347943699e-05, "loss": 2.0857, "step": 9177500 }, { "epoch": 45.47, "learning_rate": 2.7272667370183756e-05, "loss": 2.1305, "step": 9178000 }, { "epoch": 45.47, "learning_rate": 2.7271428783757673e-05, "loss": 2.1204, "step": 9178500 }, { "epoch": 45.48, "learning_rate": 2.7270195151677297e-05, "loss": 2.1019, "step": 9179000 }, { "epoch": 45.48, "learning_rate": 2.7268959042424062e-05, "loss": 2.1015, "step": 9179500 }, { "epoch": 45.48, "learning_rate": 2.726772045599798e-05, "loss": 2.1099, "step": 9180000 }, { "epoch": 45.48, "learning_rate": 2.7266481869571896e-05, "loss": 2.0797, "step": 9180500 }, { "epoch": 45.49, "learning_rate": 2.7265243283145813e-05, "loss": 2.1124, "step": 9181000 }, { "epoch": 45.49, "learning_rate": 2.726400469671973e-05, "loss": 2.0734, "step": 9181500 }, { "epoch": 45.49, "learning_rate": 2.7262766110293647e-05, "loss": 2.1023, "step": 9182000 }, { "epoch": 45.49, "learning_rate": 2.7261527523867564e-05, "loss": 2.0995, "step": 9182500 }, { "epoch": 45.5, "learning_rate": 2.726028893744148e-05, "loss": 2.089, "step": 9183000 }, { "epoch": 45.5, "learning_rate": 2.7259050351015398e-05, "loss": 2.1125, "step": 9183500 }, { "epoch": 45.5, "learning_rate": 2.7257811764589308e-05, "loss": 2.1189, "step": 9184000 }, { "epoch": 45.5, "learning_rate": 2.7256573178163225e-05, "loss": 2.1122, "step": 9184500 }, { "epoch": 45.51, "learning_rate": 2.7255334591737142e-05, "loss": 2.1015, "step": 9185000 }, { "epoch": 45.51, "learning_rate": 2.725409600531106e-05, "loss": 2.0689, "step": 9185500 }, { "epoch": 45.51, "learning_rate": 2.7252857418884976e-05, "loss": 2.1234, "step": 9186000 }, { "epoch": 45.51, "learning_rate": 2.7251618832458893e-05, "loss": 2.1127, "step": 9186500 }, { "epoch": 45.52, "learning_rate": 2.725038024603281e-05, "loss": 2.1097, "step": 9187000 }, { "epoch": 45.52, "learning_rate": 2.7249144136779575e-05, "loss": 2.0955, "step": 9187500 }, { "epoch": 45.52, "learning_rate": 2.7247905550353492e-05, "loss": 2.1055, "step": 9188000 }, { "epoch": 45.52, "learning_rate": 2.7246669441100264e-05, "loss": 2.0913, "step": 9188500 }, { "epoch": 45.53, "learning_rate": 2.724543333184703e-05, "loss": 2.1068, "step": 9189000 }, { "epoch": 45.53, "learning_rate": 2.7244194745420946e-05, "loss": 2.1101, "step": 9189500 }, { "epoch": 45.53, "learning_rate": 2.7242958636167715e-05, "loss": 2.0923, "step": 9190000 }, { "epoch": 45.53, "learning_rate": 2.7241720049741632e-05, "loss": 2.1025, "step": 9190500 }, { "epoch": 45.54, "learning_rate": 2.724048146331555e-05, "loss": 2.0855, "step": 9191000 }, { "epoch": 45.54, "learning_rate": 2.7239242876889466e-05, "loss": 2.0963, "step": 9191500 }, { "epoch": 45.54, "learning_rate": 2.7238004290463383e-05, "loss": 2.0994, "step": 9192000 }, { "epoch": 45.54, "learning_rate": 2.7236765704037296e-05, "loss": 2.1254, "step": 9192500 }, { "epoch": 45.55, "learning_rate": 2.7235527117611213e-05, "loss": 2.0916, "step": 9193000 }, { "epoch": 45.55, "learning_rate": 2.723428853118513e-05, "loss": 2.099, "step": 9193500 }, { "epoch": 45.55, "learning_rate": 2.72330524219319e-05, "loss": 2.0972, "step": 9194000 }, { "epoch": 45.55, "learning_rate": 2.7231813835505816e-05, "loss": 2.0781, "step": 9194500 }, { "epoch": 45.56, "learning_rate": 2.7230575249079733e-05, "loss": 2.1195, "step": 9195000 }, { "epoch": 45.56, "learning_rate": 2.722933666265365e-05, "loss": 2.085, "step": 9195500 }, { "epoch": 45.56, "learning_rate": 2.7228098076227563e-05, "loss": 2.1286, "step": 9196000 }, { "epoch": 45.56, "learning_rate": 2.722685948980148e-05, "loss": 2.1368, "step": 9196500 }, { "epoch": 45.57, "learning_rate": 2.722562338054825e-05, "loss": 2.1088, "step": 9197000 }, { "epoch": 45.57, "learning_rate": 2.7224384794122166e-05, "loss": 2.1017, "step": 9197500 }, { "epoch": 45.57, "learning_rate": 2.7223146207696083e-05, "loss": 2.0907, "step": 9198000 }, { "epoch": 45.57, "learning_rate": 2.722190762127e-05, "loss": 2.0873, "step": 9198500 }, { "epoch": 45.58, "learning_rate": 2.7220669034843917e-05, "loss": 2.0923, "step": 9199000 }, { "epoch": 45.58, "learning_rate": 2.7219432925590682e-05, "loss": 2.1116, "step": 9199500 }, { "epoch": 45.58, "learning_rate": 2.72181943391646e-05, "loss": 2.1015, "step": 9200000 }, { "epoch": 45.58, "learning_rate": 2.7216955752738516e-05, "loss": 2.1015, "step": 9200500 }, { "epoch": 45.58, "learning_rate": 2.7215717166312433e-05, "loss": 2.1079, "step": 9201000 }, { "epoch": 45.59, "learning_rate": 2.721447857988635e-05, "loss": 2.1233, "step": 9201500 }, { "epoch": 45.59, "learning_rate": 2.7213239993460267e-05, "loss": 2.1084, "step": 9202000 }, { "epoch": 45.59, "learning_rate": 2.721200140703418e-05, "loss": 2.1059, "step": 9202500 }, { "epoch": 45.59, "learning_rate": 2.7210762820608097e-05, "loss": 2.1035, "step": 9203000 }, { "epoch": 45.6, "learning_rate": 2.7209524234182014e-05, "loss": 2.0962, "step": 9203500 }, { "epoch": 45.6, "learning_rate": 2.720828564775593e-05, "loss": 2.1146, "step": 9204000 }, { "epoch": 45.6, "learning_rate": 2.7207047061329848e-05, "loss": 2.0905, "step": 9204500 }, { "epoch": 45.6, "learning_rate": 2.7205808474903765e-05, "loss": 2.1066, "step": 9205000 }, { "epoch": 45.61, "learning_rate": 2.7204569888477675e-05, "loss": 2.1173, "step": 9205500 }, { "epoch": 45.61, "learning_rate": 2.7203333779224448e-05, "loss": 2.131, "step": 9206000 }, { "epoch": 45.61, "learning_rate": 2.7202095192798364e-05, "loss": 2.1039, "step": 9206500 }, { "epoch": 45.61, "learning_rate": 2.720085660637228e-05, "loss": 2.1003, "step": 9207000 }, { "epoch": 45.62, "learning_rate": 2.71996180199462e-05, "loss": 2.1073, "step": 9207500 }, { "epoch": 45.62, "learning_rate": 2.7198379433520115e-05, "loss": 2.1213, "step": 9208000 }, { "epoch": 45.62, "learning_rate": 2.7197140847094032e-05, "loss": 2.1154, "step": 9208500 }, { "epoch": 45.62, "learning_rate": 2.71959047378408e-05, "loss": 2.1107, "step": 9209000 }, { "epoch": 45.63, "learning_rate": 2.7194666151414715e-05, "loss": 2.1071, "step": 9209500 }, { "epoch": 45.63, "learning_rate": 2.719342756498863e-05, "loss": 2.1096, "step": 9210000 }, { "epoch": 45.63, "learning_rate": 2.719218897856255e-05, "loss": 2.1091, "step": 9210500 }, { "epoch": 45.63, "learning_rate": 2.7190950392136465e-05, "loss": 2.0936, "step": 9211000 }, { "epoch": 45.64, "learning_rate": 2.7189711805710382e-05, "loss": 2.1229, "step": 9211500 }, { "epoch": 45.64, "learning_rate": 2.7188473219284292e-05, "loss": 2.1066, "step": 9212000 }, { "epoch": 45.64, "learning_rate": 2.718723463285821e-05, "loss": 2.1056, "step": 9212500 }, { "epoch": 45.64, "learning_rate": 2.7185996046432126e-05, "loss": 2.1168, "step": 9213000 }, { "epoch": 45.65, "learning_rate": 2.71847599371789e-05, "loss": 2.0934, "step": 9213500 }, { "epoch": 45.65, "learning_rate": 2.7183523827925667e-05, "loss": 2.0972, "step": 9214000 }, { "epoch": 45.65, "learning_rate": 2.7182285241499584e-05, "loss": 2.1081, "step": 9214500 }, { "epoch": 45.65, "learning_rate": 2.71810466550735e-05, "loss": 2.1068, "step": 9215000 }, { "epoch": 45.66, "learning_rate": 2.7179808068647418e-05, "loss": 2.1005, "step": 9215500 }, { "epoch": 45.66, "learning_rate": 2.717856948222133e-05, "loss": 2.1081, "step": 9216000 }, { "epoch": 45.66, "learning_rate": 2.717733089579525e-05, "loss": 2.1076, "step": 9216500 }, { "epoch": 45.66, "learning_rate": 2.7176094786542017e-05, "loss": 2.1249, "step": 9217000 }, { "epoch": 45.67, "learning_rate": 2.7174856200115934e-05, "loss": 2.0828, "step": 9217500 }, { "epoch": 45.67, "learning_rate": 2.717361761368985e-05, "loss": 2.1, "step": 9218000 }, { "epoch": 45.67, "learning_rate": 2.7172379027263768e-05, "loss": 2.1141, "step": 9218500 }, { "epoch": 45.67, "learning_rate": 2.7171140440837685e-05, "loss": 2.1031, "step": 9219000 }, { "epoch": 45.68, "learning_rate": 2.71699018544116e-05, "loss": 2.1069, "step": 9219500 }, { "epoch": 45.68, "learning_rate": 2.7168663267985515e-05, "loss": 2.1169, "step": 9220000 }, { "epoch": 45.68, "learning_rate": 2.7167427158732284e-05, "loss": 2.1132, "step": 9220500 }, { "epoch": 45.68, "learning_rate": 2.71661885723062e-05, "loss": 2.1261, "step": 9221000 }, { "epoch": 45.69, "learning_rate": 2.7164949985880118e-05, "loss": 2.1039, "step": 9221500 }, { "epoch": 45.69, "learning_rate": 2.7163711399454035e-05, "loss": 2.0983, "step": 9222000 }, { "epoch": 45.69, "learning_rate": 2.7162472813027952e-05, "loss": 2.1168, "step": 9222500 }, { "epoch": 45.69, "learning_rate": 2.7161234226601866e-05, "loss": 2.0975, "step": 9223000 }, { "epoch": 45.7, "learning_rate": 2.7159995640175782e-05, "loss": 2.0963, "step": 9223500 }, { "epoch": 45.7, "learning_rate": 2.71587570537497e-05, "loss": 2.08, "step": 9224000 }, { "epoch": 45.7, "learning_rate": 2.7157520944496468e-05, "loss": 2.0962, "step": 9224500 }, { "epoch": 45.7, "learning_rate": 2.7156282358070385e-05, "loss": 2.1118, "step": 9225000 }, { "epoch": 45.71, "learning_rate": 2.7155043771644302e-05, "loss": 2.0957, "step": 9225500 }, { "epoch": 45.71, "learning_rate": 2.715380518521822e-05, "loss": 2.1004, "step": 9226000 }, { "epoch": 45.71, "learning_rate": 2.7152566598792133e-05, "loss": 2.1011, "step": 9226500 }, { "epoch": 45.71, "learning_rate": 2.71513304895389e-05, "loss": 2.1138, "step": 9227000 }, { "epoch": 45.72, "learning_rate": 2.7150091903112818e-05, "loss": 2.1071, "step": 9227500 }, { "epoch": 45.72, "learning_rate": 2.7148853316686735e-05, "loss": 2.1203, "step": 9228000 }, { "epoch": 45.72, "learning_rate": 2.7147614730260652e-05, "loss": 2.1006, "step": 9228500 }, { "epoch": 45.72, "learning_rate": 2.714637614383457e-05, "loss": 2.1027, "step": 9229000 }, { "epoch": 45.73, "learning_rate": 2.7145140034581334e-05, "loss": 2.0752, "step": 9229500 }, { "epoch": 45.73, "learning_rate": 2.714390144815525e-05, "loss": 2.1197, "step": 9230000 }, { "epoch": 45.73, "learning_rate": 2.714266286172917e-05, "loss": 2.0983, "step": 9230500 }, { "epoch": 45.73, "learning_rate": 2.7141424275303085e-05, "loss": 2.1242, "step": 9231000 }, { "epoch": 45.74, "learning_rate": 2.714018816604985e-05, "loss": 2.0743, "step": 9231500 }, { "epoch": 45.74, "learning_rate": 2.7138949579623768e-05, "loss": 2.0961, "step": 9232000 }, { "epoch": 45.74, "learning_rate": 2.7137710993197685e-05, "loss": 2.1209, "step": 9232500 }, { "epoch": 45.74, "learning_rate": 2.71364724067716e-05, "loss": 2.1297, "step": 9233000 }, { "epoch": 45.75, "learning_rate": 2.713523382034552e-05, "loss": 2.1181, "step": 9233500 }, { "epoch": 45.75, "learning_rate": 2.7133995233919435e-05, "loss": 2.1106, "step": 9234000 }, { "epoch": 45.75, "learning_rate": 2.7132756647493352e-05, "loss": 2.1026, "step": 9234500 }, { "epoch": 45.75, "learning_rate": 2.713151806106727e-05, "loss": 2.1093, "step": 9235000 }, { "epoch": 45.76, "learning_rate": 2.7130279474641186e-05, "loss": 2.0896, "step": 9235500 }, { "epoch": 45.76, "learning_rate": 2.7129040888215103e-05, "loss": 2.0965, "step": 9236000 }, { "epoch": 45.76, "learning_rate": 2.7127802301789017e-05, "loss": 2.1031, "step": 9236500 }, { "epoch": 45.76, "learning_rate": 2.712656371536293e-05, "loss": 2.101, "step": 9237000 }, { "epoch": 45.77, "learning_rate": 2.7125325128936847e-05, "loss": 2.1249, "step": 9237500 }, { "epoch": 45.77, "learning_rate": 2.7124086542510764e-05, "loss": 2.1005, "step": 9238000 }, { "epoch": 45.77, "learning_rate": 2.712284795608468e-05, "loss": 2.0977, "step": 9238500 }, { "epoch": 45.77, "learning_rate": 2.71216143240043e-05, "loss": 2.1412, "step": 9239000 }, { "epoch": 45.78, "learning_rate": 2.712037573757822e-05, "loss": 2.0992, "step": 9239500 }, { "epoch": 45.78, "learning_rate": 2.7119137151152135e-05, "loss": 2.1082, "step": 9240000 }, { "epoch": 45.78, "learning_rate": 2.71179010418989e-05, "loss": 2.1095, "step": 9240500 }, { "epoch": 45.78, "learning_rate": 2.7116662455472818e-05, "loss": 2.1181, "step": 9241000 }, { "epoch": 45.79, "learning_rate": 2.7115426346219587e-05, "loss": 2.1126, "step": 9241500 }, { "epoch": 45.79, "learning_rate": 2.7114187759793504e-05, "loss": 2.1217, "step": 9242000 }, { "epoch": 45.79, "learning_rate": 2.711294917336742e-05, "loss": 2.102, "step": 9242500 }, { "epoch": 45.79, "learning_rate": 2.7111710586941334e-05, "loss": 2.094, "step": 9243000 }, { "epoch": 45.8, "learning_rate": 2.711047200051525e-05, "loss": 2.1394, "step": 9243500 }, { "epoch": 45.8, "learning_rate": 2.7109233414089168e-05, "loss": 2.1213, "step": 9244000 }, { "epoch": 45.8, "learning_rate": 2.7107994827663085e-05, "loss": 2.1091, "step": 9244500 }, { "epoch": 45.8, "learning_rate": 2.7106756241237002e-05, "loss": 2.0955, "step": 9245000 }, { "epoch": 45.81, "learning_rate": 2.710551765481092e-05, "loss": 2.096, "step": 9245500 }, { "epoch": 45.81, "learning_rate": 2.7104279068384836e-05, "loss": 2.1025, "step": 9246000 }, { "epoch": 45.81, "learning_rate": 2.7103040481958753e-05, "loss": 2.0978, "step": 9246500 }, { "epoch": 45.81, "learning_rate": 2.710180189553267e-05, "loss": 2.1043, "step": 9247000 }, { "epoch": 45.82, "learning_rate": 2.7100565786279435e-05, "loss": 2.1028, "step": 9247500 }, { "epoch": 45.82, "learning_rate": 2.7099327199853352e-05, "loss": 2.1142, "step": 9248000 }, { "epoch": 45.82, "learning_rate": 2.709808861342727e-05, "loss": 2.098, "step": 9248500 }, { "epoch": 45.82, "learning_rate": 2.7096850027001186e-05, "loss": 2.1092, "step": 9249000 }, { "epoch": 45.83, "learning_rate": 2.7095611440575103e-05, "loss": 2.1165, "step": 9249500 }, { "epoch": 45.83, "learning_rate": 2.709437285414902e-05, "loss": 2.1116, "step": 9250000 }, { "epoch": 45.83, "learning_rate": 2.7093134267722936e-05, "loss": 2.0959, "step": 9250500 }, { "epoch": 45.83, "learning_rate": 2.7091895681296853e-05, "loss": 2.1241, "step": 9251000 }, { "epoch": 45.84, "learning_rate": 2.709065709487077e-05, "loss": 2.1058, "step": 9251500 }, { "epoch": 45.84, "learning_rate": 2.7089418508444687e-05, "loss": 2.1039, "step": 9252000 }, { "epoch": 45.84, "learning_rate": 2.7088179922018597e-05, "loss": 2.0805, "step": 9252500 }, { "epoch": 45.84, "learning_rate": 2.7086941335592514e-05, "loss": 2.0975, "step": 9253000 }, { "epoch": 45.85, "learning_rate": 2.708570274916643e-05, "loss": 2.118, "step": 9253500 }, { "epoch": 45.85, "learning_rate": 2.7084464162740348e-05, "loss": 2.1025, "step": 9254000 }, { "epoch": 45.85, "learning_rate": 2.7083225576314265e-05, "loss": 2.1305, "step": 9254500 }, { "epoch": 45.85, "learning_rate": 2.7081989467061037e-05, "loss": 2.1003, "step": 9255000 }, { "epoch": 45.85, "learning_rate": 2.7080750880634947e-05, "loss": 2.0863, "step": 9255500 }, { "epoch": 45.86, "learning_rate": 2.707951477138172e-05, "loss": 2.115, "step": 9256000 }, { "epoch": 45.86, "learning_rate": 2.7078281139301337e-05, "loss": 2.0894, "step": 9256500 }, { "epoch": 45.86, "learning_rate": 2.707704503004811e-05, "loss": 2.1233, "step": 9257000 }, { "epoch": 45.86, "learning_rate": 2.7075806443622026e-05, "loss": 2.1269, "step": 9257500 }, { "epoch": 45.87, "learning_rate": 2.7074567857195943e-05, "loss": 2.1095, "step": 9258000 }, { "epoch": 45.87, "learning_rate": 2.707332927076986e-05, "loss": 2.1064, "step": 9258500 }, { "epoch": 45.87, "learning_rate": 2.7072090684343777e-05, "loss": 2.0994, "step": 9259000 }, { "epoch": 45.87, "learning_rate": 2.7070854575090542e-05, "loss": 2.1284, "step": 9259500 }, { "epoch": 45.88, "learning_rate": 2.706961598866446e-05, "loss": 2.0975, "step": 9260000 }, { "epoch": 45.88, "learning_rate": 2.7068377402238376e-05, "loss": 2.1064, "step": 9260500 }, { "epoch": 45.88, "learning_rate": 2.7067138815812293e-05, "loss": 2.1005, "step": 9261000 }, { "epoch": 45.88, "learning_rate": 2.706590022938621e-05, "loss": 2.1323, "step": 9261500 }, { "epoch": 45.89, "learning_rate": 2.7064661642960127e-05, "loss": 2.1041, "step": 9262000 }, { "epoch": 45.89, "learning_rate": 2.7063425533706892e-05, "loss": 2.124, "step": 9262500 }, { "epoch": 45.89, "learning_rate": 2.706218694728081e-05, "loss": 2.1132, "step": 9263000 }, { "epoch": 45.89, "learning_rate": 2.7060948360854726e-05, "loss": 2.1285, "step": 9263500 }, { "epoch": 45.9, "learning_rate": 2.7059709774428643e-05, "loss": 2.1113, "step": 9264000 }, { "epoch": 45.9, "learning_rate": 2.705847118800256e-05, "loss": 2.1214, "step": 9264500 }, { "epoch": 45.9, "learning_rate": 2.7057232601576477e-05, "loss": 2.1264, "step": 9265000 }, { "epoch": 45.9, "learning_rate": 2.7055994015150394e-05, "loss": 2.0996, "step": 9265500 }, { "epoch": 45.91, "learning_rate": 2.7054755428724304e-05, "loss": 2.0896, "step": 9266000 }, { "epoch": 45.91, "learning_rate": 2.705351684229822e-05, "loss": 2.1114, "step": 9266500 }, { "epoch": 45.91, "learning_rate": 2.7052280733044993e-05, "loss": 2.0987, "step": 9267000 }, { "epoch": 45.91, "learning_rate": 2.705104214661891e-05, "loss": 2.1308, "step": 9267500 }, { "epoch": 45.92, "learning_rate": 2.7049803560192827e-05, "loss": 2.1214, "step": 9268000 }, { "epoch": 45.92, "learning_rate": 2.7048564973766744e-05, "loss": 2.111, "step": 9268500 }, { "epoch": 45.92, "learning_rate": 2.7047326387340654e-05, "loss": 2.1152, "step": 9269000 }, { "epoch": 45.92, "learning_rate": 2.704608780091457e-05, "loss": 2.0788, "step": 9269500 }, { "epoch": 45.93, "learning_rate": 2.7044851691661343e-05, "loss": 2.0925, "step": 9270000 }, { "epoch": 45.93, "learning_rate": 2.704361310523526e-05, "loss": 2.1179, "step": 9270500 }, { "epoch": 45.93, "learning_rate": 2.7042374518809177e-05, "loss": 2.0874, "step": 9271000 }, { "epoch": 45.93, "learning_rate": 2.7041135932383094e-05, "loss": 2.1063, "step": 9271500 }, { "epoch": 45.94, "learning_rate": 2.7039897345957004e-05, "loss": 2.0643, "step": 9272000 }, { "epoch": 45.94, "learning_rate": 2.7038661236703776e-05, "loss": 2.1224, "step": 9272500 }, { "epoch": 45.94, "learning_rate": 2.7037425127450545e-05, "loss": 2.1164, "step": 9273000 }, { "epoch": 45.94, "learning_rate": 2.7036186541024462e-05, "loss": 2.1155, "step": 9273500 }, { "epoch": 45.95, "learning_rate": 2.703494795459838e-05, "loss": 2.101, "step": 9274000 }, { "epoch": 45.95, "learning_rate": 2.7033709368172293e-05, "loss": 2.0962, "step": 9274500 }, { "epoch": 45.95, "learning_rate": 2.703247078174621e-05, "loss": 2.1054, "step": 9275000 }, { "epoch": 45.95, "learning_rate": 2.7031232195320126e-05, "loss": 2.1239, "step": 9275500 }, { "epoch": 45.96, "learning_rate": 2.7029993608894043e-05, "loss": 2.118, "step": 9276000 }, { "epoch": 45.96, "learning_rate": 2.702875502246796e-05, "loss": 2.1185, "step": 9276500 }, { "epoch": 45.96, "learning_rate": 2.7027516436041877e-05, "loss": 2.0901, "step": 9277000 }, { "epoch": 45.96, "learning_rate": 2.7026277849615794e-05, "loss": 2.0975, "step": 9277500 }, { "epoch": 45.97, "learning_rate": 2.702503926318971e-05, "loss": 2.1045, "step": 9278000 }, { "epoch": 45.97, "learning_rate": 2.702380067676362e-05, "loss": 2.1009, "step": 9278500 }, { "epoch": 45.97, "learning_rate": 2.7022564567510393e-05, "loss": 2.0985, "step": 9279000 }, { "epoch": 45.97, "learning_rate": 2.702132598108431e-05, "loss": 2.0956, "step": 9279500 }, { "epoch": 45.98, "learning_rate": 2.7020087394658227e-05, "loss": 2.0982, "step": 9280000 }, { "epoch": 45.98, "learning_rate": 2.7018848808232144e-05, "loss": 2.0922, "step": 9280500 }, { "epoch": 45.98, "learning_rate": 2.701761022180606e-05, "loss": 2.0966, "step": 9281000 }, { "epoch": 45.98, "learning_rate": 2.701637163537997e-05, "loss": 2.1189, "step": 9281500 }, { "epoch": 45.99, "learning_rate": 2.7015135526126743e-05, "loss": 2.1236, "step": 9282000 }, { "epoch": 45.99, "learning_rate": 2.701389693970066e-05, "loss": 2.1152, "step": 9282500 }, { "epoch": 45.99, "learning_rate": 2.7012658353274577e-05, "loss": 2.0956, "step": 9283000 }, { "epoch": 45.99, "learning_rate": 2.7011419766848494e-05, "loss": 2.1048, "step": 9283500 }, { "epoch": 46.0, "learning_rate": 2.7010183657595263e-05, "loss": 2.1066, "step": 9284000 }, { "epoch": 46.0, "learning_rate": 2.7008945071169177e-05, "loss": 2.1353, "step": 9284500 }, { "epoch": 46.0, "eval_accuracy": 0.6679508959060474, "eval_accuracy_mlm": 0.6258791992319123, "eval_accuracy_nsp": 0.8662883051784797, "eval_loss": 2.295206308364868, "eval_runtime": 147.0963, "eval_samples_per_second": 1733.28, "eval_steps_per_second": 72.225, "step": 9284778 }, { "epoch": 46.0, "learning_rate": 2.7007706484743094e-05, "loss": 2.0904, "step": 9285000 }, { "epoch": 46.0, "learning_rate": 2.700646789831701e-05, "loss": 2.072, "step": 9285500 }, { "epoch": 46.01, "learning_rate": 2.7005229311890927e-05, "loss": 2.0724, "step": 9286000 }, { "epoch": 46.01, "learning_rate": 2.7003990725464844e-05, "loss": 2.0836, "step": 9286500 }, { "epoch": 46.01, "learning_rate": 2.700275213903876e-05, "loss": 2.0759, "step": 9287000 }, { "epoch": 46.01, "learning_rate": 2.700151355261267e-05, "loss": 2.0672, "step": 9287500 }, { "epoch": 46.02, "learning_rate": 2.700027496618659e-05, "loss": 2.0638, "step": 9288000 }, { "epoch": 46.02, "learning_rate": 2.699903885693336e-05, "loss": 2.1169, "step": 9288500 }, { "epoch": 46.02, "learning_rate": 2.699780274768013e-05, "loss": 2.0915, "step": 9289000 }, { "epoch": 46.02, "learning_rate": 2.6996566638426895e-05, "loss": 2.0855, "step": 9289500 }, { "epoch": 46.03, "learning_rate": 2.699532805200081e-05, "loss": 2.0756, "step": 9290000 }, { "epoch": 46.03, "learning_rate": 2.699408946557473e-05, "loss": 2.0682, "step": 9290500 }, { "epoch": 46.03, "learning_rate": 2.6992850879148646e-05, "loss": 2.0702, "step": 9291000 }, { "epoch": 46.03, "learning_rate": 2.6991612292722562e-05, "loss": 2.0941, "step": 9291500 }, { "epoch": 46.04, "learning_rate": 2.699037370629648e-05, "loss": 2.0824, "step": 9292000 }, { "epoch": 46.04, "learning_rate": 2.6989135119870396e-05, "loss": 2.0752, "step": 9292500 }, { "epoch": 46.04, "learning_rate": 2.6987896533444313e-05, "loss": 2.0707, "step": 9293000 }, { "epoch": 46.04, "learning_rate": 2.698665794701823e-05, "loss": 2.068, "step": 9293500 }, { "epoch": 46.05, "learning_rate": 2.6985419360592147e-05, "loss": 2.0742, "step": 9294000 }, { "epoch": 46.05, "learning_rate": 2.6984180774166064e-05, "loss": 2.0766, "step": 9294500 }, { "epoch": 46.05, "learning_rate": 2.6982942187739978e-05, "loss": 2.0826, "step": 9295000 }, { "epoch": 46.05, "learning_rate": 2.6981703601313895e-05, "loss": 2.1003, "step": 9295500 }, { "epoch": 46.06, "learning_rate": 2.698046501488781e-05, "loss": 2.1082, "step": 9296000 }, { "epoch": 46.06, "learning_rate": 2.697922642846173e-05, "loss": 2.0594, "step": 9296500 }, { "epoch": 46.06, "learning_rate": 2.6977990319208497e-05, "loss": 2.0842, "step": 9297000 }, { "epoch": 46.06, "learning_rate": 2.6976754209955263e-05, "loss": 2.0753, "step": 9297500 }, { "epoch": 46.07, "learning_rate": 2.697551562352918e-05, "loss": 2.0916, "step": 9298000 }, { "epoch": 46.07, "learning_rate": 2.6974277037103096e-05, "loss": 2.0814, "step": 9298500 }, { "epoch": 46.07, "learning_rate": 2.6973038450677013e-05, "loss": 2.1176, "step": 9299000 }, { "epoch": 46.07, "learning_rate": 2.697179986425093e-05, "loss": 2.0949, "step": 9299500 }, { "epoch": 46.08, "learning_rate": 2.6970561277824847e-05, "loss": 2.0768, "step": 9300000 }, { "epoch": 46.08, "learning_rate": 2.6969322691398764e-05, "loss": 2.0664, "step": 9300500 }, { "epoch": 46.08, "learning_rate": 2.696808410497268e-05, "loss": 2.0925, "step": 9301000 }, { "epoch": 46.08, "learning_rate": 2.6966845518546595e-05, "loss": 2.0882, "step": 9301500 }, { "epoch": 46.09, "learning_rate": 2.696560693212051e-05, "loss": 2.0863, "step": 9302000 }, { "epoch": 46.09, "learning_rate": 2.696437082286728e-05, "loss": 2.1116, "step": 9302500 }, { "epoch": 46.09, "learning_rate": 2.6963132236441197e-05, "loss": 2.1032, "step": 9303000 }, { "epoch": 46.09, "learning_rate": 2.6961893650015114e-05, "loss": 2.0897, "step": 9303500 }, { "epoch": 46.1, "learning_rate": 2.696065506358903e-05, "loss": 2.1125, "step": 9304000 }, { "epoch": 46.1, "learning_rate": 2.6959416477162948e-05, "loss": 2.0707, "step": 9304500 }, { "epoch": 46.1, "learning_rate": 2.695817789073686e-05, "loss": 2.0835, "step": 9305000 }, { "epoch": 46.1, "learning_rate": 2.695693930431078e-05, "loss": 2.0975, "step": 9305500 }, { "epoch": 46.11, "learning_rate": 2.6955703195057547e-05, "loss": 2.0841, "step": 9306000 }, { "epoch": 46.11, "learning_rate": 2.6954464608631464e-05, "loss": 2.099, "step": 9306500 }, { "epoch": 46.11, "learning_rate": 2.695322602220538e-05, "loss": 2.0886, "step": 9307000 }, { "epoch": 46.11, "learning_rate": 2.6951987435779298e-05, "loss": 2.0814, "step": 9307500 }, { "epoch": 46.12, "learning_rate": 2.6950748849353215e-05, "loss": 2.08, "step": 9308000 }, { "epoch": 46.12, "learning_rate": 2.694951026292713e-05, "loss": 2.0933, "step": 9308500 }, { "epoch": 46.12, "learning_rate": 2.6948271676501046e-05, "loss": 2.1073, "step": 9309000 }, { "epoch": 46.12, "learning_rate": 2.694703309007496e-05, "loss": 2.0779, "step": 9309500 }, { "epoch": 46.12, "learning_rate": 2.6945794503648876e-05, "loss": 2.1033, "step": 9310000 }, { "epoch": 46.13, "learning_rate": 2.6944555917222793e-05, "loss": 2.0733, "step": 9310500 }, { "epoch": 46.13, "learning_rate": 2.6943317330796706e-05, "loss": 2.0792, "step": 9311000 }, { "epoch": 46.13, "learning_rate": 2.694208122154348e-05, "loss": 2.0736, "step": 9311500 }, { "epoch": 46.13, "learning_rate": 2.6940842635117396e-05, "loss": 2.0896, "step": 9312000 }, { "epoch": 46.14, "learning_rate": 2.6939606525864164e-05, "loss": 2.064, "step": 9312500 }, { "epoch": 46.14, "learning_rate": 2.693836793943808e-05, "loss": 2.1021, "step": 9313000 }, { "epoch": 46.14, "learning_rate": 2.6937129353011998e-05, "loss": 2.0685, "step": 9313500 }, { "epoch": 46.14, "learning_rate": 2.6935890766585915e-05, "loss": 2.0752, "step": 9314000 }, { "epoch": 46.15, "learning_rate": 2.6934652180159832e-05, "loss": 2.0853, "step": 9314500 }, { "epoch": 46.15, "learning_rate": 2.6933413593733746e-05, "loss": 2.0748, "step": 9315000 }, { "epoch": 46.15, "learning_rate": 2.6932175007307663e-05, "loss": 2.0913, "step": 9315500 }, { "epoch": 46.15, "learning_rate": 2.6930936420881576e-05, "loss": 2.0764, "step": 9316000 }, { "epoch": 46.16, "learning_rate": 2.6929697834455493e-05, "loss": 2.0792, "step": 9316500 }, { "epoch": 46.16, "learning_rate": 2.692845924802941e-05, "loss": 2.0641, "step": 9317000 }, { "epoch": 46.16, "learning_rate": 2.6927220661603324e-05, "loss": 2.0637, "step": 9317500 }, { "epoch": 46.16, "learning_rate": 2.6925987029522948e-05, "loss": 2.0966, "step": 9318000 }, { "epoch": 46.17, "learning_rate": 2.6924748443096865e-05, "loss": 2.1145, "step": 9318500 }, { "epoch": 46.17, "learning_rate": 2.692350985667078e-05, "loss": 2.1085, "step": 9319000 }, { "epoch": 46.17, "learning_rate": 2.69222712702447e-05, "loss": 2.0656, "step": 9319500 }, { "epoch": 46.17, "learning_rate": 2.6921037638164316e-05, "loss": 2.0713, "step": 9320000 }, { "epoch": 46.18, "learning_rate": 2.6919799051738233e-05, "loss": 2.0864, "step": 9320500 }, { "epoch": 46.18, "learning_rate": 2.691856046531215e-05, "loss": 2.1114, "step": 9321000 }, { "epoch": 46.18, "learning_rate": 2.6917321878886066e-05, "loss": 2.0913, "step": 9321500 }, { "epoch": 46.18, "learning_rate": 2.691608329245998e-05, "loss": 2.0893, "step": 9322000 }, { "epoch": 46.19, "learning_rate": 2.6914844706033897e-05, "loss": 2.0936, "step": 9322500 }, { "epoch": 46.19, "learning_rate": 2.6913606119607814e-05, "loss": 2.081, "step": 9323000 }, { "epoch": 46.19, "learning_rate": 2.691236753318173e-05, "loss": 2.0976, "step": 9323500 }, { "epoch": 46.19, "learning_rate": 2.6911128946755648e-05, "loss": 2.086, "step": 9324000 }, { "epoch": 46.2, "learning_rate": 2.6909892837502417e-05, "loss": 2.0934, "step": 9324500 }, { "epoch": 46.2, "learning_rate": 2.690865425107633e-05, "loss": 2.0848, "step": 9325000 }, { "epoch": 46.2, "learning_rate": 2.6907415664650247e-05, "loss": 2.0879, "step": 9325500 }, { "epoch": 46.2, "learning_rate": 2.6906177078224164e-05, "loss": 2.0901, "step": 9326000 }, { "epoch": 46.21, "learning_rate": 2.6904940968970933e-05, "loss": 2.0875, "step": 9326500 }, { "epoch": 46.21, "learning_rate": 2.690370238254485e-05, "loss": 2.0691, "step": 9327000 }, { "epoch": 46.21, "learning_rate": 2.6902463796118767e-05, "loss": 2.07, "step": 9327500 }, { "epoch": 46.21, "learning_rate": 2.6901225209692684e-05, "loss": 2.0867, "step": 9328000 }, { "epoch": 46.22, "learning_rate": 2.6899986623266597e-05, "loss": 2.0744, "step": 9328500 }, { "epoch": 46.22, "learning_rate": 2.6898748036840514e-05, "loss": 2.1063, "step": 9329000 }, { "epoch": 46.22, "learning_rate": 2.689750945041443e-05, "loss": 2.0857, "step": 9329500 }, { "epoch": 46.22, "learning_rate": 2.68962733411612e-05, "loss": 2.1139, "step": 9330000 }, { "epoch": 46.23, "learning_rate": 2.6895034754735117e-05, "loss": 2.0702, "step": 9330500 }, { "epoch": 46.23, "learning_rate": 2.6893796168309034e-05, "loss": 2.127, "step": 9331000 }, { "epoch": 46.23, "learning_rate": 2.689255758188295e-05, "loss": 2.08, "step": 9331500 }, { "epoch": 46.23, "learning_rate": 2.6891318995456864e-05, "loss": 2.0818, "step": 9332000 }, { "epoch": 46.24, "learning_rate": 2.689008040903078e-05, "loss": 2.067, "step": 9332500 }, { "epoch": 46.24, "learning_rate": 2.6888841822604698e-05, "loss": 2.0859, "step": 9333000 }, { "epoch": 46.24, "learning_rate": 2.6887603236178615e-05, "loss": 2.0568, "step": 9333500 }, { "epoch": 46.24, "learning_rate": 2.6886364649752532e-05, "loss": 2.1119, "step": 9334000 }, { "epoch": 46.25, "learning_rate": 2.688512606332645e-05, "loss": 2.1081, "step": 9334500 }, { "epoch": 46.25, "learning_rate": 2.6883887476900366e-05, "loss": 2.0841, "step": 9335000 }, { "epoch": 46.25, "learning_rate": 2.6882653844819983e-05, "loss": 2.0973, "step": 9335500 }, { "epoch": 46.25, "learning_rate": 2.68814152583939e-05, "loss": 2.0851, "step": 9336000 }, { "epoch": 46.26, "learning_rate": 2.6880176671967817e-05, "loss": 2.0768, "step": 9336500 }, { "epoch": 46.26, "learning_rate": 2.6878938085541734e-05, "loss": 2.0922, "step": 9337000 }, { "epoch": 46.26, "learning_rate": 2.687769949911565e-05, "loss": 2.0774, "step": 9337500 }, { "epoch": 46.26, "learning_rate": 2.6876460912689568e-05, "loss": 2.0893, "step": 9338000 }, { "epoch": 46.27, "learning_rate": 2.687522232626348e-05, "loss": 2.0888, "step": 9338500 }, { "epoch": 46.27, "learning_rate": 2.6873983739837398e-05, "loss": 2.0824, "step": 9339000 }, { "epoch": 46.27, "learning_rate": 2.6872745153411315e-05, "loss": 2.0865, "step": 9339500 }, { "epoch": 46.27, "learning_rate": 2.6871509044158084e-05, "loss": 2.0862, "step": 9340000 }, { "epoch": 46.28, "learning_rate": 2.6870270457732e-05, "loss": 2.0971, "step": 9340500 }, { "epoch": 46.28, "learning_rate": 2.6869031871305918e-05, "loss": 2.1005, "step": 9341000 }, { "epoch": 46.28, "learning_rate": 2.6867793284879835e-05, "loss": 2.1086, "step": 9341500 }, { "epoch": 46.28, "learning_rate": 2.6866554698453748e-05, "loss": 2.0831, "step": 9342000 }, { "epoch": 46.29, "learning_rate": 2.6865316112027665e-05, "loss": 2.0873, "step": 9342500 }, { "epoch": 46.29, "learning_rate": 2.6864080002774434e-05, "loss": 2.1076, "step": 9343000 }, { "epoch": 46.29, "learning_rate": 2.6862843893521206e-05, "loss": 2.0898, "step": 9343500 }, { "epoch": 46.29, "learning_rate": 2.6861605307095123e-05, "loss": 2.0815, "step": 9344000 }, { "epoch": 46.3, "learning_rate": 2.6860366720669033e-05, "loss": 2.0707, "step": 9344500 }, { "epoch": 46.3, "learning_rate": 2.685912813424295e-05, "loss": 2.0795, "step": 9345000 }, { "epoch": 46.3, "learning_rate": 2.6857889547816867e-05, "loss": 2.1116, "step": 9345500 }, { "epoch": 46.3, "learning_rate": 2.6856650961390784e-05, "loss": 2.0923, "step": 9346000 }, { "epoch": 46.31, "learning_rate": 2.68554123749647e-05, "loss": 2.1106, "step": 9346500 }, { "epoch": 46.31, "learning_rate": 2.6854176265711473e-05, "loss": 2.0783, "step": 9347000 }, { "epoch": 46.31, "learning_rate": 2.685294015645824e-05, "loss": 2.0828, "step": 9347500 }, { "epoch": 46.31, "learning_rate": 2.6851701570032155e-05, "loss": 2.0808, "step": 9348000 }, { "epoch": 46.32, "learning_rate": 2.6850462983606072e-05, "loss": 2.1034, "step": 9348500 }, { "epoch": 46.32, "learning_rate": 2.684922439717999e-05, "loss": 2.1123, "step": 9349000 }, { "epoch": 46.32, "learning_rate": 2.6847985810753906e-05, "loss": 2.0978, "step": 9349500 }, { "epoch": 46.32, "learning_rate": 2.6846747224327823e-05, "loss": 2.0982, "step": 9350000 }, { "epoch": 46.33, "learning_rate": 2.684551111507459e-05, "loss": 2.106, "step": 9350500 }, { "epoch": 46.33, "learning_rate": 2.6844272528648505e-05, "loss": 2.1025, "step": 9351000 }, { "epoch": 46.33, "learning_rate": 2.6843033942222422e-05, "loss": 2.0889, "step": 9351500 }, { "epoch": 46.33, "learning_rate": 2.684179535579634e-05, "loss": 2.102, "step": 9352000 }, { "epoch": 46.34, "learning_rate": 2.6840556769370256e-05, "loss": 2.0955, "step": 9352500 }, { "epoch": 46.34, "learning_rate": 2.6839318182944173e-05, "loss": 2.1129, "step": 9353000 }, { "epoch": 46.34, "learning_rate": 2.683807959651809e-05, "loss": 2.105, "step": 9353500 }, { "epoch": 46.34, "learning_rate": 2.6836841010092e-05, "loss": 2.0678, "step": 9354000 }, { "epoch": 46.35, "learning_rate": 2.6835604900838772e-05, "loss": 2.0886, "step": 9354500 }, { "epoch": 46.35, "learning_rate": 2.683436631441269e-05, "loss": 2.0901, "step": 9355000 }, { "epoch": 46.35, "learning_rate": 2.6833127727986606e-05, "loss": 2.1076, "step": 9355500 }, { "epoch": 46.35, "learning_rate": 2.6831889141560523e-05, "loss": 2.0953, "step": 9356000 }, { "epoch": 46.36, "learning_rate": 2.683065055513444e-05, "loss": 2.091, "step": 9356500 }, { "epoch": 46.36, "learning_rate": 2.682941196870835e-05, "loss": 2.0814, "step": 9357000 }, { "epoch": 46.36, "learning_rate": 2.6828173382282267e-05, "loss": 2.1006, "step": 9357500 }, { "epoch": 46.36, "learning_rate": 2.6826934795856184e-05, "loss": 2.0724, "step": 9358000 }, { "epoch": 46.37, "learning_rate": 2.68256962094301e-05, "loss": 2.0876, "step": 9358500 }, { "epoch": 46.37, "learning_rate": 2.6824460100176873e-05, "loss": 2.0859, "step": 9359000 }, { "epoch": 46.37, "learning_rate": 2.682322151375079e-05, "loss": 2.0751, "step": 9359500 }, { "epoch": 46.37, "learning_rate": 2.68219829273247e-05, "loss": 2.0953, "step": 9360000 }, { "epoch": 46.38, "learning_rate": 2.6820744340898617e-05, "loss": 2.0919, "step": 9360500 }, { "epoch": 46.38, "learning_rate": 2.6819505754472534e-05, "loss": 2.0838, "step": 9361000 }, { "epoch": 46.38, "learning_rate": 2.681826716804645e-05, "loss": 2.0881, "step": 9361500 }, { "epoch": 46.38, "learning_rate": 2.6817031058793223e-05, "loss": 2.0988, "step": 9362000 }, { "epoch": 46.39, "learning_rate": 2.681579247236714e-05, "loss": 2.0833, "step": 9362500 }, { "epoch": 46.39, "learning_rate": 2.6814553885941057e-05, "loss": 2.0897, "step": 9363000 }, { "epoch": 46.39, "learning_rate": 2.6813315299514967e-05, "loss": 2.0733, "step": 9363500 }, { "epoch": 46.39, "learning_rate": 2.6812076713088884e-05, "loss": 2.0909, "step": 9364000 }, { "epoch": 46.39, "learning_rate": 2.68108381266628e-05, "loss": 2.0961, "step": 9364500 }, { "epoch": 46.4, "learning_rate": 2.6809599540236718e-05, "loss": 2.0844, "step": 9365000 }, { "epoch": 46.4, "learning_rate": 2.6808360953810635e-05, "loss": 2.07, "step": 9365500 }, { "epoch": 46.4, "learning_rate": 2.6807122367384552e-05, "loss": 2.1094, "step": 9366000 }, { "epoch": 46.4, "learning_rate": 2.680588378095847e-05, "loss": 2.0865, "step": 9366500 }, { "epoch": 46.41, "learning_rate": 2.6804647671705234e-05, "loss": 2.0918, "step": 9367000 }, { "epoch": 46.41, "learning_rate": 2.680340908527915e-05, "loss": 2.1046, "step": 9367500 }, { "epoch": 46.41, "learning_rate": 2.6802172976025923e-05, "loss": 2.0946, "step": 9368000 }, { "epoch": 46.41, "learning_rate": 2.680093438959984e-05, "loss": 2.0787, "step": 9368500 }, { "epoch": 46.42, "learning_rate": 2.6799695803173757e-05, "loss": 2.0928, "step": 9369000 }, { "epoch": 46.42, "learning_rate": 2.6798457216747667e-05, "loss": 2.0572, "step": 9369500 }, { "epoch": 46.42, "learning_rate": 2.6797218630321584e-05, "loss": 2.109, "step": 9370000 }, { "epoch": 46.42, "learning_rate": 2.67959800438955e-05, "loss": 2.087, "step": 9370500 }, { "epoch": 46.43, "learning_rate": 2.6794741457469418e-05, "loss": 2.1005, "step": 9371000 }, { "epoch": 46.43, "learning_rate": 2.6793502871043335e-05, "loss": 2.0934, "step": 9371500 }, { "epoch": 46.43, "learning_rate": 2.6792266761790107e-05, "loss": 2.1091, "step": 9372000 }, { "epoch": 46.43, "learning_rate": 2.6791028175364018e-05, "loss": 2.097, "step": 9372500 }, { "epoch": 46.44, "learning_rate": 2.6789789588937934e-05, "loss": 2.0731, "step": 9373000 }, { "epoch": 46.44, "learning_rate": 2.6788553479684707e-05, "loss": 2.0837, "step": 9373500 }, { "epoch": 46.44, "learning_rate": 2.6787314893258624e-05, "loss": 2.1254, "step": 9374000 }, { "epoch": 46.44, "learning_rate": 2.678607630683254e-05, "loss": 2.1316, "step": 9374500 }, { "epoch": 46.45, "learning_rate": 2.6784837720406457e-05, "loss": 2.1212, "step": 9375000 }, { "epoch": 46.45, "learning_rate": 2.6783599133980374e-05, "loss": 2.1155, "step": 9375500 }, { "epoch": 46.45, "learning_rate": 2.6782360547554285e-05, "loss": 2.1051, "step": 9376000 }, { "epoch": 46.45, "learning_rate": 2.67811219611282e-05, "loss": 2.0796, "step": 9376500 }, { "epoch": 46.46, "learning_rate": 2.677988337470212e-05, "loss": 2.1, "step": 9377000 }, { "epoch": 46.46, "learning_rate": 2.6778644788276035e-05, "loss": 2.1021, "step": 9377500 }, { "epoch": 46.46, "learning_rate": 2.6777406201849952e-05, "loss": 2.1097, "step": 9378000 }, { "epoch": 46.46, "learning_rate": 2.677616761542387e-05, "loss": 2.0913, "step": 9378500 }, { "epoch": 46.47, "learning_rate": 2.6774929028997786e-05, "loss": 2.0995, "step": 9379000 }, { "epoch": 46.47, "learning_rate": 2.677369291974455e-05, "loss": 2.0555, "step": 9379500 }, { "epoch": 46.47, "learning_rate": 2.677245433331847e-05, "loss": 2.09, "step": 9380000 }, { "epoch": 46.47, "learning_rate": 2.677121822406524e-05, "loss": 2.1035, "step": 9380500 }, { "epoch": 46.48, "learning_rate": 2.6769979637639158e-05, "loss": 2.1039, "step": 9381000 }, { "epoch": 46.48, "learning_rate": 2.6768743528385926e-05, "loss": 2.0788, "step": 9381500 }, { "epoch": 46.48, "learning_rate": 2.6767504941959843e-05, "loss": 2.0876, "step": 9382000 }, { "epoch": 46.48, "learning_rate": 2.676626635553376e-05, "loss": 2.0893, "step": 9382500 }, { "epoch": 46.49, "learning_rate": 2.6765027769107677e-05, "loss": 2.0861, "step": 9383000 }, { "epoch": 46.49, "learning_rate": 2.676378918268159e-05, "loss": 2.123, "step": 9383500 }, { "epoch": 46.49, "learning_rate": 2.6762550596255508e-05, "loss": 2.1178, "step": 9384000 }, { "epoch": 46.49, "learning_rate": 2.6761312009829425e-05, "loss": 2.0954, "step": 9384500 }, { "epoch": 46.5, "learning_rate": 2.6760073423403338e-05, "loss": 2.0921, "step": 9385000 }, { "epoch": 46.5, "learning_rate": 2.675883731415011e-05, "loss": 2.0645, "step": 9385500 }, { "epoch": 46.5, "learning_rate": 2.6757598727724027e-05, "loss": 2.0969, "step": 9386000 }, { "epoch": 46.5, "learning_rate": 2.6756360141297944e-05, "loss": 2.082, "step": 9386500 }, { "epoch": 46.51, "learning_rate": 2.6755121554871858e-05, "loss": 2.1139, "step": 9387000 }, { "epoch": 46.51, "learning_rate": 2.6753882968445775e-05, "loss": 2.1023, "step": 9387500 }, { "epoch": 46.51, "learning_rate": 2.675264438201969e-05, "loss": 2.0967, "step": 9388000 }, { "epoch": 46.51, "learning_rate": 2.6751405795593605e-05, "loss": 2.0738, "step": 9388500 }, { "epoch": 46.52, "learning_rate": 2.6750167209167522e-05, "loss": 2.0996, "step": 9389000 }, { "epoch": 46.52, "learning_rate": 2.6748928622741436e-05, "loss": 2.1111, "step": 9389500 }, { "epoch": 46.52, "learning_rate": 2.6747690036315352e-05, "loss": 2.0882, "step": 9390000 }, { "epoch": 46.52, "learning_rate": 2.674645144988927e-05, "loss": 2.0917, "step": 9390500 }, { "epoch": 46.53, "learning_rate": 2.674521534063604e-05, "loss": 2.0839, "step": 9391000 }, { "epoch": 46.53, "learning_rate": 2.6743976754209955e-05, "loss": 2.095, "step": 9391500 }, { "epoch": 46.53, "learning_rate": 2.6742738167783872e-05, "loss": 2.0966, "step": 9392000 }, { "epoch": 46.53, "learning_rate": 2.674149958135779e-05, "loss": 2.0927, "step": 9392500 }, { "epoch": 46.54, "learning_rate": 2.674026347210456e-05, "loss": 2.0991, "step": 9393000 }, { "epoch": 46.54, "learning_rate": 2.6739024885678475e-05, "loss": 2.1059, "step": 9393500 }, { "epoch": 46.54, "learning_rate": 2.673778629925239e-05, "loss": 2.0973, "step": 9394000 }, { "epoch": 46.54, "learning_rate": 2.6736547712826305e-05, "loss": 2.089, "step": 9394500 }, { "epoch": 46.55, "learning_rate": 2.6735311603573077e-05, "loss": 2.1064, "step": 9395000 }, { "epoch": 46.55, "learning_rate": 2.6734073017146994e-05, "loss": 2.0828, "step": 9395500 }, { "epoch": 46.55, "learning_rate": 2.673283690789376e-05, "loss": 2.1121, "step": 9396000 }, { "epoch": 46.55, "learning_rate": 2.6731598321467677e-05, "loss": 2.1214, "step": 9396500 }, { "epoch": 46.56, "learning_rate": 2.6730362212214442e-05, "loss": 2.1075, "step": 9397000 }, { "epoch": 46.56, "learning_rate": 2.672912362578836e-05, "loss": 2.1082, "step": 9397500 }, { "epoch": 46.56, "learning_rate": 2.6727885039362276e-05, "loss": 2.1056, "step": 9398000 }, { "epoch": 46.56, "learning_rate": 2.6726646452936193e-05, "loss": 2.0953, "step": 9398500 }, { "epoch": 46.57, "learning_rate": 2.6725410343682962e-05, "loss": 2.1062, "step": 9399000 }, { "epoch": 46.57, "learning_rate": 2.672417175725688e-05, "loss": 2.1149, "step": 9399500 }, { "epoch": 46.57, "learning_rate": 2.6722933170830796e-05, "loss": 2.1029, "step": 9400000 }, { "epoch": 46.57, "learning_rate": 2.672169458440471e-05, "loss": 2.1121, "step": 9400500 }, { "epoch": 46.58, "learning_rate": 2.6720455997978626e-05, "loss": 2.0777, "step": 9401000 }, { "epoch": 46.58, "learning_rate": 2.6719217411552543e-05, "loss": 2.105, "step": 9401500 }, { "epoch": 46.58, "learning_rate": 2.671797882512646e-05, "loss": 2.0772, "step": 9402000 }, { "epoch": 46.58, "learning_rate": 2.6716740238700377e-05, "loss": 2.1119, "step": 9402500 }, { "epoch": 46.59, "learning_rate": 2.6715501652274294e-05, "loss": 2.0875, "step": 9403000 }, { "epoch": 46.59, "learning_rate": 2.671426306584821e-05, "loss": 2.1036, "step": 9403500 }, { "epoch": 46.59, "learning_rate": 2.6713026956594976e-05, "loss": 2.0777, "step": 9404000 }, { "epoch": 46.59, "learning_rate": 2.6711788370168893e-05, "loss": 2.1194, "step": 9404500 }, { "epoch": 46.6, "learning_rate": 2.6710552260915662e-05, "loss": 2.0971, "step": 9405000 }, { "epoch": 46.6, "learning_rate": 2.670931367448958e-05, "loss": 2.0789, "step": 9405500 }, { "epoch": 46.6, "learning_rate": 2.6708075088063496e-05, "loss": 2.0974, "step": 9406000 }, { "epoch": 46.6, "learning_rate": 2.6706836501637413e-05, "loss": 2.0791, "step": 9406500 }, { "epoch": 46.61, "learning_rate": 2.6705597915211326e-05, "loss": 2.1133, "step": 9407000 }, { "epoch": 46.61, "learning_rate": 2.6704359328785243e-05, "loss": 2.1155, "step": 9407500 }, { "epoch": 46.61, "learning_rate": 2.670312074235916e-05, "loss": 2.1144, "step": 9408000 }, { "epoch": 46.61, "learning_rate": 2.6701882155933077e-05, "loss": 2.0776, "step": 9408500 }, { "epoch": 46.62, "learning_rate": 2.6700643569506994e-05, "loss": 2.0758, "step": 9409000 }, { "epoch": 46.62, "learning_rate": 2.669940498308091e-05, "loss": 2.0937, "step": 9409500 }, { "epoch": 46.62, "learning_rate": 2.6698166396654828e-05, "loss": 2.092, "step": 9410000 }, { "epoch": 46.62, "learning_rate": 2.6696930287401593e-05, "loss": 2.0704, "step": 9410500 }, { "epoch": 46.63, "learning_rate": 2.669569170097551e-05, "loss": 2.1175, "step": 9411000 }, { "epoch": 46.63, "learning_rate": 2.6694453114549427e-05, "loss": 2.0989, "step": 9411500 }, { "epoch": 46.63, "learning_rate": 2.6693214528123344e-05, "loss": 2.1242, "step": 9412000 }, { "epoch": 46.63, "learning_rate": 2.6691978418870113e-05, "loss": 2.0975, "step": 9412500 }, { "epoch": 46.64, "learning_rate": 2.669073983244403e-05, "loss": 2.073, "step": 9413000 }, { "epoch": 46.64, "learning_rate": 2.6689501246017947e-05, "loss": 2.1087, "step": 9413500 }, { "epoch": 46.64, "learning_rate": 2.6688265136764712e-05, "loss": 2.1272, "step": 9414000 }, { "epoch": 46.64, "learning_rate": 2.668702655033863e-05, "loss": 2.0857, "step": 9414500 }, { "epoch": 46.65, "learning_rate": 2.6685787963912546e-05, "loss": 2.1135, "step": 9415000 }, { "epoch": 46.65, "learning_rate": 2.6684551854659318e-05, "loss": 2.1059, "step": 9415500 }, { "epoch": 46.65, "learning_rate": 2.6683313268233235e-05, "loss": 2.1141, "step": 9416000 }, { "epoch": 46.65, "learning_rate": 2.6682074681807152e-05, "loss": 2.09, "step": 9416500 }, { "epoch": 46.66, "learning_rate": 2.6680836095381062e-05, "loss": 2.1028, "step": 9417000 }, { "epoch": 46.66, "learning_rate": 2.667959750895498e-05, "loss": 2.1042, "step": 9417500 }, { "epoch": 46.66, "learning_rate": 2.6678358922528896e-05, "loss": 2.0743, "step": 9418000 }, { "epoch": 46.66, "learning_rate": 2.6677122813275668e-05, "loss": 2.0926, "step": 9418500 }, { "epoch": 46.66, "learning_rate": 2.6675884226849585e-05, "loss": 2.0743, "step": 9419000 }, { "epoch": 46.67, "learning_rate": 2.6674645640423502e-05, "loss": 2.0737, "step": 9419500 }, { "epoch": 46.67, "learning_rate": 2.6673407053997412e-05, "loss": 2.1104, "step": 9420000 }, { "epoch": 46.67, "learning_rate": 2.667216846757133e-05, "loss": 2.0751, "step": 9420500 }, { "epoch": 46.67, "learning_rate": 2.6670929881145246e-05, "loss": 2.0902, "step": 9421000 }, { "epoch": 46.68, "learning_rate": 2.6669691294719163e-05, "loss": 2.0868, "step": 9421500 }, { "epoch": 46.68, "learning_rate": 2.666845270829308e-05, "loss": 2.1072, "step": 9422000 }, { "epoch": 46.68, "learning_rate": 2.6667214121866997e-05, "loss": 2.0492, "step": 9422500 }, { "epoch": 46.68, "learning_rate": 2.6665975535440914e-05, "loss": 2.1008, "step": 9423000 }, { "epoch": 46.69, "learning_rate": 2.666473694901483e-05, "loss": 2.1163, "step": 9423500 }, { "epoch": 46.69, "learning_rate": 2.6663498362588744e-05, "loss": 2.0887, "step": 9424000 }, { "epoch": 46.69, "learning_rate": 2.666225977616266e-05, "loss": 2.0883, "step": 9424500 }, { "epoch": 46.69, "learning_rate": 2.6661021189736578e-05, "loss": 2.1039, "step": 9425000 }, { "epoch": 46.7, "learning_rate": 2.6659782603310495e-05, "loss": 2.1164, "step": 9425500 }, { "epoch": 46.7, "learning_rate": 2.6658544016884412e-05, "loss": 2.0871, "step": 9426000 }, { "epoch": 46.7, "learning_rate": 2.665730543045833e-05, "loss": 2.068, "step": 9426500 }, { "epoch": 46.7, "learning_rate": 2.6656066844032246e-05, "loss": 2.0666, "step": 9427000 }, { "epoch": 46.71, "learning_rate": 2.6654828257606163e-05, "loss": 2.1008, "step": 9427500 }, { "epoch": 46.71, "learning_rate": 2.6653592148352928e-05, "loss": 2.1178, "step": 9428000 }, { "epoch": 46.71, "learning_rate": 2.6652353561926845e-05, "loss": 2.1047, "step": 9428500 }, { "epoch": 46.71, "learning_rate": 2.6651114975500762e-05, "loss": 2.1266, "step": 9429000 }, { "epoch": 46.72, "learning_rate": 2.664987638907468e-05, "loss": 2.1084, "step": 9429500 }, { "epoch": 46.72, "learning_rate": 2.6648637802648596e-05, "loss": 2.0987, "step": 9430000 }, { "epoch": 46.72, "learning_rate": 2.6647399216222513e-05, "loss": 2.0924, "step": 9430500 }, { "epoch": 46.72, "learning_rate": 2.664616062979643e-05, "loss": 2.0759, "step": 9431000 }, { "epoch": 46.73, "learning_rate": 2.6644922043370347e-05, "loss": 2.0957, "step": 9431500 }, { "epoch": 46.73, "learning_rate": 2.6643683456944257e-05, "loss": 2.1011, "step": 9432000 }, { "epoch": 46.73, "learning_rate": 2.664244982486388e-05, "loss": 2.0694, "step": 9432500 }, { "epoch": 46.73, "learning_rate": 2.6641211238437798e-05, "loss": 2.0865, "step": 9433000 }, { "epoch": 46.74, "learning_rate": 2.6639972652011715e-05, "loss": 2.0987, "step": 9433500 }, { "epoch": 46.74, "learning_rate": 2.6638734065585628e-05, "loss": 2.1082, "step": 9434000 }, { "epoch": 46.74, "learning_rate": 2.6637495479159545e-05, "loss": 2.0813, "step": 9434500 }, { "epoch": 46.74, "learning_rate": 2.6636256892733462e-05, "loss": 2.1459, "step": 9435000 }, { "epoch": 46.75, "learning_rate": 2.663501830630738e-05, "loss": 2.0945, "step": 9435500 }, { "epoch": 46.75, "learning_rate": 2.6633779719881296e-05, "loss": 2.1049, "step": 9436000 }, { "epoch": 46.75, "learning_rate": 2.6632543610628065e-05, "loss": 2.0994, "step": 9436500 }, { "epoch": 46.75, "learning_rate": 2.663130750137483e-05, "loss": 2.0973, "step": 9437000 }, { "epoch": 46.76, "learning_rate": 2.6630071392121602e-05, "loss": 2.1103, "step": 9437500 }, { "epoch": 46.76, "learning_rate": 2.662883280569552e-05, "loss": 2.098, "step": 9438000 }, { "epoch": 46.76, "learning_rate": 2.6627594219269436e-05, "loss": 2.101, "step": 9438500 }, { "epoch": 46.76, "learning_rate": 2.6626355632843346e-05, "loss": 2.0959, "step": 9439000 }, { "epoch": 46.77, "learning_rate": 2.6625117046417263e-05, "loss": 2.1045, "step": 9439500 }, { "epoch": 46.77, "learning_rate": 2.662387845999118e-05, "loss": 2.1279, "step": 9440000 }, { "epoch": 46.77, "learning_rate": 2.6622642350737952e-05, "loss": 2.0788, "step": 9440500 }, { "epoch": 46.77, "learning_rate": 2.662140376431187e-05, "loss": 2.1151, "step": 9441000 }, { "epoch": 46.78, "learning_rate": 2.6620167655058638e-05, "loss": 2.116, "step": 9441500 }, { "epoch": 46.78, "learning_rate": 2.6618929068632552e-05, "loss": 2.105, "step": 9442000 }, { "epoch": 46.78, "learning_rate": 2.661769048220647e-05, "loss": 2.0937, "step": 9442500 }, { "epoch": 46.78, "learning_rate": 2.6616451895780386e-05, "loss": 2.0929, "step": 9443000 }, { "epoch": 46.79, "learning_rate": 2.6615213309354302e-05, "loss": 2.109, "step": 9443500 }, { "epoch": 46.79, "learning_rate": 2.661397720010107e-05, "loss": 2.0791, "step": 9444000 }, { "epoch": 46.79, "learning_rate": 2.6612738613674988e-05, "loss": 2.1058, "step": 9444500 }, { "epoch": 46.79, "learning_rate": 2.6611500027248902e-05, "loss": 2.0953, "step": 9445000 }, { "epoch": 46.8, "learning_rate": 2.661026144082282e-05, "loss": 2.1108, "step": 9445500 }, { "epoch": 46.8, "learning_rate": 2.6609022854396736e-05, "loss": 2.0652, "step": 9446000 }, { "epoch": 46.8, "learning_rate": 2.6607784267970653e-05, "loss": 2.1089, "step": 9446500 }, { "epoch": 46.8, "learning_rate": 2.660654568154457e-05, "loss": 2.1061, "step": 9447000 }, { "epoch": 46.81, "learning_rate": 2.6605307095118486e-05, "loss": 2.0925, "step": 9447500 }, { "epoch": 46.81, "learning_rate": 2.6604068508692403e-05, "loss": 2.1221, "step": 9448000 }, { "epoch": 46.81, "learning_rate": 2.6602829922266313e-05, "loss": 2.1317, "step": 9448500 }, { "epoch": 46.81, "learning_rate": 2.660159133584023e-05, "loss": 2.1118, "step": 9449000 }, { "epoch": 46.82, "learning_rate": 2.6600352749414147e-05, "loss": 2.1038, "step": 9449500 }, { "epoch": 46.82, "learning_rate": 2.6599114162988064e-05, "loss": 2.1036, "step": 9450000 }, { "epoch": 46.82, "learning_rate": 2.659787557656198e-05, "loss": 2.1119, "step": 9450500 }, { "epoch": 46.82, "learning_rate": 2.6596636990135898e-05, "loss": 2.0929, "step": 9451000 }, { "epoch": 46.83, "learning_rate": 2.6595398403709815e-05, "loss": 2.1186, "step": 9451500 }, { "epoch": 46.83, "learning_rate": 2.6594159817283732e-05, "loss": 2.0754, "step": 9452000 }, { "epoch": 46.83, "learning_rate": 2.659292123085765e-05, "loss": 2.0911, "step": 9452500 }, { "epoch": 46.83, "learning_rate": 2.6591682644431566e-05, "loss": 2.0824, "step": 9453000 }, { "epoch": 46.84, "learning_rate": 2.659044653517833e-05, "loss": 2.0792, "step": 9453500 }, { "epoch": 46.84, "learning_rate": 2.6589210425925103e-05, "loss": 2.0894, "step": 9454000 }, { "epoch": 46.84, "learning_rate": 2.6587971839499014e-05, "loss": 2.1295, "step": 9454500 }, { "epoch": 46.84, "learning_rate": 2.658673325307293e-05, "loss": 2.0822, "step": 9455000 }, { "epoch": 46.85, "learning_rate": 2.6585494666646847e-05, "loss": 2.0912, "step": 9455500 }, { "epoch": 46.85, "learning_rate": 2.6584256080220764e-05, "loss": 2.0916, "step": 9456000 }, { "epoch": 46.85, "learning_rate": 2.658302244814039e-05, "loss": 2.0985, "step": 9456500 }, { "epoch": 46.85, "learning_rate": 2.6581783861714305e-05, "loss": 2.1411, "step": 9457000 }, { "epoch": 46.86, "learning_rate": 2.6580545275288222e-05, "loss": 2.1014, "step": 9457500 }, { "epoch": 46.86, "learning_rate": 2.657930668886214e-05, "loss": 2.1093, "step": 9458000 }, { "epoch": 46.86, "learning_rate": 2.6578068102436056e-05, "loss": 2.1106, "step": 9458500 }, { "epoch": 46.86, "learning_rate": 2.657682951600997e-05, "loss": 2.0756, "step": 9459000 }, { "epoch": 46.87, "learning_rate": 2.6575590929583887e-05, "loss": 2.1015, "step": 9459500 }, { "epoch": 46.87, "learning_rate": 2.6574352343157804e-05, "loss": 2.1034, "step": 9460000 }, { "epoch": 46.87, "learning_rate": 2.657311375673172e-05, "loss": 2.1107, "step": 9460500 }, { "epoch": 46.87, "learning_rate": 2.657187517030563e-05, "loss": 2.1167, "step": 9461000 }, { "epoch": 46.88, "learning_rate": 2.6570636583879548e-05, "loss": 2.0894, "step": 9461500 }, { "epoch": 46.88, "learning_rate": 2.6569397997453465e-05, "loss": 2.1148, "step": 9462000 }, { "epoch": 46.88, "learning_rate": 2.656815941102738e-05, "loss": 2.1067, "step": 9462500 }, { "epoch": 46.88, "learning_rate": 2.65669208246013e-05, "loss": 2.1044, "step": 9463000 }, { "epoch": 46.89, "learning_rate": 2.6565682238175215e-05, "loss": 2.108, "step": 9463500 }, { "epoch": 46.89, "learning_rate": 2.6564443651749132e-05, "loss": 2.1134, "step": 9464000 }, { "epoch": 46.89, "learning_rate": 2.656320506532305e-05, "loss": 2.0791, "step": 9464500 }, { "epoch": 46.89, "learning_rate": 2.6561968956069815e-05, "loss": 2.0893, "step": 9465000 }, { "epoch": 46.9, "learning_rate": 2.656073036964373e-05, "loss": 2.1128, "step": 9465500 }, { "epoch": 46.9, "learning_rate": 2.655949178321765e-05, "loss": 2.1039, "step": 9466000 }, { "epoch": 46.9, "learning_rate": 2.6558253196791565e-05, "loss": 2.0977, "step": 9466500 }, { "epoch": 46.9, "learning_rate": 2.6557017087538334e-05, "loss": 2.107, "step": 9467000 }, { "epoch": 46.91, "learning_rate": 2.6555780978285106e-05, "loss": 2.119, "step": 9467500 }, { "epoch": 46.91, "learning_rate": 2.6554544869031872e-05, "loss": 2.0959, "step": 9468000 }, { "epoch": 46.91, "learning_rate": 2.655330628260579e-05, "loss": 2.0897, "step": 9468500 }, { "epoch": 46.91, "learning_rate": 2.6552067696179706e-05, "loss": 2.0966, "step": 9469000 }, { "epoch": 46.92, "learning_rate": 2.6550829109753623e-05, "loss": 2.1112, "step": 9469500 }, { "epoch": 46.92, "learning_rate": 2.654959052332754e-05, "loss": 2.1053, "step": 9470000 }, { "epoch": 46.92, "learning_rate": 2.6548351936901456e-05, "loss": 2.0948, "step": 9470500 }, { "epoch": 46.92, "learning_rate": 2.6547113350475373e-05, "loss": 2.1057, "step": 9471000 }, { "epoch": 46.93, "learning_rate": 2.654587476404929e-05, "loss": 2.0805, "step": 9471500 }, { "epoch": 46.93, "learning_rate": 2.6544638654796056e-05, "loss": 2.106, "step": 9472000 }, { "epoch": 46.93, "learning_rate": 2.6543400068369973e-05, "loss": 2.1013, "step": 9472500 }, { "epoch": 46.93, "learning_rate": 2.654216148194389e-05, "loss": 2.1063, "step": 9473000 }, { "epoch": 46.93, "learning_rate": 2.6540922895517806e-05, "loss": 2.1082, "step": 9473500 }, { "epoch": 46.94, "learning_rate": 2.6539684309091723e-05, "loss": 2.0811, "step": 9474000 }, { "epoch": 46.94, "learning_rate": 2.653844572266564e-05, "loss": 2.0941, "step": 9474500 }, { "epoch": 46.94, "learning_rate": 2.6537209613412406e-05, "loss": 2.1153, "step": 9475000 }, { "epoch": 46.94, "learning_rate": 2.6535971026986323e-05, "loss": 2.1102, "step": 9475500 }, { "epoch": 46.95, "learning_rate": 2.653473244056024e-05, "loss": 2.0982, "step": 9476000 }, { "epoch": 46.95, "learning_rate": 2.6533493854134157e-05, "loss": 2.1253, "step": 9476500 }, { "epoch": 46.95, "learning_rate": 2.6532255267708073e-05, "loss": 2.093, "step": 9477000 }, { "epoch": 46.95, "learning_rate": 2.653101668128199e-05, "loss": 2.0911, "step": 9477500 }, { "epoch": 46.96, "learning_rate": 2.6529783049201608e-05, "loss": 2.1029, "step": 9478000 }, { "epoch": 46.96, "learning_rate": 2.6528544462775525e-05, "loss": 2.1197, "step": 9478500 }, { "epoch": 46.96, "learning_rate": 2.6527305876349438e-05, "loss": 2.0959, "step": 9479000 }, { "epoch": 46.96, "learning_rate": 2.6526067289923355e-05, "loss": 2.0986, "step": 9479500 }, { "epoch": 46.97, "learning_rate": 2.6524831180670127e-05, "loss": 2.0954, "step": 9480000 }, { "epoch": 46.97, "learning_rate": 2.652359259424404e-05, "loss": 2.0992, "step": 9480500 }, { "epoch": 46.97, "learning_rate": 2.6522354007817958e-05, "loss": 2.0942, "step": 9481000 }, { "epoch": 46.97, "learning_rate": 2.6521115421391875e-05, "loss": 2.1055, "step": 9481500 }, { "epoch": 46.98, "learning_rate": 2.651987683496579e-05, "loss": 2.0912, "step": 9482000 }, { "epoch": 46.98, "learning_rate": 2.6518638248539705e-05, "loss": 2.0838, "step": 9482500 }, { "epoch": 46.98, "learning_rate": 2.6517399662113622e-05, "loss": 2.1286, "step": 9483000 }, { "epoch": 46.98, "learning_rate": 2.651616107568754e-05, "loss": 2.0881, "step": 9483500 }, { "epoch": 46.99, "learning_rate": 2.6514922489261456e-05, "loss": 2.0753, "step": 9484000 }, { "epoch": 46.99, "learning_rate": 2.6513683902835373e-05, "loss": 2.1228, "step": 9484500 }, { "epoch": 46.99, "learning_rate": 2.6512447793582142e-05, "loss": 2.1091, "step": 9485000 }, { "epoch": 46.99, "learning_rate": 2.6511211684328914e-05, "loss": 2.0884, "step": 9485500 }, { "epoch": 47.0, "learning_rate": 2.650997309790283e-05, "loss": 2.0951, "step": 9486000 }, { "epoch": 47.0, "learning_rate": 2.650873451147674e-05, "loss": 2.1128, "step": 9486500 }, { "epoch": 47.0, "eval_accuracy": 0.6682178153245157, "eval_accuracy_mlm": 0.6263053472215288, "eval_accuracy_nsp": 0.8657470416812115, "eval_loss": 2.2959861755371094, "eval_runtime": 147.171, "eval_samples_per_second": 1732.399, "eval_steps_per_second": 72.188, "step": 9486621 }, { "epoch": 47.0, "learning_rate": 2.6507495925050658e-05, "loss": 2.0818, "step": 9487000 }, { "epoch": 47.0, "learning_rate": 2.6506257338624575e-05, "loss": 2.0772, "step": 9487500 }, { "epoch": 47.01, "learning_rate": 2.6505018752198492e-05, "loss": 2.0896, "step": 9488000 }, { "epoch": 47.01, "learning_rate": 2.650378016577241e-05, "loss": 2.064, "step": 9488500 }, { "epoch": 47.01, "learning_rate": 2.6502541579346322e-05, "loss": 2.0525, "step": 9489000 }, { "epoch": 47.01, "learning_rate": 2.650130299292024e-05, "loss": 2.0796, "step": 9489500 }, { "epoch": 47.02, "learning_rate": 2.6500064406494156e-05, "loss": 2.0768, "step": 9490000 }, { "epoch": 47.02, "learning_rate": 2.6498825820068073e-05, "loss": 2.0826, "step": 9490500 }, { "epoch": 47.02, "learning_rate": 2.649758723364199e-05, "loss": 2.0742, "step": 9491000 }, { "epoch": 47.02, "learning_rate": 2.6496348647215907e-05, "loss": 2.0665, "step": 9491500 }, { "epoch": 47.03, "learning_rate": 2.6495110060789824e-05, "loss": 2.0668, "step": 9492000 }, { "epoch": 47.03, "learning_rate": 2.649387147436374e-05, "loss": 2.0683, "step": 9492500 }, { "epoch": 47.03, "learning_rate": 2.6492632887937658e-05, "loss": 2.0558, "step": 9493000 }, { "epoch": 47.03, "learning_rate": 2.6491394301511575e-05, "loss": 2.057, "step": 9493500 }, { "epoch": 47.04, "learning_rate": 2.6490160669431192e-05, "loss": 2.0854, "step": 9494000 }, { "epoch": 47.04, "learning_rate": 2.648892208300511e-05, "loss": 2.0894, "step": 9494500 }, { "epoch": 47.04, "learning_rate": 2.6487683496579026e-05, "loss": 2.0619, "step": 9495000 }, { "epoch": 47.04, "learning_rate": 2.6486444910152943e-05, "loss": 2.0842, "step": 9495500 }, { "epoch": 47.05, "learning_rate": 2.6485206323726856e-05, "loss": 2.0813, "step": 9496000 }, { "epoch": 47.05, "learning_rate": 2.648397269164648e-05, "loss": 2.0599, "step": 9496500 }, { "epoch": 47.05, "learning_rate": 2.6482734105220397e-05, "loss": 2.06, "step": 9497000 }, { "epoch": 47.05, "learning_rate": 2.6481495518794314e-05, "loss": 2.0724, "step": 9497500 }, { "epoch": 47.06, "learning_rate": 2.648025693236823e-05, "loss": 2.0587, "step": 9498000 }, { "epoch": 47.06, "learning_rate": 2.6479020823114996e-05, "loss": 2.0977, "step": 9498500 }, { "epoch": 47.06, "learning_rate": 2.6477782236688913e-05, "loss": 2.1028, "step": 9499000 }, { "epoch": 47.06, "learning_rate": 2.647654365026283e-05, "loss": 2.0789, "step": 9499500 }, { "epoch": 47.07, "learning_rate": 2.6475305063836747e-05, "loss": 2.0815, "step": 9500000 }, { "epoch": 47.07, "learning_rate": 2.6474066477410664e-05, "loss": 2.0915, "step": 9500500 }, { "epoch": 47.07, "learning_rate": 2.647282789098458e-05, "loss": 2.0862, "step": 9501000 }, { "epoch": 47.07, "learning_rate": 2.6471589304558498e-05, "loss": 2.0731, "step": 9501500 }, { "epoch": 47.08, "learning_rate": 2.6470350718132408e-05, "loss": 2.0569, "step": 9502000 }, { "epoch": 47.08, "learning_rate": 2.6469112131706325e-05, "loss": 2.0773, "step": 9502500 }, { "epoch": 47.08, "learning_rate": 2.6467873545280242e-05, "loss": 2.0996, "step": 9503000 }, { "epoch": 47.08, "learning_rate": 2.646663495885416e-05, "loss": 2.0935, "step": 9503500 }, { "epoch": 47.09, "learning_rate": 2.646539884960093e-05, "loss": 2.0582, "step": 9504000 }, { "epoch": 47.09, "learning_rate": 2.6464160263174848e-05, "loss": 2.0739, "step": 9504500 }, { "epoch": 47.09, "learning_rate": 2.6462921676748765e-05, "loss": 2.0604, "step": 9505000 }, { "epoch": 47.09, "learning_rate": 2.6461683090322675e-05, "loss": 2.0967, "step": 9505500 }, { "epoch": 47.1, "learning_rate": 2.6460444503896592e-05, "loss": 2.0783, "step": 9506000 }, { "epoch": 47.1, "learning_rate": 2.6459208394643364e-05, "loss": 2.0958, "step": 9506500 }, { "epoch": 47.1, "learning_rate": 2.645796980821728e-05, "loss": 2.0805, "step": 9507000 }, { "epoch": 47.1, "learning_rate": 2.6456731221791198e-05, "loss": 2.0637, "step": 9507500 }, { "epoch": 47.11, "learning_rate": 2.6455492635365115e-05, "loss": 2.0681, "step": 9508000 }, { "epoch": 47.11, "learning_rate": 2.6454254048939025e-05, "loss": 2.0444, "step": 9508500 }, { "epoch": 47.11, "learning_rate": 2.6453015462512942e-05, "loss": 2.0347, "step": 9509000 }, { "epoch": 47.11, "learning_rate": 2.645177687608686e-05, "loss": 2.0789, "step": 9509500 }, { "epoch": 47.12, "learning_rate": 2.6450538289660776e-05, "loss": 2.0895, "step": 9510000 }, { "epoch": 47.12, "learning_rate": 2.6449299703234693e-05, "loss": 2.0676, "step": 9510500 }, { "epoch": 47.12, "learning_rate": 2.6448063593981465e-05, "loss": 2.0669, "step": 9511000 }, { "epoch": 47.12, "learning_rate": 2.6446825007555375e-05, "loss": 2.0646, "step": 9511500 }, { "epoch": 47.13, "learning_rate": 2.6445588898302148e-05, "loss": 2.0734, "step": 9512000 }, { "epoch": 47.13, "learning_rate": 2.6444350311876064e-05, "loss": 2.0987, "step": 9512500 }, { "epoch": 47.13, "learning_rate": 2.644311172544998e-05, "loss": 2.1039, "step": 9513000 }, { "epoch": 47.13, "learning_rate": 2.64418731390239e-05, "loss": 2.0675, "step": 9513500 }, { "epoch": 47.14, "learning_rate": 2.6440637029770664e-05, "loss": 2.0652, "step": 9514000 }, { "epoch": 47.14, "learning_rate": 2.6439400920517433e-05, "loss": 2.0567, "step": 9514500 }, { "epoch": 47.14, "learning_rate": 2.643816233409135e-05, "loss": 2.1097, "step": 9515000 }, { "epoch": 47.14, "learning_rate": 2.6436923747665266e-05, "loss": 2.0692, "step": 9515500 }, { "epoch": 47.15, "learning_rate": 2.6435685161239183e-05, "loss": 2.0702, "step": 9516000 }, { "epoch": 47.15, "learning_rate": 2.64344465748131e-05, "loss": 2.1093, "step": 9516500 }, { "epoch": 47.15, "learning_rate": 2.6433207988387014e-05, "loss": 2.081, "step": 9517000 }, { "epoch": 47.15, "learning_rate": 2.643196940196093e-05, "loss": 2.0738, "step": 9517500 }, { "epoch": 47.16, "learning_rate": 2.6430730815534848e-05, "loss": 2.0752, "step": 9518000 }, { "epoch": 47.16, "learning_rate": 2.6429492229108765e-05, "loss": 2.0744, "step": 9518500 }, { "epoch": 47.16, "learning_rate": 2.642825364268268e-05, "loss": 2.0638, "step": 9519000 }, { "epoch": 47.16, "learning_rate": 2.64270150562566e-05, "loss": 2.0725, "step": 9519500 }, { "epoch": 47.17, "learning_rate": 2.6425776469830515e-05, "loss": 2.0699, "step": 9520000 }, { "epoch": 47.17, "learning_rate": 2.6424537883404432e-05, "loss": 2.0974, "step": 9520500 }, { "epoch": 47.17, "learning_rate": 2.6423299296978342e-05, "loss": 2.0735, "step": 9521000 }, { "epoch": 47.17, "learning_rate": 2.642206071055226e-05, "loss": 2.0838, "step": 9521500 }, { "epoch": 47.18, "learning_rate": 2.6420822124126176e-05, "loss": 2.0751, "step": 9522000 }, { "epoch": 47.18, "learning_rate": 2.6419583537700093e-05, "loss": 2.0976, "step": 9522500 }, { "epoch": 47.18, "learning_rate": 2.641834495127401e-05, "loss": 2.0811, "step": 9523000 }, { "epoch": 47.18, "learning_rate": 2.6417106364847927e-05, "loss": 2.0705, "step": 9523500 }, { "epoch": 47.19, "learning_rate": 2.6415867778421844e-05, "loss": 2.0931, "step": 9524000 }, { "epoch": 47.19, "learning_rate": 2.641463166916861e-05, "loss": 2.0692, "step": 9524500 }, { "epoch": 47.19, "learning_rate": 2.641339555991538e-05, "loss": 2.105, "step": 9525000 }, { "epoch": 47.19, "learning_rate": 2.64121569734893e-05, "loss": 2.0932, "step": 9525500 }, { "epoch": 47.2, "learning_rate": 2.6410918387063215e-05, "loss": 2.0735, "step": 9526000 }, { "epoch": 47.2, "learning_rate": 2.6409679800637132e-05, "loss": 2.082, "step": 9526500 }, { "epoch": 47.2, "learning_rate": 2.6408441214211043e-05, "loss": 2.0956, "step": 9527000 }, { "epoch": 47.2, "learning_rate": 2.640720262778496e-05, "loss": 2.0705, "step": 9527500 }, { "epoch": 47.21, "learning_rate": 2.6405964041358876e-05, "loss": 2.0383, "step": 9528000 }, { "epoch": 47.21, "learning_rate": 2.64047304092785e-05, "loss": 2.0647, "step": 9528500 }, { "epoch": 47.21, "learning_rate": 2.6403491822852417e-05, "loss": 2.0862, "step": 9529000 }, { "epoch": 47.21, "learning_rate": 2.6402253236426334e-05, "loss": 2.0937, "step": 9529500 }, { "epoch": 47.21, "learning_rate": 2.640101465000025e-05, "loss": 2.0704, "step": 9530000 }, { "epoch": 47.22, "learning_rate": 2.6399778540747017e-05, "loss": 2.0951, "step": 9530500 }, { "epoch": 47.22, "learning_rate": 2.6398539954320934e-05, "loss": 2.0666, "step": 9531000 }, { "epoch": 47.22, "learning_rate": 2.639730136789485e-05, "loss": 2.091, "step": 9531500 }, { "epoch": 47.22, "learning_rate": 2.6396062781468767e-05, "loss": 2.1027, "step": 9532000 }, { "epoch": 47.23, "learning_rate": 2.6394824195042684e-05, "loss": 2.0909, "step": 9532500 }, { "epoch": 47.23, "learning_rate": 2.63935856086166e-05, "loss": 2.0938, "step": 9533000 }, { "epoch": 47.23, "learning_rate": 2.6392347022190518e-05, "loss": 2.0987, "step": 9533500 }, { "epoch": 47.23, "learning_rate": 2.6391108435764432e-05, "loss": 2.0936, "step": 9534000 }, { "epoch": 47.24, "learning_rate": 2.638986984933835e-05, "loss": 2.093, "step": 9534500 }, { "epoch": 47.24, "learning_rate": 2.6388631262912266e-05, "loss": 2.0692, "step": 9535000 }, { "epoch": 47.24, "learning_rate": 2.6387392676486183e-05, "loss": 2.0897, "step": 9535500 }, { "epoch": 47.24, "learning_rate": 2.63861540900601e-05, "loss": 2.0929, "step": 9536000 }, { "epoch": 47.25, "learning_rate": 2.638491798080687e-05, "loss": 2.0803, "step": 9536500 }, { "epoch": 47.25, "learning_rate": 2.6383679394380785e-05, "loss": 2.0626, "step": 9537000 }, { "epoch": 47.25, "learning_rate": 2.63824408079547e-05, "loss": 2.0888, "step": 9537500 }, { "epoch": 47.25, "learning_rate": 2.6381202221528616e-05, "loss": 2.0969, "step": 9538000 }, { "epoch": 47.26, "learning_rate": 2.6379963635102533e-05, "loss": 2.0555, "step": 9538500 }, { "epoch": 47.26, "learning_rate": 2.637872504867645e-05, "loss": 2.0908, "step": 9539000 }, { "epoch": 47.26, "learning_rate": 2.637748893942322e-05, "loss": 2.0892, "step": 9539500 }, { "epoch": 47.26, "learning_rate": 2.6376252830169984e-05, "loss": 2.0903, "step": 9540000 }, { "epoch": 47.27, "learning_rate": 2.63750142437439e-05, "loss": 2.0804, "step": 9540500 }, { "epoch": 47.27, "learning_rate": 2.6373775657317818e-05, "loss": 2.1027, "step": 9541000 }, { "epoch": 47.27, "learning_rate": 2.6372537070891735e-05, "loss": 2.0477, "step": 9541500 }, { "epoch": 47.27, "learning_rate": 2.637129848446565e-05, "loss": 2.0865, "step": 9542000 }, { "epoch": 47.28, "learning_rate": 2.6370062375212417e-05, "loss": 2.1057, "step": 9542500 }, { "epoch": 47.28, "learning_rate": 2.6368823788786334e-05, "loss": 2.1042, "step": 9543000 }, { "epoch": 47.28, "learning_rate": 2.636758520236025e-05, "loss": 2.0726, "step": 9543500 }, { "epoch": 47.28, "learning_rate": 2.6366346615934168e-05, "loss": 2.103, "step": 9544000 }, { "epoch": 47.29, "learning_rate": 2.6365108029508085e-05, "loss": 2.0779, "step": 9544500 }, { "epoch": 47.29, "learning_rate": 2.6363869443082e-05, "loss": 2.09, "step": 9545000 }, { "epoch": 47.29, "learning_rate": 2.636263085665592e-05, "loss": 2.0811, "step": 9545500 }, { "epoch": 47.29, "learning_rate": 2.6361392270229835e-05, "loss": 2.0962, "step": 9546000 }, { "epoch": 47.3, "learning_rate": 2.63601561609766e-05, "loss": 2.0768, "step": 9546500 }, { "epoch": 47.3, "learning_rate": 2.6358917574550518e-05, "loss": 2.0615, "step": 9547000 }, { "epoch": 47.3, "learning_rate": 2.6357678988124435e-05, "loss": 2.0817, "step": 9547500 }, { "epoch": 47.3, "learning_rate": 2.635644040169835e-05, "loss": 2.0715, "step": 9548000 }, { "epoch": 47.31, "learning_rate": 2.635520181527227e-05, "loss": 2.1006, "step": 9548500 }, { "epoch": 47.31, "learning_rate": 2.6353963228846186e-05, "loss": 2.0684, "step": 9549000 }, { "epoch": 47.31, "learning_rate": 2.6352724642420102e-05, "loss": 2.1017, "step": 9549500 }, { "epoch": 47.31, "learning_rate": 2.635148605599402e-05, "loss": 2.0915, "step": 9550000 }, { "epoch": 47.32, "learning_rate": 2.6350247469567936e-05, "loss": 2.0931, "step": 9550500 }, { "epoch": 47.32, "learning_rate": 2.6349011360314702e-05, "loss": 2.0893, "step": 9551000 }, { "epoch": 47.32, "learning_rate": 2.6347775251061467e-05, "loss": 2.0938, "step": 9551500 }, { "epoch": 47.32, "learning_rate": 2.6346536664635384e-05, "loss": 2.0558, "step": 9552000 }, { "epoch": 47.33, "learning_rate": 2.63452980782093e-05, "loss": 2.0938, "step": 9552500 }, { "epoch": 47.33, "learning_rate": 2.6344059491783218e-05, "loss": 2.0621, "step": 9553000 }, { "epoch": 47.33, "learning_rate": 2.6342820905357135e-05, "loss": 2.0651, "step": 9553500 }, { "epoch": 47.33, "learning_rate": 2.6341582318931052e-05, "loss": 2.0823, "step": 9554000 }, { "epoch": 47.34, "learning_rate": 2.634034373250497e-05, "loss": 2.058, "step": 9554500 }, { "epoch": 47.34, "learning_rate": 2.6339105146078886e-05, "loss": 2.0735, "step": 9555000 }, { "epoch": 47.34, "learning_rate": 2.6337866559652803e-05, "loss": 2.097, "step": 9555500 }, { "epoch": 47.34, "learning_rate": 2.6336630450399568e-05, "loss": 2.0802, "step": 9556000 }, { "epoch": 47.35, "learning_rate": 2.6335391863973485e-05, "loss": 2.0965, "step": 9556500 }, { "epoch": 47.35, "learning_rate": 2.6334153277547402e-05, "loss": 2.0823, "step": 9557000 }, { "epoch": 47.35, "learning_rate": 2.633291469112132e-05, "loss": 2.1153, "step": 9557500 }, { "epoch": 47.35, "learning_rate": 2.6331676104695236e-05, "loss": 2.0942, "step": 9558000 }, { "epoch": 47.36, "learning_rate": 2.6330437518269153e-05, "loss": 2.0921, "step": 9558500 }, { "epoch": 47.36, "learning_rate": 2.632919893184307e-05, "loss": 2.0769, "step": 9559000 }, { "epoch": 47.36, "learning_rate": 2.6327960345416986e-05, "loss": 2.0907, "step": 9559500 }, { "epoch": 47.36, "learning_rate": 2.6326721758990903e-05, "loss": 2.0946, "step": 9560000 }, { "epoch": 47.37, "learning_rate": 2.632548317256482e-05, "loss": 2.0753, "step": 9560500 }, { "epoch": 47.37, "learning_rate": 2.6324244586138734e-05, "loss": 2.0827, "step": 9561000 }, { "epoch": 47.37, "learning_rate": 2.6323005999712647e-05, "loss": 2.0757, "step": 9561500 }, { "epoch": 47.37, "learning_rate": 2.632176989045942e-05, "loss": 2.1135, "step": 9562000 }, { "epoch": 47.38, "learning_rate": 2.6320531304033337e-05, "loss": 2.0821, "step": 9562500 }, { "epoch": 47.38, "learning_rate": 2.6319295194780102e-05, "loss": 2.1001, "step": 9563000 }, { "epoch": 47.38, "learning_rate": 2.631805660835402e-05, "loss": 2.0693, "step": 9563500 }, { "epoch": 47.38, "learning_rate": 2.6316818021927936e-05, "loss": 2.0575, "step": 9564000 }, { "epoch": 47.39, "learning_rate": 2.6315579435501853e-05, "loss": 2.0961, "step": 9564500 }, { "epoch": 47.39, "learning_rate": 2.631434084907577e-05, "loss": 2.0965, "step": 9565000 }, { "epoch": 47.39, "learning_rate": 2.6313102262649687e-05, "loss": 2.0766, "step": 9565500 }, { "epoch": 47.39, "learning_rate": 2.6311868630569304e-05, "loss": 2.1241, "step": 9566000 }, { "epoch": 47.4, "learning_rate": 2.631063004414322e-05, "loss": 2.0787, "step": 9566500 }, { "epoch": 47.4, "learning_rate": 2.6309391457717138e-05, "loss": 2.106, "step": 9567000 }, { "epoch": 47.4, "learning_rate": 2.630815287129105e-05, "loss": 2.0678, "step": 9567500 }, { "epoch": 47.4, "learning_rate": 2.6306914284864968e-05, "loss": 2.0799, "step": 9568000 }, { "epoch": 47.41, "learning_rate": 2.6305675698438885e-05, "loss": 2.0819, "step": 9568500 }, { "epoch": 47.41, "learning_rate": 2.6304437112012802e-05, "loss": 2.0855, "step": 9569000 }, { "epoch": 47.41, "learning_rate": 2.630320100275957e-05, "loss": 2.0887, "step": 9569500 }, { "epoch": 47.41, "learning_rate": 2.6301962416333488e-05, "loss": 2.0786, "step": 9570000 }, { "epoch": 47.42, "learning_rate": 2.6300723829907405e-05, "loss": 2.062, "step": 9570500 }, { "epoch": 47.42, "learning_rate": 2.6299485243481318e-05, "loss": 2.1086, "step": 9571000 }, { "epoch": 47.42, "learning_rate": 2.6298246657055235e-05, "loss": 2.072, "step": 9571500 }, { "epoch": 47.42, "learning_rate": 2.6297008070629152e-05, "loss": 2.1013, "step": 9572000 }, { "epoch": 47.43, "learning_rate": 2.629577196137592e-05, "loss": 2.0859, "step": 9572500 }, { "epoch": 47.43, "learning_rate": 2.6294535852122693e-05, "loss": 2.0849, "step": 9573000 }, { "epoch": 47.43, "learning_rate": 2.629329726569661e-05, "loss": 2.0804, "step": 9573500 }, { "epoch": 47.43, "learning_rate": 2.6292058679270527e-05, "loss": 2.0846, "step": 9574000 }, { "epoch": 47.44, "learning_rate": 2.6290820092844437e-05, "loss": 2.0756, "step": 9574500 }, { "epoch": 47.44, "learning_rate": 2.6289581506418354e-05, "loss": 2.0845, "step": 9575000 }, { "epoch": 47.44, "learning_rate": 2.628834291999227e-05, "loss": 2.0745, "step": 9575500 }, { "epoch": 47.44, "learning_rate": 2.6287104333566188e-05, "loss": 2.0886, "step": 9576000 }, { "epoch": 47.45, "learning_rate": 2.628586822431296e-05, "loss": 2.0817, "step": 9576500 }, { "epoch": 47.45, "learning_rate": 2.6284629637886877e-05, "loss": 2.0866, "step": 9577000 }, { "epoch": 47.45, "learning_rate": 2.6283391051460794e-05, "loss": 2.0683, "step": 9577500 }, { "epoch": 47.45, "learning_rate": 2.6282152465034704e-05, "loss": 2.1014, "step": 9578000 }, { "epoch": 47.46, "learning_rate": 2.628091387860862e-05, "loss": 2.0702, "step": 9578500 }, { "epoch": 47.46, "learning_rate": 2.6279675292182538e-05, "loss": 2.1125, "step": 9579000 }, { "epoch": 47.46, "learning_rate": 2.6278436705756455e-05, "loss": 2.0877, "step": 9579500 }, { "epoch": 47.46, "learning_rate": 2.6277198119330372e-05, "loss": 2.103, "step": 9580000 }, { "epoch": 47.47, "learning_rate": 2.627595953290429e-05, "loss": 2.1125, "step": 9580500 }, { "epoch": 47.47, "learning_rate": 2.6274723423651054e-05, "loss": 2.0805, "step": 9581000 }, { "epoch": 47.47, "learning_rate": 2.627348483722497e-05, "loss": 2.0801, "step": 9581500 }, { "epoch": 47.47, "learning_rate": 2.6272246250798888e-05, "loss": 2.1056, "step": 9582000 }, { "epoch": 47.48, "learning_rate": 2.6271007664372805e-05, "loss": 2.1019, "step": 9582500 }, { "epoch": 47.48, "learning_rate": 2.6269769077946722e-05, "loss": 2.0863, "step": 9583000 }, { "epoch": 47.48, "learning_rate": 2.6268532968693494e-05, "loss": 2.1016, "step": 9583500 }, { "epoch": 47.48, "learning_rate": 2.6267294382267404e-05, "loss": 2.0828, "step": 9584000 }, { "epoch": 47.48, "learning_rate": 2.626605579584132e-05, "loss": 2.0838, "step": 9584500 }, { "epoch": 47.49, "learning_rate": 2.6264817209415238e-05, "loss": 2.1077, "step": 9585000 }, { "epoch": 47.49, "learning_rate": 2.6263578622989155e-05, "loss": 2.1047, "step": 9585500 }, { "epoch": 47.49, "learning_rate": 2.6262340036563072e-05, "loss": 2.0933, "step": 9586000 }, { "epoch": 47.49, "learning_rate": 2.626110145013699e-05, "loss": 2.0829, "step": 9586500 }, { "epoch": 47.5, "learning_rate": 2.6259862863710906e-05, "loss": 2.0823, "step": 9587000 }, { "epoch": 47.5, "learning_rate": 2.6258624277284823e-05, "loss": 2.0993, "step": 9587500 }, { "epoch": 47.5, "learning_rate": 2.6257385690858736e-05, "loss": 2.085, "step": 9588000 }, { "epoch": 47.5, "learning_rate": 2.6256147104432653e-05, "loss": 2.1073, "step": 9588500 }, { "epoch": 47.51, "learning_rate": 2.625490851800657e-05, "loss": 2.1112, "step": 9589000 }, { "epoch": 47.51, "learning_rate": 2.6253669931580487e-05, "loss": 2.1056, "step": 9589500 }, { "epoch": 47.51, "learning_rate": 2.6252431345154404e-05, "loss": 2.0759, "step": 9590000 }, { "epoch": 47.51, "learning_rate": 2.6251195235901173e-05, "loss": 2.0654, "step": 9590500 }, { "epoch": 47.52, "learning_rate": 2.624995664947509e-05, "loss": 2.1054, "step": 9591000 }, { "epoch": 47.52, "learning_rate": 2.6248718063049003e-05, "loss": 2.102, "step": 9591500 }, { "epoch": 47.52, "learning_rate": 2.6247481953795772e-05, "loss": 2.0845, "step": 9592000 }, { "epoch": 47.52, "learning_rate": 2.6246245844542544e-05, "loss": 2.0844, "step": 9592500 }, { "epoch": 47.53, "learning_rate": 2.624501221246216e-05, "loss": 2.0915, "step": 9593000 }, { "epoch": 47.53, "learning_rate": 2.624377362603608e-05, "loss": 2.0704, "step": 9593500 }, { "epoch": 47.53, "learning_rate": 2.6242535039609995e-05, "loss": 2.0917, "step": 9594000 }, { "epoch": 47.53, "learning_rate": 2.6241296453183912e-05, "loss": 2.0889, "step": 9594500 }, { "epoch": 47.54, "learning_rate": 2.624005786675783e-05, "loss": 2.0934, "step": 9595000 }, { "epoch": 47.54, "learning_rate": 2.6238821757504595e-05, "loss": 2.0908, "step": 9595500 }, { "epoch": 47.54, "learning_rate": 2.623758317107851e-05, "loss": 2.1025, "step": 9596000 }, { "epoch": 47.54, "learning_rate": 2.6236347061825284e-05, "loss": 2.0926, "step": 9596500 }, { "epoch": 47.55, "learning_rate": 2.6235108475399194e-05, "loss": 2.1017, "step": 9597000 }, { "epoch": 47.55, "learning_rate": 2.623386988897311e-05, "loss": 2.076, "step": 9597500 }, { "epoch": 47.55, "learning_rate": 2.6232631302547028e-05, "loss": 2.089, "step": 9598000 }, { "epoch": 47.55, "learning_rate": 2.6231392716120945e-05, "loss": 2.0719, "step": 9598500 }, { "epoch": 47.56, "learning_rate": 2.6230154129694862e-05, "loss": 2.0867, "step": 9599000 }, { "epoch": 47.56, "learning_rate": 2.622891554326878e-05, "loss": 2.0626, "step": 9599500 }, { "epoch": 47.56, "learning_rate": 2.6227676956842696e-05, "loss": 2.0884, "step": 9600000 }, { "epoch": 47.56, "learning_rate": 2.6226438370416613e-05, "loss": 2.0938, "step": 9600500 }, { "epoch": 47.57, "learning_rate": 2.6225202261163378e-05, "loss": 2.1142, "step": 9601000 }, { "epoch": 47.57, "learning_rate": 2.6223963674737295e-05, "loss": 2.0865, "step": 9601500 }, { "epoch": 47.57, "learning_rate": 2.6222725088311212e-05, "loss": 2.0836, "step": 9602000 }, { "epoch": 47.57, "learning_rate": 2.622148650188513e-05, "loss": 2.0547, "step": 9602500 }, { "epoch": 47.58, "learning_rate": 2.6220247915459046e-05, "loss": 2.0802, "step": 9603000 }, { "epoch": 47.58, "learning_rate": 2.6219009329032963e-05, "loss": 2.0837, "step": 9603500 }, { "epoch": 47.58, "learning_rate": 2.621777074260688e-05, "loss": 2.0837, "step": 9604000 }, { "epoch": 47.58, "learning_rate": 2.6216532156180796e-05, "loss": 2.0861, "step": 9604500 }, { "epoch": 47.59, "learning_rate": 2.6215293569754713e-05, "loss": 2.1014, "step": 9605000 }, { "epoch": 47.59, "learning_rate": 2.621405498332863e-05, "loss": 2.0974, "step": 9605500 }, { "epoch": 47.59, "learning_rate": 2.6212816396902544e-05, "loss": 2.0917, "step": 9606000 }, { "epoch": 47.59, "learning_rate": 2.621157781047646e-05, "loss": 2.1002, "step": 9606500 }, { "epoch": 47.6, "learning_rate": 2.621034170122323e-05, "loss": 2.0993, "step": 9607000 }, { "epoch": 47.6, "learning_rate": 2.6209103114797147e-05, "loss": 2.0975, "step": 9607500 }, { "epoch": 47.6, "learning_rate": 2.6207864528371063e-05, "loss": 2.0793, "step": 9608000 }, { "epoch": 47.6, "learning_rate": 2.620662594194498e-05, "loss": 2.1147, "step": 9608500 }, { "epoch": 47.61, "learning_rate": 2.6205387355518894e-05, "loss": 2.0978, "step": 9609000 }, { "epoch": 47.61, "learning_rate": 2.620414876909281e-05, "loss": 2.0735, "step": 9609500 }, { "epoch": 47.61, "learning_rate": 2.6202910182666728e-05, "loss": 2.0936, "step": 9610000 }, { "epoch": 47.61, "learning_rate": 2.6201671596240645e-05, "loss": 2.0894, "step": 9610500 }, { "epoch": 47.62, "learning_rate": 2.6200435486987414e-05, "loss": 2.0963, "step": 9611000 }, { "epoch": 47.62, "learning_rate": 2.619919690056133e-05, "loss": 2.0607, "step": 9611500 }, { "epoch": 47.62, "learning_rate": 2.6197958314135247e-05, "loss": 2.0949, "step": 9612000 }, { "epoch": 47.62, "learning_rate": 2.619671972770916e-05, "loss": 2.1032, "step": 9612500 }, { "epoch": 47.63, "learning_rate": 2.619548361845593e-05, "loss": 2.0672, "step": 9613000 }, { "epoch": 47.63, "learning_rate": 2.6194245032029847e-05, "loss": 2.1102, "step": 9613500 }, { "epoch": 47.63, "learning_rate": 2.6193008922776612e-05, "loss": 2.0954, "step": 9614000 }, { "epoch": 47.63, "learning_rate": 2.619177033635053e-05, "loss": 2.082, "step": 9614500 }, { "epoch": 47.64, "learning_rate": 2.6190531749924446e-05, "loss": 2.0849, "step": 9615000 }, { "epoch": 47.64, "learning_rate": 2.6189293163498363e-05, "loss": 2.085, "step": 9615500 }, { "epoch": 47.64, "learning_rate": 2.618805457707228e-05, "loss": 2.0955, "step": 9616000 }, { "epoch": 47.64, "learning_rate": 2.6186818467819045e-05, "loss": 2.0782, "step": 9616500 }, { "epoch": 47.65, "learning_rate": 2.6185579881392962e-05, "loss": 2.0831, "step": 9617000 }, { "epoch": 47.65, "learning_rate": 2.618434129496688e-05, "loss": 2.0869, "step": 9617500 }, { "epoch": 47.65, "learning_rate": 2.618310518571365e-05, "loss": 2.078, "step": 9618000 }, { "epoch": 47.65, "learning_rate": 2.6181866599287568e-05, "loss": 2.1126, "step": 9618500 }, { "epoch": 47.66, "learning_rate": 2.6180632967207185e-05, "loss": 2.1035, "step": 9619000 }, { "epoch": 47.66, "learning_rate": 2.6179394380781102e-05, "loss": 2.0826, "step": 9619500 }, { "epoch": 47.66, "learning_rate": 2.617815579435502e-05, "loss": 2.0837, "step": 9620000 }, { "epoch": 47.66, "learning_rate": 2.6176917207928936e-05, "loss": 2.0925, "step": 9620500 }, { "epoch": 47.67, "learning_rate": 2.6175678621502853e-05, "loss": 2.0942, "step": 9621000 }, { "epoch": 47.67, "learning_rate": 2.617444003507677e-05, "loss": 2.0742, "step": 9621500 }, { "epoch": 47.67, "learning_rate": 2.6173201448650687e-05, "loss": 2.0807, "step": 9622000 }, { "epoch": 47.67, "learning_rate": 2.6171962862224604e-05, "loss": 2.0802, "step": 9622500 }, { "epoch": 47.68, "learning_rate": 2.617072427579852e-05, "loss": 2.1002, "step": 9623000 }, { "epoch": 47.68, "learning_rate": 2.6169485689372434e-05, "loss": 2.0935, "step": 9623500 }, { "epoch": 47.68, "learning_rate": 2.616824710294635e-05, "loss": 2.0657, "step": 9624000 }, { "epoch": 47.68, "learning_rate": 2.6167008516520268e-05, "loss": 2.0847, "step": 9624500 }, { "epoch": 47.69, "learning_rate": 2.6165769930094185e-05, "loss": 2.1135, "step": 9625000 }, { "epoch": 47.69, "learning_rate": 2.61645313436681e-05, "loss": 2.1055, "step": 9625500 }, { "epoch": 47.69, "learning_rate": 2.616329523441487e-05, "loss": 2.098, "step": 9626000 }, { "epoch": 47.69, "learning_rate": 2.6162059125161636e-05, "loss": 2.0842, "step": 9626500 }, { "epoch": 47.7, "learning_rate": 2.6160820538735553e-05, "loss": 2.095, "step": 9627000 }, { "epoch": 47.7, "learning_rate": 2.615958195230947e-05, "loss": 2.0644, "step": 9627500 }, { "epoch": 47.7, "learning_rate": 2.6158343365883387e-05, "loss": 2.0786, "step": 9628000 }, { "epoch": 47.7, "learning_rate": 2.6157104779457304e-05, "loss": 2.0707, "step": 9628500 }, { "epoch": 47.71, "learning_rate": 2.615586619303122e-05, "loss": 2.0689, "step": 9629000 }, { "epoch": 47.71, "learning_rate": 2.6154627606605138e-05, "loss": 2.0849, "step": 9629500 }, { "epoch": 47.71, "learning_rate": 2.6153389020179055e-05, "loss": 2.079, "step": 9630000 }, { "epoch": 47.71, "learning_rate": 2.615215043375297e-05, "loss": 2.0656, "step": 9630500 }, { "epoch": 47.72, "learning_rate": 2.6150911847326885e-05, "loss": 2.0848, "step": 9631000 }, { "epoch": 47.72, "learning_rate": 2.61496732609008e-05, "loss": 2.0686, "step": 9631500 }, { "epoch": 47.72, "learning_rate": 2.6148434674474716e-05, "loss": 2.0859, "step": 9632000 }, { "epoch": 47.72, "learning_rate": 2.6147198565221488e-05, "loss": 2.0684, "step": 9632500 }, { "epoch": 47.73, "learning_rate": 2.6145959978795405e-05, "loss": 2.0732, "step": 9633000 }, { "epoch": 47.73, "learning_rate": 2.614472139236932e-05, "loss": 2.1247, "step": 9633500 }, { "epoch": 47.73, "learning_rate": 2.6143485283116087e-05, "loss": 2.1037, "step": 9634000 }, { "epoch": 47.73, "learning_rate": 2.6142246696690004e-05, "loss": 2.0959, "step": 9634500 }, { "epoch": 47.74, "learning_rate": 2.614100811026392e-05, "loss": 2.0821, "step": 9635000 }, { "epoch": 47.74, "learning_rate": 2.6139772001010687e-05, "loss": 2.1018, "step": 9635500 }, { "epoch": 47.74, "learning_rate": 2.6138533414584604e-05, "loss": 2.0829, "step": 9636000 }, { "epoch": 47.74, "learning_rate": 2.613729482815852e-05, "loss": 2.0982, "step": 9636500 }, { "epoch": 47.75, "learning_rate": 2.6136056241732437e-05, "loss": 2.0786, "step": 9637000 }, { "epoch": 47.75, "learning_rate": 2.6134817655306354e-05, "loss": 2.0792, "step": 9637500 }, { "epoch": 47.75, "learning_rate": 2.613357906888027e-05, "loss": 2.0943, "step": 9638000 }, { "epoch": 47.75, "learning_rate": 2.6132340482454188e-05, "loss": 2.1174, "step": 9638500 }, { "epoch": 47.75, "learning_rate": 2.6131101896028105e-05, "loss": 2.1001, "step": 9639000 }, { "epoch": 47.76, "learning_rate": 2.6129863309602022e-05, "loss": 2.1106, "step": 9639500 }, { "epoch": 47.76, "learning_rate": 2.612862472317594e-05, "loss": 2.0828, "step": 9640000 }, { "epoch": 47.76, "learning_rate": 2.6127386136749852e-05, "loss": 2.0857, "step": 9640500 }, { "epoch": 47.76, "learning_rate": 2.6126147550323766e-05, "loss": 2.0936, "step": 9641000 }, { "epoch": 47.77, "learning_rate": 2.6124908963897683e-05, "loss": 2.0658, "step": 9641500 }, { "epoch": 47.77, "learning_rate": 2.61236703774716e-05, "loss": 2.088, "step": 9642000 }, { "epoch": 47.77, "learning_rate": 2.6122434268218372e-05, "loss": 2.1007, "step": 9642500 }, { "epoch": 47.77, "learning_rate": 2.6121198158965137e-05, "loss": 2.0672, "step": 9643000 }, { "epoch": 47.78, "learning_rate": 2.6119959572539054e-05, "loss": 2.1081, "step": 9643500 }, { "epoch": 47.78, "learning_rate": 2.611872346328582e-05, "loss": 2.0884, "step": 9644000 }, { "epoch": 47.78, "learning_rate": 2.6117484876859737e-05, "loss": 2.0723, "step": 9644500 }, { "epoch": 47.78, "learning_rate": 2.6116246290433654e-05, "loss": 2.0919, "step": 9645000 }, { "epoch": 47.79, "learning_rate": 2.611500770400757e-05, "loss": 2.0974, "step": 9645500 }, { "epoch": 47.79, "learning_rate": 2.611377159475434e-05, "loss": 2.1091, "step": 9646000 }, { "epoch": 47.79, "learning_rate": 2.6112533008328256e-05, "loss": 2.0837, "step": 9646500 }, { "epoch": 47.79, "learning_rate": 2.611129442190217e-05, "loss": 2.0656, "step": 9647000 }, { "epoch": 47.8, "learning_rate": 2.6110055835476087e-05, "loss": 2.0716, "step": 9647500 }, { "epoch": 47.8, "learning_rate": 2.6108819726222856e-05, "loss": 2.0783, "step": 9648000 }, { "epoch": 47.8, "learning_rate": 2.6107581139796773e-05, "loss": 2.0892, "step": 9648500 }, { "epoch": 47.8, "learning_rate": 2.610634255337069e-05, "loss": 2.0974, "step": 9649000 }, { "epoch": 47.81, "learning_rate": 2.6105103966944606e-05, "loss": 2.1299, "step": 9649500 }, { "epoch": 47.81, "learning_rate": 2.6103865380518523e-05, "loss": 2.0813, "step": 9650000 }, { "epoch": 47.81, "learning_rate": 2.6102626794092437e-05, "loss": 2.0973, "step": 9650500 }, { "epoch": 47.81, "learning_rate": 2.6101388207666354e-05, "loss": 2.073, "step": 9651000 }, { "epoch": 47.82, "learning_rate": 2.610014962124027e-05, "loss": 2.092, "step": 9651500 }, { "epoch": 47.82, "learning_rate": 2.6098911034814188e-05, "loss": 2.0816, "step": 9652000 }, { "epoch": 47.82, "learning_rate": 2.6097672448388105e-05, "loss": 2.0748, "step": 9652500 }, { "epoch": 47.82, "learning_rate": 2.609643386196202e-05, "loss": 2.0843, "step": 9653000 }, { "epoch": 47.83, "learning_rate": 2.609519527553594e-05, "loss": 2.0861, "step": 9653500 }, { "epoch": 47.83, "learning_rate": 2.6093956689109855e-05, "loss": 2.0926, "step": 9654000 }, { "epoch": 47.83, "learning_rate": 2.6092718102683772e-05, "loss": 2.1106, "step": 9654500 }, { "epoch": 47.83, "learning_rate": 2.609147951625769e-05, "loss": 2.087, "step": 9655000 }, { "epoch": 47.84, "learning_rate": 2.6090240929831606e-05, "loss": 2.0889, "step": 9655500 }, { "epoch": 47.84, "learning_rate": 2.6089002343405523e-05, "loss": 2.0938, "step": 9656000 }, { "epoch": 47.84, "learning_rate": 2.6087763756979433e-05, "loss": 2.0772, "step": 9656500 }, { "epoch": 47.84, "learning_rate": 2.6086530124899057e-05, "loss": 2.1069, "step": 9657000 }, { "epoch": 47.85, "learning_rate": 2.608529153847297e-05, "loss": 2.0774, "step": 9657500 }, { "epoch": 47.85, "learning_rate": 2.6084052952046888e-05, "loss": 2.0743, "step": 9658000 }, { "epoch": 47.85, "learning_rate": 2.6082814365620805e-05, "loss": 2.083, "step": 9658500 }, { "epoch": 47.85, "learning_rate": 2.608157577919472e-05, "loss": 2.0712, "step": 9659000 }, { "epoch": 47.86, "learning_rate": 2.608033719276864e-05, "loss": 2.0926, "step": 9659500 }, { "epoch": 47.86, "learning_rate": 2.6079098606342556e-05, "loss": 2.0854, "step": 9660000 }, { "epoch": 47.86, "learning_rate": 2.6077860019916472e-05, "loss": 2.0774, "step": 9660500 }, { "epoch": 47.86, "learning_rate": 2.607662143349039e-05, "loss": 2.0936, "step": 9661000 }, { "epoch": 47.87, "learning_rate": 2.6075382847064306e-05, "loss": 2.0924, "step": 9661500 }, { "epoch": 47.87, "learning_rate": 2.6074144260638223e-05, "loss": 2.0708, "step": 9662000 }, { "epoch": 47.87, "learning_rate": 2.607290567421214e-05, "loss": 2.1057, "step": 9662500 }, { "epoch": 47.87, "learning_rate": 2.607166708778605e-05, "loss": 2.0919, "step": 9663000 }, { "epoch": 47.88, "learning_rate": 2.6070433455705674e-05, "loss": 2.0786, "step": 9663500 }, { "epoch": 47.88, "learning_rate": 2.6069194869279588e-05, "loss": 2.1013, "step": 9664000 }, { "epoch": 47.88, "learning_rate": 2.6067958760026357e-05, "loss": 2.0838, "step": 9664500 }, { "epoch": 47.88, "learning_rate": 2.606672265077313e-05, "loss": 2.1014, "step": 9665000 }, { "epoch": 47.89, "learning_rate": 2.6065484064347046e-05, "loss": 2.0802, "step": 9665500 }, { "epoch": 47.89, "learning_rate": 2.6064245477920963e-05, "loss": 2.0918, "step": 9666000 }, { "epoch": 47.89, "learning_rate": 2.6063006891494873e-05, "loss": 2.0867, "step": 9666500 }, { "epoch": 47.89, "learning_rate": 2.606176830506879e-05, "loss": 2.1075, "step": 9667000 }, { "epoch": 47.9, "learning_rate": 2.6060529718642707e-05, "loss": 2.1001, "step": 9667500 }, { "epoch": 47.9, "learning_rate": 2.6059291132216624e-05, "loss": 2.0785, "step": 9668000 }, { "epoch": 47.9, "learning_rate": 2.605805254579054e-05, "loss": 2.0727, "step": 9668500 }, { "epoch": 47.9, "learning_rate": 2.6056813959364458e-05, "loss": 2.1015, "step": 9669000 }, { "epoch": 47.91, "learning_rate": 2.6055577850111223e-05, "loss": 2.0894, "step": 9669500 }, { "epoch": 47.91, "learning_rate": 2.6054341740857995e-05, "loss": 2.1113, "step": 9670000 }, { "epoch": 47.91, "learning_rate": 2.6053103154431912e-05, "loss": 2.0956, "step": 9670500 }, { "epoch": 47.91, "learning_rate": 2.605186952235153e-05, "loss": 2.0777, "step": 9671000 }, { "epoch": 47.92, "learning_rate": 2.6050630935925446e-05, "loss": 2.0985, "step": 9671500 }, { "epoch": 47.92, "learning_rate": 2.6049392349499363e-05, "loss": 2.0767, "step": 9672000 }, { "epoch": 47.92, "learning_rate": 2.604815376307328e-05, "loss": 2.0916, "step": 9672500 }, { "epoch": 47.92, "learning_rate": 2.6046915176647197e-05, "loss": 2.0891, "step": 9673000 }, { "epoch": 47.93, "learning_rate": 2.6045676590221114e-05, "loss": 2.1025, "step": 9673500 }, { "epoch": 47.93, "learning_rate": 2.604443800379503e-05, "loss": 2.1006, "step": 9674000 }, { "epoch": 47.93, "learning_rate": 2.6043199417368948e-05, "loss": 2.1039, "step": 9674500 }, { "epoch": 47.93, "learning_rate": 2.604196083094286e-05, "loss": 2.0982, "step": 9675000 }, { "epoch": 47.94, "learning_rate": 2.604072224451678e-05, "loss": 2.0644, "step": 9675500 }, { "epoch": 47.94, "learning_rate": 2.6039483658090695e-05, "loss": 2.0967, "step": 9676000 }, { "epoch": 47.94, "learning_rate": 2.6038245071664612e-05, "loss": 2.1009, "step": 9676500 }, { "epoch": 47.94, "learning_rate": 2.603700648523853e-05, "loss": 2.1204, "step": 9677000 }, { "epoch": 47.95, "learning_rate": 2.6035767898812446e-05, "loss": 2.0929, "step": 9677500 }, { "epoch": 47.95, "learning_rate": 2.6034529312386363e-05, "loss": 2.0795, "step": 9678000 }, { "epoch": 47.95, "learning_rate": 2.603329072596028e-05, "loss": 2.0888, "step": 9678500 }, { "epoch": 47.95, "learning_rate": 2.603205213953419e-05, "loss": 2.1019, "step": 9679000 }, { "epoch": 47.96, "learning_rate": 2.6030813553108107e-05, "loss": 2.1107, "step": 9679500 }, { "epoch": 47.96, "learning_rate": 2.6029574966682024e-05, "loss": 2.0822, "step": 9680000 }, { "epoch": 47.96, "learning_rate": 2.602833638025594e-05, "loss": 2.1023, "step": 9680500 }, { "epoch": 47.96, "learning_rate": 2.6027100271002713e-05, "loss": 2.0887, "step": 9681000 }, { "epoch": 47.97, "learning_rate": 2.602586168457663e-05, "loss": 2.0788, "step": 9681500 }, { "epoch": 47.97, "learning_rate": 2.602462309815054e-05, "loss": 2.0967, "step": 9682000 }, { "epoch": 47.97, "learning_rate": 2.6023384511724457e-05, "loss": 2.0937, "step": 9682500 }, { "epoch": 47.97, "learning_rate": 2.6022145925298374e-05, "loss": 2.0981, "step": 9683000 }, { "epoch": 47.98, "learning_rate": 2.602090733887229e-05, "loss": 2.0854, "step": 9683500 }, { "epoch": 47.98, "learning_rate": 2.6019668752446208e-05, "loss": 2.0899, "step": 9684000 }, { "epoch": 47.98, "learning_rate": 2.6018430166020125e-05, "loss": 2.0874, "step": 9684500 }, { "epoch": 47.98, "learning_rate": 2.6017194056766897e-05, "loss": 2.109, "step": 9685000 }, { "epoch": 47.99, "learning_rate": 2.6015955470340807e-05, "loss": 2.0903, "step": 9685500 }, { "epoch": 47.99, "learning_rate": 2.6014716883914724e-05, "loss": 2.0694, "step": 9686000 }, { "epoch": 47.99, "learning_rate": 2.601347829748864e-05, "loss": 2.1083, "step": 9686500 }, { "epoch": 47.99, "learning_rate": 2.6012239711062558e-05, "loss": 2.0739, "step": 9687000 }, { "epoch": 48.0, "learning_rate": 2.601100360180933e-05, "loss": 2.0915, "step": 9687500 }, { "epoch": 48.0, "learning_rate": 2.6009765015383247e-05, "loss": 2.0884, "step": 9688000 }, { "epoch": 48.0, "eval_accuracy": 0.6682076935636344, "eval_accuracy_mlm": 0.6263341556313278, "eval_accuracy_nsp": 0.8657391972827003, "eval_loss": 2.2913217544555664, "eval_runtime": 147.1607, "eval_samples_per_second": 1732.521, "eval_steps_per_second": 72.193, "step": 9688464 }, { "epoch": 48.0, "learning_rate": 2.6008526428957157e-05, "loss": 2.079, "step": 9688500 }, { "epoch": 48.0, "learning_rate": 2.600729031970393e-05, "loss": 2.0726, "step": 9689000 }, { "epoch": 48.01, "learning_rate": 2.6006051733277846e-05, "loss": 2.0483, "step": 9689500 }, { "epoch": 48.01, "learning_rate": 2.6004813146851763e-05, "loss": 2.0756, "step": 9690000 }, { "epoch": 48.01, "learning_rate": 2.600357456042568e-05, "loss": 2.0568, "step": 9690500 }, { "epoch": 48.01, "learning_rate": 2.6002335973999597e-05, "loss": 2.076, "step": 9691000 }, { "epoch": 48.02, "learning_rate": 2.6001097387573507e-05, "loss": 2.0625, "step": 9691500 }, { "epoch": 48.02, "learning_rate": 2.5999858801147424e-05, "loss": 2.0608, "step": 9692000 }, { "epoch": 48.02, "learning_rate": 2.599862021472134e-05, "loss": 2.0717, "step": 9692500 }, { "epoch": 48.02, "learning_rate": 2.5997381628295258e-05, "loss": 2.0715, "step": 9693000 }, { "epoch": 48.02, "learning_rate": 2.5996143041869175e-05, "loss": 2.0704, "step": 9693500 }, { "epoch": 48.03, "learning_rate": 2.5994904455443092e-05, "loss": 2.0508, "step": 9694000 }, { "epoch": 48.03, "learning_rate": 2.5993668346189857e-05, "loss": 2.0863, "step": 9694500 }, { "epoch": 48.03, "learning_rate": 2.5992429759763774e-05, "loss": 2.0786, "step": 9695000 }, { "epoch": 48.03, "learning_rate": 2.5991193650510546e-05, "loss": 2.0566, "step": 9695500 }, { "epoch": 48.04, "learning_rate": 2.5989955064084463e-05, "loss": 2.108, "step": 9696000 }, { "epoch": 48.04, "learning_rate": 2.598871647765838e-05, "loss": 2.0685, "step": 9696500 }, { "epoch": 48.04, "learning_rate": 2.5987477891232297e-05, "loss": 2.0494, "step": 9697000 }, { "epoch": 48.04, "learning_rate": 2.5986239304806214e-05, "loss": 2.0624, "step": 9697500 }, { "epoch": 48.05, "learning_rate": 2.5985003195552983e-05, "loss": 2.0749, "step": 9698000 }, { "epoch": 48.05, "learning_rate": 2.598376708629975e-05, "loss": 2.0353, "step": 9698500 }, { "epoch": 48.05, "learning_rate": 2.5982528499873665e-05, "loss": 2.0688, "step": 9699000 }, { "epoch": 48.05, "learning_rate": 2.5981289913447582e-05, "loss": 2.0472, "step": 9699500 }, { "epoch": 48.06, "learning_rate": 2.59800513270215e-05, "loss": 2.0664, "step": 9700000 }, { "epoch": 48.06, "learning_rate": 2.5978812740595416e-05, "loss": 2.0577, "step": 9700500 }, { "epoch": 48.06, "learning_rate": 2.5977574154169333e-05, "loss": 2.0729, "step": 9701000 }, { "epoch": 48.06, "learning_rate": 2.597633556774325e-05, "loss": 2.0776, "step": 9701500 }, { "epoch": 48.07, "learning_rate": 2.5975096981317164e-05, "loss": 2.063, "step": 9702000 }, { "epoch": 48.07, "learning_rate": 2.597385839489108e-05, "loss": 2.0854, "step": 9702500 }, { "epoch": 48.07, "learning_rate": 2.5972619808464997e-05, "loss": 2.0922, "step": 9703000 }, { "epoch": 48.07, "learning_rate": 2.5971383699211766e-05, "loss": 2.0871, "step": 9703500 }, { "epoch": 48.08, "learning_rate": 2.5970145112785683e-05, "loss": 2.0844, "step": 9704000 }, { "epoch": 48.08, "learning_rate": 2.59689065263596e-05, "loss": 2.0717, "step": 9704500 }, { "epoch": 48.08, "learning_rate": 2.5967667939933517e-05, "loss": 2.0612, "step": 9705000 }, { "epoch": 48.08, "learning_rate": 2.5966431830680282e-05, "loss": 2.0453, "step": 9705500 }, { "epoch": 48.09, "learning_rate": 2.59651932442542e-05, "loss": 2.0646, "step": 9706000 }, { "epoch": 48.09, "learning_rate": 2.5963954657828116e-05, "loss": 2.0671, "step": 9706500 }, { "epoch": 48.09, "learning_rate": 2.5962716071402033e-05, "loss": 2.1132, "step": 9707000 }, { "epoch": 48.09, "learning_rate": 2.596147748497595e-05, "loss": 2.0626, "step": 9707500 }, { "epoch": 48.1, "learning_rate": 2.5960238898549867e-05, "loss": 2.0508, "step": 9708000 }, { "epoch": 48.1, "learning_rate": 2.5959000312123784e-05, "loss": 2.0575, "step": 9708500 }, { "epoch": 48.1, "learning_rate": 2.5957761725697698e-05, "loss": 2.0687, "step": 9709000 }, { "epoch": 48.1, "learning_rate": 2.5956523139271614e-05, "loss": 2.0814, "step": 9709500 }, { "epoch": 48.11, "learning_rate": 2.5955287030018383e-05, "loss": 2.0803, "step": 9710000 }, { "epoch": 48.11, "learning_rate": 2.59540484435923e-05, "loss": 2.0769, "step": 9710500 }, { "epoch": 48.11, "learning_rate": 2.5952812334339066e-05, "loss": 2.0761, "step": 9711000 }, { "epoch": 48.11, "learning_rate": 2.5951573747912983e-05, "loss": 2.0469, "step": 9711500 }, { "epoch": 48.12, "learning_rate": 2.59503351614869e-05, "loss": 2.0807, "step": 9712000 }, { "epoch": 48.12, "learning_rate": 2.5949096575060816e-05, "loss": 2.0468, "step": 9712500 }, { "epoch": 48.12, "learning_rate": 2.5947857988634733e-05, "loss": 2.0451, "step": 9713000 }, { "epoch": 48.12, "learning_rate": 2.594661940220865e-05, "loss": 2.0614, "step": 9713500 }, { "epoch": 48.13, "learning_rate": 2.5945380815782567e-05, "loss": 2.0499, "step": 9714000 }, { "epoch": 48.13, "learning_rate": 2.5944142229356484e-05, "loss": 2.0682, "step": 9714500 }, { "epoch": 48.13, "learning_rate": 2.59429036429304e-05, "loss": 2.0467, "step": 9715000 }, { "epoch": 48.13, "learning_rate": 2.5941665056504315e-05, "loss": 2.0862, "step": 9715500 }, { "epoch": 48.14, "learning_rate": 2.5940428947251083e-05, "loss": 2.0578, "step": 9716000 }, { "epoch": 48.14, "learning_rate": 2.5939190360825e-05, "loss": 2.0569, "step": 9716500 }, { "epoch": 48.14, "learning_rate": 2.5937951774398917e-05, "loss": 2.0743, "step": 9717000 }, { "epoch": 48.14, "learning_rate": 2.5936713187972834e-05, "loss": 2.0602, "step": 9717500 }, { "epoch": 48.15, "learning_rate": 2.593547460154675e-05, "loss": 2.062, "step": 9718000 }, { "epoch": 48.15, "learning_rate": 2.5934236015120668e-05, "loss": 2.0689, "step": 9718500 }, { "epoch": 48.15, "learning_rate": 2.593299742869458e-05, "loss": 2.0593, "step": 9719000 }, { "epoch": 48.15, "learning_rate": 2.5931758842268495e-05, "loss": 2.0911, "step": 9719500 }, { "epoch": 48.16, "learning_rate": 2.5930520255842412e-05, "loss": 2.0665, "step": 9720000 }, { "epoch": 48.16, "learning_rate": 2.5929284146589184e-05, "loss": 2.0961, "step": 9720500 }, { "epoch": 48.16, "learning_rate": 2.59280455601631e-05, "loss": 2.0641, "step": 9721000 }, { "epoch": 48.16, "learning_rate": 2.5926806973737018e-05, "loss": 2.0816, "step": 9721500 }, { "epoch": 48.17, "learning_rate": 2.5925568387310935e-05, "loss": 2.0643, "step": 9722000 }, { "epoch": 48.17, "learning_rate": 2.592432980088485e-05, "loss": 2.0623, "step": 9722500 }, { "epoch": 48.17, "learning_rate": 2.5923091214458762e-05, "loss": 2.0549, "step": 9723000 }, { "epoch": 48.17, "learning_rate": 2.592185262803268e-05, "loss": 2.0921, "step": 9723500 }, { "epoch": 48.18, "learning_rate": 2.592061651877945e-05, "loss": 2.0591, "step": 9724000 }, { "epoch": 48.18, "learning_rate": 2.5919377932353368e-05, "loss": 2.074, "step": 9724500 }, { "epoch": 48.18, "learning_rate": 2.5918139345927285e-05, "loss": 2.0504, "step": 9725000 }, { "epoch": 48.18, "learning_rate": 2.5916900759501202e-05, "loss": 2.0681, "step": 9725500 }, { "epoch": 48.19, "learning_rate": 2.5915662173075112e-05, "loss": 2.0731, "step": 9726000 }, { "epoch": 48.19, "learning_rate": 2.591442358664903e-05, "loss": 2.0701, "step": 9726500 }, { "epoch": 48.19, "learning_rate": 2.5913185000222946e-05, "loss": 2.0678, "step": 9727000 }, { "epoch": 48.19, "learning_rate": 2.5911951368142567e-05, "loss": 2.0891, "step": 9727500 }, { "epoch": 48.2, "learning_rate": 2.5910712781716484e-05, "loss": 2.0894, "step": 9728000 }, { "epoch": 48.2, "learning_rate": 2.59094741952904e-05, "loss": 2.0625, "step": 9728500 }, { "epoch": 48.2, "learning_rate": 2.5908235608864317e-05, "loss": 2.0668, "step": 9729000 }, { "epoch": 48.2, "learning_rate": 2.5906997022438234e-05, "loss": 2.0652, "step": 9729500 }, { "epoch": 48.21, "learning_rate": 2.590575843601215e-05, "loss": 2.0679, "step": 9730000 }, { "epoch": 48.21, "learning_rate": 2.5904519849586068e-05, "loss": 2.0607, "step": 9730500 }, { "epoch": 48.21, "learning_rate": 2.5903281263159985e-05, "loss": 2.0868, "step": 9731000 }, { "epoch": 48.21, "learning_rate": 2.5902042676733902e-05, "loss": 2.0603, "step": 9731500 }, { "epoch": 48.22, "learning_rate": 2.5900804090307812e-05, "loss": 2.054, "step": 9732000 }, { "epoch": 48.22, "learning_rate": 2.5899567981054584e-05, "loss": 2.061, "step": 9732500 }, { "epoch": 48.22, "learning_rate": 2.58983293946285e-05, "loss": 2.0857, "step": 9733000 }, { "epoch": 48.22, "learning_rate": 2.589709080820242e-05, "loss": 2.0811, "step": 9733500 }, { "epoch": 48.23, "learning_rate": 2.5895854698949184e-05, "loss": 2.0748, "step": 9734000 }, { "epoch": 48.23, "learning_rate": 2.58946161125231e-05, "loss": 2.0677, "step": 9734500 }, { "epoch": 48.23, "learning_rate": 2.5893377526097018e-05, "loss": 2.075, "step": 9735000 }, { "epoch": 48.23, "learning_rate": 2.5892138939670935e-05, "loss": 2.0586, "step": 9735500 }, { "epoch": 48.24, "learning_rate": 2.589090035324485e-05, "loss": 2.0557, "step": 9736000 }, { "epoch": 48.24, "learning_rate": 2.588966176681877e-05, "loss": 2.0609, "step": 9736500 }, { "epoch": 48.24, "learning_rate": 2.5888423180392685e-05, "loss": 2.0632, "step": 9737000 }, { "epoch": 48.24, "learning_rate": 2.5887184593966602e-05, "loss": 2.1116, "step": 9737500 }, { "epoch": 48.25, "learning_rate": 2.5885948484713368e-05, "loss": 2.0596, "step": 9738000 }, { "epoch": 48.25, "learning_rate": 2.5884709898287285e-05, "loss": 2.0707, "step": 9738500 }, { "epoch": 48.25, "learning_rate": 2.58834713118612e-05, "loss": 2.0671, "step": 9739000 }, { "epoch": 48.25, "learning_rate": 2.588223272543512e-05, "loss": 2.0982, "step": 9739500 }, { "epoch": 48.26, "learning_rate": 2.5880994139009035e-05, "loss": 2.0637, "step": 9740000 }, { "epoch": 48.26, "learning_rate": 2.5879755552582952e-05, "loss": 2.0786, "step": 9740500 }, { "epoch": 48.26, "learning_rate": 2.587851696615687e-05, "loss": 2.0927, "step": 9741000 }, { "epoch": 48.26, "learning_rate": 2.5877280856903635e-05, "loss": 2.0884, "step": 9741500 }, { "epoch": 48.27, "learning_rate": 2.587604227047755e-05, "loss": 2.0889, "step": 9742000 }, { "epoch": 48.27, "learning_rate": 2.587480368405147e-05, "loss": 2.0755, "step": 9742500 }, { "epoch": 48.27, "learning_rate": 2.5873565097625385e-05, "loss": 2.0769, "step": 9743000 }, { "epoch": 48.27, "learning_rate": 2.5872326511199302e-05, "loss": 2.0776, "step": 9743500 }, { "epoch": 48.28, "learning_rate": 2.587108792477322e-05, "loss": 2.0677, "step": 9744000 }, { "epoch": 48.28, "learning_rate": 2.5869849338347136e-05, "loss": 2.071, "step": 9744500 }, { "epoch": 48.28, "learning_rate": 2.5868610751921046e-05, "loss": 2.0744, "step": 9745000 }, { "epoch": 48.28, "learning_rate": 2.5867372165494963e-05, "loss": 2.0669, "step": 9745500 }, { "epoch": 48.29, "learning_rate": 2.586613357906888e-05, "loss": 2.062, "step": 9746000 }, { "epoch": 48.29, "learning_rate": 2.5864897469815652e-05, "loss": 2.0728, "step": 9746500 }, { "epoch": 48.29, "learning_rate": 2.5863661360562418e-05, "loss": 2.0797, "step": 9747000 }, { "epoch": 48.29, "learning_rate": 2.5862425251309187e-05, "loss": 2.0729, "step": 9747500 }, { "epoch": 48.29, "learning_rate": 2.5861186664883104e-05, "loss": 2.0772, "step": 9748000 }, { "epoch": 48.3, "learning_rate": 2.585994807845702e-05, "loss": 2.0869, "step": 9748500 }, { "epoch": 48.3, "learning_rate": 2.5858709492030937e-05, "loss": 2.0845, "step": 9749000 }, { "epoch": 48.3, "learning_rate": 2.585747090560485e-05, "loss": 2.073, "step": 9749500 }, { "epoch": 48.3, "learning_rate": 2.5856232319178768e-05, "loss": 2.0641, "step": 9750000 }, { "epoch": 48.31, "learning_rate": 2.5854993732752685e-05, "loss": 2.0851, "step": 9750500 }, { "epoch": 48.31, "learning_rate": 2.5853755146326602e-05, "loss": 2.0751, "step": 9751000 }, { "epoch": 48.31, "learning_rate": 2.585251903707337e-05, "loss": 2.0666, "step": 9751500 }, { "epoch": 48.31, "learning_rate": 2.5851280450647287e-05, "loss": 2.091, "step": 9752000 }, { "epoch": 48.32, "learning_rate": 2.5850041864221204e-05, "loss": 2.0829, "step": 9752500 }, { "epoch": 48.32, "learning_rate": 2.5848803277795118e-05, "loss": 2.0533, "step": 9753000 }, { "epoch": 48.32, "learning_rate": 2.5847564691369035e-05, "loss": 2.0803, "step": 9753500 }, { "epoch": 48.32, "learning_rate": 2.5846326104942952e-05, "loss": 2.0733, "step": 9754000 }, { "epoch": 48.33, "learning_rate": 2.584508999568972e-05, "loss": 2.0647, "step": 9754500 }, { "epoch": 48.33, "learning_rate": 2.5843851409263638e-05, "loss": 2.0768, "step": 9755000 }, { "epoch": 48.33, "learning_rate": 2.5842612822837554e-05, "loss": 2.0593, "step": 9755500 }, { "epoch": 48.33, "learning_rate": 2.5841374236411468e-05, "loss": 2.0653, "step": 9756000 }, { "epoch": 48.34, "learning_rate": 2.5840135649985385e-05, "loss": 2.0729, "step": 9756500 }, { "epoch": 48.34, "learning_rate": 2.5838897063559302e-05, "loss": 2.0721, "step": 9757000 }, { "epoch": 48.34, "learning_rate": 2.583765847713322e-05, "loss": 2.0603, "step": 9757500 }, { "epoch": 48.34, "learning_rate": 2.5836419890707136e-05, "loss": 2.0674, "step": 9758000 }, { "epoch": 48.35, "learning_rate": 2.5835181304281053e-05, "loss": 2.0737, "step": 9758500 }, { "epoch": 48.35, "learning_rate": 2.583394271785497e-05, "loss": 2.0599, "step": 9759000 }, { "epoch": 48.35, "learning_rate": 2.5832706608601735e-05, "loss": 2.0306, "step": 9759500 }, { "epoch": 48.35, "learning_rate": 2.5831470499348504e-05, "loss": 2.0718, "step": 9760000 }, { "epoch": 48.36, "learning_rate": 2.583023191292242e-05, "loss": 2.0608, "step": 9760500 }, { "epoch": 48.36, "learning_rate": 2.5828993326496338e-05, "loss": 2.0524, "step": 9761000 }, { "epoch": 48.36, "learning_rate": 2.5827754740070255e-05, "loss": 2.0856, "step": 9761500 }, { "epoch": 48.36, "learning_rate": 2.582651615364417e-05, "loss": 2.0602, "step": 9762000 }, { "epoch": 48.37, "learning_rate": 2.582527756721809e-05, "loss": 2.0569, "step": 9762500 }, { "epoch": 48.37, "learning_rate": 2.582404393513771e-05, "loss": 2.0502, "step": 9763000 }, { "epoch": 48.37, "learning_rate": 2.5822805348711626e-05, "loss": 2.0887, "step": 9763500 }, { "epoch": 48.37, "learning_rate": 2.5821566762285536e-05, "loss": 2.0735, "step": 9764000 }, { "epoch": 48.38, "learning_rate": 2.5820328175859453e-05, "loss": 2.0687, "step": 9764500 }, { "epoch": 48.38, "learning_rate": 2.581908958943337e-05, "loss": 2.055, "step": 9765000 }, { "epoch": 48.38, "learning_rate": 2.5817853480180142e-05, "loss": 2.0727, "step": 9765500 }, { "epoch": 48.38, "learning_rate": 2.581661489375406e-05, "loss": 2.0782, "step": 9766000 }, { "epoch": 48.39, "learning_rate": 2.5815376307327976e-05, "loss": 2.0736, "step": 9766500 }, { "epoch": 48.39, "learning_rate": 2.5814137720901893e-05, "loss": 2.0872, "step": 9767000 }, { "epoch": 48.39, "learning_rate": 2.5812899134475803e-05, "loss": 2.0343, "step": 9767500 }, { "epoch": 48.39, "learning_rate": 2.581166054804972e-05, "loss": 2.0607, "step": 9768000 }, { "epoch": 48.4, "learning_rate": 2.5810421961623637e-05, "loss": 2.0824, "step": 9768500 }, { "epoch": 48.4, "learning_rate": 2.580918585237041e-05, "loss": 2.0666, "step": 9769000 }, { "epoch": 48.4, "learning_rate": 2.5807947265944326e-05, "loss": 2.0827, "step": 9769500 }, { "epoch": 48.4, "learning_rate": 2.5806708679518243e-05, "loss": 2.0688, "step": 9770000 }, { "epoch": 48.41, "learning_rate": 2.5805470093092153e-05, "loss": 2.0586, "step": 9770500 }, { "epoch": 48.41, "learning_rate": 2.580423150666607e-05, "loss": 2.094, "step": 9771000 }, { "epoch": 48.41, "learning_rate": 2.5802992920239987e-05, "loss": 2.0918, "step": 9771500 }, { "epoch": 48.41, "learning_rate": 2.5801754333813904e-05, "loss": 2.0866, "step": 9772000 }, { "epoch": 48.42, "learning_rate": 2.580051574738782e-05, "loss": 2.0746, "step": 9772500 }, { "epoch": 48.42, "learning_rate": 2.5799277160961738e-05, "loss": 2.0768, "step": 9773000 }, { "epoch": 48.42, "learning_rate": 2.5798041051708503e-05, "loss": 2.0521, "step": 9773500 }, { "epoch": 48.42, "learning_rate": 2.579680246528242e-05, "loss": 2.0737, "step": 9774000 }, { "epoch": 48.43, "learning_rate": 2.5795563878856337e-05, "loss": 2.0919, "step": 9774500 }, { "epoch": 48.43, "learning_rate": 2.5794325292430254e-05, "loss": 2.0785, "step": 9775000 }, { "epoch": 48.43, "learning_rate": 2.579308670600417e-05, "loss": 2.0853, "step": 9775500 }, { "epoch": 48.43, "learning_rate": 2.5791848119578088e-05, "loss": 2.0713, "step": 9776000 }, { "epoch": 48.44, "learning_rate": 2.5790612010324853e-05, "loss": 2.0862, "step": 9776500 }, { "epoch": 48.44, "learning_rate": 2.578937342389877e-05, "loss": 2.057, "step": 9777000 }, { "epoch": 48.44, "learning_rate": 2.5788134837472687e-05, "loss": 2.1189, "step": 9777500 }, { "epoch": 48.44, "learning_rate": 2.5786896251046604e-05, "loss": 2.0738, "step": 9778000 }, { "epoch": 48.45, "learning_rate": 2.578565766462052e-05, "loss": 2.0712, "step": 9778500 }, { "epoch": 48.45, "learning_rate": 2.5784419078194438e-05, "loss": 2.0801, "step": 9779000 }, { "epoch": 48.45, "learning_rate": 2.5783180491768355e-05, "loss": 2.0682, "step": 9779500 }, { "epoch": 48.45, "learning_rate": 2.5781941905342272e-05, "loss": 2.1006, "step": 9780000 }, { "epoch": 48.46, "learning_rate": 2.578070331891619e-05, "loss": 2.0551, "step": 9780500 }, { "epoch": 48.46, "learning_rate": 2.5779464732490106e-05, "loss": 2.0699, "step": 9781000 }, { "epoch": 48.46, "learning_rate": 2.5778226146064023e-05, "loss": 2.0729, "step": 9781500 }, { "epoch": 48.46, "learning_rate": 2.577698755963794e-05, "loss": 2.0691, "step": 9782000 }, { "epoch": 48.47, "learning_rate": 2.5775748973211857e-05, "loss": 2.0737, "step": 9782500 }, { "epoch": 48.47, "learning_rate": 2.5774510386785773e-05, "loss": 2.0722, "step": 9783000 }, { "epoch": 48.47, "learning_rate": 2.577327427753254e-05, "loss": 2.0975, "step": 9783500 }, { "epoch": 48.47, "learning_rate": 2.5772035691106456e-05, "loss": 2.082, "step": 9784000 }, { "epoch": 48.48, "learning_rate": 2.577079958185322e-05, "loss": 2.0808, "step": 9784500 }, { "epoch": 48.48, "learning_rate": 2.5769560995427138e-05, "loss": 2.0684, "step": 9785000 }, { "epoch": 48.48, "learning_rate": 2.5768322409001055e-05, "loss": 2.0711, "step": 9785500 }, { "epoch": 48.48, "learning_rate": 2.5767083822574972e-05, "loss": 2.0805, "step": 9786000 }, { "epoch": 48.49, "learning_rate": 2.576584523614889e-05, "loss": 2.0834, "step": 9786500 }, { "epoch": 48.49, "learning_rate": 2.5764606649722806e-05, "loss": 2.0863, "step": 9787000 }, { "epoch": 48.49, "learning_rate": 2.5763368063296723e-05, "loss": 2.0866, "step": 9787500 }, { "epoch": 48.49, "learning_rate": 2.576212947687064e-05, "loss": 2.0597, "step": 9788000 }, { "epoch": 48.5, "learning_rate": 2.5760890890444557e-05, "loss": 2.0672, "step": 9788500 }, { "epoch": 48.5, "learning_rate": 2.5759654781191322e-05, "loss": 2.0599, "step": 9789000 }, { "epoch": 48.5, "learning_rate": 2.575841619476524e-05, "loss": 2.0653, "step": 9789500 }, { "epoch": 48.5, "learning_rate": 2.5757177608339156e-05, "loss": 2.0824, "step": 9790000 }, { "epoch": 48.51, "learning_rate": 2.575594149908592e-05, "loss": 2.0585, "step": 9790500 }, { "epoch": 48.51, "learning_rate": 2.5754702912659838e-05, "loss": 2.0629, "step": 9791000 }, { "epoch": 48.51, "learning_rate": 2.5753464326233755e-05, "loss": 2.0828, "step": 9791500 }, { "epoch": 48.51, "learning_rate": 2.5752225739807672e-05, "loss": 2.0785, "step": 9792000 }, { "epoch": 48.52, "learning_rate": 2.575098715338159e-05, "loss": 2.0805, "step": 9792500 }, { "epoch": 48.52, "learning_rate": 2.5749748566955506e-05, "loss": 2.0427, "step": 9793000 }, { "epoch": 48.52, "learning_rate": 2.5748509980529423e-05, "loss": 2.0649, "step": 9793500 }, { "epoch": 48.52, "learning_rate": 2.574727139410334e-05, "loss": 2.0645, "step": 9794000 }, { "epoch": 48.53, "learning_rate": 2.5746032807677257e-05, "loss": 2.0942, "step": 9794500 }, { "epoch": 48.53, "learning_rate": 2.5744799175596877e-05, "loss": 2.0986, "step": 9795000 }, { "epoch": 48.53, "learning_rate": 2.574356058917079e-05, "loss": 2.094, "step": 9795500 }, { "epoch": 48.53, "learning_rate": 2.5742322002744708e-05, "loss": 2.0723, "step": 9796000 }, { "epoch": 48.54, "learning_rate": 2.574108341631862e-05, "loss": 2.0933, "step": 9796500 }, { "epoch": 48.54, "learning_rate": 2.573984482989254e-05, "loss": 2.0473, "step": 9797000 }, { "epoch": 48.54, "learning_rate": 2.5738606243466455e-05, "loss": 2.0803, "step": 9797500 }, { "epoch": 48.54, "learning_rate": 2.5737367657040372e-05, "loss": 2.092, "step": 9798000 }, { "epoch": 48.55, "learning_rate": 2.573612907061429e-05, "loss": 2.0707, "step": 9798500 }, { "epoch": 48.55, "learning_rate": 2.5734890484188206e-05, "loss": 2.0831, "step": 9799000 }, { "epoch": 48.55, "learning_rate": 2.573365685210783e-05, "loss": 2.0785, "step": 9799500 }, { "epoch": 48.55, "learning_rate": 2.5732418265681747e-05, "loss": 2.0462, "step": 9800000 }, { "epoch": 48.56, "learning_rate": 2.5731179679255664e-05, "loss": 2.0567, "step": 9800500 }, { "epoch": 48.56, "learning_rate": 2.5729941092829578e-05, "loss": 2.0486, "step": 9801000 }, { "epoch": 48.56, "learning_rate": 2.572870250640349e-05, "loss": 2.1049, "step": 9801500 }, { "epoch": 48.56, "learning_rate": 2.5727466397150263e-05, "loss": 2.0781, "step": 9802000 }, { "epoch": 48.56, "learning_rate": 2.572622781072418e-05, "loss": 2.0774, "step": 9802500 }, { "epoch": 48.57, "learning_rate": 2.5724989224298097e-05, "loss": 2.083, "step": 9803000 }, { "epoch": 48.57, "learning_rate": 2.5723750637872014e-05, "loss": 2.0656, "step": 9803500 }, { "epoch": 48.57, "learning_rate": 2.572251205144593e-05, "loss": 2.1175, "step": 9804000 }, { "epoch": 48.57, "learning_rate": 2.5721273465019845e-05, "loss": 2.0821, "step": 9804500 }, { "epoch": 48.58, "learning_rate": 2.5720034878593758e-05, "loss": 2.0814, "step": 9805000 }, { "epoch": 48.58, "learning_rate": 2.571879876934053e-05, "loss": 2.0987, "step": 9805500 }, { "epoch": 48.58, "learning_rate": 2.5717565137260148e-05, "loss": 2.0754, "step": 9806000 }, { "epoch": 48.58, "learning_rate": 2.5716326550834065e-05, "loss": 2.0633, "step": 9806500 }, { "epoch": 48.59, "learning_rate": 2.571508796440798e-05, "loss": 2.0473, "step": 9807000 }, { "epoch": 48.59, "learning_rate": 2.57138493779819e-05, "loss": 2.0819, "step": 9807500 }, { "epoch": 48.59, "learning_rate": 2.5712610791555812e-05, "loss": 2.0785, "step": 9808000 }, { "epoch": 48.59, "learning_rate": 2.571137220512973e-05, "loss": 2.0895, "step": 9808500 }, { "epoch": 48.6, "learning_rate": 2.5710133618703646e-05, "loss": 2.0761, "step": 9809000 }, { "epoch": 48.6, "learning_rate": 2.5708895032277563e-05, "loss": 2.0847, "step": 9809500 }, { "epoch": 48.6, "learning_rate": 2.570765644585148e-05, "loss": 2.0955, "step": 9810000 }, { "epoch": 48.6, "learning_rate": 2.5706417859425397e-05, "loss": 2.0907, "step": 9810500 }, { "epoch": 48.61, "learning_rate": 2.5705181750172162e-05, "loss": 2.0921, "step": 9811000 }, { "epoch": 48.61, "learning_rate": 2.570394316374608e-05, "loss": 2.1075, "step": 9811500 }, { "epoch": 48.61, "learning_rate": 2.5702707054492848e-05, "loss": 2.094, "step": 9812000 }, { "epoch": 48.61, "learning_rate": 2.5701468468066765e-05, "loss": 2.0821, "step": 9812500 }, { "epoch": 48.62, "learning_rate": 2.570022988164068e-05, "loss": 2.0647, "step": 9813000 }, { "epoch": 48.62, "learning_rate": 2.56989912952146e-05, "loss": 2.0562, "step": 9813500 }, { "epoch": 48.62, "learning_rate": 2.5697752708788515e-05, "loss": 2.1038, "step": 9814000 }, { "epoch": 48.62, "learning_rate": 2.569651412236243e-05, "loss": 2.0698, "step": 9814500 }, { "epoch": 48.63, "learning_rate": 2.5695278013109198e-05, "loss": 2.0767, "step": 9815000 }, { "epoch": 48.63, "learning_rate": 2.5694039426683115e-05, "loss": 2.087, "step": 9815500 }, { "epoch": 48.63, "learning_rate": 2.569280084025703e-05, "loss": 2.0904, "step": 9816000 }, { "epoch": 48.63, "learning_rate": 2.569156225383095e-05, "loss": 2.0937, "step": 9816500 }, { "epoch": 48.64, "learning_rate": 2.5690323667404866e-05, "loss": 2.0847, "step": 9817000 }, { "epoch": 48.64, "learning_rate": 2.5689085080978782e-05, "loss": 2.0884, "step": 9817500 }, { "epoch": 48.64, "learning_rate": 2.5687846494552696e-05, "loss": 2.0883, "step": 9818000 }, { "epoch": 48.64, "learning_rate": 2.5686607908126613e-05, "loss": 2.0669, "step": 9818500 }, { "epoch": 48.65, "learning_rate": 2.568536932170053e-05, "loss": 2.0878, "step": 9819000 }, { "epoch": 48.65, "learning_rate": 2.56841332124473e-05, "loss": 2.0696, "step": 9819500 }, { "epoch": 48.65, "learning_rate": 2.5682894626021216e-05, "loss": 2.0852, "step": 9820000 }, { "epoch": 48.65, "learning_rate": 2.5681656039595133e-05, "loss": 2.0794, "step": 9820500 }, { "epoch": 48.66, "learning_rate": 2.568041745316905e-05, "loss": 2.0929, "step": 9821000 }, { "epoch": 48.66, "learning_rate": 2.5679181343915815e-05, "loss": 2.0778, "step": 9821500 }, { "epoch": 48.66, "learning_rate": 2.5677942757489732e-05, "loss": 2.0683, "step": 9822000 }, { "epoch": 48.66, "learning_rate": 2.567670417106365e-05, "loss": 2.0707, "step": 9822500 }, { "epoch": 48.67, "learning_rate": 2.5675465584637566e-05, "loss": 2.0819, "step": 9823000 }, { "epoch": 48.67, "learning_rate": 2.5674229475384338e-05, "loss": 2.0871, "step": 9823500 }, { "epoch": 48.67, "learning_rate": 2.5672990888958248e-05, "loss": 2.0787, "step": 9824000 }, { "epoch": 48.67, "learning_rate": 2.5671752302532165e-05, "loss": 2.0702, "step": 9824500 }, { "epoch": 48.68, "learning_rate": 2.5670513716106082e-05, "loss": 2.057, "step": 9825000 }, { "epoch": 48.68, "learning_rate": 2.5669277606852854e-05, "loss": 2.092, "step": 9825500 }, { "epoch": 48.68, "learning_rate": 2.566803902042677e-05, "loss": 2.1049, "step": 9826000 }, { "epoch": 48.68, "learning_rate": 2.5666800434000688e-05, "loss": 2.0921, "step": 9826500 }, { "epoch": 48.69, "learning_rate": 2.5665561847574605e-05, "loss": 2.0816, "step": 9827000 }, { "epoch": 48.69, "learning_rate": 2.566432573832137e-05, "loss": 2.0794, "step": 9827500 }, { "epoch": 48.69, "learning_rate": 2.5663087151895287e-05, "loss": 2.0903, "step": 9828000 }, { "epoch": 48.69, "learning_rate": 2.5661848565469204e-05, "loss": 2.0987, "step": 9828500 }, { "epoch": 48.7, "learning_rate": 2.566060997904312e-05, "loss": 2.0843, "step": 9829000 }, { "epoch": 48.7, "learning_rate": 2.5659371392617038e-05, "loss": 2.0774, "step": 9829500 }, { "epoch": 48.7, "learning_rate": 2.5658132806190955e-05, "loss": 2.1036, "step": 9830000 }, { "epoch": 48.7, "learning_rate": 2.5656894219764865e-05, "loss": 2.0809, "step": 9830500 }, { "epoch": 48.71, "learning_rate": 2.5655655633338782e-05, "loss": 2.0746, "step": 9831000 }, { "epoch": 48.71, "learning_rate": 2.56544170469127e-05, "loss": 2.0667, "step": 9831500 }, { "epoch": 48.71, "learning_rate": 2.5653178460486616e-05, "loss": 2.0865, "step": 9832000 }, { "epoch": 48.71, "learning_rate": 2.5651939874060533e-05, "loss": 2.0681, "step": 9832500 }, { "epoch": 48.72, "learning_rate": 2.565070128763445e-05, "loss": 2.075, "step": 9833000 }, { "epoch": 48.72, "learning_rate": 2.5649462701208367e-05, "loss": 2.0767, "step": 9833500 }, { "epoch": 48.72, "learning_rate": 2.5648224114782284e-05, "loss": 2.078, "step": 9834000 }, { "epoch": 48.72, "learning_rate": 2.564698800552905e-05, "loss": 2.0837, "step": 9834500 }, { "epoch": 48.73, "learning_rate": 2.5645749419102966e-05, "loss": 2.0922, "step": 9835000 }, { "epoch": 48.73, "learning_rate": 2.5644510832676883e-05, "loss": 2.0839, "step": 9835500 }, { "epoch": 48.73, "learning_rate": 2.5643277200596504e-05, "loss": 2.0544, "step": 9836000 }, { "epoch": 48.73, "learning_rate": 2.564203861417042e-05, "loss": 2.0747, "step": 9836500 }, { "epoch": 48.74, "learning_rate": 2.5640800027744337e-05, "loss": 2.0954, "step": 9837000 }, { "epoch": 48.74, "learning_rate": 2.5639561441318254e-05, "loss": 2.066, "step": 9837500 }, { "epoch": 48.74, "learning_rate": 2.563832285489217e-05, "loss": 2.0959, "step": 9838000 }, { "epoch": 48.74, "learning_rate": 2.5637084268466088e-05, "loss": 2.0896, "step": 9838500 }, { "epoch": 48.75, "learning_rate": 2.5635845682040005e-05, "loss": 2.0605, "step": 9839000 }, { "epoch": 48.75, "learning_rate": 2.5634607095613922e-05, "loss": 2.0981, "step": 9839500 }, { "epoch": 48.75, "learning_rate": 2.5633368509187832e-05, "loss": 2.0604, "step": 9840000 }, { "epoch": 48.75, "learning_rate": 2.563212992276175e-05, "loss": 2.0956, "step": 9840500 }, { "epoch": 48.76, "learning_rate": 2.5630891336335666e-05, "loss": 2.0504, "step": 9841000 }, { "epoch": 48.76, "learning_rate": 2.5629652749909583e-05, "loss": 2.0919, "step": 9841500 }, { "epoch": 48.76, "learning_rate": 2.56284141634835e-05, "loss": 2.0729, "step": 9842000 }, { "epoch": 48.76, "learning_rate": 2.5627178054230272e-05, "loss": 2.0919, "step": 9842500 }, { "epoch": 48.77, "learning_rate": 2.5625939467804182e-05, "loss": 2.0976, "step": 9843000 }, { "epoch": 48.77, "learning_rate": 2.56247008813781e-05, "loss": 2.0771, "step": 9843500 }, { "epoch": 48.77, "learning_rate": 2.5623467249297723e-05, "loss": 2.1023, "step": 9844000 }, { "epoch": 48.77, "learning_rate": 2.562222866287164e-05, "loss": 2.0468, "step": 9844500 }, { "epoch": 48.78, "learning_rate": 2.5620990076445557e-05, "loss": 2.0602, "step": 9845000 }, { "epoch": 48.78, "learning_rate": 2.5619751490019474e-05, "loss": 2.0865, "step": 9845500 }, { "epoch": 48.78, "learning_rate": 2.5618512903593388e-05, "loss": 2.0828, "step": 9846000 }, { "epoch": 48.78, "learning_rate": 2.5617274317167305e-05, "loss": 2.0739, "step": 9846500 }, { "epoch": 48.79, "learning_rate": 2.561603573074122e-05, "loss": 2.0861, "step": 9847000 }, { "epoch": 48.79, "learning_rate": 2.561479962148799e-05, "loss": 2.0775, "step": 9847500 }, { "epoch": 48.79, "learning_rate": 2.5613561035061907e-05, "loss": 2.0798, "step": 9848000 }, { "epoch": 48.79, "learning_rate": 2.5612322448635824e-05, "loss": 2.0865, "step": 9848500 }, { "epoch": 48.8, "learning_rate": 2.5611083862209738e-05, "loss": 2.0808, "step": 9849000 }, { "epoch": 48.8, "learning_rate": 2.5609845275783655e-05, "loss": 2.0897, "step": 9849500 }, { "epoch": 48.8, "learning_rate": 2.560860668935757e-05, "loss": 2.0584, "step": 9850000 }, { "epoch": 48.8, "learning_rate": 2.560736810293149e-05, "loss": 2.0804, "step": 9850500 }, { "epoch": 48.81, "learning_rate": 2.5606129516505405e-05, "loss": 2.081, "step": 9851000 }, { "epoch": 48.81, "learning_rate": 2.5604890930079322e-05, "loss": 2.1051, "step": 9851500 }, { "epoch": 48.81, "learning_rate": 2.560365234365324e-05, "loss": 2.0843, "step": 9852000 }, { "epoch": 48.81, "learning_rate": 2.560241375722715e-05, "loss": 2.0912, "step": 9852500 }, { "epoch": 48.82, "learning_rate": 2.5601175170801066e-05, "loss": 2.0922, "step": 9853000 }, { "epoch": 48.82, "learning_rate": 2.5599936584374983e-05, "loss": 2.0865, "step": 9853500 }, { "epoch": 48.82, "learning_rate": 2.5598700475121755e-05, "loss": 2.1011, "step": 9854000 }, { "epoch": 48.82, "learning_rate": 2.5597461888695672e-05, "loss": 2.0769, "step": 9854500 }, { "epoch": 48.83, "learning_rate": 2.559622577944244e-05, "loss": 2.0675, "step": 9855000 }, { "epoch": 48.83, "learning_rate": 2.5594987193016358e-05, "loss": 2.0966, "step": 9855500 }, { "epoch": 48.83, "learning_rate": 2.5593751083763124e-05, "loss": 2.0887, "step": 9856000 }, { "epoch": 48.83, "learning_rate": 2.559251249733704e-05, "loss": 2.0795, "step": 9856500 }, { "epoch": 48.83, "learning_rate": 2.5591273910910957e-05, "loss": 2.0824, "step": 9857000 }, { "epoch": 48.84, "learning_rate": 2.5590035324484874e-05, "loss": 2.0583, "step": 9857500 }, { "epoch": 48.84, "learning_rate": 2.558879673805879e-05, "loss": 2.0865, "step": 9858000 }, { "epoch": 48.84, "learning_rate": 2.5587560628805557e-05, "loss": 2.0836, "step": 9858500 }, { "epoch": 48.84, "learning_rate": 2.5586322042379474e-05, "loss": 2.1011, "step": 9859000 }, { "epoch": 48.85, "learning_rate": 2.558508345595339e-05, "loss": 2.0893, "step": 9859500 }, { "epoch": 48.85, "learning_rate": 2.5583844869527307e-05, "loss": 2.0582, "step": 9860000 }, { "epoch": 48.85, "learning_rate": 2.5582606283101224e-05, "loss": 2.079, "step": 9860500 }, { "epoch": 48.85, "learning_rate": 2.558136769667514e-05, "loss": 2.0785, "step": 9861000 }, { "epoch": 48.86, "learning_rate": 2.5580129110249058e-05, "loss": 2.1065, "step": 9861500 }, { "epoch": 48.86, "learning_rate": 2.5578890523822975e-05, "loss": 2.1317, "step": 9862000 }, { "epoch": 48.86, "learning_rate": 2.557765193739689e-05, "loss": 2.0918, "step": 9862500 }, { "epoch": 48.86, "learning_rate": 2.5576413350970806e-05, "loss": 2.0792, "step": 9863000 }, { "epoch": 48.87, "learning_rate": 2.5575174764544723e-05, "loss": 2.0784, "step": 9863500 }, { "epoch": 48.87, "learning_rate": 2.557393865529149e-05, "loss": 2.0741, "step": 9864000 }, { "epoch": 48.87, "learning_rate": 2.5572700068865408e-05, "loss": 2.0666, "step": 9864500 }, { "epoch": 48.87, "learning_rate": 2.5571461482439325e-05, "loss": 2.071, "step": 9865000 }, { "epoch": 48.88, "learning_rate": 2.5570222896013242e-05, "loss": 2.0642, "step": 9865500 }, { "epoch": 48.88, "learning_rate": 2.5568984309587156e-05, "loss": 2.0886, "step": 9866000 }, { "epoch": 48.88, "learning_rate": 2.5567745723161073e-05, "loss": 2.0813, "step": 9866500 }, { "epoch": 48.88, "learning_rate": 2.556650961390784e-05, "loss": 2.0934, "step": 9867000 }, { "epoch": 48.89, "learning_rate": 2.556527102748176e-05, "loss": 2.0554, "step": 9867500 }, { "epoch": 48.89, "learning_rate": 2.5564032441055675e-05, "loss": 2.0594, "step": 9868000 }, { "epoch": 48.89, "learning_rate": 2.5562793854629592e-05, "loss": 2.1098, "step": 9868500 }, { "epoch": 48.89, "learning_rate": 2.556155526820351e-05, "loss": 2.0791, "step": 9869000 }, { "epoch": 48.9, "learning_rate": 2.5560316681777423e-05, "loss": 2.0948, "step": 9869500 }, { "epoch": 48.9, "learning_rate": 2.555907809535134e-05, "loss": 2.0945, "step": 9870000 }, { "epoch": 48.9, "learning_rate": 2.5557839508925257e-05, "loss": 2.0797, "step": 9870500 }, { "epoch": 48.9, "learning_rate": 2.5556603399672025e-05, "loss": 2.1059, "step": 9871000 }, { "epoch": 48.91, "learning_rate": 2.5555364813245942e-05, "loss": 2.074, "step": 9871500 }, { "epoch": 48.91, "learning_rate": 2.5554128703992708e-05, "loss": 2.1053, "step": 9872000 }, { "epoch": 48.91, "learning_rate": 2.5552890117566625e-05, "loss": 2.0689, "step": 9872500 }, { "epoch": 48.91, "learning_rate": 2.555165153114054e-05, "loss": 2.0867, "step": 9873000 }, { "epoch": 48.92, "learning_rate": 2.555041294471446e-05, "loss": 2.0861, "step": 9873500 }, { "epoch": 48.92, "learning_rate": 2.5549174358288375e-05, "loss": 2.081, "step": 9874000 }, { "epoch": 48.92, "learning_rate": 2.5547935771862292e-05, "loss": 2.0643, "step": 9874500 }, { "epoch": 48.92, "learning_rate": 2.554669718543621e-05, "loss": 2.0971, "step": 9875000 }, { "epoch": 48.93, "learning_rate": 2.5545458599010126e-05, "loss": 2.0745, "step": 9875500 }, { "epoch": 48.93, "learning_rate": 2.5544220012584043e-05, "loss": 2.0993, "step": 9876000 }, { "epoch": 48.93, "learning_rate": 2.5542981426157957e-05, "loss": 2.0756, "step": 9876500 }, { "epoch": 48.93, "learning_rate": 2.5541742839731874e-05, "loss": 2.081, "step": 9877000 }, { "epoch": 48.94, "learning_rate": 2.5540504253305787e-05, "loss": 2.0961, "step": 9877500 }, { "epoch": 48.94, "learning_rate": 2.553926814405256e-05, "loss": 2.0919, "step": 9878000 }, { "epoch": 48.94, "learning_rate": 2.5538032034799325e-05, "loss": 2.0839, "step": 9878500 }, { "epoch": 48.94, "learning_rate": 2.553679344837324e-05, "loss": 2.0824, "step": 9879000 }, { "epoch": 48.95, "learning_rate": 2.553555486194716e-05, "loss": 2.0496, "step": 9879500 }, { "epoch": 48.95, "learning_rate": 2.5534316275521076e-05, "loss": 2.1092, "step": 9880000 }, { "epoch": 48.95, "learning_rate": 2.5533077689094992e-05, "loss": 2.0769, "step": 9880500 }, { "epoch": 48.95, "learning_rate": 2.553183910266891e-05, "loss": 2.0974, "step": 9881000 }, { "epoch": 48.96, "learning_rate": 2.5530600516242826e-05, "loss": 2.0975, "step": 9881500 }, { "epoch": 48.96, "learning_rate": 2.5529361929816743e-05, "loss": 2.0665, "step": 9882000 }, { "epoch": 48.96, "learning_rate": 2.552812334339066e-05, "loss": 2.0629, "step": 9882500 }, { "epoch": 48.96, "learning_rate": 2.5526884756964574e-05, "loss": 2.1047, "step": 9883000 }, { "epoch": 48.97, "learning_rate": 2.5525648647711342e-05, "loss": 2.0884, "step": 9883500 }, { "epoch": 48.97, "learning_rate": 2.5524412538458108e-05, "loss": 2.0802, "step": 9884000 }, { "epoch": 48.97, "learning_rate": 2.5523173952032025e-05, "loss": 2.0588, "step": 9884500 }, { "epoch": 48.97, "learning_rate": 2.5521935365605942e-05, "loss": 2.0992, "step": 9885000 }, { "epoch": 48.98, "learning_rate": 2.552069677917986e-05, "loss": 2.097, "step": 9885500 }, { "epoch": 48.98, "learning_rate": 2.5519458192753776e-05, "loss": 2.0806, "step": 9886000 }, { "epoch": 48.98, "learning_rate": 2.5518219606327693e-05, "loss": 2.0905, "step": 9886500 }, { "epoch": 48.98, "learning_rate": 2.551698101990161e-05, "loss": 2.0759, "step": 9887000 }, { "epoch": 48.99, "learning_rate": 2.5515742433475526e-05, "loss": 2.0872, "step": 9887500 }, { "epoch": 48.99, "learning_rate": 2.5514506324222292e-05, "loss": 2.0778, "step": 9888000 }, { "epoch": 48.99, "learning_rate": 2.551326773779621e-05, "loss": 2.0753, "step": 9888500 }, { "epoch": 48.99, "learning_rate": 2.5512029151370126e-05, "loss": 2.0966, "step": 9889000 }, { "epoch": 49.0, "learning_rate": 2.5510790564944043e-05, "loss": 2.0912, "step": 9889500 }, { "epoch": 49.0, "learning_rate": 2.550955197851796e-05, "loss": 2.0629, "step": 9890000 }, { "epoch": 49.0, "eval_accuracy": 0.6695752446021327, "eval_accuracy_mlm": 0.627827145425072, "eval_accuracy_nsp": 0.8662451609866685, "eval_loss": 2.2877252101898193, "eval_runtime": 147.0755, "eval_samples_per_second": 1733.525, "eval_steps_per_second": 72.235, "step": 9890307 }, { "epoch": 49.0, "learning_rate": 2.5508313392091876e-05, "loss": 2.0911, "step": 9890500 }, { "epoch": 49.0, "learning_rate": 2.5507074805665793e-05, "loss": 2.0401, "step": 9891000 }, { "epoch": 49.01, "learning_rate": 2.550583869641256e-05, "loss": 2.0609, "step": 9891500 }, { "epoch": 49.01, "learning_rate": 2.5504600109986476e-05, "loss": 2.0413, "step": 9892000 }, { "epoch": 49.01, "learning_rate": 2.5503364000733245e-05, "loss": 2.0648, "step": 9892500 }, { "epoch": 49.01, "learning_rate": 2.5502125414307158e-05, "loss": 2.0649, "step": 9893000 }, { "epoch": 49.02, "learning_rate": 2.5500886827881075e-05, "loss": 2.0358, "step": 9893500 }, { "epoch": 49.02, "learning_rate": 2.5499648241454992e-05, "loss": 2.0771, "step": 9894000 }, { "epoch": 49.02, "learning_rate": 2.549840965502891e-05, "loss": 2.0387, "step": 9894500 }, { "epoch": 49.02, "learning_rate": 2.5497171068602826e-05, "loss": 2.0469, "step": 9895000 }, { "epoch": 49.03, "learning_rate": 2.5495932482176743e-05, "loss": 2.0644, "step": 9895500 }, { "epoch": 49.03, "learning_rate": 2.549469389575066e-05, "loss": 2.0797, "step": 9896000 }, { "epoch": 49.03, "learning_rate": 2.5493460263670277e-05, "loss": 2.0552, "step": 9896500 }, { "epoch": 49.03, "learning_rate": 2.5492221677244194e-05, "loss": 2.0603, "step": 9897000 }, { "epoch": 49.04, "learning_rate": 2.5490985567990966e-05, "loss": 2.0648, "step": 9897500 }, { "epoch": 49.04, "learning_rate": 2.5489746981564883e-05, "loss": 2.0462, "step": 9898000 }, { "epoch": 49.04, "learning_rate": 2.54885083951388e-05, "loss": 2.0354, "step": 9898500 }, { "epoch": 49.04, "learning_rate": 2.5487269808712717e-05, "loss": 2.0485, "step": 9899000 }, { "epoch": 49.05, "learning_rate": 2.5486031222286634e-05, "loss": 2.0275, "step": 9899500 }, { "epoch": 49.05, "learning_rate": 2.5484792635860544e-05, "loss": 2.0279, "step": 9900000 }, { "epoch": 49.05, "learning_rate": 2.548355404943446e-05, "loss": 2.0658, "step": 9900500 }, { "epoch": 49.05, "learning_rate": 2.5482315463008378e-05, "loss": 2.0575, "step": 9901000 }, { "epoch": 49.06, "learning_rate": 2.5481076876582295e-05, "loss": 2.0685, "step": 9901500 }, { "epoch": 49.06, "learning_rate": 2.547983829015621e-05, "loss": 2.0443, "step": 9902000 }, { "epoch": 49.06, "learning_rate": 2.547859970373013e-05, "loss": 2.0703, "step": 9902500 }, { "epoch": 49.06, "learning_rate": 2.5477361117304046e-05, "loss": 2.0671, "step": 9903000 }, { "epoch": 49.07, "learning_rate": 2.547612253087796e-05, "loss": 2.0786, "step": 9903500 }, { "epoch": 49.07, "learning_rate": 2.5474883944451876e-05, "loss": 2.0624, "step": 9904000 }, { "epoch": 49.07, "learning_rate": 2.5473645358025793e-05, "loss": 2.0557, "step": 9904500 }, { "epoch": 49.07, "learning_rate": 2.547240677159971e-05, "loss": 2.0553, "step": 9905000 }, { "epoch": 49.08, "learning_rate": 2.5471168185173627e-05, "loss": 2.0711, "step": 9905500 }, { "epoch": 49.08, "learning_rate": 2.5469929598747544e-05, "loss": 2.0735, "step": 9906000 }, { "epoch": 49.08, "learning_rate": 2.546869101232146e-05, "loss": 2.0724, "step": 9906500 }, { "epoch": 49.08, "learning_rate": 2.5467452425895378e-05, "loss": 2.0589, "step": 9907000 }, { "epoch": 49.09, "learning_rate": 2.5466213839469295e-05, "loss": 2.0656, "step": 9907500 }, { "epoch": 49.09, "learning_rate": 2.546497525304321e-05, "loss": 2.0495, "step": 9908000 }, { "epoch": 49.09, "learning_rate": 2.546373666661712e-05, "loss": 2.0823, "step": 9908500 }, { "epoch": 49.09, "learning_rate": 2.546249808019104e-05, "loss": 2.0424, "step": 9909000 }, { "epoch": 49.1, "learning_rate": 2.5461259493764955e-05, "loss": 2.0456, "step": 9909500 }, { "epoch": 49.1, "learning_rate": 2.5460020907338872e-05, "loss": 2.0581, "step": 9910000 }, { "epoch": 49.1, "learning_rate": 2.5458784798085645e-05, "loss": 2.0733, "step": 9910500 }, { "epoch": 49.1, "learning_rate": 2.545754621165956e-05, "loss": 2.0788, "step": 9911000 }, { "epoch": 49.1, "learning_rate": 2.545630762523347e-05, "loss": 2.078, "step": 9911500 }, { "epoch": 49.11, "learning_rate": 2.545506903880739e-05, "loss": 2.0713, "step": 9912000 }, { "epoch": 49.11, "learning_rate": 2.545383292955416e-05, "loss": 2.0769, "step": 9912500 }, { "epoch": 49.11, "learning_rate": 2.5452594343128078e-05, "loss": 2.0475, "step": 9913000 }, { "epoch": 49.11, "learning_rate": 2.5451358233874843e-05, "loss": 2.0757, "step": 9913500 }, { "epoch": 49.12, "learning_rate": 2.545011964744876e-05, "loss": 2.0615, "step": 9914000 }, { "epoch": 49.12, "learning_rate": 2.5448881061022677e-05, "loss": 2.05, "step": 9914500 }, { "epoch": 49.12, "learning_rate": 2.5447642474596594e-05, "loss": 2.0458, "step": 9915000 }, { "epoch": 49.12, "learning_rate": 2.544640884251621e-05, "loss": 2.0706, "step": 9915500 }, { "epoch": 49.13, "learning_rate": 2.5445170256090128e-05, "loss": 2.058, "step": 9916000 }, { "epoch": 49.13, "learning_rate": 2.5443931669664045e-05, "loss": 2.0487, "step": 9916500 }, { "epoch": 49.13, "learning_rate": 2.5442693083237962e-05, "loss": 2.0645, "step": 9917000 }, { "epoch": 49.13, "learning_rate": 2.544145449681188e-05, "loss": 2.0607, "step": 9917500 }, { "epoch": 49.14, "learning_rate": 2.5440215910385796e-05, "loss": 2.0641, "step": 9918000 }, { "epoch": 49.14, "learning_rate": 2.5438977323959713e-05, "loss": 2.063, "step": 9918500 }, { "epoch": 49.14, "learning_rate": 2.5437741214706478e-05, "loss": 2.0712, "step": 9919000 }, { "epoch": 49.14, "learning_rate": 2.5436502628280395e-05, "loss": 2.0694, "step": 9919500 }, { "epoch": 49.15, "learning_rate": 2.5435264041854312e-05, "loss": 2.0483, "step": 9920000 }, { "epoch": 49.15, "learning_rate": 2.543402545542823e-05, "loss": 2.0681, "step": 9920500 }, { "epoch": 49.15, "learning_rate": 2.5432786869002146e-05, "loss": 2.0574, "step": 9921000 }, { "epoch": 49.15, "learning_rate": 2.5431548282576063e-05, "loss": 2.0421, "step": 9921500 }, { "epoch": 49.16, "learning_rate": 2.543030969614998e-05, "loss": 2.0786, "step": 9922000 }, { "epoch": 49.16, "learning_rate": 2.5429071109723897e-05, "loss": 2.0547, "step": 9922500 }, { "epoch": 49.16, "learning_rate": 2.5427832523297814e-05, "loss": 2.039, "step": 9923000 }, { "epoch": 49.16, "learning_rate": 2.5426593936871727e-05, "loss": 2.0752, "step": 9923500 }, { "epoch": 49.17, "learning_rate": 2.5425355350445644e-05, "loss": 2.0538, "step": 9924000 }, { "epoch": 49.17, "learning_rate": 2.542411676401956e-05, "loss": 2.058, "step": 9924500 }, { "epoch": 49.17, "learning_rate": 2.542288065476633e-05, "loss": 2.0588, "step": 9925000 }, { "epoch": 49.17, "learning_rate": 2.5421642068340247e-05, "loss": 2.0879, "step": 9925500 }, { "epoch": 49.18, "learning_rate": 2.5420403481914164e-05, "loss": 2.0532, "step": 9926000 }, { "epoch": 49.18, "learning_rate": 2.541916489548808e-05, "loss": 2.06, "step": 9926500 }, { "epoch": 49.18, "learning_rate": 2.5417926309061994e-05, "loss": 2.0827, "step": 9927000 }, { "epoch": 49.18, "learning_rate": 2.5416690199808763e-05, "loss": 2.0754, "step": 9927500 }, { "epoch": 49.19, "learning_rate": 2.541545409055553e-05, "loss": 2.0688, "step": 9928000 }, { "epoch": 49.19, "learning_rate": 2.54142179813023e-05, "loss": 2.0526, "step": 9928500 }, { "epoch": 49.19, "learning_rate": 2.5412979394876218e-05, "loss": 2.0349, "step": 9929000 }, { "epoch": 49.19, "learning_rate": 2.5411740808450134e-05, "loss": 2.0564, "step": 9929500 }, { "epoch": 49.2, "learning_rate": 2.541050222202405e-05, "loss": 2.0655, "step": 9930000 }, { "epoch": 49.2, "learning_rate": 2.5409263635597968e-05, "loss": 2.0401, "step": 9930500 }, { "epoch": 49.2, "learning_rate": 2.540802504917188e-05, "loss": 2.0281, "step": 9931000 }, { "epoch": 49.2, "learning_rate": 2.5406786462745795e-05, "loss": 2.0521, "step": 9931500 }, { "epoch": 49.21, "learning_rate": 2.5405547876319712e-05, "loss": 2.0595, "step": 9932000 }, { "epoch": 49.21, "learning_rate": 2.540430928989363e-05, "loss": 2.0636, "step": 9932500 }, { "epoch": 49.21, "learning_rate": 2.5403070703467546e-05, "loss": 2.0789, "step": 9933000 }, { "epoch": 49.21, "learning_rate": 2.5401832117041463e-05, "loss": 2.058, "step": 9933500 }, { "epoch": 49.22, "learning_rate": 2.540059600778823e-05, "loss": 2.0563, "step": 9934000 }, { "epoch": 49.22, "learning_rate": 2.5399357421362145e-05, "loss": 2.0696, "step": 9934500 }, { "epoch": 49.22, "learning_rate": 2.5398118834936062e-05, "loss": 2.057, "step": 9935000 }, { "epoch": 49.22, "learning_rate": 2.539688024850998e-05, "loss": 2.0474, "step": 9935500 }, { "epoch": 49.23, "learning_rate": 2.5395641662083896e-05, "loss": 2.0757, "step": 9936000 }, { "epoch": 49.23, "learning_rate": 2.539440555283067e-05, "loss": 2.0523, "step": 9936500 }, { "epoch": 49.23, "learning_rate": 2.5393166966404585e-05, "loss": 2.0487, "step": 9937000 }, { "epoch": 49.23, "learning_rate": 2.5391928379978495e-05, "loss": 2.0627, "step": 9937500 }, { "epoch": 49.24, "learning_rate": 2.5390689793552412e-05, "loss": 2.0791, "step": 9938000 }, { "epoch": 49.24, "learning_rate": 2.538945120712633e-05, "loss": 2.0484, "step": 9938500 }, { "epoch": 49.24, "learning_rate": 2.5388212620700246e-05, "loss": 2.0741, "step": 9939000 }, { "epoch": 49.24, "learning_rate": 2.5386974034274163e-05, "loss": 2.0789, "step": 9939500 }, { "epoch": 49.25, "learning_rate": 2.538573544784808e-05, "loss": 2.0562, "step": 9940000 }, { "epoch": 49.25, "learning_rate": 2.5384496861421997e-05, "loss": 2.0852, "step": 9940500 }, { "epoch": 49.25, "learning_rate": 2.538326322934162e-05, "loss": 2.0542, "step": 9941000 }, { "epoch": 49.25, "learning_rate": 2.5382027120088387e-05, "loss": 2.0843, "step": 9941500 }, { "epoch": 49.26, "learning_rate": 2.5380791010835152e-05, "loss": 2.0874, "step": 9942000 }, { "epoch": 49.26, "learning_rate": 2.537955242440907e-05, "loss": 2.0847, "step": 9942500 }, { "epoch": 49.26, "learning_rate": 2.5378313837982986e-05, "loss": 2.0554, "step": 9943000 }, { "epoch": 49.26, "learning_rate": 2.5377075251556903e-05, "loss": 2.1123, "step": 9943500 }, { "epoch": 49.27, "learning_rate": 2.537583666513082e-05, "loss": 2.0722, "step": 9944000 }, { "epoch": 49.27, "learning_rate": 2.5374598078704737e-05, "loss": 2.057, "step": 9944500 }, { "epoch": 49.27, "learning_rate": 2.5373359492278654e-05, "loss": 2.0706, "step": 9945000 }, { "epoch": 49.27, "learning_rate": 2.537212090585257e-05, "loss": 2.0586, "step": 9945500 }, { "epoch": 49.28, "learning_rate": 2.5370882319426487e-05, "loss": 2.0372, "step": 9946000 }, { "epoch": 49.28, "learning_rate": 2.5369646210173253e-05, "loss": 2.09, "step": 9946500 }, { "epoch": 49.28, "learning_rate": 2.536840762374717e-05, "loss": 2.0613, "step": 9947000 }, { "epoch": 49.28, "learning_rate": 2.5367169037321087e-05, "loss": 2.0502, "step": 9947500 }, { "epoch": 49.29, "learning_rate": 2.5365930450895004e-05, "loss": 2.0587, "step": 9948000 }, { "epoch": 49.29, "learning_rate": 2.536469186446892e-05, "loss": 2.0575, "step": 9948500 }, { "epoch": 49.29, "learning_rate": 2.5363453278042837e-05, "loss": 2.0663, "step": 9949000 }, { "epoch": 49.29, "learning_rate": 2.5362214691616754e-05, "loss": 2.0553, "step": 9949500 }, { "epoch": 49.3, "learning_rate": 2.536097610519067e-05, "loss": 2.0743, "step": 9950000 }, { "epoch": 49.3, "learning_rate": 2.535974247311029e-05, "loss": 2.0675, "step": 9950500 }, { "epoch": 49.3, "learning_rate": 2.535850636385706e-05, "loss": 2.0864, "step": 9951000 }, { "epoch": 49.3, "learning_rate": 2.5357267777430978e-05, "loss": 2.0429, "step": 9951500 }, { "epoch": 49.31, "learning_rate": 2.5356029191004895e-05, "loss": 2.0477, "step": 9952000 }, { "epoch": 49.31, "learning_rate": 2.5354790604578808e-05, "loss": 2.0787, "step": 9952500 }, { "epoch": 49.31, "learning_rate": 2.5353552018152725e-05, "loss": 2.0543, "step": 9953000 }, { "epoch": 49.31, "learning_rate": 2.535231343172664e-05, "loss": 2.0381, "step": 9953500 }, { "epoch": 49.32, "learning_rate": 2.5351074845300556e-05, "loss": 2.0665, "step": 9954000 }, { "epoch": 49.32, "learning_rate": 2.5349836258874473e-05, "loss": 2.0811, "step": 9954500 }, { "epoch": 49.32, "learning_rate": 2.5348597672448386e-05, "loss": 2.0603, "step": 9955000 }, { "epoch": 49.32, "learning_rate": 2.5347359086022303e-05, "loss": 2.0739, "step": 9955500 }, { "epoch": 49.33, "learning_rate": 2.5346122976769075e-05, "loss": 2.0817, "step": 9956000 }, { "epoch": 49.33, "learning_rate": 2.5344884390342992e-05, "loss": 2.1061, "step": 9956500 }, { "epoch": 49.33, "learning_rate": 2.5343645803916906e-05, "loss": 2.0632, "step": 9957000 }, { "epoch": 49.33, "learning_rate": 2.5342407217490823e-05, "loss": 2.0567, "step": 9957500 }, { "epoch": 49.34, "learning_rate": 2.5341168631064736e-05, "loss": 2.0572, "step": 9958000 }, { "epoch": 49.34, "learning_rate": 2.5339930044638653e-05, "loss": 2.0783, "step": 9958500 }, { "epoch": 49.34, "learning_rate": 2.533869145821257e-05, "loss": 2.0564, "step": 9959000 }, { "epoch": 49.34, "learning_rate": 2.5337452871786487e-05, "loss": 2.0852, "step": 9959500 }, { "epoch": 49.35, "learning_rate": 2.5336216762533256e-05, "loss": 2.0519, "step": 9960000 }, { "epoch": 49.35, "learning_rate": 2.5334978176107173e-05, "loss": 2.0558, "step": 9960500 }, { "epoch": 49.35, "learning_rate": 2.533373958968109e-05, "loss": 2.0673, "step": 9961000 }, { "epoch": 49.35, "learning_rate": 2.5332501003255003e-05, "loss": 2.0511, "step": 9961500 }, { "epoch": 49.36, "learning_rate": 2.533126489400178e-05, "loss": 2.0642, "step": 9962000 }, { "epoch": 49.36, "learning_rate": 2.5330028784748544e-05, "loss": 2.0694, "step": 9962500 }, { "epoch": 49.36, "learning_rate": 2.532879019832246e-05, "loss": 2.0595, "step": 9963000 }, { "epoch": 49.36, "learning_rate": 2.5327551611896378e-05, "loss": 2.0809, "step": 9963500 }, { "epoch": 49.37, "learning_rate": 2.5326313025470295e-05, "loss": 2.0781, "step": 9964000 }, { "epoch": 49.37, "learning_rate": 2.5325074439044212e-05, "loss": 2.0703, "step": 9964500 }, { "epoch": 49.37, "learning_rate": 2.5323838329790977e-05, "loss": 2.0442, "step": 9965000 }, { "epoch": 49.37, "learning_rate": 2.5322599743364894e-05, "loss": 2.0667, "step": 9965500 }, { "epoch": 49.38, "learning_rate": 2.532136115693881e-05, "loss": 2.0847, "step": 9966000 }, { "epoch": 49.38, "learning_rate": 2.5320122570512728e-05, "loss": 2.0575, "step": 9966500 }, { "epoch": 49.38, "learning_rate": 2.5318883984086645e-05, "loss": 2.077, "step": 9967000 }, { "epoch": 49.38, "learning_rate": 2.5317645397660562e-05, "loss": 2.0657, "step": 9967500 }, { "epoch": 49.38, "learning_rate": 2.5316409288407327e-05, "loss": 2.0976, "step": 9968000 }, { "epoch": 49.39, "learning_rate": 2.5315170701981244e-05, "loss": 2.083, "step": 9968500 }, { "epoch": 49.39, "learning_rate": 2.531393459272801e-05, "loss": 2.0661, "step": 9969000 }, { "epoch": 49.39, "learning_rate": 2.5312696006301927e-05, "loss": 2.0651, "step": 9969500 }, { "epoch": 49.39, "learning_rate": 2.5311457419875844e-05, "loss": 2.0643, "step": 9970000 }, { "epoch": 49.4, "learning_rate": 2.5310221310622612e-05, "loss": 2.0709, "step": 9970500 }, { "epoch": 49.4, "learning_rate": 2.530898272419653e-05, "loss": 2.0696, "step": 9971000 }, { "epoch": 49.4, "learning_rate": 2.5307744137770446e-05, "loss": 2.0578, "step": 9971500 }, { "epoch": 49.4, "learning_rate": 2.5306505551344363e-05, "loss": 2.07, "step": 9972000 }, { "epoch": 49.41, "learning_rate": 2.5305266964918277e-05, "loss": 2.0888, "step": 9972500 }, { "epoch": 49.41, "learning_rate": 2.5304028378492194e-05, "loss": 2.084, "step": 9973000 }, { "epoch": 49.41, "learning_rate": 2.530278979206611e-05, "loss": 2.0629, "step": 9973500 }, { "epoch": 49.41, "learning_rate": 2.5301551205640027e-05, "loss": 2.0664, "step": 9974000 }, { "epoch": 49.42, "learning_rate": 2.5300312619213944e-05, "loss": 2.0557, "step": 9974500 }, { "epoch": 49.42, "learning_rate": 2.529907403278786e-05, "loss": 2.0756, "step": 9975000 }, { "epoch": 49.42, "learning_rate": 2.5297835446361778e-05, "loss": 2.0948, "step": 9975500 }, { "epoch": 49.42, "learning_rate": 2.5296596859935695e-05, "loss": 2.076, "step": 9976000 }, { "epoch": 49.43, "learning_rate": 2.5295358273509612e-05, "loss": 2.0763, "step": 9976500 }, { "epoch": 49.43, "learning_rate": 2.529411968708353e-05, "loss": 2.0535, "step": 9977000 }, { "epoch": 49.43, "learning_rate": 2.5292883577830294e-05, "loss": 2.0803, "step": 9977500 }, { "epoch": 49.43, "learning_rate": 2.529164499140421e-05, "loss": 2.061, "step": 9978000 }, { "epoch": 49.44, "learning_rate": 2.529040640497813e-05, "loss": 2.0635, "step": 9978500 }, { "epoch": 49.44, "learning_rate": 2.5289167818552045e-05, "loss": 2.0546, "step": 9979000 }, { "epoch": 49.44, "learning_rate": 2.5287929232125962e-05, "loss": 2.0426, "step": 9979500 }, { "epoch": 49.44, "learning_rate": 2.528669064569988e-05, "loss": 2.0536, "step": 9980000 }, { "epoch": 49.45, "learning_rate": 2.5285452059273796e-05, "loss": 2.0764, "step": 9980500 }, { "epoch": 49.45, "learning_rate": 2.5284213472847713e-05, "loss": 2.0831, "step": 9981000 }, { "epoch": 49.45, "learning_rate": 2.528297736359448e-05, "loss": 2.0666, "step": 9981500 }, { "epoch": 49.45, "learning_rate": 2.5281741254341247e-05, "loss": 2.0609, "step": 9982000 }, { "epoch": 49.46, "learning_rate": 2.528050266791516e-05, "loss": 2.0725, "step": 9982500 }, { "epoch": 49.46, "learning_rate": 2.5279264081489078e-05, "loss": 2.0692, "step": 9983000 }, { "epoch": 49.46, "learning_rate": 2.5278025495062995e-05, "loss": 2.0733, "step": 9983500 }, { "epoch": 49.46, "learning_rate": 2.527678690863691e-05, "loss": 2.05, "step": 9984000 }, { "epoch": 49.47, "learning_rate": 2.527554832221083e-05, "loss": 2.0845, "step": 9984500 }, { "epoch": 49.47, "learning_rate": 2.5274309735784745e-05, "loss": 2.0775, "step": 9985000 }, { "epoch": 49.47, "learning_rate": 2.5273071149358662e-05, "loss": 2.0842, "step": 9985500 }, { "epoch": 49.47, "learning_rate": 2.527183256293258e-05, "loss": 2.0529, "step": 9986000 }, { "epoch": 49.48, "learning_rate": 2.5270593976506496e-05, "loss": 2.0648, "step": 9986500 }, { "epoch": 49.48, "learning_rate": 2.5269355390080413e-05, "loss": 2.0659, "step": 9987000 }, { "epoch": 49.48, "learning_rate": 2.526811680365433e-05, "loss": 2.0589, "step": 9987500 }, { "epoch": 49.48, "learning_rate": 2.526687821722824e-05, "loss": 2.0605, "step": 9988000 }, { "epoch": 49.49, "learning_rate": 2.5265639630802157e-05, "loss": 2.0624, "step": 9988500 }, { "epoch": 49.49, "learning_rate": 2.5264401044376074e-05, "loss": 2.0769, "step": 9989000 }, { "epoch": 49.49, "learning_rate": 2.526316245794999e-05, "loss": 2.0447, "step": 9989500 }, { "epoch": 49.49, "learning_rate": 2.5261923871523908e-05, "loss": 2.0645, "step": 9990000 }, { "epoch": 49.5, "learning_rate": 2.526068776227068e-05, "loss": 2.0754, "step": 9990500 }, { "epoch": 49.5, "learning_rate": 2.525944917584459e-05, "loss": 2.0636, "step": 9991000 }, { "epoch": 49.5, "learning_rate": 2.5258210589418507e-05, "loss": 2.0745, "step": 9991500 }, { "epoch": 49.5, "learning_rate": 2.525697448016528e-05, "loss": 2.077, "step": 9992000 }, { "epoch": 49.51, "learning_rate": 2.5255735893739196e-05, "loss": 2.056, "step": 9992500 }, { "epoch": 49.51, "learning_rate": 2.5254497307313113e-05, "loss": 2.0699, "step": 9993000 }, { "epoch": 49.51, "learning_rate": 2.525325872088703e-05, "loss": 2.0783, "step": 9993500 }, { "epoch": 49.51, "learning_rate": 2.5252020134460947e-05, "loss": 2.041, "step": 9994000 }, { "epoch": 49.52, "learning_rate": 2.5250781548034857e-05, "loss": 2.067, "step": 9994500 }, { "epoch": 49.52, "learning_rate": 2.524954543878163e-05, "loss": 2.052, "step": 9995000 }, { "epoch": 49.52, "learning_rate": 2.5248306852355546e-05, "loss": 2.0761, "step": 9995500 }, { "epoch": 49.52, "learning_rate": 2.5247068265929463e-05, "loss": 2.0752, "step": 9996000 }, { "epoch": 49.53, "learning_rate": 2.524582967950338e-05, "loss": 2.0894, "step": 9996500 }, { "epoch": 49.53, "learning_rate": 2.5244591093077297e-05, "loss": 2.0863, "step": 9997000 }, { "epoch": 49.53, "learning_rate": 2.5243352506651207e-05, "loss": 2.0644, "step": 9997500 }, { "epoch": 49.53, "learning_rate": 2.5242113920225124e-05, "loss": 2.0731, "step": 9998000 }, { "epoch": 49.54, "learning_rate": 2.524087533379904e-05, "loss": 2.0667, "step": 9998500 }, { "epoch": 49.54, "learning_rate": 2.5239639224545813e-05, "loss": 2.099, "step": 9999000 }, { "epoch": 49.54, "learning_rate": 2.523840063811973e-05, "loss": 2.0757, "step": 9999500 }, { "epoch": 49.54, "learning_rate": 2.5237162051693647e-05, "loss": 2.0801, "step": 10000000 }, { "epoch": 49.55, "learning_rate": 2.5235923465267557e-05, "loss": 2.0685, "step": 10000500 }, { "epoch": 49.55, "learning_rate": 2.5234684878841474e-05, "loss": 2.0787, "step": 10001000 }, { "epoch": 49.55, "learning_rate": 2.52334512467611e-05, "loss": 2.0804, "step": 10001500 }, { "epoch": 49.55, "learning_rate": 2.5232212660335015e-05, "loss": 2.0821, "step": 10002000 }, { "epoch": 49.56, "learning_rate": 2.523097655108178e-05, "loss": 2.0875, "step": 10002500 }, { "epoch": 49.56, "learning_rate": 2.5229737964655698e-05, "loss": 2.0901, "step": 10003000 }, { "epoch": 49.56, "learning_rate": 2.5228499378229615e-05, "loss": 2.0621, "step": 10003500 }, { "epoch": 49.56, "learning_rate": 2.522726079180353e-05, "loss": 2.062, "step": 10004000 }, { "epoch": 49.57, "learning_rate": 2.522602220537745e-05, "loss": 2.0521, "step": 10004500 }, { "epoch": 49.57, "learning_rate": 2.5224786096124214e-05, "loss": 2.0791, "step": 10005000 }, { "epoch": 49.57, "learning_rate": 2.522354750969813e-05, "loss": 2.085, "step": 10005500 }, { "epoch": 49.57, "learning_rate": 2.5222308923272048e-05, "loss": 2.0817, "step": 10006000 }, { "epoch": 49.58, "learning_rate": 2.5221070336845965e-05, "loss": 2.0787, "step": 10006500 }, { "epoch": 49.58, "learning_rate": 2.521983175041988e-05, "loss": 2.0649, "step": 10007000 }, { "epoch": 49.58, "learning_rate": 2.52185931639938e-05, "loss": 2.0699, "step": 10007500 }, { "epoch": 49.58, "learning_rate": 2.5217354577567715e-05, "loss": 2.0697, "step": 10008000 }, { "epoch": 49.59, "learning_rate": 2.5216115991141632e-05, "loss": 2.0751, "step": 10008500 }, { "epoch": 49.59, "learning_rate": 2.521487740471555e-05, "loss": 2.0779, "step": 10009000 }, { "epoch": 49.59, "learning_rate": 2.5213638818289466e-05, "loss": 2.0834, "step": 10009500 }, { "epoch": 49.59, "learning_rate": 2.521240023186338e-05, "loss": 2.0896, "step": 10010000 }, { "epoch": 49.6, "learning_rate": 2.5211161645437297e-05, "loss": 2.0687, "step": 10010500 }, { "epoch": 49.6, "learning_rate": 2.5209923059011214e-05, "loss": 2.0728, "step": 10011000 }, { "epoch": 49.6, "learning_rate": 2.520868447258513e-05, "loss": 2.1004, "step": 10011500 }, { "epoch": 49.6, "learning_rate": 2.5207445886159047e-05, "loss": 2.0634, "step": 10012000 }, { "epoch": 49.61, "learning_rate": 2.5206207299732964e-05, "loss": 2.0856, "step": 10012500 }, { "epoch": 49.61, "learning_rate": 2.520497119047973e-05, "loss": 2.0762, "step": 10013000 }, { "epoch": 49.61, "learning_rate": 2.52037350812265e-05, "loss": 2.0718, "step": 10013500 }, { "epoch": 49.61, "learning_rate": 2.5202496494800416e-05, "loss": 2.0678, "step": 10014000 }, { "epoch": 49.62, "learning_rate": 2.5201257908374332e-05, "loss": 2.0631, "step": 10014500 }, { "epoch": 49.62, "learning_rate": 2.520001932194825e-05, "loss": 2.0635, "step": 10015000 }, { "epoch": 49.62, "learning_rate": 2.5198780735522166e-05, "loss": 2.0666, "step": 10015500 }, { "epoch": 49.62, "learning_rate": 2.5197544626268932e-05, "loss": 2.061, "step": 10016000 }, { "epoch": 49.63, "learning_rate": 2.519630603984285e-05, "loss": 2.1172, "step": 10016500 }, { "epoch": 49.63, "learning_rate": 2.5195067453416766e-05, "loss": 2.0686, "step": 10017000 }, { "epoch": 49.63, "learning_rate": 2.5193828866990683e-05, "loss": 2.0696, "step": 10017500 }, { "epoch": 49.63, "learning_rate": 2.51925902805646e-05, "loss": 2.0608, "step": 10018000 }, { "epoch": 49.64, "learning_rate": 2.5191351694138516e-05, "loss": 2.0811, "step": 10018500 }, { "epoch": 49.64, "learning_rate": 2.5190113107712433e-05, "loss": 2.0857, "step": 10019000 }, { "epoch": 49.64, "learning_rate": 2.518887452128635e-05, "loss": 2.0473, "step": 10019500 }, { "epoch": 49.64, "learning_rate": 2.5187640889205964e-05, "loss": 2.0893, "step": 10020000 }, { "epoch": 49.65, "learning_rate": 2.518640477995274e-05, "loss": 2.0935, "step": 10020500 }, { "epoch": 49.65, "learning_rate": 2.5185166193526653e-05, "loss": 2.0902, "step": 10021000 }, { "epoch": 49.65, "learning_rate": 2.518392760710057e-05, "loss": 2.0681, "step": 10021500 }, { "epoch": 49.65, "learning_rate": 2.5182689020674487e-05, "loss": 2.0637, "step": 10022000 }, { "epoch": 49.65, "learning_rate": 2.5181450434248404e-05, "loss": 2.0663, "step": 10022500 }, { "epoch": 49.66, "learning_rate": 2.5180211847822318e-05, "loss": 2.0701, "step": 10023000 }, { "epoch": 49.66, "learning_rate": 2.517897573856909e-05, "loss": 2.0551, "step": 10023500 }, { "epoch": 49.66, "learning_rate": 2.5177739629315855e-05, "loss": 2.094, "step": 10024000 }, { "epoch": 49.66, "learning_rate": 2.5176501042889772e-05, "loss": 2.0705, "step": 10024500 }, { "epoch": 49.67, "learning_rate": 2.517526245646369e-05, "loss": 2.0466, "step": 10025000 }, { "epoch": 49.67, "learning_rate": 2.5174023870037606e-05, "loss": 2.0608, "step": 10025500 }, { "epoch": 49.67, "learning_rate": 2.5172785283611523e-05, "loss": 2.0408, "step": 10026000 }, { "epoch": 49.67, "learning_rate": 2.517154669718544e-05, "loss": 2.0661, "step": 10026500 }, { "epoch": 49.68, "learning_rate": 2.5170308110759357e-05, "loss": 2.0912, "step": 10027000 }, { "epoch": 49.68, "learning_rate": 2.516906952433327e-05, "loss": 2.0724, "step": 10027500 }, { "epoch": 49.68, "learning_rate": 2.5167830937907187e-05, "loss": 2.0548, "step": 10028000 }, { "epoch": 49.68, "learning_rate": 2.5166592351481104e-05, "loss": 2.0704, "step": 10028500 }, { "epoch": 49.69, "learning_rate": 2.5165356242227873e-05, "loss": 2.0908, "step": 10029000 }, { "epoch": 49.69, "learning_rate": 2.516411765580179e-05, "loss": 2.0752, "step": 10029500 }, { "epoch": 49.69, "learning_rate": 2.5162879069375707e-05, "loss": 2.0883, "step": 10030000 }, { "epoch": 49.69, "learning_rate": 2.5161640482949624e-05, "loss": 2.0867, "step": 10030500 }, { "epoch": 49.7, "learning_rate": 2.5160401896523537e-05, "loss": 2.0661, "step": 10031000 }, { "epoch": 49.7, "learning_rate": 2.5159163310097454e-05, "loss": 2.0925, "step": 10031500 }, { "epoch": 49.7, "learning_rate": 2.515792472367137e-05, "loss": 2.0626, "step": 10032000 }, { "epoch": 49.7, "learning_rate": 2.5156686137245285e-05, "loss": 2.0457, "step": 10032500 }, { "epoch": 49.71, "learning_rate": 2.51554475508192e-05, "loss": 2.0661, "step": 10033000 }, { "epoch": 49.71, "learning_rate": 2.5154211441565974e-05, "loss": 2.0776, "step": 10033500 }, { "epoch": 49.71, "learning_rate": 2.515297285513989e-05, "loss": 2.0839, "step": 10034000 }, { "epoch": 49.71, "learning_rate": 2.5151734268713804e-05, "loss": 2.077, "step": 10034500 }, { "epoch": 49.72, "learning_rate": 2.5150498159460573e-05, "loss": 2.0778, "step": 10035000 }, { "epoch": 49.72, "learning_rate": 2.514925957303449e-05, "loss": 2.0595, "step": 10035500 }, { "epoch": 49.72, "learning_rate": 2.5148020986608407e-05, "loss": 2.0821, "step": 10036000 }, { "epoch": 49.72, "learning_rate": 2.5146782400182324e-05, "loss": 2.0948, "step": 10036500 }, { "epoch": 49.73, "learning_rate": 2.514554381375624e-05, "loss": 2.0562, "step": 10037000 }, { "epoch": 49.73, "learning_rate": 2.5144307704503006e-05, "loss": 2.088, "step": 10037500 }, { "epoch": 49.73, "learning_rate": 2.5143069118076923e-05, "loss": 2.098, "step": 10038000 }, { "epoch": 49.73, "learning_rate": 2.514183300882369e-05, "loss": 2.0479, "step": 10038500 }, { "epoch": 49.74, "learning_rate": 2.5140594422397606e-05, "loss": 2.0532, "step": 10039000 }, { "epoch": 49.74, "learning_rate": 2.5139355835971522e-05, "loss": 2.0658, "step": 10039500 }, { "epoch": 49.74, "learning_rate": 2.513811724954544e-05, "loss": 2.0968, "step": 10040000 }, { "epoch": 49.74, "learning_rate": 2.5136878663119356e-05, "loss": 2.0495, "step": 10040500 }, { "epoch": 49.75, "learning_rate": 2.5135640076693273e-05, "loss": 2.0608, "step": 10041000 }, { "epoch": 49.75, "learning_rate": 2.513440149026719e-05, "loss": 2.0931, "step": 10041500 }, { "epoch": 49.75, "learning_rate": 2.5133162903841107e-05, "loss": 2.0862, "step": 10042000 }, { "epoch": 49.75, "learning_rate": 2.5131924317415024e-05, "loss": 2.0629, "step": 10042500 }, { "epoch": 49.76, "learning_rate": 2.513068573098894e-05, "loss": 2.0711, "step": 10043000 }, { "epoch": 49.76, "learning_rate": 2.5129447144562858e-05, "loss": 2.0681, "step": 10043500 }, { "epoch": 49.76, "learning_rate": 2.5128208558136775e-05, "loss": 2.0607, "step": 10044000 }, { "epoch": 49.76, "learning_rate": 2.512696997171069e-05, "loss": 2.0837, "step": 10044500 }, { "epoch": 49.77, "learning_rate": 2.5125731385284602e-05, "loss": 2.0823, "step": 10045000 }, { "epoch": 49.77, "learning_rate": 2.512449279885852e-05, "loss": 2.0465, "step": 10045500 }, { "epoch": 49.77, "learning_rate": 2.512325668960529e-05, "loss": 2.0768, "step": 10046000 }, { "epoch": 49.77, "learning_rate": 2.5122018103179208e-05, "loss": 2.0843, "step": 10046500 }, { "epoch": 49.78, "learning_rate": 2.5120779516753125e-05, "loss": 2.0824, "step": 10047000 }, { "epoch": 49.78, "learning_rate": 2.5119540930327042e-05, "loss": 2.0706, "step": 10047500 }, { "epoch": 49.78, "learning_rate": 2.5118302343900952e-05, "loss": 2.0525, "step": 10048000 }, { "epoch": 49.78, "learning_rate": 2.511706375747487e-05, "loss": 2.0638, "step": 10048500 }, { "epoch": 49.79, "learning_rate": 2.5115825171048786e-05, "loss": 2.0823, "step": 10049000 }, { "epoch": 49.79, "learning_rate": 2.5114586584622703e-05, "loss": 2.072, "step": 10049500 }, { "epoch": 49.79, "learning_rate": 2.511334799819662e-05, "loss": 2.0879, "step": 10050000 }, { "epoch": 49.79, "learning_rate": 2.5112111888943392e-05, "loss": 2.1039, "step": 10050500 }, { "epoch": 49.8, "learning_rate": 2.5110875779690157e-05, "loss": 2.0484, "step": 10051000 }, { "epoch": 49.8, "learning_rate": 2.5109637193264074e-05, "loss": 2.0874, "step": 10051500 }, { "epoch": 49.8, "learning_rate": 2.510839860683799e-05, "loss": 2.0751, "step": 10052000 }, { "epoch": 49.8, "learning_rate": 2.5107162497584757e-05, "loss": 2.0666, "step": 10052500 }, { "epoch": 49.81, "learning_rate": 2.5105923911158673e-05, "loss": 2.0895, "step": 10053000 }, { "epoch": 49.81, "learning_rate": 2.510468532473259e-05, "loss": 2.0793, "step": 10053500 }, { "epoch": 49.81, "learning_rate": 2.5103446738306507e-05, "loss": 2.0927, "step": 10054000 }, { "epoch": 49.81, "learning_rate": 2.5102208151880424e-05, "loss": 2.0726, "step": 10054500 }, { "epoch": 49.82, "learning_rate": 2.510096956545434e-05, "loss": 2.062, "step": 10055000 }, { "epoch": 49.82, "learning_rate": 2.5099730979028258e-05, "loss": 2.0739, "step": 10055500 }, { "epoch": 49.82, "learning_rate": 2.5098492392602175e-05, "loss": 2.0649, "step": 10056000 }, { "epoch": 49.82, "learning_rate": 2.5097253806176092e-05, "loss": 2.0906, "step": 10056500 }, { "epoch": 49.83, "learning_rate": 2.509601521975001e-05, "loss": 2.0836, "step": 10057000 }, { "epoch": 49.83, "learning_rate": 2.509477663332392e-05, "loss": 2.065, "step": 10057500 }, { "epoch": 49.83, "learning_rate": 2.509354052407069e-05, "loss": 2.0876, "step": 10058000 }, { "epoch": 49.83, "learning_rate": 2.5092301937644608e-05, "loss": 2.0593, "step": 10058500 }, { "epoch": 49.84, "learning_rate": 2.5091063351218525e-05, "loss": 2.0738, "step": 10059000 }, { "epoch": 49.84, "learning_rate": 2.5089824764792442e-05, "loss": 2.0891, "step": 10059500 }, { "epoch": 49.84, "learning_rate": 2.508858617836636e-05, "loss": 2.0643, "step": 10060000 }, { "epoch": 49.84, "learning_rate": 2.508734759194027e-05, "loss": 2.0642, "step": 10060500 }, { "epoch": 49.85, "learning_rate": 2.5086109005514186e-05, "loss": 2.1028, "step": 10061000 }, { "epoch": 49.85, "learning_rate": 2.5084870419088103e-05, "loss": 2.0619, "step": 10061500 }, { "epoch": 49.85, "learning_rate": 2.508363183266202e-05, "loss": 2.0848, "step": 10062000 }, { "epoch": 49.85, "learning_rate": 2.5082393246235937e-05, "loss": 2.0733, "step": 10062500 }, { "epoch": 49.86, "learning_rate": 2.508115713698271e-05, "loss": 2.0678, "step": 10063000 }, { "epoch": 49.86, "learning_rate": 2.507991855055662e-05, "loss": 2.0638, "step": 10063500 }, { "epoch": 49.86, "learning_rate": 2.5078679964130536e-05, "loss": 2.0863, "step": 10064000 }, { "epoch": 49.86, "learning_rate": 2.5077441377704453e-05, "loss": 2.0897, "step": 10064500 }, { "epoch": 49.87, "learning_rate": 2.5076207745624074e-05, "loss": 2.0843, "step": 10065000 }, { "epoch": 49.87, "learning_rate": 2.5074971636370843e-05, "loss": 2.0939, "step": 10065500 }, { "epoch": 49.87, "learning_rate": 2.507373304994476e-05, "loss": 2.0571, "step": 10066000 }, { "epoch": 49.87, "learning_rate": 2.5072494463518676e-05, "loss": 2.086, "step": 10066500 }, { "epoch": 49.88, "learning_rate": 2.5071255877092593e-05, "loss": 2.0804, "step": 10067000 }, { "epoch": 49.88, "learning_rate": 2.507001729066651e-05, "loss": 2.067, "step": 10067500 }, { "epoch": 49.88, "learning_rate": 2.5068778704240424e-05, "loss": 2.0927, "step": 10068000 }, { "epoch": 49.88, "learning_rate": 2.5067542594987193e-05, "loss": 2.0809, "step": 10068500 }, { "epoch": 49.89, "learning_rate": 2.506630400856111e-05, "loss": 2.0717, "step": 10069000 }, { "epoch": 49.89, "learning_rate": 2.5065065422135026e-05, "loss": 2.063, "step": 10069500 }, { "epoch": 49.89, "learning_rate": 2.5063826835708943e-05, "loss": 2.0693, "step": 10070000 }, { "epoch": 49.89, "learning_rate": 2.506258824928286e-05, "loss": 2.0874, "step": 10070500 }, { "epoch": 49.9, "learning_rate": 2.5061349662856777e-05, "loss": 2.1091, "step": 10071000 }, { "epoch": 49.9, "learning_rate": 2.506011107643069e-05, "loss": 2.0745, "step": 10071500 }, { "epoch": 49.9, "learning_rate": 2.505887496717746e-05, "loss": 2.079, "step": 10072000 }, { "epoch": 49.9, "learning_rate": 2.5057636380751377e-05, "loss": 2.0716, "step": 10072500 }, { "epoch": 49.91, "learning_rate": 2.5056397794325293e-05, "loss": 2.097, "step": 10073000 }, { "epoch": 49.91, "learning_rate": 2.505515920789921e-05, "loss": 2.0744, "step": 10073500 }, { "epoch": 49.91, "learning_rate": 2.5053923098645976e-05, "loss": 2.0566, "step": 10074000 }, { "epoch": 49.91, "learning_rate": 2.5052684512219893e-05, "loss": 2.0711, "step": 10074500 }, { "epoch": 49.92, "learning_rate": 2.505144592579381e-05, "loss": 2.0607, "step": 10075000 }, { "epoch": 49.92, "learning_rate": 2.5050207339367727e-05, "loss": 2.0703, "step": 10075500 }, { "epoch": 49.92, "learning_rate": 2.5048968752941644e-05, "loss": 2.0714, "step": 10076000 }, { "epoch": 49.92, "learning_rate": 2.504773016651556e-05, "loss": 2.0693, "step": 10076500 }, { "epoch": 49.92, "learning_rate": 2.5046491580089477e-05, "loss": 2.0652, "step": 10077000 }, { "epoch": 49.93, "learning_rate": 2.5045252993663394e-05, "loss": 2.0822, "step": 10077500 }, { "epoch": 49.93, "learning_rate": 2.5044014407237308e-05, "loss": 2.0501, "step": 10078000 }, { "epoch": 49.93, "learning_rate": 2.5042775820811225e-05, "loss": 2.0675, "step": 10078500 }, { "epoch": 49.93, "learning_rate": 2.504153723438514e-05, "loss": 2.0835, "step": 10079000 }, { "epoch": 49.94, "learning_rate": 2.504029864795906e-05, "loss": 2.0774, "step": 10079500 }, { "epoch": 49.94, "learning_rate": 2.5039060061532976e-05, "loss": 2.0773, "step": 10080000 }, { "epoch": 49.94, "learning_rate": 2.5037821475106892e-05, "loss": 2.0766, "step": 10080500 }, { "epoch": 49.94, "learning_rate": 2.503658288868081e-05, "loss": 2.0877, "step": 10081000 }, { "epoch": 49.95, "learning_rate": 2.5035349256600427e-05, "loss": 2.095, "step": 10081500 }, { "epoch": 49.95, "learning_rate": 2.5034110670174344e-05, "loss": 2.0619, "step": 10082000 }, { "epoch": 49.95, "learning_rate": 2.503287208374826e-05, "loss": 2.0599, "step": 10082500 }, { "epoch": 49.95, "learning_rate": 2.5031633497322177e-05, "loss": 2.0795, "step": 10083000 }, { "epoch": 49.96, "learning_rate": 2.5030394910896094e-05, "loss": 2.0687, "step": 10083500 }, { "epoch": 49.96, "learning_rate": 2.502915632447001e-05, "loss": 2.0784, "step": 10084000 }, { "epoch": 49.96, "learning_rate": 2.5027920215216777e-05, "loss": 2.0769, "step": 10084500 }, { "epoch": 49.96, "learning_rate": 2.5026681628790694e-05, "loss": 2.067, "step": 10085000 }, { "epoch": 49.97, "learning_rate": 2.502544304236461e-05, "loss": 2.0656, "step": 10085500 }, { "epoch": 49.97, "learning_rate": 2.5024206933111376e-05, "loss": 2.086, "step": 10086000 }, { "epoch": 49.97, "learning_rate": 2.5022968346685293e-05, "loss": 2.0772, "step": 10086500 }, { "epoch": 49.97, "learning_rate": 2.502172976025921e-05, "loss": 2.0816, "step": 10087000 }, { "epoch": 49.98, "learning_rate": 2.5020491173833127e-05, "loss": 2.0712, "step": 10087500 }, { "epoch": 49.98, "learning_rate": 2.5019252587407044e-05, "loss": 2.0866, "step": 10088000 }, { "epoch": 49.98, "learning_rate": 2.5018016478153816e-05, "loss": 2.0833, "step": 10088500 }, { "epoch": 49.98, "learning_rate": 2.5016777891727733e-05, "loss": 2.0498, "step": 10089000 }, { "epoch": 49.99, "learning_rate": 2.5015539305301643e-05, "loss": 2.0584, "step": 10089500 }, { "epoch": 49.99, "learning_rate": 2.501430071887556e-05, "loss": 2.0665, "step": 10090000 }, { "epoch": 49.99, "learning_rate": 2.5013062132449477e-05, "loss": 2.0684, "step": 10090500 }, { "epoch": 49.99, "learning_rate": 2.5011823546023394e-05, "loss": 2.0713, "step": 10091000 }, { "epoch": 50.0, "learning_rate": 2.501058495959731e-05, "loss": 2.0831, "step": 10091500 }, { "epoch": 50.0, "learning_rate": 2.5009346373171228e-05, "loss": 2.0453, "step": 10092000 }, { "epoch": 50.0, "eval_accuracy": 0.6690132336768863, "eval_accuracy_mlm": 0.6274065364763405, "eval_accuracy_nsp": 0.8651312563980875, "eval_loss": 2.3012943267822266, "eval_runtime": 147.0529, "eval_samples_per_second": 1733.791, "eval_steps_per_second": 72.246, "step": 10092150 }, { "epoch": 50.0, "learning_rate": 2.5008107786745145e-05, "loss": 2.0824, "step": 10092500 }, { "epoch": 50.0, "learning_rate": 2.500687167749191e-05, "loss": 2.0333, "step": 10093000 }, { "epoch": 50.01, "learning_rate": 2.5005635568238682e-05, "loss": 2.0593, "step": 10093500 }, { "epoch": 50.01, "learning_rate": 2.50043969818126e-05, "loss": 2.0273, "step": 10094000 }, { "epoch": 50.01, "learning_rate": 2.5003158395386516e-05, "loss": 2.0535, "step": 10094500 }, { "epoch": 50.01, "learning_rate": 2.5001919808960433e-05, "loss": 2.0412, "step": 10095000 }, { "epoch": 50.02, "learning_rate": 2.5000681222534343e-05, "loss": 2.0528, "step": 10095500 }, { "epoch": 50.02, "learning_rate": 2.4999442636108263e-05, "loss": 2.0634, "step": 10096000 }, { "epoch": 50.02, "learning_rate": 2.499820404968218e-05, "loss": 2.0148, "step": 10096500 }, { "epoch": 50.02, "learning_rate": 2.4996965463256097e-05, "loss": 2.0578, "step": 10097000 }, { "epoch": 50.03, "learning_rate": 2.4995729354002863e-05, "loss": 2.0595, "step": 10097500 }, { "epoch": 50.03, "learning_rate": 2.499449076757678e-05, "loss": 2.0559, "step": 10098000 }, { "epoch": 50.03, "learning_rate": 2.4993252181150697e-05, "loss": 2.0362, "step": 10098500 }, { "epoch": 50.03, "learning_rate": 2.499201607189747e-05, "loss": 2.0511, "step": 10099000 }, { "epoch": 50.04, "learning_rate": 2.4990777485471382e-05, "loss": 2.031, "step": 10099500 }, { "epoch": 50.04, "learning_rate": 2.49895388990453e-05, "loss": 2.0856, "step": 10100000 }, { "epoch": 50.04, "learning_rate": 2.4988300312619213e-05, "loss": 2.0489, "step": 10100500 }, { "epoch": 50.04, "learning_rate": 2.498706172619313e-05, "loss": 2.043, "step": 10101000 }, { "epoch": 50.05, "learning_rate": 2.4985823139767047e-05, "loss": 2.0638, "step": 10101500 }, { "epoch": 50.05, "learning_rate": 2.4984584553340964e-05, "loss": 2.0425, "step": 10102000 }, { "epoch": 50.05, "learning_rate": 2.498334596691488e-05, "loss": 2.0491, "step": 10102500 }, { "epoch": 50.05, "learning_rate": 2.4982107380488797e-05, "loss": 2.0651, "step": 10103000 }, { "epoch": 50.06, "learning_rate": 2.4980871271235566e-05, "loss": 2.0492, "step": 10103500 }, { "epoch": 50.06, "learning_rate": 2.497963268480948e-05, "loss": 2.032, "step": 10104000 }, { "epoch": 50.06, "learning_rate": 2.4978394098383397e-05, "loss": 2.0555, "step": 10104500 }, { "epoch": 50.06, "learning_rate": 2.4977155511957314e-05, "loss": 2.044, "step": 10105000 }, { "epoch": 50.07, "learning_rate": 2.497591692553123e-05, "loss": 2.0518, "step": 10105500 }, { "epoch": 50.07, "learning_rate": 2.4974678339105148e-05, "loss": 2.0651, "step": 10106000 }, { "epoch": 50.07, "learning_rate": 2.4973439752679064e-05, "loss": 2.0618, "step": 10106500 }, { "epoch": 50.07, "learning_rate": 2.4972201166252978e-05, "loss": 2.024, "step": 10107000 }, { "epoch": 50.08, "learning_rate": 2.4970962579826895e-05, "loss": 2.0431, "step": 10107500 }, { "epoch": 50.08, "learning_rate": 2.4969726470573664e-05, "loss": 2.048, "step": 10108000 }, { "epoch": 50.08, "learning_rate": 2.4968490361320436e-05, "loss": 2.0445, "step": 10108500 }, { "epoch": 50.08, "learning_rate": 2.496725177489435e-05, "loss": 2.0547, "step": 10109000 }, { "epoch": 50.09, "learning_rate": 2.4966013188468266e-05, "loss": 2.0599, "step": 10109500 }, { "epoch": 50.09, "learning_rate": 2.4964774602042183e-05, "loss": 2.0471, "step": 10110000 }, { "epoch": 50.09, "learning_rate": 2.49635360156161e-05, "loss": 2.0445, "step": 10110500 }, { "epoch": 50.09, "learning_rate": 2.4962297429190014e-05, "loss": 2.0358, "step": 10111000 }, { "epoch": 50.1, "learning_rate": 2.4961061319936786e-05, "loss": 2.0686, "step": 10111500 }, { "epoch": 50.1, "learning_rate": 2.49598227335107e-05, "loss": 2.0416, "step": 10112000 }, { "epoch": 50.1, "learning_rate": 2.4958584147084616e-05, "loss": 2.0738, "step": 10112500 }, { "epoch": 50.1, "learning_rate": 2.4957348037831385e-05, "loss": 2.0607, "step": 10113000 }, { "epoch": 50.11, "learning_rate": 2.4956109451405302e-05, "loss": 2.0288, "step": 10113500 }, { "epoch": 50.11, "learning_rate": 2.495487086497922e-05, "loss": 2.0524, "step": 10114000 }, { "epoch": 50.11, "learning_rate": 2.4953632278553136e-05, "loss": 2.0672, "step": 10114500 }, { "epoch": 50.11, "learning_rate": 2.4952393692127053e-05, "loss": 2.0627, "step": 10115000 }, { "epoch": 50.12, "learning_rate": 2.4951155105700967e-05, "loss": 2.062, "step": 10115500 }, { "epoch": 50.12, "learning_rate": 2.4949916519274883e-05, "loss": 2.0799, "step": 10116000 }, { "epoch": 50.12, "learning_rate": 2.49486779328488e-05, "loss": 2.0556, "step": 10116500 }, { "epoch": 50.12, "learning_rate": 2.4947439346422717e-05, "loss": 2.0486, "step": 10117000 }, { "epoch": 50.13, "learning_rate": 2.494620075999663e-05, "loss": 2.0681, "step": 10117500 }, { "epoch": 50.13, "learning_rate": 2.4944962173570548e-05, "loss": 2.0678, "step": 10118000 }, { "epoch": 50.13, "learning_rate": 2.4943723587144465e-05, "loss": 2.0516, "step": 10118500 }, { "epoch": 50.13, "learning_rate": 2.494248500071838e-05, "loss": 2.0416, "step": 10119000 }, { "epoch": 50.14, "learning_rate": 2.4941246414292295e-05, "loss": 2.0681, "step": 10119500 }, { "epoch": 50.14, "learning_rate": 2.4940007827866212e-05, "loss": 2.0443, "step": 10120000 }, { "epoch": 50.14, "learning_rate": 2.493876924144013e-05, "loss": 2.048, "step": 10120500 }, { "epoch": 50.14, "learning_rate": 2.4937533132186898e-05, "loss": 2.0518, "step": 10121000 }, { "epoch": 50.15, "learning_rate": 2.4936294545760815e-05, "loss": 2.065, "step": 10121500 }, { "epoch": 50.15, "learning_rate": 2.493505595933473e-05, "loss": 2.0634, "step": 10122000 }, { "epoch": 50.15, "learning_rate": 2.493381737290865e-05, "loss": 2.0595, "step": 10122500 }, { "epoch": 50.15, "learning_rate": 2.4932578786482562e-05, "loss": 2.0736, "step": 10123000 }, { "epoch": 50.16, "learning_rate": 2.493134020005648e-05, "loss": 2.0528, "step": 10123500 }, { "epoch": 50.16, "learning_rate": 2.493010409080325e-05, "loss": 2.0527, "step": 10124000 }, { "epoch": 50.16, "learning_rate": 2.492887045872287e-05, "loss": 2.0387, "step": 10124500 }, { "epoch": 50.16, "learning_rate": 2.4927631872296786e-05, "loss": 2.0381, "step": 10125000 }, { "epoch": 50.17, "learning_rate": 2.4926393285870702e-05, "loss": 2.0591, "step": 10125500 }, { "epoch": 50.17, "learning_rate": 2.492515469944462e-05, "loss": 2.0651, "step": 10126000 }, { "epoch": 50.17, "learning_rate": 2.4923916113018536e-05, "loss": 2.0536, "step": 10126500 }, { "epoch": 50.17, "learning_rate": 2.4922677526592453e-05, "loss": 2.0449, "step": 10127000 }, { "epoch": 50.18, "learning_rate": 2.492143894016637e-05, "loss": 2.0732, "step": 10127500 }, { "epoch": 50.18, "learning_rate": 2.4920200353740284e-05, "loss": 2.0375, "step": 10128000 }, { "epoch": 50.18, "learning_rate": 2.49189617673142e-05, "loss": 2.0387, "step": 10128500 }, { "epoch": 50.18, "learning_rate": 2.4917723180888118e-05, "loss": 2.0668, "step": 10129000 }, { "epoch": 50.19, "learning_rate": 2.4916484594462034e-05, "loss": 2.0587, "step": 10129500 }, { "epoch": 50.19, "learning_rate": 2.491524600803595e-05, "loss": 2.0645, "step": 10130000 }, { "epoch": 50.19, "learning_rate": 2.491400989878272e-05, "loss": 2.0692, "step": 10130500 }, { "epoch": 50.19, "learning_rate": 2.4912771312356634e-05, "loss": 2.053, "step": 10131000 }, { "epoch": 50.19, "learning_rate": 2.491153272593055e-05, "loss": 2.0529, "step": 10131500 }, { "epoch": 50.2, "learning_rate": 2.4910294139504468e-05, "loss": 2.0721, "step": 10132000 }, { "epoch": 50.2, "learning_rate": 2.4909055553078385e-05, "loss": 2.0809, "step": 10132500 }, { "epoch": 50.2, "learning_rate": 2.49078169666523e-05, "loss": 2.06, "step": 10133000 }, { "epoch": 50.2, "learning_rate": 2.490658085739907e-05, "loss": 2.0647, "step": 10133500 }, { "epoch": 50.21, "learning_rate": 2.4905342270972984e-05, "loss": 2.052, "step": 10134000 }, { "epoch": 50.21, "learning_rate": 2.49041036845469e-05, "loss": 2.0847, "step": 10134500 }, { "epoch": 50.21, "learning_rate": 2.4902865098120818e-05, "loss": 2.0454, "step": 10135000 }, { "epoch": 50.21, "learning_rate": 2.4901626511694735e-05, "loss": 2.0439, "step": 10135500 }, { "epoch": 50.22, "learning_rate": 2.4900390402441503e-05, "loss": 2.0333, "step": 10136000 }, { "epoch": 50.22, "learning_rate": 2.489915181601542e-05, "loss": 2.0483, "step": 10136500 }, { "epoch": 50.22, "learning_rate": 2.4897913229589334e-05, "loss": 2.0637, "step": 10137000 }, { "epoch": 50.22, "learning_rate": 2.489667464316325e-05, "loss": 2.0616, "step": 10137500 }, { "epoch": 50.23, "learning_rate": 2.4895436056737168e-05, "loss": 2.0516, "step": 10138000 }, { "epoch": 50.23, "learning_rate": 2.4894199947483937e-05, "loss": 2.0694, "step": 10138500 }, { "epoch": 50.23, "learning_rate": 2.4892961361057853e-05, "loss": 2.0448, "step": 10139000 }, { "epoch": 50.23, "learning_rate": 2.489172277463177e-05, "loss": 2.0379, "step": 10139500 }, { "epoch": 50.24, "learning_rate": 2.4890484188205687e-05, "loss": 2.0634, "step": 10140000 }, { "epoch": 50.24, "learning_rate": 2.4889248078952453e-05, "loss": 2.0381, "step": 10140500 }, { "epoch": 50.24, "learning_rate": 2.488800949252637e-05, "loss": 2.0469, "step": 10141000 }, { "epoch": 50.24, "learning_rate": 2.4886770906100287e-05, "loss": 2.0541, "step": 10141500 }, { "epoch": 50.25, "learning_rate": 2.4885532319674204e-05, "loss": 2.0731, "step": 10142000 }, { "epoch": 50.25, "learning_rate": 2.488429373324812e-05, "loss": 2.0607, "step": 10142500 }, { "epoch": 50.25, "learning_rate": 2.4883055146822037e-05, "loss": 2.0643, "step": 10143000 }, { "epoch": 50.25, "learning_rate": 2.488181656039595e-05, "loss": 2.0559, "step": 10143500 }, { "epoch": 50.26, "learning_rate": 2.4880577973969868e-05, "loss": 2.0598, "step": 10144000 }, { "epoch": 50.26, "learning_rate": 2.4879339387543785e-05, "loss": 2.044, "step": 10144500 }, { "epoch": 50.26, "learning_rate": 2.4878100801117702e-05, "loss": 2.0641, "step": 10145000 }, { "epoch": 50.26, "learning_rate": 2.487686221469162e-05, "loss": 2.0331, "step": 10145500 }, { "epoch": 50.27, "learning_rate": 2.4875623628265536e-05, "loss": 2.079, "step": 10146000 }, { "epoch": 50.27, "learning_rate": 2.48743875190123e-05, "loss": 2.0569, "step": 10146500 }, { "epoch": 50.27, "learning_rate": 2.4873148932586218e-05, "loss": 2.0563, "step": 10147000 }, { "epoch": 50.27, "learning_rate": 2.4871910346160135e-05, "loss": 2.0803, "step": 10147500 }, { "epoch": 50.28, "learning_rate": 2.4870671759734052e-05, "loss": 2.0425, "step": 10148000 }, { "epoch": 50.28, "learning_rate": 2.486943317330797e-05, "loss": 2.0611, "step": 10148500 }, { "epoch": 50.28, "learning_rate": 2.4868197064054738e-05, "loss": 2.0541, "step": 10149000 }, { "epoch": 50.28, "learning_rate": 2.486695847762865e-05, "loss": 2.076, "step": 10149500 }, { "epoch": 50.29, "learning_rate": 2.486572236837542e-05, "loss": 2.0424, "step": 10150000 }, { "epoch": 50.29, "learning_rate": 2.4864483781949337e-05, "loss": 2.0258, "step": 10150500 }, { "epoch": 50.29, "learning_rate": 2.4863245195523254e-05, "loss": 2.0491, "step": 10151000 }, { "epoch": 50.29, "learning_rate": 2.486200660909717e-05, "loss": 2.0689, "step": 10151500 }, { "epoch": 50.3, "learning_rate": 2.4860768022671088e-05, "loss": 2.0573, "step": 10152000 }, { "epoch": 50.3, "learning_rate": 2.4859529436245005e-05, "loss": 2.0476, "step": 10152500 }, { "epoch": 50.3, "learning_rate": 2.4858290849818918e-05, "loss": 2.0562, "step": 10153000 }, { "epoch": 50.3, "learning_rate": 2.4857052263392835e-05, "loss": 2.0501, "step": 10153500 }, { "epoch": 50.31, "learning_rate": 2.4855813676966752e-05, "loss": 2.053, "step": 10154000 }, { "epoch": 50.31, "learning_rate": 2.485457509054067e-05, "loss": 2.035, "step": 10154500 }, { "epoch": 50.31, "learning_rate": 2.4853336504114586e-05, "loss": 2.0412, "step": 10155000 }, { "epoch": 50.31, "learning_rate": 2.4852097917688503e-05, "loss": 2.0488, "step": 10155500 }, { "epoch": 50.32, "learning_rate": 2.485085933126242e-05, "loss": 2.0893, "step": 10156000 }, { "epoch": 50.32, "learning_rate": 2.4849623222009185e-05, "loss": 2.0715, "step": 10156500 }, { "epoch": 50.32, "learning_rate": 2.4848387112755954e-05, "loss": 2.0633, "step": 10157000 }, { "epoch": 50.32, "learning_rate": 2.484714852632987e-05, "loss": 2.052, "step": 10157500 }, { "epoch": 50.33, "learning_rate": 2.4845909939903788e-05, "loss": 2.0869, "step": 10158000 }, { "epoch": 50.33, "learning_rate": 2.4844671353477705e-05, "loss": 2.0605, "step": 10158500 }, { "epoch": 50.33, "learning_rate": 2.4843432767051618e-05, "loss": 2.0674, "step": 10159000 }, { "epoch": 50.33, "learning_rate": 2.4842194180625535e-05, "loss": 2.0277, "step": 10159500 }, { "epoch": 50.34, "learning_rate": 2.4840955594199452e-05, "loss": 2.0682, "step": 10160000 }, { "epoch": 50.34, "learning_rate": 2.483971700777337e-05, "loss": 2.0574, "step": 10160500 }, { "epoch": 50.34, "learning_rate": 2.483848337569299e-05, "loss": 2.0519, "step": 10161000 }, { "epoch": 50.34, "learning_rate": 2.4837244789266907e-05, "loss": 2.0436, "step": 10161500 }, { "epoch": 50.35, "learning_rate": 2.4836006202840824e-05, "loss": 2.0654, "step": 10162000 }, { "epoch": 50.35, "learning_rate": 2.483476761641474e-05, "loss": 2.0504, "step": 10162500 }, { "epoch": 50.35, "learning_rate": 2.4833529029988657e-05, "loss": 2.0845, "step": 10163000 }, { "epoch": 50.35, "learning_rate": 2.483229044356257e-05, "loss": 2.0603, "step": 10163500 }, { "epoch": 50.36, "learning_rate": 2.4831051857136488e-05, "loss": 2.0623, "step": 10164000 }, { "epoch": 50.36, "learning_rate": 2.4829813270710405e-05, "loss": 2.0494, "step": 10164500 }, { "epoch": 50.36, "learning_rate": 2.482857468428432e-05, "loss": 2.058, "step": 10165000 }, { "epoch": 50.36, "learning_rate": 2.4827336097858235e-05, "loss": 2.072, "step": 10165500 }, { "epoch": 50.37, "learning_rate": 2.4826097511432152e-05, "loss": 2.0603, "step": 10166000 }, { "epoch": 50.37, "learning_rate": 2.4824861402178924e-05, "loss": 2.0655, "step": 10166500 }, { "epoch": 50.37, "learning_rate": 2.4823622815752838e-05, "loss": 2.0587, "step": 10167000 }, { "epoch": 50.37, "learning_rate": 2.4822384229326755e-05, "loss": 2.0733, "step": 10167500 }, { "epoch": 50.38, "learning_rate": 2.4821145642900672e-05, "loss": 2.0492, "step": 10168000 }, { "epoch": 50.38, "learning_rate": 2.4819907056474585e-05, "loss": 2.0606, "step": 10168500 }, { "epoch": 50.38, "learning_rate": 2.4818668470048502e-05, "loss": 2.064, "step": 10169000 }, { "epoch": 50.38, "learning_rate": 2.481742988362242e-05, "loss": 2.0607, "step": 10169500 }, { "epoch": 50.39, "learning_rate": 2.481619377436919e-05, "loss": 2.0479, "step": 10170000 }, { "epoch": 50.39, "learning_rate": 2.4814955187943105e-05, "loss": 2.0523, "step": 10170500 }, { "epoch": 50.39, "learning_rate": 2.4813716601517022e-05, "loss": 2.0725, "step": 10171000 }, { "epoch": 50.39, "learning_rate": 2.4812478015090935e-05, "loss": 2.0706, "step": 10171500 }, { "epoch": 50.4, "learning_rate": 2.4811239428664852e-05, "loss": 2.0717, "step": 10172000 }, { "epoch": 50.4, "learning_rate": 2.481000084223877e-05, "loss": 2.0584, "step": 10172500 }, { "epoch": 50.4, "learning_rate": 2.4808762255812686e-05, "loss": 2.0498, "step": 10173000 }, { "epoch": 50.4, "learning_rate": 2.4807523669386603e-05, "loss": 2.0541, "step": 10173500 }, { "epoch": 50.41, "learning_rate": 2.480628508296052e-05, "loss": 2.046, "step": 10174000 }, { "epoch": 50.41, "learning_rate": 2.4805046496534437e-05, "loss": 2.0679, "step": 10174500 }, { "epoch": 50.41, "learning_rate": 2.4803807910108354e-05, "loss": 2.0505, "step": 10175000 }, { "epoch": 50.41, "learning_rate": 2.480257180085512e-05, "loss": 2.0577, "step": 10175500 }, { "epoch": 50.42, "learning_rate": 2.4801333214429036e-05, "loss": 2.0659, "step": 10176000 }, { "epoch": 50.42, "learning_rate": 2.4800094628002953e-05, "loss": 2.0782, "step": 10176500 }, { "epoch": 50.42, "learning_rate": 2.479885604157687e-05, "loss": 2.0611, "step": 10177000 }, { "epoch": 50.42, "learning_rate": 2.4797617455150787e-05, "loss": 2.0391, "step": 10177500 }, { "epoch": 50.43, "learning_rate": 2.4796378868724704e-05, "loss": 2.0479, "step": 10178000 }, { "epoch": 50.43, "learning_rate": 2.479514028229862e-05, "loss": 2.0695, "step": 10178500 }, { "epoch": 50.43, "learning_rate": 2.4793901695872534e-05, "loss": 2.0757, "step": 10179000 }, { "epoch": 50.43, "learning_rate": 2.4792665586619303e-05, "loss": 2.0828, "step": 10179500 }, { "epoch": 50.44, "learning_rate": 2.479142700019322e-05, "loss": 2.0671, "step": 10180000 }, { "epoch": 50.44, "learning_rate": 2.4790188413767137e-05, "loss": 2.0618, "step": 10180500 }, { "epoch": 50.44, "learning_rate": 2.4788949827341054e-05, "loss": 2.068, "step": 10181000 }, { "epoch": 50.44, "learning_rate": 2.478771124091497e-05, "loss": 2.0466, "step": 10181500 }, { "epoch": 50.45, "learning_rate": 2.4786472654488884e-05, "loss": 2.0684, "step": 10182000 }, { "epoch": 50.45, "learning_rate": 2.47852340680628e-05, "loss": 2.0447, "step": 10182500 }, { "epoch": 50.45, "learning_rate": 2.4783995481636718e-05, "loss": 2.0712, "step": 10183000 }, { "epoch": 50.45, "learning_rate": 2.4782756895210635e-05, "loss": 2.0635, "step": 10183500 }, { "epoch": 50.46, "learning_rate": 2.4781518308784552e-05, "loss": 2.0865, "step": 10184000 }, { "epoch": 50.46, "learning_rate": 2.478027972235847e-05, "loss": 2.0359, "step": 10184500 }, { "epoch": 50.46, "learning_rate": 2.4779041135932386e-05, "loss": 2.0538, "step": 10185000 }, { "epoch": 50.46, "learning_rate": 2.477780502667915e-05, "loss": 2.0672, "step": 10185500 }, { "epoch": 50.46, "learning_rate": 2.477656644025307e-05, "loss": 2.0918, "step": 10186000 }, { "epoch": 50.47, "learning_rate": 2.4775330330999837e-05, "loss": 2.0555, "step": 10186500 }, { "epoch": 50.47, "learning_rate": 2.4774094221746606e-05, "loss": 2.057, "step": 10187000 }, { "epoch": 50.47, "learning_rate": 2.4772858112493375e-05, "loss": 2.0619, "step": 10187500 }, { "epoch": 50.47, "learning_rate": 2.4771619526067292e-05, "loss": 2.0732, "step": 10188000 }, { "epoch": 50.48, "learning_rate": 2.477038093964121e-05, "loss": 2.0743, "step": 10188500 }, { "epoch": 50.48, "learning_rate": 2.4769142353215126e-05, "loss": 2.0565, "step": 10189000 }, { "epoch": 50.48, "learning_rate": 2.4767903766789042e-05, "loss": 2.0953, "step": 10189500 }, { "epoch": 50.48, "learning_rate": 2.476666518036296e-05, "loss": 2.0662, "step": 10190000 }, { "epoch": 50.49, "learning_rate": 2.4765426593936873e-05, "loss": 2.0564, "step": 10190500 }, { "epoch": 50.49, "learning_rate": 2.476418800751079e-05, "loss": 2.0566, "step": 10191000 }, { "epoch": 50.49, "learning_rate": 2.4762949421084707e-05, "loss": 2.0552, "step": 10191500 }, { "epoch": 50.49, "learning_rate": 2.476171083465862e-05, "loss": 2.0794, "step": 10192000 }, { "epoch": 50.5, "learning_rate": 2.4760474725405393e-05, "loss": 2.0654, "step": 10192500 }, { "epoch": 50.5, "learning_rate": 2.4759238616152158e-05, "loss": 2.0456, "step": 10193000 }, { "epoch": 50.5, "learning_rate": 2.4758002506898927e-05, "loss": 2.0559, "step": 10193500 }, { "epoch": 50.5, "learning_rate": 2.4756763920472844e-05, "loss": 2.0619, "step": 10194000 }, { "epoch": 50.51, "learning_rate": 2.475552533404676e-05, "loss": 2.0606, "step": 10194500 }, { "epoch": 50.51, "learning_rate": 2.4754286747620678e-05, "loss": 2.0514, "step": 10195000 }, { "epoch": 50.51, "learning_rate": 2.475304816119459e-05, "loss": 2.0649, "step": 10195500 }, { "epoch": 50.51, "learning_rate": 2.475181205194136e-05, "loss": 2.0741, "step": 10196000 }, { "epoch": 50.52, "learning_rate": 2.4750573465515277e-05, "loss": 2.0816, "step": 10196500 }, { "epoch": 50.52, "learning_rate": 2.4749334879089194e-05, "loss": 2.0735, "step": 10197000 }, { "epoch": 50.52, "learning_rate": 2.474809629266311e-05, "loss": 2.0707, "step": 10197500 }, { "epoch": 50.52, "learning_rate": 2.4746857706237028e-05, "loss": 2.0954, "step": 10198000 }, { "epoch": 50.53, "learning_rate": 2.474561911981094e-05, "loss": 2.0377, "step": 10198500 }, { "epoch": 50.53, "learning_rate": 2.4744380533384858e-05, "loss": 2.0733, "step": 10199000 }, { "epoch": 50.53, "learning_rate": 2.4743141946958775e-05, "loss": 2.0976, "step": 10199500 }, { "epoch": 50.53, "learning_rate": 2.4741905837705544e-05, "loss": 2.0625, "step": 10200000 }, { "epoch": 50.54, "learning_rate": 2.474066725127946e-05, "loss": 2.0327, "step": 10200500 }, { "epoch": 50.54, "learning_rate": 2.4739428664853378e-05, "loss": 2.0477, "step": 10201000 }, { "epoch": 50.54, "learning_rate": 2.473819007842729e-05, "loss": 2.0646, "step": 10201500 }, { "epoch": 50.54, "learning_rate": 2.4736951492001208e-05, "loss": 2.0691, "step": 10202000 }, { "epoch": 50.55, "learning_rate": 2.4735712905575125e-05, "loss": 2.0517, "step": 10202500 }, { "epoch": 50.55, "learning_rate": 2.4734474319149042e-05, "loss": 2.0497, "step": 10203000 }, { "epoch": 50.55, "learning_rate": 2.473323573272296e-05, "loss": 2.0748, "step": 10203500 }, { "epoch": 50.55, "learning_rate": 2.4731997146296876e-05, "loss": 2.0682, "step": 10204000 }, { "epoch": 50.56, "learning_rate": 2.4730758559870793e-05, "loss": 2.0611, "step": 10204500 }, { "epoch": 50.56, "learning_rate": 2.472951997344471e-05, "loss": 2.0441, "step": 10205000 }, { "epoch": 50.56, "learning_rate": 2.4728281387018627e-05, "loss": 2.0727, "step": 10205500 }, { "epoch": 50.56, "learning_rate": 2.472704280059254e-05, "loss": 2.0837, "step": 10206000 }, { "epoch": 50.57, "learning_rate": 2.4725804214166457e-05, "loss": 2.0768, "step": 10206500 }, { "epoch": 50.57, "learning_rate": 2.4724568104913226e-05, "loss": 2.0629, "step": 10207000 }, { "epoch": 50.57, "learning_rate": 2.4723329518487143e-05, "loss": 2.0449, "step": 10207500 }, { "epoch": 50.57, "learning_rate": 2.472209093206106e-05, "loss": 2.0525, "step": 10208000 }, { "epoch": 50.58, "learning_rate": 2.4720852345634977e-05, "loss": 2.0636, "step": 10208500 }, { "epoch": 50.58, "learning_rate": 2.4719616236381742e-05, "loss": 2.0821, "step": 10209000 }, { "epoch": 50.58, "learning_rate": 2.471837764995566e-05, "loss": 2.0724, "step": 10209500 }, { "epoch": 50.58, "learning_rate": 2.4717139063529576e-05, "loss": 2.0658, "step": 10210000 }, { "epoch": 50.59, "learning_rate": 2.4715902954276345e-05, "loss": 2.0612, "step": 10210500 }, { "epoch": 50.59, "learning_rate": 2.471466436785026e-05, "loss": 2.0648, "step": 10211000 }, { "epoch": 50.59, "learning_rate": 2.4713425781424175e-05, "loss": 2.0491, "step": 10211500 }, { "epoch": 50.59, "learning_rate": 2.4712187194998092e-05, "loss": 2.0536, "step": 10212000 }, { "epoch": 50.6, "learning_rate": 2.471094860857201e-05, "loss": 2.0321, "step": 10212500 }, { "epoch": 50.6, "learning_rate": 2.4709710022145926e-05, "loss": 2.0592, "step": 10213000 }, { "epoch": 50.6, "learning_rate": 2.4708471435719843e-05, "loss": 2.0605, "step": 10213500 }, { "epoch": 50.6, "learning_rate": 2.470723284929376e-05, "loss": 2.0841, "step": 10214000 }, { "epoch": 50.61, "learning_rate": 2.4705996740040525e-05, "loss": 2.0895, "step": 10214500 }, { "epoch": 50.61, "learning_rate": 2.4704758153614442e-05, "loss": 2.0767, "step": 10215000 }, { "epoch": 50.61, "learning_rate": 2.470351956718836e-05, "loss": 2.0809, "step": 10215500 }, { "epoch": 50.61, "learning_rate": 2.4702280980762276e-05, "loss": 2.0491, "step": 10216000 }, { "epoch": 50.62, "learning_rate": 2.4701042394336193e-05, "loss": 2.0582, "step": 10216500 }, { "epoch": 50.62, "learning_rate": 2.469980380791011e-05, "loss": 2.0875, "step": 10217000 }, { "epoch": 50.62, "learning_rate": 2.4698565221484027e-05, "loss": 2.0758, "step": 10217500 }, { "epoch": 50.62, "learning_rate": 2.4697329112230792e-05, "loss": 2.049, "step": 10218000 }, { "epoch": 50.63, "learning_rate": 2.469609052580471e-05, "loss": 2.095, "step": 10218500 }, { "epoch": 50.63, "learning_rate": 2.4694851939378626e-05, "loss": 2.0731, "step": 10219000 }, { "epoch": 50.63, "learning_rate": 2.4693613352952543e-05, "loss": 2.0535, "step": 10219500 }, { "epoch": 50.63, "learning_rate": 2.4692377243699312e-05, "loss": 2.0781, "step": 10220000 }, { "epoch": 50.64, "learning_rate": 2.469113865727323e-05, "loss": 2.051, "step": 10220500 }, { "epoch": 50.64, "learning_rate": 2.4689900070847142e-05, "loss": 2.0967, "step": 10221000 }, { "epoch": 50.64, "learning_rate": 2.468866148442106e-05, "loss": 2.0877, "step": 10221500 }, { "epoch": 50.64, "learning_rate": 2.4687422897994976e-05, "loss": 2.0461, "step": 10222000 }, { "epoch": 50.65, "learning_rate": 2.4686184311568893e-05, "loss": 2.0622, "step": 10222500 }, { "epoch": 50.65, "learning_rate": 2.4684948202315662e-05, "loss": 2.0529, "step": 10223000 }, { "epoch": 50.65, "learning_rate": 2.468370961588958e-05, "loss": 2.0929, "step": 10223500 }, { "epoch": 50.65, "learning_rate": 2.4682471029463496e-05, "loss": 2.0693, "step": 10224000 }, { "epoch": 50.66, "learning_rate": 2.4681234920210265e-05, "loss": 2.0783, "step": 10224500 }, { "epoch": 50.66, "learning_rate": 2.467999633378418e-05, "loss": 2.0781, "step": 10225000 }, { "epoch": 50.66, "learning_rate": 2.46787577473581e-05, "loss": 2.0613, "step": 10225500 }, { "epoch": 50.66, "learning_rate": 2.4677519160932015e-05, "loss": 2.0501, "step": 10226000 }, { "epoch": 50.67, "learning_rate": 2.467628057450593e-05, "loss": 2.0385, "step": 10226500 }, { "epoch": 50.67, "learning_rate": 2.4675041988079846e-05, "loss": 2.0785, "step": 10227000 }, { "epoch": 50.67, "learning_rate": 2.4673803401653763e-05, "loss": 2.0632, "step": 10227500 }, { "epoch": 50.67, "learning_rate": 2.4672564815227676e-05, "loss": 2.0562, "step": 10228000 }, { "epoch": 50.68, "learning_rate": 2.4671326228801593e-05, "loss": 2.0538, "step": 10228500 }, { "epoch": 50.68, "learning_rate": 2.467008764237551e-05, "loss": 2.038, "step": 10229000 }, { "epoch": 50.68, "learning_rate": 2.4668849055949427e-05, "loss": 2.058, "step": 10229500 }, { "epoch": 50.68, "learning_rate": 2.4667610469523344e-05, "loss": 2.0596, "step": 10230000 }, { "epoch": 50.69, "learning_rate": 2.466637188309726e-05, "loss": 2.0578, "step": 10230500 }, { "epoch": 50.69, "learning_rate": 2.4665133296671175e-05, "loss": 2.0572, "step": 10231000 }, { "epoch": 50.69, "learning_rate": 2.466389471024509e-05, "loss": 2.0446, "step": 10231500 }, { "epoch": 50.69, "learning_rate": 2.466265860099186e-05, "loss": 2.0498, "step": 10232000 }, { "epoch": 50.7, "learning_rate": 2.4661420014565777e-05, "loss": 2.0508, "step": 10232500 }, { "epoch": 50.7, "learning_rate": 2.4660181428139694e-05, "loss": 2.0584, "step": 10233000 }, { "epoch": 50.7, "learning_rate": 2.465894284171361e-05, "loss": 2.0613, "step": 10233500 }, { "epoch": 50.7, "learning_rate": 2.4657704255287525e-05, "loss": 2.0912, "step": 10234000 }, { "epoch": 50.71, "learning_rate": 2.465646566886144e-05, "loss": 2.0464, "step": 10234500 }, { "epoch": 50.71, "learning_rate": 2.465522955960821e-05, "loss": 2.0585, "step": 10235000 }, { "epoch": 50.71, "learning_rate": 2.4653990973182127e-05, "loss": 2.07, "step": 10235500 }, { "epoch": 50.71, "learning_rate": 2.4652752386756044e-05, "loss": 2.0775, "step": 10236000 }, { "epoch": 50.72, "learning_rate": 2.465151380032996e-05, "loss": 2.0502, "step": 10236500 }, { "epoch": 50.72, "learning_rate": 2.4650275213903875e-05, "loss": 2.0757, "step": 10237000 }, { "epoch": 50.72, "learning_rate": 2.464903662747779e-05, "loss": 2.0762, "step": 10237500 }, { "epoch": 50.72, "learning_rate": 2.464779804105171e-05, "loss": 2.0526, "step": 10238000 }, { "epoch": 50.73, "learning_rate": 2.4646561931798477e-05, "loss": 2.059, "step": 10238500 }, { "epoch": 50.73, "learning_rate": 2.4645323345372394e-05, "loss": 2.0666, "step": 10239000 }, { "epoch": 50.73, "learning_rate": 2.464408475894631e-05, "loss": 2.0534, "step": 10239500 }, { "epoch": 50.73, "learning_rate": 2.4642846172520228e-05, "loss": 2.0882, "step": 10240000 }, { "epoch": 50.73, "learning_rate": 2.4641610063266997e-05, "loss": 2.0614, "step": 10240500 }, { "epoch": 50.74, "learning_rate": 2.4640371476840914e-05, "loss": 2.0829, "step": 10241000 }, { "epoch": 50.74, "learning_rate": 2.4639135367587683e-05, "loss": 2.0693, "step": 10241500 }, { "epoch": 50.74, "learning_rate": 2.4637896781161596e-05, "loss": 2.0563, "step": 10242000 }, { "epoch": 50.74, "learning_rate": 2.4636658194735513e-05, "loss": 2.072, "step": 10242500 }, { "epoch": 50.75, "learning_rate": 2.463541960830943e-05, "loss": 2.0897, "step": 10243000 }, { "epoch": 50.75, "learning_rate": 2.4634181021883347e-05, "loss": 2.081, "step": 10243500 }, { "epoch": 50.75, "learning_rate": 2.4632942435457264e-05, "loss": 2.0676, "step": 10244000 }, { "epoch": 50.75, "learning_rate": 2.4631703849031177e-05, "loss": 2.0737, "step": 10244500 }, { "epoch": 50.76, "learning_rate": 2.463046773977795e-05, "loss": 2.0767, "step": 10245000 }, { "epoch": 50.76, "learning_rate": 2.4629229153351863e-05, "loss": 2.0825, "step": 10245500 }, { "epoch": 50.76, "learning_rate": 2.462799056692578e-05, "loss": 2.0585, "step": 10246000 }, { "epoch": 50.76, "learning_rate": 2.4626751980499697e-05, "loss": 2.0636, "step": 10246500 }, { "epoch": 50.77, "learning_rate": 2.4625513394073614e-05, "loss": 2.085, "step": 10247000 }, { "epoch": 50.77, "learning_rate": 2.462427480764753e-05, "loss": 2.0747, "step": 10247500 }, { "epoch": 50.77, "learning_rate": 2.4623036221221444e-05, "loss": 2.0552, "step": 10248000 }, { "epoch": 50.77, "learning_rate": 2.462179763479536e-05, "loss": 2.0846, "step": 10248500 }, { "epoch": 50.78, "learning_rate": 2.462055904836928e-05, "loss": 2.0631, "step": 10249000 }, { "epoch": 50.78, "learning_rate": 2.4619320461943192e-05, "loss": 2.0971, "step": 10249500 }, { "epoch": 50.78, "learning_rate": 2.461808187551711e-05, "loss": 2.0327, "step": 10250000 }, { "epoch": 50.78, "learning_rate": 2.4616843289091026e-05, "loss": 2.0248, "step": 10250500 }, { "epoch": 50.79, "learning_rate": 2.4615607179837798e-05, "loss": 2.0483, "step": 10251000 }, { "epoch": 50.79, "learning_rate": 2.461436859341171e-05, "loss": 2.0586, "step": 10251500 }, { "epoch": 50.79, "learning_rate": 2.461313000698563e-05, "loss": 2.0825, "step": 10252000 }, { "epoch": 50.79, "learning_rate": 2.4611891420559545e-05, "loss": 2.0501, "step": 10252500 }, { "epoch": 50.8, "learning_rate": 2.4610655311306314e-05, "loss": 2.0451, "step": 10253000 }, { "epoch": 50.8, "learning_rate": 2.4609419202053083e-05, "loss": 2.0755, "step": 10253500 }, { "epoch": 50.8, "learning_rate": 2.4608180615627e-05, "loss": 2.0792, "step": 10254000 }, { "epoch": 50.8, "learning_rate": 2.4606942029200913e-05, "loss": 2.0593, "step": 10254500 }, { "epoch": 50.81, "learning_rate": 2.460570344277483e-05, "loss": 2.0665, "step": 10255000 }, { "epoch": 50.81, "learning_rate": 2.4604464856348747e-05, "loss": 2.0664, "step": 10255500 }, { "epoch": 50.81, "learning_rate": 2.4603226269922664e-05, "loss": 2.0721, "step": 10256000 }, { "epoch": 50.81, "learning_rate": 2.4601990160669433e-05, "loss": 2.0516, "step": 10256500 }, { "epoch": 50.82, "learning_rate": 2.460075157424335e-05, "loss": 2.0705, "step": 10257000 }, { "epoch": 50.82, "learning_rate": 2.4599515464990115e-05, "loss": 2.0659, "step": 10257500 }, { "epoch": 50.82, "learning_rate": 2.4598276878564032e-05, "loss": 2.0672, "step": 10258000 }, { "epoch": 50.82, "learning_rate": 2.459703829213795e-05, "loss": 2.0577, "step": 10258500 }, { "epoch": 50.83, "learning_rate": 2.4595799705711866e-05, "loss": 2.0714, "step": 10259000 }, { "epoch": 50.83, "learning_rate": 2.4594561119285783e-05, "loss": 2.0791, "step": 10259500 }, { "epoch": 50.83, "learning_rate": 2.45933225328597e-05, "loss": 2.0627, "step": 10260000 }, { "epoch": 50.83, "learning_rate": 2.4592083946433617e-05, "loss": 2.0684, "step": 10260500 }, { "epoch": 50.84, "learning_rate": 2.459084536000753e-05, "loss": 2.0586, "step": 10261000 }, { "epoch": 50.84, "learning_rate": 2.45896092507543e-05, "loss": 2.0817, "step": 10261500 }, { "epoch": 50.84, "learning_rate": 2.458837314150107e-05, "loss": 2.0737, "step": 10262000 }, { "epoch": 50.84, "learning_rate": 2.4587134555074985e-05, "loss": 2.0692, "step": 10262500 }, { "epoch": 50.85, "learning_rate": 2.4585895968648902e-05, "loss": 2.0508, "step": 10263000 }, { "epoch": 50.85, "learning_rate": 2.4584657382222815e-05, "loss": 2.0508, "step": 10263500 }, { "epoch": 50.85, "learning_rate": 2.4583418795796732e-05, "loss": 2.0704, "step": 10264000 }, { "epoch": 50.85, "learning_rate": 2.458218020937065e-05, "loss": 2.0641, "step": 10264500 }, { "epoch": 50.86, "learning_rate": 2.4580941622944566e-05, "loss": 2.0764, "step": 10265000 }, { "epoch": 50.86, "learning_rate": 2.4579703036518483e-05, "loss": 2.0881, "step": 10265500 }, { "epoch": 50.86, "learning_rate": 2.45784644500924e-05, "loss": 2.0377, "step": 10266000 }, { "epoch": 50.86, "learning_rate": 2.4577225863666317e-05, "loss": 2.0842, "step": 10266500 }, { "epoch": 50.87, "learning_rate": 2.457598727724023e-05, "loss": 2.0721, "step": 10267000 }, { "epoch": 50.87, "learning_rate": 2.4574748690814147e-05, "loss": 2.0584, "step": 10267500 }, { "epoch": 50.87, "learning_rate": 2.4573510104388064e-05, "loss": 2.0624, "step": 10268000 }, { "epoch": 50.87, "learning_rate": 2.4572273995134833e-05, "loss": 2.0766, "step": 10268500 }, { "epoch": 50.88, "learning_rate": 2.457103540870875e-05, "loss": 2.0496, "step": 10269000 }, { "epoch": 50.88, "learning_rate": 2.4569796822282667e-05, "loss": 2.0593, "step": 10269500 }, { "epoch": 50.88, "learning_rate": 2.4568558235856584e-05, "loss": 2.0678, "step": 10270000 }, { "epoch": 50.88, "learning_rate": 2.4567319649430498e-05, "loss": 2.0791, "step": 10270500 }, { "epoch": 50.89, "learning_rate": 2.4566083540177266e-05, "loss": 2.0534, "step": 10271000 }, { "epoch": 50.89, "learning_rate": 2.4564844953751183e-05, "loss": 2.0529, "step": 10271500 }, { "epoch": 50.89, "learning_rate": 2.45636063673251e-05, "loss": 2.0617, "step": 10272000 }, { "epoch": 50.89, "learning_rate": 2.456237025807187e-05, "loss": 2.0672, "step": 10272500 }, { "epoch": 50.9, "learning_rate": 2.4561131671645786e-05, "loss": 2.0919, "step": 10273000 }, { "epoch": 50.9, "learning_rate": 2.4559893085219703e-05, "loss": 2.0762, "step": 10273500 }, { "epoch": 50.9, "learning_rate": 2.4558654498793616e-05, "loss": 2.063, "step": 10274000 }, { "epoch": 50.9, "learning_rate": 2.4557415912367533e-05, "loss": 2.0562, "step": 10274500 }, { "epoch": 50.91, "learning_rate": 2.4556179803114306e-05, "loss": 2.0786, "step": 10275000 }, { "epoch": 50.91, "learning_rate": 2.455494369386107e-05, "loss": 2.0714, "step": 10275500 }, { "epoch": 50.91, "learning_rate": 2.455371006178069e-05, "loss": 2.0723, "step": 10276000 }, { "epoch": 50.91, "learning_rate": 2.455247147535461e-05, "loss": 2.0745, "step": 10276500 }, { "epoch": 50.92, "learning_rate": 2.4551232888928526e-05, "loss": 2.0486, "step": 10277000 }, { "epoch": 50.92, "learning_rate": 2.4549994302502442e-05, "loss": 2.0695, "step": 10277500 }, { "epoch": 50.92, "learning_rate": 2.4548755716076356e-05, "loss": 2.0894, "step": 10278000 }, { "epoch": 50.92, "learning_rate": 2.4547517129650273e-05, "loss": 2.0772, "step": 10278500 }, { "epoch": 50.93, "learning_rate": 2.454627854322419e-05, "loss": 2.0585, "step": 10279000 }, { "epoch": 50.93, "learning_rate": 2.4545039956798107e-05, "loss": 2.0676, "step": 10279500 }, { "epoch": 50.93, "learning_rate": 2.4543801370372024e-05, "loss": 2.0675, "step": 10280000 }, { "epoch": 50.93, "learning_rate": 2.4542562783945937e-05, "loss": 2.0797, "step": 10280500 }, { "epoch": 50.94, "learning_rate": 2.4541324197519854e-05, "loss": 2.1064, "step": 10281000 }, { "epoch": 50.94, "learning_rate": 2.454008561109377e-05, "loss": 2.0504, "step": 10281500 }, { "epoch": 50.94, "learning_rate": 2.4538847024667688e-05, "loss": 2.0739, "step": 10282000 }, { "epoch": 50.94, "learning_rate": 2.4537608438241605e-05, "loss": 2.0661, "step": 10282500 }, { "epoch": 50.95, "learning_rate": 2.4536369851815522e-05, "loss": 2.0351, "step": 10283000 }, { "epoch": 50.95, "learning_rate": 2.453513126538944e-05, "loss": 2.0694, "step": 10283500 }, { "epoch": 50.95, "learning_rate": 2.4533892678963356e-05, "loss": 2.0665, "step": 10284000 }, { "epoch": 50.95, "learning_rate": 2.453265409253727e-05, "loss": 2.1022, "step": 10284500 }, { "epoch": 50.96, "learning_rate": 2.4531417983284038e-05, "loss": 2.0483, "step": 10285000 }, { "epoch": 50.96, "learning_rate": 2.4530179396857955e-05, "loss": 2.0777, "step": 10285500 }, { "epoch": 50.96, "learning_rate": 2.4528940810431872e-05, "loss": 2.0841, "step": 10286000 }, { "epoch": 50.96, "learning_rate": 2.452770222400579e-05, "loss": 2.0911, "step": 10286500 }, { "epoch": 50.97, "learning_rate": 2.4526463637579706e-05, "loss": 2.0666, "step": 10287000 }, { "epoch": 50.97, "learning_rate": 2.452522752832647e-05, "loss": 2.0574, "step": 10287500 }, { "epoch": 50.97, "learning_rate": 2.452399141907324e-05, "loss": 2.0508, "step": 10288000 }, { "epoch": 50.97, "learning_rate": 2.4522752832647157e-05, "loss": 2.0387, "step": 10288500 }, { "epoch": 50.98, "learning_rate": 2.4521514246221074e-05, "loss": 2.0737, "step": 10289000 }, { "epoch": 50.98, "learning_rate": 2.4520275659794987e-05, "loss": 2.0772, "step": 10289500 }, { "epoch": 50.98, "learning_rate": 2.4519037073368904e-05, "loss": 2.0513, "step": 10290000 }, { "epoch": 50.98, "learning_rate": 2.4517800964115677e-05, "loss": 2.0742, "step": 10290500 }, { "epoch": 50.99, "learning_rate": 2.4516562377689593e-05, "loss": 2.0769, "step": 10291000 }, { "epoch": 50.99, "learning_rate": 2.4515323791263507e-05, "loss": 2.0693, "step": 10291500 }, { "epoch": 50.99, "learning_rate": 2.4514085204837424e-05, "loss": 2.073, "step": 10292000 }, { "epoch": 50.99, "learning_rate": 2.451284661841134e-05, "loss": 2.0722, "step": 10292500 }, { "epoch": 51.0, "learning_rate": 2.4511608031985254e-05, "loss": 2.0546, "step": 10293000 }, { "epoch": 51.0, "learning_rate": 2.4510371922732027e-05, "loss": 2.0579, "step": 10293500 }, { "epoch": 51.0, "eval_accuracy": 0.6703885384286462, "eval_accuracy_mlm": 0.6286965043668377, "eval_accuracy_nsp": 0.8669472346534148, "eval_loss": 2.271649122238159, "eval_runtime": 146.9171, "eval_samples_per_second": 1735.393, "eval_steps_per_second": 72.313, "step": 10293993 }, { "epoch": 51.0, "learning_rate": 2.4509133336305944e-05, "loss": 2.0708, "step": 10294000 }, { "epoch": 51.0, "learning_rate": 2.450789474987986e-05, "loss": 2.0436, "step": 10294500 }, { "epoch": 51.0, "learning_rate": 2.4506656163453774e-05, "loss": 2.0437, "step": 10295000 }, { "epoch": 51.01, "learning_rate": 2.450541757702769e-05, "loss": 2.0518, "step": 10295500 }, { "epoch": 51.01, "learning_rate": 2.4504178990601604e-05, "loss": 2.0442, "step": 10296000 }, { "epoch": 51.01, "learning_rate": 2.450294040417552e-05, "loss": 2.0308, "step": 10296500 }, { "epoch": 51.01, "learning_rate": 2.450170181774944e-05, "loss": 2.0156, "step": 10297000 }, { "epoch": 51.02, "learning_rate": 2.4500463231323355e-05, "loss": 2.0271, "step": 10297500 }, { "epoch": 51.02, "learning_rate": 2.4499227122070127e-05, "loss": 2.0281, "step": 10298000 }, { "epoch": 51.02, "learning_rate": 2.449798853564404e-05, "loss": 2.045, "step": 10298500 }, { "epoch": 51.02, "learning_rate": 2.4496749949217958e-05, "loss": 2.0668, "step": 10299000 }, { "epoch": 51.03, "learning_rate": 2.449551136279187e-05, "loss": 2.0434, "step": 10299500 }, { "epoch": 51.03, "learning_rate": 2.449427277636579e-05, "loss": 2.0555, "step": 10300000 }, { "epoch": 51.03, "learning_rate": 2.4493034189939705e-05, "loss": 2.0626, "step": 10300500 }, { "epoch": 51.03, "learning_rate": 2.4491795603513622e-05, "loss": 2.0572, "step": 10301000 }, { "epoch": 51.04, "learning_rate": 2.449055701708754e-05, "loss": 2.0445, "step": 10301500 }, { "epoch": 51.04, "learning_rate": 2.4489320907834308e-05, "loss": 2.0147, "step": 10302000 }, { "epoch": 51.04, "learning_rate": 2.4488082321408225e-05, "loss": 2.0535, "step": 10302500 }, { "epoch": 51.04, "learning_rate": 2.4486846212154994e-05, "loss": 2.036, "step": 10303000 }, { "epoch": 51.05, "learning_rate": 2.448560762572891e-05, "loss": 2.0616, "step": 10303500 }, { "epoch": 51.05, "learning_rate": 2.4484369039302828e-05, "loss": 2.0478, "step": 10304000 }, { "epoch": 51.05, "learning_rate": 2.4483130452876745e-05, "loss": 2.0321, "step": 10304500 }, { "epoch": 51.05, "learning_rate": 2.4481891866450658e-05, "loss": 2.0664, "step": 10305000 }, { "epoch": 51.06, "learning_rate": 2.4480653280024575e-05, "loss": 2.0236, "step": 10305500 }, { "epoch": 51.06, "learning_rate": 2.4479414693598492e-05, "loss": 2.0534, "step": 10306000 }, { "epoch": 51.06, "learning_rate": 2.4478176107172405e-05, "loss": 2.0561, "step": 10306500 }, { "epoch": 51.06, "learning_rate": 2.4476937520746322e-05, "loss": 2.0589, "step": 10307000 }, { "epoch": 51.07, "learning_rate": 2.4475703888665943e-05, "loss": 2.0619, "step": 10307500 }, { "epoch": 51.07, "learning_rate": 2.447446530223986e-05, "loss": 2.0323, "step": 10308000 }, { "epoch": 51.07, "learning_rate": 2.4473226715813777e-05, "loss": 2.0279, "step": 10308500 }, { "epoch": 51.07, "learning_rate": 2.4471988129387694e-05, "loss": 2.0489, "step": 10309000 }, { "epoch": 51.08, "learning_rate": 2.447074954296161e-05, "loss": 2.0529, "step": 10309500 }, { "epoch": 51.08, "learning_rate": 2.446951343370838e-05, "loss": 2.0614, "step": 10310000 }, { "epoch": 51.08, "learning_rate": 2.4468274847282293e-05, "loss": 2.0385, "step": 10310500 }, { "epoch": 51.08, "learning_rate": 2.446703626085621e-05, "loss": 2.0609, "step": 10311000 }, { "epoch": 51.09, "learning_rate": 2.4465797674430127e-05, "loss": 2.0665, "step": 10311500 }, { "epoch": 51.09, "learning_rate": 2.4464561565176896e-05, "loss": 2.0408, "step": 10312000 }, { "epoch": 51.09, "learning_rate": 2.4463322978750813e-05, "loss": 2.0498, "step": 10312500 }, { "epoch": 51.09, "learning_rate": 2.446208439232473e-05, "loss": 2.0591, "step": 10313000 }, { "epoch": 51.1, "learning_rate": 2.44608482830715e-05, "loss": 2.0425, "step": 10313500 }, { "epoch": 51.1, "learning_rate": 2.4459612173818267e-05, "loss": 2.0444, "step": 10314000 }, { "epoch": 51.1, "learning_rate": 2.4458373587392184e-05, "loss": 2.0561, "step": 10314500 }, { "epoch": 51.1, "learning_rate": 2.44571350009661e-05, "loss": 2.077, "step": 10315000 }, { "epoch": 51.11, "learning_rate": 2.4455896414540015e-05, "loss": 2.0415, "step": 10315500 }, { "epoch": 51.11, "learning_rate": 2.445465782811393e-05, "loss": 2.0656, "step": 10316000 }, { "epoch": 51.11, "learning_rate": 2.445341924168785e-05, "loss": 2.0363, "step": 10316500 }, { "epoch": 51.11, "learning_rate": 2.4452180655261765e-05, "loss": 2.0489, "step": 10317000 }, { "epoch": 51.12, "learning_rate": 2.445094206883568e-05, "loss": 2.0316, "step": 10317500 }, { "epoch": 51.12, "learning_rate": 2.4449703482409596e-05, "loss": 2.0433, "step": 10318000 }, { "epoch": 51.12, "learning_rate": 2.4448464895983513e-05, "loss": 2.0322, "step": 10318500 }, { "epoch": 51.12, "learning_rate": 2.444722630955743e-05, "loss": 2.0554, "step": 10319000 }, { "epoch": 51.13, "learning_rate": 2.4445987723131343e-05, "loss": 2.043, "step": 10319500 }, { "epoch": 51.13, "learning_rate": 2.444474913670526e-05, "loss": 2.0398, "step": 10320000 }, { "epoch": 51.13, "learning_rate": 2.4443510550279177e-05, "loss": 2.0735, "step": 10320500 }, { "epoch": 51.13, "learning_rate": 2.4442274441025946e-05, "loss": 2.0307, "step": 10321000 }, { "epoch": 51.14, "learning_rate": 2.4441035854599863e-05, "loss": 2.0541, "step": 10321500 }, { "epoch": 51.14, "learning_rate": 2.4439799745346632e-05, "loss": 2.055, "step": 10322000 }, { "epoch": 51.14, "learning_rate": 2.443856115892055e-05, "loss": 2.0458, "step": 10322500 }, { "epoch": 51.14, "learning_rate": 2.4437325049667317e-05, "loss": 2.0492, "step": 10323000 }, { "epoch": 51.15, "learning_rate": 2.4436086463241234e-05, "loss": 2.0538, "step": 10323500 }, { "epoch": 51.15, "learning_rate": 2.443484787681515e-05, "loss": 2.0274, "step": 10324000 }, { "epoch": 51.15, "learning_rate": 2.4433609290389068e-05, "loss": 2.0545, "step": 10324500 }, { "epoch": 51.15, "learning_rate": 2.4432370703962982e-05, "loss": 2.042, "step": 10325000 }, { "epoch": 51.16, "learning_rate": 2.443113459470975e-05, "loss": 2.051, "step": 10325500 }, { "epoch": 51.16, "learning_rate": 2.4429896008283668e-05, "loss": 2.0376, "step": 10326000 }, { "epoch": 51.16, "learning_rate": 2.4428657421857584e-05, "loss": 2.0663, "step": 10326500 }, { "epoch": 51.16, "learning_rate": 2.44274188354315e-05, "loss": 2.028, "step": 10327000 }, { "epoch": 51.17, "learning_rate": 2.442618024900542e-05, "loss": 2.0471, "step": 10327500 }, { "epoch": 51.17, "learning_rate": 2.4424941662579332e-05, "loss": 2.0491, "step": 10328000 }, { "epoch": 51.17, "learning_rate": 2.442370307615325e-05, "loss": 2.0376, "step": 10328500 }, { "epoch": 51.17, "learning_rate": 2.4422464489727166e-05, "loss": 2.0751, "step": 10329000 }, { "epoch": 51.18, "learning_rate": 2.4421225903301083e-05, "loss": 2.0649, "step": 10329500 }, { "epoch": 51.18, "learning_rate": 2.4419987316875e-05, "loss": 2.0805, "step": 10330000 }, { "epoch": 51.18, "learning_rate": 2.4418748730448916e-05, "loss": 2.0315, "step": 10330500 }, { "epoch": 51.18, "learning_rate": 2.441751014402283e-05, "loss": 2.0584, "step": 10331000 }, { "epoch": 51.19, "learning_rate": 2.4416271557596747e-05, "loss": 2.0569, "step": 10331500 }, { "epoch": 51.19, "learning_rate": 2.4415032971170664e-05, "loss": 2.0274, "step": 10332000 }, { "epoch": 51.19, "learning_rate": 2.4413794384744577e-05, "loss": 2.0455, "step": 10332500 }, { "epoch": 51.19, "learning_rate": 2.4412555798318494e-05, "loss": 2.0447, "step": 10333000 }, { "epoch": 51.2, "learning_rate": 2.441131721189241e-05, "loss": 2.0533, "step": 10333500 }, { "epoch": 51.2, "learning_rate": 2.4410081102639183e-05, "loss": 2.0797, "step": 10334000 }, { "epoch": 51.2, "learning_rate": 2.4408842516213097e-05, "loss": 2.044, "step": 10334500 }, { "epoch": 51.2, "learning_rate": 2.4407603929787014e-05, "loss": 2.0481, "step": 10335000 }, { "epoch": 51.21, "learning_rate": 2.4406367820533783e-05, "loss": 2.0566, "step": 10335500 }, { "epoch": 51.21, "learning_rate": 2.44051292341077e-05, "loss": 2.0262, "step": 10336000 }, { "epoch": 51.21, "learning_rate": 2.4403890647681617e-05, "loss": 2.0525, "step": 10336500 }, { "epoch": 51.21, "learning_rate": 2.4402652061255534e-05, "loss": 2.0429, "step": 10337000 }, { "epoch": 51.22, "learning_rate": 2.4401413474829447e-05, "loss": 2.0394, "step": 10337500 }, { "epoch": 51.22, "learning_rate": 2.4400174888403364e-05, "loss": 2.0567, "step": 10338000 }, { "epoch": 51.22, "learning_rate": 2.4398938779150133e-05, "loss": 2.0467, "step": 10338500 }, { "epoch": 51.22, "learning_rate": 2.439770019272405e-05, "loss": 2.0456, "step": 10339000 }, { "epoch": 51.23, "learning_rate": 2.439646408347082e-05, "loss": 2.053, "step": 10339500 }, { "epoch": 51.23, "learning_rate": 2.4395225497044735e-05, "loss": 2.0468, "step": 10340000 }, { "epoch": 51.23, "learning_rate": 2.439398691061865e-05, "loss": 2.0562, "step": 10340500 }, { "epoch": 51.23, "learning_rate": 2.4392748324192566e-05, "loss": 2.0694, "step": 10341000 }, { "epoch": 51.24, "learning_rate": 2.4391509737766483e-05, "loss": 2.0633, "step": 10341500 }, { "epoch": 51.24, "learning_rate": 2.43902711513404e-05, "loss": 2.0479, "step": 10342000 }, { "epoch": 51.24, "learning_rate": 2.4389032564914317e-05, "loss": 2.0482, "step": 10342500 }, { "epoch": 51.24, "learning_rate": 2.4387793978488234e-05, "loss": 2.0416, "step": 10343000 }, { "epoch": 51.25, "learning_rate": 2.438655539206215e-05, "loss": 2.0372, "step": 10343500 }, { "epoch": 51.25, "learning_rate": 2.4385316805636068e-05, "loss": 2.0447, "step": 10344000 }, { "epoch": 51.25, "learning_rate": 2.438407821920998e-05, "loss": 2.0376, "step": 10344500 }, { "epoch": 51.25, "learning_rate": 2.4382839632783898e-05, "loss": 2.0616, "step": 10345000 }, { "epoch": 51.26, "learning_rate": 2.4381603523530667e-05, "loss": 2.0499, "step": 10345500 }, { "epoch": 51.26, "learning_rate": 2.4380364937104584e-05, "loss": 2.0635, "step": 10346000 }, { "epoch": 51.26, "learning_rate": 2.43791263506785e-05, "loss": 2.044, "step": 10346500 }, { "epoch": 51.26, "learning_rate": 2.4377887764252418e-05, "loss": 2.0615, "step": 10347000 }, { "epoch": 51.27, "learning_rate": 2.4376649177826335e-05, "loss": 2.053, "step": 10347500 }, { "epoch": 51.27, "learning_rate": 2.43754130685731e-05, "loss": 2.0609, "step": 10348000 }, { "epoch": 51.27, "learning_rate": 2.4374174482147017e-05, "loss": 2.0459, "step": 10348500 }, { "epoch": 51.27, "learning_rate": 2.4372935895720934e-05, "loss": 2.074, "step": 10349000 }, { "epoch": 51.28, "learning_rate": 2.437169730929485e-05, "loss": 2.0591, "step": 10349500 }, { "epoch": 51.28, "learning_rate": 2.4370458722868768e-05, "loss": 2.0421, "step": 10350000 }, { "epoch": 51.28, "learning_rate": 2.4369220136442685e-05, "loss": 2.0521, "step": 10350500 }, { "epoch": 51.28, "learning_rate": 2.436798402718945e-05, "loss": 2.0647, "step": 10351000 }, { "epoch": 51.28, "learning_rate": 2.4366745440763367e-05, "loss": 2.0631, "step": 10351500 }, { "epoch": 51.29, "learning_rate": 2.4365506854337284e-05, "loss": 2.0561, "step": 10352000 }, { "epoch": 51.29, "learning_rate": 2.43642682679112e-05, "loss": 2.048, "step": 10352500 }, { "epoch": 51.29, "learning_rate": 2.4363029681485118e-05, "loss": 2.0566, "step": 10353000 }, { "epoch": 51.29, "learning_rate": 2.4361791095059035e-05, "loss": 2.067, "step": 10353500 }, { "epoch": 51.3, "learning_rate": 2.4360552508632948e-05, "loss": 2.0301, "step": 10354000 }, { "epoch": 51.3, "learning_rate": 2.4359313922206865e-05, "loss": 2.0627, "step": 10354500 }, { "epoch": 51.3, "learning_rate": 2.4358075335780782e-05, "loss": 2.0553, "step": 10355000 }, { "epoch": 51.3, "learning_rate": 2.43568367493547e-05, "loss": 2.0701, "step": 10355500 }, { "epoch": 51.31, "learning_rate": 2.4355598162928613e-05, "loss": 2.0451, "step": 10356000 }, { "epoch": 51.31, "learning_rate": 2.435435957650253e-05, "loss": 2.0543, "step": 10356500 }, { "epoch": 51.31, "learning_rate": 2.43531234672493e-05, "loss": 2.0387, "step": 10357000 }, { "epoch": 51.31, "learning_rate": 2.4351884880823215e-05, "loss": 2.0592, "step": 10357500 }, { "epoch": 51.32, "learning_rate": 2.4350646294397132e-05, "loss": 2.0626, "step": 10358000 }, { "epoch": 51.32, "learning_rate": 2.43494101851439e-05, "loss": 2.0656, "step": 10358500 }, { "epoch": 51.32, "learning_rate": 2.4348174075890666e-05, "loss": 2.0347, "step": 10359000 }, { "epoch": 51.32, "learning_rate": 2.4346935489464583e-05, "loss": 2.0392, "step": 10359500 }, { "epoch": 51.33, "learning_rate": 2.43456969030385e-05, "loss": 2.0248, "step": 10360000 }, { "epoch": 51.33, "learning_rate": 2.4344458316612417e-05, "loss": 2.0519, "step": 10360500 }, { "epoch": 51.33, "learning_rate": 2.4343219730186334e-05, "loss": 2.0786, "step": 10361000 }, { "epoch": 51.33, "learning_rate": 2.434198114376025e-05, "loss": 2.0357, "step": 10361500 }, { "epoch": 51.34, "learning_rate": 2.4340742557334168e-05, "loss": 2.0106, "step": 10362000 }, { "epoch": 51.34, "learning_rate": 2.4339503970908085e-05, "loss": 2.0355, "step": 10362500 }, { "epoch": 51.34, "learning_rate": 2.4338265384482002e-05, "loss": 2.0535, "step": 10363000 }, { "epoch": 51.34, "learning_rate": 2.4337026798055915e-05, "loss": 2.0507, "step": 10363500 }, { "epoch": 51.35, "learning_rate": 2.4335788211629832e-05, "loss": 2.0752, "step": 10364000 }, { "epoch": 51.35, "learning_rate": 2.433454962520375e-05, "loss": 2.0585, "step": 10364500 }, { "epoch": 51.35, "learning_rate": 2.4333311038777666e-05, "loss": 2.0646, "step": 10365000 }, { "epoch": 51.35, "learning_rate": 2.4332074929524435e-05, "loss": 2.054, "step": 10365500 }, { "epoch": 51.36, "learning_rate": 2.4330836343098352e-05, "loss": 2.067, "step": 10366000 }, { "epoch": 51.36, "learning_rate": 2.4329597756672265e-05, "loss": 2.0536, "step": 10366500 }, { "epoch": 51.36, "learning_rate": 2.4328359170246182e-05, "loss": 2.0505, "step": 10367000 }, { "epoch": 51.36, "learning_rate": 2.43271205838201e-05, "loss": 2.0499, "step": 10367500 }, { "epoch": 51.37, "learning_rate": 2.4325881997394016e-05, "loss": 2.0488, "step": 10368000 }, { "epoch": 51.37, "learning_rate": 2.4324643410967933e-05, "loss": 2.0598, "step": 10368500 }, { "epoch": 51.37, "learning_rate": 2.4323407301714702e-05, "loss": 2.0425, "step": 10369000 }, { "epoch": 51.37, "learning_rate": 2.432216871528862e-05, "loss": 2.0742, "step": 10369500 }, { "epoch": 51.38, "learning_rate": 2.4320930128862532e-05, "loss": 2.0628, "step": 10370000 }, { "epoch": 51.38, "learning_rate": 2.431969154243645e-05, "loss": 2.0541, "step": 10370500 }, { "epoch": 51.38, "learning_rate": 2.4318452956010366e-05, "loss": 2.0644, "step": 10371000 }, { "epoch": 51.38, "learning_rate": 2.4317214369584283e-05, "loss": 2.0588, "step": 10371500 }, { "epoch": 51.39, "learning_rate": 2.43159757831582e-05, "loss": 2.0622, "step": 10372000 }, { "epoch": 51.39, "learning_rate": 2.4314737196732114e-05, "loss": 2.0518, "step": 10372500 }, { "epoch": 51.39, "learning_rate": 2.431349861030603e-05, "loss": 2.056, "step": 10373000 }, { "epoch": 51.39, "learning_rate": 2.43122625010528e-05, "loss": 2.0637, "step": 10373500 }, { "epoch": 51.4, "learning_rate": 2.4311026391799568e-05, "loss": 2.0595, "step": 10374000 }, { "epoch": 51.4, "learning_rate": 2.4309787805373485e-05, "loss": 2.0389, "step": 10374500 }, { "epoch": 51.4, "learning_rate": 2.4308549218947402e-05, "loss": 2.0253, "step": 10375000 }, { "epoch": 51.4, "learning_rate": 2.430731063252132e-05, "loss": 2.0573, "step": 10375500 }, { "epoch": 51.41, "learning_rate": 2.4306072046095232e-05, "loss": 2.0722, "step": 10376000 }, { "epoch": 51.41, "learning_rate": 2.4304835936842e-05, "loss": 2.0169, "step": 10376500 }, { "epoch": 51.41, "learning_rate": 2.4303597350415918e-05, "loss": 2.0391, "step": 10377000 }, { "epoch": 51.41, "learning_rate": 2.4302358763989835e-05, "loss": 2.0356, "step": 10377500 }, { "epoch": 51.42, "learning_rate": 2.43011226547366e-05, "loss": 2.075, "step": 10378000 }, { "epoch": 51.42, "learning_rate": 2.4299884068310517e-05, "loss": 2.0562, "step": 10378500 }, { "epoch": 51.42, "learning_rate": 2.4298645481884434e-05, "loss": 2.048, "step": 10379000 }, { "epoch": 51.42, "learning_rate": 2.429740689545835e-05, "loss": 2.0481, "step": 10379500 }, { "epoch": 51.43, "learning_rate": 2.4296168309032268e-05, "loss": 2.0714, "step": 10380000 }, { "epoch": 51.43, "learning_rate": 2.4294932199779037e-05, "loss": 2.0655, "step": 10380500 }, { "epoch": 51.43, "learning_rate": 2.4293693613352954e-05, "loss": 2.0824, "step": 10381000 }, { "epoch": 51.43, "learning_rate": 2.4292455026926868e-05, "loss": 2.0666, "step": 10381500 }, { "epoch": 51.44, "learning_rate": 2.4291216440500784e-05, "loss": 2.0521, "step": 10382000 }, { "epoch": 51.44, "learning_rate": 2.42899778540747e-05, "loss": 2.0706, "step": 10382500 }, { "epoch": 51.44, "learning_rate": 2.4288741744821474e-05, "loss": 2.0275, "step": 10383000 }, { "epoch": 51.44, "learning_rate": 2.4287503158395387e-05, "loss": 2.0452, "step": 10383500 }, { "epoch": 51.45, "learning_rate": 2.4286264571969304e-05, "loss": 2.0568, "step": 10384000 }, { "epoch": 51.45, "learning_rate": 2.428502598554322e-05, "loss": 2.0424, "step": 10384500 }, { "epoch": 51.45, "learning_rate": 2.4283787399117135e-05, "loss": 2.0512, "step": 10385000 }, { "epoch": 51.45, "learning_rate": 2.428254881269105e-05, "loss": 2.0348, "step": 10385500 }, { "epoch": 51.46, "learning_rate": 2.428131022626497e-05, "loss": 2.0502, "step": 10386000 }, { "epoch": 51.46, "learning_rate": 2.4280071639838885e-05, "loss": 2.057, "step": 10386500 }, { "epoch": 51.46, "learning_rate": 2.4278835530585654e-05, "loss": 2.0502, "step": 10387000 }, { "epoch": 51.46, "learning_rate": 2.4277599421332423e-05, "loss": 2.0544, "step": 10387500 }, { "epoch": 51.47, "learning_rate": 2.4276363312079192e-05, "loss": 2.0846, "step": 10388000 }, { "epoch": 51.47, "learning_rate": 2.427512472565311e-05, "loss": 2.092, "step": 10388500 }, { "epoch": 51.47, "learning_rate": 2.4273886139227022e-05, "loss": 2.0646, "step": 10389000 }, { "epoch": 51.47, "learning_rate": 2.427265002997379e-05, "loss": 2.0494, "step": 10389500 }, { "epoch": 51.48, "learning_rate": 2.4271411443547708e-05, "loss": 2.0667, "step": 10390000 }, { "epoch": 51.48, "learning_rate": 2.427017533429448e-05, "loss": 2.0687, "step": 10390500 }, { "epoch": 51.48, "learning_rate": 2.4268936747868394e-05, "loss": 2.0447, "step": 10391000 }, { "epoch": 51.48, "learning_rate": 2.426769816144231e-05, "loss": 2.0592, "step": 10391500 }, { "epoch": 51.49, "learning_rate": 2.4266459575016228e-05, "loss": 2.0666, "step": 10392000 }, { "epoch": 51.49, "learning_rate": 2.426522098859014e-05, "loss": 2.0413, "step": 10392500 }, { "epoch": 51.49, "learning_rate": 2.4263982402164058e-05, "loss": 2.0456, "step": 10393000 }, { "epoch": 51.49, "learning_rate": 2.4262743815737975e-05, "loss": 2.0507, "step": 10393500 }, { "epoch": 51.5, "learning_rate": 2.4261505229311892e-05, "loss": 2.0922, "step": 10394000 }, { "epoch": 51.5, "learning_rate": 2.426026664288581e-05, "loss": 2.061, "step": 10394500 }, { "epoch": 51.5, "learning_rate": 2.4259028056459726e-05, "loss": 2.0556, "step": 10395000 }, { "epoch": 51.5, "learning_rate": 2.425778947003364e-05, "loss": 2.0364, "step": 10395500 }, { "epoch": 51.51, "learning_rate": 2.4256550883607556e-05, "loss": 2.0341, "step": 10396000 }, { "epoch": 51.51, "learning_rate": 2.4255312297181473e-05, "loss": 2.0737, "step": 10396500 }, { "epoch": 51.51, "learning_rate": 2.425407371075539e-05, "loss": 2.0656, "step": 10397000 }, { "epoch": 51.51, "learning_rate": 2.4252835124329307e-05, "loss": 2.0807, "step": 10397500 }, { "epoch": 51.52, "learning_rate": 2.4251596537903224e-05, "loss": 2.0583, "step": 10398000 }, { "epoch": 51.52, "learning_rate": 2.425035795147714e-05, "loss": 2.0611, "step": 10398500 }, { "epoch": 51.52, "learning_rate": 2.4249121842223906e-05, "loss": 2.0489, "step": 10399000 }, { "epoch": 51.52, "learning_rate": 2.4247883255797823e-05, "loss": 2.0565, "step": 10399500 }, { "epoch": 51.53, "learning_rate": 2.424664466937174e-05, "loss": 2.0621, "step": 10400000 }, { "epoch": 51.53, "learning_rate": 2.4245406082945657e-05, "loss": 2.0618, "step": 10400500 }, { "epoch": 51.53, "learning_rate": 2.4244167496519574e-05, "loss": 2.0569, "step": 10401000 }, { "epoch": 51.53, "learning_rate": 2.424292891009349e-05, "loss": 2.0513, "step": 10401500 }, { "epoch": 51.54, "learning_rate": 2.4241692800840256e-05, "loss": 2.0685, "step": 10402000 }, { "epoch": 51.54, "learning_rate": 2.4240454214414173e-05, "loss": 2.0482, "step": 10402500 }, { "epoch": 51.54, "learning_rate": 2.423921562798809e-05, "loss": 2.0482, "step": 10403000 }, { "epoch": 51.54, "learning_rate": 2.4237977041562007e-05, "loss": 2.065, "step": 10403500 }, { "epoch": 51.55, "learning_rate": 2.4236738455135924e-05, "loss": 2.0522, "step": 10404000 }, { "epoch": 51.55, "learning_rate": 2.423549986870984e-05, "loss": 2.0617, "step": 10404500 }, { "epoch": 51.55, "learning_rate": 2.4234261282283758e-05, "loss": 2.0461, "step": 10405000 }, { "epoch": 51.55, "learning_rate": 2.4233025173030523e-05, "loss": 2.0436, "step": 10405500 }, { "epoch": 51.55, "learning_rate": 2.423178658660444e-05, "loss": 2.0364, "step": 10406000 }, { "epoch": 51.56, "learning_rate": 2.4230548000178357e-05, "loss": 2.0653, "step": 10406500 }, { "epoch": 51.56, "learning_rate": 2.4229309413752274e-05, "loss": 2.0584, "step": 10407000 }, { "epoch": 51.56, "learning_rate": 2.4228073304499043e-05, "loss": 2.0421, "step": 10407500 }, { "epoch": 51.56, "learning_rate": 2.4226834718072956e-05, "loss": 2.0605, "step": 10408000 }, { "epoch": 51.57, "learning_rate": 2.4225596131646873e-05, "loss": 2.0721, "step": 10408500 }, { "epoch": 51.57, "learning_rate": 2.422435754522079e-05, "loss": 2.0521, "step": 10409000 }, { "epoch": 51.57, "learning_rate": 2.4223118958794707e-05, "loss": 2.0675, "step": 10409500 }, { "epoch": 51.57, "learning_rate": 2.4221880372368624e-05, "loss": 2.0354, "step": 10410000 }, { "epoch": 51.58, "learning_rate": 2.422064178594254e-05, "loss": 2.0577, "step": 10410500 }, { "epoch": 51.58, "learning_rate": 2.4219403199516458e-05, "loss": 2.0532, "step": 10411000 }, { "epoch": 51.58, "learning_rate": 2.4218167090263223e-05, "loss": 2.0514, "step": 10411500 }, { "epoch": 51.58, "learning_rate": 2.4216933458182848e-05, "loss": 2.0609, "step": 10412000 }, { "epoch": 51.59, "learning_rate": 2.4215694871756764e-05, "loss": 2.0707, "step": 10412500 }, { "epoch": 51.59, "learning_rate": 2.4214456285330678e-05, "loss": 2.0448, "step": 10413000 }, { "epoch": 51.59, "learning_rate": 2.4213217698904595e-05, "loss": 2.0485, "step": 10413500 }, { "epoch": 51.59, "learning_rate": 2.4211979112478512e-05, "loss": 2.0713, "step": 10414000 }, { "epoch": 51.6, "learning_rate": 2.421074052605243e-05, "loss": 2.0526, "step": 10414500 }, { "epoch": 51.6, "learning_rate": 2.4209501939626346e-05, "loss": 2.0615, "step": 10415000 }, { "epoch": 51.6, "learning_rate": 2.4208263353200263e-05, "loss": 2.0755, "step": 10415500 }, { "epoch": 51.6, "learning_rate": 2.4207027243947028e-05, "loss": 2.0608, "step": 10416000 }, { "epoch": 51.61, "learning_rate": 2.4205788657520945e-05, "loss": 2.0419, "step": 10416500 }, { "epoch": 51.61, "learning_rate": 2.4204550071094862e-05, "loss": 2.0523, "step": 10417000 }, { "epoch": 51.61, "learning_rate": 2.420331148466878e-05, "loss": 2.0618, "step": 10417500 }, { "epoch": 51.61, "learning_rate": 2.4202072898242696e-05, "loss": 2.0511, "step": 10418000 }, { "epoch": 51.62, "learning_rate": 2.4200836788989465e-05, "loss": 2.0691, "step": 10418500 }, { "epoch": 51.62, "learning_rate": 2.4199598202563378e-05, "loss": 2.0845, "step": 10419000 }, { "epoch": 51.62, "learning_rate": 2.4198359616137295e-05, "loss": 2.0424, "step": 10419500 }, { "epoch": 51.62, "learning_rate": 2.4197121029711212e-05, "loss": 2.0278, "step": 10420000 }, { "epoch": 51.63, "learning_rate": 2.419588244328513e-05, "loss": 2.0464, "step": 10420500 }, { "epoch": 51.63, "learning_rate": 2.4194643856859046e-05, "loss": 2.0413, "step": 10421000 }, { "epoch": 51.63, "learning_rate": 2.4193405270432963e-05, "loss": 2.0387, "step": 10421500 }, { "epoch": 51.63, "learning_rate": 2.419216668400688e-05, "loss": 2.0454, "step": 10422000 }, { "epoch": 51.64, "learning_rate": 2.4190928097580797e-05, "loss": 2.058, "step": 10422500 }, { "epoch": 51.64, "learning_rate": 2.418968951115471e-05, "loss": 2.0434, "step": 10423000 }, { "epoch": 51.64, "learning_rate": 2.4188450924728627e-05, "loss": 2.0807, "step": 10423500 }, { "epoch": 51.64, "learning_rate": 2.418721233830254e-05, "loss": 2.0875, "step": 10424000 }, { "epoch": 51.65, "learning_rate": 2.4185973751876458e-05, "loss": 2.0754, "step": 10424500 }, { "epoch": 51.65, "learning_rate": 2.4184735165450374e-05, "loss": 2.0366, "step": 10425000 }, { "epoch": 51.65, "learning_rate": 2.4183499056197147e-05, "loss": 2.0602, "step": 10425500 }, { "epoch": 51.65, "learning_rate": 2.4182260469771064e-05, "loss": 2.0382, "step": 10426000 }, { "epoch": 51.66, "learning_rate": 2.4181021883344977e-05, "loss": 2.0687, "step": 10426500 }, { "epoch": 51.66, "learning_rate": 2.4179783296918894e-05, "loss": 2.0703, "step": 10427000 }, { "epoch": 51.66, "learning_rate": 2.4178547187665663e-05, "loss": 2.0424, "step": 10427500 }, { "epoch": 51.66, "learning_rate": 2.417730860123958e-05, "loss": 2.0401, "step": 10428000 }, { "epoch": 51.67, "learning_rate": 2.4176074969159197e-05, "loss": 2.0535, "step": 10428500 }, { "epoch": 51.67, "learning_rate": 2.4174836382733114e-05, "loss": 2.0763, "step": 10429000 }, { "epoch": 51.67, "learning_rate": 2.417359779630703e-05, "loss": 2.0517, "step": 10429500 }, { "epoch": 51.67, "learning_rate": 2.4172359209880948e-05, "loss": 2.0684, "step": 10430000 }, { "epoch": 51.68, "learning_rate": 2.4171123100627717e-05, "loss": 2.0769, "step": 10430500 }, { "epoch": 51.68, "learning_rate": 2.4169884514201634e-05, "loss": 2.0275, "step": 10431000 }, { "epoch": 51.68, "learning_rate": 2.416864592777555e-05, "loss": 2.065, "step": 10431500 }, { "epoch": 51.68, "learning_rate": 2.4167407341349464e-05, "loss": 2.0637, "step": 10432000 }, { "epoch": 51.69, "learning_rate": 2.416616875492338e-05, "loss": 2.0629, "step": 10432500 }, { "epoch": 51.69, "learning_rate": 2.4164930168497298e-05, "loss": 2.0512, "step": 10433000 }, { "epoch": 51.69, "learning_rate": 2.4163691582071215e-05, "loss": 2.0536, "step": 10433500 }, { "epoch": 51.69, "learning_rate": 2.4162452995645132e-05, "loss": 2.044, "step": 10434000 }, { "epoch": 51.7, "learning_rate": 2.416121440921905e-05, "loss": 2.0441, "step": 10434500 }, { "epoch": 51.7, "learning_rate": 2.4159975822792962e-05, "loss": 2.0596, "step": 10435000 }, { "epoch": 51.7, "learning_rate": 2.415873723636688e-05, "loss": 2.045, "step": 10435500 }, { "epoch": 51.7, "learning_rate": 2.4157498649940796e-05, "loss": 2.056, "step": 10436000 }, { "epoch": 51.71, "learning_rate": 2.4156260063514713e-05, "loss": 2.0744, "step": 10436500 }, { "epoch": 51.71, "learning_rate": 2.415502147708863e-05, "loss": 2.0485, "step": 10437000 }, { "epoch": 51.71, "learning_rate": 2.4153782890662547e-05, "loss": 2.0387, "step": 10437500 }, { "epoch": 51.71, "learning_rate": 2.4152544304236464e-05, "loss": 2.0663, "step": 10438000 }, { "epoch": 51.72, "learning_rate": 2.415130819498323e-05, "loss": 2.068, "step": 10438500 }, { "epoch": 51.72, "learning_rate": 2.4150069608557146e-05, "loss": 2.0808, "step": 10439000 }, { "epoch": 51.72, "learning_rate": 2.4148833499303915e-05, "loss": 2.0842, "step": 10439500 }, { "epoch": 51.72, "learning_rate": 2.4147594912877832e-05, "loss": 2.086, "step": 10440000 }, { "epoch": 51.73, "learning_rate": 2.414635632645175e-05, "loss": 2.0649, "step": 10440500 }, { "epoch": 51.73, "learning_rate": 2.4145117740025662e-05, "loss": 2.087, "step": 10441000 }, { "epoch": 51.73, "learning_rate": 2.414387915359958e-05, "loss": 2.0699, "step": 10441500 }, { "epoch": 51.73, "learning_rate": 2.4142643044346348e-05, "loss": 2.042, "step": 10442000 }, { "epoch": 51.74, "learning_rate": 2.414140693509312e-05, "loss": 2.0585, "step": 10442500 }, { "epoch": 51.74, "learning_rate": 2.4140168348667034e-05, "loss": 2.0548, "step": 10443000 }, { "epoch": 51.74, "learning_rate": 2.413892976224095e-05, "loss": 2.0827, "step": 10443500 }, { "epoch": 51.74, "learning_rate": 2.4137691175814868e-05, "loss": 2.0667, "step": 10444000 }, { "epoch": 51.75, "learning_rate": 2.4136452589388785e-05, "loss": 2.0684, "step": 10444500 }, { "epoch": 51.75, "learning_rate": 2.41352140029627e-05, "loss": 2.0901, "step": 10445000 }, { "epoch": 51.75, "learning_rate": 2.4133975416536615e-05, "loss": 2.0582, "step": 10445500 }, { "epoch": 51.75, "learning_rate": 2.4132739307283384e-05, "loss": 2.0754, "step": 10446000 }, { "epoch": 51.76, "learning_rate": 2.41315007208573e-05, "loss": 2.069, "step": 10446500 }, { "epoch": 51.76, "learning_rate": 2.4130262134431218e-05, "loss": 2.054, "step": 10447000 }, { "epoch": 51.76, "learning_rate": 2.4129023548005135e-05, "loss": 2.064, "step": 10447500 }, { "epoch": 51.76, "learning_rate": 2.412778496157905e-05, "loss": 2.071, "step": 10448000 }, { "epoch": 51.77, "learning_rate": 2.412654637515297e-05, "loss": 2.0583, "step": 10448500 }, { "epoch": 51.77, "learning_rate": 2.4125310265899734e-05, "loss": 2.0574, "step": 10449000 }, { "epoch": 51.77, "learning_rate": 2.412407167947365e-05, "loss": 2.0484, "step": 10449500 }, { "epoch": 51.77, "learning_rate": 2.4122833093047568e-05, "loss": 2.0379, "step": 10450000 }, { "epoch": 51.78, "learning_rate": 2.4121594506621485e-05, "loss": 2.0612, "step": 10450500 }, { "epoch": 51.78, "learning_rate": 2.4120355920195402e-05, "loss": 2.0541, "step": 10451000 }, { "epoch": 51.78, "learning_rate": 2.411911981094217e-05, "loss": 2.0848, "step": 10451500 }, { "epoch": 51.78, "learning_rate": 2.4117883701688936e-05, "loss": 2.0725, "step": 10452000 }, { "epoch": 51.79, "learning_rate": 2.4116645115262853e-05, "loss": 2.0663, "step": 10452500 }, { "epoch": 51.79, "learning_rate": 2.411540652883677e-05, "loss": 2.0672, "step": 10453000 }, { "epoch": 51.79, "learning_rate": 2.4114167942410687e-05, "loss": 2.0655, "step": 10453500 }, { "epoch": 51.79, "learning_rate": 2.4112931833157456e-05, "loss": 2.0359, "step": 10454000 }, { "epoch": 51.8, "learning_rate": 2.411169324673137e-05, "loss": 2.0547, "step": 10454500 }, { "epoch": 51.8, "learning_rate": 2.4110454660305286e-05, "loss": 2.0479, "step": 10455000 }, { "epoch": 51.8, "learning_rate": 2.4109216073879203e-05, "loss": 2.0608, "step": 10455500 }, { "epoch": 51.8, "learning_rate": 2.410797748745312e-05, "loss": 2.0631, "step": 10456000 }, { "epoch": 51.81, "learning_rate": 2.4106738901027037e-05, "loss": 2.0617, "step": 10456500 }, { "epoch": 51.81, "learning_rate": 2.4105500314600954e-05, "loss": 2.0554, "step": 10457000 }, { "epoch": 51.81, "learning_rate": 2.410426172817487e-05, "loss": 2.0487, "step": 10457500 }, { "epoch": 51.81, "learning_rate": 2.4103023141748788e-05, "loss": 2.0718, "step": 10458000 }, { "epoch": 51.82, "learning_rate": 2.41017845553227e-05, "loss": 2.0444, "step": 10458500 }, { "epoch": 51.82, "learning_rate": 2.4100545968896618e-05, "loss": 2.0678, "step": 10459000 }, { "epoch": 51.82, "learning_rate": 2.4099307382470535e-05, "loss": 2.0404, "step": 10459500 }, { "epoch": 51.82, "learning_rate": 2.4098068796044452e-05, "loss": 2.0347, "step": 10460000 }, { "epoch": 51.82, "learning_rate": 2.409683020961837e-05, "loss": 2.0531, "step": 10460500 }, { "epoch": 51.83, "learning_rate": 2.4095591623192286e-05, "loss": 2.0479, "step": 10461000 }, { "epoch": 51.83, "learning_rate": 2.409435551393905e-05, "loss": 2.0603, "step": 10461500 }, { "epoch": 51.83, "learning_rate": 2.4093116927512968e-05, "loss": 2.0624, "step": 10462000 }, { "epoch": 51.83, "learning_rate": 2.4091878341086885e-05, "loss": 2.0543, "step": 10462500 }, { "epoch": 51.84, "learning_rate": 2.4090639754660802e-05, "loss": 2.0583, "step": 10463000 }, { "epoch": 51.84, "learning_rate": 2.408940116823472e-05, "loss": 2.0448, "step": 10463500 }, { "epoch": 51.84, "learning_rate": 2.4088162581808636e-05, "loss": 2.0646, "step": 10464000 }, { "epoch": 51.84, "learning_rate": 2.4086926472555405e-05, "loss": 2.0368, "step": 10464500 }, { "epoch": 51.85, "learning_rate": 2.4085687886129318e-05, "loss": 2.0469, "step": 10465000 }, { "epoch": 51.85, "learning_rate": 2.4084449299703235e-05, "loss": 2.0579, "step": 10465500 }, { "epoch": 51.85, "learning_rate": 2.408321566762286e-05, "loss": 2.0826, "step": 10466000 }, { "epoch": 51.85, "learning_rate": 2.4081977081196773e-05, "loss": 2.0584, "step": 10466500 }, { "epoch": 51.86, "learning_rate": 2.408073849477069e-05, "loss": 2.0379, "step": 10467000 }, { "epoch": 51.86, "learning_rate": 2.4079499908344607e-05, "loss": 2.0608, "step": 10467500 }, { "epoch": 51.86, "learning_rate": 2.407826132191852e-05, "loss": 2.0393, "step": 10468000 }, { "epoch": 51.86, "learning_rate": 2.4077022735492437e-05, "loss": 2.0516, "step": 10468500 }, { "epoch": 51.87, "learning_rate": 2.4075784149066354e-05, "loss": 2.0314, "step": 10469000 }, { "epoch": 51.87, "learning_rate": 2.407454556264027e-05, "loss": 2.0816, "step": 10469500 }, { "epoch": 51.87, "learning_rate": 2.4073306976214188e-05, "loss": 2.071, "step": 10470000 }, { "epoch": 51.87, "learning_rate": 2.4072070866960957e-05, "loss": 2.0397, "step": 10470500 }, { "epoch": 51.88, "learning_rate": 2.407083228053487e-05, "loss": 2.0976, "step": 10471000 }, { "epoch": 51.88, "learning_rate": 2.4069593694108787e-05, "loss": 2.0573, "step": 10471500 }, { "epoch": 51.88, "learning_rate": 2.4068355107682704e-05, "loss": 2.0537, "step": 10472000 }, { "epoch": 51.88, "learning_rate": 2.406711652125662e-05, "loss": 2.0871, "step": 10472500 }, { "epoch": 51.89, "learning_rate": 2.4065877934830538e-05, "loss": 2.0968, "step": 10473000 }, { "epoch": 51.89, "learning_rate": 2.4064639348404455e-05, "loss": 2.0654, "step": 10473500 }, { "epoch": 51.89, "learning_rate": 2.406340076197837e-05, "loss": 2.0677, "step": 10474000 }, { "epoch": 51.89, "learning_rate": 2.4062164652725137e-05, "loss": 2.0578, "step": 10474500 }, { "epoch": 51.9, "learning_rate": 2.4060926066299054e-05, "loss": 2.0707, "step": 10475000 }, { "epoch": 51.9, "learning_rate": 2.4059689957045826e-05, "loss": 2.0655, "step": 10475500 }, { "epoch": 51.9, "learning_rate": 2.4058453847792592e-05, "loss": 2.0622, "step": 10476000 }, { "epoch": 51.9, "learning_rate": 2.405721526136651e-05, "loss": 2.0664, "step": 10476500 }, { "epoch": 51.91, "learning_rate": 2.4055976674940426e-05, "loss": 2.0633, "step": 10477000 }, { "epoch": 51.91, "learning_rate": 2.4054738088514342e-05, "loss": 2.0599, "step": 10477500 }, { "epoch": 51.91, "learning_rate": 2.405349950208826e-05, "loss": 2.0425, "step": 10478000 }, { "epoch": 51.91, "learning_rate": 2.4052260915662176e-05, "loss": 2.0324, "step": 10478500 }, { "epoch": 51.92, "learning_rate": 2.405102232923609e-05, "loss": 2.0708, "step": 10479000 }, { "epoch": 51.92, "learning_rate": 2.4049783742810007e-05, "loss": 2.0632, "step": 10479500 }, { "epoch": 51.92, "learning_rate": 2.4048545156383924e-05, "loss": 2.0451, "step": 10480000 }, { "epoch": 51.92, "learning_rate": 2.4047309047130693e-05, "loss": 2.06, "step": 10480500 }, { "epoch": 51.93, "learning_rate": 2.404607046070461e-05, "loss": 2.0491, "step": 10481000 }, { "epoch": 51.93, "learning_rate": 2.4044831874278526e-05, "loss": 2.0569, "step": 10481500 }, { "epoch": 51.93, "learning_rate": 2.4043593287852443e-05, "loss": 2.0586, "step": 10482000 }, { "epoch": 51.93, "learning_rate": 2.4042354701426357e-05, "loss": 2.0451, "step": 10482500 }, { "epoch": 51.94, "learning_rate": 2.4041116115000274e-05, "loss": 2.0516, "step": 10483000 }, { "epoch": 51.94, "learning_rate": 2.403987752857419e-05, "loss": 2.0222, "step": 10483500 }, { "epoch": 51.94, "learning_rate": 2.4038638942148108e-05, "loss": 2.0979, "step": 10484000 }, { "epoch": 51.94, "learning_rate": 2.403740035572202e-05, "loss": 2.0677, "step": 10484500 }, { "epoch": 51.95, "learning_rate": 2.4036161769295938e-05, "loss": 2.0646, "step": 10485000 }, { "epoch": 51.95, "learning_rate": 2.4034925660042707e-05, "loss": 2.0583, "step": 10485500 }, { "epoch": 51.95, "learning_rate": 2.4033687073616624e-05, "loss": 2.0312, "step": 10486000 }, { "epoch": 51.95, "learning_rate": 2.403244848719054e-05, "loss": 2.0597, "step": 10486500 }, { "epoch": 51.96, "learning_rate": 2.4031209900764458e-05, "loss": 2.0445, "step": 10487000 }, { "epoch": 51.96, "learning_rate": 2.4029971314338375e-05, "loss": 2.071, "step": 10487500 }, { "epoch": 51.96, "learning_rate": 2.4028737682257992e-05, "loss": 2.0703, "step": 10488000 }, { "epoch": 51.96, "learning_rate": 2.402749909583191e-05, "loss": 2.0525, "step": 10488500 }, { "epoch": 51.97, "learning_rate": 2.4026260509405826e-05, "loss": 2.0643, "step": 10489000 }, { "epoch": 51.97, "learning_rate": 2.4025021922979743e-05, "loss": 2.0541, "step": 10489500 }, { "epoch": 51.97, "learning_rate": 2.402378333655366e-05, "loss": 2.0634, "step": 10490000 }, { "epoch": 51.97, "learning_rate": 2.4022544750127577e-05, "loss": 2.0614, "step": 10490500 }, { "epoch": 51.98, "learning_rate": 2.4021306163701494e-05, "loss": 2.058, "step": 10491000 }, { "epoch": 51.98, "learning_rate": 2.4020067577275407e-05, "loss": 2.068, "step": 10491500 }, { "epoch": 51.98, "learning_rate": 2.401883394519503e-05, "loss": 2.0493, "step": 10492000 }, { "epoch": 51.98, "learning_rate": 2.4017595358768945e-05, "loss": 2.0644, "step": 10492500 }, { "epoch": 51.99, "learning_rate": 2.401635677234286e-05, "loss": 2.0844, "step": 10493000 }, { "epoch": 51.99, "learning_rate": 2.4015118185916775e-05, "loss": 2.0862, "step": 10493500 }, { "epoch": 51.99, "learning_rate": 2.4013879599490692e-05, "loss": 2.0526, "step": 10494000 }, { "epoch": 51.99, "learning_rate": 2.401264101306461e-05, "loss": 2.0731, "step": 10494500 }, { "epoch": 52.0, "learning_rate": 2.4011402426638526e-05, "loss": 2.0629, "step": 10495000 }, { "epoch": 52.0, "learning_rate": 2.4010163840212443e-05, "loss": 2.0426, "step": 10495500 }, { "epoch": 52.0, "eval_accuracy": 0.669845569377203, "eval_accuracy_mlm": 0.6284055277939601, "eval_accuracy_nsp": 0.865425421342255, "eval_loss": 2.3016281127929688, "eval_runtime": 147.0027, "eval_samples_per_second": 1734.384, "eval_steps_per_second": 72.271, "step": 10495836 }, { "epoch": 52.0, "learning_rate": 2.400892525378636e-05, "loss": 2.0483, "step": 10496000 }, { "epoch": 52.0, "learning_rate": 2.4007686667360277e-05, "loss": 2.0356, "step": 10496500 }, { "epoch": 52.01, "learning_rate": 2.4006450558107042e-05, "loss": 2.0346, "step": 10497000 }, { "epoch": 52.01, "learning_rate": 2.400521197168096e-05, "loss": 2.04, "step": 10497500 }, { "epoch": 52.01, "learning_rate": 2.400397586242773e-05, "loss": 2.0476, "step": 10498000 }, { "epoch": 52.01, "learning_rate": 2.4002737276001648e-05, "loss": 2.0315, "step": 10498500 }, { "epoch": 52.02, "learning_rate": 2.4001498689575562e-05, "loss": 2.055, "step": 10499000 }, { "epoch": 52.02, "learning_rate": 2.400026010314948e-05, "loss": 2.0217, "step": 10499500 }, { "epoch": 52.02, "learning_rate": 2.3999021516723396e-05, "loss": 2.0311, "step": 10500000 }, { "epoch": 52.02, "learning_rate": 2.399778293029731e-05, "loss": 2.0361, "step": 10500500 }, { "epoch": 52.03, "learning_rate": 2.399654682104408e-05, "loss": 2.0242, "step": 10501000 }, { "epoch": 52.03, "learning_rate": 2.3995308234617998e-05, "loss": 2.0558, "step": 10501500 }, { "epoch": 52.03, "learning_rate": 2.3994072125364764e-05, "loss": 2.0245, "step": 10502000 }, { "epoch": 52.03, "learning_rate": 2.399283353893868e-05, "loss": 2.0279, "step": 10502500 }, { "epoch": 52.04, "learning_rate": 2.3991594952512598e-05, "loss": 2.036, "step": 10503000 }, { "epoch": 52.04, "learning_rate": 2.3990356366086514e-05, "loss": 2.0294, "step": 10503500 }, { "epoch": 52.04, "learning_rate": 2.398911777966043e-05, "loss": 2.0366, "step": 10504000 }, { "epoch": 52.04, "learning_rate": 2.398787919323435e-05, "loss": 2.0319, "step": 10504500 }, { "epoch": 52.05, "learning_rate": 2.3986640606808265e-05, "loss": 2.0213, "step": 10505000 }, { "epoch": 52.05, "learning_rate": 2.3985402020382182e-05, "loss": 2.0493, "step": 10505500 }, { "epoch": 52.05, "learning_rate": 2.3984163433956096e-05, "loss": 2.0128, "step": 10506000 }, { "epoch": 52.05, "learning_rate": 2.3982924847530013e-05, "loss": 2.0468, "step": 10506500 }, { "epoch": 52.06, "learning_rate": 2.3981686261103926e-05, "loss": 2.025, "step": 10507000 }, { "epoch": 52.06, "learning_rate": 2.3980447674677843e-05, "loss": 2.0414, "step": 10507500 }, { "epoch": 52.06, "learning_rate": 2.397920908825176e-05, "loss": 2.0486, "step": 10508000 }, { "epoch": 52.06, "learning_rate": 2.3977970501825677e-05, "loss": 2.0239, "step": 10508500 }, { "epoch": 52.07, "learning_rate": 2.3976731915399594e-05, "loss": 2.035, "step": 10509000 }, { "epoch": 52.07, "learning_rate": 2.397549332897351e-05, "loss": 2.0252, "step": 10509500 }, { "epoch": 52.07, "learning_rate": 2.3974254742547428e-05, "loss": 2.0389, "step": 10510000 }, { "epoch": 52.07, "learning_rate": 2.3973018633294193e-05, "loss": 2.0486, "step": 10510500 }, { "epoch": 52.08, "learning_rate": 2.397178004686811e-05, "loss": 2.008, "step": 10511000 }, { "epoch": 52.08, "learning_rate": 2.3970541460442027e-05, "loss": 2.0424, "step": 10511500 }, { "epoch": 52.08, "learning_rate": 2.3969302874015944e-05, "loss": 2.0332, "step": 10512000 }, { "epoch": 52.08, "learning_rate": 2.396806428758986e-05, "loss": 2.049, "step": 10512500 }, { "epoch": 52.09, "learning_rate": 2.3966825701163778e-05, "loss": 2.0204, "step": 10513000 }, { "epoch": 52.09, "learning_rate": 2.396558711473769e-05, "loss": 2.0318, "step": 10513500 }, { "epoch": 52.09, "learning_rate": 2.3964348528311608e-05, "loss": 2.0653, "step": 10514000 }, { "epoch": 52.09, "learning_rate": 2.3963112419058377e-05, "loss": 2.0498, "step": 10514500 }, { "epoch": 52.09, "learning_rate": 2.3961873832632294e-05, "loss": 2.0457, "step": 10515000 }, { "epoch": 52.1, "learning_rate": 2.396063524620621e-05, "loss": 2.0341, "step": 10515500 }, { "epoch": 52.1, "learning_rate": 2.3959396659780128e-05, "loss": 2.0232, "step": 10516000 }, { "epoch": 52.1, "learning_rate": 2.3958160550526897e-05, "loss": 2.0227, "step": 10516500 }, { "epoch": 52.1, "learning_rate": 2.395692196410081e-05, "loss": 2.0388, "step": 10517000 }, { "epoch": 52.11, "learning_rate": 2.3955683377674727e-05, "loss": 2.033, "step": 10517500 }, { "epoch": 52.11, "learning_rate": 2.39544472684215e-05, "loss": 2.0227, "step": 10518000 }, { "epoch": 52.11, "learning_rate": 2.3953208681995413e-05, "loss": 2.053, "step": 10518500 }, { "epoch": 52.11, "learning_rate": 2.395197009556933e-05, "loss": 2.0429, "step": 10519000 }, { "epoch": 52.12, "learning_rate": 2.3950731509143247e-05, "loss": 2.0319, "step": 10519500 }, { "epoch": 52.12, "learning_rate": 2.3949492922717164e-05, "loss": 2.053, "step": 10520000 }, { "epoch": 52.12, "learning_rate": 2.3948256813463932e-05, "loss": 2.0348, "step": 10520500 }, { "epoch": 52.12, "learning_rate": 2.394701822703785e-05, "loss": 2.0502, "step": 10521000 }, { "epoch": 52.13, "learning_rate": 2.3945779640611763e-05, "loss": 2.0462, "step": 10521500 }, { "epoch": 52.13, "learning_rate": 2.394454105418568e-05, "loss": 2.0481, "step": 10522000 }, { "epoch": 52.13, "learning_rate": 2.394330494493245e-05, "loss": 2.0233, "step": 10522500 }, { "epoch": 52.13, "learning_rate": 2.3942066358506366e-05, "loss": 2.034, "step": 10523000 }, { "epoch": 52.14, "learning_rate": 2.394083024925313e-05, "loss": 2.0237, "step": 10523500 }, { "epoch": 52.14, "learning_rate": 2.3939591662827048e-05, "loss": 2.0462, "step": 10524000 }, { "epoch": 52.14, "learning_rate": 2.3938353076400965e-05, "loss": 2.0544, "step": 10524500 }, { "epoch": 52.14, "learning_rate": 2.3937114489974882e-05, "loss": 2.0192, "step": 10525000 }, { "epoch": 52.15, "learning_rate": 2.39358759035488e-05, "loss": 2.0304, "step": 10525500 }, { "epoch": 52.15, "learning_rate": 2.3934637317122716e-05, "loss": 2.0817, "step": 10526000 }, { "epoch": 52.15, "learning_rate": 2.3933398730696633e-05, "loss": 2.0416, "step": 10526500 }, { "epoch": 52.15, "learning_rate": 2.393216014427055e-05, "loss": 2.0363, "step": 10527000 }, { "epoch": 52.16, "learning_rate": 2.3930921557844466e-05, "loss": 2.0574, "step": 10527500 }, { "epoch": 52.16, "learning_rate": 2.392968297141838e-05, "loss": 2.0425, "step": 10528000 }, { "epoch": 52.16, "learning_rate": 2.3928444384992297e-05, "loss": 2.0328, "step": 10528500 }, { "epoch": 52.16, "learning_rate": 2.3927205798566214e-05, "loss": 2.0081, "step": 10529000 }, { "epoch": 52.17, "learning_rate": 2.392596721214013e-05, "loss": 2.0488, "step": 10529500 }, { "epoch": 52.17, "learning_rate": 2.39247311028869e-05, "loss": 2.0606, "step": 10530000 }, { "epoch": 52.17, "learning_rate": 2.3923492516460817e-05, "loss": 2.0382, "step": 10530500 }, { "epoch": 52.17, "learning_rate": 2.392225393003473e-05, "loss": 2.0197, "step": 10531000 }, { "epoch": 52.18, "learning_rate": 2.3921015343608647e-05, "loss": 2.047, "step": 10531500 }, { "epoch": 52.18, "learning_rate": 2.3919779234355416e-05, "loss": 2.0377, "step": 10532000 }, { "epoch": 52.18, "learning_rate": 2.3918540647929333e-05, "loss": 2.0344, "step": 10532500 }, { "epoch": 52.18, "learning_rate": 2.391730206150325e-05, "loss": 2.0569, "step": 10533000 }, { "epoch": 52.19, "learning_rate": 2.3916063475077167e-05, "loss": 2.0493, "step": 10533500 }, { "epoch": 52.19, "learning_rate": 2.391482488865108e-05, "loss": 2.0428, "step": 10534000 }, { "epoch": 52.19, "learning_rate": 2.391358877939785e-05, "loss": 2.0595, "step": 10534500 }, { "epoch": 52.19, "learning_rate": 2.3912350192971766e-05, "loss": 2.0283, "step": 10535000 }, { "epoch": 52.2, "learning_rate": 2.3911111606545683e-05, "loss": 2.0564, "step": 10535500 }, { "epoch": 52.2, "learning_rate": 2.39098730201196e-05, "loss": 2.0481, "step": 10536000 }, { "epoch": 52.2, "learning_rate": 2.3908634433693517e-05, "loss": 2.0209, "step": 10536500 }, { "epoch": 52.2, "learning_rate": 2.3907398324440282e-05, "loss": 2.0397, "step": 10537000 }, { "epoch": 52.21, "learning_rate": 2.3906162215187054e-05, "loss": 2.0732, "step": 10537500 }, { "epoch": 52.21, "learning_rate": 2.390492362876097e-05, "loss": 2.045, "step": 10538000 }, { "epoch": 52.21, "learning_rate": 2.3903685042334885e-05, "loss": 2.0241, "step": 10538500 }, { "epoch": 52.21, "learning_rate": 2.39024464559088e-05, "loss": 2.0271, "step": 10539000 }, { "epoch": 52.22, "learning_rate": 2.390121034665557e-05, "loss": 2.0591, "step": 10539500 }, { "epoch": 52.22, "learning_rate": 2.3899971760229487e-05, "loss": 2.0383, "step": 10540000 }, { "epoch": 52.22, "learning_rate": 2.3898733173803404e-05, "loss": 2.0463, "step": 10540500 }, { "epoch": 52.22, "learning_rate": 2.389749458737732e-05, "loss": 2.0603, "step": 10541000 }, { "epoch": 52.23, "learning_rate": 2.3896256000951238e-05, "loss": 2.0558, "step": 10541500 }, { "epoch": 52.23, "learning_rate": 2.3895017414525152e-05, "loss": 2.0377, "step": 10542000 }, { "epoch": 52.23, "learning_rate": 2.389378130527192e-05, "loss": 2.0583, "step": 10542500 }, { "epoch": 52.23, "learning_rate": 2.3892542718845837e-05, "loss": 2.0134, "step": 10543000 }, { "epoch": 52.24, "learning_rate": 2.3891304132419754e-05, "loss": 2.0563, "step": 10543500 }, { "epoch": 52.24, "learning_rate": 2.389006554599367e-05, "loss": 2.0111, "step": 10544000 }, { "epoch": 52.24, "learning_rate": 2.3888826959567588e-05, "loss": 2.0628, "step": 10544500 }, { "epoch": 52.24, "learning_rate": 2.3887590850314354e-05, "loss": 2.0389, "step": 10545000 }, { "epoch": 52.25, "learning_rate": 2.388635226388827e-05, "loss": 2.0314, "step": 10545500 }, { "epoch": 52.25, "learning_rate": 2.3885113677462188e-05, "loss": 2.0494, "step": 10546000 }, { "epoch": 52.25, "learning_rate": 2.3883875091036104e-05, "loss": 2.0315, "step": 10546500 }, { "epoch": 52.25, "learning_rate": 2.388263650461002e-05, "loss": 2.0195, "step": 10547000 }, { "epoch": 52.26, "learning_rate": 2.388139791818394e-05, "loss": 2.0284, "step": 10547500 }, { "epoch": 52.26, "learning_rate": 2.3880159331757855e-05, "loss": 2.0591, "step": 10548000 }, { "epoch": 52.26, "learning_rate": 2.387892074533177e-05, "loss": 2.0531, "step": 10548500 }, { "epoch": 52.26, "learning_rate": 2.3877682158905686e-05, "loss": 2.037, "step": 10549000 }, { "epoch": 52.27, "learning_rate": 2.3876443572479603e-05, "loss": 2.0291, "step": 10549500 }, { "epoch": 52.27, "learning_rate": 2.3875204986053516e-05, "loss": 2.0341, "step": 10550000 }, { "epoch": 52.27, "learning_rate": 2.387396887680029e-05, "loss": 2.058, "step": 10550500 }, { "epoch": 52.27, "learning_rate": 2.3872730290374205e-05, "loss": 2.0281, "step": 10551000 }, { "epoch": 52.28, "learning_rate": 2.3871491703948122e-05, "loss": 2.0618, "step": 10551500 }, { "epoch": 52.28, "learning_rate": 2.3870255594694888e-05, "loss": 2.0574, "step": 10552000 }, { "epoch": 52.28, "learning_rate": 2.3869017008268805e-05, "loss": 2.0619, "step": 10552500 }, { "epoch": 52.28, "learning_rate": 2.386777842184272e-05, "loss": 2.0307, "step": 10553000 }, { "epoch": 52.29, "learning_rate": 2.386653983541664e-05, "loss": 2.0241, "step": 10553500 }, { "epoch": 52.29, "learning_rate": 2.3865301248990555e-05, "loss": 2.0345, "step": 10554000 }, { "epoch": 52.29, "learning_rate": 2.3864062662564472e-05, "loss": 2.0599, "step": 10554500 }, { "epoch": 52.29, "learning_rate": 2.3862824076138386e-05, "loss": 2.047, "step": 10555000 }, { "epoch": 52.3, "learning_rate": 2.3861585489712303e-05, "loss": 2.0418, "step": 10555500 }, { "epoch": 52.3, "learning_rate": 2.386034690328622e-05, "loss": 2.025, "step": 10556000 }, { "epoch": 52.3, "learning_rate": 2.3859108316860133e-05, "loss": 2.0465, "step": 10556500 }, { "epoch": 52.3, "learning_rate": 2.385786973043405e-05, "loss": 2.0574, "step": 10557000 }, { "epoch": 52.31, "learning_rate": 2.3856631144007967e-05, "loss": 2.027, "step": 10557500 }, { "epoch": 52.31, "learning_rate": 2.3855392557581884e-05, "loss": 2.0538, "step": 10558000 }, { "epoch": 52.31, "learning_rate": 2.38541539711558e-05, "loss": 2.0405, "step": 10558500 }, { "epoch": 52.31, "learning_rate": 2.3852915384729714e-05, "loss": 2.0332, "step": 10559000 }, { "epoch": 52.32, "learning_rate": 2.3851679275476487e-05, "loss": 2.0384, "step": 10559500 }, { "epoch": 52.32, "learning_rate": 2.38504406890504e-05, "loss": 2.069, "step": 10560000 }, { "epoch": 52.32, "learning_rate": 2.3849202102624317e-05, "loss": 2.0332, "step": 10560500 }, { "epoch": 52.32, "learning_rate": 2.3847963516198234e-05, "loss": 2.031, "step": 10561000 }, { "epoch": 52.33, "learning_rate": 2.384672492977215e-05, "loss": 2.0311, "step": 10561500 }, { "epoch": 52.33, "learning_rate": 2.3845486343346068e-05, "loss": 2.0557, "step": 10562000 }, { "epoch": 52.33, "learning_rate": 2.384424775691998e-05, "loss": 2.0566, "step": 10562500 }, { "epoch": 52.33, "learning_rate": 2.38430091704939e-05, "loss": 2.0499, "step": 10563000 }, { "epoch": 52.34, "learning_rate": 2.3841770584067815e-05, "loss": 2.0608, "step": 10563500 }, { "epoch": 52.34, "learning_rate": 2.3840531997641732e-05, "loss": 2.075, "step": 10564000 }, { "epoch": 52.34, "learning_rate": 2.38392958883885e-05, "loss": 2.044, "step": 10564500 }, { "epoch": 52.34, "learning_rate": 2.3838057301962418e-05, "loss": 2.0463, "step": 10565000 }, { "epoch": 52.35, "learning_rate": 2.383681871553633e-05, "loss": 2.0506, "step": 10565500 }, { "epoch": 52.35, "learning_rate": 2.383558012911025e-05, "loss": 2.0465, "step": 10566000 }, { "epoch": 52.35, "learning_rate": 2.3834341542684165e-05, "loss": 2.0174, "step": 10566500 }, { "epoch": 52.35, "learning_rate": 2.3833105433430934e-05, "loss": 2.0441, "step": 10567000 }, { "epoch": 52.36, "learning_rate": 2.3831869324177703e-05, "loss": 2.0546, "step": 10567500 }, { "epoch": 52.36, "learning_rate": 2.383063073775162e-05, "loss": 2.0553, "step": 10568000 }, { "epoch": 52.36, "learning_rate": 2.3829392151325537e-05, "loss": 2.0367, "step": 10568500 }, { "epoch": 52.36, "learning_rate": 2.3828153564899454e-05, "loss": 2.0452, "step": 10569000 }, { "epoch": 52.36, "learning_rate": 2.382691497847337e-05, "loss": 2.0409, "step": 10569500 }, { "epoch": 52.37, "learning_rate": 2.3825676392047284e-05, "loss": 2.0329, "step": 10570000 }, { "epoch": 52.37, "learning_rate": 2.38244378056212e-05, "loss": 2.0502, "step": 10570500 }, { "epoch": 52.37, "learning_rate": 2.3823199219195118e-05, "loss": 2.052, "step": 10571000 }, { "epoch": 52.37, "learning_rate": 2.3821963109941887e-05, "loss": 2.047, "step": 10571500 }, { "epoch": 52.38, "learning_rate": 2.3820724523515804e-05, "loss": 2.0581, "step": 10572000 }, { "epoch": 52.38, "learning_rate": 2.381948593708972e-05, "loss": 2.0556, "step": 10572500 }, { "epoch": 52.38, "learning_rate": 2.381824982783649e-05, "loss": 2.0384, "step": 10573000 }, { "epoch": 52.38, "learning_rate": 2.3817011241410403e-05, "loss": 2.0425, "step": 10573500 }, { "epoch": 52.39, "learning_rate": 2.381577265498432e-05, "loss": 2.0529, "step": 10574000 }, { "epoch": 52.39, "learning_rate": 2.381453654573109e-05, "loss": 2.0461, "step": 10574500 }, { "epoch": 52.39, "learning_rate": 2.3813297959305006e-05, "loss": 2.0512, "step": 10575000 }, { "epoch": 52.39, "learning_rate": 2.3812059372878923e-05, "loss": 2.0242, "step": 10575500 }, { "epoch": 52.4, "learning_rate": 2.381082078645284e-05, "loss": 2.0311, "step": 10576000 }, { "epoch": 52.4, "learning_rate": 2.3809582200026757e-05, "loss": 2.0637, "step": 10576500 }, { "epoch": 52.4, "learning_rate": 2.3808346090773522e-05, "loss": 2.041, "step": 10577000 }, { "epoch": 52.4, "learning_rate": 2.380710750434744e-05, "loss": 2.0549, "step": 10577500 }, { "epoch": 52.41, "learning_rate": 2.3805868917921356e-05, "loss": 2.0401, "step": 10578000 }, { "epoch": 52.41, "learning_rate": 2.3804630331495273e-05, "loss": 2.0592, "step": 10578500 }, { "epoch": 52.41, "learning_rate": 2.3803394222242038e-05, "loss": 2.0452, "step": 10579000 }, { "epoch": 52.41, "learning_rate": 2.3802155635815955e-05, "loss": 2.0375, "step": 10579500 }, { "epoch": 52.42, "learning_rate": 2.3800917049389872e-05, "loss": 2.04, "step": 10580000 }, { "epoch": 52.42, "learning_rate": 2.379967846296379e-05, "loss": 2.0463, "step": 10580500 }, { "epoch": 52.42, "learning_rate": 2.3798439876537706e-05, "loss": 2.0457, "step": 10581000 }, { "epoch": 52.42, "learning_rate": 2.3797201290111623e-05, "loss": 2.0572, "step": 10581500 }, { "epoch": 52.43, "learning_rate": 2.379596518085839e-05, "loss": 2.0489, "step": 10582000 }, { "epoch": 52.43, "learning_rate": 2.3794726594432305e-05, "loss": 2.0791, "step": 10582500 }, { "epoch": 52.43, "learning_rate": 2.3793488008006222e-05, "loss": 2.0749, "step": 10583000 }, { "epoch": 52.43, "learning_rate": 2.379224942158014e-05, "loss": 2.0287, "step": 10583500 }, { "epoch": 52.44, "learning_rate": 2.3791010835154056e-05, "loss": 2.0393, "step": 10584000 }, { "epoch": 52.44, "learning_rate": 2.3789774725900825e-05, "loss": 2.028, "step": 10584500 }, { "epoch": 52.44, "learning_rate": 2.3788538616647594e-05, "loss": 2.038, "step": 10585000 }, { "epoch": 52.44, "learning_rate": 2.378730003022151e-05, "loss": 2.0403, "step": 10585500 }, { "epoch": 52.45, "learning_rate": 2.3786061443795427e-05, "loss": 2.0348, "step": 10586000 }, { "epoch": 52.45, "learning_rate": 2.3784822857369344e-05, "loss": 2.0591, "step": 10586500 }, { "epoch": 52.45, "learning_rate": 2.378358427094326e-05, "loss": 2.0548, "step": 10587000 }, { "epoch": 52.45, "learning_rate": 2.3782348161690027e-05, "loss": 2.0647, "step": 10587500 }, { "epoch": 52.46, "learning_rate": 2.3781109575263944e-05, "loss": 2.0316, "step": 10588000 }, { "epoch": 52.46, "learning_rate": 2.377987098883786e-05, "loss": 2.0418, "step": 10588500 }, { "epoch": 52.46, "learning_rate": 2.3778632402411778e-05, "loss": 2.0614, "step": 10589000 }, { "epoch": 52.46, "learning_rate": 2.3777393815985694e-05, "loss": 2.0462, "step": 10589500 }, { "epoch": 52.47, "learning_rate": 2.377615522955961e-05, "loss": 2.0449, "step": 10590000 }, { "epoch": 52.47, "learning_rate": 2.377491664313353e-05, "loss": 2.035, "step": 10590500 }, { "epoch": 52.47, "learning_rate": 2.3773678056707442e-05, "loss": 2.0605, "step": 10591000 }, { "epoch": 52.47, "learning_rate": 2.377243947028136e-05, "loss": 2.047, "step": 10591500 }, { "epoch": 52.48, "learning_rate": 2.3771200883855276e-05, "loss": 2.0544, "step": 10592000 }, { "epoch": 52.48, "learning_rate": 2.376996229742919e-05, "loss": 2.0274, "step": 10592500 }, { "epoch": 52.48, "learning_rate": 2.3768723711003106e-05, "loss": 2.0599, "step": 10593000 }, { "epoch": 52.48, "learning_rate": 2.3767485124577023e-05, "loss": 2.0536, "step": 10593500 }, { "epoch": 52.49, "learning_rate": 2.376624653815094e-05, "loss": 2.0481, "step": 10594000 }, { "epoch": 52.49, "learning_rate": 2.376501042889771e-05, "loss": 2.049, "step": 10594500 }, { "epoch": 52.49, "learning_rate": 2.3763771842471626e-05, "loss": 2.0461, "step": 10595000 }, { "epoch": 52.49, "learning_rate": 2.3762533256045543e-05, "loss": 2.0479, "step": 10595500 }, { "epoch": 52.5, "learning_rate": 2.3761294669619456e-05, "loss": 2.0674, "step": 10596000 }, { "epoch": 52.5, "learning_rate": 2.376005856036623e-05, "loss": 2.0505, "step": 10596500 }, { "epoch": 52.5, "learning_rate": 2.3758819973940145e-05, "loss": 2.0667, "step": 10597000 }, { "epoch": 52.5, "learning_rate": 2.375758138751406e-05, "loss": 2.0454, "step": 10597500 }, { "epoch": 52.51, "learning_rate": 2.3756342801087976e-05, "loss": 2.0765, "step": 10598000 }, { "epoch": 52.51, "learning_rate": 2.3755104214661893e-05, "loss": 2.0556, "step": 10598500 }, { "epoch": 52.51, "learning_rate": 2.375386810540866e-05, "loss": 2.0574, "step": 10599000 }, { "epoch": 52.51, "learning_rate": 2.375262951898258e-05, "loss": 2.0569, "step": 10599500 }, { "epoch": 52.52, "learning_rate": 2.3751390932556495e-05, "loss": 2.0526, "step": 10600000 }, { "epoch": 52.52, "learning_rate": 2.375015234613041e-05, "loss": 2.0644, "step": 10600500 }, { "epoch": 52.52, "learning_rate": 2.3748913759704326e-05, "loss": 2.0397, "step": 10601000 }, { "epoch": 52.52, "learning_rate": 2.3747675173278243e-05, "loss": 2.0482, "step": 10601500 }, { "epoch": 52.53, "learning_rate": 2.374643658685216e-05, "loss": 2.0514, "step": 10602000 }, { "epoch": 52.53, "learning_rate": 2.3745198000426073e-05, "loss": 2.0522, "step": 10602500 }, { "epoch": 52.53, "learning_rate": 2.374395941399999e-05, "loss": 2.0537, "step": 10603000 }, { "epoch": 52.53, "learning_rate": 2.3742720827573907e-05, "loss": 2.0367, "step": 10603500 }, { "epoch": 52.54, "learning_rate": 2.3741482241147824e-05, "loss": 2.0503, "step": 10604000 }, { "epoch": 52.54, "learning_rate": 2.374024365472174e-05, "loss": 2.0556, "step": 10604500 }, { "epoch": 52.54, "learning_rate": 2.373900754546851e-05, "loss": 2.0459, "step": 10605000 }, { "epoch": 52.54, "learning_rate": 2.3737768959042427e-05, "loss": 2.0461, "step": 10605500 }, { "epoch": 52.55, "learning_rate": 2.373653037261634e-05, "loss": 2.0432, "step": 10606000 }, { "epoch": 52.55, "learning_rate": 2.3735291786190257e-05, "loss": 2.0302, "step": 10606500 }, { "epoch": 52.55, "learning_rate": 2.3734053199764174e-05, "loss": 2.0511, "step": 10607000 }, { "epoch": 52.55, "learning_rate": 2.3732817090510943e-05, "loss": 2.0318, "step": 10607500 }, { "epoch": 52.56, "learning_rate": 2.373157850408486e-05, "loss": 2.0569, "step": 10608000 }, { "epoch": 52.56, "learning_rate": 2.3730347349177332e-05, "loss": 2.0572, "step": 10608500 }, { "epoch": 52.56, "learning_rate": 2.372910876275125e-05, "loss": 2.068, "step": 10609000 }, { "epoch": 52.56, "learning_rate": 2.3727870176325166e-05, "loss": 2.0658, "step": 10609500 }, { "epoch": 52.57, "learning_rate": 2.372663158989908e-05, "loss": 2.0494, "step": 10610000 }, { "epoch": 52.57, "learning_rate": 2.3725393003472997e-05, "loss": 2.0521, "step": 10610500 }, { "epoch": 52.57, "learning_rate": 2.3724154417046914e-05, "loss": 2.0319, "step": 10611000 }, { "epoch": 52.57, "learning_rate": 2.372291583062083e-05, "loss": 2.0503, "step": 10611500 }, { "epoch": 52.58, "learning_rate": 2.3721677244194744e-05, "loss": 2.0378, "step": 10612000 }, { "epoch": 52.58, "learning_rate": 2.372043865776866e-05, "loss": 2.0571, "step": 10612500 }, { "epoch": 52.58, "learning_rate": 2.3719200071342578e-05, "loss": 2.0645, "step": 10613000 }, { "epoch": 52.58, "learning_rate": 2.3717961484916495e-05, "loss": 2.0592, "step": 10613500 }, { "epoch": 52.59, "learning_rate": 2.3716722898490412e-05, "loss": 2.0076, "step": 10614000 }, { "epoch": 52.59, "learning_rate": 2.371548431206433e-05, "loss": 2.044, "step": 10614500 }, { "epoch": 52.59, "learning_rate": 2.3714245725638246e-05, "loss": 2.0462, "step": 10615000 }, { "epoch": 52.59, "learning_rate": 2.371300961638501e-05, "loss": 2.0369, "step": 10615500 }, { "epoch": 52.6, "learning_rate": 2.3711773507131783e-05, "loss": 2.0447, "step": 10616000 }, { "epoch": 52.6, "learning_rate": 2.37105349207057e-05, "loss": 2.0223, "step": 10616500 }, { "epoch": 52.6, "learning_rate": 2.3709296334279614e-05, "loss": 2.0583, "step": 10617000 }, { "epoch": 52.6, "learning_rate": 2.370805774785353e-05, "loss": 2.0389, "step": 10617500 }, { "epoch": 52.61, "learning_rate": 2.3706819161427444e-05, "loss": 2.0771, "step": 10618000 }, { "epoch": 52.61, "learning_rate": 2.3705583052174216e-05, "loss": 2.0617, "step": 10618500 }, { "epoch": 52.61, "learning_rate": 2.3704344465748133e-05, "loss": 2.0593, "step": 10619000 }, { "epoch": 52.61, "learning_rate": 2.370310587932205e-05, "loss": 2.0408, "step": 10619500 }, { "epoch": 52.62, "learning_rate": 2.3701867292895967e-05, "loss": 2.0218, "step": 10620000 }, { "epoch": 52.62, "learning_rate": 2.370062870646988e-05, "loss": 2.0419, "step": 10620500 }, { "epoch": 52.62, "learning_rate": 2.3699390120043798e-05, "loss": 2.0243, "step": 10621000 }, { "epoch": 52.62, "learning_rate": 2.369815153361771e-05, "loss": 2.0128, "step": 10621500 }, { "epoch": 52.63, "learning_rate": 2.3696912947191628e-05, "loss": 2.0678, "step": 10622000 }, { "epoch": 52.63, "learning_rate": 2.3695674360765545e-05, "loss": 2.0251, "step": 10622500 }, { "epoch": 52.63, "learning_rate": 2.3694438251512317e-05, "loss": 2.0903, "step": 10623000 }, { "epoch": 52.63, "learning_rate": 2.3693202142259083e-05, "loss": 2.024, "step": 10623500 }, { "epoch": 52.63, "learning_rate": 2.3691963555833e-05, "loss": 2.0556, "step": 10624000 }, { "epoch": 52.64, "learning_rate": 2.369072744657977e-05, "loss": 2.047, "step": 10624500 }, { "epoch": 52.64, "learning_rate": 2.3689488860153685e-05, "loss": 2.0344, "step": 10625000 }, { "epoch": 52.64, "learning_rate": 2.3688250273727602e-05, "loss": 2.0331, "step": 10625500 }, { "epoch": 52.64, "learning_rate": 2.3687011687301516e-05, "loss": 2.0316, "step": 10626000 }, { "epoch": 52.65, "learning_rate": 2.3685773100875433e-05, "loss": 2.0514, "step": 10626500 }, { "epoch": 52.65, "learning_rate": 2.368453451444935e-05, "loss": 2.0611, "step": 10627000 }, { "epoch": 52.65, "learning_rate": 2.3683295928023267e-05, "loss": 2.0468, "step": 10627500 }, { "epoch": 52.65, "learning_rate": 2.3682057341597184e-05, "loss": 2.05, "step": 10628000 }, { "epoch": 52.66, "learning_rate": 2.36808187551711e-05, "loss": 2.0628, "step": 10628500 }, { "epoch": 52.66, "learning_rate": 2.3679580168745017e-05, "loss": 2.0522, "step": 10629000 }, { "epoch": 52.66, "learning_rate": 2.3678344059491783e-05, "loss": 2.0546, "step": 10629500 }, { "epoch": 52.66, "learning_rate": 2.36771054730657e-05, "loss": 2.0316, "step": 10630000 }, { "epoch": 52.67, "learning_rate": 2.3675866886639617e-05, "loss": 2.053, "step": 10630500 }, { "epoch": 52.67, "learning_rate": 2.3674628300213534e-05, "loss": 2.0491, "step": 10631000 }, { "epoch": 52.67, "learning_rate": 2.367338971378745e-05, "loss": 2.0326, "step": 10631500 }, { "epoch": 52.67, "learning_rate": 2.3672151127361368e-05, "loss": 2.0398, "step": 10632000 }, { "epoch": 52.68, "learning_rate": 2.3670912540935284e-05, "loss": 2.0286, "step": 10632500 }, { "epoch": 52.68, "learning_rate": 2.36696739545092e-05, "loss": 2.0456, "step": 10633000 }, { "epoch": 52.68, "learning_rate": 2.3668435368083115e-05, "loss": 2.039, "step": 10633500 }, { "epoch": 52.68, "learning_rate": 2.3667196781657032e-05, "loss": 2.0646, "step": 10634000 }, { "epoch": 52.69, "learning_rate": 2.366595819523095e-05, "loss": 2.0535, "step": 10634500 }, { "epoch": 52.69, "learning_rate": 2.3664719608804862e-05, "loss": 2.0624, "step": 10635000 }, { "epoch": 52.69, "learning_rate": 2.3663483499551635e-05, "loss": 2.0518, "step": 10635500 }, { "epoch": 52.69, "learning_rate": 2.36622473902984e-05, "loss": 2.0583, "step": 10636000 }, { "epoch": 52.7, "learning_rate": 2.3661008803872317e-05, "loss": 2.0478, "step": 10636500 }, { "epoch": 52.7, "learning_rate": 2.3659770217446234e-05, "loss": 2.0248, "step": 10637000 }, { "epoch": 52.7, "learning_rate": 2.365853163102015e-05, "loss": 2.0355, "step": 10637500 }, { "epoch": 52.7, "learning_rate": 2.3657293044594068e-05, "loss": 2.0471, "step": 10638000 }, { "epoch": 52.71, "learning_rate": 2.3656056935340833e-05, "loss": 2.0646, "step": 10638500 }, { "epoch": 52.71, "learning_rate": 2.365481834891475e-05, "loss": 2.055, "step": 10639000 }, { "epoch": 52.71, "learning_rate": 2.3653579762488667e-05, "loss": 2.0442, "step": 10639500 }, { "epoch": 52.71, "learning_rate": 2.3652341176062584e-05, "loss": 2.0366, "step": 10640000 }, { "epoch": 52.72, "learning_rate": 2.36511025896365e-05, "loss": 2.047, "step": 10640500 }, { "epoch": 52.72, "learning_rate": 2.3649864003210418e-05, "loss": 2.0474, "step": 10641000 }, { "epoch": 52.72, "learning_rate": 2.3648625416784335e-05, "loss": 2.0204, "step": 10641500 }, { "epoch": 52.72, "learning_rate": 2.364738683035825e-05, "loss": 2.053, "step": 10642000 }, { "epoch": 52.73, "learning_rate": 2.3646150721105017e-05, "loss": 2.0951, "step": 10642500 }, { "epoch": 52.73, "learning_rate": 2.3644912134678934e-05, "loss": 2.0795, "step": 10643000 }, { "epoch": 52.73, "learning_rate": 2.364367354825285e-05, "loss": 2.0401, "step": 10643500 }, { "epoch": 52.73, "learning_rate": 2.364243743899962e-05, "loss": 2.0502, "step": 10644000 }, { "epoch": 52.74, "learning_rate": 2.3641198852573537e-05, "loss": 2.0318, "step": 10644500 }, { "epoch": 52.74, "learning_rate": 2.363996026614745e-05, "loss": 2.0687, "step": 10645000 }, { "epoch": 52.74, "learning_rate": 2.3638721679721367e-05, "loss": 2.0462, "step": 10645500 }, { "epoch": 52.74, "learning_rate": 2.3637485570468136e-05, "loss": 2.0361, "step": 10646000 }, { "epoch": 52.75, "learning_rate": 2.3636246984042053e-05, "loss": 2.0564, "step": 10646500 }, { "epoch": 52.75, "learning_rate": 2.363500839761597e-05, "loss": 2.0723, "step": 10647000 }, { "epoch": 52.75, "learning_rate": 2.3633769811189887e-05, "loss": 2.0555, "step": 10647500 }, { "epoch": 52.75, "learning_rate": 2.36325312247638e-05, "loss": 2.0325, "step": 10648000 }, { "epoch": 52.76, "learning_rate": 2.3631297592683424e-05, "loss": 2.0641, "step": 10648500 }, { "epoch": 52.76, "learning_rate": 2.363005900625734e-05, "loss": 2.0696, "step": 10649000 }, { "epoch": 52.76, "learning_rate": 2.3628820419831258e-05, "loss": 2.03, "step": 10649500 }, { "epoch": 52.76, "learning_rate": 2.362758183340517e-05, "loss": 2.0504, "step": 10650000 }, { "epoch": 52.77, "learning_rate": 2.362634324697909e-05, "loss": 2.0467, "step": 10650500 }, { "epoch": 52.77, "learning_rate": 2.3625104660553006e-05, "loss": 2.0626, "step": 10651000 }, { "epoch": 52.77, "learning_rate": 2.3623866074126922e-05, "loss": 2.0663, "step": 10651500 }, { "epoch": 52.77, "learning_rate": 2.362262748770084e-05, "loss": 2.0561, "step": 10652000 }, { "epoch": 52.78, "learning_rate": 2.3621388901274756e-05, "loss": 2.0411, "step": 10652500 }, { "epoch": 52.78, "learning_rate": 2.362015031484867e-05, "loss": 2.0616, "step": 10653000 }, { "epoch": 52.78, "learning_rate": 2.3618911728422587e-05, "loss": 2.0422, "step": 10653500 }, { "epoch": 52.78, "learning_rate": 2.3617673141996504e-05, "loss": 2.0532, "step": 10654000 }, { "epoch": 52.79, "learning_rate": 2.3616437032743273e-05, "loss": 2.054, "step": 10654500 }, { "epoch": 52.79, "learning_rate": 2.361519844631719e-05, "loss": 2.0536, "step": 10655000 }, { "epoch": 52.79, "learning_rate": 2.3613959859891106e-05, "loss": 2.0675, "step": 10655500 }, { "epoch": 52.79, "learning_rate": 2.3612721273465023e-05, "loss": 2.08, "step": 10656000 }, { "epoch": 52.8, "learning_rate": 2.3611482687038937e-05, "loss": 2.0482, "step": 10656500 }, { "epoch": 52.8, "learning_rate": 2.3610244100612854e-05, "loss": 2.0624, "step": 10657000 }, { "epoch": 52.8, "learning_rate": 2.3609005514186767e-05, "loss": 2.0393, "step": 10657500 }, { "epoch": 52.8, "learning_rate": 2.3607766927760684e-05, "loss": 2.0705, "step": 10658000 }, { "epoch": 52.81, "learning_rate": 2.36065283413346e-05, "loss": 2.0654, "step": 10658500 }, { "epoch": 52.81, "learning_rate": 2.3605289754908518e-05, "loss": 2.0225, "step": 10659000 }, { "epoch": 52.81, "learning_rate": 2.3604053645655287e-05, "loss": 2.0654, "step": 10659500 }, { "epoch": 52.81, "learning_rate": 2.3602817536402056e-05, "loss": 2.0588, "step": 10660000 }, { "epoch": 52.82, "learning_rate": 2.3601581427148825e-05, "loss": 2.0437, "step": 10660500 }, { "epoch": 52.82, "learning_rate": 2.360034284072274e-05, "loss": 2.0423, "step": 10661000 }, { "epoch": 52.82, "learning_rate": 2.359910673146951e-05, "loss": 2.0598, "step": 10661500 }, { "epoch": 52.82, "learning_rate": 2.3597868145043424e-05, "loss": 2.0419, "step": 10662000 }, { "epoch": 52.83, "learning_rate": 2.359662955861734e-05, "loss": 2.0721, "step": 10662500 }, { "epoch": 52.83, "learning_rate": 2.3595390972191258e-05, "loss": 2.0479, "step": 10663000 }, { "epoch": 52.83, "learning_rate": 2.3594152385765175e-05, "loss": 2.0163, "step": 10663500 }, { "epoch": 52.83, "learning_rate": 2.359291379933909e-05, "loss": 2.0344, "step": 10664000 }, { "epoch": 52.84, "learning_rate": 2.359167521291301e-05, "loss": 2.0427, "step": 10664500 }, { "epoch": 52.84, "learning_rate": 2.3590436626486925e-05, "loss": 2.0485, "step": 10665000 }, { "epoch": 52.84, "learning_rate": 2.358919804006084e-05, "loss": 2.0795, "step": 10665500 }, { "epoch": 52.84, "learning_rate": 2.3587959453634756e-05, "loss": 2.0451, "step": 10666000 }, { "epoch": 52.85, "learning_rate": 2.3586720867208673e-05, "loss": 2.0255, "step": 10666500 }, { "epoch": 52.85, "learning_rate": 2.358548228078259e-05, "loss": 2.0538, "step": 10667000 }, { "epoch": 52.85, "learning_rate": 2.3584243694356507e-05, "loss": 2.0474, "step": 10667500 }, { "epoch": 52.85, "learning_rate": 2.3583005107930424e-05, "loss": 2.0294, "step": 10668000 }, { "epoch": 52.86, "learning_rate": 2.358176652150434e-05, "loss": 2.0627, "step": 10668500 }, { "epoch": 52.86, "learning_rate": 2.3580527935078257e-05, "loss": 2.0427, "step": 10669000 }, { "epoch": 52.86, "learning_rate": 2.3579289348652174e-05, "loss": 2.0363, "step": 10669500 }, { "epoch": 52.86, "learning_rate": 2.3578050762226088e-05, "loss": 2.0662, "step": 10670000 }, { "epoch": 52.87, "learning_rate": 2.357681713014571e-05, "loss": 2.0333, "step": 10670500 }, { "epoch": 52.87, "learning_rate": 2.3575578543719625e-05, "loss": 2.0685, "step": 10671000 }, { "epoch": 52.87, "learning_rate": 2.3574342434466394e-05, "loss": 2.0689, "step": 10671500 }, { "epoch": 52.87, "learning_rate": 2.3573103848040308e-05, "loss": 2.0466, "step": 10672000 }, { "epoch": 52.88, "learning_rate": 2.3571865261614225e-05, "loss": 2.0503, "step": 10672500 }, { "epoch": 52.88, "learning_rate": 2.357062667518814e-05, "loss": 2.0657, "step": 10673000 }, { "epoch": 52.88, "learning_rate": 2.356938808876206e-05, "loss": 2.0521, "step": 10673500 }, { "epoch": 52.88, "learning_rate": 2.3568149502335976e-05, "loss": 2.0508, "step": 10674000 }, { "epoch": 52.89, "learning_rate": 2.3566910915909892e-05, "loss": 2.0591, "step": 10674500 }, { "epoch": 52.89, "learning_rate": 2.3565672329483806e-05, "loss": 2.0325, "step": 10675000 }, { "epoch": 52.89, "learning_rate": 2.3564433743057723e-05, "loss": 2.0438, "step": 10675500 }, { "epoch": 52.89, "learning_rate": 2.356319515663164e-05, "loss": 2.0488, "step": 10676000 }, { "epoch": 52.9, "learning_rate": 2.3561956570205557e-05, "loss": 2.0439, "step": 10676500 }, { "epoch": 52.9, "learning_rate": 2.3560720460952326e-05, "loss": 2.0725, "step": 10677000 }, { "epoch": 52.9, "learning_rate": 2.3559484351699094e-05, "loss": 2.0396, "step": 10677500 }, { "epoch": 52.9, "learning_rate": 2.355824576527301e-05, "loss": 2.0836, "step": 10678000 }, { "epoch": 52.9, "learning_rate": 2.3557007178846925e-05, "loss": 2.0762, "step": 10678500 }, { "epoch": 52.91, "learning_rate": 2.3555771069593697e-05, "loss": 2.026, "step": 10679000 }, { "epoch": 52.91, "learning_rate": 2.3554532483167614e-05, "loss": 2.0487, "step": 10679500 }, { "epoch": 52.91, "learning_rate": 2.3553293896741528e-05, "loss": 2.0431, "step": 10680000 }, { "epoch": 52.91, "learning_rate": 2.3552055310315444e-05, "loss": 2.0527, "step": 10680500 }, { "epoch": 52.92, "learning_rate": 2.355081672388936e-05, "loss": 2.0398, "step": 10681000 }, { "epoch": 52.92, "learning_rate": 2.354957813746328e-05, "loss": 2.0494, "step": 10681500 }, { "epoch": 52.92, "learning_rate": 2.3548339551037192e-05, "loss": 2.0262, "step": 10682000 }, { "epoch": 52.92, "learning_rate": 2.354710096461111e-05, "loss": 2.0695, "step": 10682500 }, { "epoch": 52.93, "learning_rate": 2.3545862378185026e-05, "loss": 2.0272, "step": 10683000 }, { "epoch": 52.93, "learning_rate": 2.3544626268931795e-05, "loss": 2.0502, "step": 10683500 }, { "epoch": 52.93, "learning_rate": 2.354338768250571e-05, "loss": 2.0417, "step": 10684000 }, { "epoch": 52.93, "learning_rate": 2.354214909607963e-05, "loss": 2.0585, "step": 10684500 }, { "epoch": 52.94, "learning_rate": 2.3540910509653545e-05, "loss": 2.0645, "step": 10685000 }, { "epoch": 52.94, "learning_rate": 2.353967192322746e-05, "loss": 2.0706, "step": 10685500 }, { "epoch": 52.94, "learning_rate": 2.3538433336801376e-05, "loss": 2.0268, "step": 10686000 }, { "epoch": 52.94, "learning_rate": 2.3537194750375293e-05, "loss": 2.0266, "step": 10686500 }, { "epoch": 52.95, "learning_rate": 2.3535961118294913e-05, "loss": 2.067, "step": 10687000 }, { "epoch": 52.95, "learning_rate": 2.353472253186883e-05, "loss": 2.0361, "step": 10687500 }, { "epoch": 52.95, "learning_rate": 2.3533483945442747e-05, "loss": 2.0602, "step": 10688000 }, { "epoch": 52.95, "learning_rate": 2.3532245359016664e-05, "loss": 2.0702, "step": 10688500 }, { "epoch": 52.96, "learning_rate": 2.353100677259058e-05, "loss": 2.0491, "step": 10689000 }, { "epoch": 52.96, "learning_rate": 2.3529768186164495e-05, "loss": 2.0665, "step": 10689500 }, { "epoch": 52.96, "learning_rate": 2.352852959973841e-05, "loss": 2.0609, "step": 10690000 }, { "epoch": 52.96, "learning_rate": 2.352729101331233e-05, "loss": 2.0489, "step": 10690500 }, { "epoch": 52.97, "learning_rate": 2.3526052426886245e-05, "loss": 2.0359, "step": 10691000 }, { "epoch": 52.97, "learning_rate": 2.3524813840460162e-05, "loss": 2.0606, "step": 10691500 }, { "epoch": 52.97, "learning_rate": 2.3523575254034076e-05, "loss": 2.0507, "step": 10692000 }, { "epoch": 52.97, "learning_rate": 2.3522336667607993e-05, "loss": 2.0402, "step": 10692500 }, { "epoch": 52.98, "learning_rate": 2.352110055835476e-05, "loss": 2.048, "step": 10693000 }, { "epoch": 52.98, "learning_rate": 2.351986197192868e-05, "loss": 2.0532, "step": 10693500 }, { "epoch": 52.98, "learning_rate": 2.3518623385502596e-05, "loss": 2.0367, "step": 10694000 }, { "epoch": 52.98, "learning_rate": 2.3517384799076512e-05, "loss": 2.055, "step": 10694500 }, { "epoch": 52.99, "learning_rate": 2.351614868982328e-05, "loss": 2.0522, "step": 10695000 }, { "epoch": 52.99, "learning_rate": 2.3514910103397195e-05, "loss": 2.0625, "step": 10695500 }, { "epoch": 52.99, "learning_rate": 2.3513671516971112e-05, "loss": 2.0888, "step": 10696000 }, { "epoch": 52.99, "learning_rate": 2.351243293054503e-05, "loss": 2.0417, "step": 10696500 }, { "epoch": 53.0, "learning_rate": 2.3511194344118946e-05, "loss": 2.0525, "step": 10697000 }, { "epoch": 53.0, "learning_rate": 2.3509958234865714e-05, "loss": 2.0217, "step": 10697500 }, { "epoch": 53.0, "eval_accuracy": 0.6708108464228656, "eval_accuracy_mlm": 0.6293898193705628, "eval_accuracy_nsp": 0.8661196506104903, "eval_loss": 2.2714345455169678, "eval_runtime": 147.3921, "eval_samples_per_second": 1729.8, "eval_steps_per_second": 72.08, "step": 10697679 }, { "epoch": 53.0, "learning_rate": 2.350871964843963e-05, "loss": 2.0401, "step": 10698000 }, { "epoch": 53.0, "learning_rate": 2.3507483539186397e-05, "loss": 2.012, "step": 10698500 }, { "epoch": 53.01, "learning_rate": 2.3506244952760314e-05, "loss": 2.0225, "step": 10699000 }, { "epoch": 53.01, "learning_rate": 2.350500636633423e-05, "loss": 2.0394, "step": 10699500 }, { "epoch": 53.01, "learning_rate": 2.3503767779908148e-05, "loss": 2.0255, "step": 10700000 }, { "epoch": 53.01, "learning_rate": 2.3502531670654916e-05, "loss": 2.0149, "step": 10700500 }, { "epoch": 53.02, "learning_rate": 2.350129308422883e-05, "loss": 2.0301, "step": 10701000 }, { "epoch": 53.02, "learning_rate": 2.3500054497802747e-05, "loss": 2.0398, "step": 10701500 }, { "epoch": 53.02, "learning_rate": 2.3498815911376664e-05, "loss": 2.0852, "step": 10702000 }, { "epoch": 53.02, "learning_rate": 2.349757732495058e-05, "loss": 2.0246, "step": 10702500 }, { "epoch": 53.03, "learning_rate": 2.3496338738524498e-05, "loss": 2.0203, "step": 10703000 }, { "epoch": 53.03, "learning_rate": 2.3495100152098415e-05, "loss": 2.0505, "step": 10703500 }, { "epoch": 53.03, "learning_rate": 2.349386156567233e-05, "loss": 2.0514, "step": 10704000 }, { "epoch": 53.03, "learning_rate": 2.349262297924625e-05, "loss": 2.034, "step": 10704500 }, { "epoch": 53.04, "learning_rate": 2.3491384392820162e-05, "loss": 2.052, "step": 10705000 }, { "epoch": 53.04, "learning_rate": 2.349014580639408e-05, "loss": 2.0308, "step": 10705500 }, { "epoch": 53.04, "learning_rate": 2.3488907219967996e-05, "loss": 2.0173, "step": 10706000 }, { "epoch": 53.04, "learning_rate": 2.3487668633541913e-05, "loss": 2.0379, "step": 10706500 }, { "epoch": 53.05, "learning_rate": 2.348643004711583e-05, "loss": 2.0227, "step": 10707000 }, { "epoch": 53.05, "learning_rate": 2.3485191460689747e-05, "loss": 2.0367, "step": 10707500 }, { "epoch": 53.05, "learning_rate": 2.3483952874263663e-05, "loss": 2.059, "step": 10708000 }, { "epoch": 53.05, "learning_rate": 2.348271676501043e-05, "loss": 2.0316, "step": 10708500 }, { "epoch": 53.06, "learning_rate": 2.3481478178584346e-05, "loss": 2.0334, "step": 10709000 }, { "epoch": 53.06, "learning_rate": 2.3480239592158263e-05, "loss": 1.991, "step": 10709500 }, { "epoch": 53.06, "learning_rate": 2.347900100573218e-05, "loss": 2.0355, "step": 10710000 }, { "epoch": 53.06, "learning_rate": 2.3477762419306097e-05, "loss": 2.009, "step": 10710500 }, { "epoch": 53.07, "learning_rate": 2.3476523832880014e-05, "loss": 2.0195, "step": 10711000 }, { "epoch": 53.07, "learning_rate": 2.347528524645393e-05, "loss": 1.9965, "step": 10711500 }, { "epoch": 53.07, "learning_rate": 2.3474046660027847e-05, "loss": 2.0635, "step": 10712000 }, { "epoch": 53.07, "learning_rate": 2.3472810550774613e-05, "loss": 1.9948, "step": 10712500 }, { "epoch": 53.08, "learning_rate": 2.347157196434853e-05, "loss": 2.026, "step": 10713000 }, { "epoch": 53.08, "learning_rate": 2.3470333377922447e-05, "loss": 2.0343, "step": 10713500 }, { "epoch": 53.08, "learning_rate": 2.3469094791496364e-05, "loss": 2.0295, "step": 10714000 }, { "epoch": 53.08, "learning_rate": 2.346785868224313e-05, "loss": 2.0434, "step": 10714500 }, { "epoch": 53.09, "learning_rate": 2.3466620095817046e-05, "loss": 2.0334, "step": 10715000 }, { "epoch": 53.09, "learning_rate": 2.3465381509390963e-05, "loss": 2.0126, "step": 10715500 }, { "epoch": 53.09, "learning_rate": 2.346414292296488e-05, "loss": 2.0412, "step": 10716000 }, { "epoch": 53.09, "learning_rate": 2.3462904336538797e-05, "loss": 2.0343, "step": 10716500 }, { "epoch": 53.1, "learning_rate": 2.3461665750112714e-05, "loss": 2.0253, "step": 10717000 }, { "epoch": 53.1, "learning_rate": 2.346042716368663e-05, "loss": 2.0362, "step": 10717500 }, { "epoch": 53.1, "learning_rate": 2.3459188577260548e-05, "loss": 2.0447, "step": 10718000 }, { "epoch": 53.1, "learning_rate": 2.345794999083446e-05, "loss": 2.0441, "step": 10718500 }, { "epoch": 53.11, "learning_rate": 2.3456711404408378e-05, "loss": 2.0269, "step": 10719000 }, { "epoch": 53.11, "learning_rate": 2.3455472817982295e-05, "loss": 2.0483, "step": 10719500 }, { "epoch": 53.11, "learning_rate": 2.3454236708729064e-05, "loss": 2.0432, "step": 10720000 }, { "epoch": 53.11, "learning_rate": 2.345299812230298e-05, "loss": 2.0364, "step": 10720500 }, { "epoch": 53.12, "learning_rate": 2.3451759535876898e-05, "loss": 2.0504, "step": 10721000 }, { "epoch": 53.12, "learning_rate": 2.3450520949450814e-05, "loss": 2.0068, "step": 10721500 }, { "epoch": 53.12, "learning_rate": 2.3449282363024728e-05, "loss": 2.0406, "step": 10722000 }, { "epoch": 53.12, "learning_rate": 2.3448046253771497e-05, "loss": 2.0394, "step": 10722500 }, { "epoch": 53.13, "learning_rate": 2.3446810144518266e-05, "loss": 2.0458, "step": 10723000 }, { "epoch": 53.13, "learning_rate": 2.344557155809218e-05, "loss": 2.0294, "step": 10723500 }, { "epoch": 53.13, "learning_rate": 2.3444332971666096e-05, "loss": 2.0598, "step": 10724000 }, { "epoch": 53.13, "learning_rate": 2.3443096862412865e-05, "loss": 2.0389, "step": 10724500 }, { "epoch": 53.14, "learning_rate": 2.3441860753159637e-05, "loss": 2.0127, "step": 10725000 }, { "epoch": 53.14, "learning_rate": 2.344062216673355e-05, "loss": 2.0129, "step": 10725500 }, { "epoch": 53.14, "learning_rate": 2.3439383580307468e-05, "loss": 2.0311, "step": 10726000 }, { "epoch": 53.14, "learning_rate": 2.3438144993881385e-05, "loss": 2.0261, "step": 10726500 }, { "epoch": 53.15, "learning_rate": 2.3436908884628153e-05, "loss": 2.0378, "step": 10727000 }, { "epoch": 53.15, "learning_rate": 2.343567029820207e-05, "loss": 2.0415, "step": 10727500 }, { "epoch": 53.15, "learning_rate": 2.3434431711775987e-05, "loss": 2.0216, "step": 10728000 }, { "epoch": 53.15, "learning_rate": 2.34331931253499e-05, "loss": 2.0386, "step": 10728500 }, { "epoch": 53.16, "learning_rate": 2.3431954538923818e-05, "loss": 2.0311, "step": 10729000 }, { "epoch": 53.16, "learning_rate": 2.3430715952497735e-05, "loss": 2.0361, "step": 10729500 }, { "epoch": 53.16, "learning_rate": 2.342947736607165e-05, "loss": 2.0452, "step": 10730000 }, { "epoch": 53.16, "learning_rate": 2.342823877964557e-05, "loss": 2.0292, "step": 10730500 }, { "epoch": 53.17, "learning_rate": 2.3427000193219485e-05, "loss": 2.0711, "step": 10731000 }, { "epoch": 53.17, "learning_rate": 2.34257616067934e-05, "loss": 2.02, "step": 10731500 }, { "epoch": 53.17, "learning_rate": 2.3424523020367316e-05, "loss": 2.0119, "step": 10732000 }, { "epoch": 53.17, "learning_rate": 2.3423284433941233e-05, "loss": 2.0415, "step": 10732500 }, { "epoch": 53.17, "learning_rate": 2.3422045847515146e-05, "loss": 2.0457, "step": 10733000 }, { "epoch": 53.18, "learning_rate": 2.3420807261089063e-05, "loss": 2.0301, "step": 10733500 }, { "epoch": 53.18, "learning_rate": 2.341956867466298e-05, "loss": 2.0351, "step": 10734000 }, { "epoch": 53.18, "learning_rate": 2.3418330088236897e-05, "loss": 2.0204, "step": 10734500 }, { "epoch": 53.18, "learning_rate": 2.3417091501810814e-05, "loss": 2.0204, "step": 10735000 }, { "epoch": 53.19, "learning_rate": 2.3415855392557583e-05, "loss": 2.0493, "step": 10735500 }, { "epoch": 53.19, "learning_rate": 2.3414616806131496e-05, "loss": 2.0366, "step": 10736000 }, { "epoch": 53.19, "learning_rate": 2.341338069687827e-05, "loss": 2.0401, "step": 10736500 }, { "epoch": 53.19, "learning_rate": 2.3412142110452186e-05, "loss": 2.0354, "step": 10737000 }, { "epoch": 53.2, "learning_rate": 2.3410903524026102e-05, "loss": 2.0449, "step": 10737500 }, { "epoch": 53.2, "learning_rate": 2.3409667414772868e-05, "loss": 2.0463, "step": 10738000 }, { "epoch": 53.2, "learning_rate": 2.3408428828346785e-05, "loss": 2.0664, "step": 10738500 }, { "epoch": 53.2, "learning_rate": 2.3407190241920702e-05, "loss": 2.0465, "step": 10739000 }, { "epoch": 53.21, "learning_rate": 2.340595165549462e-05, "loss": 2.0176, "step": 10739500 }, { "epoch": 53.21, "learning_rate": 2.3404713069068536e-05, "loss": 2.0364, "step": 10740000 }, { "epoch": 53.21, "learning_rate": 2.3403474482642452e-05, "loss": 2.0367, "step": 10740500 }, { "epoch": 53.21, "learning_rate": 2.340223589621637e-05, "loss": 2.0481, "step": 10741000 }, { "epoch": 53.22, "learning_rate": 2.3400997309790283e-05, "loss": 2.0315, "step": 10741500 }, { "epoch": 53.22, "learning_rate": 2.33997587233642e-05, "loss": 2.0354, "step": 10742000 }, { "epoch": 53.22, "learning_rate": 2.3398520136938117e-05, "loss": 2.0032, "step": 10742500 }, { "epoch": 53.22, "learning_rate": 2.339728155051203e-05, "loss": 2.0388, "step": 10743000 }, { "epoch": 53.23, "learning_rate": 2.3396045441258803e-05, "loss": 2.0241, "step": 10743500 }, { "epoch": 53.23, "learning_rate": 2.339480685483272e-05, "loss": 2.0066, "step": 10744000 }, { "epoch": 53.23, "learning_rate": 2.3393568268406636e-05, "loss": 2.0151, "step": 10744500 }, { "epoch": 53.23, "learning_rate": 2.339232968198055e-05, "loss": 2.0386, "step": 10745000 }, { "epoch": 53.24, "learning_rate": 2.3391091095554467e-05, "loss": 2.0141, "step": 10745500 }, { "epoch": 53.24, "learning_rate": 2.338985250912838e-05, "loss": 2.034, "step": 10746000 }, { "epoch": 53.24, "learning_rate": 2.3388613922702297e-05, "loss": 2.0643, "step": 10746500 }, { "epoch": 53.24, "learning_rate": 2.338737781344907e-05, "loss": 2.0455, "step": 10747000 }, { "epoch": 53.25, "learning_rate": 2.3386139227022986e-05, "loss": 2.0344, "step": 10747500 }, { "epoch": 53.25, "learning_rate": 2.3384900640596903e-05, "loss": 2.0441, "step": 10748000 }, { "epoch": 53.25, "learning_rate": 2.3383662054170817e-05, "loss": 2.032, "step": 10748500 }, { "epoch": 53.25, "learning_rate": 2.3382423467744734e-05, "loss": 2.0345, "step": 10749000 }, { "epoch": 53.26, "learning_rate": 2.3381184881318647e-05, "loss": 2.0679, "step": 10749500 }, { "epoch": 53.26, "learning_rate": 2.3379946294892564e-05, "loss": 2.0408, "step": 10750000 }, { "epoch": 53.26, "learning_rate": 2.337870770846648e-05, "loss": 2.0453, "step": 10750500 }, { "epoch": 53.26, "learning_rate": 2.3377471599213253e-05, "loss": 2.0618, "step": 10751000 }, { "epoch": 53.27, "learning_rate": 2.337623548996002e-05, "loss": 2.0487, "step": 10751500 }, { "epoch": 53.27, "learning_rate": 2.3374996903533936e-05, "loss": 2.0392, "step": 10752000 }, { "epoch": 53.27, "learning_rate": 2.3373758317107853e-05, "loss": 2.0532, "step": 10752500 }, { "epoch": 53.27, "learning_rate": 2.337252220785462e-05, "loss": 2.0434, "step": 10753000 }, { "epoch": 53.28, "learning_rate": 2.3371283621428535e-05, "loss": 2.0279, "step": 10753500 }, { "epoch": 53.28, "learning_rate": 2.3370045035002452e-05, "loss": 2.0326, "step": 10754000 }, { "epoch": 53.28, "learning_rate": 2.336880644857637e-05, "loss": 2.0345, "step": 10754500 }, { "epoch": 53.28, "learning_rate": 2.3367570339323138e-05, "loss": 2.0285, "step": 10755000 }, { "epoch": 53.29, "learning_rate": 2.3366331752897055e-05, "loss": 2.0208, "step": 10755500 }, { "epoch": 53.29, "learning_rate": 2.336509316647097e-05, "loss": 2.0287, "step": 10756000 }, { "epoch": 53.29, "learning_rate": 2.336385458004489e-05, "loss": 2.042, "step": 10756500 }, { "epoch": 53.29, "learning_rate": 2.3362618470791657e-05, "loss": 2.0187, "step": 10757000 }, { "epoch": 53.3, "learning_rate": 2.336137988436557e-05, "loss": 2.0322, "step": 10757500 }, { "epoch": 53.3, "learning_rate": 2.3360141297939488e-05, "loss": 2.0458, "step": 10758000 }, { "epoch": 53.3, "learning_rate": 2.3358902711513405e-05, "loss": 2.0508, "step": 10758500 }, { "epoch": 53.3, "learning_rate": 2.335766412508732e-05, "loss": 2.0302, "step": 10759000 }, { "epoch": 53.31, "learning_rate": 2.335642553866124e-05, "loss": 2.0408, "step": 10759500 }, { "epoch": 53.31, "learning_rate": 2.3355186952235152e-05, "loss": 2.0292, "step": 10760000 }, { "epoch": 53.31, "learning_rate": 2.335394836580907e-05, "loss": 2.0396, "step": 10760500 }, { "epoch": 53.31, "learning_rate": 2.3352709779382986e-05, "loss": 2.0319, "step": 10761000 }, { "epoch": 53.32, "learning_rate": 2.3351471192956903e-05, "loss": 2.0568, "step": 10761500 }, { "epoch": 53.32, "learning_rate": 2.335023260653082e-05, "loss": 2.031, "step": 10762000 }, { "epoch": 53.32, "learning_rate": 2.3348994020104737e-05, "loss": 2.0231, "step": 10762500 }, { "epoch": 53.32, "learning_rate": 2.3347757910851502e-05, "loss": 2.0518, "step": 10763000 }, { "epoch": 53.33, "learning_rate": 2.334651932442542e-05, "loss": 2.051, "step": 10763500 }, { "epoch": 53.33, "learning_rate": 2.3345280737999336e-05, "loss": 2.0226, "step": 10764000 }, { "epoch": 53.33, "learning_rate": 2.3344042151573253e-05, "loss": 2.0478, "step": 10764500 }, { "epoch": 53.33, "learning_rate": 2.3342806042320022e-05, "loss": 2.0372, "step": 10765000 }, { "epoch": 53.34, "learning_rate": 2.334156745589394e-05, "loss": 2.009, "step": 10765500 }, { "epoch": 53.34, "learning_rate": 2.3340328869467852e-05, "loss": 2.0353, "step": 10766000 }, { "epoch": 53.34, "learning_rate": 2.333909028304177e-05, "loss": 2.0465, "step": 10766500 }, { "epoch": 53.34, "learning_rate": 2.333785417378854e-05, "loss": 2.0233, "step": 10767000 }, { "epoch": 53.35, "learning_rate": 2.3336615587362455e-05, "loss": 2.0439, "step": 10767500 }, { "epoch": 53.35, "learning_rate": 2.3335379478109224e-05, "loss": 2.0257, "step": 10768000 }, { "epoch": 53.35, "learning_rate": 2.333414089168314e-05, "loss": 2.0665, "step": 10768500 }, { "epoch": 53.35, "learning_rate": 2.3332902305257058e-05, "loss": 2.0453, "step": 10769000 }, { "epoch": 53.36, "learning_rate": 2.3331663718830975e-05, "loss": 2.0545, "step": 10769500 }, { "epoch": 53.36, "learning_rate": 2.333042513240489e-05, "loss": 2.0615, "step": 10770000 }, { "epoch": 53.36, "learning_rate": 2.332918654597881e-05, "loss": 2.034, "step": 10770500 }, { "epoch": 53.36, "learning_rate": 2.3327947959552722e-05, "loss": 2.0218, "step": 10771000 }, { "epoch": 53.37, "learning_rate": 2.332671185029949e-05, "loss": 2.0637, "step": 10771500 }, { "epoch": 53.37, "learning_rate": 2.332547574104626e-05, "loss": 2.0326, "step": 10772000 }, { "epoch": 53.37, "learning_rate": 2.3324237154620176e-05, "loss": 2.0741, "step": 10772500 }, { "epoch": 53.37, "learning_rate": 2.3322998568194093e-05, "loss": 2.0416, "step": 10773000 }, { "epoch": 53.38, "learning_rate": 2.332175998176801e-05, "loss": 2.0234, "step": 10773500 }, { "epoch": 53.38, "learning_rate": 2.3320521395341927e-05, "loss": 2.0399, "step": 10774000 }, { "epoch": 53.38, "learning_rate": 2.3319285286088693e-05, "loss": 2.0498, "step": 10774500 }, { "epoch": 53.38, "learning_rate": 2.331804669966261e-05, "loss": 2.0525, "step": 10775000 }, { "epoch": 53.39, "learning_rate": 2.3316808113236527e-05, "loss": 2.0079, "step": 10775500 }, { "epoch": 53.39, "learning_rate": 2.3315569526810443e-05, "loss": 2.0144, "step": 10776000 }, { "epoch": 53.39, "learning_rate": 2.331433094038436e-05, "loss": 2.0592, "step": 10776500 }, { "epoch": 53.39, "learning_rate": 2.3313092353958277e-05, "loss": 2.0101, "step": 10777000 }, { "epoch": 53.4, "learning_rate": 2.331185376753219e-05, "loss": 2.0445, "step": 10777500 }, { "epoch": 53.4, "learning_rate": 2.3310615181106108e-05, "loss": 2.0544, "step": 10778000 }, { "epoch": 53.4, "learning_rate": 2.3309376594680025e-05, "loss": 2.0099, "step": 10778500 }, { "epoch": 53.4, "learning_rate": 2.330813800825394e-05, "loss": 2.0227, "step": 10779000 }, { "epoch": 53.41, "learning_rate": 2.330689942182786e-05, "loss": 2.0605, "step": 10779500 }, { "epoch": 53.41, "learning_rate": 2.3305660835401775e-05, "loss": 2.061, "step": 10780000 }, { "epoch": 53.41, "learning_rate": 2.3304422248975692e-05, "loss": 2.05, "step": 10780500 }, { "epoch": 53.41, "learning_rate": 2.3303183662549606e-05, "loss": 2.044, "step": 10781000 }, { "epoch": 53.42, "learning_rate": 2.3301945076123523e-05, "loss": 2.0305, "step": 10781500 }, { "epoch": 53.42, "learning_rate": 2.3300706489697436e-05, "loss": 2.055, "step": 10782000 }, { "epoch": 53.42, "learning_rate": 2.329947038044421e-05, "loss": 2.0284, "step": 10782500 }, { "epoch": 53.42, "learning_rate": 2.3298231794018126e-05, "loss": 2.0299, "step": 10783000 }, { "epoch": 53.43, "learning_rate": 2.3296998161937743e-05, "loss": 2.0426, "step": 10783500 }, { "epoch": 53.43, "learning_rate": 2.329575957551166e-05, "loss": 2.0296, "step": 10784000 }, { "epoch": 53.43, "learning_rate": 2.3294520989085577e-05, "loss": 2.0394, "step": 10784500 }, { "epoch": 53.43, "learning_rate": 2.3293282402659494e-05, "loss": 2.0457, "step": 10785000 }, { "epoch": 53.44, "learning_rate": 2.3292046293406262e-05, "loss": 2.0285, "step": 10785500 }, { "epoch": 53.44, "learning_rate": 2.329080770698018e-05, "loss": 2.0525, "step": 10786000 }, { "epoch": 53.44, "learning_rate": 2.3289569120554093e-05, "loss": 2.0526, "step": 10786500 }, { "epoch": 53.44, "learning_rate": 2.328833053412801e-05, "loss": 2.0523, "step": 10787000 }, { "epoch": 53.45, "learning_rate": 2.3287091947701927e-05, "loss": 2.0306, "step": 10787500 }, { "epoch": 53.45, "learning_rate": 2.3285853361275844e-05, "loss": 2.0517, "step": 10788000 }, { "epoch": 53.45, "learning_rate": 2.328461477484976e-05, "loss": 2.0338, "step": 10788500 }, { "epoch": 53.45, "learning_rate": 2.3283376188423678e-05, "loss": 2.0374, "step": 10789000 }, { "epoch": 53.45, "learning_rate": 2.3282137601997594e-05, "loss": 2.0341, "step": 10789500 }, { "epoch": 53.46, "learning_rate": 2.3280899015571508e-05, "loss": 2.0276, "step": 10790000 }, { "epoch": 53.46, "learning_rate": 2.3279660429145425e-05, "loss": 2.0436, "step": 10790500 }, { "epoch": 53.46, "learning_rate": 2.3278424319892194e-05, "loss": 2.061, "step": 10791000 }, { "epoch": 53.46, "learning_rate": 2.327718573346611e-05, "loss": 2.0499, "step": 10791500 }, { "epoch": 53.47, "learning_rate": 2.3275947147040028e-05, "loss": 2.0357, "step": 10792000 }, { "epoch": 53.47, "learning_rate": 2.3274708560613945e-05, "loss": 2.0327, "step": 10792500 }, { "epoch": 53.47, "learning_rate": 2.3273469974187858e-05, "loss": 2.0459, "step": 10793000 }, { "epoch": 53.47, "learning_rate": 2.3272231387761775e-05, "loss": 2.0428, "step": 10793500 }, { "epoch": 53.48, "learning_rate": 2.3270992801335692e-05, "loss": 2.0415, "step": 10794000 }, { "epoch": 53.48, "learning_rate": 2.326975421490961e-05, "loss": 2.0099, "step": 10794500 }, { "epoch": 53.48, "learning_rate": 2.3268515628483526e-05, "loss": 2.047, "step": 10795000 }, { "epoch": 53.48, "learning_rate": 2.3267277042057443e-05, "loss": 2.0306, "step": 10795500 }, { "epoch": 53.49, "learning_rate": 2.326603845563136e-05, "loss": 2.0259, "step": 10796000 }, { "epoch": 53.49, "learning_rate": 2.3264802346378125e-05, "loss": 2.0269, "step": 10796500 }, { "epoch": 53.49, "learning_rate": 2.3263563759952042e-05, "loss": 2.0399, "step": 10797000 }, { "epoch": 53.49, "learning_rate": 2.326232517352596e-05, "loss": 2.0352, "step": 10797500 }, { "epoch": 53.5, "learning_rate": 2.3261089064272728e-05, "loss": 2.04, "step": 10798000 }, { "epoch": 53.5, "learning_rate": 2.3259850477846645e-05, "loss": 2.023, "step": 10798500 }, { "epoch": 53.5, "learning_rate": 2.325861189142056e-05, "loss": 2.0505, "step": 10799000 }, { "epoch": 53.5, "learning_rate": 2.3257373304994475e-05, "loss": 2.0509, "step": 10799500 }, { "epoch": 53.51, "learning_rate": 2.3256134718568392e-05, "loss": 2.0347, "step": 10800000 }, { "epoch": 53.51, "learning_rate": 2.325489613214231e-05, "loss": 2.0322, "step": 10800500 }, { "epoch": 53.51, "learning_rate": 2.3253660022889078e-05, "loss": 2.0411, "step": 10801000 }, { "epoch": 53.51, "learning_rate": 2.3252421436462995e-05, "loss": 2.0627, "step": 10801500 }, { "epoch": 53.52, "learning_rate": 2.325118285003691e-05, "loss": 2.0274, "step": 10802000 }, { "epoch": 53.52, "learning_rate": 2.3249944263610825e-05, "loss": 2.0542, "step": 10802500 }, { "epoch": 53.52, "learning_rate": 2.3248705677184742e-05, "loss": 2.0489, "step": 10803000 }, { "epoch": 53.52, "learning_rate": 2.324746709075866e-05, "loss": 2.0284, "step": 10803500 }, { "epoch": 53.53, "learning_rate": 2.3246228504332576e-05, "loss": 2.0195, "step": 10804000 }, { "epoch": 53.53, "learning_rate": 2.3244992395079345e-05, "loss": 2.0353, "step": 10804500 }, { "epoch": 53.53, "learning_rate": 2.3243758762998965e-05, "loss": 2.0178, "step": 10805000 }, { "epoch": 53.53, "learning_rate": 2.3242520176572882e-05, "loss": 2.0247, "step": 10805500 }, { "epoch": 53.54, "learning_rate": 2.32412815901468e-05, "loss": 2.0503, "step": 10806000 }, { "epoch": 53.54, "learning_rate": 2.3240043003720716e-05, "loss": 2.0379, "step": 10806500 }, { "epoch": 53.54, "learning_rate": 2.3238804417294633e-05, "loss": 2.055, "step": 10807000 }, { "epoch": 53.54, "learning_rate": 2.3237565830868547e-05, "loss": 2.0269, "step": 10807500 }, { "epoch": 53.55, "learning_rate": 2.3236327244442464e-05, "loss": 2.0456, "step": 10808000 }, { "epoch": 53.55, "learning_rate": 2.323508865801638e-05, "loss": 2.0449, "step": 10808500 }, { "epoch": 53.55, "learning_rate": 2.3233850071590298e-05, "loss": 2.0237, "step": 10809000 }, { "epoch": 53.55, "learning_rate": 2.3232611485164214e-05, "loss": 2.038, "step": 10809500 }, { "epoch": 53.56, "learning_rate": 2.3231372898738128e-05, "loss": 2.0417, "step": 10810000 }, { "epoch": 53.56, "learning_rate": 2.3230134312312045e-05, "loss": 2.027, "step": 10810500 }, { "epoch": 53.56, "learning_rate": 2.3228895725885962e-05, "loss": 2.0322, "step": 10811000 }, { "epoch": 53.56, "learning_rate": 2.322765713945988e-05, "loss": 2.0674, "step": 10811500 }, { "epoch": 53.57, "learning_rate": 2.3226418553033792e-05, "loss": 2.0541, "step": 10812000 }, { "epoch": 53.57, "learning_rate": 2.322517996660771e-05, "loss": 2.049, "step": 10812500 }, { "epoch": 53.57, "learning_rate": 2.3223941380181626e-05, "loss": 2.0465, "step": 10813000 }, { "epoch": 53.57, "learning_rate": 2.3222705270928395e-05, "loss": 2.0368, "step": 10813500 }, { "epoch": 53.58, "learning_rate": 2.3221466684502312e-05, "loss": 2.0302, "step": 10814000 }, { "epoch": 53.58, "learning_rate": 2.322022809807623e-05, "loss": 2.0117, "step": 10814500 }, { "epoch": 53.58, "learning_rate": 2.3218989511650142e-05, "loss": 2.0148, "step": 10815000 }, { "epoch": 53.58, "learning_rate": 2.321775092522406e-05, "loss": 2.0237, "step": 10815500 }, { "epoch": 53.59, "learning_rate": 2.3216512338797976e-05, "loss": 2.0557, "step": 10816000 }, { "epoch": 53.59, "learning_rate": 2.3215273752371893e-05, "loss": 2.0249, "step": 10816500 }, { "epoch": 53.59, "learning_rate": 2.321403516594581e-05, "loss": 2.0205, "step": 10817000 }, { "epoch": 53.59, "learning_rate": 2.321279905669258e-05, "loss": 2.0357, "step": 10817500 }, { "epoch": 53.6, "learning_rate": 2.3211560470266492e-05, "loss": 2.0274, "step": 10818000 }, { "epoch": 53.6, "learning_rate": 2.3210324361013265e-05, "loss": 2.0397, "step": 10818500 }, { "epoch": 53.6, "learning_rate": 2.320908577458718e-05, "loss": 2.0306, "step": 10819000 }, { "epoch": 53.6, "learning_rate": 2.320784966533395e-05, "loss": 2.0326, "step": 10819500 }, { "epoch": 53.61, "learning_rate": 2.3206611078907864e-05, "loss": 2.0414, "step": 10820000 }, { "epoch": 53.61, "learning_rate": 2.3205374969654633e-05, "loss": 2.0485, "step": 10820500 }, { "epoch": 53.61, "learning_rate": 2.320413638322855e-05, "loss": 2.0461, "step": 10821000 }, { "epoch": 53.61, "learning_rate": 2.3202897796802467e-05, "loss": 2.011, "step": 10821500 }, { "epoch": 53.62, "learning_rate": 2.3201659210376384e-05, "loss": 2.0659, "step": 10822000 }, { "epoch": 53.62, "learning_rate": 2.32004206239503e-05, "loss": 2.0538, "step": 10822500 }, { "epoch": 53.62, "learning_rate": 2.3199182037524214e-05, "loss": 2.0586, "step": 10823000 }, { "epoch": 53.62, "learning_rate": 2.319794345109813e-05, "loss": 2.026, "step": 10823500 }, { "epoch": 53.63, "learning_rate": 2.3196704864672048e-05, "loss": 2.046, "step": 10824000 }, { "epoch": 53.63, "learning_rate": 2.3195466278245965e-05, "loss": 2.0303, "step": 10824500 }, { "epoch": 53.63, "learning_rate": 2.3194227691819882e-05, "loss": 2.0464, "step": 10825000 }, { "epoch": 53.63, "learning_rate": 2.31929891053938e-05, "loss": 2.0666, "step": 10825500 }, { "epoch": 53.64, "learning_rate": 2.3191750518967716e-05, "loss": 2.0378, "step": 10826000 }, { "epoch": 53.64, "learning_rate": 2.319051440971448e-05, "loss": 2.0489, "step": 10826500 }, { "epoch": 53.64, "learning_rate": 2.3189275823288398e-05, "loss": 2.0381, "step": 10827000 }, { "epoch": 53.64, "learning_rate": 2.3188039714035167e-05, "loss": 2.0173, "step": 10827500 }, { "epoch": 53.65, "learning_rate": 2.3186801127609084e-05, "loss": 2.048, "step": 10828000 }, { "epoch": 53.65, "learning_rate": 2.3185562541183e-05, "loss": 2.045, "step": 10828500 }, { "epoch": 53.65, "learning_rate": 2.3184323954756917e-05, "loss": 2.0438, "step": 10829000 }, { "epoch": 53.65, "learning_rate": 2.318308536833083e-05, "loss": 2.0225, "step": 10829500 }, { "epoch": 53.66, "learning_rate": 2.3181846781904748e-05, "loss": 2.0348, "step": 10830000 }, { "epoch": 53.66, "learning_rate": 2.3180608195478665e-05, "loss": 2.0403, "step": 10830500 }, { "epoch": 53.66, "learning_rate": 2.3179369609052582e-05, "loss": 2.0241, "step": 10831000 }, { "epoch": 53.66, "learning_rate": 2.31781310226265e-05, "loss": 2.0532, "step": 10831500 }, { "epoch": 53.67, "learning_rate": 2.3176892436200416e-05, "loss": 2.0485, "step": 10832000 }, { "epoch": 53.67, "learning_rate": 2.317565632694718e-05, "loss": 2.0649, "step": 10832500 }, { "epoch": 53.67, "learning_rate": 2.3174417740521098e-05, "loss": 2.0438, "step": 10833000 }, { "epoch": 53.67, "learning_rate": 2.3173179154095015e-05, "loss": 2.0217, "step": 10833500 }, { "epoch": 53.68, "learning_rate": 2.3171940567668932e-05, "loss": 2.0486, "step": 10834000 }, { "epoch": 53.68, "learning_rate": 2.317070198124285e-05, "loss": 2.0474, "step": 10834500 }, { "epoch": 53.68, "learning_rate": 2.3169465871989618e-05, "loss": 2.0389, "step": 10835000 }, { "epoch": 53.68, "learning_rate": 2.316822728556353e-05, "loss": 2.0365, "step": 10835500 }, { "epoch": 53.69, "learning_rate": 2.31669911763103e-05, "loss": 2.0234, "step": 10836000 }, { "epoch": 53.69, "learning_rate": 2.3165755067057072e-05, "loss": 2.0328, "step": 10836500 }, { "epoch": 53.69, "learning_rate": 2.316451648063099e-05, "loss": 2.0373, "step": 10837000 }, { "epoch": 53.69, "learning_rate": 2.3163277894204903e-05, "loss": 2.0428, "step": 10837500 }, { "epoch": 53.7, "learning_rate": 2.316203930777882e-05, "loss": 2.0217, "step": 10838000 }, { "epoch": 53.7, "learning_rate": 2.3160800721352736e-05, "loss": 2.0352, "step": 10838500 }, { "epoch": 53.7, "learning_rate": 2.315956213492665e-05, "loss": 2.0401, "step": 10839000 }, { "epoch": 53.7, "learning_rate": 2.3158323548500567e-05, "loss": 2.0369, "step": 10839500 }, { "epoch": 53.71, "learning_rate": 2.3157084962074484e-05, "loss": 2.056, "step": 10840000 }, { "epoch": 53.71, "learning_rate": 2.31558463756484e-05, "loss": 2.0463, "step": 10840500 }, { "epoch": 53.71, "learning_rate": 2.3154607789222318e-05, "loss": 2.0388, "step": 10841000 }, { "epoch": 53.71, "learning_rate": 2.3153369202796235e-05, "loss": 2.0505, "step": 10841500 }, { "epoch": 53.72, "learning_rate": 2.3152130616370148e-05, "loss": 2.062, "step": 10842000 }, { "epoch": 53.72, "learning_rate": 2.3150894507116917e-05, "loss": 2.0421, "step": 10842500 }, { "epoch": 53.72, "learning_rate": 2.3149655920690834e-05, "loss": 2.0393, "step": 10843000 }, { "epoch": 53.72, "learning_rate": 2.314841733426475e-05, "loss": 2.0326, "step": 10843500 }, { "epoch": 53.72, "learning_rate": 2.3147178747838668e-05, "loss": 2.0536, "step": 10844000 }, { "epoch": 53.73, "learning_rate": 2.3145940161412585e-05, "loss": 2.0526, "step": 10844500 }, { "epoch": 53.73, "learning_rate": 2.3144701574986498e-05, "loss": 2.0511, "step": 10845000 }, { "epoch": 53.73, "learning_rate": 2.3143462988560415e-05, "loss": 2.0795, "step": 10845500 }, { "epoch": 53.73, "learning_rate": 2.3142224402134332e-05, "loss": 2.0497, "step": 10846000 }, { "epoch": 53.74, "learning_rate": 2.314098581570825e-05, "loss": 2.0357, "step": 10846500 }, { "epoch": 53.74, "learning_rate": 2.3139747229282166e-05, "loss": 2.056, "step": 10847000 }, { "epoch": 53.74, "learning_rate": 2.3138508642856083e-05, "loss": 2.0287, "step": 10847500 }, { "epoch": 53.74, "learning_rate": 2.313727005643e-05, "loss": 2.0368, "step": 10848000 }, { "epoch": 53.75, "learning_rate": 2.3136033947176765e-05, "loss": 2.0336, "step": 10848500 }, { "epoch": 53.75, "learning_rate": 2.3134797837923537e-05, "loss": 2.0332, "step": 10849000 }, { "epoch": 53.75, "learning_rate": 2.313355925149745e-05, "loss": 2.028, "step": 10849500 }, { "epoch": 53.75, "learning_rate": 2.3132320665071368e-05, "loss": 2.0381, "step": 10850000 }, { "epoch": 53.76, "learning_rate": 2.3131082078645285e-05, "loss": 2.0663, "step": 10850500 }, { "epoch": 53.76, "learning_rate": 2.3129843492219202e-05, "loss": 2.0448, "step": 10851000 }, { "epoch": 53.76, "learning_rate": 2.3128604905793115e-05, "loss": 2.0365, "step": 10851500 }, { "epoch": 53.76, "learning_rate": 2.3127366319367032e-05, "loss": 2.0497, "step": 10852000 }, { "epoch": 53.77, "learning_rate": 2.3126130210113804e-05, "loss": 2.0521, "step": 10852500 }, { "epoch": 53.77, "learning_rate": 2.3124891623687718e-05, "loss": 2.0581, "step": 10853000 }, { "epoch": 53.77, "learning_rate": 2.3123655514434487e-05, "loss": 2.0204, "step": 10853500 }, { "epoch": 53.77, "learning_rate": 2.3122416928008404e-05, "loss": 2.0152, "step": 10854000 }, { "epoch": 53.78, "learning_rate": 2.312117834158232e-05, "loss": 2.041, "step": 10854500 }, { "epoch": 53.78, "learning_rate": 2.3119939755156238e-05, "loss": 2.0482, "step": 10855000 }, { "epoch": 53.78, "learning_rate": 2.3118701168730155e-05, "loss": 2.0571, "step": 10855500 }, { "epoch": 53.78, "learning_rate": 2.3117462582304068e-05, "loss": 2.06, "step": 10856000 }, { "epoch": 53.79, "learning_rate": 2.3116223995877985e-05, "loss": 2.012, "step": 10856500 }, { "epoch": 53.79, "learning_rate": 2.3114987886624754e-05, "loss": 2.0524, "step": 10857000 }, { "epoch": 53.79, "learning_rate": 2.311374930019867e-05, "loss": 2.0207, "step": 10857500 }, { "epoch": 53.79, "learning_rate": 2.3112510713772588e-05, "loss": 2.0371, "step": 10858000 }, { "epoch": 53.8, "learning_rate": 2.3111272127346505e-05, "loss": 2.0479, "step": 10858500 }, { "epoch": 53.8, "learning_rate": 2.311003354092042e-05, "loss": 2.0464, "step": 10859000 }, { "epoch": 53.8, "learning_rate": 2.3108794954494335e-05, "loss": 2.0388, "step": 10859500 }, { "epoch": 53.8, "learning_rate": 2.3107556368068252e-05, "loss": 2.0571, "step": 10860000 }, { "epoch": 53.81, "learning_rate": 2.3106317781642166e-05, "loss": 2.045, "step": 10860500 }, { "epoch": 53.81, "learning_rate": 2.3105079195216082e-05, "loss": 2.0309, "step": 10861000 }, { "epoch": 53.81, "learning_rate": 2.310384060879e-05, "loss": 2.0333, "step": 10861500 }, { "epoch": 53.81, "learning_rate": 2.310260449953677e-05, "loss": 2.0415, "step": 10862000 }, { "epoch": 53.82, "learning_rate": 2.3101368390283537e-05, "loss": 2.0313, "step": 10862500 }, { "epoch": 53.82, "learning_rate": 2.3100129803857454e-05, "loss": 2.0435, "step": 10863000 }, { "epoch": 53.82, "learning_rate": 2.309889121743137e-05, "loss": 2.0437, "step": 10863500 }, { "epoch": 53.82, "learning_rate": 2.3097652631005288e-05, "loss": 2.0687, "step": 10864000 }, { "epoch": 53.83, "learning_rate": 2.3096414044579205e-05, "loss": 2.0387, "step": 10864500 }, { "epoch": 53.83, "learning_rate": 2.309517545815312e-05, "loss": 2.0307, "step": 10865000 }, { "epoch": 53.83, "learning_rate": 2.309393687172704e-05, "loss": 2.0538, "step": 10865500 }, { "epoch": 53.83, "learning_rate": 2.3092698285300955e-05, "loss": 2.0418, "step": 10866000 }, { "epoch": 53.84, "learning_rate": 2.309146217604772e-05, "loss": 2.028, "step": 10866500 }, { "epoch": 53.84, "learning_rate": 2.309022606679449e-05, "loss": 2.0294, "step": 10867000 }, { "epoch": 53.84, "learning_rate": 2.3088987480368407e-05, "loss": 2.0504, "step": 10867500 }, { "epoch": 53.84, "learning_rate": 2.3087748893942324e-05, "loss": 2.0282, "step": 10868000 }, { "epoch": 53.85, "learning_rate": 2.308651030751624e-05, "loss": 2.0602, "step": 10868500 }, { "epoch": 53.85, "learning_rate": 2.3085271721090154e-05, "loss": 2.0406, "step": 10869000 }, { "epoch": 53.85, "learning_rate": 2.308403313466407e-05, "loss": 2.0578, "step": 10869500 }, { "epoch": 53.85, "learning_rate": 2.3082794548237988e-05, "loss": 2.0714, "step": 10870000 }, { "epoch": 53.86, "learning_rate": 2.3081558438984757e-05, "loss": 2.0331, "step": 10870500 }, { "epoch": 53.86, "learning_rate": 2.3080322329731526e-05, "loss": 2.0273, "step": 10871000 }, { "epoch": 53.86, "learning_rate": 2.3079083743305442e-05, "loss": 2.055, "step": 10871500 }, { "epoch": 53.86, "learning_rate": 2.3077845156879356e-05, "loss": 2.0439, "step": 10872000 }, { "epoch": 53.87, "learning_rate": 2.3076606570453273e-05, "loss": 2.059, "step": 10872500 }, { "epoch": 53.87, "learning_rate": 2.307536798402719e-05, "loss": 2.0268, "step": 10873000 }, { "epoch": 53.87, "learning_rate": 2.3074129397601107e-05, "loss": 2.0467, "step": 10873500 }, { "epoch": 53.87, "learning_rate": 2.3072890811175024e-05, "loss": 2.0581, "step": 10874000 }, { "epoch": 53.88, "learning_rate": 2.307165222474894e-05, "loss": 2.0441, "step": 10874500 }, { "epoch": 53.88, "learning_rate": 2.3070413638322854e-05, "loss": 2.0091, "step": 10875000 }, { "epoch": 53.88, "learning_rate": 2.306917505189677e-05, "loss": 2.0582, "step": 10875500 }, { "epoch": 53.88, "learning_rate": 2.306793894264354e-05, "loss": 2.0273, "step": 10876000 }, { "epoch": 53.89, "learning_rate": 2.3066700356217457e-05, "loss": 2.0415, "step": 10876500 }, { "epoch": 53.89, "learning_rate": 2.3065461769791374e-05, "loss": 2.0536, "step": 10877000 }, { "epoch": 53.89, "learning_rate": 2.3064225660538143e-05, "loss": 2.0512, "step": 10877500 }, { "epoch": 53.89, "learning_rate": 2.306298707411206e-05, "loss": 2.0164, "step": 10878000 }, { "epoch": 53.9, "learning_rate": 2.3061748487685973e-05, "loss": 2.0507, "step": 10878500 }, { "epoch": 53.9, "learning_rate": 2.306050990125989e-05, "loss": 2.0268, "step": 10879000 }, { "epoch": 53.9, "learning_rate": 2.3059271314833807e-05, "loss": 2.0353, "step": 10879500 }, { "epoch": 53.9, "learning_rate": 2.3058032728407724e-05, "loss": 2.0323, "step": 10880000 }, { "epoch": 53.91, "learning_rate": 2.305679414198164e-05, "loss": 2.0423, "step": 10880500 }, { "epoch": 53.91, "learning_rate": 2.3055555555555558e-05, "loss": 2.0366, "step": 10881000 }, { "epoch": 53.91, "learning_rate": 2.305431696912947e-05, "loss": 2.0684, "step": 10881500 }, { "epoch": 53.91, "learning_rate": 2.3053078382703388e-05, "loss": 2.0486, "step": 10882000 }, { "epoch": 53.92, "learning_rate": 2.3051839796277305e-05, "loss": 2.0324, "step": 10882500 }, { "epoch": 53.92, "learning_rate": 2.3050603687024074e-05, "loss": 2.0249, "step": 10883000 }, { "epoch": 53.92, "learning_rate": 2.304936510059799e-05, "loss": 2.0593, "step": 10883500 }, { "epoch": 53.92, "learning_rate": 2.3048126514171908e-05, "loss": 2.0378, "step": 10884000 }, { "epoch": 53.93, "learning_rate": 2.304688792774582e-05, "loss": 2.0565, "step": 10884500 }, { "epoch": 53.93, "learning_rate": 2.3045649341319738e-05, "loss": 2.0606, "step": 10885000 }, { "epoch": 53.93, "learning_rate": 2.3044410754893655e-05, "loss": 2.045, "step": 10885500 }, { "epoch": 53.93, "learning_rate": 2.3043172168467572e-05, "loss": 2.0302, "step": 10886000 }, { "epoch": 53.94, "learning_rate": 2.304193358204149e-05, "loss": 2.0424, "step": 10886500 }, { "epoch": 53.94, "learning_rate": 2.3040697472788258e-05, "loss": 2.0411, "step": 10887000 }, { "epoch": 53.94, "learning_rate": 2.303945888636217e-05, "loss": 2.0241, "step": 10887500 }, { "epoch": 53.94, "learning_rate": 2.3038220299936088e-05, "loss": 2.0651, "step": 10888000 }, { "epoch": 53.95, "learning_rate": 2.3036984190682857e-05, "loss": 2.047, "step": 10888500 }, { "epoch": 53.95, "learning_rate": 2.3035745604256774e-05, "loss": 2.0641, "step": 10889000 }, { "epoch": 53.95, "learning_rate": 2.303450701783069e-05, "loss": 2.0459, "step": 10889500 }, { "epoch": 53.95, "learning_rate": 2.3033268431404608e-05, "loss": 2.0506, "step": 10890000 }, { "epoch": 53.96, "learning_rate": 2.303202984497852e-05, "loss": 2.051, "step": 10890500 }, { "epoch": 53.96, "learning_rate": 2.303079125855244e-05, "loss": 2.0403, "step": 10891000 }, { "epoch": 53.96, "learning_rate": 2.3029552672126355e-05, "loss": 2.0421, "step": 10891500 }, { "epoch": 53.96, "learning_rate": 2.3028314085700272e-05, "loss": 2.0449, "step": 10892000 }, { "epoch": 53.97, "learning_rate": 2.302707549927419e-05, "loss": 2.0501, "step": 10892500 }, { "epoch": 53.97, "learning_rate": 2.3025836912848106e-05, "loss": 2.0283, "step": 10893000 }, { "epoch": 53.97, "learning_rate": 2.3024598326422023e-05, "loss": 2.0237, "step": 10893500 }, { "epoch": 53.97, "learning_rate": 2.302335973999594e-05, "loss": 2.0733, "step": 10894000 }, { "epoch": 53.98, "learning_rate": 2.3022121153569857e-05, "loss": 2.0321, "step": 10894500 }, { "epoch": 53.98, "learning_rate": 2.302088256714377e-05, "loss": 2.0582, "step": 10895000 }, { "epoch": 53.98, "learning_rate": 2.301964645789054e-05, "loss": 2.0506, "step": 10895500 }, { "epoch": 53.98, "learning_rate": 2.3018407871464456e-05, "loss": 2.0293, "step": 10896000 }, { "epoch": 53.99, "learning_rate": 2.3017169285038373e-05, "loss": 2.0723, "step": 10896500 }, { "epoch": 53.99, "learning_rate": 2.301593069861229e-05, "loss": 2.0227, "step": 10897000 }, { "epoch": 53.99, "learning_rate": 2.3014694589359055e-05, "loss": 2.0696, "step": 10897500 }, { "epoch": 53.99, "learning_rate": 2.3013456002932972e-05, "loss": 2.0461, "step": 10898000 }, { "epoch": 53.99, "learning_rate": 2.301221741650689e-05, "loss": 2.0527, "step": 10898500 }, { "epoch": 54.0, "learning_rate": 2.3010978830080806e-05, "loss": 2.0396, "step": 10899000 }, { "epoch": 54.0, "learning_rate": 2.3009742720827575e-05, "loss": 2.0153, "step": 10899500 }, { "epoch": 54.0, "eval_accuracy": 0.6713126282064475, "eval_accuracy_mlm": 0.6301612022107826, "eval_accuracy_nsp": 0.865425421342255, "eval_loss": 2.319383144378662, "eval_runtime": 147.1517, "eval_samples_per_second": 1732.627, "eval_steps_per_second": 72.198, "step": 10899522 }, { "epoch": 54.0, "learning_rate": 2.300850413440149e-05, "loss": 2.0074, "step": 10900000 }, { "epoch": 54.0, "learning_rate": 2.3007265547975405e-05, "loss": 2.0332, "step": 10900500 }, { "epoch": 54.01, "learning_rate": 2.3006026961549322e-05, "loss": 2.0292, "step": 10901000 }, { "epoch": 54.01, "learning_rate": 2.300478837512324e-05, "loss": 2.0276, "step": 10901500 }, { "epoch": 54.01, "learning_rate": 2.3003549788697156e-05, "loss": 1.9952, "step": 10902000 }, { "epoch": 54.01, "learning_rate": 2.3002311202271073e-05, "loss": 2.0363, "step": 10902500 }, { "epoch": 54.02, "learning_rate": 2.300107261584499e-05, "loss": 2.0046, "step": 10903000 }, { "epoch": 54.02, "learning_rate": 2.2999834029418907e-05, "loss": 2.0319, "step": 10903500 }, { "epoch": 54.02, "learning_rate": 2.2998595442992824e-05, "loss": 2.0382, "step": 10904000 }, { "epoch": 54.02, "learning_rate": 2.299735933373959e-05, "loss": 2.0554, "step": 10904500 }, { "epoch": 54.03, "learning_rate": 2.2996120747313506e-05, "loss": 2.0122, "step": 10905000 }, { "epoch": 54.03, "learning_rate": 2.2994882160887423e-05, "loss": 1.9991, "step": 10905500 }, { "epoch": 54.03, "learning_rate": 2.299364357446134e-05, "loss": 2.0414, "step": 10906000 }, { "epoch": 54.03, "learning_rate": 2.2992404988035257e-05, "loss": 2.0386, "step": 10906500 }, { "epoch": 54.04, "learning_rate": 2.2991166401609174e-05, "loss": 2.0238, "step": 10907000 }, { "epoch": 54.04, "learning_rate": 2.2989927815183088e-05, "loss": 1.9877, "step": 10907500 }, { "epoch": 54.04, "learning_rate": 2.2988689228757004e-05, "loss": 2.0303, "step": 10908000 }, { "epoch": 54.04, "learning_rate": 2.2987453119503773e-05, "loss": 2.0237, "step": 10908500 }, { "epoch": 54.05, "learning_rate": 2.298621453307769e-05, "loss": 2.0283, "step": 10909000 }, { "epoch": 54.05, "learning_rate": 2.2984975946651607e-05, "loss": 2.0273, "step": 10909500 }, { "epoch": 54.05, "learning_rate": 2.2983739837398373e-05, "loss": 1.9982, "step": 10910000 }, { "epoch": 54.05, "learning_rate": 2.298250125097229e-05, "loss": 2.0215, "step": 10910500 }, { "epoch": 54.06, "learning_rate": 2.2981262664546206e-05, "loss": 1.9922, "step": 10911000 }, { "epoch": 54.06, "learning_rate": 2.2980024078120123e-05, "loss": 2.0096, "step": 10911500 }, { "epoch": 54.06, "learning_rate": 2.297878549169404e-05, "loss": 2.0323, "step": 10912000 }, { "epoch": 54.06, "learning_rate": 2.2977546905267957e-05, "loss": 2.0228, "step": 10912500 }, { "epoch": 54.07, "learning_rate": 2.2976308318841874e-05, "loss": 2.0177, "step": 10913000 }, { "epoch": 54.07, "learning_rate": 2.297507220958864e-05, "loss": 2.0493, "step": 10913500 }, { "epoch": 54.07, "learning_rate": 2.2973833623162556e-05, "loss": 2.041, "step": 10914000 }, { "epoch": 54.07, "learning_rate": 2.2972595036736473e-05, "loss": 2.0379, "step": 10914500 }, { "epoch": 54.08, "learning_rate": 2.297135645031039e-05, "loss": 2.0279, "step": 10915000 }, { "epoch": 54.08, "learning_rate": 2.2970117863884307e-05, "loss": 2.0217, "step": 10915500 }, { "epoch": 54.08, "learning_rate": 2.2968881754631076e-05, "loss": 2.0091, "step": 10916000 }, { "epoch": 54.08, "learning_rate": 2.2967643168204993e-05, "loss": 2.0359, "step": 10916500 }, { "epoch": 54.09, "learning_rate": 2.2966404581778907e-05, "loss": 2.0159, "step": 10917000 }, { "epoch": 54.09, "learning_rate": 2.2965165995352823e-05, "loss": 2.0283, "step": 10917500 }, { "epoch": 54.09, "learning_rate": 2.296392740892674e-05, "loss": 2.005, "step": 10918000 }, { "epoch": 54.09, "learning_rate": 2.2962688822500657e-05, "loss": 2.0101, "step": 10918500 }, { "epoch": 54.1, "learning_rate": 2.2961452713247426e-05, "loss": 2.0035, "step": 10919000 }, { "epoch": 54.1, "learning_rate": 2.2960214126821343e-05, "loss": 2.0101, "step": 10919500 }, { "epoch": 54.1, "learning_rate": 2.295897554039526e-05, "loss": 2.0204, "step": 10920000 }, { "epoch": 54.1, "learning_rate": 2.2957736953969174e-05, "loss": 2.0448, "step": 10920500 }, { "epoch": 54.11, "learning_rate": 2.2956500844715946e-05, "loss": 2.0171, "step": 10921000 }, { "epoch": 54.11, "learning_rate": 2.2955262258289863e-05, "loss": 2.0305, "step": 10921500 }, { "epoch": 54.11, "learning_rate": 2.2954023671863776e-05, "loss": 2.0103, "step": 10922000 }, { "epoch": 54.11, "learning_rate": 2.2952785085437693e-05, "loss": 2.056, "step": 10922500 }, { "epoch": 54.12, "learning_rate": 2.295154649901161e-05, "loss": 2.0333, "step": 10923000 }, { "epoch": 54.12, "learning_rate": 2.295031038975838e-05, "loss": 2.0058, "step": 10923500 }, { "epoch": 54.12, "learning_rate": 2.2949071803332296e-05, "loss": 2.0245, "step": 10924000 }, { "epoch": 54.12, "learning_rate": 2.2947833216906213e-05, "loss": 2.0343, "step": 10924500 }, { "epoch": 54.13, "learning_rate": 2.2946594630480126e-05, "loss": 2.0272, "step": 10925000 }, { "epoch": 54.13, "learning_rate": 2.2945356044054043e-05, "loss": 2.0078, "step": 10925500 }, { "epoch": 54.13, "learning_rate": 2.294411745762796e-05, "loss": 2.052, "step": 10926000 }, { "epoch": 54.13, "learning_rate": 2.2942878871201877e-05, "loss": 1.9939, "step": 10926500 }, { "epoch": 54.14, "learning_rate": 2.2941642761948646e-05, "loss": 2.0111, "step": 10927000 }, { "epoch": 54.14, "learning_rate": 2.2940404175522563e-05, "loss": 2.0173, "step": 10927500 }, { "epoch": 54.14, "learning_rate": 2.2939165589096476e-05, "loss": 2.0303, "step": 10928000 }, { "epoch": 54.14, "learning_rate": 2.2937927002670393e-05, "loss": 2.034, "step": 10928500 }, { "epoch": 54.15, "learning_rate": 2.293668841624431e-05, "loss": 2.0417, "step": 10929000 }, { "epoch": 54.15, "learning_rate": 2.2935449829818227e-05, "loss": 2.0415, "step": 10929500 }, { "epoch": 54.15, "learning_rate": 2.2934213720564996e-05, "loss": 2.0219, "step": 10930000 }, { "epoch": 54.15, "learning_rate": 2.2932975134138913e-05, "loss": 2.0166, "step": 10930500 }, { "epoch": 54.16, "learning_rate": 2.293173654771283e-05, "loss": 2.0097, "step": 10931000 }, { "epoch": 54.16, "learning_rate": 2.2930497961286743e-05, "loss": 2.023, "step": 10931500 }, { "epoch": 54.16, "learning_rate": 2.292925937486066e-05, "loss": 2.0376, "step": 10932000 }, { "epoch": 54.16, "learning_rate": 2.2928020788434577e-05, "loss": 2.0308, "step": 10932500 }, { "epoch": 54.17, "learning_rate": 2.2926782202008494e-05, "loss": 2.0052, "step": 10933000 }, { "epoch": 54.17, "learning_rate": 2.292554361558241e-05, "loss": 2.0425, "step": 10933500 }, { "epoch": 54.17, "learning_rate": 2.2924305029156325e-05, "loss": 2.0369, "step": 10934000 }, { "epoch": 54.17, "learning_rate": 2.2923068919903093e-05, "loss": 2.0379, "step": 10934500 }, { "epoch": 54.18, "learning_rate": 2.292183033347701e-05, "loss": 2.0308, "step": 10935000 }, { "epoch": 54.18, "learning_rate": 2.292059422422378e-05, "loss": 2.0239, "step": 10935500 }, { "epoch": 54.18, "learning_rate": 2.2919355637797696e-05, "loss": 2.0137, "step": 10936000 }, { "epoch": 54.18, "learning_rate": 2.2918117051371613e-05, "loss": 2.0285, "step": 10936500 }, { "epoch": 54.19, "learning_rate": 2.291687846494553e-05, "loss": 2.0561, "step": 10937000 }, { "epoch": 54.19, "learning_rate": 2.2915642355692295e-05, "loss": 2.0308, "step": 10937500 }, { "epoch": 54.19, "learning_rate": 2.2914403769266212e-05, "loss": 2.012, "step": 10938000 }, { "epoch": 54.19, "learning_rate": 2.291316518284013e-05, "loss": 2.0179, "step": 10938500 }, { "epoch": 54.2, "learning_rate": 2.2911926596414046e-05, "loss": 2.0294, "step": 10939000 }, { "epoch": 54.2, "learning_rate": 2.2910688009987963e-05, "loss": 2.027, "step": 10939500 }, { "epoch": 54.2, "learning_rate": 2.290944942356188e-05, "loss": 2.0294, "step": 10940000 }, { "epoch": 54.2, "learning_rate": 2.2908210837135793e-05, "loss": 2.0265, "step": 10940500 }, { "epoch": 54.21, "learning_rate": 2.290697225070971e-05, "loss": 2.0387, "step": 10941000 }, { "epoch": 54.21, "learning_rate": 2.2905733664283627e-05, "loss": 2.0339, "step": 10941500 }, { "epoch": 54.21, "learning_rate": 2.2904500032203248e-05, "loss": 2.0256, "step": 10942000 }, { "epoch": 54.21, "learning_rate": 2.2903263922950017e-05, "loss": 2.0062, "step": 10942500 }, { "epoch": 54.22, "learning_rate": 2.2902025336523934e-05, "loss": 2.0335, "step": 10943000 }, { "epoch": 54.22, "learning_rate": 2.290078675009785e-05, "loss": 2.0308, "step": 10943500 }, { "epoch": 54.22, "learning_rate": 2.2899548163671768e-05, "loss": 2.0216, "step": 10944000 }, { "epoch": 54.22, "learning_rate": 2.2898309577245685e-05, "loss": 2.0434, "step": 10944500 }, { "epoch": 54.23, "learning_rate": 2.2897070990819598e-05, "loss": 2.0554, "step": 10945000 }, { "epoch": 54.23, "learning_rate": 2.2895832404393515e-05, "loss": 2.0543, "step": 10945500 }, { "epoch": 54.23, "learning_rate": 2.289459381796743e-05, "loss": 2.0301, "step": 10946000 }, { "epoch": 54.23, "learning_rate": 2.28933577087142e-05, "loss": 2.0348, "step": 10946500 }, { "epoch": 54.24, "learning_rate": 2.2892119122288118e-05, "loss": 1.9992, "step": 10947000 }, { "epoch": 54.24, "learning_rate": 2.2890880535862035e-05, "loss": 2.0342, "step": 10947500 }, { "epoch": 54.24, "learning_rate": 2.2889641949435948e-05, "loss": 2.0166, "step": 10948000 }, { "epoch": 54.24, "learning_rate": 2.2888403363009865e-05, "loss": 2.0269, "step": 10948500 }, { "epoch": 54.25, "learning_rate": 2.2887164776583782e-05, "loss": 2.0635, "step": 10949000 }, { "epoch": 54.25, "learning_rate": 2.2885926190157696e-05, "loss": 2.0527, "step": 10949500 }, { "epoch": 54.25, "learning_rate": 2.2884687603731612e-05, "loss": 2.0258, "step": 10950000 }, { "epoch": 54.25, "learning_rate": 2.288344901730553e-05, "loss": 2.0423, "step": 10950500 }, { "epoch": 54.26, "learning_rate": 2.2882210430879446e-05, "loss": 2.0173, "step": 10951000 }, { "epoch": 54.26, "learning_rate": 2.2880971844453363e-05, "loss": 2.0465, "step": 10951500 }, { "epoch": 54.26, "learning_rate": 2.287973325802728e-05, "loss": 2.0221, "step": 10952000 }, { "epoch": 54.26, "learning_rate": 2.2878494671601197e-05, "loss": 2.0387, "step": 10952500 }, { "epoch": 54.26, "learning_rate": 2.287725608517511e-05, "loss": 2.04, "step": 10953000 }, { "epoch": 54.27, "learning_rate": 2.287601997592188e-05, "loss": 2.0136, "step": 10953500 }, { "epoch": 54.27, "learning_rate": 2.2874781389495796e-05, "loss": 2.0204, "step": 10954000 }, { "epoch": 54.27, "learning_rate": 2.2873542803069713e-05, "loss": 2.0499, "step": 10954500 }, { "epoch": 54.27, "learning_rate": 2.287230421664363e-05, "loss": 2.0085, "step": 10955000 }, { "epoch": 54.28, "learning_rate": 2.28710681073904e-05, "loss": 2.0408, "step": 10955500 }, { "epoch": 54.28, "learning_rate": 2.2869829520964313e-05, "loss": 2.0255, "step": 10956000 }, { "epoch": 54.28, "learning_rate": 2.286859093453823e-05, "loss": 2.0261, "step": 10956500 }, { "epoch": 54.28, "learning_rate": 2.2867352348112146e-05, "loss": 2.0234, "step": 10957000 }, { "epoch": 54.29, "learning_rate": 2.286611623885892e-05, "loss": 2.0171, "step": 10957500 }, { "epoch": 54.29, "learning_rate": 2.2864877652432832e-05, "loss": 2.0137, "step": 10958000 }, { "epoch": 54.29, "learning_rate": 2.286363906600675e-05, "loss": 2.0186, "step": 10958500 }, { "epoch": 54.29, "learning_rate": 2.2862400479580666e-05, "loss": 2.0408, "step": 10959000 }, { "epoch": 54.3, "learning_rate": 2.286116189315458e-05, "loss": 2.0534, "step": 10959500 }, { "epoch": 54.3, "learning_rate": 2.2859923306728497e-05, "loss": 2.017, "step": 10960000 }, { "epoch": 54.3, "learning_rate": 2.2858684720302413e-05, "loss": 2.0311, "step": 10960500 }, { "epoch": 54.3, "learning_rate": 2.285744613387633e-05, "loss": 2.0628, "step": 10961000 }, { "epoch": 54.31, "learning_rate": 2.2856207547450247e-05, "loss": 2.0337, "step": 10961500 }, { "epoch": 54.31, "learning_rate": 2.2854971438197016e-05, "loss": 2.0386, "step": 10962000 }, { "epoch": 54.31, "learning_rate": 2.2853732851770933e-05, "loss": 2.0133, "step": 10962500 }, { "epoch": 54.31, "learning_rate": 2.2852494265344847e-05, "loss": 2.0233, "step": 10963000 }, { "epoch": 54.32, "learning_rate": 2.2851255678918764e-05, "loss": 2.063, "step": 10963500 }, { "epoch": 54.32, "learning_rate": 2.285001709249268e-05, "loss": 2.0502, "step": 10964000 }, { "epoch": 54.32, "learning_rate": 2.2848778506066597e-05, "loss": 2.0479, "step": 10964500 }, { "epoch": 54.32, "learning_rate": 2.2847539919640514e-05, "loss": 2.0247, "step": 10965000 }, { "epoch": 54.33, "learning_rate": 2.2846301333214428e-05, "loss": 2.0369, "step": 10965500 }, { "epoch": 54.33, "learning_rate": 2.28450652239612e-05, "loss": 2.0314, "step": 10966000 }, { "epoch": 54.33, "learning_rate": 2.2843826637535114e-05, "loss": 2.0361, "step": 10966500 }, { "epoch": 54.33, "learning_rate": 2.284258805110903e-05, "loss": 2.0063, "step": 10967000 }, { "epoch": 54.34, "learning_rate": 2.28413519418558e-05, "loss": 2.0497, "step": 10967500 }, { "epoch": 54.34, "learning_rate": 2.2840115832602568e-05, "loss": 2.0154, "step": 10968000 }, { "epoch": 54.34, "learning_rate": 2.2838877246176485e-05, "loss": 2.0166, "step": 10968500 }, { "epoch": 54.34, "learning_rate": 2.2837638659750402e-05, "loss": 2.0187, "step": 10969000 }, { "epoch": 54.35, "learning_rate": 2.283640007332432e-05, "loss": 2.0325, "step": 10969500 }, { "epoch": 54.35, "learning_rate": 2.2835161486898236e-05, "loss": 2.042, "step": 10970000 }, { "epoch": 54.35, "learning_rate": 2.283392290047215e-05, "loss": 2.0589, "step": 10970500 }, { "epoch": 54.35, "learning_rate": 2.2832686791218918e-05, "loss": 2.0512, "step": 10971000 }, { "epoch": 54.36, "learning_rate": 2.2831448204792835e-05, "loss": 2.0469, "step": 10971500 }, { "epoch": 54.36, "learning_rate": 2.2830209618366752e-05, "loss": 2.0337, "step": 10972000 }, { "epoch": 54.36, "learning_rate": 2.282897103194067e-05, "loss": 2.0349, "step": 10972500 }, { "epoch": 54.36, "learning_rate": 2.2827734922687434e-05, "loss": 2.0412, "step": 10973000 }, { "epoch": 54.37, "learning_rate": 2.282649633626135e-05, "loss": 2.0258, "step": 10973500 }, { "epoch": 54.37, "learning_rate": 2.2825257749835268e-05, "loss": 2.0367, "step": 10974000 }, { "epoch": 54.37, "learning_rate": 2.2824019163409185e-05, "loss": 2.0298, "step": 10974500 }, { "epoch": 54.37, "learning_rate": 2.2822780576983102e-05, "loss": 2.0281, "step": 10975000 }, { "epoch": 54.38, "learning_rate": 2.282154199055702e-05, "loss": 2.0243, "step": 10975500 }, { "epoch": 54.38, "learning_rate": 2.2820303404130936e-05, "loss": 2.01, "step": 10976000 }, { "epoch": 54.38, "learning_rate": 2.2819064817704853e-05, "loss": 2.0538, "step": 10976500 }, { "epoch": 54.38, "learning_rate": 2.2817826231278766e-05, "loss": 2.0258, "step": 10977000 }, { "epoch": 54.39, "learning_rate": 2.2816587644852683e-05, "loss": 2.0257, "step": 10977500 }, { "epoch": 54.39, "learning_rate": 2.28153490584266e-05, "loss": 2.0179, "step": 10978000 }, { "epoch": 54.39, "learning_rate": 2.2814110472000517e-05, "loss": 2.0451, "step": 10978500 }, { "epoch": 54.39, "learning_rate": 2.2812871885574434e-05, "loss": 2.055, "step": 10979000 }, { "epoch": 54.4, "learning_rate": 2.2811635776321203e-05, "loss": 2.0217, "step": 10979500 }, { "epoch": 54.4, "learning_rate": 2.281039966706797e-05, "loss": 2.0309, "step": 10980000 }, { "epoch": 54.4, "learning_rate": 2.2809161080641885e-05, "loss": 2.0295, "step": 10980500 }, { "epoch": 54.4, "learning_rate": 2.2807924971388654e-05, "loss": 2.0181, "step": 10981000 }, { "epoch": 54.41, "learning_rate": 2.280668638496257e-05, "loss": 2.0235, "step": 10981500 }, { "epoch": 54.41, "learning_rate": 2.2805447798536485e-05, "loss": 2.0241, "step": 10982000 }, { "epoch": 54.41, "learning_rate": 2.28042092121104e-05, "loss": 2.0487, "step": 10982500 }, { "epoch": 54.41, "learning_rate": 2.280297062568432e-05, "loss": 2.0388, "step": 10983000 }, { "epoch": 54.42, "learning_rate": 2.2801732039258235e-05, "loss": 2.041, "step": 10983500 }, { "epoch": 54.42, "learning_rate": 2.2800493452832152e-05, "loss": 2.057, "step": 10984000 }, { "epoch": 54.42, "learning_rate": 2.279925486640607e-05, "loss": 2.0222, "step": 10984500 }, { "epoch": 54.42, "learning_rate": 2.2798016279979986e-05, "loss": 2.0318, "step": 10985000 }, { "epoch": 54.43, "learning_rate": 2.279678017072675e-05, "loss": 1.9984, "step": 10985500 }, { "epoch": 54.43, "learning_rate": 2.279554158430067e-05, "loss": 2.0231, "step": 10986000 }, { "epoch": 54.43, "learning_rate": 2.2794302997874585e-05, "loss": 2.0425, "step": 10986500 }, { "epoch": 54.43, "learning_rate": 2.2793064411448502e-05, "loss": 2.0371, "step": 10987000 }, { "epoch": 54.44, "learning_rate": 2.279182830219527e-05, "loss": 2.0104, "step": 10987500 }, { "epoch": 54.44, "learning_rate": 2.2790589715769188e-05, "loss": 2.0494, "step": 10988000 }, { "epoch": 54.44, "learning_rate": 2.2789353606515957e-05, "loss": 2.0306, "step": 10988500 }, { "epoch": 54.44, "learning_rate": 2.2788115020089874e-05, "loss": 2.0332, "step": 10989000 }, { "epoch": 54.45, "learning_rate": 2.278687643366379e-05, "loss": 2.0334, "step": 10989500 }, { "epoch": 54.45, "learning_rate": 2.2785637847237708e-05, "loss": 2.0285, "step": 10990000 }, { "epoch": 54.45, "learning_rate": 2.2784399260811625e-05, "loss": 2.0178, "step": 10990500 }, { "epoch": 54.45, "learning_rate": 2.2783160674385538e-05, "loss": 2.0284, "step": 10991000 }, { "epoch": 54.46, "learning_rate": 2.2781922087959455e-05, "loss": 2.012, "step": 10991500 }, { "epoch": 54.46, "learning_rate": 2.278068350153337e-05, "loss": 2.031, "step": 10992000 }, { "epoch": 54.46, "learning_rate": 2.2779444915107286e-05, "loss": 2.0311, "step": 10992500 }, { "epoch": 54.46, "learning_rate": 2.2778206328681202e-05, "loss": 2.0433, "step": 10993000 }, { "epoch": 54.47, "learning_rate": 2.277696774225512e-05, "loss": 2.0318, "step": 10993500 }, { "epoch": 54.47, "learning_rate": 2.277573163300189e-05, "loss": 2.037, "step": 10994000 }, { "epoch": 54.47, "learning_rate": 2.2774493046575805e-05, "loss": 2.0262, "step": 10994500 }, { "epoch": 54.47, "learning_rate": 2.2773254460149722e-05, "loss": 2.0299, "step": 10995000 }, { "epoch": 54.48, "learning_rate": 2.2772015873723636e-05, "loss": 2.0327, "step": 10995500 }, { "epoch": 54.48, "learning_rate": 2.2770777287297553e-05, "loss": 2.0293, "step": 10996000 }, { "epoch": 54.48, "learning_rate": 2.276953870087147e-05, "loss": 2.0157, "step": 10996500 }, { "epoch": 54.48, "learning_rate": 2.2768300114445386e-05, "loss": 2.0221, "step": 10997000 }, { "epoch": 54.49, "learning_rate": 2.2767061528019303e-05, "loss": 2.0369, "step": 10997500 }, { "epoch": 54.49, "learning_rate": 2.276582294159322e-05, "loss": 2.0243, "step": 10998000 }, { "epoch": 54.49, "learning_rate": 2.2764584355167137e-05, "loss": 2.0042, "step": 10998500 }, { "epoch": 54.49, "learning_rate": 2.2763348245913903e-05, "loss": 2.0395, "step": 10999000 }, { "epoch": 54.5, "learning_rate": 2.2762112136660675e-05, "loss": 2.0404, "step": 10999500 }, { "epoch": 54.5, "learning_rate": 2.2760873550234592e-05, "loss": 2.0419, "step": 11000000 }, { "epoch": 54.5, "learning_rate": 2.2759634963808505e-05, "loss": 2.0337, "step": 11000500 }, { "epoch": 54.5, "learning_rate": 2.2758396377382422e-05, "loss": 2.019, "step": 11001000 }, { "epoch": 54.51, "learning_rate": 2.275715779095634e-05, "loss": 2.0199, "step": 11001500 }, { "epoch": 54.51, "learning_rate": 2.2755919204530256e-05, "loss": 2.0115, "step": 11002000 }, { "epoch": 54.51, "learning_rate": 2.275468061810417e-05, "loss": 2.0641, "step": 11002500 }, { "epoch": 54.51, "learning_rate": 2.2753442031678087e-05, "loss": 2.0231, "step": 11003000 }, { "epoch": 54.52, "learning_rate": 2.2752203445252003e-05, "loss": 2.0108, "step": 11003500 }, { "epoch": 54.52, "learning_rate": 2.275096485882592e-05, "loss": 2.0433, "step": 11004000 }, { "epoch": 54.52, "learning_rate": 2.2749726272399837e-05, "loss": 2.0265, "step": 11004500 }, { "epoch": 54.52, "learning_rate": 2.274848768597375e-05, "loss": 2.0204, "step": 11005000 }, { "epoch": 54.53, "learning_rate": 2.274725157672052e-05, "loss": 2.0495, "step": 11005500 }, { "epoch": 54.53, "learning_rate": 2.2746012990294437e-05, "loss": 2.0404, "step": 11006000 }, { "epoch": 54.53, "learning_rate": 2.2744774403868354e-05, "loss": 2.0503, "step": 11006500 }, { "epoch": 54.53, "learning_rate": 2.274353581744227e-05, "loss": 2.0266, "step": 11007000 }, { "epoch": 54.53, "learning_rate": 2.2742297231016187e-05, "loss": 2.0233, "step": 11007500 }, { "epoch": 54.54, "learning_rate": 2.27410586445901e-05, "loss": 2.046, "step": 11008000 }, { "epoch": 54.54, "learning_rate": 2.2739822535336873e-05, "loss": 2.0233, "step": 11008500 }, { "epoch": 54.54, "learning_rate": 2.2738583948910787e-05, "loss": 2.0302, "step": 11009000 }, { "epoch": 54.54, "learning_rate": 2.2737345362484704e-05, "loss": 2.0532, "step": 11009500 }, { "epoch": 54.55, "learning_rate": 2.2736109253231472e-05, "loss": 2.0494, "step": 11010000 }, { "epoch": 54.55, "learning_rate": 2.273487066680539e-05, "loss": 2.0414, "step": 11010500 }, { "epoch": 54.55, "learning_rate": 2.2733632080379306e-05, "loss": 2.0131, "step": 11011000 }, { "epoch": 54.55, "learning_rate": 2.2732393493953223e-05, "loss": 2.0358, "step": 11011500 }, { "epoch": 54.56, "learning_rate": 2.273115490752714e-05, "loss": 2.0229, "step": 11012000 }, { "epoch": 54.56, "learning_rate": 2.2729916321101054e-05, "loss": 2.0217, "step": 11012500 }, { "epoch": 54.56, "learning_rate": 2.272867773467497e-05, "loss": 2.0309, "step": 11013000 }, { "epoch": 54.56, "learning_rate": 2.2727439148248888e-05, "loss": 2.0118, "step": 11013500 }, { "epoch": 54.57, "learning_rate": 2.2726200561822804e-05, "loss": 2.0244, "step": 11014000 }, { "epoch": 54.57, "learning_rate": 2.2724964452569573e-05, "loss": 2.0289, "step": 11014500 }, { "epoch": 54.57, "learning_rate": 2.272372586614349e-05, "loss": 2.0086, "step": 11015000 }, { "epoch": 54.57, "learning_rate": 2.272248975689026e-05, "loss": 2.0353, "step": 11015500 }, { "epoch": 54.58, "learning_rate": 2.2721251170464176e-05, "loss": 2.0415, "step": 11016000 }, { "epoch": 54.58, "learning_rate": 2.272001258403809e-05, "loss": 2.0279, "step": 11016500 }, { "epoch": 54.58, "learning_rate": 2.2718773997612006e-05, "loss": 2.024, "step": 11017000 }, { "epoch": 54.58, "learning_rate": 2.2717535411185923e-05, "loss": 2.0334, "step": 11017500 }, { "epoch": 54.59, "learning_rate": 2.271629682475984e-05, "loss": 2.0349, "step": 11018000 }, { "epoch": 54.59, "learning_rate": 2.2715058238333757e-05, "loss": 2.0282, "step": 11018500 }, { "epoch": 54.59, "learning_rate": 2.271381965190767e-05, "loss": 2.0314, "step": 11019000 }, { "epoch": 54.59, "learning_rate": 2.271258354265444e-05, "loss": 2.0378, "step": 11019500 }, { "epoch": 54.6, "learning_rate": 2.2711344956228356e-05, "loss": 2.0543, "step": 11020000 }, { "epoch": 54.6, "learning_rate": 2.2710106369802273e-05, "loss": 2.041, "step": 11020500 }, { "epoch": 54.6, "learning_rate": 2.270886778337619e-05, "loss": 2.0395, "step": 11021000 }, { "epoch": 54.6, "learning_rate": 2.2707629196950107e-05, "loss": 2.0608, "step": 11021500 }, { "epoch": 54.61, "learning_rate": 2.2706390610524024e-05, "loss": 2.0291, "step": 11022000 }, { "epoch": 54.61, "learning_rate": 2.2705152024097938e-05, "loss": 2.0357, "step": 11022500 }, { "epoch": 54.61, "learning_rate": 2.2703913437671855e-05, "loss": 2.0329, "step": 11023000 }, { "epoch": 54.61, "learning_rate": 2.270267485124577e-05, "loss": 2.0507, "step": 11023500 }, { "epoch": 54.62, "learning_rate": 2.2701436264819685e-05, "loss": 2.0102, "step": 11024000 }, { "epoch": 54.62, "learning_rate": 2.2700197678393602e-05, "loss": 2.0352, "step": 11024500 }, { "epoch": 54.62, "learning_rate": 2.269895909196752e-05, "loss": 2.0395, "step": 11025000 }, { "epoch": 54.62, "learning_rate": 2.2697720505541436e-05, "loss": 2.0276, "step": 11025500 }, { "epoch": 54.63, "learning_rate": 2.2696481919115353e-05, "loss": 2.0433, "step": 11026000 }, { "epoch": 54.63, "learning_rate": 2.269524580986212e-05, "loss": 2.038, "step": 11026500 }, { "epoch": 54.63, "learning_rate": 2.2694007223436035e-05, "loss": 2.0182, "step": 11027000 }, { "epoch": 54.63, "learning_rate": 2.2692768637009952e-05, "loss": 2.0334, "step": 11027500 }, { "epoch": 54.64, "learning_rate": 2.269153005058387e-05, "loss": 2.0343, "step": 11028000 }, { "epoch": 54.64, "learning_rate": 2.2690291464157786e-05, "loss": 2.0235, "step": 11028500 }, { "epoch": 54.64, "learning_rate": 2.2689052877731703e-05, "loss": 2.0449, "step": 11029000 }, { "epoch": 54.64, "learning_rate": 2.268781676847847e-05, "loss": 2.0321, "step": 11029500 }, { "epoch": 54.65, "learning_rate": 2.268658065922524e-05, "loss": 2.0376, "step": 11030000 }, { "epoch": 54.65, "learning_rate": 2.2685342072799157e-05, "loss": 2.0151, "step": 11030500 }, { "epoch": 54.65, "learning_rate": 2.2684103486373074e-05, "loss": 2.0467, "step": 11031000 }, { "epoch": 54.65, "learning_rate": 2.268286489994699e-05, "loss": 2.0466, "step": 11031500 }, { "epoch": 54.66, "learning_rate": 2.2681626313520908e-05, "loss": 2.0394, "step": 11032000 }, { "epoch": 54.66, "learning_rate": 2.2680387727094822e-05, "loss": 2.0336, "step": 11032500 }, { "epoch": 54.66, "learning_rate": 2.267914914066874e-05, "loss": 2.0281, "step": 11033000 }, { "epoch": 54.66, "learning_rate": 2.2677910554242656e-05, "loss": 2.0366, "step": 11033500 }, { "epoch": 54.67, "learning_rate": 2.2676674444989424e-05, "loss": 2.0255, "step": 11034000 }, { "epoch": 54.67, "learning_rate": 2.267543585856334e-05, "loss": 2.0459, "step": 11034500 }, { "epoch": 54.67, "learning_rate": 2.2674197272137258e-05, "loss": 2.0763, "step": 11035000 }, { "epoch": 54.67, "learning_rate": 2.2672958685711175e-05, "loss": 2.0531, "step": 11035500 }, { "epoch": 54.68, "learning_rate": 2.267172009928509e-05, "loss": 2.0489, "step": 11036000 }, { "epoch": 54.68, "learning_rate": 2.2670481512859006e-05, "loss": 2.0503, "step": 11036500 }, { "epoch": 54.68, "learning_rate": 2.2669242926432923e-05, "loss": 2.0202, "step": 11037000 }, { "epoch": 54.68, "learning_rate": 2.2668004340006836e-05, "loss": 2.0289, "step": 11037500 }, { "epoch": 54.69, "learning_rate": 2.2666765753580753e-05, "loss": 2.0491, "step": 11038000 }, { "epoch": 54.69, "learning_rate": 2.2665532121500374e-05, "loss": 2.02, "step": 11038500 }, { "epoch": 54.69, "learning_rate": 2.266429353507429e-05, "loss": 2.0453, "step": 11039000 }, { "epoch": 54.69, "learning_rate": 2.266305742582106e-05, "loss": 2.0686, "step": 11039500 }, { "epoch": 54.7, "learning_rate": 2.2661818839394976e-05, "loss": 2.0536, "step": 11040000 }, { "epoch": 54.7, "learning_rate": 2.2660580252968893e-05, "loss": 2.0192, "step": 11040500 }, { "epoch": 54.7, "learning_rate": 2.265934166654281e-05, "loss": 2.0318, "step": 11041000 }, { "epoch": 54.7, "learning_rate": 2.2658103080116724e-05, "loss": 2.0397, "step": 11041500 }, { "epoch": 54.71, "learning_rate": 2.265686449369064e-05, "loss": 2.0447, "step": 11042000 }, { "epoch": 54.71, "learning_rate": 2.2655625907264558e-05, "loss": 2.0108, "step": 11042500 }, { "epoch": 54.71, "learning_rate": 2.2654389798011326e-05, "loss": 2.0175, "step": 11043000 }, { "epoch": 54.71, "learning_rate": 2.2653153688758095e-05, "loss": 2.0299, "step": 11043500 }, { "epoch": 54.72, "learning_rate": 2.2651915102332012e-05, "loss": 2.0611, "step": 11044000 }, { "epoch": 54.72, "learning_rate": 2.265067651590593e-05, "loss": 2.0441, "step": 11044500 }, { "epoch": 54.72, "learning_rate": 2.2649437929479843e-05, "loss": 2.0562, "step": 11045000 }, { "epoch": 54.72, "learning_rate": 2.264819934305376e-05, "loss": 2.0487, "step": 11045500 }, { "epoch": 54.73, "learning_rate": 2.2646960756627677e-05, "loss": 2.0446, "step": 11046000 }, { "epoch": 54.73, "learning_rate": 2.2645722170201593e-05, "loss": 2.0565, "step": 11046500 }, { "epoch": 54.73, "learning_rate": 2.2644486060948362e-05, "loss": 2.0307, "step": 11047000 }, { "epoch": 54.73, "learning_rate": 2.264324747452228e-05, "loss": 2.0433, "step": 11047500 }, { "epoch": 54.74, "learning_rate": 2.2642008888096196e-05, "loss": 2.0182, "step": 11048000 }, { "epoch": 54.74, "learning_rate": 2.264077030167011e-05, "loss": 2.0419, "step": 11048500 }, { "epoch": 54.74, "learning_rate": 2.2639531715244027e-05, "loss": 2.0555, "step": 11049000 }, { "epoch": 54.74, "learning_rate": 2.2638293128817944e-05, "loss": 2.0096, "step": 11049500 }, { "epoch": 54.75, "learning_rate": 2.263705454239186e-05, "loss": 2.0335, "step": 11050000 }, { "epoch": 54.75, "learning_rate": 2.2635815955965774e-05, "loss": 2.0101, "step": 11050500 }, { "epoch": 54.75, "learning_rate": 2.2634579846712546e-05, "loss": 2.0455, "step": 11051000 }, { "epoch": 54.75, "learning_rate": 2.263334126028646e-05, "loss": 2.0418, "step": 11051500 }, { "epoch": 54.76, "learning_rate": 2.2632102673860377e-05, "loss": 2.0244, "step": 11052000 }, { "epoch": 54.76, "learning_rate": 2.2630864087434294e-05, "loss": 2.0517, "step": 11052500 }, { "epoch": 54.76, "learning_rate": 2.262962550100821e-05, "loss": 2.0477, "step": 11053000 }, { "epoch": 54.76, "learning_rate": 2.2628386914582127e-05, "loss": 2.0332, "step": 11053500 }, { "epoch": 54.77, "learning_rate": 2.262714832815604e-05, "loss": 2.0208, "step": 11054000 }, { "epoch": 54.77, "learning_rate": 2.2625909741729958e-05, "loss": 2.0575, "step": 11054500 }, { "epoch": 54.77, "learning_rate": 2.2624673632476727e-05, "loss": 2.0644, "step": 11055000 }, { "epoch": 54.77, "learning_rate": 2.2623435046050644e-05, "loss": 2.0397, "step": 11055500 }, { "epoch": 54.78, "learning_rate": 2.2622198936797412e-05, "loss": 2.0486, "step": 11056000 }, { "epoch": 54.78, "learning_rate": 2.262096035037133e-05, "loss": 2.0249, "step": 11056500 }, { "epoch": 54.78, "learning_rate": 2.2619721763945246e-05, "loss": 2.0223, "step": 11057000 }, { "epoch": 54.78, "learning_rate": 2.2618483177519163e-05, "loss": 2.0276, "step": 11057500 }, { "epoch": 54.79, "learning_rate": 2.261724459109308e-05, "loss": 2.0336, "step": 11058000 }, { "epoch": 54.79, "learning_rate": 2.2616006004666994e-05, "loss": 2.0077, "step": 11058500 }, { "epoch": 54.79, "learning_rate": 2.2614769895413763e-05, "loss": 2.0773, "step": 11059000 }, { "epoch": 54.79, "learning_rate": 2.261353130898768e-05, "loss": 2.0307, "step": 11059500 }, { "epoch": 54.8, "learning_rate": 2.2612292722561596e-05, "loss": 2.0563, "step": 11060000 }, { "epoch": 54.8, "learning_rate": 2.2611054136135513e-05, "loss": 2.0657, "step": 11060500 }, { "epoch": 54.8, "learning_rate": 2.2609818026882282e-05, "loss": 2.0447, "step": 11061000 }, { "epoch": 54.8, "learning_rate": 2.26085794404562e-05, "loss": 2.0448, "step": 11061500 }, { "epoch": 54.8, "learning_rate": 2.2607340854030113e-05, "loss": 2.0392, "step": 11062000 }, { "epoch": 54.81, "learning_rate": 2.260610226760403e-05, "loss": 2.0456, "step": 11062500 }, { "epoch": 54.81, "learning_rate": 2.2604863681177946e-05, "loss": 2.0122, "step": 11063000 }, { "epoch": 54.81, "learning_rate": 2.2603625094751863e-05, "loss": 2.0135, "step": 11063500 }, { "epoch": 54.81, "learning_rate": 2.2602388985498632e-05, "loss": 2.0478, "step": 11064000 }, { "epoch": 54.82, "learning_rate": 2.260115039907255e-05, "loss": 2.036, "step": 11064500 }, { "epoch": 54.82, "learning_rate": 2.2599914289819315e-05, "loss": 2.042, "step": 11065000 }, { "epoch": 54.82, "learning_rate": 2.259867570339323e-05, "loss": 2.0128, "step": 11065500 }, { "epoch": 54.82, "learning_rate": 2.259743711696715e-05, "loss": 2.0213, "step": 11066000 }, { "epoch": 54.83, "learning_rate": 2.2596198530541065e-05, "loss": 2.0598, "step": 11066500 }, { "epoch": 54.83, "learning_rate": 2.2594959944114982e-05, "loss": 2.0337, "step": 11067000 }, { "epoch": 54.83, "learning_rate": 2.25937213576889e-05, "loss": 2.059, "step": 11067500 }, { "epoch": 54.83, "learning_rate": 2.2592482771262813e-05, "loss": 2.0573, "step": 11068000 }, { "epoch": 54.84, "learning_rate": 2.259124418483673e-05, "loss": 2.0317, "step": 11068500 }, { "epoch": 54.84, "learning_rate": 2.2590005598410647e-05, "loss": 2.0539, "step": 11069000 }, { "epoch": 54.84, "learning_rate": 2.2588769489157415e-05, "loss": 2.0362, "step": 11069500 }, { "epoch": 54.84, "learning_rate": 2.2587530902731332e-05, "loss": 2.0241, "step": 11070000 }, { "epoch": 54.85, "learning_rate": 2.258629231630525e-05, "loss": 2.0475, "step": 11070500 }, { "epoch": 54.85, "learning_rate": 2.2585053729879166e-05, "loss": 2.0445, "step": 11071000 }, { "epoch": 54.85, "learning_rate": 2.258381762062593e-05, "loss": 2.0423, "step": 11071500 }, { "epoch": 54.85, "learning_rate": 2.258257903419985e-05, "loss": 2.049, "step": 11072000 }, { "epoch": 54.86, "learning_rate": 2.2581340447773765e-05, "loss": 2.0462, "step": 11072500 }, { "epoch": 54.86, "learning_rate": 2.2580101861347682e-05, "loss": 2.0679, "step": 11073000 }, { "epoch": 54.86, "learning_rate": 2.25788632749216e-05, "loss": 2.0468, "step": 11073500 }, { "epoch": 54.86, "learning_rate": 2.2577624688495516e-05, "loss": 2.0292, "step": 11074000 }, { "epoch": 54.87, "learning_rate": 2.257638610206943e-05, "loss": 2.0591, "step": 11074500 }, { "epoch": 54.87, "learning_rate": 2.25751499928162e-05, "loss": 2.0423, "step": 11075000 }, { "epoch": 54.87, "learning_rate": 2.2573911406390115e-05, "loss": 2.0519, "step": 11075500 }, { "epoch": 54.87, "learning_rate": 2.2572672819964032e-05, "loss": 2.0325, "step": 11076000 }, { "epoch": 54.88, "learning_rate": 2.257143423353795e-05, "loss": 2.046, "step": 11076500 }, { "epoch": 54.88, "learning_rate": 2.2570195647111866e-05, "loss": 2.0339, "step": 11077000 }, { "epoch": 54.88, "learning_rate": 2.256895706068578e-05, "loss": 2.0415, "step": 11077500 }, { "epoch": 54.88, "learning_rate": 2.2567718474259697e-05, "loss": 2.0174, "step": 11078000 }, { "epoch": 54.89, "learning_rate": 2.2566479887833614e-05, "loss": 2.0321, "step": 11078500 }, { "epoch": 54.89, "learning_rate": 2.256524130140753e-05, "loss": 2.0488, "step": 11079000 }, { "epoch": 54.89, "learning_rate": 2.2564002714981448e-05, "loss": 2.0524, "step": 11079500 }, { "epoch": 54.89, "learning_rate": 2.2562764128555364e-05, "loss": 2.0093, "step": 11080000 }, { "epoch": 54.9, "learning_rate": 2.256152554212928e-05, "loss": 2.0482, "step": 11080500 }, { "epoch": 54.9, "learning_rate": 2.25602869557032e-05, "loss": 2.0449, "step": 11081000 }, { "epoch": 54.9, "learning_rate": 2.2559048369277115e-05, "loss": 2.0521, "step": 11081500 }, { "epoch": 54.9, "learning_rate": 2.255780978285103e-05, "loss": 2.0604, "step": 11082000 }, { "epoch": 54.91, "learning_rate": 2.2556573673597798e-05, "loss": 2.0675, "step": 11082500 }, { "epoch": 54.91, "learning_rate": 2.2555337564344566e-05, "loss": 2.0228, "step": 11083000 }, { "epoch": 54.91, "learning_rate": 2.2554101455091335e-05, "loss": 2.0337, "step": 11083500 }, { "epoch": 54.91, "learning_rate": 2.2552862868665252e-05, "loss": 2.0479, "step": 11084000 }, { "epoch": 54.92, "learning_rate": 2.2551624282239166e-05, "loss": 2.0354, "step": 11084500 }, { "epoch": 54.92, "learning_rate": 2.2550385695813083e-05, "loss": 2.0311, "step": 11085000 }, { "epoch": 54.92, "learning_rate": 2.2549149586559855e-05, "loss": 2.0253, "step": 11085500 }, { "epoch": 54.92, "learning_rate": 2.254791100013377e-05, "loss": 2.0346, "step": 11086000 }, { "epoch": 54.93, "learning_rate": 2.2546672413707685e-05, "loss": 2.0268, "step": 11086500 }, { "epoch": 54.93, "learning_rate": 2.2545433827281602e-05, "loss": 2.03, "step": 11087000 }, { "epoch": 54.93, "learning_rate": 2.2544195240855516e-05, "loss": 2.0346, "step": 11087500 }, { "epoch": 54.93, "learning_rate": 2.2542956654429433e-05, "loss": 2.0132, "step": 11088000 }, { "epoch": 54.94, "learning_rate": 2.254171806800335e-05, "loss": 2.032, "step": 11088500 }, { "epoch": 54.94, "learning_rate": 2.2540479481577267e-05, "loss": 2.0462, "step": 11089000 }, { "epoch": 54.94, "learning_rate": 2.2539240895151183e-05, "loss": 2.0419, "step": 11089500 }, { "epoch": 54.94, "learning_rate": 2.2538002308725097e-05, "loss": 2.0508, "step": 11090000 }, { "epoch": 54.95, "learning_rate": 2.2536763722299014e-05, "loss": 2.0179, "step": 11090500 }, { "epoch": 54.95, "learning_rate": 2.253552513587293e-05, "loss": 2.0322, "step": 11091000 }, { "epoch": 54.95, "learning_rate": 2.2534286549446848e-05, "loss": 2.0322, "step": 11091500 }, { "epoch": 54.95, "learning_rate": 2.2533047963020765e-05, "loss": 2.0513, "step": 11092000 }, { "epoch": 54.96, "learning_rate": 2.2531811853767534e-05, "loss": 2.0313, "step": 11092500 }, { "epoch": 54.96, "learning_rate": 2.253057326734145e-05, "loss": 2.041, "step": 11093000 }, { "epoch": 54.96, "learning_rate": 2.2529334680915364e-05, "loss": 2.059, "step": 11093500 }, { "epoch": 54.96, "learning_rate": 2.252809609448928e-05, "loss": 2.0471, "step": 11094000 }, { "epoch": 54.97, "learning_rate": 2.2526857508063198e-05, "loss": 2.0225, "step": 11094500 }, { "epoch": 54.97, "learning_rate": 2.2525618921637115e-05, "loss": 2.0284, "step": 11095000 }, { "epoch": 54.97, "learning_rate": 2.2524382812383884e-05, "loss": 2.0238, "step": 11095500 }, { "epoch": 54.97, "learning_rate": 2.25231442259578e-05, "loss": 2.052, "step": 11096000 }, { "epoch": 54.98, "learning_rate": 2.2521905639531714e-05, "loss": 2.0415, "step": 11096500 }, { "epoch": 54.98, "learning_rate": 2.252066705310563e-05, "loss": 2.0393, "step": 11097000 }, { "epoch": 54.98, "learning_rate": 2.2519430943852403e-05, "loss": 2.0284, "step": 11097500 }, { "epoch": 54.98, "learning_rate": 2.2518192357426317e-05, "loss": 2.0203, "step": 11098000 }, { "epoch": 54.99, "learning_rate": 2.2516953771000234e-05, "loss": 2.0677, "step": 11098500 }, { "epoch": 54.99, "learning_rate": 2.251571518457415e-05, "loss": 2.0408, "step": 11099000 }, { "epoch": 54.99, "learning_rate": 2.2514476598148064e-05, "loss": 2.0378, "step": 11099500 }, { "epoch": 54.99, "learning_rate": 2.251323801172198e-05, "loss": 2.0298, "step": 11100000 }, { "epoch": 55.0, "learning_rate": 2.2512004379641605e-05, "loss": 2.0381, "step": 11100500 }, { "epoch": 55.0, "learning_rate": 2.251076827038837e-05, "loss": 2.033, "step": 11101000 }, { "epoch": 55.0, "eval_accuracy": 0.672224111965458, "eval_accuracy_mlm": 0.631029625043114, "eval_accuracy_nsp": 0.866629536513714, "eval_loss": 2.276235818862915, "eval_runtime": 147.1505, "eval_samples_per_second": 1732.641, "eval_steps_per_second": 72.198, "step": 11101365 }, { "epoch": 55.0, "learning_rate": 2.2509529683962287e-05, "loss": 2.0209, "step": 11101500 }, { "epoch": 55.0, "learning_rate": 2.2508291097536204e-05, "loss": 2.0102, "step": 11102000 }, { "epoch": 55.01, "learning_rate": 2.250705251111012e-05, "loss": 2.0122, "step": 11102500 }, { "epoch": 55.01, "learning_rate": 2.2505813924684038e-05, "loss": 2.0064, "step": 11103000 }, { "epoch": 55.01, "learning_rate": 2.2504575338257955e-05, "loss": 2.0262, "step": 11103500 }, { "epoch": 55.01, "learning_rate": 2.2503336751831872e-05, "loss": 2.0118, "step": 11104000 }, { "epoch": 55.02, "learning_rate": 2.2502098165405786e-05, "loss": 2.0347, "step": 11104500 }, { "epoch": 55.02, "learning_rate": 2.2500859578979703e-05, "loss": 2.0186, "step": 11105000 }, { "epoch": 55.02, "learning_rate": 2.249962099255362e-05, "loss": 2.0271, "step": 11105500 }, { "epoch": 55.02, "learning_rate": 2.2498382406127536e-05, "loss": 2.0215, "step": 11106000 }, { "epoch": 55.03, "learning_rate": 2.2497143819701453e-05, "loss": 1.9975, "step": 11106500 }, { "epoch": 55.03, "learning_rate": 2.249590523327537e-05, "loss": 2.0182, "step": 11107000 }, { "epoch": 55.03, "learning_rate": 2.2494666646849287e-05, "loss": 2.0198, "step": 11107500 }, { "epoch": 55.03, "learning_rate": 2.24934280604232e-05, "loss": 2.0215, "step": 11108000 }, { "epoch": 55.04, "learning_rate": 2.2492189473997118e-05, "loss": 1.9976, "step": 11108500 }, { "epoch": 55.04, "learning_rate": 2.249095088757103e-05, "loss": 1.9886, "step": 11109000 }, { "epoch": 55.04, "learning_rate": 2.2489714778317803e-05, "loss": 2.0251, "step": 11109500 }, { "epoch": 55.04, "learning_rate": 2.248847619189172e-05, "loss": 2.0471, "step": 11110000 }, { "epoch": 55.05, "learning_rate": 2.2487237605465637e-05, "loss": 2.0077, "step": 11110500 }, { "epoch": 55.05, "learning_rate": 2.2485999019039554e-05, "loss": 1.9942, "step": 11111000 }, { "epoch": 55.05, "learning_rate": 2.248476290978632e-05, "loss": 2.0062, "step": 11111500 }, { "epoch": 55.05, "learning_rate": 2.2483524323360237e-05, "loss": 2.0184, "step": 11112000 }, { "epoch": 55.06, "learning_rate": 2.2482285736934153e-05, "loss": 2.0015, "step": 11112500 }, { "epoch": 55.06, "learning_rate": 2.248104715050807e-05, "loss": 2.0269, "step": 11113000 }, { "epoch": 55.06, "learning_rate": 2.2479808564081987e-05, "loss": 2.0024, "step": 11113500 }, { "epoch": 55.06, "learning_rate": 2.2478569977655904e-05, "loss": 2.0099, "step": 11114000 }, { "epoch": 55.07, "learning_rate": 2.247733386840267e-05, "loss": 2.0131, "step": 11114500 }, { "epoch": 55.07, "learning_rate": 2.2476095281976587e-05, "loss": 2.019, "step": 11115000 }, { "epoch": 55.07, "learning_rate": 2.2474859172723355e-05, "loss": 2.0488, "step": 11115500 }, { "epoch": 55.07, "learning_rate": 2.2473620586297272e-05, "loss": 2.0335, "step": 11116000 }, { "epoch": 55.07, "learning_rate": 2.247238199987119e-05, "loss": 2.046, "step": 11116500 }, { "epoch": 55.08, "learning_rate": 2.2471143413445103e-05, "loss": 2.0297, "step": 11117000 }, { "epoch": 55.08, "learning_rate": 2.246990482701902e-05, "loss": 2.0144, "step": 11117500 }, { "epoch": 55.08, "learning_rate": 2.2468666240592937e-05, "loss": 1.9958, "step": 11118000 }, { "epoch": 55.08, "learning_rate": 2.2467427654166854e-05, "loss": 2.0267, "step": 11118500 }, { "epoch": 55.09, "learning_rate": 2.2466191544913622e-05, "loss": 2.0259, "step": 11119000 }, { "epoch": 55.09, "learning_rate": 2.246495543566039e-05, "loss": 1.9944, "step": 11119500 }, { "epoch": 55.09, "learning_rate": 2.2463716849234305e-05, "loss": 2.0134, "step": 11120000 }, { "epoch": 55.09, "learning_rate": 2.246247826280822e-05, "loss": 2.0241, "step": 11120500 }, { "epoch": 55.1, "learning_rate": 2.246123967638214e-05, "loss": 2.056, "step": 11121000 }, { "epoch": 55.1, "learning_rate": 2.2460001089956056e-05, "loss": 2.0322, "step": 11121500 }, { "epoch": 55.1, "learning_rate": 2.2458762503529972e-05, "loss": 1.9998, "step": 11122000 }, { "epoch": 55.1, "learning_rate": 2.245752391710389e-05, "loss": 2.0271, "step": 11122500 }, { "epoch": 55.11, "learning_rate": 2.2456287807850658e-05, "loss": 2.0191, "step": 11123000 }, { "epoch": 55.11, "learning_rate": 2.2455051698597427e-05, "loss": 2.0404, "step": 11123500 }, { "epoch": 55.11, "learning_rate": 2.2453813112171344e-05, "loss": 1.9933, "step": 11124000 }, { "epoch": 55.11, "learning_rate": 2.245257452574526e-05, "loss": 2.011, "step": 11124500 }, { "epoch": 55.12, "learning_rate": 2.2451335939319174e-05, "loss": 2.0226, "step": 11125000 }, { "epoch": 55.12, "learning_rate": 2.245009735289309e-05, "loss": 2.043, "step": 11125500 }, { "epoch": 55.12, "learning_rate": 2.2448858766467008e-05, "loss": 2.0354, "step": 11126000 }, { "epoch": 55.12, "learning_rate": 2.2447620180040925e-05, "loss": 2.0401, "step": 11126500 }, { "epoch": 55.13, "learning_rate": 2.244638159361484e-05, "loss": 2.0249, "step": 11127000 }, { "epoch": 55.13, "learning_rate": 2.2445143007188756e-05, "loss": 2.035, "step": 11127500 }, { "epoch": 55.13, "learning_rate": 2.2443904420762673e-05, "loss": 2.0256, "step": 11128000 }, { "epoch": 55.13, "learning_rate": 2.244266583433659e-05, "loss": 2.0412, "step": 11128500 }, { "epoch": 55.14, "learning_rate": 2.2441427247910506e-05, "loss": 2.008, "step": 11129000 }, { "epoch": 55.14, "learning_rate": 2.244018866148442e-05, "loss": 1.9968, "step": 11129500 }, { "epoch": 55.14, "learning_rate": 2.2438952552231192e-05, "loss": 2.0254, "step": 11130000 }, { "epoch": 55.14, "learning_rate": 2.2437713965805106e-05, "loss": 2.0295, "step": 11130500 }, { "epoch": 55.15, "learning_rate": 2.2436475379379023e-05, "loss": 2.033, "step": 11131000 }, { "epoch": 55.15, "learning_rate": 2.243523679295294e-05, "loss": 2.0461, "step": 11131500 }, { "epoch": 55.15, "learning_rate": 2.2433998206526857e-05, "loss": 2.0205, "step": 11132000 }, { "epoch": 55.15, "learning_rate": 2.243275962010077e-05, "loss": 2.0333, "step": 11132500 }, { "epoch": 55.16, "learning_rate": 2.2431523510847542e-05, "loss": 2.0239, "step": 11133000 }, { "epoch": 55.16, "learning_rate": 2.2430284924421456e-05, "loss": 2.0262, "step": 11133500 }, { "epoch": 55.16, "learning_rate": 2.2429046337995373e-05, "loss": 2.0289, "step": 11134000 }, { "epoch": 55.16, "learning_rate": 2.242780775156929e-05, "loss": 2.0213, "step": 11134500 }, { "epoch": 55.17, "learning_rate": 2.2426569165143207e-05, "loss": 2.0104, "step": 11135000 }, { "epoch": 55.17, "learning_rate": 2.2425330578717124e-05, "loss": 2.0092, "step": 11135500 }, { "epoch": 55.17, "learning_rate": 2.2424091992291037e-05, "loss": 2.0337, "step": 11136000 }, { "epoch": 55.17, "learning_rate": 2.2422853405864954e-05, "loss": 2.0144, "step": 11136500 }, { "epoch": 55.18, "learning_rate": 2.242161481943887e-05, "loss": 2.0168, "step": 11137000 }, { "epoch": 55.18, "learning_rate": 2.2420376233012788e-05, "loss": 2.0315, "step": 11137500 }, { "epoch": 55.18, "learning_rate": 2.2419140123759557e-05, "loss": 2.0293, "step": 11138000 }, { "epoch": 55.18, "learning_rate": 2.2417901537333474e-05, "loss": 2.0135, "step": 11138500 }, { "epoch": 55.19, "learning_rate": 2.2416662950907387e-05, "loss": 2.0205, "step": 11139000 }, { "epoch": 55.19, "learning_rate": 2.2415424364481304e-05, "loss": 2.0124, "step": 11139500 }, { "epoch": 55.19, "learning_rate": 2.241418577805522e-05, "loss": 2.0355, "step": 11140000 }, { "epoch": 55.19, "learning_rate": 2.2412947191629138e-05, "loss": 2.0062, "step": 11140500 }, { "epoch": 55.2, "learning_rate": 2.2411708605203055e-05, "loss": 1.9863, "step": 11141000 }, { "epoch": 55.2, "learning_rate": 2.2410470018776972e-05, "loss": 2.0103, "step": 11141500 }, { "epoch": 55.2, "learning_rate": 2.2409233909523737e-05, "loss": 2.0173, "step": 11142000 }, { "epoch": 55.2, "learning_rate": 2.2407995323097654e-05, "loss": 2.0201, "step": 11142500 }, { "epoch": 55.21, "learning_rate": 2.240675673667157e-05, "loss": 2.0333, "step": 11143000 }, { "epoch": 55.21, "learning_rate": 2.2405518150245488e-05, "loss": 2.0143, "step": 11143500 }, { "epoch": 55.21, "learning_rate": 2.2404279563819405e-05, "loss": 2.0304, "step": 11144000 }, { "epoch": 55.21, "learning_rate": 2.2403043454566174e-05, "loss": 2.0122, "step": 11144500 }, { "epoch": 55.22, "learning_rate": 2.2401804868140087e-05, "loss": 2.0295, "step": 11145000 }, { "epoch": 55.22, "learning_rate": 2.2400566281714004e-05, "loss": 2.002, "step": 11145500 }, { "epoch": 55.22, "learning_rate": 2.239932769528792e-05, "loss": 2.0181, "step": 11146000 }, { "epoch": 55.22, "learning_rate": 2.2398089108861838e-05, "loss": 2.0223, "step": 11146500 }, { "epoch": 55.23, "learning_rate": 2.2396852999608607e-05, "loss": 2.0289, "step": 11147000 }, { "epoch": 55.23, "learning_rate": 2.2395616890355376e-05, "loss": 2.0349, "step": 11147500 }, { "epoch": 55.23, "learning_rate": 2.2394378303929293e-05, "loss": 2.0568, "step": 11148000 }, { "epoch": 55.23, "learning_rate": 2.239313971750321e-05, "loss": 2.0181, "step": 11148500 }, { "epoch": 55.24, "learning_rate": 2.2391901131077126e-05, "loss": 2.0107, "step": 11149000 }, { "epoch": 55.24, "learning_rate": 2.2390665021823895e-05, "loss": 1.997, "step": 11149500 }, { "epoch": 55.24, "learning_rate": 2.238942643539781e-05, "loss": 2.0257, "step": 11150000 }, { "epoch": 55.24, "learning_rate": 2.2388187848971726e-05, "loss": 2.0276, "step": 11150500 }, { "epoch": 55.25, "learning_rate": 2.2386949262545643e-05, "loss": 2.0009, "step": 11151000 }, { "epoch": 55.25, "learning_rate": 2.238571067611956e-05, "loss": 2.0255, "step": 11151500 }, { "epoch": 55.25, "learning_rate": 2.2384472089693476e-05, "loss": 2.0144, "step": 11152000 }, { "epoch": 55.25, "learning_rate": 2.2383233503267393e-05, "loss": 2.0302, "step": 11152500 }, { "epoch": 55.26, "learning_rate": 2.2381997394014162e-05, "loss": 2.0304, "step": 11153000 }, { "epoch": 55.26, "learning_rate": 2.2380758807588076e-05, "loss": 2.0293, "step": 11153500 }, { "epoch": 55.26, "learning_rate": 2.2379520221161993e-05, "loss": 2.0247, "step": 11154000 }, { "epoch": 55.26, "learning_rate": 2.237828163473591e-05, "loss": 2.0237, "step": 11154500 }, { "epoch": 55.27, "learning_rate": 2.2377043048309827e-05, "loss": 2.0147, "step": 11155000 }, { "epoch": 55.27, "learning_rate": 2.2375804461883743e-05, "loss": 2.0317, "step": 11155500 }, { "epoch": 55.27, "learning_rate": 2.237456587545766e-05, "loss": 2.0295, "step": 11156000 }, { "epoch": 55.27, "learning_rate": 2.2373329766204426e-05, "loss": 2.0203, "step": 11156500 }, { "epoch": 55.28, "learning_rate": 2.2372091179778343e-05, "loss": 2.027, "step": 11157000 }, { "epoch": 55.28, "learning_rate": 2.237085259335226e-05, "loss": 2.0061, "step": 11157500 }, { "epoch": 55.28, "learning_rate": 2.236961648409903e-05, "loss": 2.0299, "step": 11158000 }, { "epoch": 55.28, "learning_rate": 2.2368377897672945e-05, "loss": 2.0324, "step": 11158500 }, { "epoch": 55.29, "learning_rate": 2.2367139311246862e-05, "loss": 2.0288, "step": 11159000 }, { "epoch": 55.29, "learning_rate": 2.2365903201993628e-05, "loss": 2.028, "step": 11159500 }, { "epoch": 55.29, "learning_rate": 2.2364664615567545e-05, "loss": 2.0262, "step": 11160000 }, { "epoch": 55.29, "learning_rate": 2.236342602914146e-05, "loss": 2.012, "step": 11160500 }, { "epoch": 55.3, "learning_rate": 2.236218744271538e-05, "loss": 2.0426, "step": 11161000 }, { "epoch": 55.3, "learning_rate": 2.2360948856289295e-05, "loss": 2.0202, "step": 11161500 }, { "epoch": 55.3, "learning_rate": 2.2359710269863212e-05, "loss": 2.0327, "step": 11162000 }, { "epoch": 55.3, "learning_rate": 2.2358471683437126e-05, "loss": 2.019, "step": 11162500 }, { "epoch": 55.31, "learning_rate": 2.2357233097011043e-05, "loss": 2.0197, "step": 11163000 }, { "epoch": 55.31, "learning_rate": 2.235599451058496e-05, "loss": 2.0314, "step": 11163500 }, { "epoch": 55.31, "learning_rate": 2.235475840133173e-05, "loss": 2.0163, "step": 11164000 }, { "epoch": 55.31, "learning_rate": 2.2353519814905646e-05, "loss": 2.0203, "step": 11164500 }, { "epoch": 55.32, "learning_rate": 2.2352281228479562e-05, "loss": 2.0092, "step": 11165000 }, { "epoch": 55.32, "learning_rate": 2.235104264205348e-05, "loss": 2.0022, "step": 11165500 }, { "epoch": 55.32, "learning_rate": 2.2349804055627393e-05, "loss": 2.0105, "step": 11166000 }, { "epoch": 55.32, "learning_rate": 2.234856546920131e-05, "loss": 1.9927, "step": 11166500 }, { "epoch": 55.33, "learning_rate": 2.2347326882775227e-05, "loss": 2.0257, "step": 11167000 }, { "epoch": 55.33, "learning_rate": 2.2346088296349144e-05, "loss": 2.0348, "step": 11167500 }, { "epoch": 55.33, "learning_rate": 2.234484970992306e-05, "loss": 2.0045, "step": 11168000 }, { "epoch": 55.33, "learning_rate": 2.2343611123496978e-05, "loss": 2.0104, "step": 11168500 }, { "epoch": 55.34, "learning_rate": 2.2342372537070895e-05, "loss": 2.0035, "step": 11169000 }, { "epoch": 55.34, "learning_rate": 2.234113395064481e-05, "loss": 2.0434, "step": 11169500 }, { "epoch": 55.34, "learning_rate": 2.2339895364218725e-05, "loss": 2.0255, "step": 11170000 }, { "epoch": 55.34, "learning_rate": 2.2338656777792642e-05, "loss": 2.027, "step": 11170500 }, { "epoch": 55.34, "learning_rate": 2.233742066853941e-05, "loss": 2.027, "step": 11171000 }, { "epoch": 55.35, "learning_rate": 2.2336182082113328e-05, "loss": 2.0215, "step": 11171500 }, { "epoch": 55.35, "learning_rate": 2.2334943495687245e-05, "loss": 2.0489, "step": 11172000 }, { "epoch": 55.35, "learning_rate": 2.233370490926116e-05, "loss": 1.9986, "step": 11172500 }, { "epoch": 55.35, "learning_rate": 2.233246632283508e-05, "loss": 2.0157, "step": 11173000 }, { "epoch": 55.36, "learning_rate": 2.2331230213581844e-05, "loss": 2.018, "step": 11173500 }, { "epoch": 55.36, "learning_rate": 2.232999162715576e-05, "loss": 2.0191, "step": 11174000 }, { "epoch": 55.36, "learning_rate": 2.2328753040729678e-05, "loss": 2.0211, "step": 11174500 }, { "epoch": 55.36, "learning_rate": 2.2327514454303595e-05, "loss": 2.0106, "step": 11175000 }, { "epoch": 55.37, "learning_rate": 2.232627586787751e-05, "loss": 2.0167, "step": 11175500 }, { "epoch": 55.37, "learning_rate": 2.232503728145143e-05, "loss": 2.0501, "step": 11176000 }, { "epoch": 55.37, "learning_rate": 2.2323798695025342e-05, "loss": 2.0382, "step": 11176500 }, { "epoch": 55.37, "learning_rate": 2.232256010859926e-05, "loss": 2.0263, "step": 11177000 }, { "epoch": 55.38, "learning_rate": 2.2321321522173176e-05, "loss": 2.035, "step": 11177500 }, { "epoch": 55.38, "learning_rate": 2.2320085412919945e-05, "loss": 2.0142, "step": 11178000 }, { "epoch": 55.38, "learning_rate": 2.231884930366671e-05, "loss": 2.0303, "step": 11178500 }, { "epoch": 55.38, "learning_rate": 2.2317610717240627e-05, "loss": 2.0149, "step": 11179000 }, { "epoch": 55.39, "learning_rate": 2.2316372130814544e-05, "loss": 2.0216, "step": 11179500 }, { "epoch": 55.39, "learning_rate": 2.231513354438846e-05, "loss": 2.0332, "step": 11180000 }, { "epoch": 55.39, "learning_rate": 2.2313894957962378e-05, "loss": 2.0332, "step": 11180500 }, { "epoch": 55.39, "learning_rate": 2.2312656371536295e-05, "loss": 2.0205, "step": 11181000 }, { "epoch": 55.4, "learning_rate": 2.231141778511021e-05, "loss": 2.0099, "step": 11181500 }, { "epoch": 55.4, "learning_rate": 2.231017919868413e-05, "loss": 2.0088, "step": 11182000 }, { "epoch": 55.4, "learning_rate": 2.2308940612258042e-05, "loss": 2.0184, "step": 11182500 }, { "epoch": 55.4, "learning_rate": 2.230770450300481e-05, "loss": 2.0211, "step": 11183000 }, { "epoch": 55.41, "learning_rate": 2.2306465916578728e-05, "loss": 2.0301, "step": 11183500 }, { "epoch": 55.41, "learning_rate": 2.2305227330152645e-05, "loss": 2.0286, "step": 11184000 }, { "epoch": 55.41, "learning_rate": 2.2303993698072266e-05, "loss": 2.0241, "step": 11184500 }, { "epoch": 55.41, "learning_rate": 2.2302755111646182e-05, "loss": 2.0186, "step": 11185000 }, { "epoch": 55.42, "learning_rate": 2.23015165252201e-05, "loss": 2.0245, "step": 11185500 }, { "epoch": 55.42, "learning_rate": 2.2300280415966868e-05, "loss": 2.0154, "step": 11186000 }, { "epoch": 55.42, "learning_rate": 2.2299041829540782e-05, "loss": 2.0177, "step": 11186500 }, { "epoch": 55.42, "learning_rate": 2.22978032431147e-05, "loss": 2.0242, "step": 11187000 }, { "epoch": 55.43, "learning_rate": 2.2296564656688616e-05, "loss": 2.0018, "step": 11187500 }, { "epoch": 55.43, "learning_rate": 2.2295326070262532e-05, "loss": 2.0478, "step": 11188000 }, { "epoch": 55.43, "learning_rate": 2.22940899610093e-05, "loss": 2.031, "step": 11188500 }, { "epoch": 55.43, "learning_rate": 2.2292851374583218e-05, "loss": 2.0333, "step": 11189000 }, { "epoch": 55.44, "learning_rate": 2.2291612788157132e-05, "loss": 2.0329, "step": 11189500 }, { "epoch": 55.44, "learning_rate": 2.229037420173105e-05, "loss": 2.032, "step": 11190000 }, { "epoch": 55.44, "learning_rate": 2.2289138092477818e-05, "loss": 2.0295, "step": 11190500 }, { "epoch": 55.44, "learning_rate": 2.2287899506051734e-05, "loss": 2.0348, "step": 11191000 }, { "epoch": 55.45, "learning_rate": 2.228666091962565e-05, "loss": 2.0011, "step": 11191500 }, { "epoch": 55.45, "learning_rate": 2.2285422333199568e-05, "loss": 1.9925, "step": 11192000 }, { "epoch": 55.45, "learning_rate": 2.2284183746773482e-05, "loss": 2.0142, "step": 11192500 }, { "epoch": 55.45, "learning_rate": 2.22829451603474e-05, "loss": 2.037, "step": 11193000 }, { "epoch": 55.46, "learning_rate": 2.2281706573921316e-05, "loss": 2.0403, "step": 11193500 }, { "epoch": 55.46, "learning_rate": 2.2280467987495233e-05, "loss": 2.0413, "step": 11194000 }, { "epoch": 55.46, "learning_rate": 2.2279231878242e-05, "loss": 2.0374, "step": 11194500 }, { "epoch": 55.46, "learning_rate": 2.227799576898877e-05, "loss": 2.0109, "step": 11195000 }, { "epoch": 55.47, "learning_rate": 2.2276757182562684e-05, "loss": 2.0395, "step": 11195500 }, { "epoch": 55.47, "learning_rate": 2.22755185961366e-05, "loss": 2.0274, "step": 11196000 }, { "epoch": 55.47, "learning_rate": 2.2274280009710518e-05, "loss": 2.013, "step": 11196500 }, { "epoch": 55.47, "learning_rate": 2.2273041423284435e-05, "loss": 2.0045, "step": 11197000 }, { "epoch": 55.48, "learning_rate": 2.227180283685835e-05, "loss": 2.0283, "step": 11197500 }, { "epoch": 55.48, "learning_rate": 2.227056425043227e-05, "loss": 2.0173, "step": 11198000 }, { "epoch": 55.48, "learning_rate": 2.2269325664006185e-05, "loss": 2.0388, "step": 11198500 }, { "epoch": 55.48, "learning_rate": 2.22680870775801e-05, "loss": 2.0439, "step": 11199000 }, { "epoch": 55.49, "learning_rate": 2.2266848491154016e-05, "loss": 2.021, "step": 11199500 }, { "epoch": 55.49, "learning_rate": 2.2265609904727933e-05, "loss": 2.015, "step": 11200000 }, { "epoch": 55.49, "learning_rate": 2.22643737954747e-05, "loss": 2.0181, "step": 11200500 }, { "epoch": 55.49, "learning_rate": 2.226313520904862e-05, "loss": 2.0131, "step": 11201000 }, { "epoch": 55.5, "learning_rate": 2.2261896622622535e-05, "loss": 1.9992, "step": 11201500 }, { "epoch": 55.5, "learning_rate": 2.226065803619645e-05, "loss": 2.0293, "step": 11202000 }, { "epoch": 55.5, "learning_rate": 2.2259419449770366e-05, "loss": 2.0294, "step": 11202500 }, { "epoch": 55.5, "learning_rate": 2.2258180863344283e-05, "loss": 1.9866, "step": 11203000 }, { "epoch": 55.51, "learning_rate": 2.22569422769182e-05, "loss": 2.0492, "step": 11203500 }, { "epoch": 55.51, "learning_rate": 2.2255703690492117e-05, "loss": 2.0323, "step": 11204000 }, { "epoch": 55.51, "learning_rate": 2.2254470058411737e-05, "loss": 2.0321, "step": 11204500 }, { "epoch": 55.51, "learning_rate": 2.2253231471985654e-05, "loss": 2.0258, "step": 11205000 }, { "epoch": 55.52, "learning_rate": 2.2251992885559568e-05, "loss": 2.0293, "step": 11205500 }, { "epoch": 55.52, "learning_rate": 2.225075677630634e-05, "loss": 2.0442, "step": 11206000 }, { "epoch": 55.52, "learning_rate": 2.2249520667053105e-05, "loss": 2.0226, "step": 11206500 }, { "epoch": 55.52, "learning_rate": 2.2248282080627022e-05, "loss": 2.0461, "step": 11207000 }, { "epoch": 55.53, "learning_rate": 2.224704349420094e-05, "loss": 2.0134, "step": 11207500 }, { "epoch": 55.53, "learning_rate": 2.2245804907774856e-05, "loss": 2.0409, "step": 11208000 }, { "epoch": 55.53, "learning_rate": 2.2244566321348773e-05, "loss": 2.0168, "step": 11208500 }, { "epoch": 55.53, "learning_rate": 2.224332773492269e-05, "loss": 2.0207, "step": 11209000 }, { "epoch": 55.54, "learning_rate": 2.2242089148496607e-05, "loss": 2.029, "step": 11209500 }, { "epoch": 55.54, "learning_rate": 2.224085056207052e-05, "loss": 2.0277, "step": 11210000 }, { "epoch": 55.54, "learning_rate": 2.2239611975644437e-05, "loss": 2.017, "step": 11210500 }, { "epoch": 55.54, "learning_rate": 2.2238373389218354e-05, "loss": 2.0073, "step": 11211000 }, { "epoch": 55.55, "learning_rate": 2.223713480279227e-05, "loss": 2.0269, "step": 11211500 }, { "epoch": 55.55, "learning_rate": 2.2235896216366188e-05, "loss": 2.0334, "step": 11212000 }, { "epoch": 55.55, "learning_rate": 2.2234657629940102e-05, "loss": 2.0082, "step": 11212500 }, { "epoch": 55.55, "learning_rate": 2.2233421520686874e-05, "loss": 2.0447, "step": 11213000 }, { "epoch": 55.56, "learning_rate": 2.2232182934260788e-05, "loss": 2.0049, "step": 11213500 }, { "epoch": 55.56, "learning_rate": 2.2230944347834704e-05, "loss": 2.0307, "step": 11214000 }, { "epoch": 55.56, "learning_rate": 2.222970576140862e-05, "loss": 2.024, "step": 11214500 }, { "epoch": 55.56, "learning_rate": 2.222846717498254e-05, "loss": 2.0368, "step": 11215000 }, { "epoch": 55.57, "learning_rate": 2.2227228588556452e-05, "loss": 2.029, "step": 11215500 }, { "epoch": 55.57, "learning_rate": 2.222599000213037e-05, "loss": 2.0166, "step": 11216000 }, { "epoch": 55.57, "learning_rate": 2.2224751415704286e-05, "loss": 1.9756, "step": 11216500 }, { "epoch": 55.57, "learning_rate": 2.2223512829278203e-05, "loss": 2.0397, "step": 11217000 }, { "epoch": 55.58, "learning_rate": 2.2222274242852116e-05, "loss": 2.0266, "step": 11217500 }, { "epoch": 55.58, "learning_rate": 2.222103813359889e-05, "loss": 2.0264, "step": 11218000 }, { "epoch": 55.58, "learning_rate": 2.2219799547172805e-05, "loss": 2.0119, "step": 11218500 }, { "epoch": 55.58, "learning_rate": 2.221856096074672e-05, "loss": 2.02, "step": 11219000 }, { "epoch": 55.59, "learning_rate": 2.2217322374320636e-05, "loss": 2.0303, "step": 11219500 }, { "epoch": 55.59, "learning_rate": 2.2216086265067405e-05, "loss": 1.9995, "step": 11220000 }, { "epoch": 55.59, "learning_rate": 2.221484767864132e-05, "loss": 2.0067, "step": 11220500 }, { "epoch": 55.59, "learning_rate": 2.221360909221524e-05, "loss": 2.0236, "step": 11221000 }, { "epoch": 55.6, "learning_rate": 2.2212370505789155e-05, "loss": 2.0226, "step": 11221500 }, { "epoch": 55.6, "learning_rate": 2.2211131919363072e-05, "loss": 2.0473, "step": 11222000 }, { "epoch": 55.6, "learning_rate": 2.2209893332936986e-05, "loss": 2.0071, "step": 11222500 }, { "epoch": 55.6, "learning_rate": 2.2208654746510903e-05, "loss": 2.0652, "step": 11223000 }, { "epoch": 55.61, "learning_rate": 2.220741616008482e-05, "loss": 2.0343, "step": 11223500 }, { "epoch": 55.61, "learning_rate": 2.2206177573658733e-05, "loss": 2.0454, "step": 11224000 }, { "epoch": 55.61, "learning_rate": 2.220493898723265e-05, "loss": 2.0117, "step": 11224500 }, { "epoch": 55.61, "learning_rate": 2.2203700400806567e-05, "loss": 2.0372, "step": 11225000 }, { "epoch": 55.62, "learning_rate": 2.2202461814380484e-05, "loss": 2.0315, "step": 11225500 }, { "epoch": 55.62, "learning_rate": 2.22012232279544e-05, "loss": 2.0235, "step": 11226000 }, { "epoch": 55.62, "learning_rate": 2.219998711870117e-05, "loss": 2.0223, "step": 11226500 }, { "epoch": 55.62, "learning_rate": 2.2198748532275083e-05, "loss": 2.0324, "step": 11227000 }, { "epoch": 55.62, "learning_rate": 2.2197509945849e-05, "loss": 2.0394, "step": 11227500 }, { "epoch": 55.63, "learning_rate": 2.2196271359422917e-05, "loss": 2.0106, "step": 11228000 }, { "epoch": 55.63, "learning_rate": 2.2195032772996834e-05, "loss": 2.0192, "step": 11228500 }, { "epoch": 55.63, "learning_rate": 2.219379418657075e-05, "loss": 2.0619, "step": 11229000 }, { "epoch": 55.63, "learning_rate": 2.219255807731752e-05, "loss": 2.0161, "step": 11229500 }, { "epoch": 55.64, "learning_rate": 2.2191319490891437e-05, "loss": 2.0225, "step": 11230000 }, { "epoch": 55.64, "learning_rate": 2.2190083381638206e-05, "loss": 2.0436, "step": 11230500 }, { "epoch": 55.64, "learning_rate": 2.2188844795212122e-05, "loss": 2.0207, "step": 11231000 }, { "epoch": 55.64, "learning_rate": 2.218760620878604e-05, "loss": 2.0443, "step": 11231500 }, { "epoch": 55.65, "learning_rate": 2.2186367622359956e-05, "loss": 2.0152, "step": 11232000 }, { "epoch": 55.65, "learning_rate": 2.218512903593387e-05, "loss": 2.043, "step": 11232500 }, { "epoch": 55.65, "learning_rate": 2.2183890449507787e-05, "loss": 1.9976, "step": 11233000 }, { "epoch": 55.65, "learning_rate": 2.2182651863081704e-05, "loss": 2.0162, "step": 11233500 }, { "epoch": 55.66, "learning_rate": 2.2181413276655617e-05, "loss": 2.0363, "step": 11234000 }, { "epoch": 55.66, "learning_rate": 2.2180174690229534e-05, "loss": 2.034, "step": 11234500 }, { "epoch": 55.66, "learning_rate": 2.2178938580976306e-05, "loss": 2.0462, "step": 11235000 }, { "epoch": 55.66, "learning_rate": 2.2177702471723072e-05, "loss": 2.0261, "step": 11235500 }, { "epoch": 55.67, "learning_rate": 2.217646388529699e-05, "loss": 2.0021, "step": 11236000 }, { "epoch": 55.67, "learning_rate": 2.2175225298870906e-05, "loss": 2.0523, "step": 11236500 }, { "epoch": 55.67, "learning_rate": 2.2173986712444823e-05, "loss": 2.0542, "step": 11237000 }, { "epoch": 55.67, "learning_rate": 2.217274812601874e-05, "loss": 2.0082, "step": 11237500 }, { "epoch": 55.68, "learning_rate": 2.2171509539592656e-05, "loss": 2.0365, "step": 11238000 }, { "epoch": 55.68, "learning_rate": 2.2170270953166573e-05, "loss": 2.0466, "step": 11238500 }, { "epoch": 55.68, "learning_rate": 2.216903236674049e-05, "loss": 2.0195, "step": 11239000 }, { "epoch": 55.68, "learning_rate": 2.2167793780314404e-05, "loss": 2.0189, "step": 11239500 }, { "epoch": 55.69, "learning_rate": 2.2166557671061173e-05, "loss": 2.0072, "step": 11240000 }, { "epoch": 55.69, "learning_rate": 2.216532156180794e-05, "loss": 2.045, "step": 11240500 }, { "epoch": 55.69, "learning_rate": 2.2164087929727562e-05, "loss": 2.038, "step": 11241000 }, { "epoch": 55.69, "learning_rate": 2.216284934330148e-05, "loss": 2.0109, "step": 11241500 }, { "epoch": 55.7, "learning_rate": 2.2161613234048245e-05, "loss": 2.0275, "step": 11242000 }, { "epoch": 55.7, "learning_rate": 2.216037464762216e-05, "loss": 2.0323, "step": 11242500 }, { "epoch": 55.7, "learning_rate": 2.215913606119608e-05, "loss": 2.0141, "step": 11243000 }, { "epoch": 55.7, "learning_rate": 2.2157897474769995e-05, "loss": 2.0298, "step": 11243500 }, { "epoch": 55.71, "learning_rate": 2.2156658888343912e-05, "loss": 2.0184, "step": 11244000 }, { "epoch": 55.71, "learning_rate": 2.215542030191783e-05, "loss": 2.0504, "step": 11244500 }, { "epoch": 55.71, "learning_rate": 2.2154181715491746e-05, "loss": 2.0206, "step": 11245000 }, { "epoch": 55.71, "learning_rate": 2.2152943129065663e-05, "loss": 2.0342, "step": 11245500 }, { "epoch": 55.72, "learning_rate": 2.215170454263958e-05, "loss": 2.013, "step": 11246000 }, { "epoch": 55.72, "learning_rate": 2.2150465956213493e-05, "loss": 2.0525, "step": 11246500 }, { "epoch": 55.72, "learning_rate": 2.214922736978741e-05, "loss": 2.031, "step": 11247000 }, { "epoch": 55.72, "learning_rate": 2.2147988783361327e-05, "loss": 1.9935, "step": 11247500 }, { "epoch": 55.73, "learning_rate": 2.214675019693524e-05, "loss": 2.0339, "step": 11248000 }, { "epoch": 55.73, "learning_rate": 2.2145511610509158e-05, "loss": 2.0395, "step": 11248500 }, { "epoch": 55.73, "learning_rate": 2.2144273024083075e-05, "loss": 2.0219, "step": 11249000 }, { "epoch": 55.73, "learning_rate": 2.214303443765699e-05, "loss": 2.0017, "step": 11249500 }, { "epoch": 55.74, "learning_rate": 2.214179585123091e-05, "loss": 2.0363, "step": 11250000 }, { "epoch": 55.74, "learning_rate": 2.2140559741977677e-05, "loss": 2.0408, "step": 11250500 }, { "epoch": 55.74, "learning_rate": 2.2139321155551594e-05, "loss": 2.0321, "step": 11251000 }, { "epoch": 55.74, "learning_rate": 2.2138082569125508e-05, "loss": 2.0437, "step": 11251500 }, { "epoch": 55.75, "learning_rate": 2.2136843982699425e-05, "loss": 2.0256, "step": 11252000 }, { "epoch": 55.75, "learning_rate": 2.2135605396273342e-05, "loss": 2.0273, "step": 11252500 }, { "epoch": 55.75, "learning_rate": 2.213436680984726e-05, "loss": 2.0322, "step": 11253000 }, { "epoch": 55.75, "learning_rate": 2.2133128223421176e-05, "loss": 2.0235, "step": 11253500 }, { "epoch": 55.76, "learning_rate": 2.2131892114167944e-05, "loss": 2.0465, "step": 11254000 }, { "epoch": 55.76, "learning_rate": 2.213065352774186e-05, "loss": 2.0262, "step": 11254500 }, { "epoch": 55.76, "learning_rate": 2.2129414941315775e-05, "loss": 2.0294, "step": 11255000 }, { "epoch": 55.76, "learning_rate": 2.2128176354889692e-05, "loss": 2.0191, "step": 11255500 }, { "epoch": 55.77, "learning_rate": 2.212694024563646e-05, "loss": 2.0059, "step": 11256000 }, { "epoch": 55.77, "learning_rate": 2.2125701659210378e-05, "loss": 2.0242, "step": 11256500 }, { "epoch": 55.77, "learning_rate": 2.2124463072784294e-05, "loss": 2.0589, "step": 11257000 }, { "epoch": 55.77, "learning_rate": 2.212322448635821e-05, "loss": 2.0187, "step": 11257500 }, { "epoch": 55.78, "learning_rate": 2.212198589993213e-05, "loss": 2.0276, "step": 11258000 }, { "epoch": 55.78, "learning_rate": 2.2120747313506042e-05, "loss": 2.006, "step": 11258500 }, { "epoch": 55.78, "learning_rate": 2.211950872707996e-05, "loss": 2.0214, "step": 11259000 }, { "epoch": 55.78, "learning_rate": 2.2118270140653876e-05, "loss": 2.0234, "step": 11259500 }, { "epoch": 55.79, "learning_rate": 2.2117036508573496e-05, "loss": 2.0276, "step": 11260000 }, { "epoch": 55.79, "learning_rate": 2.2115797922147413e-05, "loss": 2.0378, "step": 11260500 }, { "epoch": 55.79, "learning_rate": 2.211455933572133e-05, "loss": 2.0466, "step": 11261000 }, { "epoch": 55.79, "learning_rate": 2.2113320749295247e-05, "loss": 2.0429, "step": 11261500 }, { "epoch": 55.8, "learning_rate": 2.2112084640042013e-05, "loss": 2.0328, "step": 11262000 }, { "epoch": 55.8, "learning_rate": 2.211084605361593e-05, "loss": 2.0249, "step": 11262500 }, { "epoch": 55.8, "learning_rate": 2.2109607467189846e-05, "loss": 2.0251, "step": 11263000 }, { "epoch": 55.8, "learning_rate": 2.2108368880763763e-05, "loss": 2.0472, "step": 11263500 }, { "epoch": 55.81, "learning_rate": 2.210713029433768e-05, "loss": 2.0156, "step": 11264000 }, { "epoch": 55.81, "learning_rate": 2.2105891707911597e-05, "loss": 2.0301, "step": 11264500 }, { "epoch": 55.81, "learning_rate": 2.2104655598658363e-05, "loss": 2.0053, "step": 11265000 }, { "epoch": 55.81, "learning_rate": 2.210341701223228e-05, "loss": 2.0524, "step": 11265500 }, { "epoch": 55.82, "learning_rate": 2.2102178425806197e-05, "loss": 2.0375, "step": 11266000 }, { "epoch": 55.82, "learning_rate": 2.2100939839380113e-05, "loss": 2.0311, "step": 11266500 }, { "epoch": 55.82, "learning_rate": 2.209970373012688e-05, "loss": 2.0367, "step": 11267000 }, { "epoch": 55.82, "learning_rate": 2.2098465143700796e-05, "loss": 2.0481, "step": 11267500 }, { "epoch": 55.83, "learning_rate": 2.2097226557274713e-05, "loss": 2.0136, "step": 11268000 }, { "epoch": 55.83, "learning_rate": 2.209598797084863e-05, "loss": 2.0237, "step": 11268500 }, { "epoch": 55.83, "learning_rate": 2.2094749384422547e-05, "loss": 2.0268, "step": 11269000 }, { "epoch": 55.83, "learning_rate": 2.2093510797996464e-05, "loss": 2.0167, "step": 11269500 }, { "epoch": 55.84, "learning_rate": 2.209227221157038e-05, "loss": 2.0558, "step": 11270000 }, { "epoch": 55.84, "learning_rate": 2.2091033625144297e-05, "loss": 2.0283, "step": 11270500 }, { "epoch": 55.84, "learning_rate": 2.2089795038718214e-05, "loss": 2.0549, "step": 11271000 }, { "epoch": 55.84, "learning_rate": 2.2088556452292128e-05, "loss": 2.0085, "step": 11271500 }, { "epoch": 55.85, "learning_rate": 2.2087320343038897e-05, "loss": 2.0148, "step": 11272000 }, { "epoch": 55.85, "learning_rate": 2.2086081756612814e-05, "loss": 2.0307, "step": 11272500 }, { "epoch": 55.85, "learning_rate": 2.208484317018673e-05, "loss": 2.0346, "step": 11273000 }, { "epoch": 55.85, "learning_rate": 2.2083604583760647e-05, "loss": 2.0485, "step": 11273500 }, { "epoch": 55.86, "learning_rate": 2.2082365997334564e-05, "loss": 2.0458, "step": 11274000 }, { "epoch": 55.86, "learning_rate": 2.2081127410908478e-05, "loss": 2.0036, "step": 11274500 }, { "epoch": 55.86, "learning_rate": 2.2079888824482395e-05, "loss": 2.0476, "step": 11275000 }, { "epoch": 55.86, "learning_rate": 2.2078650238056312e-05, "loss": 2.0267, "step": 11275500 }, { "epoch": 55.87, "learning_rate": 2.207741165163023e-05, "loss": 2.0375, "step": 11276000 }, { "epoch": 55.87, "learning_rate": 2.2076175542376998e-05, "loss": 2.0239, "step": 11276500 }, { "epoch": 55.87, "learning_rate": 2.2074936955950914e-05, "loss": 2.0275, "step": 11277000 }, { "epoch": 55.87, "learning_rate": 2.2073698369524828e-05, "loss": 2.0413, "step": 11277500 }, { "epoch": 55.88, "learning_rate": 2.2072459783098745e-05, "loss": 2.0148, "step": 11278000 }, { "epoch": 55.88, "learning_rate": 2.2071223673845514e-05, "loss": 2.0352, "step": 11278500 }, { "epoch": 55.88, "learning_rate": 2.206998508741943e-05, "loss": 2.0475, "step": 11279000 }, { "epoch": 55.88, "learning_rate": 2.2068746500993348e-05, "loss": 2.0531, "step": 11279500 }, { "epoch": 55.89, "learning_rate": 2.2067507914567264e-05, "loss": 2.0432, "step": 11280000 }, { "epoch": 55.89, "learning_rate": 2.206626932814118e-05, "loss": 2.0326, "step": 11280500 }, { "epoch": 55.89, "learning_rate": 2.2065030741715095e-05, "loss": 2.03, "step": 11281000 }, { "epoch": 55.89, "learning_rate": 2.2063792155289012e-05, "loss": 2.0392, "step": 11281500 }, { "epoch": 55.89, "learning_rate": 2.206255356886293e-05, "loss": 2.0378, "step": 11282000 }, { "epoch": 55.9, "learning_rate": 2.2061314982436846e-05, "loss": 2.0412, "step": 11282500 }, { "epoch": 55.9, "learning_rate": 2.2060076396010763e-05, "loss": 2.0286, "step": 11283000 }, { "epoch": 55.9, "learning_rate": 2.205883780958468e-05, "loss": 2.0174, "step": 11283500 }, { "epoch": 55.9, "learning_rate": 2.2057599223158597e-05, "loss": 2.0366, "step": 11284000 }, { "epoch": 55.91, "learning_rate": 2.2056360636732513e-05, "loss": 2.0325, "step": 11284500 }, { "epoch": 55.91, "learning_rate": 2.2055122050306427e-05, "loss": 2.0184, "step": 11285000 }, { "epoch": 55.91, "learning_rate": 2.2053885941053196e-05, "loss": 2.0046, "step": 11285500 }, { "epoch": 55.91, "learning_rate": 2.2052649831799965e-05, "loss": 2.0347, "step": 11286000 }, { "epoch": 55.92, "learning_rate": 2.2051416199719585e-05, "loss": 2.0476, "step": 11286500 }, { "epoch": 55.92, "learning_rate": 2.2050177613293502e-05, "loss": 2.0201, "step": 11287000 }, { "epoch": 55.92, "learning_rate": 2.204893902686742e-05, "loss": 2.0365, "step": 11287500 }, { "epoch": 55.92, "learning_rate": 2.2047700440441336e-05, "loss": 2.0373, "step": 11288000 }, { "epoch": 55.93, "learning_rate": 2.2046461854015253e-05, "loss": 2.0398, "step": 11288500 }, { "epoch": 55.93, "learning_rate": 2.2045223267589167e-05, "loss": 2.0199, "step": 11289000 }, { "epoch": 55.93, "learning_rate": 2.2043984681163083e-05, "loss": 2.0468, "step": 11289500 }, { "epoch": 55.93, "learning_rate": 2.2042746094737e-05, "loss": 2.051, "step": 11290000 }, { "epoch": 55.94, "learning_rate": 2.2041507508310917e-05, "loss": 2.0135, "step": 11290500 }, { "epoch": 55.94, "learning_rate": 2.204026892188483e-05, "loss": 2.0542, "step": 11291000 }, { "epoch": 55.94, "learning_rate": 2.2039030335458748e-05, "loss": 2.0273, "step": 11291500 }, { "epoch": 55.94, "learning_rate": 2.2037791749032665e-05, "loss": 2.0264, "step": 11292000 }, { "epoch": 55.95, "learning_rate": 2.203655316260658e-05, "loss": 2.0287, "step": 11292500 }, { "epoch": 55.95, "learning_rate": 2.20353145761805e-05, "loss": 2.0376, "step": 11293000 }, { "epoch": 55.95, "learning_rate": 2.2034075989754412e-05, "loss": 2.0197, "step": 11293500 }, { "epoch": 55.95, "learning_rate": 2.203283740332833e-05, "loss": 2.0356, "step": 11294000 }, { "epoch": 55.96, "learning_rate": 2.2031598816902246e-05, "loss": 2.0254, "step": 11294500 }, { "epoch": 55.96, "learning_rate": 2.2030360230476163e-05, "loss": 2.0306, "step": 11295000 }, { "epoch": 55.96, "learning_rate": 2.202912164405008e-05, "loss": 2.0305, "step": 11295500 }, { "epoch": 55.96, "learning_rate": 2.202788553479685e-05, "loss": 2.0303, "step": 11296000 }, { "epoch": 55.97, "learning_rate": 2.2026646948370762e-05, "loss": 2.029, "step": 11296500 }, { "epoch": 55.97, "learning_rate": 2.202540836194468e-05, "loss": 2.0158, "step": 11297000 }, { "epoch": 55.97, "learning_rate": 2.2024169775518596e-05, "loss": 1.9991, "step": 11297500 }, { "epoch": 55.97, "learning_rate": 2.2022931189092513e-05, "loss": 2.0273, "step": 11298000 }, { "epoch": 55.98, "learning_rate": 2.202169260266643e-05, "loss": 2.0517, "step": 11298500 }, { "epoch": 55.98, "learning_rate": 2.2020454016240347e-05, "loss": 2.0358, "step": 11299000 }, { "epoch": 55.98, "learning_rate": 2.2019215429814264e-05, "loss": 2.0471, "step": 11299500 }, { "epoch": 55.98, "learning_rate": 2.201797932056103e-05, "loss": 2.0285, "step": 11300000 }, { "epoch": 55.99, "learning_rate": 2.20167432113078e-05, "loss": 2.0406, "step": 11300500 }, { "epoch": 55.99, "learning_rate": 2.2015504624881715e-05, "loss": 2.0359, "step": 11301000 }, { "epoch": 55.99, "learning_rate": 2.2014266038455632e-05, "loss": 2.0392, "step": 11301500 }, { "epoch": 55.99, "learning_rate": 2.201302745202955e-05, "loss": 2.0261, "step": 11302000 }, { "epoch": 56.0, "learning_rate": 2.2011791342776318e-05, "loss": 2.0297, "step": 11302500 }, { "epoch": 56.0, "learning_rate": 2.2010552756350235e-05, "loss": 2.0406, "step": 11303000 }, { "epoch": 56.0, "eval_accuracy": 0.6718417118199477, "eval_accuracy_mlm": 0.6308449842160062, "eval_accuracy_nsp": 0.8651979337854322, "eval_loss": 2.304525852203369, "eval_runtime": 147.1521, "eval_samples_per_second": 1732.623, "eval_steps_per_second": 72.197, "step": 11303208 }, { "epoch": 56.0, "learning_rate": 2.200931416992415e-05, "loss": 2.0117, "step": 11303500 }, { "epoch": 56.0, "learning_rate": 2.200807558349807e-05, "loss": 2.0275, "step": 11304000 }, { "epoch": 56.01, "learning_rate": 2.2006836997071982e-05, "loss": 2.009, "step": 11304500 }, { "epoch": 56.01, "learning_rate": 2.20055984106459e-05, "loss": 1.9943, "step": 11305000 }, { "epoch": 56.01, "learning_rate": 2.2004359824219816e-05, "loss": 1.9898, "step": 11305500 }, { "epoch": 56.01, "learning_rate": 2.200312123779373e-05, "loss": 2.009, "step": 11306000 }, { "epoch": 56.02, "learning_rate": 2.20018851285405e-05, "loss": 1.9995, "step": 11306500 }, { "epoch": 56.02, "learning_rate": 2.200064901928727e-05, "loss": 2.0152, "step": 11307000 }, { "epoch": 56.02, "learning_rate": 2.1999410432861184e-05, "loss": 2.0196, "step": 11307500 }, { "epoch": 56.02, "learning_rate": 2.19981718464351e-05, "loss": 2.0104, "step": 11308000 }, { "epoch": 56.03, "learning_rate": 2.1996933260009018e-05, "loss": 2.0327, "step": 11308500 }, { "epoch": 56.03, "learning_rate": 2.1995694673582935e-05, "loss": 2.0111, "step": 11309000 }, { "epoch": 56.03, "learning_rate": 2.1994458564329703e-05, "loss": 1.9974, "step": 11309500 }, { "epoch": 56.03, "learning_rate": 2.199321997790362e-05, "loss": 2.0107, "step": 11310000 }, { "epoch": 56.04, "learning_rate": 2.1991981391477537e-05, "loss": 1.9992, "step": 11310500 }, { "epoch": 56.04, "learning_rate": 2.199074280505145e-05, "loss": 1.998, "step": 11311000 }, { "epoch": 56.04, "learning_rate": 2.1989504218625368e-05, "loss": 2.0156, "step": 11311500 }, { "epoch": 56.04, "learning_rate": 2.1988268109372137e-05, "loss": 1.9835, "step": 11312000 }, { "epoch": 56.05, "learning_rate": 2.1987032000118905e-05, "loss": 2.0339, "step": 11312500 }, { "epoch": 56.05, "learning_rate": 2.1985793413692822e-05, "loss": 2.003, "step": 11313000 }, { "epoch": 56.05, "learning_rate": 2.1984554827266736e-05, "loss": 2.0123, "step": 11313500 }, { "epoch": 56.05, "learning_rate": 2.1983318718013508e-05, "loss": 1.9949, "step": 11314000 }, { "epoch": 56.06, "learning_rate": 2.1982082608760273e-05, "loss": 2.0031, "step": 11314500 }, { "epoch": 56.06, "learning_rate": 2.198084402233419e-05, "loss": 2.0043, "step": 11315000 }, { "epoch": 56.06, "learning_rate": 2.1979605435908107e-05, "loss": 2.0021, "step": 11315500 }, { "epoch": 56.06, "learning_rate": 2.1978366849482024e-05, "loss": 2.0202, "step": 11316000 }, { "epoch": 56.07, "learning_rate": 2.197712826305594e-05, "loss": 1.9835, "step": 11316500 }, { "epoch": 56.07, "learning_rate": 2.1975889676629858e-05, "loss": 2.0081, "step": 11317000 }, { "epoch": 56.07, "learning_rate": 2.1974651090203775e-05, "loss": 1.9992, "step": 11317500 }, { "epoch": 56.07, "learning_rate": 2.1973412503777692e-05, "loss": 2.0366, "step": 11318000 }, { "epoch": 56.08, "learning_rate": 2.197217391735161e-05, "loss": 1.9848, "step": 11318500 }, { "epoch": 56.08, "learning_rate": 2.1970935330925522e-05, "loss": 2.0147, "step": 11319000 }, { "epoch": 56.08, "learning_rate": 2.196969922167229e-05, "loss": 2.0235, "step": 11319500 }, { "epoch": 56.08, "learning_rate": 2.1968460635246208e-05, "loss": 2.0285, "step": 11320000 }, { "epoch": 56.09, "learning_rate": 2.1967224525992977e-05, "loss": 2.0204, "step": 11320500 }, { "epoch": 56.09, "learning_rate": 2.1965988416739742e-05, "loss": 2.0183, "step": 11321000 }, { "epoch": 56.09, "learning_rate": 2.196474983031366e-05, "loss": 2.041, "step": 11321500 }, { "epoch": 56.09, "learning_rate": 2.1963511243887576e-05, "loss": 2.0093, "step": 11322000 }, { "epoch": 56.1, "learning_rate": 2.1962272657461493e-05, "loss": 2.0215, "step": 11322500 }, { "epoch": 56.1, "learning_rate": 2.196103407103541e-05, "loss": 2.0057, "step": 11323000 }, { "epoch": 56.1, "learning_rate": 2.1959795484609327e-05, "loss": 2.0219, "step": 11323500 }, { "epoch": 56.1, "learning_rate": 2.195855689818324e-05, "loss": 2.0037, "step": 11324000 }, { "epoch": 56.11, "learning_rate": 2.195732078893001e-05, "loss": 2.02, "step": 11324500 }, { "epoch": 56.11, "learning_rate": 2.1956082202503926e-05, "loss": 1.9946, "step": 11325000 }, { "epoch": 56.11, "learning_rate": 2.1954843616077843e-05, "loss": 2.0297, "step": 11325500 }, { "epoch": 56.11, "learning_rate": 2.195360502965176e-05, "loss": 2.0123, "step": 11326000 }, { "epoch": 56.12, "learning_rate": 2.1952366443225677e-05, "loss": 2.025, "step": 11326500 }, { "epoch": 56.12, "learning_rate": 2.195112785679959e-05, "loss": 2.0246, "step": 11327000 }, { "epoch": 56.12, "learning_rate": 2.194989174754636e-05, "loss": 1.9851, "step": 11327500 }, { "epoch": 56.12, "learning_rate": 2.1948653161120276e-05, "loss": 2.0022, "step": 11328000 }, { "epoch": 56.13, "learning_rate": 2.1947414574694193e-05, "loss": 2.0185, "step": 11328500 }, { "epoch": 56.13, "learning_rate": 2.194617598826811e-05, "loss": 2.0247, "step": 11329000 }, { "epoch": 56.13, "learning_rate": 2.1944937401842027e-05, "loss": 2.0122, "step": 11329500 }, { "epoch": 56.13, "learning_rate": 2.1943698815415944e-05, "loss": 2.0075, "step": 11330000 }, { "epoch": 56.14, "learning_rate": 2.1942460228989858e-05, "loss": 1.9829, "step": 11330500 }, { "epoch": 56.14, "learning_rate": 2.1941221642563775e-05, "loss": 2.0129, "step": 11331000 }, { "epoch": 56.14, "learning_rate": 2.193998305613769e-05, "loss": 2.0119, "step": 11331500 }, { "epoch": 56.14, "learning_rate": 2.193874446971161e-05, "loss": 2.0269, "step": 11332000 }, { "epoch": 56.15, "learning_rate": 2.1937505883285525e-05, "loss": 2.0209, "step": 11332500 }, { "epoch": 56.15, "learning_rate": 2.1936269774032294e-05, "loss": 2.0275, "step": 11333000 }, { "epoch": 56.15, "learning_rate": 2.1935031187606208e-05, "loss": 1.981, "step": 11333500 }, { "epoch": 56.15, "learning_rate": 2.1933792601180125e-05, "loss": 2.0139, "step": 11334000 }, { "epoch": 56.16, "learning_rate": 2.193255401475404e-05, "loss": 1.9958, "step": 11334500 }, { "epoch": 56.16, "learning_rate": 2.193131542832796e-05, "loss": 2.0145, "step": 11335000 }, { "epoch": 56.16, "learning_rate": 2.1930076841901875e-05, "loss": 2.0014, "step": 11335500 }, { "epoch": 56.16, "learning_rate": 2.1928838255475792e-05, "loss": 2.0105, "step": 11336000 }, { "epoch": 56.16, "learning_rate": 2.192759966904971e-05, "loss": 2.0304, "step": 11336500 }, { "epoch": 56.17, "learning_rate": 2.1926363559796475e-05, "loss": 2.0117, "step": 11337000 }, { "epoch": 56.17, "learning_rate": 2.1925127450543247e-05, "loss": 2.0316, "step": 11337500 }, { "epoch": 56.17, "learning_rate": 2.192388886411716e-05, "loss": 2.0, "step": 11338000 }, { "epoch": 56.17, "learning_rate": 2.1922650277691077e-05, "loss": 2.0176, "step": 11338500 }, { "epoch": 56.18, "learning_rate": 2.1921411691264994e-05, "loss": 2.0009, "step": 11339000 }, { "epoch": 56.18, "learning_rate": 2.1920173104838908e-05, "loss": 2.0022, "step": 11339500 }, { "epoch": 56.18, "learning_rate": 2.1918934518412825e-05, "loss": 2.016, "step": 11340000 }, { "epoch": 56.18, "learning_rate": 2.191769593198674e-05, "loss": 2.0113, "step": 11340500 }, { "epoch": 56.19, "learning_rate": 2.191645734556066e-05, "loss": 1.9964, "step": 11341000 }, { "epoch": 56.19, "learning_rate": 2.1915218759134576e-05, "loss": 2.0348, "step": 11341500 }, { "epoch": 56.19, "learning_rate": 2.1913980172708492e-05, "loss": 2.0508, "step": 11342000 }, { "epoch": 56.19, "learning_rate": 2.191274158628241e-05, "loss": 2.0075, "step": 11342500 }, { "epoch": 56.2, "learning_rate": 2.1911502999856326e-05, "loss": 2.0129, "step": 11343000 }, { "epoch": 56.2, "learning_rate": 2.1910264413430243e-05, "loss": 2.0195, "step": 11343500 }, { "epoch": 56.2, "learning_rate": 2.1909025827004157e-05, "loss": 2.0312, "step": 11344000 }, { "epoch": 56.2, "learning_rate": 2.1907787240578074e-05, "loss": 2.0145, "step": 11344500 }, { "epoch": 56.21, "learning_rate": 2.190654865415199e-05, "loss": 2.0304, "step": 11345000 }, { "epoch": 56.21, "learning_rate": 2.1905310067725908e-05, "loss": 2.0306, "step": 11345500 }, { "epoch": 56.21, "learning_rate": 2.1904071481299825e-05, "loss": 2.0291, "step": 11346000 }, { "epoch": 56.21, "learning_rate": 2.190283289487374e-05, "loss": 2.0257, "step": 11346500 }, { "epoch": 56.22, "learning_rate": 2.1901596785620507e-05, "loss": 2.0105, "step": 11347000 }, { "epoch": 56.22, "learning_rate": 2.1900358199194424e-05, "loss": 2.0126, "step": 11347500 }, { "epoch": 56.22, "learning_rate": 2.1899122089941193e-05, "loss": 2.0259, "step": 11348000 }, { "epoch": 56.22, "learning_rate": 2.189788350351511e-05, "loss": 2.0251, "step": 11348500 }, { "epoch": 56.23, "learning_rate": 2.1896644917089026e-05, "loss": 2.0279, "step": 11349000 }, { "epoch": 56.23, "learning_rate": 2.1895406330662943e-05, "loss": 2.0128, "step": 11349500 }, { "epoch": 56.23, "learning_rate": 2.189416774423686e-05, "loss": 2.0352, "step": 11350000 }, { "epoch": 56.23, "learning_rate": 2.189293411215648e-05, "loss": 1.9857, "step": 11350500 }, { "epoch": 56.24, "learning_rate": 2.1891695525730398e-05, "loss": 2.0176, "step": 11351000 }, { "epoch": 56.24, "learning_rate": 2.189045693930431e-05, "loss": 2.0495, "step": 11351500 }, { "epoch": 56.24, "learning_rate": 2.188921835287823e-05, "loss": 1.9961, "step": 11352000 }, { "epoch": 56.24, "learning_rate": 2.1887979766452142e-05, "loss": 2.0003, "step": 11352500 }, { "epoch": 56.25, "learning_rate": 2.1886743657198914e-05, "loss": 2.0304, "step": 11353000 }, { "epoch": 56.25, "learning_rate": 2.1885507547945683e-05, "loss": 2.0209, "step": 11353500 }, { "epoch": 56.25, "learning_rate": 2.1884268961519596e-05, "loss": 2.0112, "step": 11354000 }, { "epoch": 56.25, "learning_rate": 2.1883030375093513e-05, "loss": 2.032, "step": 11354500 }, { "epoch": 56.26, "learning_rate": 2.188179178866743e-05, "loss": 2.0171, "step": 11355000 }, { "epoch": 56.26, "learning_rate": 2.1880553202241347e-05, "loss": 1.999, "step": 11355500 }, { "epoch": 56.26, "learning_rate": 2.1879314615815264e-05, "loss": 2.0302, "step": 11356000 }, { "epoch": 56.26, "learning_rate": 2.187807602938918e-05, "loss": 1.9934, "step": 11356500 }, { "epoch": 56.27, "learning_rate": 2.1876839920135947e-05, "loss": 2.0216, "step": 11357000 }, { "epoch": 56.27, "learning_rate": 2.1875601333709863e-05, "loss": 1.997, "step": 11357500 }, { "epoch": 56.27, "learning_rate": 2.187436274728378e-05, "loss": 2.019, "step": 11358000 }, { "epoch": 56.27, "learning_rate": 2.1873124160857697e-05, "loss": 2.01, "step": 11358500 }, { "epoch": 56.28, "learning_rate": 2.1871885574431614e-05, "loss": 2.0078, "step": 11359000 }, { "epoch": 56.28, "learning_rate": 2.187064698800553e-05, "loss": 2.0133, "step": 11359500 }, { "epoch": 56.28, "learning_rate": 2.1869408401579448e-05, "loss": 2.0222, "step": 11360000 }, { "epoch": 56.28, "learning_rate": 2.1868169815153365e-05, "loss": 2.0154, "step": 11360500 }, { "epoch": 56.29, "learning_rate": 2.1866931228727282e-05, "loss": 2.0056, "step": 11361000 }, { "epoch": 56.29, "learning_rate": 2.1865695119474047e-05, "loss": 2.0329, "step": 11361500 }, { "epoch": 56.29, "learning_rate": 2.1864456533047964e-05, "loss": 1.9983, "step": 11362000 }, { "epoch": 56.29, "learning_rate": 2.186321794662188e-05, "loss": 2.0137, "step": 11362500 }, { "epoch": 56.3, "learning_rate": 2.186198183736865e-05, "loss": 2.0273, "step": 11363000 }, { "epoch": 56.3, "learning_rate": 2.1860743250942564e-05, "loss": 2.0237, "step": 11363500 }, { "epoch": 56.3, "learning_rate": 2.185950466451648e-05, "loss": 2.015, "step": 11364000 }, { "epoch": 56.3, "learning_rate": 2.1858266078090397e-05, "loss": 2.0247, "step": 11364500 }, { "epoch": 56.31, "learning_rate": 2.1857027491664314e-05, "loss": 2.0272, "step": 11365000 }, { "epoch": 56.31, "learning_rate": 2.185578890523823e-05, "loss": 2.0027, "step": 11365500 }, { "epoch": 56.31, "learning_rate": 2.1854550318812148e-05, "loss": 2.0135, "step": 11366000 }, { "epoch": 56.31, "learning_rate": 2.1853314209558914e-05, "loss": 2.0223, "step": 11366500 }, { "epoch": 56.32, "learning_rate": 2.185207562313283e-05, "loss": 2.0288, "step": 11367000 }, { "epoch": 56.32, "learning_rate": 2.1850837036706748e-05, "loss": 2.0027, "step": 11367500 }, { "epoch": 56.32, "learning_rate": 2.1849598450280664e-05, "loss": 2.0122, "step": 11368000 }, { "epoch": 56.32, "learning_rate": 2.184835986385458e-05, "loss": 2.0108, "step": 11368500 }, { "epoch": 56.33, "learning_rate": 2.1847121277428498e-05, "loss": 2.0291, "step": 11369000 }, { "epoch": 56.33, "learning_rate": 2.1845882691002415e-05, "loss": 2.024, "step": 11369500 }, { "epoch": 56.33, "learning_rate": 2.1844644104576332e-05, "loss": 2.0149, "step": 11370000 }, { "epoch": 56.33, "learning_rate": 2.1843407995323098e-05, "loss": 2.0049, "step": 11370500 }, { "epoch": 56.34, "learning_rate": 2.1842169408897015e-05, "loss": 2.0166, "step": 11371000 }, { "epoch": 56.34, "learning_rate": 2.184093082247093e-05, "loss": 2.0042, "step": 11371500 }, { "epoch": 56.34, "learning_rate": 2.183969223604485e-05, "loss": 2.0077, "step": 11372000 }, { "epoch": 56.34, "learning_rate": 2.1838453649618765e-05, "loss": 1.9969, "step": 11372500 }, { "epoch": 56.35, "learning_rate": 2.1837215063192682e-05, "loss": 2.0327, "step": 11373000 }, { "epoch": 56.35, "learning_rate": 2.18359764767666e-05, "loss": 2.0187, "step": 11373500 }, { "epoch": 56.35, "learning_rate": 2.1834737890340513e-05, "loss": 2.0142, "step": 11374000 }, { "epoch": 56.35, "learning_rate": 2.183350178108728e-05, "loss": 1.9869, "step": 11374500 }, { "epoch": 56.36, "learning_rate": 2.18322631946612e-05, "loss": 2.0114, "step": 11375000 }, { "epoch": 56.36, "learning_rate": 2.1831024608235115e-05, "loss": 2.0371, "step": 11375500 }, { "epoch": 56.36, "learning_rate": 2.1829786021809032e-05, "loss": 2.0284, "step": 11376000 }, { "epoch": 56.36, "learning_rate": 2.182854743538295e-05, "loss": 2.0339, "step": 11376500 }, { "epoch": 56.37, "learning_rate": 2.1827308848956863e-05, "loss": 2.0265, "step": 11377000 }, { "epoch": 56.37, "learning_rate": 2.182607026253078e-05, "loss": 2.0132, "step": 11377500 }, { "epoch": 56.37, "learning_rate": 2.1824831676104697e-05, "loss": 2.035, "step": 11378000 }, { "epoch": 56.37, "learning_rate": 2.1823595566851465e-05, "loss": 2.0394, "step": 11378500 }, { "epoch": 56.38, "learning_rate": 2.1822356980425382e-05, "loss": 2.0255, "step": 11379000 }, { "epoch": 56.38, "learning_rate": 2.18211183939993e-05, "loss": 2.0337, "step": 11379500 }, { "epoch": 56.38, "learning_rate": 2.1819882284746065e-05, "loss": 2.0245, "step": 11380000 }, { "epoch": 56.38, "learning_rate": 2.181864369831998e-05, "loss": 2.0247, "step": 11380500 }, { "epoch": 56.39, "learning_rate": 2.18174051118939e-05, "loss": 1.995, "step": 11381000 }, { "epoch": 56.39, "learning_rate": 2.1816166525467815e-05, "loss": 2.0188, "step": 11381500 }, { "epoch": 56.39, "learning_rate": 2.1814927939041732e-05, "loss": 1.9943, "step": 11382000 }, { "epoch": 56.39, "learning_rate": 2.181368935261565e-05, "loss": 2.0232, "step": 11382500 }, { "epoch": 56.4, "learning_rate": 2.1812450766189566e-05, "loss": 1.989, "step": 11383000 }, { "epoch": 56.4, "learning_rate": 2.181121217976348e-05, "loss": 2.0215, "step": 11383500 }, { "epoch": 56.4, "learning_rate": 2.1809973593337397e-05, "loss": 2.0106, "step": 11384000 }, { "epoch": 56.4, "learning_rate": 2.1808735006911314e-05, "loss": 2.0355, "step": 11384500 }, { "epoch": 56.41, "learning_rate": 2.180749642048523e-05, "loss": 2.0101, "step": 11385000 }, { "epoch": 56.41, "learning_rate": 2.1806257834059148e-05, "loss": 2.0126, "step": 11385500 }, { "epoch": 56.41, "learning_rate": 2.1805019247633064e-05, "loss": 2.0186, "step": 11386000 }, { "epoch": 56.41, "learning_rate": 2.1803780661206978e-05, "loss": 2.0199, "step": 11386500 }, { "epoch": 56.42, "learning_rate": 2.18025470291266e-05, "loss": 1.9751, "step": 11387000 }, { "epoch": 56.42, "learning_rate": 2.1801308442700516e-05, "loss": 2.0216, "step": 11387500 }, { "epoch": 56.42, "learning_rate": 2.1800069856274433e-05, "loss": 2.0075, "step": 11388000 }, { "epoch": 56.42, "learning_rate": 2.179883126984835e-05, "loss": 2.0234, "step": 11388500 }, { "epoch": 56.43, "learning_rate": 2.1797592683422266e-05, "loss": 1.9945, "step": 11389000 }, { "epoch": 56.43, "learning_rate": 2.179635409699618e-05, "loss": 1.9917, "step": 11389500 }, { "epoch": 56.43, "learning_rate": 2.179511798774295e-05, "loss": 2.042, "step": 11390000 }, { "epoch": 56.43, "learning_rate": 2.1793879401316866e-05, "loss": 2.0344, "step": 11390500 }, { "epoch": 56.43, "learning_rate": 2.1792640814890783e-05, "loss": 2.0138, "step": 11391000 }, { "epoch": 56.44, "learning_rate": 2.17914022284647e-05, "loss": 2.019, "step": 11391500 }, { "epoch": 56.44, "learning_rate": 2.1790163642038616e-05, "loss": 2.0228, "step": 11392000 }, { "epoch": 56.44, "learning_rate": 2.1788925055612533e-05, "loss": 1.9926, "step": 11392500 }, { "epoch": 56.44, "learning_rate": 2.1787686469186447e-05, "loss": 2.0324, "step": 11393000 }, { "epoch": 56.45, "learning_rate": 2.1786447882760364e-05, "loss": 2.0158, "step": 11393500 }, { "epoch": 56.45, "learning_rate": 2.1785211773507133e-05, "loss": 2.0411, "step": 11394000 }, { "epoch": 56.45, "learning_rate": 2.17839756642539e-05, "loss": 2.0153, "step": 11394500 }, { "epoch": 56.45, "learning_rate": 2.1782737077827815e-05, "loss": 2.024, "step": 11395000 }, { "epoch": 56.46, "learning_rate": 2.1781498491401732e-05, "loss": 2.0017, "step": 11395500 }, { "epoch": 56.46, "learning_rate": 2.178025990497565e-05, "loss": 2.013, "step": 11396000 }, { "epoch": 56.46, "learning_rate": 2.1779021318549566e-05, "loss": 2.0266, "step": 11396500 }, { "epoch": 56.46, "learning_rate": 2.1777785209296338e-05, "loss": 2.0388, "step": 11397000 }, { "epoch": 56.47, "learning_rate": 2.177654662287025e-05, "loss": 2.0396, "step": 11397500 }, { "epoch": 56.47, "learning_rate": 2.177530803644417e-05, "loss": 2.0269, "step": 11398000 }, { "epoch": 56.47, "learning_rate": 2.1774069450018082e-05, "loss": 2.0212, "step": 11398500 }, { "epoch": 56.47, "learning_rate": 2.1772830863592e-05, "loss": 2.0163, "step": 11399000 }, { "epoch": 56.48, "learning_rate": 2.1771592277165916e-05, "loss": 2.0306, "step": 11399500 }, { "epoch": 56.48, "learning_rate": 2.1770353690739833e-05, "loss": 2.0353, "step": 11400000 }, { "epoch": 56.48, "learning_rate": 2.176911510431375e-05, "loss": 2.0111, "step": 11400500 }, { "epoch": 56.48, "learning_rate": 2.176787899506052e-05, "loss": 2.0145, "step": 11401000 }, { "epoch": 56.49, "learning_rate": 2.1766640408634435e-05, "loss": 2.0057, "step": 11401500 }, { "epoch": 56.49, "learning_rate": 2.1765404299381204e-05, "loss": 2.0165, "step": 11402000 }, { "epoch": 56.49, "learning_rate": 2.176416571295512e-05, "loss": 2.0053, "step": 11402500 }, { "epoch": 56.49, "learning_rate": 2.1762927126529038e-05, "loss": 2.0486, "step": 11403000 }, { "epoch": 56.5, "learning_rate": 2.1761688540102955e-05, "loss": 2.0243, "step": 11403500 }, { "epoch": 56.5, "learning_rate": 2.176044995367687e-05, "loss": 2.0246, "step": 11404000 }, { "epoch": 56.5, "learning_rate": 2.1759211367250786e-05, "loss": 2.0584, "step": 11404500 }, { "epoch": 56.5, "learning_rate": 2.1757975257997554e-05, "loss": 2.0356, "step": 11405000 }, { "epoch": 56.51, "learning_rate": 2.1756739148744323e-05, "loss": 2.0262, "step": 11405500 }, { "epoch": 56.51, "learning_rate": 2.1755500562318237e-05, "loss": 2.0101, "step": 11406000 }, { "epoch": 56.51, "learning_rate": 2.1754261975892154e-05, "loss": 2.0165, "step": 11406500 }, { "epoch": 56.51, "learning_rate": 2.175302338946607e-05, "loss": 2.0073, "step": 11407000 }, { "epoch": 56.52, "learning_rate": 2.1751784803039987e-05, "loss": 2.0604, "step": 11407500 }, { "epoch": 56.52, "learning_rate": 2.1750546216613904e-05, "loss": 2.0193, "step": 11408000 }, { "epoch": 56.52, "learning_rate": 2.174930763018782e-05, "loss": 1.9917, "step": 11408500 }, { "epoch": 56.52, "learning_rate": 2.1748069043761738e-05, "loss": 2.0159, "step": 11409000 }, { "epoch": 56.53, "learning_rate": 2.1746832934508504e-05, "loss": 2.0187, "step": 11409500 }, { "epoch": 56.53, "learning_rate": 2.174559434808242e-05, "loss": 2.02, "step": 11410000 }, { "epoch": 56.53, "learning_rate": 2.1744355761656338e-05, "loss": 2.035, "step": 11410500 }, { "epoch": 56.53, "learning_rate": 2.1743117175230254e-05, "loss": 2.0203, "step": 11411000 }, { "epoch": 56.54, "learning_rate": 2.174187858880417e-05, "loss": 2.0259, "step": 11411500 }, { "epoch": 56.54, "learning_rate": 2.1740640002378088e-05, "loss": 2.0412, "step": 11412000 }, { "epoch": 56.54, "learning_rate": 2.1739401415952005e-05, "loss": 1.996, "step": 11412500 }, { "epoch": 56.54, "learning_rate": 2.173816530669877e-05, "loss": 2.0278, "step": 11413000 }, { "epoch": 56.55, "learning_rate": 2.1736926720272688e-05, "loss": 2.0278, "step": 11413500 }, { "epoch": 56.55, "learning_rate": 2.1735690611019456e-05, "loss": 2.0288, "step": 11414000 }, { "epoch": 56.55, "learning_rate": 2.1734454501766225e-05, "loss": 2.0598, "step": 11414500 }, { "epoch": 56.55, "learning_rate": 2.1733215915340142e-05, "loss": 2.021, "step": 11415000 }, { "epoch": 56.56, "learning_rate": 2.173197732891406e-05, "loss": 2.0227, "step": 11415500 }, { "epoch": 56.56, "learning_rate": 2.1730738742487976e-05, "loss": 2.0236, "step": 11416000 }, { "epoch": 56.56, "learning_rate": 2.172950015606189e-05, "loss": 1.9993, "step": 11416500 }, { "epoch": 56.56, "learning_rate": 2.1728261569635806e-05, "loss": 2.0031, "step": 11417000 }, { "epoch": 56.57, "learning_rate": 2.1727022983209723e-05, "loss": 2.0179, "step": 11417500 }, { "epoch": 56.57, "learning_rate": 2.172578439678364e-05, "loss": 2.0315, "step": 11418000 }, { "epoch": 56.57, "learning_rate": 2.1724545810357554e-05, "loss": 2.0222, "step": 11418500 }, { "epoch": 56.57, "learning_rate": 2.172330722393147e-05, "loss": 2.0156, "step": 11419000 }, { "epoch": 56.58, "learning_rate": 2.1722068637505388e-05, "loss": 2.0348, "step": 11419500 }, { "epoch": 56.58, "learning_rate": 2.1720830051079305e-05, "loss": 2.0279, "step": 11420000 }, { "epoch": 56.58, "learning_rate": 2.171959146465322e-05, "loss": 2.0072, "step": 11420500 }, { "epoch": 56.58, "learning_rate": 2.171835287822714e-05, "loss": 2.0219, "step": 11421000 }, { "epoch": 56.59, "learning_rate": 2.1717116768973904e-05, "loss": 2.0376, "step": 11421500 }, { "epoch": 56.59, "learning_rate": 2.171587818254782e-05, "loss": 2.0403, "step": 11422000 }, { "epoch": 56.59, "learning_rate": 2.1714642073294593e-05, "loss": 2.0588, "step": 11422500 }, { "epoch": 56.59, "learning_rate": 2.1713403486868507e-05, "loss": 2.0254, "step": 11423000 }, { "epoch": 56.6, "learning_rate": 2.1712164900442424e-05, "loss": 2.0163, "step": 11423500 }, { "epoch": 56.6, "learning_rate": 2.171092631401634e-05, "loss": 1.9886, "step": 11424000 }, { "epoch": 56.6, "learning_rate": 2.1709687727590254e-05, "loss": 2.0102, "step": 11424500 }, { "epoch": 56.6, "learning_rate": 2.170844914116417e-05, "loss": 2.0184, "step": 11425000 }, { "epoch": 56.61, "learning_rate": 2.1707210554738088e-05, "loss": 1.9956, "step": 11425500 }, { "epoch": 56.61, "learning_rate": 2.1705971968312005e-05, "loss": 2.0282, "step": 11426000 }, { "epoch": 56.61, "learning_rate": 2.170473338188592e-05, "loss": 2.0287, "step": 11426500 }, { "epoch": 56.61, "learning_rate": 2.170349479545984e-05, "loss": 2.0188, "step": 11427000 }, { "epoch": 56.62, "learning_rate": 2.1702256209033756e-05, "loss": 2.0018, "step": 11427500 }, { "epoch": 56.62, "learning_rate": 2.170102009978052e-05, "loss": 2.0348, "step": 11428000 }, { "epoch": 56.62, "learning_rate": 2.1699781513354438e-05, "loss": 2.0293, "step": 11428500 }, { "epoch": 56.62, "learning_rate": 2.1698542926928355e-05, "loss": 2.0383, "step": 11429000 }, { "epoch": 56.63, "learning_rate": 2.1697304340502272e-05, "loss": 1.9939, "step": 11429500 }, { "epoch": 56.63, "learning_rate": 2.169606575407619e-05, "loss": 2.023, "step": 11430000 }, { "epoch": 56.63, "learning_rate": 2.1694827167650106e-05, "loss": 2.032, "step": 11430500 }, { "epoch": 56.63, "learning_rate": 2.1693588581224023e-05, "loss": 2.0223, "step": 11431000 }, { "epoch": 56.64, "learning_rate": 2.169234999479794e-05, "loss": 2.0575, "step": 11431500 }, { "epoch": 56.64, "learning_rate": 2.1691111408371853e-05, "loss": 2.0251, "step": 11432000 }, { "epoch": 56.64, "learning_rate": 2.1689875299118622e-05, "loss": 2.0542, "step": 11432500 }, { "epoch": 56.64, "learning_rate": 2.168863671269254e-05, "loss": 2.0286, "step": 11433000 }, { "epoch": 56.65, "learning_rate": 2.1687398126266456e-05, "loss": 2.0228, "step": 11433500 }, { "epoch": 56.65, "learning_rate": 2.1686159539840373e-05, "loss": 2.0327, "step": 11434000 }, { "epoch": 56.65, "learning_rate": 2.168492095341429e-05, "loss": 2.0036, "step": 11434500 }, { "epoch": 56.65, "learning_rate": 2.1683684844161055e-05, "loss": 2.0203, "step": 11435000 }, { "epoch": 56.66, "learning_rate": 2.1682446257734972e-05, "loss": 2.0138, "step": 11435500 }, { "epoch": 56.66, "learning_rate": 2.168120767130889e-05, "loss": 2.0041, "step": 11436000 }, { "epoch": 56.66, "learning_rate": 2.1679969084882806e-05, "loss": 2.0333, "step": 11436500 }, { "epoch": 56.66, "learning_rate": 2.1678730498456723e-05, "loss": 2.0014, "step": 11437000 }, { "epoch": 56.67, "learning_rate": 2.167749191203064e-05, "loss": 2.023, "step": 11437500 }, { "epoch": 56.67, "learning_rate": 2.1676253325604557e-05, "loss": 2.0459, "step": 11438000 }, { "epoch": 56.67, "learning_rate": 2.167501473917847e-05, "loss": 2.0302, "step": 11438500 }, { "epoch": 56.67, "learning_rate": 2.167377862992524e-05, "loss": 2.0505, "step": 11439000 }, { "epoch": 56.68, "learning_rate": 2.167254252067201e-05, "loss": 2.0078, "step": 11439500 }, { "epoch": 56.68, "learning_rate": 2.1671303934245925e-05, "loss": 2.0291, "step": 11440000 }, { "epoch": 56.68, "learning_rate": 2.167006534781984e-05, "loss": 2.0068, "step": 11440500 }, { "epoch": 56.68, "learning_rate": 2.166882923856661e-05, "loss": 2.0212, "step": 11441000 }, { "epoch": 56.69, "learning_rate": 2.1667590652140527e-05, "loss": 2.031, "step": 11441500 }, { "epoch": 56.69, "learning_rate": 2.1666354542887293e-05, "loss": 2.0532, "step": 11442000 }, { "epoch": 56.69, "learning_rate": 2.166511595646121e-05, "loss": 2.0206, "step": 11442500 }, { "epoch": 56.69, "learning_rate": 2.166387984720798e-05, "loss": 2.0338, "step": 11443000 }, { "epoch": 56.7, "learning_rate": 2.1662641260781895e-05, "loss": 1.9962, "step": 11443500 }, { "epoch": 56.7, "learning_rate": 2.1661402674355812e-05, "loss": 2.0243, "step": 11444000 }, { "epoch": 56.7, "learning_rate": 2.166016408792973e-05, "loss": 2.0132, "step": 11444500 }, { "epoch": 56.7, "learning_rate": 2.1658925501503646e-05, "loss": 2.0141, "step": 11445000 }, { "epoch": 56.7, "learning_rate": 2.165768691507756e-05, "loss": 2.0332, "step": 11445500 }, { "epoch": 56.71, "learning_rate": 2.1656448328651477e-05, "loss": 2.0174, "step": 11446000 }, { "epoch": 56.71, "learning_rate": 2.1655209742225394e-05, "loss": 2.0454, "step": 11446500 }, { "epoch": 56.71, "learning_rate": 2.165397115579931e-05, "loss": 2.0252, "step": 11447000 }, { "epoch": 56.71, "learning_rate": 2.1652732569373227e-05, "loss": 2.0255, "step": 11447500 }, { "epoch": 56.72, "learning_rate": 2.1651493982947144e-05, "loss": 2.0294, "step": 11448000 }, { "epoch": 56.72, "learning_rate": 2.165025539652106e-05, "loss": 2.0171, "step": 11448500 }, { "epoch": 56.72, "learning_rate": 2.1649016810094978e-05, "loss": 2.0086, "step": 11449000 }, { "epoch": 56.72, "learning_rate": 2.1647778223668892e-05, "loss": 2.0223, "step": 11449500 }, { "epoch": 56.73, "learning_rate": 2.164653963724281e-05, "loss": 2.0197, "step": 11450000 }, { "epoch": 56.73, "learning_rate": 2.1645301050816726e-05, "loss": 2.02, "step": 11450500 }, { "epoch": 56.73, "learning_rate": 2.1644062464390642e-05, "loss": 2.0015, "step": 11451000 }, { "epoch": 56.73, "learning_rate": 2.1642823877964556e-05, "loss": 2.0337, "step": 11451500 }, { "epoch": 56.74, "learning_rate": 2.1641585291538473e-05, "loss": 1.982, "step": 11452000 }, { "epoch": 56.74, "learning_rate": 2.164034670511239e-05, "loss": 2.0088, "step": 11452500 }, { "epoch": 56.74, "learning_rate": 2.1639108118686307e-05, "loss": 2.0394, "step": 11453000 }, { "epoch": 56.74, "learning_rate": 2.1637869532260224e-05, "loss": 2.0039, "step": 11453500 }, { "epoch": 56.75, "learning_rate": 2.1636630945834137e-05, "loss": 2.0169, "step": 11454000 }, { "epoch": 56.75, "learning_rate": 2.1635392359408054e-05, "loss": 2.0172, "step": 11454500 }, { "epoch": 56.75, "learning_rate": 2.163415377298197e-05, "loss": 2.0455, "step": 11455000 }, { "epoch": 56.75, "learning_rate": 2.1632920140901595e-05, "loss": 2.0376, "step": 11455500 }, { "epoch": 56.76, "learning_rate": 2.163168155447551e-05, "loss": 2.0225, "step": 11456000 }, { "epoch": 56.76, "learning_rate": 2.1630445445222278e-05, "loss": 2.0128, "step": 11456500 }, { "epoch": 56.76, "learning_rate": 2.1629206858796195e-05, "loss": 2.0269, "step": 11457000 }, { "epoch": 56.76, "learning_rate": 2.162796827237011e-05, "loss": 2.0321, "step": 11457500 }, { "epoch": 56.77, "learning_rate": 2.162672968594403e-05, "loss": 2.0315, "step": 11458000 }, { "epoch": 56.77, "learning_rate": 2.1625491099517945e-05, "loss": 2.0182, "step": 11458500 }, { "epoch": 56.77, "learning_rate": 2.162425251309186e-05, "loss": 2.032, "step": 11459000 }, { "epoch": 56.77, "learning_rate": 2.1623016403838628e-05, "loss": 2.0423, "step": 11459500 }, { "epoch": 56.78, "learning_rate": 2.1621777817412545e-05, "loss": 2.0233, "step": 11460000 }, { "epoch": 56.78, "learning_rate": 2.162053923098646e-05, "loss": 2.0257, "step": 11460500 }, { "epoch": 56.78, "learning_rate": 2.161930064456038e-05, "loss": 2.0274, "step": 11461000 }, { "epoch": 56.78, "learning_rate": 2.1618064535307144e-05, "loss": 2.0494, "step": 11461500 }, { "epoch": 56.79, "learning_rate": 2.161682594888106e-05, "loss": 2.0184, "step": 11462000 }, { "epoch": 56.79, "learning_rate": 2.1615587362454978e-05, "loss": 2.0305, "step": 11462500 }, { "epoch": 56.79, "learning_rate": 2.1614348776028895e-05, "loss": 2.0214, "step": 11463000 }, { "epoch": 56.79, "learning_rate": 2.161311018960281e-05, "loss": 1.9829, "step": 11463500 }, { "epoch": 56.8, "learning_rate": 2.161187160317673e-05, "loss": 2.0246, "step": 11464000 }, { "epoch": 56.8, "learning_rate": 2.1610635493923494e-05, "loss": 2.0138, "step": 11464500 }, { "epoch": 56.8, "learning_rate": 2.160939690749741e-05, "loss": 2.0297, "step": 11465000 }, { "epoch": 56.8, "learning_rate": 2.1608160798244183e-05, "loss": 2.0308, "step": 11465500 }, { "epoch": 56.81, "learning_rate": 2.160692468899095e-05, "loss": 2.017, "step": 11466000 }, { "epoch": 56.81, "learning_rate": 2.1605686102564865e-05, "loss": 2.0211, "step": 11466500 }, { "epoch": 56.81, "learning_rate": 2.1604447516138782e-05, "loss": 2.0189, "step": 11467000 }, { "epoch": 56.81, "learning_rate": 2.16032089297127e-05, "loss": 2.0168, "step": 11467500 }, { "epoch": 56.82, "learning_rate": 2.1601972820459468e-05, "loss": 2.042, "step": 11468000 }, { "epoch": 56.82, "learning_rate": 2.1600734234033385e-05, "loss": 2.0307, "step": 11468500 }, { "epoch": 56.82, "learning_rate": 2.15994956476073e-05, "loss": 2.0401, "step": 11469000 }, { "epoch": 56.82, "learning_rate": 2.1598257061181215e-05, "loss": 2.0112, "step": 11469500 }, { "epoch": 56.83, "learning_rate": 2.1597018474755132e-05, "loss": 2.0001, "step": 11470000 }, { "epoch": 56.83, "learning_rate": 2.159577988832905e-05, "loss": 2.0408, "step": 11470500 }, { "epoch": 56.83, "learning_rate": 2.1594541301902966e-05, "loss": 2.0113, "step": 11471000 }, { "epoch": 56.83, "learning_rate": 2.1593302715476883e-05, "loss": 2.0397, "step": 11471500 }, { "epoch": 56.84, "learning_rate": 2.15920641290508e-05, "loss": 2.0341, "step": 11472000 }, { "epoch": 56.84, "learning_rate": 2.1590830496970417e-05, "loss": 2.0024, "step": 11472500 }, { "epoch": 56.84, "learning_rate": 2.1589591910544334e-05, "loss": 2.0418, "step": 11473000 }, { "epoch": 56.84, "learning_rate": 2.158835332411825e-05, "loss": 2.0127, "step": 11473500 }, { "epoch": 56.85, "learning_rate": 2.1587114737692168e-05, "loss": 2.0258, "step": 11474000 }, { "epoch": 56.85, "learning_rate": 2.1585876151266085e-05, "loss": 2.02, "step": 11474500 }, { "epoch": 56.85, "learning_rate": 2.1584637564840002e-05, "loss": 2.0191, "step": 11475000 }, { "epoch": 56.85, "learning_rate": 2.1583398978413916e-05, "loss": 2.0505, "step": 11475500 }, { "epoch": 56.86, "learning_rate": 2.1582160391987832e-05, "loss": 2.0285, "step": 11476000 }, { "epoch": 56.86, "learning_rate": 2.158092180556175e-05, "loss": 1.9944, "step": 11476500 }, { "epoch": 56.86, "learning_rate": 2.1579683219135666e-05, "loss": 2.033, "step": 11477000 }, { "epoch": 56.86, "learning_rate": 2.1578444632709583e-05, "loss": 2.0413, "step": 11477500 }, { "epoch": 56.87, "learning_rate": 2.15772060462835e-05, "loss": 2.0395, "step": 11478000 }, { "epoch": 56.87, "learning_rate": 2.1575967459857417e-05, "loss": 2.0509, "step": 11478500 }, { "epoch": 56.87, "learning_rate": 2.1574728873431334e-05, "loss": 2.0287, "step": 11479000 }, { "epoch": 56.87, "learning_rate": 2.1573490287005248e-05, "loss": 2.0258, "step": 11479500 }, { "epoch": 56.88, "learning_rate": 2.1572251700579165e-05, "loss": 2.037, "step": 11480000 }, { "epoch": 56.88, "learning_rate": 2.1571015591325933e-05, "loss": 2.0166, "step": 11480500 }, { "epoch": 56.88, "learning_rate": 2.1569781959245554e-05, "loss": 2.0099, "step": 11481000 }, { "epoch": 56.88, "learning_rate": 2.1568543372819468e-05, "loss": 2.0123, "step": 11481500 }, { "epoch": 56.89, "learning_rate": 2.1567304786393384e-05, "loss": 2.0085, "step": 11482000 }, { "epoch": 56.89, "learning_rate": 2.15660661999673e-05, "loss": 2.0049, "step": 11482500 }, { "epoch": 56.89, "learning_rate": 2.156482761354122e-05, "loss": 2.0107, "step": 11483000 }, { "epoch": 56.89, "learning_rate": 2.1563589027115135e-05, "loss": 2.0198, "step": 11483500 }, { "epoch": 56.9, "learning_rate": 2.1562350440689052e-05, "loss": 2.0203, "step": 11484000 }, { "epoch": 56.9, "learning_rate": 2.1561111854262966e-05, "loss": 2.0477, "step": 11484500 }, { "epoch": 56.9, "learning_rate": 2.1559873267836883e-05, "loss": 2.028, "step": 11485000 }, { "epoch": 56.9, "learning_rate": 2.155863715858365e-05, "loss": 2.0038, "step": 11485500 }, { "epoch": 56.91, "learning_rate": 2.155739857215757e-05, "loss": 2.0398, "step": 11486000 }, { "epoch": 56.91, "learning_rate": 2.1556162462904337e-05, "loss": 2.0144, "step": 11486500 }, { "epoch": 56.91, "learning_rate": 2.1554923876478254e-05, "loss": 2.0149, "step": 11487000 }, { "epoch": 56.91, "learning_rate": 2.155368529005217e-05, "loss": 2.0056, "step": 11487500 }, { "epoch": 56.92, "learning_rate": 2.1552446703626085e-05, "loss": 2.0124, "step": 11488000 }, { "epoch": 56.92, "learning_rate": 2.1551210594372857e-05, "loss": 2.01, "step": 11488500 }, { "epoch": 56.92, "learning_rate": 2.1549972007946774e-05, "loss": 2.0346, "step": 11489000 }, { "epoch": 56.92, "learning_rate": 2.1548733421520687e-05, "loss": 2.0418, "step": 11489500 }, { "epoch": 56.93, "learning_rate": 2.1547494835094604e-05, "loss": 2.0241, "step": 11490000 }, { "epoch": 56.93, "learning_rate": 2.1546261203014225e-05, "loss": 2.0505, "step": 11490500 }, { "epoch": 56.93, "learning_rate": 2.1545022616588142e-05, "loss": 2.0497, "step": 11491000 }, { "epoch": 56.93, "learning_rate": 2.1543784030162055e-05, "loss": 2.0244, "step": 11491500 }, { "epoch": 56.94, "learning_rate": 2.1542545443735972e-05, "loss": 2.0083, "step": 11492000 }, { "epoch": 56.94, "learning_rate": 2.154130685730989e-05, "loss": 2.0219, "step": 11492500 }, { "epoch": 56.94, "learning_rate": 2.1540068270883806e-05, "loss": 2.0122, "step": 11493000 }, { "epoch": 56.94, "learning_rate": 2.1538832161630575e-05, "loss": 2.0139, "step": 11493500 }, { "epoch": 56.95, "learning_rate": 2.1537593575204492e-05, "loss": 2.0279, "step": 11494000 }, { "epoch": 56.95, "learning_rate": 2.153635746595126e-05, "loss": 2.0334, "step": 11494500 }, { "epoch": 56.95, "learning_rate": 2.1535118879525178e-05, "loss": 2.0393, "step": 11495000 }, { "epoch": 56.95, "learning_rate": 2.1533882770271946e-05, "loss": 2.0187, "step": 11495500 }, { "epoch": 56.96, "learning_rate": 2.1532644183845863e-05, "loss": 2.0021, "step": 11496000 }, { "epoch": 56.96, "learning_rate": 2.1531405597419777e-05, "loss": 2.0207, "step": 11496500 }, { "epoch": 56.96, "learning_rate": 2.1530167010993694e-05, "loss": 2.039, "step": 11497000 }, { "epoch": 56.96, "learning_rate": 2.152892842456761e-05, "loss": 2.023, "step": 11497500 }, { "epoch": 56.97, "learning_rate": 2.1527689838141528e-05, "loss": 2.0296, "step": 11498000 }, { "epoch": 56.97, "learning_rate": 2.1526451251715445e-05, "loss": 2.0466, "step": 11498500 }, { "epoch": 56.97, "learning_rate": 2.152521266528936e-05, "loss": 2.0161, "step": 11499000 }, { "epoch": 56.97, "learning_rate": 2.1523974078863275e-05, "loss": 2.0488, "step": 11499500 }, { "epoch": 56.97, "learning_rate": 2.1522735492437192e-05, "loss": 2.0332, "step": 11500000 }, { "epoch": 56.98, "learning_rate": 2.152149690601111e-05, "loss": 2.025, "step": 11500500 }, { "epoch": 56.98, "learning_rate": 2.1520258319585022e-05, "loss": 2.0263, "step": 11501000 }, { "epoch": 56.98, "learning_rate": 2.151901973315894e-05, "loss": 2.0122, "step": 11501500 }, { "epoch": 56.98, "learning_rate": 2.1517781146732856e-05, "loss": 2.018, "step": 11502000 }, { "epoch": 56.99, "learning_rate": 2.151654751465248e-05, "loss": 2.0316, "step": 11502500 }, { "epoch": 56.99, "learning_rate": 2.1515308928226394e-05, "loss": 2.012, "step": 11503000 }, { "epoch": 56.99, "learning_rate": 2.151407034180031e-05, "loss": 2.015, "step": 11503500 }, { "epoch": 56.99, "learning_rate": 2.1512831755374228e-05, "loss": 2.0255, "step": 11504000 }, { "epoch": 57.0, "learning_rate": 2.1511593168948145e-05, "loss": 2.0303, "step": 11504500 }, { "epoch": 57.0, "learning_rate": 2.1510357059694914e-05, "loss": 2.0118, "step": 11505000 }, { "epoch": 57.0, "eval_accuracy": 0.6725793653787202, "eval_accuracy_mlm": 0.6316911042368613, "eval_accuracy_nsp": 0.8655391651206664, "eval_loss": 2.284510612487793, "eval_runtime": 147.0273, "eval_samples_per_second": 1734.093, "eval_steps_per_second": 72.259, "step": 11505051 }, { "epoch": 57.0, "learning_rate": 2.150911847326883e-05, "loss": 1.9894, "step": 11505500 }, { "epoch": 57.0, "learning_rate": 2.1507879886842744e-05, "loss": 2.0175, "step": 11506000 }, { "epoch": 57.01, "learning_rate": 2.150664130041666e-05, "loss": 2.0049, "step": 11506500 }, { "epoch": 57.01, "learning_rate": 2.1505402713990578e-05, "loss": 2.0013, "step": 11507000 }, { "epoch": 57.01, "learning_rate": 2.1504164127564495e-05, "loss": 2.0004, "step": 11507500 }, { "epoch": 57.01, "learning_rate": 2.1502925541138412e-05, "loss": 1.9967, "step": 11508000 }, { "epoch": 57.02, "learning_rate": 2.150168695471233e-05, "loss": 2.003, "step": 11508500 }, { "epoch": 57.02, "learning_rate": 2.1500448368286246e-05, "loss": 1.9837, "step": 11509000 }, { "epoch": 57.02, "learning_rate": 2.149920978186016e-05, "loss": 1.958, "step": 11509500 }, { "epoch": 57.02, "learning_rate": 2.1497971195434076e-05, "loss": 1.9886, "step": 11510000 }, { "epoch": 57.03, "learning_rate": 2.149673260900799e-05, "loss": 1.9897, "step": 11510500 }, { "epoch": 57.03, "learning_rate": 2.1495494022581907e-05, "loss": 2.0097, "step": 11511000 }, { "epoch": 57.03, "learning_rate": 2.1494255436155823e-05, "loss": 1.9915, "step": 11511500 }, { "epoch": 57.03, "learning_rate": 2.149301684972974e-05, "loss": 2.0221, "step": 11512000 }, { "epoch": 57.04, "learning_rate": 2.1491778263303657e-05, "loss": 2.0129, "step": 11512500 }, { "epoch": 57.04, "learning_rate": 2.1490539676877574e-05, "loss": 2.046, "step": 11513000 }, { "epoch": 57.04, "learning_rate": 2.148930109045149e-05, "loss": 2.0051, "step": 11513500 }, { "epoch": 57.04, "learning_rate": 2.1488062504025408e-05, "loss": 2.0124, "step": 11514000 }, { "epoch": 57.05, "learning_rate": 2.1486826394772174e-05, "loss": 1.9994, "step": 11514500 }, { "epoch": 57.05, "learning_rate": 2.148558780834609e-05, "loss": 2.0237, "step": 11515000 }, { "epoch": 57.05, "learning_rate": 2.1484349221920007e-05, "loss": 1.9734, "step": 11515500 }, { "epoch": 57.05, "learning_rate": 2.1483110635493924e-05, "loss": 2.0002, "step": 11516000 }, { "epoch": 57.06, "learning_rate": 2.1481874526240693e-05, "loss": 2.0246, "step": 11516500 }, { "epoch": 57.06, "learning_rate": 2.148063593981461e-05, "loss": 2.0094, "step": 11517000 }, { "epoch": 57.06, "learning_rate": 2.1479397353388524e-05, "loss": 1.9894, "step": 11517500 }, { "epoch": 57.06, "learning_rate": 2.147815876696244e-05, "loss": 2.012, "step": 11518000 }, { "epoch": 57.07, "learning_rate": 2.1476922657709213e-05, "loss": 2.0272, "step": 11518500 }, { "epoch": 57.07, "learning_rate": 2.147568407128313e-05, "loss": 1.9839, "step": 11519000 }, { "epoch": 57.07, "learning_rate": 2.1474445484857043e-05, "loss": 2.0155, "step": 11519500 }, { "epoch": 57.07, "learning_rate": 2.147320689843096e-05, "loss": 2.0132, "step": 11520000 }, { "epoch": 57.08, "learning_rate": 2.1471968312004877e-05, "loss": 1.9739, "step": 11520500 }, { "epoch": 57.08, "learning_rate": 2.147072972557879e-05, "loss": 1.9999, "step": 11521000 }, { "epoch": 57.08, "learning_rate": 2.1469491139152708e-05, "loss": 1.9939, "step": 11521500 }, { "epoch": 57.08, "learning_rate": 2.1468252552726624e-05, "loss": 1.9989, "step": 11522000 }, { "epoch": 57.09, "learning_rate": 2.1467016443473397e-05, "loss": 2.0004, "step": 11522500 }, { "epoch": 57.09, "learning_rate": 2.146577785704731e-05, "loss": 2.0302, "step": 11523000 }, { "epoch": 57.09, "learning_rate": 2.1464539270621227e-05, "loss": 2.0077, "step": 11523500 }, { "epoch": 57.09, "learning_rate": 2.1463303161367996e-05, "loss": 2.0018, "step": 11524000 }, { "epoch": 57.1, "learning_rate": 2.1462064574941913e-05, "loss": 1.9876, "step": 11524500 }, { "epoch": 57.1, "learning_rate": 2.146082598851583e-05, "loss": 2.0086, "step": 11525000 }, { "epoch": 57.1, "learning_rate": 2.1459587402089747e-05, "loss": 2.0174, "step": 11525500 }, { "epoch": 57.1, "learning_rate": 2.145834881566366e-05, "loss": 1.9928, "step": 11526000 }, { "epoch": 57.11, "learning_rate": 2.145711270641043e-05, "loss": 1.9963, "step": 11526500 }, { "epoch": 57.11, "learning_rate": 2.1455874119984346e-05, "loss": 2.0014, "step": 11527000 }, { "epoch": 57.11, "learning_rate": 2.1454635533558263e-05, "loss": 2.0019, "step": 11527500 }, { "epoch": 57.11, "learning_rate": 2.145339694713218e-05, "loss": 2.0024, "step": 11528000 }, { "epoch": 57.12, "learning_rate": 2.1452158360706097e-05, "loss": 1.9968, "step": 11528500 }, { "epoch": 57.12, "learning_rate": 2.1450922251452862e-05, "loss": 2.028, "step": 11529000 }, { "epoch": 57.12, "learning_rate": 2.144968366502678e-05, "loss": 2.0028, "step": 11529500 }, { "epoch": 57.12, "learning_rate": 2.1448445078600696e-05, "loss": 1.999, "step": 11530000 }, { "epoch": 57.13, "learning_rate": 2.1447206492174613e-05, "loss": 2.0036, "step": 11530500 }, { "epoch": 57.13, "learning_rate": 2.144596790574853e-05, "loss": 2.0081, "step": 11531000 }, { "epoch": 57.13, "learning_rate": 2.1444729319322447e-05, "loss": 1.9859, "step": 11531500 }, { "epoch": 57.13, "learning_rate": 2.1443490732896364e-05, "loss": 1.9889, "step": 11532000 }, { "epoch": 57.14, "learning_rate": 2.1442252146470277e-05, "loss": 2.0097, "step": 11532500 }, { "epoch": 57.14, "learning_rate": 2.1441013560044194e-05, "loss": 1.9928, "step": 11533000 }, { "epoch": 57.14, "learning_rate": 2.143977497361811e-05, "loss": 1.9946, "step": 11533500 }, { "epoch": 57.14, "learning_rate": 2.1438536387192028e-05, "loss": 1.9872, "step": 11534000 }, { "epoch": 57.15, "learning_rate": 2.143729780076594e-05, "loss": 1.9938, "step": 11534500 }, { "epoch": 57.15, "learning_rate": 2.143605921433986e-05, "loss": 2.0104, "step": 11535000 }, { "epoch": 57.15, "learning_rate": 2.1434820627913775e-05, "loss": 2.0175, "step": 11535500 }, { "epoch": 57.15, "learning_rate": 2.1433582041487692e-05, "loss": 1.9921, "step": 11536000 }, { "epoch": 57.16, "learning_rate": 2.1432343455061606e-05, "loss": 1.9869, "step": 11536500 }, { "epoch": 57.16, "learning_rate": 2.1431104868635523e-05, "loss": 2.0038, "step": 11537000 }, { "epoch": 57.16, "learning_rate": 2.142986875938229e-05, "loss": 1.9794, "step": 11537500 }, { "epoch": 57.16, "learning_rate": 2.142863017295621e-05, "loss": 2.0062, "step": 11538000 }, { "epoch": 57.17, "learning_rate": 2.1427394063702977e-05, "loss": 1.9949, "step": 11538500 }, { "epoch": 57.17, "learning_rate": 2.1426155477276894e-05, "loss": 1.9942, "step": 11539000 }, { "epoch": 57.17, "learning_rate": 2.142491689085081e-05, "loss": 1.9909, "step": 11539500 }, { "epoch": 57.17, "learning_rate": 2.1423678304424728e-05, "loss": 2.021, "step": 11540000 }, { "epoch": 57.18, "learning_rate": 2.1422439717998645e-05, "loss": 1.9949, "step": 11540500 }, { "epoch": 57.18, "learning_rate": 2.142120113157256e-05, "loss": 2.0007, "step": 11541000 }, { "epoch": 57.18, "learning_rate": 2.141996749949218e-05, "loss": 2.0024, "step": 11541500 }, { "epoch": 57.18, "learning_rate": 2.1418728913066096e-05, "loss": 1.9961, "step": 11542000 }, { "epoch": 57.19, "learning_rate": 2.1417492803812865e-05, "loss": 2.0254, "step": 11542500 }, { "epoch": 57.19, "learning_rate": 2.1416254217386782e-05, "loss": 2.0015, "step": 11543000 }, { "epoch": 57.19, "learning_rate": 2.141501810813355e-05, "loss": 2.0192, "step": 11543500 }, { "epoch": 57.19, "learning_rate": 2.1413779521707468e-05, "loss": 2.0008, "step": 11544000 }, { "epoch": 57.2, "learning_rate": 2.1412540935281385e-05, "loss": 1.9871, "step": 11544500 }, { "epoch": 57.2, "learning_rate": 2.14113023488553e-05, "loss": 2.0006, "step": 11545000 }, { "epoch": 57.2, "learning_rate": 2.1410063762429215e-05, "loss": 2.0232, "step": 11545500 }, { "epoch": 57.2, "learning_rate": 2.1408825176003132e-05, "loss": 2.0028, "step": 11546000 }, { "epoch": 57.21, "learning_rate": 2.1407586589577046e-05, "loss": 2.0032, "step": 11546500 }, { "epoch": 57.21, "learning_rate": 2.1406348003150963e-05, "loss": 1.9965, "step": 11547000 }, { "epoch": 57.21, "learning_rate": 2.140510941672488e-05, "loss": 1.9948, "step": 11547500 }, { "epoch": 57.21, "learning_rate": 2.1403870830298796e-05, "loss": 2.0257, "step": 11548000 }, { "epoch": 57.22, "learning_rate": 2.1402632243872713e-05, "loss": 1.9903, "step": 11548500 }, { "epoch": 57.22, "learning_rate": 2.140139365744663e-05, "loss": 2.0215, "step": 11549000 }, { "epoch": 57.22, "learning_rate": 2.1400155071020547e-05, "loss": 2.0024, "step": 11549500 }, { "epoch": 57.22, "learning_rate": 2.1398916484594464e-05, "loss": 2.0268, "step": 11550000 }, { "epoch": 57.23, "learning_rate": 2.139767789816838e-05, "loss": 2.0019, "step": 11550500 }, { "epoch": 57.23, "learning_rate": 2.1396439311742295e-05, "loss": 1.9873, "step": 11551000 }, { "epoch": 57.23, "learning_rate": 2.1395203202489063e-05, "loss": 1.9962, "step": 11551500 }, { "epoch": 57.23, "learning_rate": 2.139396461606298e-05, "loss": 2.0196, "step": 11552000 }, { "epoch": 57.24, "learning_rate": 2.139272850680975e-05, "loss": 2.0096, "step": 11552500 }, { "epoch": 57.24, "learning_rate": 2.1391489920383666e-05, "loss": 1.9844, "step": 11553000 }, { "epoch": 57.24, "learning_rate": 2.139025133395758e-05, "loss": 2.0256, "step": 11553500 }, { "epoch": 57.24, "learning_rate": 2.1389012747531497e-05, "loss": 2.0, "step": 11554000 }, { "epoch": 57.24, "learning_rate": 2.138777663827827e-05, "loss": 2.0054, "step": 11554500 }, { "epoch": 57.25, "learning_rate": 2.1386538051852186e-05, "loss": 2.0293, "step": 11555000 }, { "epoch": 57.25, "learning_rate": 2.13852994654261e-05, "loss": 2.0118, "step": 11555500 }, { "epoch": 57.25, "learning_rate": 2.1384063356172868e-05, "loss": 1.9928, "step": 11556000 }, { "epoch": 57.25, "learning_rate": 2.1382824769746785e-05, "loss": 2.0257, "step": 11556500 }, { "epoch": 57.26, "learning_rate": 2.1381586183320702e-05, "loss": 2.0247, "step": 11557000 }, { "epoch": 57.26, "learning_rate": 2.138034759689462e-05, "loss": 2.0214, "step": 11557500 }, { "epoch": 57.26, "learning_rate": 2.1379109010468536e-05, "loss": 1.9952, "step": 11558000 }, { "epoch": 57.26, "learning_rate": 2.1377870424042453e-05, "loss": 2.0143, "step": 11558500 }, { "epoch": 57.27, "learning_rate": 2.1376631837616366e-05, "loss": 2.0391, "step": 11559000 }, { "epoch": 57.27, "learning_rate": 2.1375393251190283e-05, "loss": 2.015, "step": 11559500 }, { "epoch": 57.27, "learning_rate": 2.1374154664764197e-05, "loss": 2.0322, "step": 11560000 }, { "epoch": 57.27, "learning_rate": 2.1372916078338114e-05, "loss": 2.03, "step": 11560500 }, { "epoch": 57.28, "learning_rate": 2.137167749191203e-05, "loss": 1.9944, "step": 11561000 }, { "epoch": 57.28, "learning_rate": 2.1370438905485947e-05, "loss": 2.0103, "step": 11561500 }, { "epoch": 57.28, "learning_rate": 2.1369202796232716e-05, "loss": 2.0339, "step": 11562000 }, { "epoch": 57.28, "learning_rate": 2.1367964209806633e-05, "loss": 1.9758, "step": 11562500 }, { "epoch": 57.29, "learning_rate": 2.136672562338055e-05, "loss": 2.043, "step": 11563000 }, { "epoch": 57.29, "learning_rate": 2.1365487036954464e-05, "loss": 2.0133, "step": 11563500 }, { "epoch": 57.29, "learning_rate": 2.136424845052838e-05, "loss": 2.0051, "step": 11564000 }, { "epoch": 57.29, "learning_rate": 2.1363012341275153e-05, "loss": 2.0174, "step": 11564500 }, { "epoch": 57.3, "learning_rate": 2.136177375484907e-05, "loss": 2.0116, "step": 11565000 }, { "epoch": 57.3, "learning_rate": 2.1360535168422983e-05, "loss": 2.0018, "step": 11565500 }, { "epoch": 57.3, "learning_rate": 2.1359299059169752e-05, "loss": 2.0057, "step": 11566000 }, { "epoch": 57.3, "learning_rate": 2.135806047274367e-05, "loss": 2.0029, "step": 11566500 }, { "epoch": 57.31, "learning_rate": 2.1356821886317586e-05, "loss": 2.0016, "step": 11567000 }, { "epoch": 57.31, "learning_rate": 2.1355583299891503e-05, "loss": 2.0219, "step": 11567500 }, { "epoch": 57.31, "learning_rate": 2.135434471346542e-05, "loss": 2.0091, "step": 11568000 }, { "epoch": 57.31, "learning_rate": 2.1353106127039333e-05, "loss": 1.9961, "step": 11568500 }, { "epoch": 57.32, "learning_rate": 2.135186754061325e-05, "loss": 2.0061, "step": 11569000 }, { "epoch": 57.32, "learning_rate": 2.1350628954187167e-05, "loss": 2.0428, "step": 11569500 }, { "epoch": 57.32, "learning_rate": 2.134939036776108e-05, "loss": 2.013, "step": 11570000 }, { "epoch": 57.32, "learning_rate": 2.1348151781334998e-05, "loss": 1.9944, "step": 11570500 }, { "epoch": 57.33, "learning_rate": 2.1346913194908915e-05, "loss": 1.9939, "step": 11571000 }, { "epoch": 57.33, "learning_rate": 2.134567460848283e-05, "loss": 1.9918, "step": 11571500 }, { "epoch": 57.33, "learning_rate": 2.134443602205675e-05, "loss": 2.0127, "step": 11572000 }, { "epoch": 57.33, "learning_rate": 2.1343197435630665e-05, "loss": 1.9986, "step": 11572500 }, { "epoch": 57.34, "learning_rate": 2.1341961326377434e-05, "loss": 2.0114, "step": 11573000 }, { "epoch": 57.34, "learning_rate": 2.1340722739951348e-05, "loss": 1.9932, "step": 11573500 }, { "epoch": 57.34, "learning_rate": 2.1339484153525265e-05, "loss": 2.0206, "step": 11574000 }, { "epoch": 57.34, "learning_rate": 2.1338248044272037e-05, "loss": 2.0257, "step": 11574500 }, { "epoch": 57.35, "learning_rate": 2.133700945784595e-05, "loss": 1.9949, "step": 11575000 }, { "epoch": 57.35, "learning_rate": 2.1335770871419867e-05, "loss": 2.0117, "step": 11575500 }, { "epoch": 57.35, "learning_rate": 2.1334532284993784e-05, "loss": 2.0107, "step": 11576000 }, { "epoch": 57.35, "learning_rate": 2.13332936985677e-05, "loss": 1.9987, "step": 11576500 }, { "epoch": 57.36, "learning_rate": 2.1332055112141615e-05, "loss": 2.029, "step": 11577000 }, { "epoch": 57.36, "learning_rate": 2.133081652571553e-05, "loss": 2.0061, "step": 11577500 }, { "epoch": 57.36, "learning_rate": 2.132957793928945e-05, "loss": 2.0227, "step": 11578000 }, { "epoch": 57.36, "learning_rate": 2.1328341830036217e-05, "loss": 1.9957, "step": 11578500 }, { "epoch": 57.37, "learning_rate": 2.1327103243610134e-05, "loss": 2.0196, "step": 11579000 }, { "epoch": 57.37, "learning_rate": 2.132586465718405e-05, "loss": 2.0051, "step": 11579500 }, { "epoch": 57.37, "learning_rate": 2.1324626070757968e-05, "loss": 2.017, "step": 11580000 }, { "epoch": 57.37, "learning_rate": 2.132338748433188e-05, "loss": 2.004, "step": 11580500 }, { "epoch": 57.38, "learning_rate": 2.132215137507865e-05, "loss": 2.0144, "step": 11581000 }, { "epoch": 57.38, "learning_rate": 2.1320912788652567e-05, "loss": 2.0322, "step": 11581500 }, { "epoch": 57.38, "learning_rate": 2.1319674202226484e-05, "loss": 2.0264, "step": 11582000 }, { "epoch": 57.38, "learning_rate": 2.13184356158004e-05, "loss": 2.0258, "step": 11582500 }, { "epoch": 57.39, "learning_rate": 2.1317197029374318e-05, "loss": 2.0199, "step": 11583000 }, { "epoch": 57.39, "learning_rate": 2.1315960920121087e-05, "loss": 2.0093, "step": 11583500 }, { "epoch": 57.39, "learning_rate": 2.1314722333695e-05, "loss": 2.03, "step": 11584000 }, { "epoch": 57.39, "learning_rate": 2.1313483747268917e-05, "loss": 2.0002, "step": 11584500 }, { "epoch": 57.4, "learning_rate": 2.1312247638015686e-05, "loss": 2.0058, "step": 11585000 }, { "epoch": 57.4, "learning_rate": 2.1311009051589603e-05, "loss": 2.0033, "step": 11585500 }, { "epoch": 57.4, "learning_rate": 2.130977046516352e-05, "loss": 1.9898, "step": 11586000 }, { "epoch": 57.4, "learning_rate": 2.1308531878737437e-05, "loss": 2.0188, "step": 11586500 }, { "epoch": 57.41, "learning_rate": 2.1307293292311354e-05, "loss": 2.0136, "step": 11587000 }, { "epoch": 57.41, "learning_rate": 2.1306054705885268e-05, "loss": 1.9961, "step": 11587500 }, { "epoch": 57.41, "learning_rate": 2.1304816119459184e-05, "loss": 2.0286, "step": 11588000 }, { "epoch": 57.41, "learning_rate": 2.13035775330331e-05, "loss": 2.0225, "step": 11588500 }, { "epoch": 57.42, "learning_rate": 2.130233894660702e-05, "loss": 2.0363, "step": 11589000 }, { "epoch": 57.42, "learning_rate": 2.1301100360180935e-05, "loss": 1.9735, "step": 11589500 }, { "epoch": 57.42, "learning_rate": 2.1299864250927704e-05, "loss": 2.0077, "step": 11590000 }, { "epoch": 57.42, "learning_rate": 2.129862814167447e-05, "loss": 2.0031, "step": 11590500 }, { "epoch": 57.43, "learning_rate": 2.1297389555248386e-05, "loss": 2.0495, "step": 11591000 }, { "epoch": 57.43, "learning_rate": 2.1296150968822303e-05, "loss": 2.0211, "step": 11591500 }, { "epoch": 57.43, "learning_rate": 2.129491238239622e-05, "loss": 1.9998, "step": 11592000 }, { "epoch": 57.43, "learning_rate": 2.1293673795970137e-05, "loss": 1.9883, "step": 11592500 }, { "epoch": 57.44, "learning_rate": 2.1292435209544054e-05, "loss": 2.0094, "step": 11593000 }, { "epoch": 57.44, "learning_rate": 2.1291196623117968e-05, "loss": 2.0125, "step": 11593500 }, { "epoch": 57.44, "learning_rate": 2.1289958036691885e-05, "loss": 1.999, "step": 11594000 }, { "epoch": 57.44, "learning_rate": 2.1288721927438653e-05, "loss": 2.0154, "step": 11594500 }, { "epoch": 57.45, "learning_rate": 2.1287485818185422e-05, "loss": 2.0254, "step": 11595000 }, { "epoch": 57.45, "learning_rate": 2.128624723175934e-05, "loss": 2.0029, "step": 11595500 }, { "epoch": 57.45, "learning_rate": 2.1285008645333253e-05, "loss": 2.0099, "step": 11596000 }, { "epoch": 57.45, "learning_rate": 2.128377005890717e-05, "loss": 1.9965, "step": 11596500 }, { "epoch": 57.46, "learning_rate": 2.1282533949653942e-05, "loss": 2.0191, "step": 11597000 }, { "epoch": 57.46, "learning_rate": 2.128129536322786e-05, "loss": 1.9803, "step": 11597500 }, { "epoch": 57.46, "learning_rate": 2.1280056776801772e-05, "loss": 2.0156, "step": 11598000 }, { "epoch": 57.46, "learning_rate": 2.127881819037569e-05, "loss": 1.9998, "step": 11598500 }, { "epoch": 57.47, "learning_rate": 2.1277579603949606e-05, "loss": 2.0069, "step": 11599000 }, { "epoch": 57.47, "learning_rate": 2.1276343494696375e-05, "loss": 2.0087, "step": 11599500 }, { "epoch": 57.47, "learning_rate": 2.1275104908270292e-05, "loss": 2.0168, "step": 11600000 }, { "epoch": 57.47, "learning_rate": 2.127386632184421e-05, "loss": 2.0344, "step": 11600500 }, { "epoch": 57.48, "learning_rate": 2.1272627735418126e-05, "loss": 2.0255, "step": 11601000 }, { "epoch": 57.48, "learning_rate": 2.127138914899204e-05, "loss": 1.996, "step": 11601500 }, { "epoch": 57.48, "learning_rate": 2.1270150562565956e-05, "loss": 2.0302, "step": 11602000 }, { "epoch": 57.48, "learning_rate": 2.126891197613987e-05, "loss": 1.9917, "step": 11602500 }, { "epoch": 57.49, "learning_rate": 2.1267673389713787e-05, "loss": 2.0025, "step": 11603000 }, { "epoch": 57.49, "learning_rate": 2.1266434803287704e-05, "loss": 2.0119, "step": 11603500 }, { "epoch": 57.49, "learning_rate": 2.1265198694034476e-05, "loss": 2.0308, "step": 11604000 }, { "epoch": 57.49, "learning_rate": 2.1263960107608393e-05, "loss": 2.0398, "step": 11604500 }, { "epoch": 57.5, "learning_rate": 2.1262721521182306e-05, "loss": 2.0309, "step": 11605000 }, { "epoch": 57.5, "learning_rate": 2.1261482934756223e-05, "loss": 2.0154, "step": 11605500 }, { "epoch": 57.5, "learning_rate": 2.1260244348330137e-05, "loss": 2.0279, "step": 11606000 }, { "epoch": 57.5, "learning_rate": 2.1259005761904054e-05, "loss": 2.0191, "step": 11606500 }, { "epoch": 57.51, "learning_rate": 2.125776717547797e-05, "loss": 2.0201, "step": 11607000 }, { "epoch": 57.51, "learning_rate": 2.1256528589051887e-05, "loss": 2.0072, "step": 11607500 }, { "epoch": 57.51, "learning_rate": 2.1255290002625804e-05, "loss": 2.0192, "step": 11608000 }, { "epoch": 57.51, "learning_rate": 2.125405141619972e-05, "loss": 2.0151, "step": 11608500 }, { "epoch": 57.51, "learning_rate": 2.125281530694649e-05, "loss": 1.9891, "step": 11609000 }, { "epoch": 57.52, "learning_rate": 2.125157919769326e-05, "loss": 2.0175, "step": 11609500 }, { "epoch": 57.52, "learning_rate": 2.1250343088440024e-05, "loss": 2.0042, "step": 11610000 }, { "epoch": 57.52, "learning_rate": 2.124910450201394e-05, "loss": 2.0467, "step": 11610500 }, { "epoch": 57.52, "learning_rate": 2.1247865915587858e-05, "loss": 2.0031, "step": 11611000 }, { "epoch": 57.53, "learning_rate": 2.1246627329161775e-05, "loss": 2.0369, "step": 11611500 }, { "epoch": 57.53, "learning_rate": 2.1245388742735692e-05, "loss": 2.0365, "step": 11612000 }, { "epoch": 57.53, "learning_rate": 2.124415015630961e-05, "loss": 2.0109, "step": 11612500 }, { "epoch": 57.53, "learning_rate": 2.1242911569883526e-05, "loss": 2.0422, "step": 11613000 }, { "epoch": 57.54, "learning_rate": 2.1241672983457443e-05, "loss": 2.0337, "step": 11613500 }, { "epoch": 57.54, "learning_rate": 2.124043687420421e-05, "loss": 2.005, "step": 11614000 }, { "epoch": 57.54, "learning_rate": 2.1239198287778125e-05, "loss": 1.998, "step": 11614500 }, { "epoch": 57.54, "learning_rate": 2.1237959701352042e-05, "loss": 2.0178, "step": 11615000 }, { "epoch": 57.55, "learning_rate": 2.123672111492596e-05, "loss": 2.0254, "step": 11615500 }, { "epoch": 57.55, "learning_rate": 2.1235482528499876e-05, "loss": 2.0147, "step": 11616000 }, { "epoch": 57.55, "learning_rate": 2.1234243942073793e-05, "loss": 2.0013, "step": 11616500 }, { "epoch": 57.55, "learning_rate": 2.123300535564771e-05, "loss": 2.0099, "step": 11617000 }, { "epoch": 57.56, "learning_rate": 2.1231766769221623e-05, "loss": 2.0238, "step": 11617500 }, { "epoch": 57.56, "learning_rate": 2.123052818279554e-05, "loss": 2.0168, "step": 11618000 }, { "epoch": 57.56, "learning_rate": 2.1229289596369457e-05, "loss": 2.0012, "step": 11618500 }, { "epoch": 57.56, "learning_rate": 2.1228051009943374e-05, "loss": 1.9939, "step": 11619000 }, { "epoch": 57.57, "learning_rate": 2.1226812423517288e-05, "loss": 2.0142, "step": 11619500 }, { "epoch": 57.57, "learning_rate": 2.1225573837091205e-05, "loss": 2.0207, "step": 11620000 }, { "epoch": 57.57, "learning_rate": 2.122433525066512e-05, "loss": 2.013, "step": 11620500 }, { "epoch": 57.57, "learning_rate": 2.122309666423904e-05, "loss": 2.0086, "step": 11621000 }, { "epoch": 57.58, "learning_rate": 2.1221858077812952e-05, "loss": 2.0109, "step": 11621500 }, { "epoch": 57.58, "learning_rate": 2.122061949138687e-05, "loss": 2.0519, "step": 11622000 }, { "epoch": 57.58, "learning_rate": 2.1219380904960786e-05, "loss": 2.0447, "step": 11622500 }, { "epoch": 57.58, "learning_rate": 2.1218142318534703e-05, "loss": 2.0263, "step": 11623000 }, { "epoch": 57.59, "learning_rate": 2.121690373210862e-05, "loss": 2.0236, "step": 11623500 }, { "epoch": 57.59, "learning_rate": 2.1215672577201092e-05, "loss": 1.9937, "step": 11624000 }, { "epoch": 57.59, "learning_rate": 2.121443646794786e-05, "loss": 2.0092, "step": 11624500 }, { "epoch": 57.59, "learning_rate": 2.1213197881521778e-05, "loss": 2.0097, "step": 11625000 }, { "epoch": 57.6, "learning_rate": 2.121195929509569e-05, "loss": 2.0125, "step": 11625500 }, { "epoch": 57.6, "learning_rate": 2.121072070866961e-05, "loss": 2.0114, "step": 11626000 }, { "epoch": 57.6, "learning_rate": 2.120948459941638e-05, "loss": 2.0038, "step": 11626500 }, { "epoch": 57.6, "learning_rate": 2.1208246012990298e-05, "loss": 2.0222, "step": 11627000 }, { "epoch": 57.61, "learning_rate": 2.120700742656421e-05, "loss": 2.024, "step": 11627500 }, { "epoch": 57.61, "learning_rate": 2.1205768840138128e-05, "loss": 2.0346, "step": 11628000 }, { "epoch": 57.61, "learning_rate": 2.120453025371204e-05, "loss": 2.0051, "step": 11628500 }, { "epoch": 57.61, "learning_rate": 2.120329166728596e-05, "loss": 2.0241, "step": 11629000 }, { "epoch": 57.62, "learning_rate": 2.1202053080859876e-05, "loss": 1.9971, "step": 11629500 }, { "epoch": 57.62, "learning_rate": 2.1200814494433792e-05, "loss": 2.0129, "step": 11630000 }, { "epoch": 57.62, "learning_rate": 2.119957838518056e-05, "loss": 1.9995, "step": 11630500 }, { "epoch": 57.62, "learning_rate": 2.1198339798754478e-05, "loss": 2.0176, "step": 11631000 }, { "epoch": 57.63, "learning_rate": 2.1197101212328395e-05, "loss": 2.0145, "step": 11631500 }, { "epoch": 57.63, "learning_rate": 2.1195865103075164e-05, "loss": 2.0126, "step": 11632000 }, { "epoch": 57.63, "learning_rate": 2.119462651664908e-05, "loss": 2.045, "step": 11632500 }, { "epoch": 57.63, "learning_rate": 2.1193387930222998e-05, "loss": 2.004, "step": 11633000 }, { "epoch": 57.64, "learning_rate": 2.1192149343796915e-05, "loss": 2.01, "step": 11633500 }, { "epoch": 57.64, "learning_rate": 2.1190910757370828e-05, "loss": 2.0087, "step": 11634000 }, { "epoch": 57.64, "learning_rate": 2.1189672170944745e-05, "loss": 2.0093, "step": 11634500 }, { "epoch": 57.64, "learning_rate": 2.1188433584518662e-05, "loss": 2.0027, "step": 11635000 }, { "epoch": 57.65, "learning_rate": 2.1187194998092576e-05, "loss": 2.0002, "step": 11635500 }, { "epoch": 57.65, "learning_rate": 2.1185956411666493e-05, "loss": 2.0133, "step": 11636000 }, { "epoch": 57.65, "learning_rate": 2.118471782524041e-05, "loss": 2.018, "step": 11636500 }, { "epoch": 57.65, "learning_rate": 2.1183479238814326e-05, "loss": 1.999, "step": 11637000 }, { "epoch": 57.66, "learning_rate": 2.1182240652388243e-05, "loss": 2.0438, "step": 11637500 }, { "epoch": 57.66, "learning_rate": 2.118100206596216e-05, "loss": 2.0073, "step": 11638000 }, { "epoch": 57.66, "learning_rate": 2.1179763479536077e-05, "loss": 2.0213, "step": 11638500 }, { "epoch": 57.66, "learning_rate": 2.1178529847455698e-05, "loss": 2.0404, "step": 11639000 }, { "epoch": 57.67, "learning_rate": 2.1177291261029615e-05, "loss": 2.0193, "step": 11639500 }, { "epoch": 57.67, "learning_rate": 2.1176052674603532e-05, "loss": 2.0313, "step": 11640000 }, { "epoch": 57.67, "learning_rate": 2.117481408817745e-05, "loss": 2.0276, "step": 11640500 }, { "epoch": 57.67, "learning_rate": 2.1173575501751362e-05, "loss": 2.0035, "step": 11641000 }, { "epoch": 57.68, "learning_rate": 2.117233691532528e-05, "loss": 2.0117, "step": 11641500 }, { "epoch": 57.68, "learning_rate": 2.1171098328899193e-05, "loss": 2.032, "step": 11642000 }, { "epoch": 57.68, "learning_rate": 2.116985974247311e-05, "loss": 2.0126, "step": 11642500 }, { "epoch": 57.68, "learning_rate": 2.1168621156047027e-05, "loss": 2.0248, "step": 11643000 }, { "epoch": 57.69, "learning_rate": 2.1167382569620944e-05, "loss": 2.0152, "step": 11643500 }, { "epoch": 57.69, "learning_rate": 2.116614398319486e-05, "loss": 2.0272, "step": 11644000 }, { "epoch": 57.69, "learning_rate": 2.116490787394163e-05, "loss": 2.0129, "step": 11644500 }, { "epoch": 57.69, "learning_rate": 2.1163671764688398e-05, "loss": 2.015, "step": 11645000 }, { "epoch": 57.7, "learning_rate": 2.1162433178262315e-05, "loss": 2.0173, "step": 11645500 }, { "epoch": 57.7, "learning_rate": 2.1161194591836232e-05, "loss": 2.0294, "step": 11646000 }, { "epoch": 57.7, "learning_rate": 2.115995600541015e-05, "loss": 2.0165, "step": 11646500 }, { "epoch": 57.7, "learning_rate": 2.1158717418984066e-05, "loss": 2.0317, "step": 11647000 }, { "epoch": 57.71, "learning_rate": 2.115747883255798e-05, "loss": 1.9948, "step": 11647500 }, { "epoch": 57.71, "learning_rate": 2.1156240246131896e-05, "loss": 2.0019, "step": 11648000 }, { "epoch": 57.71, "learning_rate": 2.1155001659705813e-05, "loss": 2.0214, "step": 11648500 }, { "epoch": 57.71, "learning_rate": 2.1153765550452582e-05, "loss": 2.0332, "step": 11649000 }, { "epoch": 57.72, "learning_rate": 2.11525269640265e-05, "loss": 2.0089, "step": 11649500 }, { "epoch": 57.72, "learning_rate": 2.1151288377600416e-05, "loss": 1.9939, "step": 11650000 }, { "epoch": 57.72, "learning_rate": 2.115004979117433e-05, "loss": 2.0358, "step": 11650500 }, { "epoch": 57.72, "learning_rate": 2.1148811204748246e-05, "loss": 2.0073, "step": 11651000 }, { "epoch": 57.73, "learning_rate": 2.1147575095495015e-05, "loss": 2.0301, "step": 11651500 }, { "epoch": 57.73, "learning_rate": 2.1146338986241784e-05, "loss": 2.0268, "step": 11652000 }, { "epoch": 57.73, "learning_rate": 2.1145100399815697e-05, "loss": 2.0259, "step": 11652500 }, { "epoch": 57.73, "learning_rate": 2.1143861813389614e-05, "loss": 1.9977, "step": 11653000 }, { "epoch": 57.74, "learning_rate": 2.1142625704136383e-05, "loss": 1.9963, "step": 11653500 }, { "epoch": 57.74, "learning_rate": 2.11413871177103e-05, "loss": 2.0405, "step": 11654000 }, { "epoch": 57.74, "learning_rate": 2.1140148531284217e-05, "loss": 2.0246, "step": 11654500 }, { "epoch": 57.74, "learning_rate": 2.1138909944858134e-05, "loss": 2.0252, "step": 11655000 }, { "epoch": 57.75, "learning_rate": 2.1137671358432048e-05, "loss": 2.0144, "step": 11655500 }, { "epoch": 57.75, "learning_rate": 2.1136432772005964e-05, "loss": 2.0095, "step": 11656000 }, { "epoch": 57.75, "learning_rate": 2.113519418557988e-05, "loss": 2.0297, "step": 11656500 }, { "epoch": 57.75, "learning_rate": 2.1133955599153798e-05, "loss": 2.0138, "step": 11657000 }, { "epoch": 57.76, "learning_rate": 2.1132717012727715e-05, "loss": 2.0104, "step": 11657500 }, { "epoch": 57.76, "learning_rate": 2.1131478426301632e-05, "loss": 2.0349, "step": 11658000 }, { "epoch": 57.76, "learning_rate": 2.113023983987555e-05, "loss": 2.0231, "step": 11658500 }, { "epoch": 57.76, "learning_rate": 2.1129001253449466e-05, "loss": 2.0136, "step": 11659000 }, { "epoch": 57.77, "learning_rate": 2.1127767621369087e-05, "loss": 2.0063, "step": 11659500 }, { "epoch": 57.77, "learning_rate": 2.1126529034943e-05, "loss": 2.0381, "step": 11660000 }, { "epoch": 57.77, "learning_rate": 2.1125290448516917e-05, "loss": 1.9935, "step": 11660500 }, { "epoch": 57.77, "learning_rate": 2.1124051862090834e-05, "loss": 2.0407, "step": 11661000 }, { "epoch": 57.78, "learning_rate": 2.1122813275664748e-05, "loss": 2.009, "step": 11661500 }, { "epoch": 57.78, "learning_rate": 2.1121574689238665e-05, "loss": 2.0228, "step": 11662000 }, { "epoch": 57.78, "learning_rate": 2.112033610281258e-05, "loss": 2.02, "step": 11662500 }, { "epoch": 57.78, "learning_rate": 2.11190975163865e-05, "loss": 2.0127, "step": 11663000 }, { "epoch": 57.79, "learning_rate": 2.1117858929960415e-05, "loss": 2.0118, "step": 11663500 }, { "epoch": 57.79, "learning_rate": 2.1116620343534332e-05, "loss": 2.0098, "step": 11664000 }, { "epoch": 57.79, "learning_rate": 2.111538175710825e-05, "loss": 2.0279, "step": 11664500 }, { "epoch": 57.79, "learning_rate": 2.1114143170682166e-05, "loss": 2.0077, "step": 11665000 }, { "epoch": 57.79, "learning_rate": 2.1112904584256083e-05, "loss": 2.0116, "step": 11665500 }, { "epoch": 57.8, "learning_rate": 2.1111665997829997e-05, "loss": 2.0095, "step": 11666000 }, { "epoch": 57.8, "learning_rate": 2.1110427411403914e-05, "loss": 2.0221, "step": 11666500 }, { "epoch": 57.8, "learning_rate": 2.1109191302150682e-05, "loss": 2.0236, "step": 11667000 }, { "epoch": 57.8, "learning_rate": 2.11079527157246e-05, "loss": 1.9974, "step": 11667500 }, { "epoch": 57.81, "learning_rate": 2.1106714129298516e-05, "loss": 2.002, "step": 11668000 }, { "epoch": 57.81, "learning_rate": 2.1105475542872433e-05, "loss": 2.02, "step": 11668500 }, { "epoch": 57.81, "learning_rate": 2.1104236956446347e-05, "loss": 2.0223, "step": 11669000 }, { "epoch": 57.81, "learning_rate": 2.1102998370020264e-05, "loss": 2.0184, "step": 11669500 }, { "epoch": 57.82, "learning_rate": 2.110175978359418e-05, "loss": 2.0047, "step": 11670000 }, { "epoch": 57.82, "learning_rate": 2.1100521197168097e-05, "loss": 2.0385, "step": 11670500 }, { "epoch": 57.82, "learning_rate": 2.1099282610742014e-05, "loss": 1.9896, "step": 11671000 }, { "epoch": 57.82, "learning_rate": 2.1098046501488783e-05, "loss": 2.0215, "step": 11671500 }, { "epoch": 57.83, "learning_rate": 2.10968079150627e-05, "loss": 2.0363, "step": 11672000 }, { "epoch": 57.83, "learning_rate": 2.1095569328636614e-05, "loss": 2.032, "step": 11672500 }, { "epoch": 57.83, "learning_rate": 2.109433074221053e-05, "loss": 2.0324, "step": 11673000 }, { "epoch": 57.83, "learning_rate": 2.1093092155784448e-05, "loss": 2.0092, "step": 11673500 }, { "epoch": 57.84, "learning_rate": 2.1091853569358364e-05, "loss": 2.0304, "step": 11674000 }, { "epoch": 57.84, "learning_rate": 2.109061498293228e-05, "loss": 2.0172, "step": 11674500 }, { "epoch": 57.84, "learning_rate": 2.108937887367905e-05, "loss": 2.0346, "step": 11675000 }, { "epoch": 57.84, "learning_rate": 2.1088140287252964e-05, "loss": 1.9806, "step": 11675500 }, { "epoch": 57.85, "learning_rate": 2.108690170082688e-05, "loss": 2.015, "step": 11676000 }, { "epoch": 57.85, "learning_rate": 2.1085663114400798e-05, "loss": 2.0219, "step": 11676500 }, { "epoch": 57.85, "learning_rate": 2.1084424527974715e-05, "loss": 1.9936, "step": 11677000 }, { "epoch": 57.85, "learning_rate": 2.1083190895894335e-05, "loss": 2.0064, "step": 11677500 }, { "epoch": 57.86, "learning_rate": 2.108195230946825e-05, "loss": 2.0256, "step": 11678000 }, { "epoch": 57.86, "learning_rate": 2.1080713723042166e-05, "loss": 2.0408, "step": 11678500 }, { "epoch": 57.86, "learning_rate": 2.1079475136616083e-05, "loss": 2.0253, "step": 11679000 }, { "epoch": 57.86, "learning_rate": 2.1078239027362855e-05, "loss": 2.0026, "step": 11679500 }, { "epoch": 57.87, "learning_rate": 2.107700044093677e-05, "loss": 2.0005, "step": 11680000 }, { "epoch": 57.87, "learning_rate": 2.1075761854510685e-05, "loss": 2.0096, "step": 11680500 }, { "epoch": 57.87, "learning_rate": 2.1074523268084602e-05, "loss": 2.0083, "step": 11681000 }, { "epoch": 57.87, "learning_rate": 2.1073284681658516e-05, "loss": 1.9955, "step": 11681500 }, { "epoch": 57.88, "learning_rate": 2.1072046095232433e-05, "loss": 2.0136, "step": 11682000 }, { "epoch": 57.88, "learning_rate": 2.107080750880635e-05, "loss": 2.0141, "step": 11682500 }, { "epoch": 57.88, "learning_rate": 2.1069568922380267e-05, "loss": 1.9999, "step": 11683000 }, { "epoch": 57.88, "learning_rate": 2.1068332813127035e-05, "loss": 2.011, "step": 11683500 }, { "epoch": 57.89, "learning_rate": 2.1067094226700952e-05, "loss": 2.0094, "step": 11684000 }, { "epoch": 57.89, "learning_rate": 2.1065855640274866e-05, "loss": 2.0318, "step": 11684500 }, { "epoch": 57.89, "learning_rate": 2.1064617053848783e-05, "loss": 2.0133, "step": 11685000 }, { "epoch": 57.89, "learning_rate": 2.10633784674227e-05, "loss": 2.0276, "step": 11685500 }, { "epoch": 57.9, "learning_rate": 2.1062142358169472e-05, "loss": 2.0085, "step": 11686000 }, { "epoch": 57.9, "learning_rate": 2.1060903771743385e-05, "loss": 2.0183, "step": 11686500 }, { "epoch": 57.9, "learning_rate": 2.1059665185317302e-05, "loss": 2.0138, "step": 11687000 }, { "epoch": 57.9, "learning_rate": 2.105842659889122e-05, "loss": 2.0342, "step": 11687500 }, { "epoch": 57.91, "learning_rate": 2.1057188012465133e-05, "loss": 2.0334, "step": 11688000 }, { "epoch": 57.91, "learning_rate": 2.105594942603905e-05, "loss": 2.025, "step": 11688500 }, { "epoch": 57.91, "learning_rate": 2.1054710839612967e-05, "loss": 2.0292, "step": 11689000 }, { "epoch": 57.91, "learning_rate": 2.1053472253186884e-05, "loss": 2.0427, "step": 11689500 }, { "epoch": 57.92, "learning_rate": 2.10522336667608e-05, "loss": 2.0117, "step": 11690000 }, { "epoch": 57.92, "learning_rate": 2.1050995080334717e-05, "loss": 2.0144, "step": 11690500 }, { "epoch": 57.92, "learning_rate": 2.104975649390863e-05, "loss": 2.0173, "step": 11691000 }, { "epoch": 57.92, "learning_rate": 2.1048517907482548e-05, "loss": 1.989, "step": 11691500 }, { "epoch": 57.93, "learning_rate": 2.1047279321056465e-05, "loss": 1.9989, "step": 11692000 }, { "epoch": 57.93, "learning_rate": 2.1046043211803234e-05, "loss": 2.0195, "step": 11692500 }, { "epoch": 57.93, "learning_rate": 2.104480462537715e-05, "loss": 2.0129, "step": 11693000 }, { "epoch": 57.93, "learning_rate": 2.1043566038951067e-05, "loss": 2.0003, "step": 11693500 }, { "epoch": 57.94, "learning_rate": 2.104232745252498e-05, "loss": 1.9999, "step": 11694000 }, { "epoch": 57.94, "learning_rate": 2.1041091343271753e-05, "loss": 2.019, "step": 11694500 }, { "epoch": 57.94, "learning_rate": 2.1039852756845667e-05, "loss": 2.0204, "step": 11695000 }, { "epoch": 57.94, "learning_rate": 2.1038614170419584e-05, "loss": 2.0292, "step": 11695500 }, { "epoch": 57.95, "learning_rate": 2.10373755839935e-05, "loss": 1.9972, "step": 11696000 }, { "epoch": 57.95, "learning_rate": 2.1036136997567418e-05, "loss": 2.0264, "step": 11696500 }, { "epoch": 57.95, "learning_rate": 2.1034898411141334e-05, "loss": 2.0016, "step": 11697000 }, { "epoch": 57.95, "learning_rate": 2.1033662301888103e-05, "loss": 2.0109, "step": 11697500 }, { "epoch": 57.96, "learning_rate": 2.1032423715462017e-05, "loss": 2.0058, "step": 11698000 }, { "epoch": 57.96, "learning_rate": 2.1031185129035934e-05, "loss": 2.0153, "step": 11698500 }, { "epoch": 57.96, "learning_rate": 2.102994654260985e-05, "loss": 2.0308, "step": 11699000 }, { "epoch": 57.96, "learning_rate": 2.1028707956183768e-05, "loss": 2.0118, "step": 11699500 }, { "epoch": 57.97, "learning_rate": 2.1027469369757685e-05, "loss": 2.0108, "step": 11700000 }, { "epoch": 57.97, "learning_rate": 2.1026230783331598e-05, "loss": 2.0218, "step": 11700500 }, { "epoch": 57.97, "learning_rate": 2.1024992196905515e-05, "loss": 2.0061, "step": 11701000 }, { "epoch": 57.97, "learning_rate": 2.1023753610479432e-05, "loss": 2.0139, "step": 11701500 }, { "epoch": 57.98, "learning_rate": 2.102251502405335e-05, "loss": 2.0045, "step": 11702000 }, { "epoch": 57.98, "learning_rate": 2.1021276437627266e-05, "loss": 2.0401, "step": 11702500 }, { "epoch": 57.98, "learning_rate": 2.1020042805546886e-05, "loss": 2.0194, "step": 11703000 }, { "epoch": 57.98, "learning_rate": 2.1018804219120803e-05, "loss": 2.0122, "step": 11703500 }, { "epoch": 57.99, "learning_rate": 2.1017568109867572e-05, "loss": 2.0067, "step": 11704000 }, { "epoch": 57.99, "learning_rate": 2.101632952344149e-05, "loss": 2.0173, "step": 11704500 }, { "epoch": 57.99, "learning_rate": 2.1015090937015406e-05, "loss": 2.0205, "step": 11705000 }, { "epoch": 57.99, "learning_rate": 2.101385235058932e-05, "loss": 2.016, "step": 11705500 }, { "epoch": 58.0, "learning_rate": 2.101261624133609e-05, "loss": 2.0369, "step": 11706000 }, { "epoch": 58.0, "learning_rate": 2.1011377654910005e-05, "loss": 2.0086, "step": 11706500 }, { "epoch": 58.0, "eval_accuracy": 0.6727128525931729, "eval_accuracy_mlm": 0.6317242904742146, "eval_accuracy_nsp": 0.8660490510238901, "eval_loss": 2.28670597076416, "eval_runtime": 146.9052, "eval_samples_per_second": 1735.534, "eval_steps_per_second": 72.319, "step": 11706894 }, { "epoch": 58.0, "learning_rate": 2.1010139068483922e-05, "loss": 2.0567, "step": 11707000 }, { "epoch": 58.0, "learning_rate": 2.100890048205784e-05, "loss": 2.0068, "step": 11707500 }, { "epoch": 58.01, "learning_rate": 2.1007661895631756e-05, "loss": 1.9978, "step": 11708000 }, { "epoch": 58.01, "learning_rate": 2.100642330920567e-05, "loss": 1.9762, "step": 11708500 }, { "epoch": 58.01, "learning_rate": 2.1005184722779587e-05, "loss": 1.9947, "step": 11709000 }, { "epoch": 58.01, "learning_rate": 2.1003946136353504e-05, "loss": 2.005, "step": 11709500 }, { "epoch": 58.02, "learning_rate": 2.100270754992742e-05, "loss": 1.9719, "step": 11710000 }, { "epoch": 58.02, "learning_rate": 2.1001468963501337e-05, "loss": 2.0018, "step": 11710500 }, { "epoch": 58.02, "learning_rate": 2.1000230377075254e-05, "loss": 1.9836, "step": 11711000 }, { "epoch": 58.02, "learning_rate": 2.099899179064917e-05, "loss": 2.0, "step": 11711500 }, { "epoch": 58.03, "learning_rate": 2.0997755681395937e-05, "loss": 1.9897, "step": 11712000 }, { "epoch": 58.03, "learning_rate": 2.0996517094969854e-05, "loss": 2.0146, "step": 11712500 }, { "epoch": 58.03, "learning_rate": 2.099527850854377e-05, "loss": 1.9905, "step": 11713000 }, { "epoch": 58.03, "learning_rate": 2.0994039922117687e-05, "loss": 2.0171, "step": 11713500 }, { "epoch": 58.04, "learning_rate": 2.0992801335691604e-05, "loss": 1.9916, "step": 11714000 }, { "epoch": 58.04, "learning_rate": 2.0991565226438373e-05, "loss": 1.963, "step": 11714500 }, { "epoch": 58.04, "learning_rate": 2.0990326640012287e-05, "loss": 2.0159, "step": 11715000 }, { "epoch": 58.04, "learning_rate": 2.0989088053586204e-05, "loss": 2.0155, "step": 11715500 }, { "epoch": 58.05, "learning_rate": 2.0987851944332972e-05, "loss": 1.9831, "step": 11716000 }, { "epoch": 58.05, "learning_rate": 2.098661583507974e-05, "loss": 1.9883, "step": 11716500 }, { "epoch": 58.05, "learning_rate": 2.0985377248653658e-05, "loss": 2.0044, "step": 11717000 }, { "epoch": 58.05, "learning_rate": 2.0984138662227572e-05, "loss": 1.9858, "step": 11717500 }, { "epoch": 58.06, "learning_rate": 2.098290007580149e-05, "loss": 1.9876, "step": 11718000 }, { "epoch": 58.06, "learning_rate": 2.098166396654826e-05, "loss": 1.9797, "step": 11718500 }, { "epoch": 58.06, "learning_rate": 2.0980427857295026e-05, "loss": 1.991, "step": 11719000 }, { "epoch": 58.06, "learning_rate": 2.0979189270868943e-05, "loss": 1.9822, "step": 11719500 }, { "epoch": 58.06, "learning_rate": 2.097795068444286e-05, "loss": 2.0149, "step": 11720000 }, { "epoch": 58.07, "learning_rate": 2.0976712098016777e-05, "loss": 1.9885, "step": 11720500 }, { "epoch": 58.07, "learning_rate": 2.0975473511590694e-05, "loss": 1.9856, "step": 11721000 }, { "epoch": 58.07, "learning_rate": 2.097423740233746e-05, "loss": 1.9701, "step": 11721500 }, { "epoch": 58.07, "learning_rate": 2.0972998815911376e-05, "loss": 1.9879, "step": 11722000 }, { "epoch": 58.08, "learning_rate": 2.0971760229485293e-05, "loss": 1.9761, "step": 11722500 }, { "epoch": 58.08, "learning_rate": 2.097052164305921e-05, "loss": 1.9969, "step": 11723000 }, { "epoch": 58.08, "learning_rate": 2.0969283056633127e-05, "loss": 1.9772, "step": 11723500 }, { "epoch": 58.08, "learning_rate": 2.0968044470207044e-05, "loss": 1.9911, "step": 11724000 }, { "epoch": 58.09, "learning_rate": 2.096680588378096e-05, "loss": 2.0003, "step": 11724500 }, { "epoch": 58.09, "learning_rate": 2.0965567297354878e-05, "loss": 2.0262, "step": 11725000 }, { "epoch": 58.09, "learning_rate": 2.0964328710928795e-05, "loss": 1.9907, "step": 11725500 }, { "epoch": 58.09, "learning_rate": 2.096309012450271e-05, "loss": 1.9998, "step": 11726000 }, { "epoch": 58.1, "learning_rate": 2.0961851538076625e-05, "loss": 1.9711, "step": 11726500 }, { "epoch": 58.1, "learning_rate": 2.0960612951650542e-05, "loss": 2.0225, "step": 11727000 }, { "epoch": 58.1, "learning_rate": 2.0959374365224456e-05, "loss": 2.0045, "step": 11727500 }, { "epoch": 58.1, "learning_rate": 2.0958138255971228e-05, "loss": 2.006, "step": 11728000 }, { "epoch": 58.11, "learning_rate": 2.0956899669545145e-05, "loss": 2.0078, "step": 11728500 }, { "epoch": 58.11, "learning_rate": 2.095566108311906e-05, "loss": 2.004, "step": 11729000 }, { "epoch": 58.11, "learning_rate": 2.0954422496692975e-05, "loss": 1.9836, "step": 11729500 }, { "epoch": 58.11, "learning_rate": 2.0953183910266892e-05, "loss": 1.982, "step": 11730000 }, { "epoch": 58.12, "learning_rate": 2.095194532384081e-05, "loss": 1.992, "step": 11730500 }, { "epoch": 58.12, "learning_rate": 2.0950706737414723e-05, "loss": 1.9645, "step": 11731000 }, { "epoch": 58.12, "learning_rate": 2.0949470628161495e-05, "loss": 2.0014, "step": 11731500 }, { "epoch": 58.12, "learning_rate": 2.0948232041735412e-05, "loss": 1.9891, "step": 11732000 }, { "epoch": 58.13, "learning_rate": 2.0946993455309325e-05, "loss": 2.0076, "step": 11732500 }, { "epoch": 58.13, "learning_rate": 2.0945754868883242e-05, "loss": 2.0137, "step": 11733000 }, { "epoch": 58.13, "learning_rate": 2.094451628245716e-05, "loss": 2.0086, "step": 11733500 }, { "epoch": 58.13, "learning_rate": 2.0943280173203928e-05, "loss": 2.0019, "step": 11734000 }, { "epoch": 58.14, "learning_rate": 2.0942041586777845e-05, "loss": 2.0079, "step": 11734500 }, { "epoch": 58.14, "learning_rate": 2.0940803000351762e-05, "loss": 1.9811, "step": 11735000 }, { "epoch": 58.14, "learning_rate": 2.0939564413925676e-05, "loss": 1.9971, "step": 11735500 }, { "epoch": 58.14, "learning_rate": 2.0938325827499592e-05, "loss": 1.9923, "step": 11736000 }, { "epoch": 58.15, "learning_rate": 2.093708971824636e-05, "loss": 1.9981, "step": 11736500 }, { "epoch": 58.15, "learning_rate": 2.0935851131820278e-05, "loss": 1.9888, "step": 11737000 }, { "epoch": 58.15, "learning_rate": 2.0934612545394195e-05, "loss": 1.9883, "step": 11737500 }, { "epoch": 58.15, "learning_rate": 2.0933373958968112e-05, "loss": 2.0175, "step": 11738000 }, { "epoch": 58.16, "learning_rate": 2.0932135372542026e-05, "loss": 2.0223, "step": 11738500 }, { "epoch": 58.16, "learning_rate": 2.0930899263288794e-05, "loss": 2.0046, "step": 11739000 }, { "epoch": 58.16, "learning_rate": 2.092966067686271e-05, "loss": 2.0046, "step": 11739500 }, { "epoch": 58.16, "learning_rate": 2.092842456760948e-05, "loss": 1.9965, "step": 11740000 }, { "epoch": 58.17, "learning_rate": 2.0927185981183394e-05, "loss": 2.0022, "step": 11740500 }, { "epoch": 58.17, "learning_rate": 2.092594739475731e-05, "loss": 2.001, "step": 11741000 }, { "epoch": 58.17, "learning_rate": 2.0924708808331228e-05, "loss": 2.0175, "step": 11741500 }, { "epoch": 58.17, "learning_rate": 2.0923470221905144e-05, "loss": 1.9975, "step": 11742000 }, { "epoch": 58.18, "learning_rate": 2.0922234112651913e-05, "loss": 1.99, "step": 11742500 }, { "epoch": 58.18, "learning_rate": 2.092099552622583e-05, "loss": 1.9912, "step": 11743000 }, { "epoch": 58.18, "learning_rate": 2.0919756939799744e-05, "loss": 2.0013, "step": 11743500 }, { "epoch": 58.18, "learning_rate": 2.091851835337366e-05, "loss": 1.9832, "step": 11744000 }, { "epoch": 58.19, "learning_rate": 2.0917279766947578e-05, "loss": 1.9875, "step": 11744500 }, { "epoch": 58.19, "learning_rate": 2.0916041180521494e-05, "loss": 1.9896, "step": 11745000 }, { "epoch": 58.19, "learning_rate": 2.091480259409541e-05, "loss": 2.0019, "step": 11745500 }, { "epoch": 58.19, "learning_rate": 2.091356400766933e-05, "loss": 1.9886, "step": 11746000 }, { "epoch": 58.2, "learning_rate": 2.0912325421243245e-05, "loss": 1.9972, "step": 11746500 }, { "epoch": 58.2, "learning_rate": 2.091108931199001e-05, "loss": 2.0049, "step": 11747000 }, { "epoch": 58.2, "learning_rate": 2.0909850725563928e-05, "loss": 2.0161, "step": 11747500 }, { "epoch": 58.2, "learning_rate": 2.0908612139137845e-05, "loss": 2.004, "step": 11748000 }, { "epoch": 58.21, "learning_rate": 2.0907376029884613e-05, "loss": 1.9871, "step": 11748500 }, { "epoch": 58.21, "learning_rate": 2.090613744345853e-05, "loss": 1.994, "step": 11749000 }, { "epoch": 58.21, "learning_rate": 2.0904898857032447e-05, "loss": 1.9842, "step": 11749500 }, { "epoch": 58.21, "learning_rate": 2.090366027060636e-05, "loss": 1.9956, "step": 11750000 }, { "epoch": 58.22, "learning_rate": 2.0902421684180278e-05, "loss": 2.0, "step": 11750500 }, { "epoch": 58.22, "learning_rate": 2.0901183097754195e-05, "loss": 2.0076, "step": 11751000 }, { "epoch": 58.22, "learning_rate": 2.089994451132811e-05, "loss": 2.0103, "step": 11751500 }, { "epoch": 58.22, "learning_rate": 2.089870592490203e-05, "loss": 2.0044, "step": 11752000 }, { "epoch": 58.23, "learning_rate": 2.0897467338475945e-05, "loss": 2.0025, "step": 11752500 }, { "epoch": 58.23, "learning_rate": 2.0896228752049862e-05, "loss": 1.9937, "step": 11753000 }, { "epoch": 58.23, "learning_rate": 2.089499016562378e-05, "loss": 2.0145, "step": 11753500 }, { "epoch": 58.23, "learning_rate": 2.0893754056370545e-05, "loss": 1.9934, "step": 11754000 }, { "epoch": 58.24, "learning_rate": 2.089251546994446e-05, "loss": 2.0248, "step": 11754500 }, { "epoch": 58.24, "learning_rate": 2.089127688351838e-05, "loss": 1.9992, "step": 11755000 }, { "epoch": 58.24, "learning_rate": 2.0890038297092295e-05, "loss": 1.9931, "step": 11755500 }, { "epoch": 58.24, "learning_rate": 2.0888799710666212e-05, "loss": 1.996, "step": 11756000 }, { "epoch": 58.25, "learning_rate": 2.0887563601412978e-05, "loss": 2.0238, "step": 11756500 }, { "epoch": 58.25, "learning_rate": 2.088632749215975e-05, "loss": 1.9968, "step": 11757000 }, { "epoch": 58.25, "learning_rate": 2.0885088905733667e-05, "loss": 1.9852, "step": 11757500 }, { "epoch": 58.25, "learning_rate": 2.0883850319307584e-05, "loss": 1.9958, "step": 11758000 }, { "epoch": 58.26, "learning_rate": 2.0882611732881497e-05, "loss": 2.0005, "step": 11758500 }, { "epoch": 58.26, "learning_rate": 2.0881373146455414e-05, "loss": 2.0104, "step": 11759000 }, { "epoch": 58.26, "learning_rate": 2.088013456002933e-05, "loss": 1.9951, "step": 11759500 }, { "epoch": 58.26, "learning_rate": 2.08788984507761e-05, "loss": 2.0154, "step": 11760000 }, { "epoch": 58.27, "learning_rate": 2.0877659864350017e-05, "loss": 1.9888, "step": 11760500 }, { "epoch": 58.27, "learning_rate": 2.0876421277923934e-05, "loss": 2.0098, "step": 11761000 }, { "epoch": 58.27, "learning_rate": 2.087518269149785e-05, "loss": 1.9631, "step": 11761500 }, { "epoch": 58.27, "learning_rate": 2.0873944105071764e-05, "loss": 2.0011, "step": 11762000 }, { "epoch": 58.28, "learning_rate": 2.087270551864568e-05, "loss": 1.9815, "step": 11762500 }, { "epoch": 58.28, "learning_rate": 2.0871466932219598e-05, "loss": 2.0318, "step": 11763000 }, { "epoch": 58.28, "learning_rate": 2.0870228345793512e-05, "loss": 2.0076, "step": 11763500 }, { "epoch": 58.28, "learning_rate": 2.086898975936743e-05, "loss": 1.9877, "step": 11764000 }, { "epoch": 58.29, "learning_rate": 2.0867751172941346e-05, "loss": 1.9925, "step": 11764500 }, { "epoch": 58.29, "learning_rate": 2.0866515063688118e-05, "loss": 1.9988, "step": 11765000 }, { "epoch": 58.29, "learning_rate": 2.086527647726203e-05, "loss": 2.0058, "step": 11765500 }, { "epoch": 58.29, "learning_rate": 2.086403789083595e-05, "loss": 2.0188, "step": 11766000 }, { "epoch": 58.3, "learning_rate": 2.0862799304409862e-05, "loss": 2.0065, "step": 11766500 }, { "epoch": 58.3, "learning_rate": 2.086156071798378e-05, "loss": 2.0016, "step": 11767000 }, { "epoch": 58.3, "learning_rate": 2.0860322131557696e-05, "loss": 2.03, "step": 11767500 }, { "epoch": 58.3, "learning_rate": 2.0859083545131613e-05, "loss": 2.0025, "step": 11768000 }, { "epoch": 58.31, "learning_rate": 2.085784743587838e-05, "loss": 1.9936, "step": 11768500 }, { "epoch": 58.31, "learning_rate": 2.08566088494523e-05, "loss": 2.0163, "step": 11769000 }, { "epoch": 58.31, "learning_rate": 2.0855370263026215e-05, "loss": 1.9809, "step": 11769500 }, { "epoch": 58.31, "learning_rate": 2.085413167660013e-05, "loss": 2.0048, "step": 11770000 }, { "epoch": 58.32, "learning_rate": 2.0852893090174046e-05, "loss": 2.011, "step": 11770500 }, { "epoch": 58.32, "learning_rate": 2.0851654503747963e-05, "loss": 2.0069, "step": 11771000 }, { "epoch": 58.32, "learning_rate": 2.085041591732188e-05, "loss": 2.0062, "step": 11771500 }, { "epoch": 58.32, "learning_rate": 2.0849177330895797e-05, "loss": 1.9898, "step": 11772000 }, { "epoch": 58.33, "learning_rate": 2.0847941221642565e-05, "loss": 1.9868, "step": 11772500 }, { "epoch": 58.33, "learning_rate": 2.0846702635216482e-05, "loss": 2.0095, "step": 11773000 }, { "epoch": 58.33, "learning_rate": 2.0845464048790396e-05, "loss": 2.0034, "step": 11773500 }, { "epoch": 58.33, "learning_rate": 2.0844225462364313e-05, "loss": 2.0136, "step": 11774000 }, { "epoch": 58.33, "learning_rate": 2.084298687593823e-05, "loss": 2.0098, "step": 11774500 }, { "epoch": 58.34, "learning_rate": 2.0841748289512147e-05, "loss": 2.0257, "step": 11775000 }, { "epoch": 58.34, "learning_rate": 2.0840509703086064e-05, "loss": 2.0378, "step": 11775500 }, { "epoch": 58.34, "learning_rate": 2.0839271116659977e-05, "loss": 1.9975, "step": 11776000 }, { "epoch": 58.34, "learning_rate": 2.083803500740675e-05, "loss": 1.9913, "step": 11776500 }, { "epoch": 58.35, "learning_rate": 2.0836796420980663e-05, "loss": 1.9847, "step": 11777000 }, { "epoch": 58.35, "learning_rate": 2.083555783455458e-05, "loss": 2.0174, "step": 11777500 }, { "epoch": 58.35, "learning_rate": 2.0834319248128497e-05, "loss": 1.9994, "step": 11778000 }, { "epoch": 58.35, "learning_rate": 2.0833085616048117e-05, "loss": 1.9867, "step": 11778500 }, { "epoch": 58.36, "learning_rate": 2.0831849506794886e-05, "loss": 2.0035, "step": 11779000 }, { "epoch": 58.36, "learning_rate": 2.0830610920368803e-05, "loss": 2.0145, "step": 11779500 }, { "epoch": 58.36, "learning_rate": 2.0829372333942717e-05, "loss": 2.0048, "step": 11780000 }, { "epoch": 58.36, "learning_rate": 2.0828133747516634e-05, "loss": 2.0187, "step": 11780500 }, { "epoch": 58.37, "learning_rate": 2.082689516109055e-05, "loss": 1.9927, "step": 11781000 }, { "epoch": 58.37, "learning_rate": 2.0825656574664467e-05, "loss": 1.9986, "step": 11781500 }, { "epoch": 58.37, "learning_rate": 2.0824420465411236e-05, "loss": 2.0275, "step": 11782000 }, { "epoch": 58.37, "learning_rate": 2.0823181878985153e-05, "loss": 2.0028, "step": 11782500 }, { "epoch": 58.38, "learning_rate": 2.0821945769731922e-05, "loss": 2.0, "step": 11783000 }, { "epoch": 58.38, "learning_rate": 2.082070718330584e-05, "loss": 1.9838, "step": 11783500 }, { "epoch": 58.38, "learning_rate": 2.0819468596879756e-05, "loss": 2.0086, "step": 11784000 }, { "epoch": 58.38, "learning_rate": 2.081823001045367e-05, "loss": 2.0124, "step": 11784500 }, { "epoch": 58.39, "learning_rate": 2.0816991424027586e-05, "loss": 1.9879, "step": 11785000 }, { "epoch": 58.39, "learning_rate": 2.0815752837601503e-05, "loss": 2.0042, "step": 11785500 }, { "epoch": 58.39, "learning_rate": 2.0814514251175417e-05, "loss": 1.9961, "step": 11786000 }, { "epoch": 58.39, "learning_rate": 2.0813275664749334e-05, "loss": 1.9965, "step": 11786500 }, { "epoch": 58.4, "learning_rate": 2.081203707832325e-05, "loss": 1.9939, "step": 11787000 }, { "epoch": 58.4, "learning_rate": 2.0810798491897168e-05, "loss": 2.0303, "step": 11787500 }, { "epoch": 58.4, "learning_rate": 2.0809559905471084e-05, "loss": 1.9914, "step": 11788000 }, { "epoch": 58.4, "learning_rate": 2.0808323796217853e-05, "loss": 1.9915, "step": 11788500 }, { "epoch": 58.41, "learning_rate": 2.080708520979177e-05, "loss": 2.005, "step": 11789000 }, { "epoch": 58.41, "learning_rate": 2.0805846623365684e-05, "loss": 2.0025, "step": 11789500 }, { "epoch": 58.41, "learning_rate": 2.08046080369396e-05, "loss": 2.0012, "step": 11790000 }, { "epoch": 58.41, "learning_rate": 2.0803369450513518e-05, "loss": 2.0203, "step": 11790500 }, { "epoch": 58.42, "learning_rate": 2.0802130864087435e-05, "loss": 2.0153, "step": 11791000 }, { "epoch": 58.42, "learning_rate": 2.080089227766135e-05, "loss": 2.0188, "step": 11791500 }, { "epoch": 58.42, "learning_rate": 2.079965369123527e-05, "loss": 1.9945, "step": 11792000 }, { "epoch": 58.42, "learning_rate": 2.0798415104809185e-05, "loss": 1.9983, "step": 11792500 }, { "epoch": 58.43, "learning_rate": 2.0797176518383102e-05, "loss": 1.9936, "step": 11793000 }, { "epoch": 58.43, "learning_rate": 2.0795937931957016e-05, "loss": 2.0222, "step": 11793500 }, { "epoch": 58.43, "learning_rate": 2.0794699345530933e-05, "loss": 2.0045, "step": 11794000 }, { "epoch": 58.43, "learning_rate": 2.079346075910485e-05, "loss": 2.027, "step": 11794500 }, { "epoch": 58.44, "learning_rate": 2.0792222172678767e-05, "loss": 2.0157, "step": 11795000 }, { "epoch": 58.44, "learning_rate": 2.0790986063425535e-05, "loss": 2.011, "step": 11795500 }, { "epoch": 58.44, "learning_rate": 2.0789747476999452e-05, "loss": 2.0165, "step": 11796000 }, { "epoch": 58.44, "learning_rate": 2.078850889057337e-05, "loss": 2.006, "step": 11796500 }, { "epoch": 58.45, "learning_rate": 2.0787272781320135e-05, "loss": 2.0386, "step": 11797000 }, { "epoch": 58.45, "learning_rate": 2.078603419489405e-05, "loss": 2.0038, "step": 11797500 }, { "epoch": 58.45, "learning_rate": 2.078479560846797e-05, "loss": 2.0163, "step": 11798000 }, { "epoch": 58.45, "learning_rate": 2.0783557022041885e-05, "loss": 1.9898, "step": 11798500 }, { "epoch": 58.46, "learning_rate": 2.0782318435615802e-05, "loss": 1.986, "step": 11799000 }, { "epoch": 58.46, "learning_rate": 2.078107984918972e-05, "loss": 2.0015, "step": 11799500 }, { "epoch": 58.46, "learning_rate": 2.0779841262763633e-05, "loss": 1.9912, "step": 11800000 }, { "epoch": 58.46, "learning_rate": 2.07786051535104e-05, "loss": 1.9997, "step": 11800500 }, { "epoch": 58.47, "learning_rate": 2.077736656708432e-05, "loss": 2.021, "step": 11801000 }, { "epoch": 58.47, "learning_rate": 2.0776127980658236e-05, "loss": 2.0103, "step": 11801500 }, { "epoch": 58.47, "learning_rate": 2.0774891871405004e-05, "loss": 2.0079, "step": 11802000 }, { "epoch": 58.47, "learning_rate": 2.0773653284978918e-05, "loss": 2.0116, "step": 11802500 }, { "epoch": 58.48, "learning_rate": 2.0772414698552835e-05, "loss": 2.0121, "step": 11803000 }, { "epoch": 58.48, "learning_rate": 2.0771176112126752e-05, "loss": 2.0217, "step": 11803500 }, { "epoch": 58.48, "learning_rate": 2.076993752570067e-05, "loss": 2.002, "step": 11804000 }, { "epoch": 58.48, "learning_rate": 2.076870141644744e-05, "loss": 2.0168, "step": 11804500 }, { "epoch": 58.49, "learning_rate": 2.0767462830021354e-05, "loss": 2.0324, "step": 11805000 }, { "epoch": 58.49, "learning_rate": 2.076622424359527e-05, "loss": 2.0142, "step": 11805500 }, { "epoch": 58.49, "learning_rate": 2.0764985657169185e-05, "loss": 1.9739, "step": 11806000 }, { "epoch": 58.49, "learning_rate": 2.0763747070743102e-05, "loss": 2.002, "step": 11806500 }, { "epoch": 58.5, "learning_rate": 2.076250848431702e-05, "loss": 1.9847, "step": 11807000 }, { "epoch": 58.5, "learning_rate": 2.0761269897890936e-05, "loss": 1.9983, "step": 11807500 }, { "epoch": 58.5, "learning_rate": 2.0760031311464853e-05, "loss": 2.0161, "step": 11808000 }, { "epoch": 58.5, "learning_rate": 2.075879272503877e-05, "loss": 1.9808, "step": 11808500 }, { "epoch": 58.51, "learning_rate": 2.075755661578554e-05, "loss": 2.0066, "step": 11809000 }, { "epoch": 58.51, "learning_rate": 2.0756320506532307e-05, "loss": 1.9958, "step": 11809500 }, { "epoch": 58.51, "learning_rate": 2.0755081920106224e-05, "loss": 1.9997, "step": 11810000 }, { "epoch": 58.51, "learning_rate": 2.075384333368014e-05, "loss": 2.0102, "step": 11810500 }, { "epoch": 58.52, "learning_rate": 2.0752604747254055e-05, "loss": 2.0131, "step": 11811000 }, { "epoch": 58.52, "learning_rate": 2.075136616082797e-05, "loss": 2.0186, "step": 11811500 }, { "epoch": 58.52, "learning_rate": 2.075012757440189e-05, "loss": 2.0382, "step": 11812000 }, { "epoch": 58.52, "learning_rate": 2.0748888987975802e-05, "loss": 2.0048, "step": 11812500 }, { "epoch": 58.53, "learning_rate": 2.074765040154972e-05, "loss": 2.0037, "step": 11813000 }, { "epoch": 58.53, "learning_rate": 2.074641429229649e-05, "loss": 2.0258, "step": 11813500 }, { "epoch": 58.53, "learning_rate": 2.0745175705870408e-05, "loss": 2.0147, "step": 11814000 }, { "epoch": 58.53, "learning_rate": 2.074393711944432e-05, "loss": 2.0023, "step": 11814500 }, { "epoch": 58.54, "learning_rate": 2.074269853301824e-05, "loss": 2.0143, "step": 11815000 }, { "epoch": 58.54, "learning_rate": 2.0741459946592155e-05, "loss": 2.0077, "step": 11815500 }, { "epoch": 58.54, "learning_rate": 2.074022136016607e-05, "loss": 2.0023, "step": 11816000 }, { "epoch": 58.54, "learning_rate": 2.0738982773739986e-05, "loss": 2.0055, "step": 11816500 }, { "epoch": 58.55, "learning_rate": 2.0737744187313903e-05, "loss": 2.0044, "step": 11817000 }, { "epoch": 58.55, "learning_rate": 2.073650560088782e-05, "loss": 2.0068, "step": 11817500 }, { "epoch": 58.55, "learning_rate": 2.0735267014461737e-05, "loss": 2.0125, "step": 11818000 }, { "epoch": 58.55, "learning_rate": 2.073402842803565e-05, "loss": 2.0077, "step": 11818500 }, { "epoch": 58.56, "learning_rate": 2.0732789841609567e-05, "loss": 2.0377, "step": 11819000 }, { "epoch": 58.56, "learning_rate": 2.0731551255183484e-05, "loss": 1.9846, "step": 11819500 }, { "epoch": 58.56, "learning_rate": 2.07303126687574e-05, "loss": 2.0297, "step": 11820000 }, { "epoch": 58.56, "learning_rate": 2.0729074082331318e-05, "loss": 2.0194, "step": 11820500 }, { "epoch": 58.57, "learning_rate": 2.072784045025094e-05, "loss": 1.9672, "step": 11821000 }, { "epoch": 58.57, "learning_rate": 2.0726601863824855e-05, "loss": 2.0321, "step": 11821500 }, { "epoch": 58.57, "learning_rate": 2.0725363277398772e-05, "loss": 2.0038, "step": 11822000 }, { "epoch": 58.57, "learning_rate": 2.072412469097269e-05, "loss": 2.0086, "step": 11822500 }, { "epoch": 58.58, "learning_rate": 2.0722886104546603e-05, "loss": 1.9888, "step": 11823000 }, { "epoch": 58.58, "learning_rate": 2.0721649995293372e-05, "loss": 2.0244, "step": 11823500 }, { "epoch": 58.58, "learning_rate": 2.072041140886729e-05, "loss": 2.0102, "step": 11824000 }, { "epoch": 58.58, "learning_rate": 2.0719172822441206e-05, "loss": 2.0082, "step": 11824500 }, { "epoch": 58.59, "learning_rate": 2.0717934236015122e-05, "loss": 2.0125, "step": 11825000 }, { "epoch": 58.59, "learning_rate": 2.071669564958904e-05, "loss": 1.9986, "step": 11825500 }, { "epoch": 58.59, "learning_rate": 2.0715457063162956e-05, "loss": 2.0075, "step": 11826000 }, { "epoch": 58.59, "learning_rate": 2.071421847673687e-05, "loss": 2.0264, "step": 11826500 }, { "epoch": 58.6, "learning_rate": 2.071298484465649e-05, "loss": 2.0016, "step": 11827000 }, { "epoch": 58.6, "learning_rate": 2.0711746258230407e-05, "loss": 2.0172, "step": 11827500 }, { "epoch": 58.6, "learning_rate": 2.0710507671804324e-05, "loss": 2.0111, "step": 11828000 }, { "epoch": 58.6, "learning_rate": 2.070926908537824e-05, "loss": 1.9798, "step": 11828500 }, { "epoch": 58.6, "learning_rate": 2.0708030498952158e-05, "loss": 2.0042, "step": 11829000 }, { "epoch": 58.61, "learning_rate": 2.0706791912526075e-05, "loss": 1.9883, "step": 11829500 }, { "epoch": 58.61, "learning_rate": 2.070555332609999e-05, "loss": 2.0222, "step": 11830000 }, { "epoch": 58.61, "learning_rate": 2.0704314739673906e-05, "loss": 1.9982, "step": 11830500 }, { "epoch": 58.61, "learning_rate": 2.0703076153247823e-05, "loss": 1.9928, "step": 11831000 }, { "epoch": 58.62, "learning_rate": 2.070183756682174e-05, "loss": 2.022, "step": 11831500 }, { "epoch": 58.62, "learning_rate": 2.0700598980395656e-05, "loss": 2.0401, "step": 11832000 }, { "epoch": 58.62, "learning_rate": 2.0699362871142425e-05, "loss": 2.0298, "step": 11832500 }, { "epoch": 58.62, "learning_rate": 2.069812428471634e-05, "loss": 2.0158, "step": 11833000 }, { "epoch": 58.63, "learning_rate": 2.0696885698290256e-05, "loss": 1.9952, "step": 11833500 }, { "epoch": 58.63, "learning_rate": 2.0695647111864173e-05, "loss": 2.0322, "step": 11834000 }, { "epoch": 58.63, "learning_rate": 2.069441100261094e-05, "loss": 2.0166, "step": 11834500 }, { "epoch": 58.63, "learning_rate": 2.069317241618486e-05, "loss": 2.0037, "step": 11835000 }, { "epoch": 58.64, "learning_rate": 2.0691933829758775e-05, "loss": 2.0169, "step": 11835500 }, { "epoch": 58.64, "learning_rate": 2.069069524333269e-05, "loss": 2.0094, "step": 11836000 }, { "epoch": 58.64, "learning_rate": 2.0689459134079458e-05, "loss": 2.0007, "step": 11836500 }, { "epoch": 58.64, "learning_rate": 2.0688220547653375e-05, "loss": 2.0203, "step": 11837000 }, { "epoch": 58.65, "learning_rate": 2.0686984438400143e-05, "loss": 2.0256, "step": 11837500 }, { "epoch": 58.65, "learning_rate": 2.068574585197406e-05, "loss": 2.0302, "step": 11838000 }, { "epoch": 58.65, "learning_rate": 2.0684507265547974e-05, "loss": 2.0326, "step": 11838500 }, { "epoch": 58.65, "learning_rate": 2.068326867912189e-05, "loss": 1.9841, "step": 11839000 }, { "epoch": 58.66, "learning_rate": 2.0682030092695808e-05, "loss": 1.9962, "step": 11839500 }, { "epoch": 58.66, "learning_rate": 2.0680791506269725e-05, "loss": 1.9941, "step": 11840000 }, { "epoch": 58.66, "learning_rate": 2.067955291984364e-05, "loss": 2.0221, "step": 11840500 }, { "epoch": 58.66, "learning_rate": 2.067831681059041e-05, "loss": 1.9973, "step": 11841000 }, { "epoch": 58.67, "learning_rate": 2.067708070133718e-05, "loss": 2.0225, "step": 11841500 }, { "epoch": 58.67, "learning_rate": 2.0675842114911096e-05, "loss": 1.9881, "step": 11842000 }, { "epoch": 58.67, "learning_rate": 2.0674603528485013e-05, "loss": 1.9885, "step": 11842500 }, { "epoch": 58.67, "learning_rate": 2.067336494205893e-05, "loss": 2.0138, "step": 11843000 }, { "epoch": 58.68, "learning_rate": 2.0672126355632847e-05, "loss": 2.0282, "step": 11843500 }, { "epoch": 58.68, "learning_rate": 2.067088776920676e-05, "loss": 2.0325, "step": 11844000 }, { "epoch": 58.68, "learning_rate": 2.0669649182780677e-05, "loss": 2.0025, "step": 11844500 }, { "epoch": 58.68, "learning_rate": 2.0668410596354594e-05, "loss": 1.9885, "step": 11845000 }, { "epoch": 58.69, "learning_rate": 2.0667172009928508e-05, "loss": 1.9958, "step": 11845500 }, { "epoch": 58.69, "learning_rate": 2.066593590067528e-05, "loss": 2.0163, "step": 11846000 }, { "epoch": 58.69, "learning_rate": 2.0664699791422045e-05, "loss": 2.0099, "step": 11846500 }, { "epoch": 58.69, "learning_rate": 2.0663461204995962e-05, "loss": 2.0077, "step": 11847000 }, { "epoch": 58.7, "learning_rate": 2.066222261856988e-05, "loss": 2.0086, "step": 11847500 }, { "epoch": 58.7, "learning_rate": 2.0660984032143796e-05, "loss": 1.9968, "step": 11848000 }, { "epoch": 58.7, "learning_rate": 2.0659747922890565e-05, "loss": 2.0303, "step": 11848500 }, { "epoch": 58.7, "learning_rate": 2.0658509336464482e-05, "loss": 2.0268, "step": 11849000 }, { "epoch": 58.71, "learning_rate": 2.0657270750038396e-05, "loss": 2.0289, "step": 11849500 }, { "epoch": 58.71, "learning_rate": 2.0656034640785164e-05, "loss": 2.007, "step": 11850000 }, { "epoch": 58.71, "learning_rate": 2.065479605435908e-05, "loss": 2.0196, "step": 11850500 }, { "epoch": 58.71, "learning_rate": 2.0653557467932998e-05, "loss": 1.9904, "step": 11851000 }, { "epoch": 58.72, "learning_rate": 2.0652318881506915e-05, "loss": 2.0218, "step": 11851500 }, { "epoch": 58.72, "learning_rate": 2.0651080295080832e-05, "loss": 2.0041, "step": 11852000 }, { "epoch": 58.72, "learning_rate": 2.0649841708654746e-05, "loss": 2.0104, "step": 11852500 }, { "epoch": 58.72, "learning_rate": 2.0648603122228663e-05, "loss": 2.038, "step": 11853000 }, { "epoch": 58.73, "learning_rate": 2.064736453580258e-05, "loss": 2.0046, "step": 11853500 }, { "epoch": 58.73, "learning_rate": 2.0646125949376496e-05, "loss": 1.9982, "step": 11854000 }, { "epoch": 58.73, "learning_rate": 2.0644887362950413e-05, "loss": 2.0078, "step": 11854500 }, { "epoch": 58.73, "learning_rate": 2.064364877652433e-05, "loss": 1.9858, "step": 11855000 }, { "epoch": 58.74, "learning_rate": 2.0642410190098247e-05, "loss": 2.013, "step": 11855500 }, { "epoch": 58.74, "learning_rate": 2.0641171603672164e-05, "loss": 2.0181, "step": 11856000 }, { "epoch": 58.74, "learning_rate": 2.063993301724608e-05, "loss": 1.9872, "step": 11856500 }, { "epoch": 58.74, "learning_rate": 2.0638694430819995e-05, "loss": 2.0169, "step": 11857000 }, { "epoch": 58.75, "learning_rate": 2.063745584439391e-05, "loss": 1.9967, "step": 11857500 }, { "epoch": 58.75, "learning_rate": 2.063621973514068e-05, "loss": 2.0192, "step": 11858000 }, { "epoch": 58.75, "learning_rate": 2.0634981148714597e-05, "loss": 2.0197, "step": 11858500 }, { "epoch": 58.75, "learning_rate": 2.0633742562288514e-05, "loss": 2.0243, "step": 11859000 }, { "epoch": 58.76, "learning_rate": 2.063250397586243e-05, "loss": 2.0092, "step": 11859500 }, { "epoch": 58.76, "learning_rate": 2.0631265389436345e-05, "loss": 1.9994, "step": 11860000 }, { "epoch": 58.76, "learning_rate": 2.063002680301026e-05, "loss": 2.0093, "step": 11860500 }, { "epoch": 58.76, "learning_rate": 2.062879069375703e-05, "loss": 1.9963, "step": 11861000 }, { "epoch": 58.77, "learning_rate": 2.0627552107330947e-05, "loss": 1.9944, "step": 11861500 }, { "epoch": 58.77, "learning_rate": 2.0626313520904864e-05, "loss": 2.0036, "step": 11862000 }, { "epoch": 58.77, "learning_rate": 2.062507493447878e-05, "loss": 2.0218, "step": 11862500 }, { "epoch": 58.77, "learning_rate": 2.0623836348052695e-05, "loss": 2.0555, "step": 11863000 }, { "epoch": 58.78, "learning_rate": 2.0622600238799464e-05, "loss": 1.9893, "step": 11863500 }, { "epoch": 58.78, "learning_rate": 2.062136165237338e-05, "loss": 2.0034, "step": 11864000 }, { "epoch": 58.78, "learning_rate": 2.0620123065947297e-05, "loss": 2.0277, "step": 11864500 }, { "epoch": 58.78, "learning_rate": 2.0618886956694063e-05, "loss": 2.0409, "step": 11865000 }, { "epoch": 58.79, "learning_rate": 2.061764837026798e-05, "loss": 2.0249, "step": 11865500 }, { "epoch": 58.79, "learning_rate": 2.0616409783841897e-05, "loss": 2.0015, "step": 11866000 }, { "epoch": 58.79, "learning_rate": 2.0615171197415814e-05, "loss": 1.9986, "step": 11866500 }, { "epoch": 58.79, "learning_rate": 2.061393261098973e-05, "loss": 2.0252, "step": 11867000 }, { "epoch": 58.8, "learning_rate": 2.0612694024563647e-05, "loss": 2.0005, "step": 11867500 }, { "epoch": 58.8, "learning_rate": 2.0611455438137564e-05, "loss": 2.017, "step": 11868000 }, { "epoch": 58.8, "learning_rate": 2.061021685171148e-05, "loss": 2.0223, "step": 11868500 }, { "epoch": 58.8, "learning_rate": 2.0608978265285398e-05, "loss": 2.0319, "step": 11869000 }, { "epoch": 58.81, "learning_rate": 2.0607739678859312e-05, "loss": 2.0185, "step": 11869500 }, { "epoch": 58.81, "learning_rate": 2.060650109243323e-05, "loss": 1.9841, "step": 11870000 }, { "epoch": 58.81, "learning_rate": 2.0605264983179997e-05, "loss": 2.0189, "step": 11870500 }, { "epoch": 58.81, "learning_rate": 2.0604026396753914e-05, "loss": 2.0206, "step": 11871000 }, { "epoch": 58.82, "learning_rate": 2.060278781032783e-05, "loss": 2.0116, "step": 11871500 }, { "epoch": 58.82, "learning_rate": 2.0601549223901748e-05, "loss": 2.0004, "step": 11872000 }, { "epoch": 58.82, "learning_rate": 2.0600313114648514e-05, "loss": 1.9983, "step": 11872500 }, { "epoch": 58.82, "learning_rate": 2.059907452822243e-05, "loss": 2.0085, "step": 11873000 }, { "epoch": 58.83, "learning_rate": 2.0597835941796348e-05, "loss": 1.9928, "step": 11873500 }, { "epoch": 58.83, "learning_rate": 2.0596597355370264e-05, "loss": 2.0112, "step": 11874000 }, { "epoch": 58.83, "learning_rate": 2.059535876894418e-05, "loss": 2.0056, "step": 11874500 }, { "epoch": 58.83, "learning_rate": 2.05941201825181e-05, "loss": 2.0186, "step": 11875000 }, { "epoch": 58.84, "learning_rate": 2.0592881596092012e-05, "loss": 2.0007, "step": 11875500 }, { "epoch": 58.84, "learning_rate": 2.059164300966593e-05, "loss": 2.0201, "step": 11876000 }, { "epoch": 58.84, "learning_rate": 2.0590404423239846e-05, "loss": 2.0049, "step": 11876500 }, { "epoch": 58.84, "learning_rate": 2.0589168313986615e-05, "loss": 2.014, "step": 11877000 }, { "epoch": 58.85, "learning_rate": 2.058792972756053e-05, "loss": 2.0104, "step": 11877500 }, { "epoch": 58.85, "learning_rate": 2.0586693618307297e-05, "loss": 1.9915, "step": 11878000 }, { "epoch": 58.85, "learning_rate": 2.0585455031881214e-05, "loss": 2.0064, "step": 11878500 }, { "epoch": 58.85, "learning_rate": 2.058421644545513e-05, "loss": 2.0273, "step": 11879000 }, { "epoch": 58.86, "learning_rate": 2.0582980336201903e-05, "loss": 2.0163, "step": 11879500 }, { "epoch": 58.86, "learning_rate": 2.0581741749775816e-05, "loss": 1.9972, "step": 11880000 }, { "epoch": 58.86, "learning_rate": 2.0580505640522585e-05, "loss": 2.0248, "step": 11880500 }, { "epoch": 58.86, "learning_rate": 2.0579267054096502e-05, "loss": 2.0148, "step": 11881000 }, { "epoch": 58.87, "learning_rate": 2.057802846767042e-05, "loss": 2.0012, "step": 11881500 }, { "epoch": 58.87, "learning_rate": 2.0576789881244336e-05, "loss": 2.018, "step": 11882000 }, { "epoch": 58.87, "learning_rate": 2.0575551294818253e-05, "loss": 2.0208, "step": 11882500 }, { "epoch": 58.87, "learning_rate": 2.057431270839217e-05, "loss": 1.986, "step": 11883000 }, { "epoch": 58.87, "learning_rate": 2.0573074121966083e-05, "loss": 2.0201, "step": 11883500 }, { "epoch": 58.88, "learning_rate": 2.057183553554e-05, "loss": 2.0019, "step": 11884000 }, { "epoch": 58.88, "learning_rate": 2.057059942628677e-05, "loss": 2.0032, "step": 11884500 }, { "epoch": 58.88, "learning_rate": 2.0569360839860686e-05, "loss": 2.0344, "step": 11885000 }, { "epoch": 58.88, "learning_rate": 2.0568122253434603e-05, "loss": 2.0002, "step": 11885500 }, { "epoch": 58.89, "learning_rate": 2.056688366700852e-05, "loss": 2.0308, "step": 11886000 }, { "epoch": 58.89, "learning_rate": 2.0565645080582434e-05, "loss": 2.0068, "step": 11886500 }, { "epoch": 58.89, "learning_rate": 2.056440649415635e-05, "loss": 1.9798, "step": 11887000 }, { "epoch": 58.89, "learning_rate": 2.056317038490312e-05, "loss": 2.005, "step": 11887500 }, { "epoch": 58.9, "learning_rate": 2.0561931798477036e-05, "loss": 2.0163, "step": 11888000 }, { "epoch": 58.9, "learning_rate": 2.0560693212050953e-05, "loss": 2.0122, "step": 11888500 }, { "epoch": 58.9, "learning_rate": 2.055945462562487e-05, "loss": 1.984, "step": 11889000 }, { "epoch": 58.9, "learning_rate": 2.0558218516371635e-05, "loss": 2.0046, "step": 11889500 }, { "epoch": 58.91, "learning_rate": 2.0556979929945552e-05, "loss": 2.0069, "step": 11890000 }, { "epoch": 58.91, "learning_rate": 2.055574134351947e-05, "loss": 2.0165, "step": 11890500 }, { "epoch": 58.91, "learning_rate": 2.0554502757093386e-05, "loss": 2.0127, "step": 11891000 }, { "epoch": 58.91, "learning_rate": 2.0553266647840155e-05, "loss": 2.0202, "step": 11891500 }, { "epoch": 58.92, "learning_rate": 2.055202806141407e-05, "loss": 2.0219, "step": 11892000 }, { "epoch": 58.92, "learning_rate": 2.0550789474987986e-05, "loss": 2.0103, "step": 11892500 }, { "epoch": 58.92, "learning_rate": 2.0549550888561902e-05, "loss": 2.024, "step": 11893000 }, { "epoch": 58.92, "learning_rate": 2.054831477930867e-05, "loss": 1.998, "step": 11893500 }, { "epoch": 58.93, "learning_rate": 2.0547076192882588e-05, "loss": 2.0179, "step": 11894000 }, { "epoch": 58.93, "learning_rate": 2.0545837606456505e-05, "loss": 1.9992, "step": 11894500 }, { "epoch": 58.93, "learning_rate": 2.054459902003042e-05, "loss": 2.011, "step": 11895000 }, { "epoch": 58.93, "learning_rate": 2.0543360433604336e-05, "loss": 1.9985, "step": 11895500 }, { "epoch": 58.94, "learning_rate": 2.0542121847178253e-05, "loss": 2.0135, "step": 11896000 }, { "epoch": 58.94, "learning_rate": 2.054088573792502e-05, "loss": 1.9762, "step": 11896500 }, { "epoch": 58.94, "learning_rate": 2.0539647151498938e-05, "loss": 2.0247, "step": 11897000 }, { "epoch": 58.94, "learning_rate": 2.0538408565072855e-05, "loss": 2.0064, "step": 11897500 }, { "epoch": 58.95, "learning_rate": 2.053716997864677e-05, "loss": 2.0261, "step": 11898000 }, { "epoch": 58.95, "learning_rate": 2.0535931392220686e-05, "loss": 2.006, "step": 11898500 }, { "epoch": 58.95, "learning_rate": 2.0534692805794603e-05, "loss": 2.0495, "step": 11899000 }, { "epoch": 58.95, "learning_rate": 2.053345421936852e-05, "loss": 2.0134, "step": 11899500 }, { "epoch": 58.96, "learning_rate": 2.0532215632942436e-05, "loss": 1.9833, "step": 11900000 }, { "epoch": 58.96, "learning_rate": 2.0530977046516353e-05, "loss": 2.013, "step": 11900500 }, { "epoch": 58.96, "learning_rate": 2.052973846009027e-05, "loss": 2.0326, "step": 11901000 }, { "epoch": 58.96, "learning_rate": 2.0528499873664187e-05, "loss": 2.0163, "step": 11901500 }, { "epoch": 58.97, "learning_rate": 2.0527261287238104e-05, "loss": 1.9785, "step": 11902000 }, { "epoch": 58.97, "learning_rate": 2.0526022700812018e-05, "loss": 2.0013, "step": 11902500 }, { "epoch": 58.97, "learning_rate": 2.0524784114385935e-05, "loss": 2.0223, "step": 11903000 }, { "epoch": 58.97, "learning_rate": 2.052354552795985e-05, "loss": 2.0012, "step": 11903500 }, { "epoch": 58.98, "learning_rate": 2.052230941870662e-05, "loss": 2.0302, "step": 11904000 }, { "epoch": 58.98, "learning_rate": 2.0521070832280537e-05, "loss": 2.0204, "step": 11904500 }, { "epoch": 58.98, "learning_rate": 2.0519832245854454e-05, "loss": 2.0052, "step": 11905000 }, { "epoch": 58.98, "learning_rate": 2.051859613660122e-05, "loss": 2.005, "step": 11905500 }, { "epoch": 58.99, "learning_rate": 2.0517357550175137e-05, "loss": 2.0064, "step": 11906000 }, { "epoch": 58.99, "learning_rate": 2.0516118963749054e-05, "loss": 2.0225, "step": 11906500 }, { "epoch": 58.99, "learning_rate": 2.051488037732297e-05, "loss": 2.0147, "step": 11907000 }, { "epoch": 58.99, "learning_rate": 2.0513641790896887e-05, "loss": 2.0321, "step": 11907500 }, { "epoch": 59.0, "learning_rate": 2.0512405681643653e-05, "loss": 2.0521, "step": 11908000 }, { "epoch": 59.0, "learning_rate": 2.051116709521757e-05, "loss": 2.0273, "step": 11908500 }, { "epoch": 59.0, "eval_accuracy": 0.6718410116443335, "eval_accuracy_mlm": 0.6309429126349146, "eval_accuracy_nsp": 0.8648567024501979, "eval_loss": 2.301654577255249, "eval_runtime": 147.0681, "eval_samples_per_second": 1733.612, "eval_steps_per_second": 72.239, "step": 11908737 }, { "epoch": 59.0, "learning_rate": 2.0509928508791487e-05, "loss": 2.0323, "step": 11909000 }, { "epoch": 59.0, "learning_rate": 2.0508689922365404e-05, "loss": 1.9926, "step": 11909500 }, { "epoch": 59.01, "learning_rate": 2.050745133593932e-05, "loss": 1.9787, "step": 11910000 }, { "epoch": 59.01, "learning_rate": 2.0506215226686086e-05, "loss": 1.9774, "step": 11910500 }, { "epoch": 59.01, "learning_rate": 2.0504976640260003e-05, "loss": 1.9548, "step": 11911000 }, { "epoch": 59.01, "learning_rate": 2.050373805383392e-05, "loss": 1.9685, "step": 11911500 }, { "epoch": 59.02, "learning_rate": 2.0502499467407837e-05, "loss": 1.9846, "step": 11912000 }, { "epoch": 59.02, "learning_rate": 2.0501260880981754e-05, "loss": 1.9789, "step": 11912500 }, { "epoch": 59.02, "learning_rate": 2.050002229455567e-05, "loss": 1.9849, "step": 11913000 }, { "epoch": 59.02, "learning_rate": 2.0498783708129587e-05, "loss": 1.9933, "step": 11913500 }, { "epoch": 59.03, "learning_rate": 2.0497545121703504e-05, "loss": 1.9821, "step": 11914000 }, { "epoch": 59.03, "learning_rate": 2.049630653527742e-05, "loss": 1.9976, "step": 11914500 }, { "epoch": 59.03, "learning_rate": 2.0495070426024187e-05, "loss": 1.9692, "step": 11915000 }, { "epoch": 59.03, "learning_rate": 2.0493831839598104e-05, "loss": 1.9953, "step": 11915500 }, { "epoch": 59.04, "learning_rate": 2.049259325317202e-05, "loss": 1.9943, "step": 11916000 }, { "epoch": 59.04, "learning_rate": 2.0491354666745938e-05, "loss": 1.9868, "step": 11916500 }, { "epoch": 59.04, "learning_rate": 2.0490116080319854e-05, "loss": 1.9773, "step": 11917000 }, { "epoch": 59.04, "learning_rate": 2.048887997106662e-05, "loss": 2.0109, "step": 11917500 }, { "epoch": 59.05, "learning_rate": 2.0487641384640537e-05, "loss": 1.9637, "step": 11918000 }, { "epoch": 59.05, "learning_rate": 2.048640527538731e-05, "loss": 2.0022, "step": 11918500 }, { "epoch": 59.05, "learning_rate": 2.0485166688961226e-05, "loss": 2.0089, "step": 11919000 }, { "epoch": 59.05, "learning_rate": 2.048392810253514e-05, "loss": 1.9809, "step": 11919500 }, { "epoch": 59.06, "learning_rate": 2.0482689516109056e-05, "loss": 1.9869, "step": 11920000 }, { "epoch": 59.06, "learning_rate": 2.048145092968297e-05, "loss": 2.0172, "step": 11920500 }, { "epoch": 59.06, "learning_rate": 2.0480212343256887e-05, "loss": 1.9819, "step": 11921000 }, { "epoch": 59.06, "learning_rate": 2.047897623400366e-05, "loss": 1.9894, "step": 11921500 }, { "epoch": 59.07, "learning_rate": 2.0477737647577576e-05, "loss": 2.0269, "step": 11922000 }, { "epoch": 59.07, "learning_rate": 2.047649906115149e-05, "loss": 2.0019, "step": 11922500 }, { "epoch": 59.07, "learning_rate": 2.0475260474725406e-05, "loss": 1.9831, "step": 11923000 }, { "epoch": 59.07, "learning_rate": 2.0474021888299323e-05, "loss": 1.9834, "step": 11923500 }, { "epoch": 59.08, "learning_rate": 2.0472783301873237e-05, "loss": 1.9912, "step": 11924000 }, { "epoch": 59.08, "learning_rate": 2.0471544715447154e-05, "loss": 1.9785, "step": 11924500 }, { "epoch": 59.08, "learning_rate": 2.047030612902107e-05, "loss": 2.0146, "step": 11925000 }, { "epoch": 59.08, "learning_rate": 2.0469067542594988e-05, "loss": 2.0137, "step": 11925500 }, { "epoch": 59.09, "learning_rate": 2.0467828956168905e-05, "loss": 2.0124, "step": 11926000 }, { "epoch": 59.09, "learning_rate": 2.046659036974282e-05, "loss": 2.0013, "step": 11926500 }, { "epoch": 59.09, "learning_rate": 2.046535178331674e-05, "loss": 2.0075, "step": 11927000 }, { "epoch": 59.09, "learning_rate": 2.046411815123636e-05, "loss": 1.9794, "step": 11927500 }, { "epoch": 59.1, "learning_rate": 2.0462879564810276e-05, "loss": 1.9938, "step": 11928000 }, { "epoch": 59.1, "learning_rate": 2.0461640978384193e-05, "loss": 1.9568, "step": 11928500 }, { "epoch": 59.1, "learning_rate": 2.046040239195811e-05, "loss": 2.0117, "step": 11929000 }, { "epoch": 59.1, "learning_rate": 2.0459163805532024e-05, "loss": 1.9852, "step": 11929500 }, { "epoch": 59.11, "learning_rate": 2.045792521910594e-05, "loss": 1.9966, "step": 11930000 }, { "epoch": 59.11, "learning_rate": 2.0456686632679854e-05, "loss": 1.9934, "step": 11930500 }, { "epoch": 59.11, "learning_rate": 2.045544804625377e-05, "loss": 2.0097, "step": 11931000 }, { "epoch": 59.11, "learning_rate": 2.0454209459827688e-05, "loss": 1.9844, "step": 11931500 }, { "epoch": 59.12, "learning_rate": 2.045297335057446e-05, "loss": 1.9908, "step": 11932000 }, { "epoch": 59.12, "learning_rate": 2.0451737241321225e-05, "loss": 1.9661, "step": 11932500 }, { "epoch": 59.12, "learning_rate": 2.0450498654895142e-05, "loss": 1.9724, "step": 11933000 }, { "epoch": 59.12, "learning_rate": 2.044926006846906e-05, "loss": 1.9868, "step": 11933500 }, { "epoch": 59.13, "learning_rate": 2.0448021482042976e-05, "loss": 1.9549, "step": 11934000 }, { "epoch": 59.13, "learning_rate": 2.0446782895616893e-05, "loss": 1.9913, "step": 11934500 }, { "epoch": 59.13, "learning_rate": 2.044554430919081e-05, "loss": 1.9982, "step": 11935000 }, { "epoch": 59.13, "learning_rate": 2.0444305722764724e-05, "loss": 2.0034, "step": 11935500 }, { "epoch": 59.14, "learning_rate": 2.044306713633864e-05, "loss": 2.0019, "step": 11936000 }, { "epoch": 59.14, "learning_rate": 2.044183102708541e-05, "loss": 2.0128, "step": 11936500 }, { "epoch": 59.14, "learning_rate": 2.0440592440659326e-05, "loss": 2.003, "step": 11937000 }, { "epoch": 59.14, "learning_rate": 2.0439353854233243e-05, "loss": 1.9929, "step": 11937500 }, { "epoch": 59.14, "learning_rate": 2.043811526780716e-05, "loss": 2.0008, "step": 11938000 }, { "epoch": 59.15, "learning_rate": 2.0436876681381074e-05, "loss": 1.9968, "step": 11938500 }, { "epoch": 59.15, "learning_rate": 2.043563809495499e-05, "loss": 2.0043, "step": 11939000 }, { "epoch": 59.15, "learning_rate": 2.0434399508528908e-05, "loss": 1.9921, "step": 11939500 }, { "epoch": 59.15, "learning_rate": 2.0433160922102825e-05, "loss": 1.9768, "step": 11940000 }, { "epoch": 59.16, "learning_rate": 2.043192233567674e-05, "loss": 1.982, "step": 11940500 }, { "epoch": 59.16, "learning_rate": 2.0430683749250655e-05, "loss": 1.9988, "step": 11941000 }, { "epoch": 59.16, "learning_rate": 2.0429445162824572e-05, "loss": 1.9968, "step": 11941500 }, { "epoch": 59.16, "learning_rate": 2.042820657639849e-05, "loss": 1.9946, "step": 11942000 }, { "epoch": 59.17, "learning_rate": 2.042697294431811e-05, "loss": 1.9651, "step": 11942500 }, { "epoch": 59.17, "learning_rate": 2.0425734357892026e-05, "loss": 1.9739, "step": 11943000 }, { "epoch": 59.17, "learning_rate": 2.0424495771465943e-05, "loss": 1.9968, "step": 11943500 }, { "epoch": 59.17, "learning_rate": 2.042325966221271e-05, "loss": 2.0012, "step": 11944000 }, { "epoch": 59.18, "learning_rate": 2.0422021075786626e-05, "loss": 2.0106, "step": 11944500 }, { "epoch": 59.18, "learning_rate": 2.0420782489360543e-05, "loss": 1.9912, "step": 11945000 }, { "epoch": 59.18, "learning_rate": 2.041954390293446e-05, "loss": 2.0143, "step": 11945500 }, { "epoch": 59.18, "learning_rate": 2.0418305316508377e-05, "loss": 1.9998, "step": 11946000 }, { "epoch": 59.19, "learning_rate": 2.0417066730082293e-05, "loss": 1.9984, "step": 11946500 }, { "epoch": 59.19, "learning_rate": 2.041582814365621e-05, "loss": 1.9758, "step": 11947000 }, { "epoch": 59.19, "learning_rate": 2.0414592034402976e-05, "loss": 2.0099, "step": 11947500 }, { "epoch": 59.19, "learning_rate": 2.0413353447976893e-05, "loss": 2.0121, "step": 11948000 }, { "epoch": 59.2, "learning_rate": 2.041211486155081e-05, "loss": 1.9645, "step": 11948500 }, { "epoch": 59.2, "learning_rate": 2.0410876275124727e-05, "loss": 2.0188, "step": 11949000 }, { "epoch": 59.2, "learning_rate": 2.0409637688698644e-05, "loss": 1.9708, "step": 11949500 }, { "epoch": 59.2, "learning_rate": 2.040839910227256e-05, "loss": 1.9835, "step": 11950000 }, { "epoch": 59.21, "learning_rate": 2.0407162993019326e-05, "loss": 1.9888, "step": 11950500 }, { "epoch": 59.21, "learning_rate": 2.0405924406593243e-05, "loss": 1.9865, "step": 11951000 }, { "epoch": 59.21, "learning_rate": 2.040468582016716e-05, "loss": 1.9995, "step": 11951500 }, { "epoch": 59.21, "learning_rate": 2.0403447233741077e-05, "loss": 2.011, "step": 11952000 }, { "epoch": 59.22, "learning_rate": 2.0402208647314994e-05, "loss": 1.992, "step": 11952500 }, { "epoch": 59.22, "learning_rate": 2.040097006088891e-05, "loss": 2.0033, "step": 11953000 }, { "epoch": 59.22, "learning_rate": 2.0399731474462827e-05, "loss": 1.9993, "step": 11953500 }, { "epoch": 59.22, "learning_rate": 2.0398492888036744e-05, "loss": 1.9801, "step": 11954000 }, { "epoch": 59.23, "learning_rate": 2.039725677878351e-05, "loss": 2.0099, "step": 11954500 }, { "epoch": 59.23, "learning_rate": 2.0396018192357427e-05, "loss": 1.9985, "step": 11955000 }, { "epoch": 59.23, "learning_rate": 2.0394782083104196e-05, "loss": 1.9873, "step": 11955500 }, { "epoch": 59.23, "learning_rate": 2.0393543496678112e-05, "loss": 2.0085, "step": 11956000 }, { "epoch": 59.24, "learning_rate": 2.0392304910252026e-05, "loss": 2.0015, "step": 11956500 }, { "epoch": 59.24, "learning_rate": 2.0391066323825943e-05, "loss": 1.974, "step": 11957000 }, { "epoch": 59.24, "learning_rate": 2.038982773739986e-05, "loss": 1.9872, "step": 11957500 }, { "epoch": 59.24, "learning_rate": 2.0388589150973777e-05, "loss": 1.9789, "step": 11958000 }, { "epoch": 59.25, "learning_rate": 2.0387350564547694e-05, "loss": 1.9893, "step": 11958500 }, { "epoch": 59.25, "learning_rate": 2.038611197812161e-05, "loss": 1.9939, "step": 11959000 }, { "epoch": 59.25, "learning_rate": 2.0384873391695528e-05, "loss": 1.9928, "step": 11959500 }, { "epoch": 59.25, "learning_rate": 2.0383634805269444e-05, "loss": 2.0156, "step": 11960000 }, { "epoch": 59.26, "learning_rate": 2.0382396218843358e-05, "loss": 1.998, "step": 11960500 }, { "epoch": 59.26, "learning_rate": 2.0381157632417275e-05, "loss": 2.0153, "step": 11961000 }, { "epoch": 59.26, "learning_rate": 2.0379919045991192e-05, "loss": 2.0334, "step": 11961500 }, { "epoch": 59.26, "learning_rate": 2.037868045956511e-05, "loss": 2.015, "step": 11962000 }, { "epoch": 59.27, "learning_rate": 2.0377441873139026e-05, "loss": 2.009, "step": 11962500 }, { "epoch": 59.27, "learning_rate": 2.0376203286712943e-05, "loss": 2.0029, "step": 11963000 }, { "epoch": 59.27, "learning_rate": 2.0374967177459708e-05, "loss": 1.9996, "step": 11963500 }, { "epoch": 59.27, "learning_rate": 2.0373728591033625e-05, "loss": 2.0181, "step": 11964000 }, { "epoch": 59.28, "learning_rate": 2.0372492481780394e-05, "loss": 1.9986, "step": 11964500 }, { "epoch": 59.28, "learning_rate": 2.037125389535431e-05, "loss": 2.0068, "step": 11965000 }, { "epoch": 59.28, "learning_rate": 2.0370015308928228e-05, "loss": 2.004, "step": 11965500 }, { "epoch": 59.28, "learning_rate": 2.0368776722502145e-05, "loss": 1.9954, "step": 11966000 }, { "epoch": 59.29, "learning_rate": 2.036753813607606e-05, "loss": 2.0207, "step": 11966500 }, { "epoch": 59.29, "learning_rate": 2.0366299549649975e-05, "loss": 2.008, "step": 11967000 }, { "epoch": 59.29, "learning_rate": 2.0365060963223892e-05, "loss": 1.9869, "step": 11967500 }, { "epoch": 59.29, "learning_rate": 2.036382237679781e-05, "loss": 2.0009, "step": 11968000 }, { "epoch": 59.3, "learning_rate": 2.0362583790371726e-05, "loss": 2.0108, "step": 11968500 }, { "epoch": 59.3, "learning_rate": 2.0361347681118495e-05, "loss": 2.0088, "step": 11969000 }, { "epoch": 59.3, "learning_rate": 2.036010909469241e-05, "loss": 1.9682, "step": 11969500 }, { "epoch": 59.3, "learning_rate": 2.0358870508266325e-05, "loss": 1.9903, "step": 11970000 }, { "epoch": 59.31, "learning_rate": 2.0357631921840242e-05, "loss": 1.9763, "step": 11970500 }, { "epoch": 59.31, "learning_rate": 2.035639333541416e-05, "loss": 1.9943, "step": 11971000 }, { "epoch": 59.31, "learning_rate": 2.0355154748988076e-05, "loss": 2.01, "step": 11971500 }, { "epoch": 59.31, "learning_rate": 2.0353916162561993e-05, "loss": 1.9997, "step": 11972000 }, { "epoch": 59.32, "learning_rate": 2.035267757613591e-05, "loss": 1.9988, "step": 11972500 }, { "epoch": 59.32, "learning_rate": 2.0351441466882675e-05, "loss": 2.018, "step": 11973000 }, { "epoch": 59.32, "learning_rate": 2.0350202880456592e-05, "loss": 2.0005, "step": 11973500 }, { "epoch": 59.32, "learning_rate": 2.034896429403051e-05, "loss": 2.0087, "step": 11974000 }, { "epoch": 59.33, "learning_rate": 2.0347725707604426e-05, "loss": 2.0089, "step": 11974500 }, { "epoch": 59.33, "learning_rate": 2.0346487121178343e-05, "loss": 2.0196, "step": 11975000 }, { "epoch": 59.33, "learning_rate": 2.0345251011925112e-05, "loss": 2.0099, "step": 11975500 }, { "epoch": 59.33, "learning_rate": 2.0344012425499025e-05, "loss": 2.0142, "step": 11976000 }, { "epoch": 59.34, "learning_rate": 2.0342776316245794e-05, "loss": 2.0046, "step": 11976500 }, { "epoch": 59.34, "learning_rate": 2.034153772981971e-05, "loss": 1.9933, "step": 11977000 }, { "epoch": 59.34, "learning_rate": 2.0340299143393628e-05, "loss": 2.0019, "step": 11977500 }, { "epoch": 59.34, "learning_rate": 2.0339060556967545e-05, "loss": 1.9778, "step": 11978000 }, { "epoch": 59.35, "learning_rate": 2.0337824447714314e-05, "loss": 1.9888, "step": 11978500 }, { "epoch": 59.35, "learning_rate": 2.033658586128823e-05, "loss": 1.9951, "step": 11979000 }, { "epoch": 59.35, "learning_rate": 2.0335347274862148e-05, "loss": 1.9998, "step": 11979500 }, { "epoch": 59.35, "learning_rate": 2.033410868843606e-05, "loss": 1.9971, "step": 11980000 }, { "epoch": 59.36, "learning_rate": 2.0332870102009978e-05, "loss": 1.989, "step": 11980500 }, { "epoch": 59.36, "learning_rate": 2.0331631515583895e-05, "loss": 2.0135, "step": 11981000 }, { "epoch": 59.36, "learning_rate": 2.0330392929157812e-05, "loss": 2.0021, "step": 11981500 }, { "epoch": 59.36, "learning_rate": 2.032915434273173e-05, "loss": 2.0231, "step": 11982000 }, { "epoch": 59.37, "learning_rate": 2.0327915756305642e-05, "loss": 2.0134, "step": 11982500 }, { "epoch": 59.37, "learning_rate": 2.032667716987956e-05, "loss": 2.0079, "step": 11983000 }, { "epoch": 59.37, "learning_rate": 2.0325438583453476e-05, "loss": 2.0208, "step": 11983500 }, { "epoch": 59.37, "learning_rate": 2.0324199997027393e-05, "loss": 2.003, "step": 11984000 }, { "epoch": 59.38, "learning_rate": 2.032296141060131e-05, "loss": 2.0118, "step": 11984500 }, { "epoch": 59.38, "learning_rate": 2.032172530134808e-05, "loss": 2.0081, "step": 11985000 }, { "epoch": 59.38, "learning_rate": 2.0320486714921992e-05, "loss": 1.9961, "step": 11985500 }, { "epoch": 59.38, "learning_rate": 2.0319250605668765e-05, "loss": 1.9753, "step": 11986000 }, { "epoch": 59.39, "learning_rate": 2.0318014496415533e-05, "loss": 2.0029, "step": 11986500 }, { "epoch": 59.39, "learning_rate": 2.031677590998945e-05, "loss": 2.007, "step": 11987000 }, { "epoch": 59.39, "learning_rate": 2.0315537323563364e-05, "loss": 1.9788, "step": 11987500 }, { "epoch": 59.39, "learning_rate": 2.031429873713728e-05, "loss": 1.9962, "step": 11988000 }, { "epoch": 59.4, "learning_rate": 2.0313060150711198e-05, "loss": 2.0183, "step": 11988500 }, { "epoch": 59.4, "learning_rate": 2.0311821564285115e-05, "loss": 1.9952, "step": 11989000 }, { "epoch": 59.4, "learning_rate": 2.031058297785903e-05, "loss": 1.9976, "step": 11989500 }, { "epoch": 59.4, "learning_rate": 2.03093468686058e-05, "loss": 2.0119, "step": 11990000 }, { "epoch": 59.41, "learning_rate": 2.0308110759352566e-05, "loss": 2.0157, "step": 11990500 }, { "epoch": 59.41, "learning_rate": 2.0306872172926483e-05, "loss": 2.022, "step": 11991000 }, { "epoch": 59.41, "learning_rate": 2.03056335865004e-05, "loss": 2.0047, "step": 11991500 }, { "epoch": 59.41, "learning_rate": 2.0304395000074317e-05, "loss": 1.9901, "step": 11992000 }, { "epoch": 59.41, "learning_rate": 2.0303156413648233e-05, "loss": 1.9925, "step": 11992500 }, { "epoch": 59.42, "learning_rate": 2.030191782722215e-05, "loss": 1.9857, "step": 11993000 }, { "epoch": 59.42, "learning_rate": 2.0300679240796064e-05, "loss": 1.9892, "step": 11993500 }, { "epoch": 59.42, "learning_rate": 2.029944065436998e-05, "loss": 1.9909, "step": 11994000 }, { "epoch": 59.42, "learning_rate": 2.029820454511675e-05, "loss": 2.0222, "step": 11994500 }, { "epoch": 59.43, "learning_rate": 2.029696843586352e-05, "loss": 1.9896, "step": 11995000 }, { "epoch": 59.43, "learning_rate": 2.0295729849437432e-05, "loss": 1.989, "step": 11995500 }, { "epoch": 59.43, "learning_rate": 2.029449126301135e-05, "loss": 1.985, "step": 11996000 }, { "epoch": 59.43, "learning_rate": 2.0293252676585266e-05, "loss": 2.0126, "step": 11996500 }, { "epoch": 59.44, "learning_rate": 2.0292014090159183e-05, "loss": 2.0054, "step": 11997000 }, { "epoch": 59.44, "learning_rate": 2.02907755037331e-05, "loss": 1.9993, "step": 11997500 }, { "epoch": 59.44, "learning_rate": 2.028953939447987e-05, "loss": 1.9967, "step": 11998000 }, { "epoch": 59.44, "learning_rate": 2.0288300808053786e-05, "loss": 1.9822, "step": 11998500 }, { "epoch": 59.45, "learning_rate": 2.02870622216277e-05, "loss": 1.9908, "step": 11999000 }, { "epoch": 59.45, "learning_rate": 2.0285823635201616e-05, "loss": 2.0149, "step": 11999500 }, { "epoch": 59.45, "learning_rate": 2.0284585048775533e-05, "loss": 2.0043, "step": 12000000 }, { "epoch": 59.45, "learning_rate": 2.028334646234945e-05, "loss": 2.0192, "step": 12000500 }, { "epoch": 59.46, "learning_rate": 2.0282107875923367e-05, "loss": 1.998, "step": 12001000 }, { "epoch": 59.46, "learning_rate": 2.0280869289497284e-05, "loss": 1.9847, "step": 12001500 }, { "epoch": 59.46, "learning_rate": 2.0279633180244052e-05, "loss": 2.0048, "step": 12002000 }, { "epoch": 59.46, "learning_rate": 2.0278394593817966e-05, "loss": 2.0017, "step": 12002500 }, { "epoch": 59.47, "learning_rate": 2.0277156007391883e-05, "loss": 1.9956, "step": 12003000 }, { "epoch": 59.47, "learning_rate": 2.02759174209658e-05, "loss": 1.9992, "step": 12003500 }, { "epoch": 59.47, "learning_rate": 2.0274678834539717e-05, "loss": 2.009, "step": 12004000 }, { "epoch": 59.47, "learning_rate": 2.0273440248113634e-05, "loss": 2.0127, "step": 12004500 }, { "epoch": 59.48, "learning_rate": 2.027220166168755e-05, "loss": 1.9867, "step": 12005000 }, { "epoch": 59.48, "learning_rate": 2.0270963075261468e-05, "loss": 1.9748, "step": 12005500 }, { "epoch": 59.48, "learning_rate": 2.026972448883538e-05, "loss": 2.0022, "step": 12006000 }, { "epoch": 59.48, "learning_rate": 2.0268490856755005e-05, "loss": 1.996, "step": 12006500 }, { "epoch": 59.49, "learning_rate": 2.026725474750177e-05, "loss": 2.0057, "step": 12007000 }, { "epoch": 59.49, "learning_rate": 2.0266016161075688e-05, "loss": 1.9873, "step": 12007500 }, { "epoch": 59.49, "learning_rate": 2.0264780051822456e-05, "loss": 1.9833, "step": 12008000 }, { "epoch": 59.49, "learning_rate": 2.0263541465396373e-05, "loss": 2.0155, "step": 12008500 }, { "epoch": 59.5, "learning_rate": 2.026230287897029e-05, "loss": 2.0269, "step": 12009000 }, { "epoch": 59.5, "learning_rate": 2.0261064292544207e-05, "loss": 2.0182, "step": 12009500 }, { "epoch": 59.5, "learning_rate": 2.025982570611812e-05, "loss": 1.9996, "step": 12010000 }, { "epoch": 59.5, "learning_rate": 2.0258587119692038e-05, "loss": 1.9912, "step": 12010500 }, { "epoch": 59.51, "learning_rate": 2.0257348533265955e-05, "loss": 2.0056, "step": 12011000 }, { "epoch": 59.51, "learning_rate": 2.025610994683987e-05, "loss": 1.9955, "step": 12011500 }, { "epoch": 59.51, "learning_rate": 2.025487136041379e-05, "loss": 1.9685, "step": 12012000 }, { "epoch": 59.51, "learning_rate": 2.0253632773987705e-05, "loss": 1.9891, "step": 12012500 }, { "epoch": 59.52, "learning_rate": 2.0252394187561622e-05, "loss": 1.995, "step": 12013000 }, { "epoch": 59.52, "learning_rate": 2.025115560113554e-05, "loss": 2.0196, "step": 12013500 }, { "epoch": 59.52, "learning_rate": 2.0249917014709456e-05, "loss": 2.0061, "step": 12014000 }, { "epoch": 59.52, "learning_rate": 2.024867842828337e-05, "loss": 1.9962, "step": 12014500 }, { "epoch": 59.53, "learning_rate": 2.0247439841857287e-05, "loss": 2.0143, "step": 12015000 }, { "epoch": 59.53, "learning_rate": 2.0246201255431204e-05, "loss": 2.0124, "step": 12015500 }, { "epoch": 59.53, "learning_rate": 2.0244965146177972e-05, "loss": 2.0324, "step": 12016000 }, { "epoch": 59.53, "learning_rate": 2.024372655975189e-05, "loss": 2.0114, "step": 12016500 }, { "epoch": 59.54, "learning_rate": 2.0242487973325806e-05, "loss": 2.0105, "step": 12017000 }, { "epoch": 59.54, "learning_rate": 2.024124938689972e-05, "loss": 2.0222, "step": 12017500 }, { "epoch": 59.54, "learning_rate": 2.024001327764649e-05, "loss": 1.9908, "step": 12018000 }, { "epoch": 59.54, "learning_rate": 2.0238774691220405e-05, "loss": 1.9858, "step": 12018500 }, { "epoch": 59.55, "learning_rate": 2.0237538581967174e-05, "loss": 2.0092, "step": 12019000 }, { "epoch": 59.55, "learning_rate": 2.0236302472713943e-05, "loss": 1.994, "step": 12019500 }, { "epoch": 59.55, "learning_rate": 2.0235063886287857e-05, "loss": 2.0024, "step": 12020000 }, { "epoch": 59.55, "learning_rate": 2.0233825299861774e-05, "loss": 1.9996, "step": 12020500 }, { "epoch": 59.56, "learning_rate": 2.023258671343569e-05, "loss": 1.9917, "step": 12021000 }, { "epoch": 59.56, "learning_rate": 2.0231348127009607e-05, "loss": 1.9798, "step": 12021500 }, { "epoch": 59.56, "learning_rate": 2.0230109540583524e-05, "loss": 2.011, "step": 12022000 }, { "epoch": 59.56, "learning_rate": 2.0228870954157438e-05, "loss": 2.0058, "step": 12022500 }, { "epoch": 59.57, "learning_rate": 2.0227632367731355e-05, "loss": 2.0208, "step": 12023000 }, { "epoch": 59.57, "learning_rate": 2.0226396258478124e-05, "loss": 2.0031, "step": 12023500 }, { "epoch": 59.57, "learning_rate": 2.022515767205204e-05, "loss": 1.993, "step": 12024000 }, { "epoch": 59.57, "learning_rate": 2.0223919085625957e-05, "loss": 2.0166, "step": 12024500 }, { "epoch": 59.58, "learning_rate": 2.0222680499199874e-05, "loss": 1.9928, "step": 12025000 }, { "epoch": 59.58, "learning_rate": 2.0221441912773788e-05, "loss": 1.9831, "step": 12025500 }, { "epoch": 59.58, "learning_rate": 2.0220203326347705e-05, "loss": 2.019, "step": 12026000 }, { "epoch": 59.58, "learning_rate": 2.0218964739921622e-05, "loss": 1.9997, "step": 12026500 }, { "epoch": 59.59, "learning_rate": 2.021772615349554e-05, "loss": 2.0029, "step": 12027000 }, { "epoch": 59.59, "learning_rate": 2.0216487567069456e-05, "loss": 2.0112, "step": 12027500 }, { "epoch": 59.59, "learning_rate": 2.0215248980643373e-05, "loss": 1.9983, "step": 12028000 }, { "epoch": 59.59, "learning_rate": 2.021401039421729e-05, "loss": 2.0275, "step": 12028500 }, { "epoch": 59.6, "learning_rate": 2.0212771807791206e-05, "loss": 1.993, "step": 12029000 }, { "epoch": 59.6, "learning_rate": 2.0211533221365123e-05, "loss": 2.0269, "step": 12029500 }, { "epoch": 59.6, "learning_rate": 2.021029711211189e-05, "loss": 2.016, "step": 12030000 }, { "epoch": 59.6, "learning_rate": 2.0209058525685806e-05, "loss": 1.997, "step": 12030500 }, { "epoch": 59.61, "learning_rate": 2.0207822416432575e-05, "loss": 2.0558, "step": 12031000 }, { "epoch": 59.61, "learning_rate": 2.020658383000649e-05, "loss": 2.0083, "step": 12031500 }, { "epoch": 59.61, "learning_rate": 2.0205345243580405e-05, "loss": 2.0206, "step": 12032000 }, { "epoch": 59.61, "learning_rate": 2.0204106657154322e-05, "loss": 2.0095, "step": 12032500 }, { "epoch": 59.62, "learning_rate": 2.020286807072824e-05, "loss": 2.0237, "step": 12033000 }, { "epoch": 59.62, "learning_rate": 2.0201629484302156e-05, "loss": 1.9968, "step": 12033500 }, { "epoch": 59.62, "learning_rate": 2.0200390897876073e-05, "loss": 2.0235, "step": 12034000 }, { "epoch": 59.62, "learning_rate": 2.019915231144999e-05, "loss": 2.0172, "step": 12034500 }, { "epoch": 59.63, "learning_rate": 2.0197916202196755e-05, "loss": 2.0039, "step": 12035000 }, { "epoch": 59.63, "learning_rate": 2.0196677615770672e-05, "loss": 2.0077, "step": 12035500 }, { "epoch": 59.63, "learning_rate": 2.019543902934459e-05, "loss": 2.0116, "step": 12036000 }, { "epoch": 59.63, "learning_rate": 2.0194200442918506e-05, "loss": 2.013, "step": 12036500 }, { "epoch": 59.64, "learning_rate": 2.0192964333665275e-05, "loss": 1.9905, "step": 12037000 }, { "epoch": 59.64, "learning_rate": 2.019172574723919e-05, "loss": 2.0091, "step": 12037500 }, { "epoch": 59.64, "learning_rate": 2.019048716081311e-05, "loss": 2.0116, "step": 12038000 }, { "epoch": 59.64, "learning_rate": 2.0189248574387022e-05, "loss": 2.012, "step": 12038500 }, { "epoch": 59.65, "learning_rate": 2.018800998796094e-05, "loss": 2.0098, "step": 12039000 }, { "epoch": 59.65, "learning_rate": 2.018677387870771e-05, "loss": 1.9854, "step": 12039500 }, { "epoch": 59.65, "learning_rate": 2.0185535292281628e-05, "loss": 1.9876, "step": 12040000 }, { "epoch": 59.65, "learning_rate": 2.018429670585554e-05, "loss": 2.0208, "step": 12040500 }, { "epoch": 59.66, "learning_rate": 2.018305811942946e-05, "loss": 1.9959, "step": 12041000 }, { "epoch": 59.66, "learning_rate": 2.0181819533003372e-05, "loss": 2.0069, "step": 12041500 }, { "epoch": 59.66, "learning_rate": 2.018058094657729e-05, "loss": 1.9807, "step": 12042000 }, { "epoch": 59.66, "learning_rate": 2.0179347314496913e-05, "loss": 1.998, "step": 12042500 }, { "epoch": 59.67, "learning_rate": 2.0178108728070827e-05, "loss": 2.0118, "step": 12043000 }, { "epoch": 59.67, "learning_rate": 2.0176870141644744e-05, "loss": 1.9778, "step": 12043500 }, { "epoch": 59.67, "learning_rate": 2.017563155521866e-05, "loss": 2.0047, "step": 12044000 }, { "epoch": 59.67, "learning_rate": 2.0174392968792577e-05, "loss": 2.0002, "step": 12044500 }, { "epoch": 59.68, "learning_rate": 2.0173154382366494e-05, "loss": 1.9995, "step": 12045000 }, { "epoch": 59.68, "learning_rate": 2.017191579594041e-05, "loss": 2.0198, "step": 12045500 }, { "epoch": 59.68, "learning_rate": 2.017067968668718e-05, "loss": 2.0186, "step": 12046000 }, { "epoch": 59.68, "learning_rate": 2.0169441100261094e-05, "loss": 2.0072, "step": 12046500 }, { "epoch": 59.69, "learning_rate": 2.016820251383501e-05, "loss": 1.9922, "step": 12047000 }, { "epoch": 59.69, "learning_rate": 2.0166963927408928e-05, "loss": 2.0039, "step": 12047500 }, { "epoch": 59.69, "learning_rate": 2.0165725340982844e-05, "loss": 2.013, "step": 12048000 }, { "epoch": 59.69, "learning_rate": 2.016448675455676e-05, "loss": 2.0053, "step": 12048500 }, { "epoch": 59.69, "learning_rate": 2.016325064530353e-05, "loss": 1.999, "step": 12049000 }, { "epoch": 59.7, "learning_rate": 2.0162012058877444e-05, "loss": 2.0148, "step": 12049500 }, { "epoch": 59.7, "learning_rate": 2.016077347245136e-05, "loss": 2.0122, "step": 12050000 }, { "epoch": 59.7, "learning_rate": 2.0159534886025278e-05, "loss": 2.0, "step": 12050500 }, { "epoch": 59.7, "learning_rate": 2.0158296299599194e-05, "loss": 1.9933, "step": 12051000 }, { "epoch": 59.71, "learning_rate": 2.015705771317311e-05, "loss": 2.0218, "step": 12051500 }, { "epoch": 59.71, "learning_rate": 2.015581912674703e-05, "loss": 2.0331, "step": 12052000 }, { "epoch": 59.71, "learning_rate": 2.0154580540320945e-05, "loss": 1.9863, "step": 12052500 }, { "epoch": 59.71, "learning_rate": 2.0153341953894862e-05, "loss": 2.0415, "step": 12053000 }, { "epoch": 59.72, "learning_rate": 2.015210336746878e-05, "loss": 1.9757, "step": 12053500 }, { "epoch": 59.72, "learning_rate": 2.0150864781042693e-05, "loss": 2.0029, "step": 12054000 }, { "epoch": 59.72, "learning_rate": 2.014962619461661e-05, "loss": 1.9893, "step": 12054500 }, { "epoch": 59.72, "learning_rate": 2.0148387608190527e-05, "loss": 2.0268, "step": 12055000 }, { "epoch": 59.73, "learning_rate": 2.014714902176444e-05, "loss": 2.0108, "step": 12055500 }, { "epoch": 59.73, "learning_rate": 2.0145910435338357e-05, "loss": 1.9962, "step": 12056000 }, { "epoch": 59.73, "learning_rate": 2.014467432608513e-05, "loss": 1.9963, "step": 12056500 }, { "epoch": 59.73, "learning_rate": 2.0143435739659043e-05, "loss": 1.9908, "step": 12057000 }, { "epoch": 59.74, "learning_rate": 2.014219715323296e-05, "loss": 2.0168, "step": 12057500 }, { "epoch": 59.74, "learning_rate": 2.0140958566806877e-05, "loss": 2.0134, "step": 12058000 }, { "epoch": 59.74, "learning_rate": 2.013971998038079e-05, "loss": 2.0223, "step": 12058500 }, { "epoch": 59.74, "learning_rate": 2.0138481393954707e-05, "loss": 1.9892, "step": 12059000 }, { "epoch": 59.75, "learning_rate": 2.0137242807528624e-05, "loss": 1.9937, "step": 12059500 }, { "epoch": 59.75, "learning_rate": 2.013600422110254e-05, "loss": 1.9969, "step": 12060000 }, { "epoch": 59.75, "learning_rate": 2.0134765634676458e-05, "loss": 2.0426, "step": 12060500 }, { "epoch": 59.75, "learning_rate": 2.013352704825037e-05, "loss": 2.0274, "step": 12061000 }, { "epoch": 59.76, "learning_rate": 2.0132290938997144e-05, "loss": 2.0163, "step": 12061500 }, { "epoch": 59.76, "learning_rate": 2.0131054829743912e-05, "loss": 2.0211, "step": 12062000 }, { "epoch": 59.76, "learning_rate": 2.0129818720490678e-05, "loss": 2.0226, "step": 12062500 }, { "epoch": 59.76, "learning_rate": 2.0128582611237447e-05, "loss": 2.0277, "step": 12063000 }, { "epoch": 59.77, "learning_rate": 2.0127344024811364e-05, "loss": 2.0123, "step": 12063500 }, { "epoch": 59.77, "learning_rate": 2.012610543838528e-05, "loss": 1.9969, "step": 12064000 }, { "epoch": 59.77, "learning_rate": 2.0124866851959197e-05, "loss": 2.0133, "step": 12064500 }, { "epoch": 59.77, "learning_rate": 2.0123630742705966e-05, "loss": 2.0258, "step": 12065000 }, { "epoch": 59.78, "learning_rate": 2.0122394633452735e-05, "loss": 2.0113, "step": 12065500 }, { "epoch": 59.78, "learning_rate": 2.01211585241995e-05, "loss": 2.016, "step": 12066000 }, { "epoch": 59.78, "learning_rate": 2.0119919937773417e-05, "loss": 2.0098, "step": 12066500 }, { "epoch": 59.78, "learning_rate": 2.0118681351347334e-05, "loss": 2.0102, "step": 12067000 }, { "epoch": 59.79, "learning_rate": 2.011744276492125e-05, "loss": 2.0232, "step": 12067500 }, { "epoch": 59.79, "learning_rate": 2.0116204178495168e-05, "loss": 2.0065, "step": 12068000 }, { "epoch": 59.79, "learning_rate": 2.0114965592069085e-05, "loss": 2.0007, "step": 12068500 }, { "epoch": 59.79, "learning_rate": 2.0113727005643002e-05, "loss": 2.0057, "step": 12069000 }, { "epoch": 59.8, "learning_rate": 2.011248841921692e-05, "loss": 2.0183, "step": 12069500 }, { "epoch": 59.8, "learning_rate": 2.0111249832790832e-05, "loss": 2.0126, "step": 12070000 }, { "epoch": 59.8, "learning_rate": 2.011001124636475e-05, "loss": 2.026, "step": 12070500 }, { "epoch": 59.8, "learning_rate": 2.0108772659938666e-05, "loss": 2.0079, "step": 12071000 }, { "epoch": 59.81, "learning_rate": 2.0107534073512583e-05, "loss": 1.9713, "step": 12071500 }, { "epoch": 59.81, "learning_rate": 2.01062954870865e-05, "loss": 2.0021, "step": 12072000 }, { "epoch": 59.81, "learning_rate": 2.0105056900660417e-05, "loss": 2.0054, "step": 12072500 }, { "epoch": 59.81, "learning_rate": 2.0103820791407183e-05, "loss": 2.0039, "step": 12073000 }, { "epoch": 59.82, "learning_rate": 2.01025822049811e-05, "loss": 1.9962, "step": 12073500 }, { "epoch": 59.82, "learning_rate": 2.0101343618555016e-05, "loss": 2.0101, "step": 12074000 }, { "epoch": 59.82, "learning_rate": 2.0100105032128933e-05, "loss": 1.9955, "step": 12074500 }, { "epoch": 59.82, "learning_rate": 2.009886644570285e-05, "loss": 1.9914, "step": 12075000 }, { "epoch": 59.83, "learning_rate": 2.0097627859276767e-05, "loss": 2.0031, "step": 12075500 }, { "epoch": 59.83, "learning_rate": 2.0096389272850684e-05, "loss": 2.0217, "step": 12076000 }, { "epoch": 59.83, "learning_rate": 2.0095150686424598e-05, "loss": 2.0146, "step": 12076500 }, { "epoch": 59.83, "learning_rate": 2.0093912099998515e-05, "loss": 2.0016, "step": 12077000 }, { "epoch": 59.84, "learning_rate": 2.0092673513572428e-05, "loss": 1.9898, "step": 12077500 }, { "epoch": 59.84, "learning_rate": 2.0091434927146345e-05, "loss": 2.0219, "step": 12078000 }, { "epoch": 59.84, "learning_rate": 2.0090196340720262e-05, "loss": 2.0019, "step": 12078500 }, { "epoch": 59.84, "learning_rate": 2.008895775429418e-05, "loss": 1.996, "step": 12079000 }, { "epoch": 59.85, "learning_rate": 2.0087719167868096e-05, "loss": 2.0248, "step": 12079500 }, { "epoch": 59.85, "learning_rate": 2.0086480581442013e-05, "loss": 1.9972, "step": 12080000 }, { "epoch": 59.85, "learning_rate": 2.008524199501593e-05, "loss": 2.0043, "step": 12080500 }, { "epoch": 59.85, "learning_rate": 2.0084003408589847e-05, "loss": 2.0061, "step": 12081000 }, { "epoch": 59.86, "learning_rate": 2.0082767299336612e-05, "loss": 2.0163, "step": 12081500 }, { "epoch": 59.86, "learning_rate": 2.0081531190083384e-05, "loss": 1.9955, "step": 12082000 }, { "epoch": 59.86, "learning_rate": 2.008029508083015e-05, "loss": 2.0225, "step": 12082500 }, { "epoch": 59.86, "learning_rate": 2.0079056494404067e-05, "loss": 2.0165, "step": 12083000 }, { "epoch": 59.87, "learning_rate": 2.0077817907977984e-05, "loss": 1.9911, "step": 12083500 }, { "epoch": 59.87, "learning_rate": 2.00765793215519e-05, "loss": 2.0057, "step": 12084000 }, { "epoch": 59.87, "learning_rate": 2.007534321229867e-05, "loss": 2.0138, "step": 12084500 }, { "epoch": 59.87, "learning_rate": 2.0074104625872586e-05, "loss": 1.9709, "step": 12085000 }, { "epoch": 59.88, "learning_rate": 2.00728660394465e-05, "loss": 2.0029, "step": 12085500 }, { "epoch": 59.88, "learning_rate": 2.007162993019327e-05, "loss": 1.9906, "step": 12086000 }, { "epoch": 59.88, "learning_rate": 2.0070391343767185e-05, "loss": 1.9822, "step": 12086500 }, { "epoch": 59.88, "learning_rate": 2.0069152757341102e-05, "loss": 2.0049, "step": 12087000 }, { "epoch": 59.89, "learning_rate": 2.006791417091502e-05, "loss": 2.0259, "step": 12087500 }, { "epoch": 59.89, "learning_rate": 2.0066675584488936e-05, "loss": 1.9911, "step": 12088000 }, { "epoch": 59.89, "learning_rate": 2.0065436998062853e-05, "loss": 2.0162, "step": 12088500 }, { "epoch": 59.89, "learning_rate": 2.0064198411636767e-05, "loss": 1.994, "step": 12089000 }, { "epoch": 59.9, "learning_rate": 2.0062959825210684e-05, "loss": 2.0509, "step": 12089500 }, { "epoch": 59.9, "learning_rate": 2.0061723715957452e-05, "loss": 2.0027, "step": 12090000 }, { "epoch": 59.9, "learning_rate": 2.006048512953137e-05, "loss": 2.0255, "step": 12090500 }, { "epoch": 59.9, "learning_rate": 2.0059246543105286e-05, "loss": 2.0403, "step": 12091000 }, { "epoch": 59.91, "learning_rate": 2.0058007956679203e-05, "loss": 1.994, "step": 12091500 }, { "epoch": 59.91, "learning_rate": 2.0056769370253117e-05, "loss": 1.9842, "step": 12092000 }, { "epoch": 59.91, "learning_rate": 2.0055530783827034e-05, "loss": 2.0023, "step": 12092500 }, { "epoch": 59.91, "learning_rate": 2.005429219740095e-05, "loss": 1.9903, "step": 12093000 }, { "epoch": 59.92, "learning_rate": 2.0053053610974868e-05, "loss": 1.9969, "step": 12093500 }, { "epoch": 59.92, "learning_rate": 2.0051815024548784e-05, "loss": 2.021, "step": 12094000 }, { "epoch": 59.92, "learning_rate": 2.00505764381227e-05, "loss": 2.0149, "step": 12094500 }, { "epoch": 59.92, "learning_rate": 2.0049340328869467e-05, "loss": 2.0043, "step": 12095000 }, { "epoch": 59.93, "learning_rate": 2.0048104219616236e-05, "loss": 2.0112, "step": 12095500 }, { "epoch": 59.93, "learning_rate": 2.0046868110363008e-05, "loss": 1.9818, "step": 12096000 }, { "epoch": 59.93, "learning_rate": 2.0045629523936925e-05, "loss": 2.0218, "step": 12096500 }, { "epoch": 59.93, "learning_rate": 2.004439093751084e-05, "loss": 2.0058, "step": 12097000 }, { "epoch": 59.94, "learning_rate": 2.0043152351084755e-05, "loss": 1.9971, "step": 12097500 }, { "epoch": 59.94, "learning_rate": 2.0041913764658672e-05, "loss": 1.991, "step": 12098000 }, { "epoch": 59.94, "learning_rate": 2.004067517823259e-05, "loss": 1.9903, "step": 12098500 }, { "epoch": 59.94, "learning_rate": 2.0039436591806503e-05, "loss": 2.0102, "step": 12099000 }, { "epoch": 59.95, "learning_rate": 2.003819800538042e-05, "loss": 1.9986, "step": 12099500 }, { "epoch": 59.95, "learning_rate": 2.0036959418954336e-05, "loss": 2.0058, "step": 12100000 }, { "epoch": 59.95, "learning_rate": 2.0035723309701105e-05, "loss": 2.0307, "step": 12100500 }, { "epoch": 59.95, "learning_rate": 2.0034484723275022e-05, "loss": 2.0108, "step": 12101000 }, { "epoch": 59.96, "learning_rate": 2.003324613684894e-05, "loss": 2.0166, "step": 12101500 }, { "epoch": 59.96, "learning_rate": 2.0032007550422853e-05, "loss": 1.9747, "step": 12102000 }, { "epoch": 59.96, "learning_rate": 2.0030771441169625e-05, "loss": 1.9978, "step": 12102500 }, { "epoch": 59.96, "learning_rate": 2.002953285474354e-05, "loss": 1.9947, "step": 12103000 }, { "epoch": 59.96, "learning_rate": 2.0028294268317455e-05, "loss": 2.0006, "step": 12103500 }, { "epoch": 59.97, "learning_rate": 2.0027055681891372e-05, "loss": 2.0067, "step": 12104000 }, { "epoch": 59.97, "learning_rate": 2.002581709546529e-05, "loss": 2.0141, "step": 12104500 }, { "epoch": 59.97, "learning_rate": 2.0024580986212058e-05, "loss": 2.0113, "step": 12105000 }, { "epoch": 59.97, "learning_rate": 2.0023342399785975e-05, "loss": 1.9978, "step": 12105500 }, { "epoch": 59.98, "learning_rate": 2.0022103813359892e-05, "loss": 1.9913, "step": 12106000 }, { "epoch": 59.98, "learning_rate": 2.0020865226933805e-05, "loss": 2.0314, "step": 12106500 }, { "epoch": 59.98, "learning_rate": 2.0019626640507722e-05, "loss": 1.9893, "step": 12107000 }, { "epoch": 59.98, "learning_rate": 2.001838805408164e-05, "loss": 1.9992, "step": 12107500 }, { "epoch": 59.99, "learning_rate": 2.0017149467655556e-05, "loss": 2.0042, "step": 12108000 }, { "epoch": 59.99, "learning_rate": 2.0015910881229473e-05, "loss": 2.0184, "step": 12108500 }, { "epoch": 59.99, "learning_rate": 2.0014672294803387e-05, "loss": 2.0248, "step": 12109000 }, { "epoch": 59.99, "learning_rate": 2.0013436185550155e-05, "loss": 1.996, "step": 12109500 }, { "epoch": 60.0, "learning_rate": 2.0012197599124072e-05, "loss": 2.0304, "step": 12110000 }, { "epoch": 60.0, "learning_rate": 2.001095901269799e-05, "loss": 2.0066, "step": 12110500 }, { "epoch": 60.0, "eval_accuracy": 0.6722566033183166, "eval_accuracy_mlm": 0.6314998592020786, "eval_accuracy_nsp": 0.8646998144799752, "eval_loss": 2.284968376159668, "eval_runtime": 146.7996, "eval_samples_per_second": 1736.783, "eval_steps_per_second": 72.371, "step": 12110580 }, { "epoch": 60.0, "learning_rate": 2.0009722903444758e-05, "loss": 2.0074, "step": 12111000 }, { "epoch": 60.0, "learning_rate": 2.0008484317018675e-05, "loss": 2.0079, "step": 12111500 }, { "epoch": 60.01, "learning_rate": 2.000724820776544e-05, "loss": 2.0032, "step": 12112000 }, { "epoch": 60.01, "learning_rate": 2.0006009621339357e-05, "loss": 1.9842, "step": 12112500 }, { "epoch": 60.01, "learning_rate": 2.0004771034913274e-05, "loss": 1.9977, "step": 12113000 }, { "epoch": 60.01, "learning_rate": 2.000353244848719e-05, "loss": 1.9899, "step": 12113500 }, { "epoch": 60.02, "learning_rate": 2.0002293862061108e-05, "loss": 1.9793, "step": 12114000 }, { "epoch": 60.02, "learning_rate": 2.0001055275635025e-05, "loss": 1.9873, "step": 12114500 }, { "epoch": 60.02, "learning_rate": 1.9999816689208942e-05, "loss": 2.0114, "step": 12115000 }, { "epoch": 60.02, "learning_rate": 1.9998578102782856e-05, "loss": 1.9701, "step": 12115500 }, { "epoch": 60.03, "learning_rate": 1.9997339516356773e-05, "loss": 1.9952, "step": 12116000 }, { "epoch": 60.03, "learning_rate": 1.999610092993069e-05, "loss": 1.9857, "step": 12116500 }, { "epoch": 60.03, "learning_rate": 1.9994862343504606e-05, "loss": 1.99, "step": 12117000 }, { "epoch": 60.03, "learning_rate": 1.9993626234251375e-05, "loss": 1.9832, "step": 12117500 }, { "epoch": 60.04, "learning_rate": 1.9992387647825292e-05, "loss": 1.985, "step": 12118000 }, { "epoch": 60.04, "learning_rate": 1.999114906139921e-05, "loss": 2.0032, "step": 12118500 }, { "epoch": 60.04, "learning_rate": 1.9989912952145974e-05, "loss": 2.0084, "step": 12119000 }, { "epoch": 60.04, "learning_rate": 1.998867436571989e-05, "loss": 1.9641, "step": 12119500 }, { "epoch": 60.05, "learning_rate": 1.998743577929381e-05, "loss": 1.9735, "step": 12120000 }, { "epoch": 60.05, "learning_rate": 1.9986197192867725e-05, "loss": 1.9713, "step": 12120500 }, { "epoch": 60.05, "learning_rate": 1.9984958606441642e-05, "loss": 1.9739, "step": 12121000 }, { "epoch": 60.05, "learning_rate": 1.998372002001556e-05, "loss": 1.9582, "step": 12121500 }, { "epoch": 60.06, "learning_rate": 1.9982481433589473e-05, "loss": 2.0061, "step": 12122000 }, { "epoch": 60.06, "learning_rate": 1.998124284716339e-05, "loss": 1.984, "step": 12122500 }, { "epoch": 60.06, "learning_rate": 1.9980004260737307e-05, "loss": 1.9927, "step": 12123000 }, { "epoch": 60.06, "learning_rate": 1.9978765674311223e-05, "loss": 1.9988, "step": 12123500 }, { "epoch": 60.07, "learning_rate": 1.997752708788514e-05, "loss": 1.9994, "step": 12124000 }, { "epoch": 60.07, "learning_rate": 1.9976288501459057e-05, "loss": 1.9935, "step": 12124500 }, { "epoch": 60.07, "learning_rate": 1.9975052392205823e-05, "loss": 1.9972, "step": 12125000 }, { "epoch": 60.07, "learning_rate": 1.997381380577974e-05, "loss": 1.9767, "step": 12125500 }, { "epoch": 60.08, "learning_rate": 1.9972575219353657e-05, "loss": 1.9791, "step": 12126000 }, { "epoch": 60.08, "learning_rate": 1.9971336632927574e-05, "loss": 1.9597, "step": 12126500 }, { "epoch": 60.08, "learning_rate": 1.997009804650149e-05, "loss": 1.9811, "step": 12127000 }, { "epoch": 60.08, "learning_rate": 1.9968859460075407e-05, "loss": 2.0073, "step": 12127500 }, { "epoch": 60.09, "learning_rate": 1.9967620873649324e-05, "loss": 1.9842, "step": 12128000 }, { "epoch": 60.09, "learning_rate": 1.996638228722324e-05, "loss": 1.9891, "step": 12128500 }, { "epoch": 60.09, "learning_rate": 1.9965143700797155e-05, "loss": 1.97, "step": 12129000 }, { "epoch": 60.09, "learning_rate": 1.996390511437107e-05, "loss": 1.991, "step": 12129500 }, { "epoch": 60.1, "learning_rate": 1.996266652794499e-05, "loss": 1.9789, "step": 12130000 }, { "epoch": 60.1, "learning_rate": 1.9961430418691757e-05, "loss": 1.9655, "step": 12130500 }, { "epoch": 60.1, "learning_rate": 1.9960191832265674e-05, "loss": 1.9674, "step": 12131000 }, { "epoch": 60.1, "learning_rate": 1.995895324583959e-05, "loss": 1.9813, "step": 12131500 }, { "epoch": 60.11, "learning_rate": 1.9957714659413508e-05, "loss": 1.9996, "step": 12132000 }, { "epoch": 60.11, "learning_rate": 1.9956481027333126e-05, "loss": 1.9764, "step": 12132500 }, { "epoch": 60.11, "learning_rate": 1.9955242440907042e-05, "loss": 2.0145, "step": 12133000 }, { "epoch": 60.11, "learning_rate": 1.995400385448096e-05, "loss": 1.9559, "step": 12133500 }, { "epoch": 60.12, "learning_rate": 1.9952765268054876e-05, "loss": 1.9901, "step": 12134000 }, { "epoch": 60.12, "learning_rate": 1.9951529158801642e-05, "loss": 2.0002, "step": 12134500 }, { "epoch": 60.12, "learning_rate": 1.995029057237556e-05, "loss": 1.9789, "step": 12135000 }, { "epoch": 60.12, "learning_rate": 1.9949051985949476e-05, "loss": 1.9605, "step": 12135500 }, { "epoch": 60.13, "learning_rate": 1.9947813399523393e-05, "loss": 1.9855, "step": 12136000 }, { "epoch": 60.13, "learning_rate": 1.994657481309731e-05, "loss": 1.978, "step": 12136500 }, { "epoch": 60.13, "learning_rate": 1.9945336226671226e-05, "loss": 1.9772, "step": 12137000 }, { "epoch": 60.13, "learning_rate": 1.994409764024514e-05, "loss": 1.9942, "step": 12137500 }, { "epoch": 60.14, "learning_rate": 1.9942859053819057e-05, "loss": 1.9519, "step": 12138000 }, { "epoch": 60.14, "learning_rate": 1.9941620467392974e-05, "loss": 1.9843, "step": 12138500 }, { "epoch": 60.14, "learning_rate": 1.994038188096689e-05, "loss": 1.9967, "step": 12139000 }, { "epoch": 60.14, "learning_rate": 1.9939143294540808e-05, "loss": 2.0288, "step": 12139500 }, { "epoch": 60.15, "learning_rate": 1.9937904708114725e-05, "loss": 1.9771, "step": 12140000 }, { "epoch": 60.15, "learning_rate": 1.993666612168864e-05, "loss": 1.996, "step": 12140500 }, { "epoch": 60.15, "learning_rate": 1.993542753526256e-05, "loss": 2.0023, "step": 12141000 }, { "epoch": 60.15, "learning_rate": 1.9934193903182176e-05, "loss": 1.9809, "step": 12141500 }, { "epoch": 60.16, "learning_rate": 1.9932955316756093e-05, "loss": 1.9798, "step": 12142000 }, { "epoch": 60.16, "learning_rate": 1.993171673033001e-05, "loss": 2.0034, "step": 12142500 }, { "epoch": 60.16, "learning_rate": 1.993048062107678e-05, "loss": 2.0098, "step": 12143000 }, { "epoch": 60.16, "learning_rate": 1.9929242034650695e-05, "loss": 2.0157, "step": 12143500 }, { "epoch": 60.17, "learning_rate": 1.9928003448224612e-05, "loss": 2.0145, "step": 12144000 }, { "epoch": 60.17, "learning_rate": 1.992676486179853e-05, "loss": 1.9975, "step": 12144500 }, { "epoch": 60.17, "learning_rate": 1.9925526275372443e-05, "loss": 1.9908, "step": 12145000 }, { "epoch": 60.17, "learning_rate": 1.992428768894636e-05, "loss": 2.0124, "step": 12145500 }, { "epoch": 60.18, "learning_rate": 1.992305157969313e-05, "loss": 1.9974, "step": 12146000 }, { "epoch": 60.18, "learning_rate": 1.9921812993267045e-05, "loss": 2.011, "step": 12146500 }, { "epoch": 60.18, "learning_rate": 1.9920574406840962e-05, "loss": 2.009, "step": 12147000 }, { "epoch": 60.18, "learning_rate": 1.991933582041488e-05, "loss": 2.0, "step": 12147500 }, { "epoch": 60.19, "learning_rate": 1.9918099711161648e-05, "loss": 2.0135, "step": 12148000 }, { "epoch": 60.19, "learning_rate": 1.9916861124735565e-05, "loss": 1.9849, "step": 12148500 }, { "epoch": 60.19, "learning_rate": 1.991562253830948e-05, "loss": 1.9856, "step": 12149000 }, { "epoch": 60.19, "learning_rate": 1.9914383951883395e-05, "loss": 1.9902, "step": 12149500 }, { "epoch": 60.2, "learning_rate": 1.9913145365457312e-05, "loss": 1.9837, "step": 12150000 }, { "epoch": 60.2, "learning_rate": 1.991190677903123e-05, "loss": 1.9911, "step": 12150500 }, { "epoch": 60.2, "learning_rate": 1.9910668192605146e-05, "loss": 2.0206, "step": 12151000 }, { "epoch": 60.2, "learning_rate": 1.990942960617906e-05, "loss": 1.9751, "step": 12151500 }, { "epoch": 60.21, "learning_rate": 1.990819349692583e-05, "loss": 1.9865, "step": 12152000 }, { "epoch": 60.21, "learning_rate": 1.9906954910499745e-05, "loss": 1.9865, "step": 12152500 }, { "epoch": 60.21, "learning_rate": 1.9905716324073662e-05, "loss": 1.973, "step": 12153000 }, { "epoch": 60.21, "learning_rate": 1.990447773764758e-05, "loss": 1.9605, "step": 12153500 }, { "epoch": 60.22, "learning_rate": 1.9903239151221496e-05, "loss": 1.9744, "step": 12154000 }, { "epoch": 60.22, "learning_rate": 1.9902000564795413e-05, "loss": 2.018, "step": 12154500 }, { "epoch": 60.22, "learning_rate": 1.990076445554218e-05, "loss": 1.9922, "step": 12155000 }, { "epoch": 60.22, "learning_rate": 1.9899525869116096e-05, "loss": 1.9785, "step": 12155500 }, { "epoch": 60.23, "learning_rate": 1.9898287282690012e-05, "loss": 1.9933, "step": 12156000 }, { "epoch": 60.23, "learning_rate": 1.989704869626393e-05, "loss": 1.9886, "step": 12156500 }, { "epoch": 60.23, "learning_rate": 1.9895810109837846e-05, "loss": 2.0103, "step": 12157000 }, { "epoch": 60.23, "learning_rate": 1.9894574000584615e-05, "loss": 1.9744, "step": 12157500 }, { "epoch": 60.23, "learning_rate": 1.989333541415853e-05, "loss": 1.9668, "step": 12158000 }, { "epoch": 60.24, "learning_rate": 1.9892096827732446e-05, "loss": 1.9761, "step": 12158500 }, { "epoch": 60.24, "learning_rate": 1.9890858241306363e-05, "loss": 1.9857, "step": 12159000 }, { "epoch": 60.24, "learning_rate": 1.988961965488028e-05, "loss": 1.9997, "step": 12159500 }, { "epoch": 60.24, "learning_rate": 1.9888381068454196e-05, "loss": 1.9898, "step": 12160000 }, { "epoch": 60.25, "learning_rate": 1.9887142482028113e-05, "loss": 1.9844, "step": 12160500 }, { "epoch": 60.25, "learning_rate": 1.988590389560203e-05, "loss": 1.9848, "step": 12161000 }, { "epoch": 60.25, "learning_rate": 1.9884665309175947e-05, "loss": 2.0065, "step": 12161500 }, { "epoch": 60.25, "learning_rate": 1.988342672274986e-05, "loss": 1.9731, "step": 12162000 }, { "epoch": 60.26, "learning_rate": 1.9882188136323778e-05, "loss": 2.0051, "step": 12162500 }, { "epoch": 60.26, "learning_rate": 1.988094954989769e-05, "loss": 1.9837, "step": 12163000 }, { "epoch": 60.26, "learning_rate": 1.9879710963471608e-05, "loss": 2.0062, "step": 12163500 }, { "epoch": 60.26, "learning_rate": 1.987847485421838e-05, "loss": 1.9908, "step": 12164000 }, { "epoch": 60.27, "learning_rate": 1.9877236267792297e-05, "loss": 1.9862, "step": 12164500 }, { "epoch": 60.27, "learning_rate": 1.9876000158539063e-05, "loss": 2.0152, "step": 12165000 }, { "epoch": 60.27, "learning_rate": 1.987476157211298e-05, "loss": 2.0096, "step": 12165500 }, { "epoch": 60.27, "learning_rate": 1.987352546285975e-05, "loss": 2.0151, "step": 12166000 }, { "epoch": 60.28, "learning_rate": 1.9872286876433665e-05, "loss": 1.991, "step": 12166500 }, { "epoch": 60.28, "learning_rate": 1.9871048290007582e-05, "loss": 2.0051, "step": 12167000 }, { "epoch": 60.28, "learning_rate": 1.9869809703581496e-05, "loss": 1.9963, "step": 12167500 }, { "epoch": 60.28, "learning_rate": 1.9868571117155413e-05, "loss": 2.003, "step": 12168000 }, { "epoch": 60.29, "learning_rate": 1.986733253072933e-05, "loss": 1.9719, "step": 12168500 }, { "epoch": 60.29, "learning_rate": 1.9866098898648954e-05, "loss": 1.9902, "step": 12169000 }, { "epoch": 60.29, "learning_rate": 1.9864860312222867e-05, "loss": 1.9981, "step": 12169500 }, { "epoch": 60.29, "learning_rate": 1.9863621725796784e-05, "loss": 1.9902, "step": 12170000 }, { "epoch": 60.3, "learning_rate": 1.9862383139370698e-05, "loss": 1.9946, "step": 12170500 }, { "epoch": 60.3, "learning_rate": 1.9861144552944615e-05, "loss": 1.9995, "step": 12171000 }, { "epoch": 60.3, "learning_rate": 1.985990596651853e-05, "loss": 1.9912, "step": 12171500 }, { "epoch": 60.3, "learning_rate": 1.985866738009245e-05, "loss": 2.0143, "step": 12172000 }, { "epoch": 60.31, "learning_rate": 1.9857428793666365e-05, "loss": 2.0057, "step": 12172500 }, { "epoch": 60.31, "learning_rate": 1.9856190207240282e-05, "loss": 1.9595, "step": 12173000 }, { "epoch": 60.31, "learning_rate": 1.98549516208142e-05, "loss": 1.9889, "step": 12173500 }, { "epoch": 60.31, "learning_rate": 1.9853713034388113e-05, "loss": 2.0191, "step": 12174000 }, { "epoch": 60.32, "learning_rate": 1.985247444796203e-05, "loss": 1.9859, "step": 12174500 }, { "epoch": 60.32, "learning_rate": 1.9851235861535947e-05, "loss": 1.989, "step": 12175000 }, { "epoch": 60.32, "learning_rate": 1.9849997275109864e-05, "loss": 1.9849, "step": 12175500 }, { "epoch": 60.32, "learning_rate": 1.984875868868378e-05, "loss": 2.0284, "step": 12176000 }, { "epoch": 60.33, "learning_rate": 1.984752257943055e-05, "loss": 1.9813, "step": 12176500 }, { "epoch": 60.33, "learning_rate": 1.9846286470177318e-05, "loss": 1.9817, "step": 12177000 }, { "epoch": 60.33, "learning_rate": 1.9845047883751232e-05, "loss": 1.9955, "step": 12177500 }, { "epoch": 60.33, "learning_rate": 1.984380929732515e-05, "loss": 1.9983, "step": 12178000 }, { "epoch": 60.34, "learning_rate": 1.9842570710899066e-05, "loss": 1.9708, "step": 12178500 }, { "epoch": 60.34, "learning_rate": 1.9841332124472983e-05, "loss": 1.9808, "step": 12179000 }, { "epoch": 60.34, "learning_rate": 1.98400935380469e-05, "loss": 1.9955, "step": 12179500 }, { "epoch": 60.34, "learning_rate": 1.9838857428793668e-05, "loss": 1.9768, "step": 12180000 }, { "epoch": 60.35, "learning_rate": 1.9837618842367585e-05, "loss": 1.9693, "step": 12180500 }, { "epoch": 60.35, "learning_rate": 1.98363802559415e-05, "loss": 1.9881, "step": 12181000 }, { "epoch": 60.35, "learning_rate": 1.9835141669515416e-05, "loss": 2.0047, "step": 12181500 }, { "epoch": 60.35, "learning_rate": 1.9833903083089333e-05, "loss": 2.0311, "step": 12182000 }, { "epoch": 60.36, "learning_rate": 1.983266449666325e-05, "loss": 1.9957, "step": 12182500 }, { "epoch": 60.36, "learning_rate": 1.9831425910237163e-05, "loss": 2.0041, "step": 12183000 }, { "epoch": 60.36, "learning_rate": 1.983018732381108e-05, "loss": 1.9804, "step": 12183500 }, { "epoch": 60.36, "learning_rate": 1.982895121455785e-05, "loss": 2.018, "step": 12184000 }, { "epoch": 60.37, "learning_rate": 1.9827712628131766e-05, "loss": 1.9923, "step": 12184500 }, { "epoch": 60.37, "learning_rate": 1.9826474041705683e-05, "loss": 1.9987, "step": 12185000 }, { "epoch": 60.37, "learning_rate": 1.98252354552796e-05, "loss": 1.9941, "step": 12185500 }, { "epoch": 60.37, "learning_rate": 1.9823996868853516e-05, "loss": 1.9951, "step": 12186000 }, { "epoch": 60.38, "learning_rate": 1.9822763236773137e-05, "loss": 1.992, "step": 12186500 }, { "epoch": 60.38, "learning_rate": 1.9821524650347054e-05, "loss": 1.9891, "step": 12187000 }, { "epoch": 60.38, "learning_rate": 1.982028606392097e-05, "loss": 1.9915, "step": 12187500 }, { "epoch": 60.38, "learning_rate": 1.9819047477494885e-05, "loss": 1.9739, "step": 12188000 }, { "epoch": 60.39, "learning_rate": 1.9817811368241653e-05, "loss": 2.0189, "step": 12188500 }, { "epoch": 60.39, "learning_rate": 1.9816575258988422e-05, "loss": 2.0163, "step": 12189000 }, { "epoch": 60.39, "learning_rate": 1.981533667256234e-05, "loss": 2.0215, "step": 12189500 }, { "epoch": 60.39, "learning_rate": 1.9814098086136253e-05, "loss": 1.9815, "step": 12190000 }, { "epoch": 60.4, "learning_rate": 1.981285949971017e-05, "loss": 1.9803, "step": 12190500 }, { "epoch": 60.4, "learning_rate": 1.9811620913284087e-05, "loss": 1.9897, "step": 12191000 }, { "epoch": 60.4, "learning_rate": 1.9810382326858003e-05, "loss": 1.9841, "step": 12191500 }, { "epoch": 60.4, "learning_rate": 1.980914374043192e-05, "loss": 2.0187, "step": 12192000 }, { "epoch": 60.41, "learning_rate": 1.980790763117869e-05, "loss": 2.0132, "step": 12192500 }, { "epoch": 60.41, "learning_rate": 1.9806669044752603e-05, "loss": 1.9893, "step": 12193000 }, { "epoch": 60.41, "learning_rate": 1.980543045832652e-05, "loss": 1.9975, "step": 12193500 }, { "epoch": 60.41, "learning_rate": 1.9804191871900437e-05, "loss": 1.978, "step": 12194000 }, { "epoch": 60.42, "learning_rate": 1.9802953285474354e-05, "loss": 1.9828, "step": 12194500 }, { "epoch": 60.42, "learning_rate": 1.980171469904827e-05, "loss": 1.9933, "step": 12195000 }, { "epoch": 60.42, "learning_rate": 1.9800476112622187e-05, "loss": 1.9878, "step": 12195500 }, { "epoch": 60.42, "learning_rate": 1.9799237526196104e-05, "loss": 1.9957, "step": 12196000 }, { "epoch": 60.43, "learning_rate": 1.979799893977002e-05, "loss": 1.9988, "step": 12196500 }, { "epoch": 60.43, "learning_rate": 1.9796760353343938e-05, "loss": 2.0112, "step": 12197000 }, { "epoch": 60.43, "learning_rate": 1.979552176691785e-05, "loss": 2.0145, "step": 12197500 }, { "epoch": 60.43, "learning_rate": 1.979428318049177e-05, "loss": 1.9865, "step": 12198000 }, { "epoch": 60.44, "learning_rate": 1.9793044594065686e-05, "loss": 1.9867, "step": 12198500 }, { "epoch": 60.44, "learning_rate": 1.9791806007639602e-05, "loss": 1.9725, "step": 12199000 }, { "epoch": 60.44, "learning_rate": 1.979056742121352e-05, "loss": 1.9805, "step": 12199500 }, { "epoch": 60.44, "learning_rate": 1.9789328834787436e-05, "loss": 1.9887, "step": 12200000 }, { "epoch": 60.45, "learning_rate": 1.9788090248361353e-05, "loss": 1.9787, "step": 12200500 }, { "epoch": 60.45, "learning_rate": 1.978685413910812e-05, "loss": 1.9962, "step": 12201000 }, { "epoch": 60.45, "learning_rate": 1.9785615552682036e-05, "loss": 1.9895, "step": 12201500 }, { "epoch": 60.45, "learning_rate": 1.9784376966255953e-05, "loss": 2.0001, "step": 12202000 }, { "epoch": 60.46, "learning_rate": 1.978313837982987e-05, "loss": 1.9988, "step": 12202500 }, { "epoch": 60.46, "learning_rate": 1.9781902270576638e-05, "loss": 1.997, "step": 12203000 }, { "epoch": 60.46, "learning_rate": 1.9780663684150555e-05, "loss": 1.9795, "step": 12203500 }, { "epoch": 60.46, "learning_rate": 1.977942509772447e-05, "loss": 1.99, "step": 12204000 }, { "epoch": 60.47, "learning_rate": 1.9778186511298386e-05, "loss": 1.9982, "step": 12204500 }, { "epoch": 60.47, "learning_rate": 1.9776950402045154e-05, "loss": 2.0249, "step": 12205000 }, { "epoch": 60.47, "learning_rate": 1.977571181561907e-05, "loss": 1.9928, "step": 12205500 }, { "epoch": 60.47, "learning_rate": 1.977447322919299e-05, "loss": 2.0076, "step": 12206000 }, { "epoch": 60.48, "learning_rate": 1.9773234642766905e-05, "loss": 1.9851, "step": 12206500 }, { "epoch": 60.48, "learning_rate": 1.977199605634082e-05, "loss": 2.015, "step": 12207000 }, { "epoch": 60.48, "learning_rate": 1.9770757469914736e-05, "loss": 1.9792, "step": 12207500 }, { "epoch": 60.48, "learning_rate": 1.9769518883488653e-05, "loss": 1.9802, "step": 12208000 }, { "epoch": 60.49, "learning_rate": 1.976828029706257e-05, "loss": 1.9724, "step": 12208500 }, { "epoch": 60.49, "learning_rate": 1.976704418780934e-05, "loss": 2.0023, "step": 12209000 }, { "epoch": 60.49, "learning_rate": 1.9765805601383255e-05, "loss": 1.9773, "step": 12209500 }, { "epoch": 60.49, "learning_rate": 1.976456701495717e-05, "loss": 2.0057, "step": 12210000 }, { "epoch": 60.5, "learning_rate": 1.9763328428531086e-05, "loss": 1.9926, "step": 12210500 }, { "epoch": 60.5, "learning_rate": 1.9762089842105003e-05, "loss": 2.0027, "step": 12211000 }, { "epoch": 60.5, "learning_rate": 1.976085373285177e-05, "loss": 2.0002, "step": 12211500 }, { "epoch": 60.5, "learning_rate": 1.975961514642569e-05, "loss": 2.0015, "step": 12212000 }, { "epoch": 60.5, "learning_rate": 1.9758376559999605e-05, "loss": 2.0218, "step": 12212500 }, { "epoch": 60.51, "learning_rate": 1.9757140450746374e-05, "loss": 1.9921, "step": 12213000 }, { "epoch": 60.51, "learning_rate": 1.9755901864320288e-05, "loss": 2.0083, "step": 12213500 }, { "epoch": 60.51, "learning_rate": 1.9754663277894205e-05, "loss": 1.9776, "step": 12214000 }, { "epoch": 60.51, "learning_rate": 1.9753427168640977e-05, "loss": 2.0131, "step": 12214500 }, { "epoch": 60.52, "learning_rate": 1.975218858221489e-05, "loss": 2.0046, "step": 12215000 }, { "epoch": 60.52, "learning_rate": 1.9750949995788807e-05, "loss": 2.0119, "step": 12215500 }, { "epoch": 60.52, "learning_rate": 1.9749711409362724e-05, "loss": 2.0057, "step": 12216000 }, { "epoch": 60.52, "learning_rate": 1.9748472822936638e-05, "loss": 2.0056, "step": 12216500 }, { "epoch": 60.53, "learning_rate": 1.9747234236510555e-05, "loss": 2.0059, "step": 12217000 }, { "epoch": 60.53, "learning_rate": 1.974599565008447e-05, "loss": 2.0138, "step": 12217500 }, { "epoch": 60.53, "learning_rate": 1.974475706365839e-05, "loss": 1.9974, "step": 12218000 }, { "epoch": 60.53, "learning_rate": 1.9743518477232306e-05, "loss": 1.9938, "step": 12218500 }, { "epoch": 60.54, "learning_rate": 1.9742279890806222e-05, "loss": 2.0024, "step": 12219000 }, { "epoch": 60.54, "learning_rate": 1.9741041304380136e-05, "loss": 1.9745, "step": 12219500 }, { "epoch": 60.54, "learning_rate": 1.9739802717954053e-05, "loss": 1.9878, "step": 12220000 }, { "epoch": 60.54, "learning_rate": 1.973856413152797e-05, "loss": 1.9645, "step": 12220500 }, { "epoch": 60.55, "learning_rate": 1.973732802227474e-05, "loss": 1.9739, "step": 12221000 }, { "epoch": 60.55, "learning_rate": 1.9736089435848656e-05, "loss": 2.0134, "step": 12221500 }, { "epoch": 60.55, "learning_rate": 1.9734850849422572e-05, "loss": 1.9858, "step": 12222000 }, { "epoch": 60.55, "learning_rate": 1.9733617217342193e-05, "loss": 1.9856, "step": 12222500 }, { "epoch": 60.56, "learning_rate": 1.973237863091611e-05, "loss": 2.0089, "step": 12223000 }, { "epoch": 60.56, "learning_rate": 1.9731142521662876e-05, "loss": 1.9691, "step": 12223500 }, { "epoch": 60.56, "learning_rate": 1.9729903935236792e-05, "loss": 2.0034, "step": 12224000 }, { "epoch": 60.56, "learning_rate": 1.972866534881071e-05, "loss": 1.9845, "step": 12224500 }, { "epoch": 60.57, "learning_rate": 1.9727426762384626e-05, "loss": 2.0044, "step": 12225000 }, { "epoch": 60.57, "learning_rate": 1.9726188175958543e-05, "loss": 1.9884, "step": 12225500 }, { "epoch": 60.57, "learning_rate": 1.972494958953246e-05, "loss": 1.9916, "step": 12226000 }, { "epoch": 60.57, "learning_rate": 1.9723711003106377e-05, "loss": 1.9913, "step": 12226500 }, { "epoch": 60.58, "learning_rate": 1.9722472416680294e-05, "loss": 1.9715, "step": 12227000 }, { "epoch": 60.58, "learning_rate": 1.9721233830254208e-05, "loss": 1.993, "step": 12227500 }, { "epoch": 60.58, "learning_rate": 1.9719997721000976e-05, "loss": 2.0053, "step": 12228000 }, { "epoch": 60.58, "learning_rate": 1.9718759134574893e-05, "loss": 1.9767, "step": 12228500 }, { "epoch": 60.59, "learning_rate": 1.971752054814881e-05, "loss": 1.9719, "step": 12229000 }, { "epoch": 60.59, "learning_rate": 1.9716281961722727e-05, "loss": 1.9964, "step": 12229500 }, { "epoch": 60.59, "learning_rate": 1.9715043375296644e-05, "loss": 2.0065, "step": 12230000 }, { "epoch": 60.59, "learning_rate": 1.9713804788870558e-05, "loss": 2.0043, "step": 12230500 }, { "epoch": 60.6, "learning_rate": 1.9712566202444475e-05, "loss": 1.9784, "step": 12231000 }, { "epoch": 60.6, "learning_rate": 1.971132761601839e-05, "loss": 1.9924, "step": 12231500 }, { "epoch": 60.6, "learning_rate": 1.971008902959231e-05, "loss": 2.0179, "step": 12232000 }, { "epoch": 60.6, "learning_rate": 1.9708850443166225e-05, "loss": 1.9839, "step": 12232500 }, { "epoch": 60.61, "learning_rate": 1.9707611856740142e-05, "loss": 1.9721, "step": 12233000 }, { "epoch": 60.61, "learning_rate": 1.970637822465976e-05, "loss": 1.9906, "step": 12233500 }, { "epoch": 60.61, "learning_rate": 1.9705139638233677e-05, "loss": 2.0147, "step": 12234000 }, { "epoch": 60.61, "learning_rate": 1.9703901051807593e-05, "loss": 2.0114, "step": 12234500 }, { "epoch": 60.62, "learning_rate": 1.970266246538151e-05, "loss": 2.0032, "step": 12235000 }, { "epoch": 60.62, "learning_rate": 1.9701423878955427e-05, "loss": 1.9978, "step": 12235500 }, { "epoch": 60.62, "learning_rate": 1.9700185292529344e-05, "loss": 1.9937, "step": 12236000 }, { "epoch": 60.62, "learning_rate": 1.969894670610326e-05, "loss": 2.0078, "step": 12236500 }, { "epoch": 60.63, "learning_rate": 1.9697708119677175e-05, "loss": 2.0013, "step": 12237000 }, { "epoch": 60.63, "learning_rate": 1.969646953325109e-05, "loss": 1.9912, "step": 12237500 }, { "epoch": 60.63, "learning_rate": 1.969523094682501e-05, "loss": 1.9975, "step": 12238000 }, { "epoch": 60.63, "learning_rate": 1.9693994837571777e-05, "loss": 2.0008, "step": 12238500 }, { "epoch": 60.64, "learning_rate": 1.9692756251145694e-05, "loss": 2.0044, "step": 12239000 }, { "epoch": 60.64, "learning_rate": 1.969151766471961e-05, "loss": 1.9915, "step": 12239500 }, { "epoch": 60.64, "learning_rate": 1.9690281555466377e-05, "loss": 1.9897, "step": 12240000 }, { "epoch": 60.64, "learning_rate": 1.9689042969040294e-05, "loss": 1.984, "step": 12240500 }, { "epoch": 60.65, "learning_rate": 1.968780438261421e-05, "loss": 1.9692, "step": 12241000 }, { "epoch": 60.65, "learning_rate": 1.968656827336098e-05, "loss": 2.0092, "step": 12241500 }, { "epoch": 60.65, "learning_rate": 1.9685329686934896e-05, "loss": 1.9905, "step": 12242000 }, { "epoch": 60.65, "learning_rate": 1.968409110050881e-05, "loss": 2.0248, "step": 12242500 }, { "epoch": 60.66, "learning_rate": 1.9682852514082727e-05, "loss": 1.9986, "step": 12243000 }, { "epoch": 60.66, "learning_rate": 1.9681613927656644e-05, "loss": 1.9886, "step": 12243500 }, { "epoch": 60.66, "learning_rate": 1.968037534123056e-05, "loss": 1.9823, "step": 12244000 }, { "epoch": 60.66, "learning_rate": 1.9679136754804477e-05, "loss": 1.9783, "step": 12244500 }, { "epoch": 60.67, "learning_rate": 1.9677898168378394e-05, "loss": 2.023, "step": 12245000 }, { "epoch": 60.67, "learning_rate": 1.967665958195231e-05, "loss": 1.9844, "step": 12245500 }, { "epoch": 60.67, "learning_rate": 1.9675420995526228e-05, "loss": 2.0157, "step": 12246000 }, { "epoch": 60.67, "learning_rate": 1.9674182409100142e-05, "loss": 1.9817, "step": 12246500 }, { "epoch": 60.68, "learning_rate": 1.967294382267406e-05, "loss": 1.9906, "step": 12247000 }, { "epoch": 60.68, "learning_rate": 1.9671705236247976e-05, "loss": 2.0083, "step": 12247500 }, { "epoch": 60.68, "learning_rate": 1.9670469126994744e-05, "loss": 1.9891, "step": 12248000 }, { "epoch": 60.68, "learning_rate": 1.9669233017741513e-05, "loss": 2.004, "step": 12248500 }, { "epoch": 60.69, "learning_rate": 1.966799443131543e-05, "loss": 1.9897, "step": 12249000 }, { "epoch": 60.69, "learning_rate": 1.9666755844889344e-05, "loss": 2.0011, "step": 12249500 }, { "epoch": 60.69, "learning_rate": 1.966551725846326e-05, "loss": 1.9902, "step": 12250000 }, { "epoch": 60.69, "learning_rate": 1.9664278672037178e-05, "loss": 1.9675, "step": 12250500 }, { "epoch": 60.7, "learning_rate": 1.9663040085611095e-05, "loss": 1.988, "step": 12251000 }, { "epoch": 60.7, "learning_rate": 1.966180149918501e-05, "loss": 1.9818, "step": 12251500 }, { "epoch": 60.7, "learning_rate": 1.966056291275893e-05, "loss": 1.9839, "step": 12252000 }, { "epoch": 60.7, "learning_rate": 1.9659324326332842e-05, "loss": 1.9905, "step": 12252500 }, { "epoch": 60.71, "learning_rate": 1.965808573990676e-05, "loss": 1.9958, "step": 12253000 }, { "epoch": 60.71, "learning_rate": 1.9656847153480676e-05, "loss": 2.0003, "step": 12253500 }, { "epoch": 60.71, "learning_rate": 1.9655611044227445e-05, "loss": 1.9908, "step": 12254000 }, { "epoch": 60.71, "learning_rate": 1.9654374934974213e-05, "loss": 2.0082, "step": 12254500 }, { "epoch": 60.72, "learning_rate": 1.965313634854813e-05, "loss": 1.9838, "step": 12255000 }, { "epoch": 60.72, "learning_rate": 1.9651897762122047e-05, "loss": 1.9759, "step": 12255500 }, { "epoch": 60.72, "learning_rate": 1.965065917569596e-05, "loss": 2.0045, "step": 12256000 }, { "epoch": 60.72, "learning_rate": 1.9649420589269878e-05, "loss": 1.9844, "step": 12256500 }, { "epoch": 60.73, "learning_rate": 1.964818448001665e-05, "loss": 1.9942, "step": 12257000 }, { "epoch": 60.73, "learning_rate": 1.9646945893590563e-05, "loss": 2.0006, "step": 12257500 }, { "epoch": 60.73, "learning_rate": 1.964570730716448e-05, "loss": 1.9966, "step": 12258000 }, { "epoch": 60.73, "learning_rate": 1.9644468720738397e-05, "loss": 2.0042, "step": 12258500 }, { "epoch": 60.74, "learning_rate": 1.9643230134312314e-05, "loss": 1.9821, "step": 12259000 }, { "epoch": 60.74, "learning_rate": 1.9641991547886228e-05, "loss": 2.0095, "step": 12259500 }, { "epoch": 60.74, "learning_rate": 1.9640752961460145e-05, "loss": 1.9944, "step": 12260000 }, { "epoch": 60.74, "learning_rate": 1.9639516852206917e-05, "loss": 2.0085, "step": 12260500 }, { "epoch": 60.75, "learning_rate": 1.963827826578083e-05, "loss": 1.9946, "step": 12261000 }, { "epoch": 60.75, "learning_rate": 1.9637039679354747e-05, "loss": 1.9852, "step": 12261500 }, { "epoch": 60.75, "learning_rate": 1.9635801092928664e-05, "loss": 1.9885, "step": 12262000 }, { "epoch": 60.75, "learning_rate": 1.963456250650258e-05, "loss": 2.0077, "step": 12262500 }, { "epoch": 60.76, "learning_rate": 1.9633323920076495e-05, "loss": 1.9748, "step": 12263000 }, { "epoch": 60.76, "learning_rate": 1.9632085333650412e-05, "loss": 1.9807, "step": 12263500 }, { "epoch": 60.76, "learning_rate": 1.963084922439718e-05, "loss": 2.005, "step": 12264000 }, { "epoch": 60.76, "learning_rate": 1.9629610637971097e-05, "loss": 1.9985, "step": 12264500 }, { "epoch": 60.77, "learning_rate": 1.9628372051545014e-05, "loss": 1.9798, "step": 12265000 }, { "epoch": 60.77, "learning_rate": 1.962713346511893e-05, "loss": 2.0026, "step": 12265500 }, { "epoch": 60.77, "learning_rate": 1.9625894878692845e-05, "loss": 1.9917, "step": 12266000 }, { "epoch": 60.77, "learning_rate": 1.9624656292266762e-05, "loss": 2.001, "step": 12266500 }, { "epoch": 60.77, "learning_rate": 1.962341770584068e-05, "loss": 2.0135, "step": 12267000 }, { "epoch": 60.78, "learning_rate": 1.9622179119414596e-05, "loss": 1.9911, "step": 12267500 }, { "epoch": 60.78, "learning_rate": 1.962094053298851e-05, "loss": 1.9819, "step": 12268000 }, { "epoch": 60.78, "learning_rate": 1.9619701946562426e-05, "loss": 2.0214, "step": 12268500 }, { "epoch": 60.78, "learning_rate": 1.9618463360136343e-05, "loss": 1.9735, "step": 12269000 }, { "epoch": 60.79, "learning_rate": 1.9617227250883112e-05, "loss": 2.0202, "step": 12269500 }, { "epoch": 60.79, "learning_rate": 1.961598866445703e-05, "loss": 2.0012, "step": 12270000 }, { "epoch": 60.79, "learning_rate": 1.9614752555203798e-05, "loss": 1.9877, "step": 12270500 }, { "epoch": 60.79, "learning_rate": 1.9613513968777714e-05, "loss": 1.9948, "step": 12271000 }, { "epoch": 60.8, "learning_rate": 1.961227538235163e-05, "loss": 2.0001, "step": 12271500 }, { "epoch": 60.8, "learning_rate": 1.961103679592555e-05, "loss": 2.0134, "step": 12272000 }, { "epoch": 60.8, "learning_rate": 1.9609798209499465e-05, "loss": 2.0008, "step": 12272500 }, { "epoch": 60.8, "learning_rate": 1.960855962307338e-05, "loss": 1.9906, "step": 12273000 }, { "epoch": 60.81, "learning_rate": 1.9607321036647296e-05, "loss": 2.0086, "step": 12273500 }, { "epoch": 60.81, "learning_rate": 1.9606084927394065e-05, "loss": 2.0052, "step": 12274000 }, { "epoch": 60.81, "learning_rate": 1.960484634096798e-05, "loss": 1.9988, "step": 12274500 }, { "epoch": 60.81, "learning_rate": 1.96036077545419e-05, "loss": 2.0077, "step": 12275000 }, { "epoch": 60.82, "learning_rate": 1.9602369168115815e-05, "loss": 1.9928, "step": 12275500 }, { "epoch": 60.82, "learning_rate": 1.9601130581689732e-05, "loss": 1.9982, "step": 12276000 }, { "epoch": 60.82, "learning_rate": 1.9599891995263646e-05, "loss": 2.0286, "step": 12276500 }, { "epoch": 60.82, "learning_rate": 1.9598653408837563e-05, "loss": 1.9863, "step": 12277000 }, { "epoch": 60.83, "learning_rate": 1.9597414822411476e-05, "loss": 1.99, "step": 12277500 }, { "epoch": 60.83, "learning_rate": 1.959617871315825e-05, "loss": 1.9867, "step": 12278000 }, { "epoch": 60.83, "learning_rate": 1.9594942603905017e-05, "loss": 2.0101, "step": 12278500 }, { "epoch": 60.83, "learning_rate": 1.9593704017478934e-05, "loss": 1.9916, "step": 12279000 }, { "epoch": 60.84, "learning_rate": 1.9592465431052848e-05, "loss": 1.9965, "step": 12279500 }, { "epoch": 60.84, "learning_rate": 1.9591226844626765e-05, "loss": 2.0006, "step": 12280000 }, { "epoch": 60.84, "learning_rate": 1.958998825820068e-05, "loss": 1.9975, "step": 12280500 }, { "epoch": 60.84, "learning_rate": 1.95887496717746e-05, "loss": 1.9817, "step": 12281000 }, { "epoch": 60.85, "learning_rate": 1.9587511085348515e-05, "loss": 1.9876, "step": 12281500 }, { "epoch": 60.85, "learning_rate": 1.9586272498922432e-05, "loss": 2.0052, "step": 12282000 }, { "epoch": 60.85, "learning_rate": 1.958503391249635e-05, "loss": 1.9909, "step": 12282500 }, { "epoch": 60.85, "learning_rate": 1.9583795326070263e-05, "loss": 1.9935, "step": 12283000 }, { "epoch": 60.86, "learning_rate": 1.958255673964418e-05, "loss": 2.0085, "step": 12283500 }, { "epoch": 60.86, "learning_rate": 1.9581318153218097e-05, "loss": 1.9737, "step": 12284000 }, { "epoch": 60.86, "learning_rate": 1.9580082043964866e-05, "loss": 2.0257, "step": 12284500 }, { "epoch": 60.86, "learning_rate": 1.9578843457538782e-05, "loss": 1.9979, "step": 12285000 }, { "epoch": 60.87, "learning_rate": 1.95776048711127e-05, "loss": 2.0127, "step": 12285500 }, { "epoch": 60.87, "learning_rate": 1.9576366284686616e-05, "loss": 2.014, "step": 12286000 }, { "epoch": 60.87, "learning_rate": 1.957512769826053e-05, "loss": 1.999, "step": 12286500 }, { "epoch": 60.87, "learning_rate": 1.9573889111834447e-05, "loss": 2.0023, "step": 12287000 }, { "epoch": 60.88, "learning_rate": 1.957265052540836e-05, "loss": 1.9748, "step": 12287500 }, { "epoch": 60.88, "learning_rate": 1.9571411938982277e-05, "loss": 2.0041, "step": 12288000 }, { "epoch": 60.88, "learning_rate": 1.9570173352556194e-05, "loss": 2.0097, "step": 12288500 }, { "epoch": 60.88, "learning_rate": 1.956893476613011e-05, "loss": 2.0028, "step": 12289000 }, { "epoch": 60.89, "learning_rate": 1.9567698656876883e-05, "loss": 2.0102, "step": 12289500 }, { "epoch": 60.89, "learning_rate": 1.956646254762365e-05, "loss": 1.9903, "step": 12290000 }, { "epoch": 60.89, "learning_rate": 1.9565223961197566e-05, "loss": 2.006, "step": 12290500 }, { "epoch": 60.89, "learning_rate": 1.9563987851944334e-05, "loss": 1.9904, "step": 12291000 }, { "epoch": 60.9, "learning_rate": 1.956274926551825e-05, "loss": 1.9948, "step": 12291500 }, { "epoch": 60.9, "learning_rate": 1.9561510679092165e-05, "loss": 1.966, "step": 12292000 }, { "epoch": 60.9, "learning_rate": 1.9560272092666082e-05, "loss": 1.9913, "step": 12292500 }, { "epoch": 60.9, "learning_rate": 1.955903598341285e-05, "loss": 2.0031, "step": 12293000 }, { "epoch": 60.91, "learning_rate": 1.9557797396986768e-05, "loss": 1.985, "step": 12293500 }, { "epoch": 60.91, "learning_rate": 1.9556558810560685e-05, "loss": 1.9666, "step": 12294000 }, { "epoch": 60.91, "learning_rate": 1.95553202241346e-05, "loss": 1.9871, "step": 12294500 }, { "epoch": 60.91, "learning_rate": 1.955408411488137e-05, "loss": 2.0178, "step": 12295000 }, { "epoch": 60.92, "learning_rate": 1.9552845528455284e-05, "loss": 2.0051, "step": 12295500 }, { "epoch": 60.92, "learning_rate": 1.95516069420292e-05, "loss": 2.0158, "step": 12296000 }, { "epoch": 60.92, "learning_rate": 1.9550370832775973e-05, "loss": 2.0013, "step": 12296500 }, { "epoch": 60.92, "learning_rate": 1.9549132246349886e-05, "loss": 1.9957, "step": 12297000 }, { "epoch": 60.93, "learning_rate": 1.9547893659923803e-05, "loss": 1.9725, "step": 12297500 }, { "epoch": 60.93, "learning_rate": 1.954665507349772e-05, "loss": 2.0002, "step": 12298000 }, { "epoch": 60.93, "learning_rate": 1.9545416487071634e-05, "loss": 2.0056, "step": 12298500 }, { "epoch": 60.93, "learning_rate": 1.954417790064555e-05, "loss": 2.0094, "step": 12299000 }, { "epoch": 60.94, "learning_rate": 1.9542939314219468e-05, "loss": 2.0119, "step": 12299500 }, { "epoch": 60.94, "learning_rate": 1.9541700727793385e-05, "loss": 1.9901, "step": 12300000 }, { "epoch": 60.94, "learning_rate": 1.9540464618540153e-05, "loss": 2.0035, "step": 12300500 }, { "epoch": 60.94, "learning_rate": 1.953922603211407e-05, "loss": 2.0084, "step": 12301000 }, { "epoch": 60.95, "learning_rate": 1.9537987445687987e-05, "loss": 1.9661, "step": 12301500 }, { "epoch": 60.95, "learning_rate": 1.95367488592619e-05, "loss": 1.986, "step": 12302000 }, { "epoch": 60.95, "learning_rate": 1.9535510272835818e-05, "loss": 2.0113, "step": 12302500 }, { "epoch": 60.95, "learning_rate": 1.953427416358259e-05, "loss": 2.0014, "step": 12303000 }, { "epoch": 60.96, "learning_rate": 1.9533035577156504e-05, "loss": 2.0098, "step": 12303500 }, { "epoch": 60.96, "learning_rate": 1.953179699073042e-05, "loss": 1.9961, "step": 12304000 }, { "epoch": 60.96, "learning_rate": 1.953056088147719e-05, "loss": 1.9949, "step": 12304500 }, { "epoch": 60.96, "learning_rate": 1.9529322295051106e-05, "loss": 2.0036, "step": 12305000 }, { "epoch": 60.97, "learning_rate": 1.9528083708625023e-05, "loss": 2.0169, "step": 12305500 }, { "epoch": 60.97, "learning_rate": 1.952684512219894e-05, "loss": 2.0176, "step": 12306000 }, { "epoch": 60.97, "learning_rate": 1.9525606535772854e-05, "loss": 2.0064, "step": 12306500 }, { "epoch": 60.97, "learning_rate": 1.952436794934677e-05, "loss": 2.0013, "step": 12307000 }, { "epoch": 60.98, "learning_rate": 1.9523129362920687e-05, "loss": 2.0016, "step": 12307500 }, { "epoch": 60.98, "learning_rate": 1.9521890776494604e-05, "loss": 2.0048, "step": 12308000 }, { "epoch": 60.98, "learning_rate": 1.952065219006852e-05, "loss": 2.019, "step": 12308500 }, { "epoch": 60.98, "learning_rate": 1.9519413603642435e-05, "loss": 2.0177, "step": 12309000 }, { "epoch": 60.99, "learning_rate": 1.9518175017216352e-05, "loss": 1.9882, "step": 12309500 }, { "epoch": 60.99, "learning_rate": 1.951693643079027e-05, "loss": 1.9735, "step": 12310000 }, { "epoch": 60.99, "learning_rate": 1.9515697844364186e-05, "loss": 2.0114, "step": 12310500 }, { "epoch": 60.99, "learning_rate": 1.9514461735110954e-05, "loss": 2.0024, "step": 12311000 }, { "epoch": 61.0, "learning_rate": 1.951322314868487e-05, "loss": 1.9987, "step": 12311500 }, { "epoch": 61.0, "learning_rate": 1.9511984562258785e-05, "loss": 2.0092, "step": 12312000 }, { "epoch": 61.0, "eval_accuracy": 0.6731576153865696, "eval_accuracy_mlm": 0.6323841295524992, "eval_accuracy_nsp": 0.8652920665675657, "eval_loss": 2.305525302886963, "eval_runtime": 146.5793, "eval_samples_per_second": 1739.393, "eval_steps_per_second": 72.48, "step": 12312423 }, { "epoch": 61.0, "learning_rate": 1.9510745975832702e-05, "loss": 1.9919, "step": 12312500 }, { "epoch": 61.0, "learning_rate": 1.950950738940662e-05, "loss": 1.9971, "step": 12313000 }, { "epoch": 61.01, "learning_rate": 1.9508268802980536e-05, "loss": 1.9804, "step": 12313500 }, { "epoch": 61.01, "learning_rate": 1.9507032693727304e-05, "loss": 1.9713, "step": 12314000 }, { "epoch": 61.01, "learning_rate": 1.950579410730122e-05, "loss": 1.9857, "step": 12314500 }, { "epoch": 61.01, "learning_rate": 1.950456047522084e-05, "loss": 1.9462, "step": 12315000 }, { "epoch": 61.02, "learning_rate": 1.9503321888794756e-05, "loss": 1.9679, "step": 12315500 }, { "epoch": 61.02, "learning_rate": 1.9502083302368673e-05, "loss": 1.9591, "step": 12316000 }, { "epoch": 61.02, "learning_rate": 1.950084471594259e-05, "loss": 1.9822, "step": 12316500 }, { "epoch": 61.02, "learning_rate": 1.9499606129516506e-05, "loss": 1.951, "step": 12317000 }, { "epoch": 61.03, "learning_rate": 1.9498370020263272e-05, "loss": 2.0099, "step": 12317500 }, { "epoch": 61.03, "learning_rate": 1.949713143383719e-05, "loss": 1.9837, "step": 12318000 }, { "epoch": 61.03, "learning_rate": 1.9495892847411106e-05, "loss": 1.9694, "step": 12318500 }, { "epoch": 61.03, "learning_rate": 1.9494654260985023e-05, "loss": 1.9543, "step": 12319000 }, { "epoch": 61.04, "learning_rate": 1.949341567455894e-05, "loss": 1.986, "step": 12319500 }, { "epoch": 61.04, "learning_rate": 1.9492177088132856e-05, "loss": 1.9697, "step": 12320000 }, { "epoch": 61.04, "learning_rate": 1.9490938501706773e-05, "loss": 2.0174, "step": 12320500 }, { "epoch": 61.04, "learning_rate": 1.948970239245354e-05, "loss": 2.0077, "step": 12321000 }, { "epoch": 61.04, "learning_rate": 1.9488463806027456e-05, "loss": 1.9961, "step": 12321500 }, { "epoch": 61.05, "learning_rate": 1.9487225219601373e-05, "loss": 1.9782, "step": 12322000 }, { "epoch": 61.05, "learning_rate": 1.9485989110348145e-05, "loss": 1.9958, "step": 12322500 }, { "epoch": 61.05, "learning_rate": 1.948475052392206e-05, "loss": 1.9658, "step": 12323000 }, { "epoch": 61.05, "learning_rate": 1.9483511937495975e-05, "loss": 1.9744, "step": 12323500 }, { "epoch": 61.06, "learning_rate": 1.9482273351069892e-05, "loss": 1.9714, "step": 12324000 }, { "epoch": 61.06, "learning_rate": 1.9481034764643806e-05, "loss": 1.9415, "step": 12324500 }, { "epoch": 61.06, "learning_rate": 1.9479796178217723e-05, "loss": 1.97, "step": 12325000 }, { "epoch": 61.06, "learning_rate": 1.947855759179164e-05, "loss": 1.9906, "step": 12325500 }, { "epoch": 61.07, "learning_rate": 1.9477321482538412e-05, "loss": 1.9621, "step": 12326000 }, { "epoch": 61.07, "learning_rate": 1.9476082896112325e-05, "loss": 1.9779, "step": 12326500 }, { "epoch": 61.07, "learning_rate": 1.9474844309686242e-05, "loss": 1.9803, "step": 12327000 }, { "epoch": 61.07, "learning_rate": 1.947360572326016e-05, "loss": 1.9753, "step": 12327500 }, { "epoch": 61.08, "learning_rate": 1.9472367136834073e-05, "loss": 1.9657, "step": 12328000 }, { "epoch": 61.08, "learning_rate": 1.947112855040799e-05, "loss": 1.9931, "step": 12328500 }, { "epoch": 61.08, "learning_rate": 1.9469889963981907e-05, "loss": 1.9747, "step": 12329000 }, { "epoch": 61.08, "learning_rate": 1.9468651377555824e-05, "loss": 1.9692, "step": 12329500 }, { "epoch": 61.09, "learning_rate": 1.946741279112974e-05, "loss": 1.9924, "step": 12330000 }, { "epoch": 61.09, "learning_rate": 1.9466174204703657e-05, "loss": 1.9841, "step": 12330500 }, { "epoch": 61.09, "learning_rate": 1.9464935618277574e-05, "loss": 1.9759, "step": 12331000 }, { "epoch": 61.09, "learning_rate": 1.9463697031851488e-05, "loss": 1.9805, "step": 12331500 }, { "epoch": 61.1, "learning_rate": 1.9462458445425405e-05, "loss": 1.9788, "step": 12332000 }, { "epoch": 61.1, "learning_rate": 1.9461219858999322e-05, "loss": 1.9936, "step": 12332500 }, { "epoch": 61.1, "learning_rate": 1.945998127257324e-05, "loss": 1.9875, "step": 12333000 }, { "epoch": 61.1, "learning_rate": 1.9458745163320008e-05, "loss": 1.99, "step": 12333500 }, { "epoch": 61.11, "learning_rate": 1.9457509054066776e-05, "loss": 2.0022, "step": 12334000 }, { "epoch": 61.11, "learning_rate": 1.945627046764069e-05, "loss": 1.9637, "step": 12334500 }, { "epoch": 61.11, "learning_rate": 1.9455031881214607e-05, "loss": 1.9829, "step": 12335000 }, { "epoch": 61.11, "learning_rate": 1.9453793294788524e-05, "loss": 1.9785, "step": 12335500 }, { "epoch": 61.12, "learning_rate": 1.945255470836244e-05, "loss": 1.9831, "step": 12336000 }, { "epoch": 61.12, "learning_rate": 1.9451316121936358e-05, "loss": 1.9741, "step": 12336500 }, { "epoch": 61.12, "learning_rate": 1.9450077535510275e-05, "loss": 1.9964, "step": 12337000 }, { "epoch": 61.12, "learning_rate": 1.9448841426257043e-05, "loss": 1.9673, "step": 12337500 }, { "epoch": 61.13, "learning_rate": 1.9447602839830957e-05, "loss": 1.9768, "step": 12338000 }, { "epoch": 61.13, "learning_rate": 1.9446364253404874e-05, "loss": 1.9559, "step": 12338500 }, { "epoch": 61.13, "learning_rate": 1.944512566697879e-05, "loss": 1.9697, "step": 12339000 }, { "epoch": 61.13, "learning_rate": 1.9443887080552708e-05, "loss": 2.005, "step": 12339500 }, { "epoch": 61.14, "learning_rate": 1.9442650971299476e-05, "loss": 1.9989, "step": 12340000 }, { "epoch": 61.14, "learning_rate": 1.9441412384873393e-05, "loss": 1.98, "step": 12340500 }, { "epoch": 61.14, "learning_rate": 1.944017379844731e-05, "loss": 1.9659, "step": 12341000 }, { "epoch": 61.14, "learning_rate": 1.9438935212021224e-05, "loss": 1.9914, "step": 12341500 }, { "epoch": 61.15, "learning_rate": 1.943769662559514e-05, "loss": 1.9796, "step": 12342000 }, { "epoch": 61.15, "learning_rate": 1.9436458039169058e-05, "loss": 1.9423, "step": 12342500 }, { "epoch": 61.15, "learning_rate": 1.9435219452742975e-05, "loss": 1.9903, "step": 12343000 }, { "epoch": 61.15, "learning_rate": 1.943398086631689e-05, "loss": 1.9952, "step": 12343500 }, { "epoch": 61.16, "learning_rate": 1.9432742279890805e-05, "loss": 1.9998, "step": 12344000 }, { "epoch": 61.16, "learning_rate": 1.9431503693464722e-05, "loss": 1.9663, "step": 12344500 }, { "epoch": 61.16, "learning_rate": 1.943026510703864e-05, "loss": 1.9669, "step": 12345000 }, { "epoch": 61.16, "learning_rate": 1.9429026520612556e-05, "loss": 1.9783, "step": 12345500 }, { "epoch": 61.17, "learning_rate": 1.9427787934186473e-05, "loss": 1.9981, "step": 12346000 }, { "epoch": 61.17, "learning_rate": 1.942654934776039e-05, "loss": 1.9701, "step": 12346500 }, { "epoch": 61.17, "learning_rate": 1.9425313238507155e-05, "loss": 1.9703, "step": 12347000 }, { "epoch": 61.17, "learning_rate": 1.9424074652081072e-05, "loss": 1.9615, "step": 12347500 }, { "epoch": 61.18, "learning_rate": 1.942283606565499e-05, "loss": 1.9768, "step": 12348000 }, { "epoch": 61.18, "learning_rate": 1.9421597479228906e-05, "loss": 2.0039, "step": 12348500 }, { "epoch": 61.18, "learning_rate": 1.9420358892802823e-05, "loss": 2.0066, "step": 12349000 }, { "epoch": 61.18, "learning_rate": 1.9419122783549592e-05, "loss": 2.0025, "step": 12349500 }, { "epoch": 61.19, "learning_rate": 1.9417884197123505e-05, "loss": 1.9826, "step": 12350000 }, { "epoch": 61.19, "learning_rate": 1.9416645610697422e-05, "loss": 1.966, "step": 12350500 }, { "epoch": 61.19, "learning_rate": 1.941540702427134e-05, "loss": 1.9791, "step": 12351000 }, { "epoch": 61.19, "learning_rate": 1.9414170915018108e-05, "loss": 1.9814, "step": 12351500 }, { "epoch": 61.2, "learning_rate": 1.9412932328592025e-05, "loss": 1.9504, "step": 12352000 }, { "epoch": 61.2, "learning_rate": 1.9411693742165942e-05, "loss": 1.9766, "step": 12352500 }, { "epoch": 61.2, "learning_rate": 1.941045515573986e-05, "loss": 1.9588, "step": 12353000 }, { "epoch": 61.2, "learning_rate": 1.9409219046486627e-05, "loss": 1.9552, "step": 12353500 }, { "epoch": 61.21, "learning_rate": 1.9407980460060544e-05, "loss": 1.9913, "step": 12354000 }, { "epoch": 61.21, "learning_rate": 1.940674187363446e-05, "loss": 2.0166, "step": 12354500 }, { "epoch": 61.21, "learning_rate": 1.9405503287208375e-05, "loss": 1.9746, "step": 12355000 }, { "epoch": 61.21, "learning_rate": 1.9404264700782292e-05, "loss": 1.9766, "step": 12355500 }, { "epoch": 61.22, "learning_rate": 1.940302611435621e-05, "loss": 1.9679, "step": 12356000 }, { "epoch": 61.22, "learning_rate": 1.9401790005102978e-05, "loss": 2.0037, "step": 12356500 }, { "epoch": 61.22, "learning_rate": 1.9400551418676894e-05, "loss": 1.9713, "step": 12357000 }, { "epoch": 61.22, "learning_rate": 1.939931283225081e-05, "loss": 1.9661, "step": 12357500 }, { "epoch": 61.23, "learning_rate": 1.939807424582473e-05, "loss": 1.997, "step": 12358000 }, { "epoch": 61.23, "learning_rate": 1.9396835659398642e-05, "loss": 1.9801, "step": 12358500 }, { "epoch": 61.23, "learning_rate": 1.939559707297256e-05, "loss": 1.9771, "step": 12359000 }, { "epoch": 61.23, "learning_rate": 1.9394358486546472e-05, "loss": 1.9958, "step": 12359500 }, { "epoch": 61.24, "learning_rate": 1.939311990012039e-05, "loss": 1.9826, "step": 12360000 }, { "epoch": 61.24, "learning_rate": 1.9391881313694306e-05, "loss": 1.9562, "step": 12360500 }, { "epoch": 61.24, "learning_rate": 1.9390642727268223e-05, "loss": 2.0018, "step": 12361000 }, { "epoch": 61.24, "learning_rate": 1.938940414084214e-05, "loss": 1.9871, "step": 12361500 }, { "epoch": 61.25, "learning_rate": 1.9388165554416057e-05, "loss": 1.9975, "step": 12362000 }, { "epoch": 61.25, "learning_rate": 1.9386926967989974e-05, "loss": 1.9881, "step": 12362500 }, { "epoch": 61.25, "learning_rate": 1.938568838156389e-05, "loss": 1.9736, "step": 12363000 }, { "epoch": 61.25, "learning_rate": 1.9384449795137808e-05, "loss": 1.976, "step": 12363500 }, { "epoch": 61.26, "learning_rate": 1.9383213685884573e-05, "loss": 1.9967, "step": 12364000 }, { "epoch": 61.26, "learning_rate": 1.938197509945849e-05, "loss": 1.9676, "step": 12364500 }, { "epoch": 61.26, "learning_rate": 1.9380736513032407e-05, "loss": 1.9931, "step": 12365000 }, { "epoch": 61.26, "learning_rate": 1.9379497926606324e-05, "loss": 1.9991, "step": 12365500 }, { "epoch": 61.27, "learning_rate": 1.937825934018024e-05, "loss": 1.9785, "step": 12366000 }, { "epoch": 61.27, "learning_rate": 1.9377020753754158e-05, "loss": 1.9842, "step": 12366500 }, { "epoch": 61.27, "learning_rate": 1.9375784644500923e-05, "loss": 1.9729, "step": 12367000 }, { "epoch": 61.27, "learning_rate": 1.937454605807484e-05, "loss": 1.9731, "step": 12367500 }, { "epoch": 61.28, "learning_rate": 1.9373307471648757e-05, "loss": 1.9892, "step": 12368000 }, { "epoch": 61.28, "learning_rate": 1.9372071362395526e-05, "loss": 1.9678, "step": 12368500 }, { "epoch": 61.28, "learning_rate": 1.9370832775969443e-05, "loss": 1.9798, "step": 12369000 }, { "epoch": 61.28, "learning_rate": 1.9369594189543356e-05, "loss": 1.9755, "step": 12369500 }, { "epoch": 61.29, "learning_rate": 1.9368355603117273e-05, "loss": 1.9848, "step": 12370000 }, { "epoch": 61.29, "learning_rate": 1.936711701669119e-05, "loss": 1.9816, "step": 12370500 }, { "epoch": 61.29, "learning_rate": 1.9365880907437962e-05, "loss": 2.0236, "step": 12371000 }, { "epoch": 61.29, "learning_rate": 1.936464232101188e-05, "loss": 1.9708, "step": 12371500 }, { "epoch": 61.3, "learning_rate": 1.9363403734585793e-05, "loss": 1.9722, "step": 12372000 }, { "epoch": 61.3, "learning_rate": 1.936216514815971e-05, "loss": 1.9912, "step": 12372500 }, { "epoch": 61.3, "learning_rate": 1.9360926561733623e-05, "loss": 1.995, "step": 12373000 }, { "epoch": 61.3, "learning_rate": 1.935968797530754e-05, "loss": 1.9854, "step": 12373500 }, { "epoch": 61.31, "learning_rate": 1.9358449388881457e-05, "loss": 1.9875, "step": 12374000 }, { "epoch": 61.31, "learning_rate": 1.9357210802455374e-05, "loss": 2.0039, "step": 12374500 }, { "epoch": 61.31, "learning_rate": 1.9355974693202143e-05, "loss": 1.9977, "step": 12375000 }, { "epoch": 61.31, "learning_rate": 1.9354738583948912e-05, "loss": 1.9884, "step": 12375500 }, { "epoch": 61.31, "learning_rate": 1.935349999752283e-05, "loss": 1.9801, "step": 12376000 }, { "epoch": 61.32, "learning_rate": 1.9352261411096746e-05, "loss": 1.9813, "step": 12376500 }, { "epoch": 61.32, "learning_rate": 1.9351022824670663e-05, "loss": 1.9977, "step": 12377000 }, { "epoch": 61.32, "learning_rate": 1.9349786715417428e-05, "loss": 1.9955, "step": 12377500 }, { "epoch": 61.32, "learning_rate": 1.9348548128991345e-05, "loss": 1.9687, "step": 12378000 }, { "epoch": 61.33, "learning_rate": 1.9347309542565262e-05, "loss": 2.0112, "step": 12378500 }, { "epoch": 61.33, "learning_rate": 1.934607095613918e-05, "loss": 1.9562, "step": 12379000 }, { "epoch": 61.33, "learning_rate": 1.9344832369713096e-05, "loss": 1.9989, "step": 12379500 }, { "epoch": 61.33, "learning_rate": 1.9343598737632716e-05, "loss": 1.9758, "step": 12380000 }, { "epoch": 61.34, "learning_rate": 1.934236015120663e-05, "loss": 2.0219, "step": 12380500 }, { "epoch": 61.34, "learning_rate": 1.9341121564780547e-05, "loss": 1.9699, "step": 12381000 }, { "epoch": 61.34, "learning_rate": 1.9339882978354464e-05, "loss": 1.9829, "step": 12381500 }, { "epoch": 61.34, "learning_rate": 1.933864439192838e-05, "loss": 1.9942, "step": 12382000 }, { "epoch": 61.35, "learning_rate": 1.9337405805502298e-05, "loss": 1.985, "step": 12382500 }, { "epoch": 61.35, "learning_rate": 1.9336167219076215e-05, "loss": 1.995, "step": 12383000 }, { "epoch": 61.35, "learning_rate": 1.9334928632650128e-05, "loss": 1.971, "step": 12383500 }, { "epoch": 61.35, "learning_rate": 1.9333690046224045e-05, "loss": 1.9845, "step": 12384000 }, { "epoch": 61.36, "learning_rate": 1.9332453936970814e-05, "loss": 1.9906, "step": 12384500 }, { "epoch": 61.36, "learning_rate": 1.9331217827717583e-05, "loss": 1.9931, "step": 12385000 }, { "epoch": 61.36, "learning_rate": 1.93299792412915e-05, "loss": 2.0024, "step": 12385500 }, { "epoch": 61.36, "learning_rate": 1.9328740654865417e-05, "loss": 1.9839, "step": 12386000 }, { "epoch": 61.37, "learning_rate": 1.9327502068439333e-05, "loss": 1.9747, "step": 12386500 }, { "epoch": 61.37, "learning_rate": 1.9326265959186102e-05, "loss": 1.9767, "step": 12387000 }, { "epoch": 61.37, "learning_rate": 1.932502737276002e-05, "loss": 1.9928, "step": 12387500 }, { "epoch": 61.37, "learning_rate": 1.9323788786333936e-05, "loss": 1.9932, "step": 12388000 }, { "epoch": 61.38, "learning_rate": 1.932255019990785e-05, "loss": 1.9784, "step": 12388500 }, { "epoch": 61.38, "learning_rate": 1.9321311613481767e-05, "loss": 1.9995, "step": 12389000 }, { "epoch": 61.38, "learning_rate": 1.9320073027055684e-05, "loss": 2.0032, "step": 12389500 }, { "epoch": 61.38, "learning_rate": 1.93188344406296e-05, "loss": 1.9966, "step": 12390000 }, { "epoch": 61.39, "learning_rate": 1.9317595854203517e-05, "loss": 1.983, "step": 12390500 }, { "epoch": 61.39, "learning_rate": 1.931635726777743e-05, "loss": 1.9992, "step": 12391000 }, { "epoch": 61.39, "learning_rate": 1.9315118681351348e-05, "loss": 1.9809, "step": 12391500 }, { "epoch": 61.39, "learning_rate": 1.9313880094925265e-05, "loss": 1.9905, "step": 12392000 }, { "epoch": 61.4, "learning_rate": 1.9312641508499178e-05, "loss": 2.02, "step": 12392500 }, { "epoch": 61.4, "learning_rate": 1.9311402922073095e-05, "loss": 1.9893, "step": 12393000 }, { "epoch": 61.4, "learning_rate": 1.9310166812819867e-05, "loss": 1.975, "step": 12393500 }, { "epoch": 61.4, "learning_rate": 1.930892822639378e-05, "loss": 2.0005, "step": 12394000 }, { "epoch": 61.41, "learning_rate": 1.9307689639967698e-05, "loss": 1.9766, "step": 12394500 }, { "epoch": 61.41, "learning_rate": 1.9306451053541615e-05, "loss": 1.9754, "step": 12395000 }, { "epoch": 61.41, "learning_rate": 1.9305212467115532e-05, "loss": 1.9953, "step": 12395500 }, { "epoch": 61.41, "learning_rate": 1.9303973880689445e-05, "loss": 1.9846, "step": 12396000 }, { "epoch": 61.42, "learning_rate": 1.9302737771436217e-05, "loss": 1.9896, "step": 12396500 }, { "epoch": 61.42, "learning_rate": 1.9301499185010134e-05, "loss": 1.9777, "step": 12397000 }, { "epoch": 61.42, "learning_rate": 1.9300260598584048e-05, "loss": 1.9823, "step": 12397500 }, { "epoch": 61.42, "learning_rate": 1.9299024489330817e-05, "loss": 1.9763, "step": 12398000 }, { "epoch": 61.43, "learning_rate": 1.9297785902904734e-05, "loss": 1.9693, "step": 12398500 }, { "epoch": 61.43, "learning_rate": 1.929654731647865e-05, "loss": 1.9754, "step": 12399000 }, { "epoch": 61.43, "learning_rate": 1.9295308730052568e-05, "loss": 1.9743, "step": 12399500 }, { "epoch": 61.43, "learning_rate": 1.9294070143626484e-05, "loss": 1.9877, "step": 12400000 }, { "epoch": 61.44, "learning_rate": 1.92928315572004e-05, "loss": 1.9929, "step": 12400500 }, { "epoch": 61.44, "learning_rate": 1.9291592970774315e-05, "loss": 1.9846, "step": 12401000 }, { "epoch": 61.44, "learning_rate": 1.9290354384348232e-05, "loss": 1.9771, "step": 12401500 }, { "epoch": 61.44, "learning_rate": 1.9289115797922145e-05, "loss": 1.9876, "step": 12402000 }, { "epoch": 61.45, "learning_rate": 1.9287877211496062e-05, "loss": 2.014, "step": 12402500 }, { "epoch": 61.45, "learning_rate": 1.928663862506998e-05, "loss": 1.9885, "step": 12403000 }, { "epoch": 61.45, "learning_rate": 1.9285400038643896e-05, "loss": 1.9817, "step": 12403500 }, { "epoch": 61.45, "learning_rate": 1.9284161452217813e-05, "loss": 1.9793, "step": 12404000 }, { "epoch": 61.46, "learning_rate": 1.928292286579173e-05, "loss": 2.0005, "step": 12404500 }, { "epoch": 61.46, "learning_rate": 1.9281684279365647e-05, "loss": 1.9872, "step": 12405000 }, { "epoch": 61.46, "learning_rate": 1.9280445692939564e-05, "loss": 1.9969, "step": 12405500 }, { "epoch": 61.46, "learning_rate": 1.927920958368633e-05, "loss": 1.9797, "step": 12406000 }, { "epoch": 61.47, "learning_rate": 1.9277970997260246e-05, "loss": 1.9915, "step": 12406500 }, { "epoch": 61.47, "learning_rate": 1.9276732410834163e-05, "loss": 1.9882, "step": 12407000 }, { "epoch": 61.47, "learning_rate": 1.927549382440808e-05, "loss": 1.9814, "step": 12407500 }, { "epoch": 61.47, "learning_rate": 1.9274255237981997e-05, "loss": 1.9762, "step": 12408000 }, { "epoch": 61.48, "learning_rate": 1.9273019128728766e-05, "loss": 1.9942, "step": 12408500 }, { "epoch": 61.48, "learning_rate": 1.9271783019475535e-05, "loss": 1.9879, "step": 12409000 }, { "epoch": 61.48, "learning_rate": 1.9270546910222303e-05, "loss": 2.006, "step": 12409500 }, { "epoch": 61.48, "learning_rate": 1.9269308323796217e-05, "loss": 1.9593, "step": 12410000 }, { "epoch": 61.49, "learning_rate": 1.9268069737370134e-05, "loss": 1.9868, "step": 12410500 }, { "epoch": 61.49, "learning_rate": 1.926683115094405e-05, "loss": 1.9821, "step": 12411000 }, { "epoch": 61.49, "learning_rate": 1.9265592564517968e-05, "loss": 1.9838, "step": 12411500 }, { "epoch": 61.49, "learning_rate": 1.9264353978091885e-05, "loss": 1.9796, "step": 12412000 }, { "epoch": 61.5, "learning_rate": 1.9263117868838654e-05, "loss": 1.9754, "step": 12412500 }, { "epoch": 61.5, "learning_rate": 1.926187928241257e-05, "loss": 1.9808, "step": 12413000 }, { "epoch": 61.5, "learning_rate": 1.9260640695986484e-05, "loss": 2.0007, "step": 12413500 }, { "epoch": 61.5, "learning_rate": 1.92594021095604e-05, "loss": 1.9783, "step": 12414000 }, { "epoch": 61.51, "learning_rate": 1.9258163523134318e-05, "loss": 1.9808, "step": 12414500 }, { "epoch": 61.51, "learning_rate": 1.9256924936708235e-05, "loss": 1.9916, "step": 12415000 }, { "epoch": 61.51, "learning_rate": 1.9255688827455004e-05, "loss": 1.9785, "step": 12415500 }, { "epoch": 61.51, "learning_rate": 1.925445024102892e-05, "loss": 1.993, "step": 12416000 }, { "epoch": 61.52, "learning_rate": 1.9253211654602834e-05, "loss": 1.9895, "step": 12416500 }, { "epoch": 61.52, "learning_rate": 1.925197306817675e-05, "loss": 1.982, "step": 12417000 }, { "epoch": 61.52, "learning_rate": 1.9250734481750668e-05, "loss": 1.9807, "step": 12417500 }, { "epoch": 61.52, "learning_rate": 1.9249495895324585e-05, "loss": 1.9862, "step": 12418000 }, { "epoch": 61.53, "learning_rate": 1.9248257308898502e-05, "loss": 1.985, "step": 12418500 }, { "epoch": 61.53, "learning_rate": 1.924701872247242e-05, "loss": 2.007, "step": 12419000 }, { "epoch": 61.53, "learning_rate": 1.9245780136046336e-05, "loss": 1.9672, "step": 12419500 }, { "epoch": 61.53, "learning_rate": 1.9244541549620253e-05, "loss": 1.9905, "step": 12420000 }, { "epoch": 61.54, "learning_rate": 1.9243305440367018e-05, "loss": 1.9953, "step": 12420500 }, { "epoch": 61.54, "learning_rate": 1.9242071808286642e-05, "loss": 1.984, "step": 12421000 }, { "epoch": 61.54, "learning_rate": 1.9240833221860556e-05, "loss": 1.9823, "step": 12421500 }, { "epoch": 61.54, "learning_rate": 1.9239594635434473e-05, "loss": 1.9958, "step": 12422000 }, { "epoch": 61.55, "learning_rate": 1.923835604900839e-05, "loss": 1.9934, "step": 12422500 }, { "epoch": 61.55, "learning_rate": 1.9237117462582306e-05, "loss": 2.0089, "step": 12423000 }, { "epoch": 61.55, "learning_rate": 1.9235881353329075e-05, "loss": 1.9705, "step": 12423500 }, { "epoch": 61.55, "learning_rate": 1.9234642766902992e-05, "loss": 2.0054, "step": 12424000 }, { "epoch": 61.56, "learning_rate": 1.9233406657649758e-05, "loss": 1.9724, "step": 12424500 }, { "epoch": 61.56, "learning_rate": 1.9232168071223674e-05, "loss": 1.9935, "step": 12425000 }, { "epoch": 61.56, "learning_rate": 1.923092948479759e-05, "loss": 1.9912, "step": 12425500 }, { "epoch": 61.56, "learning_rate": 1.922969089837151e-05, "loss": 1.9679, "step": 12426000 }, { "epoch": 61.57, "learning_rate": 1.9228452311945425e-05, "loss": 1.9906, "step": 12426500 }, { "epoch": 61.57, "learning_rate": 1.9227213725519342e-05, "loss": 1.9876, "step": 12427000 }, { "epoch": 61.57, "learning_rate": 1.9225975139093256e-05, "loss": 1.9968, "step": 12427500 }, { "epoch": 61.57, "learning_rate": 1.9224736552667173e-05, "loss": 1.9812, "step": 12428000 }, { "epoch": 61.58, "learning_rate": 1.922349796624109e-05, "loss": 1.9873, "step": 12428500 }, { "epoch": 61.58, "learning_rate": 1.9222259379815007e-05, "loss": 1.9896, "step": 12429000 }, { "epoch": 61.58, "learning_rate": 1.9221020793388923e-05, "loss": 2.0014, "step": 12429500 }, { "epoch": 61.58, "learning_rate": 1.921978716130854e-05, "loss": 1.9767, "step": 12430000 }, { "epoch": 61.58, "learning_rate": 1.9218548574882458e-05, "loss": 1.988, "step": 12430500 }, { "epoch": 61.59, "learning_rate": 1.9217309988456375e-05, "loss": 1.9779, "step": 12431000 }, { "epoch": 61.59, "learning_rate": 1.921607140203029e-05, "loss": 2.002, "step": 12431500 }, { "epoch": 61.59, "learning_rate": 1.921483281560421e-05, "loss": 1.9915, "step": 12432000 }, { "epoch": 61.59, "learning_rate": 1.9213594229178125e-05, "loss": 2.0003, "step": 12432500 }, { "epoch": 61.6, "learning_rate": 1.9212355642752042e-05, "loss": 1.9523, "step": 12433000 }, { "epoch": 61.6, "learning_rate": 1.921111705632596e-05, "loss": 1.9745, "step": 12433500 }, { "epoch": 61.6, "learning_rate": 1.9209880947072725e-05, "loss": 1.9812, "step": 12434000 }, { "epoch": 61.6, "learning_rate": 1.920864236064664e-05, "loss": 1.9908, "step": 12434500 }, { "epoch": 61.61, "learning_rate": 1.920740377422056e-05, "loss": 1.9835, "step": 12435000 }, { "epoch": 61.61, "learning_rate": 1.9206165187794475e-05, "loss": 1.9704, "step": 12435500 }, { "epoch": 61.61, "learning_rate": 1.9204926601368392e-05, "loss": 2.0012, "step": 12436000 }, { "epoch": 61.61, "learning_rate": 1.9203690492115158e-05, "loss": 1.9852, "step": 12436500 }, { "epoch": 61.62, "learning_rate": 1.9202451905689075e-05, "loss": 1.9791, "step": 12437000 }, { "epoch": 61.62, "learning_rate": 1.920121331926299e-05, "loss": 1.9826, "step": 12437500 }, { "epoch": 61.62, "learning_rate": 1.919997473283691e-05, "loss": 1.9632, "step": 12438000 }, { "epoch": 61.62, "learning_rate": 1.9198736146410826e-05, "loss": 1.9741, "step": 12438500 }, { "epoch": 61.63, "learning_rate": 1.9197497559984742e-05, "loss": 2.0041, "step": 12439000 }, { "epoch": 61.63, "learning_rate": 1.919625897355866e-05, "loss": 1.9832, "step": 12439500 }, { "epoch": 61.63, "learning_rate": 1.9195020387132573e-05, "loss": 1.9716, "step": 12440000 }, { "epoch": 61.63, "learning_rate": 1.919378180070649e-05, "loss": 1.9778, "step": 12440500 }, { "epoch": 61.64, "learning_rate": 1.919254569145326e-05, "loss": 1.9869, "step": 12441000 }, { "epoch": 61.64, "learning_rate": 1.9191307105027176e-05, "loss": 1.9614, "step": 12441500 }, { "epoch": 61.64, "learning_rate": 1.9190068518601093e-05, "loss": 2.0067, "step": 12442000 }, { "epoch": 61.64, "learning_rate": 1.918882993217501e-05, "loss": 1.9797, "step": 12442500 }, { "epoch": 61.65, "learning_rate": 1.9187593822921775e-05, "loss": 1.9846, "step": 12443000 }, { "epoch": 61.65, "learning_rate": 1.9186355236495692e-05, "loss": 2.0056, "step": 12443500 }, { "epoch": 61.65, "learning_rate": 1.918511665006961e-05, "loss": 1.9619, "step": 12444000 }, { "epoch": 61.65, "learning_rate": 1.9183878063643526e-05, "loss": 2.0116, "step": 12444500 }, { "epoch": 61.66, "learning_rate": 1.9182639477217443e-05, "loss": 1.9978, "step": 12445000 }, { "epoch": 61.66, "learning_rate": 1.9181403367964208e-05, "loss": 1.9889, "step": 12445500 }, { "epoch": 61.66, "learning_rate": 1.9180164781538125e-05, "loss": 1.9792, "step": 12446000 }, { "epoch": 61.66, "learning_rate": 1.9178926195112042e-05, "loss": 1.9789, "step": 12446500 }, { "epoch": 61.67, "learning_rate": 1.917768760868596e-05, "loss": 1.9707, "step": 12447000 }, { "epoch": 61.67, "learning_rate": 1.9176449022259876e-05, "loss": 2.0148, "step": 12447500 }, { "epoch": 61.67, "learning_rate": 1.9175210435833793e-05, "loss": 1.9985, "step": 12448000 }, { "epoch": 61.67, "learning_rate": 1.917397184940771e-05, "loss": 1.9961, "step": 12448500 }, { "epoch": 61.68, "learning_rate": 1.9172735740154475e-05, "loss": 1.9903, "step": 12449000 }, { "epoch": 61.68, "learning_rate": 1.9171497153728392e-05, "loss": 2.0024, "step": 12449500 }, { "epoch": 61.68, "learning_rate": 1.917025856730231e-05, "loss": 1.9927, "step": 12450000 }, { "epoch": 61.68, "learning_rate": 1.9169019980876226e-05, "loss": 1.9882, "step": 12450500 }, { "epoch": 61.69, "learning_rate": 1.9167781394450143e-05, "loss": 1.9894, "step": 12451000 }, { "epoch": 61.69, "learning_rate": 1.916654280802406e-05, "loss": 1.992, "step": 12451500 }, { "epoch": 61.69, "learning_rate": 1.9165304221597977e-05, "loss": 1.9907, "step": 12452000 }, { "epoch": 61.69, "learning_rate": 1.916406563517189e-05, "loss": 1.9689, "step": 12452500 }, { "epoch": 61.7, "learning_rate": 1.916282952591866e-05, "loss": 1.9882, "step": 12453000 }, { "epoch": 61.7, "learning_rate": 1.9161590939492576e-05, "loss": 1.989, "step": 12453500 }, { "epoch": 61.7, "learning_rate": 1.9160354830239348e-05, "loss": 1.9969, "step": 12454000 }, { "epoch": 61.7, "learning_rate": 1.915911624381326e-05, "loss": 1.9881, "step": 12454500 }, { "epoch": 61.71, "learning_rate": 1.915787765738718e-05, "loss": 1.9963, "step": 12455000 }, { "epoch": 61.71, "learning_rate": 1.9156639070961095e-05, "loss": 1.988, "step": 12455500 }, { "epoch": 61.71, "learning_rate": 1.9155402961707864e-05, "loss": 1.9891, "step": 12456000 }, { "epoch": 61.71, "learning_rate": 1.915416437528178e-05, "loss": 1.9774, "step": 12456500 }, { "epoch": 61.72, "learning_rate": 1.9152925788855698e-05, "loss": 1.9728, "step": 12457000 }, { "epoch": 61.72, "learning_rate": 1.915168720242961e-05, "loss": 1.944, "step": 12457500 }, { "epoch": 61.72, "learning_rate": 1.915044861600353e-05, "loss": 1.9948, "step": 12458000 }, { "epoch": 61.72, "learning_rate": 1.9149212506750297e-05, "loss": 1.9923, "step": 12458500 }, { "epoch": 61.73, "learning_rate": 1.9147973920324214e-05, "loss": 1.9804, "step": 12459000 }, { "epoch": 61.73, "learning_rate": 1.914673533389813e-05, "loss": 2.0043, "step": 12459500 }, { "epoch": 61.73, "learning_rate": 1.9145499224644897e-05, "loss": 1.9943, "step": 12460000 }, { "epoch": 61.73, "learning_rate": 1.9144260638218814e-05, "loss": 2.0173, "step": 12460500 }, { "epoch": 61.74, "learning_rate": 1.914302205179273e-05, "loss": 1.996, "step": 12461000 }, { "epoch": 61.74, "learning_rate": 1.9141783465366647e-05, "loss": 1.9717, "step": 12461500 }, { "epoch": 61.74, "learning_rate": 1.9140544878940564e-05, "loss": 1.9782, "step": 12462000 }, { "epoch": 61.74, "learning_rate": 1.913930629251448e-05, "loss": 1.9903, "step": 12462500 }, { "epoch": 61.75, "learning_rate": 1.9138067706088398e-05, "loss": 2.0132, "step": 12463000 }, { "epoch": 61.75, "learning_rate": 1.9136829119662315e-05, "loss": 1.962, "step": 12463500 }, { "epoch": 61.75, "learning_rate": 1.913559053323623e-05, "loss": 1.9882, "step": 12464000 }, { "epoch": 61.75, "learning_rate": 1.9134354423982997e-05, "loss": 1.9986, "step": 12464500 }, { "epoch": 61.76, "learning_rate": 1.9133115837556914e-05, "loss": 1.991, "step": 12465000 }, { "epoch": 61.76, "learning_rate": 1.913187725113083e-05, "loss": 1.9956, "step": 12465500 }, { "epoch": 61.76, "learning_rate": 1.9130638664704748e-05, "loss": 1.9973, "step": 12466000 }, { "epoch": 61.76, "learning_rate": 1.9129400078278665e-05, "loss": 1.9973, "step": 12466500 }, { "epoch": 61.77, "learning_rate": 1.912816149185258e-05, "loss": 1.9732, "step": 12467000 }, { "epoch": 61.77, "learning_rate": 1.9126925382599348e-05, "loss": 1.9634, "step": 12467500 }, { "epoch": 61.77, "learning_rate": 1.9125686796173264e-05, "loss": 1.9906, "step": 12468000 }, { "epoch": 61.77, "learning_rate": 1.912444820974718e-05, "loss": 1.989, "step": 12468500 }, { "epoch": 61.78, "learning_rate": 1.91232096233211e-05, "loss": 1.9951, "step": 12469000 }, { "epoch": 61.78, "learning_rate": 1.9121971036895015e-05, "loss": 2.0092, "step": 12469500 }, { "epoch": 61.78, "learning_rate": 1.912073245046893e-05, "loss": 1.9972, "step": 12470000 }, { "epoch": 61.78, "learning_rate": 1.9119496341215698e-05, "loss": 1.9651, "step": 12470500 }, { "epoch": 61.79, "learning_rate": 1.9118260231962466e-05, "loss": 1.9745, "step": 12471000 }, { "epoch": 61.79, "learning_rate": 1.9117021645536383e-05, "loss": 1.986, "step": 12471500 }, { "epoch": 61.79, "learning_rate": 1.9115783059110297e-05, "loss": 1.9855, "step": 12472000 }, { "epoch": 61.79, "learning_rate": 1.9114544472684214e-05, "loss": 1.985, "step": 12472500 }, { "epoch": 61.8, "learning_rate": 1.911330588625813e-05, "loss": 1.9774, "step": 12473000 }, { "epoch": 61.8, "learning_rate": 1.9112067299832048e-05, "loss": 1.973, "step": 12473500 }, { "epoch": 61.8, "learning_rate": 1.9110828713405965e-05, "loss": 2.0038, "step": 12474000 }, { "epoch": 61.8, "learning_rate": 1.910959012697988e-05, "loss": 1.9978, "step": 12474500 }, { "epoch": 61.81, "learning_rate": 1.91083515405538e-05, "loss": 1.9966, "step": 12475000 }, { "epoch": 61.81, "learning_rate": 1.9107112954127715e-05, "loss": 2.0162, "step": 12475500 }, { "epoch": 61.81, "learning_rate": 1.9105874367701632e-05, "loss": 1.9911, "step": 12476000 }, { "epoch": 61.81, "learning_rate": 1.9104635781275546e-05, "loss": 2.0064, "step": 12476500 }, { "epoch": 61.82, "learning_rate": 1.9103399672022315e-05, "loss": 1.9668, "step": 12477000 }, { "epoch": 61.82, "learning_rate": 1.9102163562769083e-05, "loss": 1.9968, "step": 12477500 }, { "epoch": 61.82, "learning_rate": 1.9100924976343e-05, "loss": 1.9688, "step": 12478000 }, { "epoch": 61.82, "learning_rate": 1.9099686389916914e-05, "loss": 1.9979, "step": 12478500 }, { "epoch": 61.83, "learning_rate": 1.909844780349083e-05, "loss": 1.978, "step": 12479000 }, { "epoch": 61.83, "learning_rate": 1.9097211694237603e-05, "loss": 1.9782, "step": 12479500 }, { "epoch": 61.83, "learning_rate": 1.909597310781152e-05, "loss": 1.9879, "step": 12480000 }, { "epoch": 61.83, "learning_rate": 1.9094739475731137e-05, "loss": 1.9651, "step": 12480500 }, { "epoch": 61.84, "learning_rate": 1.9093500889305054e-05, "loss": 1.9904, "step": 12481000 }, { "epoch": 61.84, "learning_rate": 1.909226230287897e-05, "loss": 1.9654, "step": 12481500 }, { "epoch": 61.84, "learning_rate": 1.9091023716452888e-05, "loss": 1.9952, "step": 12482000 }, { "epoch": 61.84, "learning_rate": 1.9089785130026805e-05, "loss": 1.9772, "step": 12482500 }, { "epoch": 61.85, "learning_rate": 1.9088546543600722e-05, "loss": 2.005, "step": 12483000 }, { "epoch": 61.85, "learning_rate": 1.9087310434347487e-05, "loss": 1.9917, "step": 12483500 }, { "epoch": 61.85, "learning_rate": 1.9086071847921404e-05, "loss": 1.989, "step": 12484000 }, { "epoch": 61.85, "learning_rate": 1.908483326149532e-05, "loss": 2.0017, "step": 12484500 }, { "epoch": 61.86, "learning_rate": 1.9083594675069238e-05, "loss": 1.982, "step": 12485000 }, { "epoch": 61.86, "learning_rate": 1.9082356088643155e-05, "loss": 2.0051, "step": 12485500 }, { "epoch": 61.86, "learning_rate": 1.9081117502217072e-05, "loss": 2.0043, "step": 12486000 }, { "epoch": 61.86, "learning_rate": 1.9079878915790986e-05, "loss": 1.9768, "step": 12486500 }, { "epoch": 61.86, "learning_rate": 1.9078640329364902e-05, "loss": 1.9978, "step": 12487000 }, { "epoch": 61.87, "learning_rate": 1.907740174293882e-05, "loss": 1.9774, "step": 12487500 }, { "epoch": 61.87, "learning_rate": 1.9076163156512736e-05, "loss": 1.9823, "step": 12488000 }, { "epoch": 61.87, "learning_rate": 1.9074927047259505e-05, "loss": 2.0058, "step": 12488500 }, { "epoch": 61.87, "learning_rate": 1.9073690938006274e-05, "loss": 1.9786, "step": 12489000 }, { "epoch": 61.88, "learning_rate": 1.9072452351580187e-05, "loss": 1.9891, "step": 12489500 }, { "epoch": 61.88, "learning_rate": 1.9071213765154104e-05, "loss": 2.007, "step": 12490000 }, { "epoch": 61.88, "learning_rate": 1.906997517872802e-05, "loss": 1.9815, "step": 12490500 }, { "epoch": 61.88, "learning_rate": 1.9068736592301938e-05, "loss": 1.9982, "step": 12491000 }, { "epoch": 61.89, "learning_rate": 1.9067498005875855e-05, "loss": 1.9718, "step": 12491500 }, { "epoch": 61.89, "learning_rate": 1.9066259419449772e-05, "loss": 1.992, "step": 12492000 }, { "epoch": 61.89, "learning_rate": 1.906502083302369e-05, "loss": 1.977, "step": 12492500 }, { "epoch": 61.89, "learning_rate": 1.9063782246597603e-05, "loss": 2.0086, "step": 12493000 }, { "epoch": 61.9, "learning_rate": 1.906254613734437e-05, "loss": 1.9665, "step": 12493500 }, { "epoch": 61.9, "learning_rate": 1.906130755091829e-05, "loss": 1.9696, "step": 12494000 }, { "epoch": 61.9, "learning_rate": 1.9060068964492205e-05, "loss": 1.9883, "step": 12494500 }, { "epoch": 61.9, "learning_rate": 1.9058830378066122e-05, "loss": 1.9727, "step": 12495000 }, { "epoch": 61.91, "learning_rate": 1.905759179164004e-05, "loss": 2.0074, "step": 12495500 }, { "epoch": 61.91, "learning_rate": 1.9056353205213953e-05, "loss": 1.9906, "step": 12496000 }, { "epoch": 61.91, "learning_rate": 1.905511709596072e-05, "loss": 2.0129, "step": 12496500 }, { "epoch": 61.91, "learning_rate": 1.905387850953464e-05, "loss": 1.9982, "step": 12497000 }, { "epoch": 61.92, "learning_rate": 1.9052639923108555e-05, "loss": 1.9915, "step": 12497500 }, { "epoch": 61.92, "learning_rate": 1.9051401336682472e-05, "loss": 1.9789, "step": 12498000 }, { "epoch": 61.92, "learning_rate": 1.905016522742924e-05, "loss": 1.9803, "step": 12498500 }, { "epoch": 61.92, "learning_rate": 1.9048926641003158e-05, "loss": 1.9715, "step": 12499000 }, { "epoch": 61.93, "learning_rate": 1.9047693008922775e-05, "loss": 1.9869, "step": 12499500 }, { "epoch": 61.93, "learning_rate": 1.9046454422496692e-05, "loss": 1.9728, "step": 12500000 }, { "epoch": 61.93, "learning_rate": 1.904521583607061e-05, "loss": 2.0261, "step": 12500500 }, { "epoch": 61.93, "learning_rate": 1.9043977249644526e-05, "loss": 2.0219, "step": 12501000 }, { "epoch": 61.94, "learning_rate": 1.9042738663218443e-05, "loss": 1.9939, "step": 12501500 }, { "epoch": 61.94, "learning_rate": 1.904150007679236e-05, "loss": 1.9998, "step": 12502000 }, { "epoch": 61.94, "learning_rate": 1.9040261490366277e-05, "loss": 1.9742, "step": 12502500 }, { "epoch": 61.94, "learning_rate": 1.9039022903940194e-05, "loss": 1.9766, "step": 12503000 }, { "epoch": 61.95, "learning_rate": 1.903778431751411e-05, "loss": 1.9901, "step": 12503500 }, { "epoch": 61.95, "learning_rate": 1.9036545731088024e-05, "loss": 1.9964, "step": 12504000 }, { "epoch": 61.95, "learning_rate": 1.9035309621834793e-05, "loss": 1.9899, "step": 12504500 }, { "epoch": 61.95, "learning_rate": 1.903407103540871e-05, "loss": 1.9843, "step": 12505000 }, { "epoch": 61.96, "learning_rate": 1.9032832448982627e-05, "loss": 1.9807, "step": 12505500 }, { "epoch": 61.96, "learning_rate": 1.9031593862556544e-05, "loss": 2.003, "step": 12506000 }, { "epoch": 61.96, "learning_rate": 1.903035527613046e-05, "loss": 1.9882, "step": 12506500 }, { "epoch": 61.96, "learning_rate": 1.9029119166877226e-05, "loss": 2.0212, "step": 12507000 }, { "epoch": 61.97, "learning_rate": 1.9027880580451143e-05, "loss": 1.9792, "step": 12507500 }, { "epoch": 61.97, "learning_rate": 1.902664199402506e-05, "loss": 1.9769, "step": 12508000 }, { "epoch": 61.97, "learning_rate": 1.9025403407598977e-05, "loss": 1.9927, "step": 12508500 }, { "epoch": 61.97, "learning_rate": 1.9024164821172894e-05, "loss": 2.0107, "step": 12509000 }, { "epoch": 61.98, "learning_rate": 1.902292871191966e-05, "loss": 1.9887, "step": 12509500 }, { "epoch": 61.98, "learning_rate": 1.9021690125493576e-05, "loss": 1.9708, "step": 12510000 }, { "epoch": 61.98, "learning_rate": 1.9020451539067493e-05, "loss": 1.9702, "step": 12510500 }, { "epoch": 61.98, "learning_rate": 1.901921295264141e-05, "loss": 1.9863, "step": 12511000 }, { "epoch": 61.99, "learning_rate": 1.9017974366215327e-05, "loss": 1.9949, "step": 12511500 }, { "epoch": 61.99, "learning_rate": 1.9016735779789244e-05, "loss": 1.9844, "step": 12512000 }, { "epoch": 61.99, "learning_rate": 1.901549719336316e-05, "loss": 1.9883, "step": 12512500 }, { "epoch": 61.99, "learning_rate": 1.9014258606937078e-05, "loss": 2.0032, "step": 12513000 }, { "epoch": 62.0, "learning_rate": 1.901302002051099e-05, "loss": 1.9931, "step": 12513500 }, { "epoch": 62.0, "learning_rate": 1.9011781434084908e-05, "loss": 1.9676, "step": 12514000 }, { "epoch": 62.0, "eval_accuracy": 0.6740752328298691, "eval_accuracy_mlm": 0.6335324391726533, "eval_accuracy_nsp": 0.865299910966077, "eval_loss": 2.2945070266723633, "eval_runtime": 146.6761, "eval_samples_per_second": 1738.245, "eval_steps_per_second": 72.432, "step": 12514266 }, { "epoch": 62.0, "learning_rate": 1.9010542847658825e-05, "loss": 1.9582, "step": 12514500 }, { "epoch": 62.0, "learning_rate": 1.9009304261232742e-05, "loss": 1.9599, "step": 12515000 }, { "epoch": 62.01, "learning_rate": 1.900806567480666e-05, "loss": 1.9586, "step": 12515500 }, { "epoch": 62.01, "learning_rate": 1.9006827088380576e-05, "loss": 1.9668, "step": 12516000 }, { "epoch": 62.01, "learning_rate": 1.900558850195449e-05, "loss": 1.9685, "step": 12516500 }, { "epoch": 62.01, "learning_rate": 1.9004349915528406e-05, "loss": 1.9468, "step": 12517000 }, { "epoch": 62.02, "learning_rate": 1.9003111329102323e-05, "loss": 1.9357, "step": 12517500 }, { "epoch": 62.02, "learning_rate": 1.9001875219849092e-05, "loss": 1.9362, "step": 12518000 }, { "epoch": 62.02, "learning_rate": 1.900063911059586e-05, "loss": 1.971, "step": 12518500 }, { "epoch": 62.02, "learning_rate": 1.8999400524169778e-05, "loss": 1.9651, "step": 12519000 }, { "epoch": 62.03, "learning_rate": 1.899816193774369e-05, "loss": 1.9772, "step": 12519500 }, { "epoch": 62.03, "learning_rate": 1.899692335131761e-05, "loss": 1.9774, "step": 12520000 }, { "epoch": 62.03, "learning_rate": 1.8995684764891525e-05, "loss": 1.9407, "step": 12520500 }, { "epoch": 62.03, "learning_rate": 1.8994448655638294e-05, "loss": 1.9708, "step": 12521000 }, { "epoch": 62.04, "learning_rate": 1.899321006921221e-05, "loss": 1.9938, "step": 12521500 }, { "epoch": 62.04, "learning_rate": 1.8991971482786128e-05, "loss": 1.9787, "step": 12522000 }, { "epoch": 62.04, "learning_rate": 1.8990732896360045e-05, "loss": 1.9785, "step": 12522500 }, { "epoch": 62.04, "learning_rate": 1.898949430993396e-05, "loss": 1.9766, "step": 12523000 }, { "epoch": 62.05, "learning_rate": 1.8988258200680727e-05, "loss": 1.9594, "step": 12523500 }, { "epoch": 62.05, "learning_rate": 1.8987019614254644e-05, "loss": 1.9805, "step": 12524000 }, { "epoch": 62.05, "learning_rate": 1.898578102782856e-05, "loss": 1.9578, "step": 12524500 }, { "epoch": 62.05, "learning_rate": 1.8984542441402478e-05, "loss": 1.9744, "step": 12525000 }, { "epoch": 62.06, "learning_rate": 1.8983303854976395e-05, "loss": 1.9807, "step": 12525500 }, { "epoch": 62.06, "learning_rate": 1.898206774572316e-05, "loss": 1.9893, "step": 12526000 }, { "epoch": 62.06, "learning_rate": 1.8980829159297077e-05, "loss": 1.9816, "step": 12526500 }, { "epoch": 62.06, "learning_rate": 1.8979590572870994e-05, "loss": 1.9951, "step": 12527000 }, { "epoch": 62.07, "learning_rate": 1.897835198644491e-05, "loss": 1.9469, "step": 12527500 }, { "epoch": 62.07, "learning_rate": 1.8977113400018828e-05, "loss": 1.9806, "step": 12528000 }, { "epoch": 62.07, "learning_rate": 1.897587976793845e-05, "loss": 1.9739, "step": 12528500 }, { "epoch": 62.07, "learning_rate": 1.8974643658685218e-05, "loss": 1.9707, "step": 12529000 }, { "epoch": 62.08, "learning_rate": 1.897340507225913e-05, "loss": 1.9828, "step": 12529500 }, { "epoch": 62.08, "learning_rate": 1.8972166485833048e-05, "loss": 1.9832, "step": 12530000 }, { "epoch": 62.08, "learning_rate": 1.8970927899406965e-05, "loss": 1.9572, "step": 12530500 }, { "epoch": 62.08, "learning_rate": 1.8969689312980882e-05, "loss": 1.9609, "step": 12531000 }, { "epoch": 62.09, "learning_rate": 1.89684507265548e-05, "loss": 1.9805, "step": 12531500 }, { "epoch": 62.09, "learning_rate": 1.8967212140128716e-05, "loss": 1.9737, "step": 12532000 }, { "epoch": 62.09, "learning_rate": 1.8965973553702633e-05, "loss": 1.9589, "step": 12532500 }, { "epoch": 62.09, "learning_rate": 1.896473496727655e-05, "loss": 1.9673, "step": 12533000 }, { "epoch": 62.1, "learning_rate": 1.8963496380850467e-05, "loss": 1.9562, "step": 12533500 }, { "epoch": 62.1, "learning_rate": 1.896225779442438e-05, "loss": 1.9551, "step": 12534000 }, { "epoch": 62.1, "learning_rate": 1.8961019207998297e-05, "loss": 1.9695, "step": 12534500 }, { "epoch": 62.1, "learning_rate": 1.8959780621572214e-05, "loss": 1.9711, "step": 12535000 }, { "epoch": 62.11, "learning_rate": 1.8958544512318983e-05, "loss": 1.9691, "step": 12535500 }, { "epoch": 62.11, "learning_rate": 1.89573059258929e-05, "loss": 1.9779, "step": 12536000 }, { "epoch": 62.11, "learning_rate": 1.8956067339466817e-05, "loss": 1.9463, "step": 12536500 }, { "epoch": 62.11, "learning_rate": 1.895482875304073e-05, "loss": 1.9695, "step": 12537000 }, { "epoch": 62.12, "learning_rate": 1.8953590166614647e-05, "loss": 1.9987, "step": 12537500 }, { "epoch": 62.12, "learning_rate": 1.8952354057361416e-05, "loss": 1.9819, "step": 12538000 }, { "epoch": 62.12, "learning_rate": 1.8951115470935333e-05, "loss": 1.9757, "step": 12538500 }, { "epoch": 62.12, "learning_rate": 1.8949879361682098e-05, "loss": 1.9528, "step": 12539000 }, { "epoch": 62.13, "learning_rate": 1.8948640775256015e-05, "loss": 1.9657, "step": 12539500 }, { "epoch": 62.13, "learning_rate": 1.8947402188829932e-05, "loss": 1.9836, "step": 12540000 }, { "epoch": 62.13, "learning_rate": 1.89461660795767e-05, "loss": 1.9578, "step": 12540500 }, { "epoch": 62.13, "learning_rate": 1.894492997032347e-05, "loss": 1.9524, "step": 12541000 }, { "epoch": 62.13, "learning_rate": 1.8943691383897387e-05, "loss": 1.977, "step": 12541500 }, { "epoch": 62.14, "learning_rate": 1.8942452797471304e-05, "loss": 1.9904, "step": 12542000 }, { "epoch": 62.14, "learning_rate": 1.894121421104522e-05, "loss": 1.9614, "step": 12542500 }, { "epoch": 62.14, "learning_rate": 1.8939975624619134e-05, "loss": 1.977, "step": 12543000 }, { "epoch": 62.14, "learning_rate": 1.893873703819305e-05, "loss": 1.9853, "step": 12543500 }, { "epoch": 62.15, "learning_rate": 1.8937498451766968e-05, "loss": 1.9813, "step": 12544000 }, { "epoch": 62.15, "learning_rate": 1.8936259865340885e-05, "loss": 1.9535, "step": 12544500 }, { "epoch": 62.15, "learning_rate": 1.8935021278914802e-05, "loss": 1.9847, "step": 12545000 }, { "epoch": 62.15, "learning_rate": 1.893378516966157e-05, "loss": 1.9822, "step": 12545500 }, { "epoch": 62.16, "learning_rate": 1.8932546583235488e-05, "loss": 1.9644, "step": 12546000 }, { "epoch": 62.16, "learning_rate": 1.89313079968094e-05, "loss": 1.963, "step": 12546500 }, { "epoch": 62.16, "learning_rate": 1.8930069410383318e-05, "loss": 1.9582, "step": 12547000 }, { "epoch": 62.16, "learning_rate": 1.8928830823957235e-05, "loss": 1.9811, "step": 12547500 }, { "epoch": 62.17, "learning_rate": 1.8927592237531152e-05, "loss": 1.9791, "step": 12548000 }, { "epoch": 62.17, "learning_rate": 1.8926358605450773e-05, "loss": 1.9664, "step": 12548500 }, { "epoch": 62.17, "learning_rate": 1.892512001902469e-05, "loss": 1.9673, "step": 12549000 }, { "epoch": 62.17, "learning_rate": 1.8923881432598606e-05, "loss": 1.9645, "step": 12549500 }, { "epoch": 62.18, "learning_rate": 1.8922642846172523e-05, "loss": 1.9951, "step": 12550000 }, { "epoch": 62.18, "learning_rate": 1.8921404259746437e-05, "loss": 1.9559, "step": 12550500 }, { "epoch": 62.18, "learning_rate": 1.8920165673320354e-05, "loss": 2.0003, "step": 12551000 }, { "epoch": 62.18, "learning_rate": 1.891892708689427e-05, "loss": 1.9735, "step": 12551500 }, { "epoch": 62.19, "learning_rate": 1.8917688500468188e-05, "loss": 1.9818, "step": 12552000 }, { "epoch": 62.19, "learning_rate": 1.8916449914042105e-05, "loss": 1.9708, "step": 12552500 }, { "epoch": 62.19, "learning_rate": 1.8915213804788873e-05, "loss": 2.0047, "step": 12553000 }, { "epoch": 62.19, "learning_rate": 1.8913975218362787e-05, "loss": 1.9934, "step": 12553500 }, { "epoch": 62.2, "learning_rate": 1.8912736631936704e-05, "loss": 1.9583, "step": 12554000 }, { "epoch": 62.2, "learning_rate": 1.891149804551062e-05, "loss": 1.9729, "step": 12554500 }, { "epoch": 62.2, "learning_rate": 1.8910259459084538e-05, "loss": 1.9762, "step": 12555000 }, { "epoch": 62.2, "learning_rate": 1.8909020872658455e-05, "loss": 1.9543, "step": 12555500 }, { "epoch": 62.21, "learning_rate": 1.890778228623237e-05, "loss": 1.9589, "step": 12556000 }, { "epoch": 62.21, "learning_rate": 1.8906543699806285e-05, "loss": 1.96, "step": 12556500 }, { "epoch": 62.21, "learning_rate": 1.8905305113380202e-05, "loss": 1.9587, "step": 12557000 }, { "epoch": 62.21, "learning_rate": 1.890406652695412e-05, "loss": 1.9923, "step": 12557500 }, { "epoch": 62.22, "learning_rate": 1.8902827940528033e-05, "loss": 1.9812, "step": 12558000 }, { "epoch": 62.22, "learning_rate": 1.8901591831274805e-05, "loss": 1.9588, "step": 12558500 }, { "epoch": 62.22, "learning_rate": 1.890035324484872e-05, "loss": 1.9794, "step": 12559000 }, { "epoch": 62.22, "learning_rate": 1.889911465842264e-05, "loss": 1.9592, "step": 12559500 }, { "epoch": 62.23, "learning_rate": 1.8897876071996552e-05, "loss": 1.9527, "step": 12560000 }, { "epoch": 62.23, "learning_rate": 1.889663748557047e-05, "loss": 1.9336, "step": 12560500 }, { "epoch": 62.23, "learning_rate": 1.8895398899144383e-05, "loss": 1.9659, "step": 12561000 }, { "epoch": 62.23, "learning_rate": 1.88941603127183e-05, "loss": 1.9813, "step": 12561500 }, { "epoch": 62.24, "learning_rate": 1.8892921726292216e-05, "loss": 1.9675, "step": 12562000 }, { "epoch": 62.24, "learning_rate": 1.8891683139866133e-05, "loss": 1.9541, "step": 12562500 }, { "epoch": 62.24, "learning_rate": 1.889044455344005e-05, "loss": 1.9758, "step": 12563000 }, { "epoch": 62.24, "learning_rate": 1.8889205967013967e-05, "loss": 1.9765, "step": 12563500 }, { "epoch": 62.25, "learning_rate": 1.8887969857760736e-05, "loss": 1.9497, "step": 12564000 }, { "epoch": 62.25, "learning_rate": 1.888673127133465e-05, "loss": 1.9542, "step": 12564500 }, { "epoch": 62.25, "learning_rate": 1.8885492684908567e-05, "loss": 1.9707, "step": 12565000 }, { "epoch": 62.25, "learning_rate": 1.8884254098482483e-05, "loss": 1.9688, "step": 12565500 }, { "epoch": 62.26, "learning_rate": 1.8883017989229256e-05, "loss": 1.9519, "step": 12566000 }, { "epoch": 62.26, "learning_rate": 1.888177940280317e-05, "loss": 1.9775, "step": 12566500 }, { "epoch": 62.26, "learning_rate": 1.8880540816377086e-05, "loss": 1.9811, "step": 12567000 }, { "epoch": 62.26, "learning_rate": 1.8879302229951003e-05, "loss": 1.9777, "step": 12567500 }, { "epoch": 62.27, "learning_rate": 1.8878063643524917e-05, "loss": 1.9873, "step": 12568000 }, { "epoch": 62.27, "learning_rate": 1.8876825057098833e-05, "loss": 1.9773, "step": 12568500 }, { "epoch": 62.27, "learning_rate": 1.887558647067275e-05, "loss": 1.9731, "step": 12569000 }, { "epoch": 62.27, "learning_rate": 1.8874347884246667e-05, "loss": 1.9481, "step": 12569500 }, { "epoch": 62.28, "learning_rate": 1.8873109297820584e-05, "loss": 1.9617, "step": 12570000 }, { "epoch": 62.28, "learning_rate": 1.88718707113945e-05, "loss": 1.9759, "step": 12570500 }, { "epoch": 62.28, "learning_rate": 1.887063460214127e-05, "loss": 1.9808, "step": 12571000 }, { "epoch": 62.28, "learning_rate": 1.8869396015715184e-05, "loss": 1.9857, "step": 12571500 }, { "epoch": 62.29, "learning_rate": 1.88681574292891e-05, "loss": 1.9608, "step": 12572000 }, { "epoch": 62.29, "learning_rate": 1.8866921320035873e-05, "loss": 1.9579, "step": 12572500 }, { "epoch": 62.29, "learning_rate": 1.886568273360979e-05, "loss": 1.9726, "step": 12573000 }, { "epoch": 62.29, "learning_rate": 1.8864444147183703e-05, "loss": 1.9627, "step": 12573500 }, { "epoch": 62.3, "learning_rate": 1.8863208037930472e-05, "loss": 1.9809, "step": 12574000 }, { "epoch": 62.3, "learning_rate": 1.886196945150439e-05, "loss": 1.974, "step": 12574500 }, { "epoch": 62.3, "learning_rate": 1.8860730865078306e-05, "loss": 2.0003, "step": 12575000 }, { "epoch": 62.3, "learning_rate": 1.8859492278652223e-05, "loss": 1.9678, "step": 12575500 }, { "epoch": 62.31, "learning_rate": 1.885825369222614e-05, "loss": 1.953, "step": 12576000 }, { "epoch": 62.31, "learning_rate": 1.8857015105800053e-05, "loss": 1.9649, "step": 12576500 }, { "epoch": 62.31, "learning_rate": 1.885577651937397e-05, "loss": 1.9895, "step": 12577000 }, { "epoch": 62.31, "learning_rate": 1.8854537932947887e-05, "loss": 1.9686, "step": 12577500 }, { "epoch": 62.32, "learning_rate": 1.8853301823694656e-05, "loss": 1.9872, "step": 12578000 }, { "epoch": 62.32, "learning_rate": 1.8852063237268573e-05, "loss": 1.9634, "step": 12578500 }, { "epoch": 62.32, "learning_rate": 1.885082465084249e-05, "loss": 1.9618, "step": 12579000 }, { "epoch": 62.32, "learning_rate": 1.8849586064416403e-05, "loss": 2.0048, "step": 12579500 }, { "epoch": 62.33, "learning_rate": 1.884834747799032e-05, "loss": 1.9842, "step": 12580000 }, { "epoch": 62.33, "learning_rate": 1.8847108891564237e-05, "loss": 1.9721, "step": 12580500 }, { "epoch": 62.33, "learning_rate": 1.8845870305138154e-05, "loss": 1.9851, "step": 12581000 }, { "epoch": 62.33, "learning_rate": 1.8844631718712068e-05, "loss": 1.9976, "step": 12581500 }, { "epoch": 62.34, "learning_rate": 1.8843398086631688e-05, "loss": 1.9617, "step": 12582000 }, { "epoch": 62.34, "learning_rate": 1.8842159500205605e-05, "loss": 1.9842, "step": 12582500 }, { "epoch": 62.34, "learning_rate": 1.8840923390952374e-05, "loss": 1.9675, "step": 12583000 }, { "epoch": 62.34, "learning_rate": 1.883968480452629e-05, "loss": 1.9691, "step": 12583500 }, { "epoch": 62.35, "learning_rate": 1.8838446218100208e-05, "loss": 1.9923, "step": 12584000 }, { "epoch": 62.35, "learning_rate": 1.883720763167412e-05, "loss": 1.9686, "step": 12584500 }, { "epoch": 62.35, "learning_rate": 1.883596904524804e-05, "loss": 1.9709, "step": 12585000 }, { "epoch": 62.35, "learning_rate": 1.8834730458821955e-05, "loss": 1.9817, "step": 12585500 }, { "epoch": 62.36, "learning_rate": 1.8833494349568724e-05, "loss": 1.9864, "step": 12586000 }, { "epoch": 62.36, "learning_rate": 1.883225576314264e-05, "loss": 1.9628, "step": 12586500 }, { "epoch": 62.36, "learning_rate": 1.8831017176716558e-05, "loss": 1.9796, "step": 12587000 }, { "epoch": 62.36, "learning_rate": 1.8829781067463327e-05, "loss": 1.9532, "step": 12587500 }, { "epoch": 62.37, "learning_rate": 1.8828542481037244e-05, "loss": 1.9797, "step": 12588000 }, { "epoch": 62.37, "learning_rate": 1.882730389461116e-05, "loss": 1.9853, "step": 12588500 }, { "epoch": 62.37, "learning_rate": 1.8826065308185074e-05, "loss": 1.9855, "step": 12589000 }, { "epoch": 62.37, "learning_rate": 1.882482672175899e-05, "loss": 1.9711, "step": 12589500 }, { "epoch": 62.38, "learning_rate": 1.8823588135332908e-05, "loss": 1.9621, "step": 12590000 }, { "epoch": 62.38, "learning_rate": 1.8822352026079677e-05, "loss": 1.9673, "step": 12590500 }, { "epoch": 62.38, "learning_rate": 1.8821113439653594e-05, "loss": 1.9746, "step": 12591000 }, { "epoch": 62.38, "learning_rate": 1.881987485322751e-05, "loss": 1.9824, "step": 12591500 }, { "epoch": 62.39, "learning_rate": 1.8818636266801428e-05, "loss": 1.9738, "step": 12592000 }, { "epoch": 62.39, "learning_rate": 1.881739768037534e-05, "loss": 1.9844, "step": 12592500 }, { "epoch": 62.39, "learning_rate": 1.8816159093949258e-05, "loss": 1.9694, "step": 12593000 }, { "epoch": 62.39, "learning_rate": 1.8814920507523175e-05, "loss": 1.9902, "step": 12593500 }, { "epoch": 62.4, "learning_rate": 1.881368192109709e-05, "loss": 1.9732, "step": 12594000 }, { "epoch": 62.4, "learning_rate": 1.8812443334671005e-05, "loss": 1.9657, "step": 12594500 }, { "epoch": 62.4, "learning_rate": 1.8811204748244922e-05, "loss": 1.9729, "step": 12595000 }, { "epoch": 62.4, "learning_rate": 1.8809968638991695e-05, "loss": 1.9555, "step": 12595500 }, { "epoch": 62.4, "learning_rate": 1.8808730052565608e-05, "loss": 1.9951, "step": 12596000 }, { "epoch": 62.41, "learning_rate": 1.8807491466139525e-05, "loss": 1.9757, "step": 12596500 }, { "epoch": 62.41, "learning_rate": 1.880625287971344e-05, "loss": 1.9782, "step": 12597000 }, { "epoch": 62.41, "learning_rate": 1.880501677046021e-05, "loss": 2.0025, "step": 12597500 }, { "epoch": 62.41, "learning_rate": 1.8803778184034128e-05, "loss": 1.9838, "step": 12598000 }, { "epoch": 62.42, "learning_rate": 1.8802539597608045e-05, "loss": 1.9582, "step": 12598500 }, { "epoch": 62.42, "learning_rate": 1.8801301011181958e-05, "loss": 1.976, "step": 12599000 }, { "epoch": 62.42, "learning_rate": 1.8800062424755875e-05, "loss": 1.9526, "step": 12599500 }, { "epoch": 62.42, "learning_rate": 1.8798823838329792e-05, "loss": 1.9677, "step": 12600000 }, { "epoch": 62.43, "learning_rate": 1.8797585251903706e-05, "loss": 1.9765, "step": 12600500 }, { "epoch": 62.43, "learning_rate": 1.8796346665477623e-05, "loss": 1.9864, "step": 12601000 }, { "epoch": 62.43, "learning_rate": 1.8795110556224395e-05, "loss": 1.9713, "step": 12601500 }, { "epoch": 62.43, "learning_rate": 1.879387196979831e-05, "loss": 1.9667, "step": 12602000 }, { "epoch": 62.44, "learning_rate": 1.8792633383372225e-05, "loss": 1.9772, "step": 12602500 }, { "epoch": 62.44, "learning_rate": 1.8791394796946142e-05, "loss": 1.9905, "step": 12603000 }, { "epoch": 62.44, "learning_rate": 1.879015621052006e-05, "loss": 1.9752, "step": 12603500 }, { "epoch": 62.44, "learning_rate": 1.8788920101266828e-05, "loss": 2.0028, "step": 12604000 }, { "epoch": 62.45, "learning_rate": 1.8787681514840745e-05, "loss": 1.9842, "step": 12604500 }, { "epoch": 62.45, "learning_rate": 1.8786442928414662e-05, "loss": 1.9572, "step": 12605000 }, { "epoch": 62.45, "learning_rate": 1.878520434198858e-05, "loss": 1.9838, "step": 12605500 }, { "epoch": 62.45, "learning_rate": 1.8783965755562492e-05, "loss": 1.9809, "step": 12606000 }, { "epoch": 62.46, "learning_rate": 1.878272716913641e-05, "loss": 1.9703, "step": 12606500 }, { "epoch": 62.46, "learning_rate": 1.8781491059883178e-05, "loss": 1.9745, "step": 12607000 }, { "epoch": 62.46, "learning_rate": 1.8780252473457095e-05, "loss": 1.9688, "step": 12607500 }, { "epoch": 62.46, "learning_rate": 1.8779013887031012e-05, "loss": 1.9692, "step": 12608000 }, { "epoch": 62.47, "learning_rate": 1.877777530060493e-05, "loss": 1.9679, "step": 12608500 }, { "epoch": 62.47, "learning_rate": 1.8776536714178846e-05, "loss": 1.9681, "step": 12609000 }, { "epoch": 62.47, "learning_rate": 1.877530060492561e-05, "loss": 1.9718, "step": 12609500 }, { "epoch": 62.47, "learning_rate": 1.8774062018499528e-05, "loss": 1.9935, "step": 12610000 }, { "epoch": 62.48, "learning_rate": 1.8772823432073445e-05, "loss": 1.9852, "step": 12610500 }, { "epoch": 62.48, "learning_rate": 1.8771584845647362e-05, "loss": 1.9799, "step": 12611000 }, { "epoch": 62.48, "learning_rate": 1.877034625922128e-05, "loss": 1.9564, "step": 12611500 }, { "epoch": 62.48, "learning_rate": 1.8769107672795196e-05, "loss": 1.9868, "step": 12612000 }, { "epoch": 62.49, "learning_rate": 1.876786908636911e-05, "loss": 1.9795, "step": 12612500 }, { "epoch": 62.49, "learning_rate": 1.8766630499943026e-05, "loss": 1.9728, "step": 12613000 }, { "epoch": 62.49, "learning_rate": 1.8765391913516943e-05, "loss": 1.9757, "step": 12613500 }, { "epoch": 62.49, "learning_rate": 1.8764155804263712e-05, "loss": 1.9725, "step": 12614000 }, { "epoch": 62.5, "learning_rate": 1.876291721783763e-05, "loss": 1.9757, "step": 12614500 }, { "epoch": 62.5, "learning_rate": 1.8761681108584394e-05, "loss": 1.9761, "step": 12615000 }, { "epoch": 62.5, "learning_rate": 1.876044252215831e-05, "loss": 1.9832, "step": 12615500 }, { "epoch": 62.5, "learning_rate": 1.8759203935732228e-05, "loss": 1.989, "step": 12616000 }, { "epoch": 62.51, "learning_rate": 1.8757965349306145e-05, "loss": 1.9626, "step": 12616500 }, { "epoch": 62.51, "learning_rate": 1.8756726762880062e-05, "loss": 1.9795, "step": 12617000 }, { "epoch": 62.51, "learning_rate": 1.875548817645398e-05, "loss": 1.9763, "step": 12617500 }, { "epoch": 62.51, "learning_rate": 1.8754249590027896e-05, "loss": 1.9966, "step": 12618000 }, { "epoch": 62.52, "learning_rate": 1.8753011003601813e-05, "loss": 1.9862, "step": 12618500 }, { "epoch": 62.52, "learning_rate": 1.8751774894348578e-05, "loss": 1.9689, "step": 12619000 }, { "epoch": 62.52, "learning_rate": 1.8750536307922495e-05, "loss": 1.9644, "step": 12619500 }, { "epoch": 62.52, "learning_rate": 1.8749297721496412e-05, "loss": 1.9789, "step": 12620000 }, { "epoch": 62.53, "learning_rate": 1.874805913507033e-05, "loss": 2.0008, "step": 12620500 }, { "epoch": 62.53, "learning_rate": 1.8746820548644246e-05, "loss": 1.9792, "step": 12621000 }, { "epoch": 62.53, "learning_rate": 1.8745581962218163e-05, "loss": 1.9788, "step": 12621500 }, { "epoch": 62.53, "learning_rate": 1.8744345852964928e-05, "loss": 2.0028, "step": 12622000 }, { "epoch": 62.54, "learning_rate": 1.8743107266538845e-05, "loss": 1.9845, "step": 12622500 }, { "epoch": 62.54, "learning_rate": 1.8741868680112762e-05, "loss": 1.9602, "step": 12623000 }, { "epoch": 62.54, "learning_rate": 1.874063009368668e-05, "loss": 2.008, "step": 12623500 }, { "epoch": 62.54, "learning_rate": 1.8739391507260596e-05, "loss": 1.9827, "step": 12624000 }, { "epoch": 62.55, "learning_rate": 1.8738152920834513e-05, "loss": 1.9946, "step": 12624500 }, { "epoch": 62.55, "learning_rate": 1.873691433440843e-05, "loss": 1.9838, "step": 12625000 }, { "epoch": 62.55, "learning_rate": 1.8735675747982343e-05, "loss": 1.9928, "step": 12625500 }, { "epoch": 62.55, "learning_rate": 1.873443716155626e-05, "loss": 2.0081, "step": 12626000 }, { "epoch": 62.56, "learning_rate": 1.8733198575130177e-05, "loss": 1.9629, "step": 12626500 }, { "epoch": 62.56, "learning_rate": 1.8731959988704094e-05, "loss": 1.9781, "step": 12627000 }, { "epoch": 62.56, "learning_rate": 1.8730721402278008e-05, "loss": 1.9784, "step": 12627500 }, { "epoch": 62.56, "learning_rate": 1.872948777019763e-05, "loss": 1.991, "step": 12628000 }, { "epoch": 62.57, "learning_rate": 1.8728249183771545e-05, "loss": 2.0053, "step": 12628500 }, { "epoch": 62.57, "learning_rate": 1.8727013074518314e-05, "loss": 1.9737, "step": 12629000 }, { "epoch": 62.57, "learning_rate": 1.872577448809223e-05, "loss": 1.9778, "step": 12629500 }, { "epoch": 62.57, "learning_rate": 1.8724535901666148e-05, "loss": 1.962, "step": 12630000 }, { "epoch": 62.58, "learning_rate": 1.872329731524006e-05, "loss": 1.9788, "step": 12630500 }, { "epoch": 62.58, "learning_rate": 1.872205872881398e-05, "loss": 1.9694, "step": 12631000 }, { "epoch": 62.58, "learning_rate": 1.8720822619560747e-05, "loss": 1.9538, "step": 12631500 }, { "epoch": 62.58, "learning_rate": 1.8719584033134664e-05, "loss": 1.961, "step": 12632000 }, { "epoch": 62.59, "learning_rate": 1.871834544670858e-05, "loss": 1.9963, "step": 12632500 }, { "epoch": 62.59, "learning_rate": 1.8717106860282498e-05, "loss": 1.9827, "step": 12633000 }, { "epoch": 62.59, "learning_rate": 1.871586827385641e-05, "loss": 2.0113, "step": 12633500 }, { "epoch": 62.59, "learning_rate": 1.871462968743033e-05, "loss": 1.9849, "step": 12634000 }, { "epoch": 62.6, "learning_rate": 1.8713391101004245e-05, "loss": 1.9866, "step": 12634500 }, { "epoch": 62.6, "learning_rate": 1.8712152514578162e-05, "loss": 1.9848, "step": 12635000 }, { "epoch": 62.6, "learning_rate": 1.871091392815208e-05, "loss": 1.9598, "step": 12635500 }, { "epoch": 62.6, "learning_rate": 1.8709675341725996e-05, "loss": 1.9672, "step": 12636000 }, { "epoch": 62.61, "learning_rate": 1.8708436755299913e-05, "loss": 1.9938, "step": 12636500 }, { "epoch": 62.61, "learning_rate": 1.870719816887383e-05, "loss": 1.9616, "step": 12637000 }, { "epoch": 62.61, "learning_rate": 1.8705962059620595e-05, "loss": 1.9851, "step": 12637500 }, { "epoch": 62.61, "learning_rate": 1.8704723473194512e-05, "loss": 1.9569, "step": 12638000 }, { "epoch": 62.62, "learning_rate": 1.870348488676843e-05, "loss": 1.981, "step": 12638500 }, { "epoch": 62.62, "learning_rate": 1.8702248777515198e-05, "loss": 1.9823, "step": 12639000 }, { "epoch": 62.62, "learning_rate": 1.870101019108911e-05, "loss": 1.9869, "step": 12639500 }, { "epoch": 62.62, "learning_rate": 1.869977160466303e-05, "loss": 1.979, "step": 12640000 }, { "epoch": 62.63, "learning_rate": 1.8698533018236946e-05, "loss": 1.997, "step": 12640500 }, { "epoch": 62.63, "learning_rate": 1.8697294431810862e-05, "loss": 1.946, "step": 12641000 }, { "epoch": 62.63, "learning_rate": 1.869605584538478e-05, "loss": 1.9943, "step": 12641500 }, { "epoch": 62.63, "learning_rate": 1.8694817258958696e-05, "loss": 1.9951, "step": 12642000 }, { "epoch": 62.64, "learning_rate": 1.8693578672532613e-05, "loss": 1.9688, "step": 12642500 }, { "epoch": 62.64, "learning_rate": 1.869234008610653e-05, "loss": 1.9824, "step": 12643000 }, { "epoch": 62.64, "learning_rate": 1.8691101499680447e-05, "loss": 1.9739, "step": 12643500 }, { "epoch": 62.64, "learning_rate": 1.868986291325436e-05, "loss": 1.9851, "step": 12644000 }, { "epoch": 62.65, "learning_rate": 1.868862680400113e-05, "loss": 1.9623, "step": 12644500 }, { "epoch": 62.65, "learning_rate": 1.8687388217575046e-05, "loss": 1.988, "step": 12645000 }, { "epoch": 62.65, "learning_rate": 1.8686149631148963e-05, "loss": 1.9965, "step": 12645500 }, { "epoch": 62.65, "learning_rate": 1.868491104472288e-05, "loss": 2.0084, "step": 12646000 }, { "epoch": 62.66, "learning_rate": 1.8683672458296797e-05, "loss": 1.9724, "step": 12646500 }, { "epoch": 62.66, "learning_rate": 1.868243387187071e-05, "loss": 1.9772, "step": 12647000 }, { "epoch": 62.66, "learning_rate": 1.868119776261748e-05, "loss": 1.9798, "step": 12647500 }, { "epoch": 62.66, "learning_rate": 1.8679959176191396e-05, "loss": 1.9698, "step": 12648000 }, { "epoch": 62.67, "learning_rate": 1.8678720589765313e-05, "loss": 1.9917, "step": 12648500 }, { "epoch": 62.67, "learning_rate": 1.867748200333923e-05, "loss": 1.9708, "step": 12649000 }, { "epoch": 62.67, "learning_rate": 1.8676245894086e-05, "loss": 1.9863, "step": 12649500 }, { "epoch": 62.67, "learning_rate": 1.8675007307659913e-05, "loss": 1.9885, "step": 12650000 }, { "epoch": 62.67, "learning_rate": 1.867376872123383e-05, "loss": 1.9998, "step": 12650500 }, { "epoch": 62.68, "learning_rate": 1.8672530134807746e-05, "loss": 1.983, "step": 12651000 }, { "epoch": 62.68, "learning_rate": 1.8671291548381663e-05, "loss": 2.0045, "step": 12651500 }, { "epoch": 62.68, "learning_rate": 1.867005296195558e-05, "loss": 1.9934, "step": 12652000 }, { "epoch": 62.68, "learning_rate": 1.8668814375529497e-05, "loss": 1.9757, "step": 12652500 }, { "epoch": 62.69, "learning_rate": 1.8667575789103414e-05, "loss": 1.9826, "step": 12653000 }, { "epoch": 62.69, "learning_rate": 1.866633967985018e-05, "loss": 1.9923, "step": 12653500 }, { "epoch": 62.69, "learning_rate": 1.8665101093424097e-05, "loss": 1.9749, "step": 12654000 }, { "epoch": 62.69, "learning_rate": 1.866386498417087e-05, "loss": 2.0006, "step": 12654500 }, { "epoch": 62.7, "learning_rate": 1.8662626397744786e-05, "loss": 1.9729, "step": 12655000 }, { "epoch": 62.7, "learning_rate": 1.86613878113187e-05, "loss": 1.9563, "step": 12655500 }, { "epoch": 62.7, "learning_rate": 1.8660149224892616e-05, "loss": 1.9815, "step": 12656000 }, { "epoch": 62.7, "learning_rate": 1.865891063846653e-05, "loss": 1.9818, "step": 12656500 }, { "epoch": 62.71, "learning_rate": 1.8657672052040447e-05, "loss": 1.9911, "step": 12657000 }, { "epoch": 62.71, "learning_rate": 1.8656433465614364e-05, "loss": 1.9828, "step": 12657500 }, { "epoch": 62.71, "learning_rate": 1.8655197356361136e-05, "loss": 1.9645, "step": 12658000 }, { "epoch": 62.71, "learning_rate": 1.865395876993505e-05, "loss": 1.9823, "step": 12658500 }, { "epoch": 62.72, "learning_rate": 1.8652720183508966e-05, "loss": 1.9766, "step": 12659000 }, { "epoch": 62.72, "learning_rate": 1.8651481597082883e-05, "loss": 1.9479, "step": 12659500 }, { "epoch": 62.72, "learning_rate": 1.8650243010656797e-05, "loss": 1.9817, "step": 12660000 }, { "epoch": 62.72, "learning_rate": 1.8649004424230714e-05, "loss": 1.9971, "step": 12660500 }, { "epoch": 62.73, "learning_rate": 1.864776583780463e-05, "loss": 1.9766, "step": 12661000 }, { "epoch": 62.73, "learning_rate": 1.8646527251378547e-05, "loss": 1.988, "step": 12661500 }, { "epoch": 62.73, "learning_rate": 1.8645288664952464e-05, "loss": 1.9761, "step": 12662000 }, { "epoch": 62.73, "learning_rate": 1.864405007852638e-05, "loss": 1.9849, "step": 12662500 }, { "epoch": 62.74, "learning_rate": 1.8642811492100295e-05, "loss": 1.9754, "step": 12663000 }, { "epoch": 62.74, "learning_rate": 1.8641575382847064e-05, "loss": 1.9729, "step": 12663500 }, { "epoch": 62.74, "learning_rate": 1.864033679642098e-05, "loss": 1.9872, "step": 12664000 }, { "epoch": 62.74, "learning_rate": 1.8639098209994898e-05, "loss": 1.9975, "step": 12664500 }, { "epoch": 62.75, "learning_rate": 1.8637859623568814e-05, "loss": 1.968, "step": 12665000 }, { "epoch": 62.75, "learning_rate": 1.863662103714273e-05, "loss": 1.9763, "step": 12665500 }, { "epoch": 62.75, "learning_rate": 1.86353849278895e-05, "loss": 2.0049, "step": 12666000 }, { "epoch": 62.75, "learning_rate": 1.863414881863627e-05, "loss": 2.0013, "step": 12666500 }, { "epoch": 62.76, "learning_rate": 1.8632910232210186e-05, "loss": 1.9856, "step": 12667000 }, { "epoch": 62.76, "learning_rate": 1.8631671645784103e-05, "loss": 1.954, "step": 12667500 }, { "epoch": 62.76, "learning_rate": 1.8630433059358016e-05, "loss": 1.9753, "step": 12668000 }, { "epoch": 62.76, "learning_rate": 1.8629194472931933e-05, "loss": 1.9583, "step": 12668500 }, { "epoch": 62.77, "learning_rate": 1.8627958363678702e-05, "loss": 1.9761, "step": 12669000 }, { "epoch": 62.77, "learning_rate": 1.8626722254425468e-05, "loss": 2.0116, "step": 12669500 }, { "epoch": 62.77, "learning_rate": 1.862548614517224e-05, "loss": 1.9903, "step": 12670000 }, { "epoch": 62.77, "learning_rate": 1.8624247558746157e-05, "loss": 1.9857, "step": 12670500 }, { "epoch": 62.78, "learning_rate": 1.862300897232007e-05, "loss": 1.9735, "step": 12671000 }, { "epoch": 62.78, "learning_rate": 1.8621770385893987e-05, "loss": 1.9561, "step": 12671500 }, { "epoch": 62.78, "learning_rate": 1.8620531799467904e-05, "loss": 1.9567, "step": 12672000 }, { "epoch": 62.78, "learning_rate": 1.861929321304182e-05, "loss": 1.9916, "step": 12672500 }, { "epoch": 62.79, "learning_rate": 1.8618054626615735e-05, "loss": 1.9786, "step": 12673000 }, { "epoch": 62.79, "learning_rate": 1.861681604018965e-05, "loss": 1.9673, "step": 12673500 }, { "epoch": 62.79, "learning_rate": 1.861557745376357e-05, "loss": 1.9939, "step": 12674000 }, { "epoch": 62.79, "learning_rate": 1.8614341344510337e-05, "loss": 1.975, "step": 12674500 }, { "epoch": 62.8, "learning_rate": 1.8613102758084254e-05, "loss": 1.9903, "step": 12675000 }, { "epoch": 62.8, "learning_rate": 1.861186417165817e-05, "loss": 2.0003, "step": 12675500 }, { "epoch": 62.8, "learning_rate": 1.8610625585232085e-05, "loss": 1.9551, "step": 12676000 }, { "epoch": 62.8, "learning_rate": 1.8609386998806e-05, "loss": 1.97, "step": 12676500 }, { "epoch": 62.81, "learning_rate": 1.860814841237992e-05, "loss": 1.9775, "step": 12677000 }, { "epoch": 62.81, "learning_rate": 1.8606909825953835e-05, "loss": 1.9865, "step": 12677500 }, { "epoch": 62.81, "learning_rate": 1.8605671239527752e-05, "loss": 1.9708, "step": 12678000 }, { "epoch": 62.81, "learning_rate": 1.860443513027452e-05, "loss": 1.9877, "step": 12678500 }, { "epoch": 62.82, "learning_rate": 1.8603196543848435e-05, "loss": 1.9609, "step": 12679000 }, { "epoch": 62.82, "learning_rate": 1.860195795742235e-05, "loss": 1.9754, "step": 12679500 }, { "epoch": 62.82, "learning_rate": 1.860071937099627e-05, "loss": 1.9879, "step": 12680000 }, { "epoch": 62.82, "learning_rate": 1.8599480784570185e-05, "loss": 1.9859, "step": 12680500 }, { "epoch": 62.83, "learning_rate": 1.8598242198144102e-05, "loss": 1.9541, "step": 12681000 }, { "epoch": 62.83, "learning_rate": 1.859700608889087e-05, "loss": 1.9787, "step": 12681500 }, { "epoch": 62.83, "learning_rate": 1.8595767502464788e-05, "loss": 1.9669, "step": 12682000 }, { "epoch": 62.83, "learning_rate": 1.85945289160387e-05, "loss": 1.9759, "step": 12682500 }, { "epoch": 62.84, "learning_rate": 1.859329032961262e-05, "loss": 1.9929, "step": 12683000 }, { "epoch": 62.84, "learning_rate": 1.8592051743186536e-05, "loss": 1.9692, "step": 12683500 }, { "epoch": 62.84, "learning_rate": 1.8590813156760452e-05, "loss": 1.9811, "step": 12684000 }, { "epoch": 62.84, "learning_rate": 1.858957704750722e-05, "loss": 1.9873, "step": 12684500 }, { "epoch": 62.85, "learning_rate": 1.8588338461081138e-05, "loss": 1.9886, "step": 12685000 }, { "epoch": 62.85, "learning_rate": 1.8587099874655055e-05, "loss": 2.0022, "step": 12685500 }, { "epoch": 62.85, "learning_rate": 1.858586128822897e-05, "loss": 1.9757, "step": 12686000 }, { "epoch": 62.85, "learning_rate": 1.8584622701802886e-05, "loss": 1.9802, "step": 12686500 }, { "epoch": 62.86, "learning_rate": 1.8583384115376803e-05, "loss": 1.9991, "step": 12687000 }, { "epoch": 62.86, "learning_rate": 1.858214552895072e-05, "loss": 1.9833, "step": 12687500 }, { "epoch": 62.86, "learning_rate": 1.8580909419697488e-05, "loss": 1.9732, "step": 12688000 }, { "epoch": 62.86, "learning_rate": 1.8579670833271405e-05, "loss": 1.9717, "step": 12688500 }, { "epoch": 62.87, "learning_rate": 1.8578434724018174e-05, "loss": 1.9701, "step": 12689000 }, { "epoch": 62.87, "learning_rate": 1.857719613759209e-05, "loss": 1.9962, "step": 12689500 }, { "epoch": 62.87, "learning_rate": 1.8575957551166008e-05, "loss": 1.9659, "step": 12690000 }, { "epoch": 62.87, "learning_rate": 1.8574718964739925e-05, "loss": 1.9859, "step": 12690500 }, { "epoch": 62.88, "learning_rate": 1.857348037831384e-05, "loss": 1.9821, "step": 12691000 }, { "epoch": 62.88, "learning_rate": 1.8572241791887755e-05, "loss": 1.971, "step": 12691500 }, { "epoch": 62.88, "learning_rate": 1.8571003205461672e-05, "loss": 1.986, "step": 12692000 }, { "epoch": 62.88, "learning_rate": 1.8569764619035586e-05, "loss": 1.9963, "step": 12692500 }, { "epoch": 62.89, "learning_rate": 1.8568526032609503e-05, "loss": 1.9684, "step": 12693000 }, { "epoch": 62.89, "learning_rate": 1.856728744618342e-05, "loss": 1.9609, "step": 12693500 }, { "epoch": 62.89, "learning_rate": 1.8566051336930192e-05, "loss": 1.9949, "step": 12694000 }, { "epoch": 62.89, "learning_rate": 1.8564812750504105e-05, "loss": 1.9776, "step": 12694500 }, { "epoch": 62.9, "learning_rate": 1.8563574164078022e-05, "loss": 1.9991, "step": 12695000 }, { "epoch": 62.9, "learning_rate": 1.856233557765194e-05, "loss": 1.9542, "step": 12695500 }, { "epoch": 62.9, "learning_rate": 1.8561096991225853e-05, "loss": 1.9738, "step": 12696000 }, { "epoch": 62.9, "learning_rate": 1.855985840479977e-05, "loss": 1.979, "step": 12696500 }, { "epoch": 62.91, "learning_rate": 1.8558619818373687e-05, "loss": 2.0093, "step": 12697000 }, { "epoch": 62.91, "learning_rate": 1.855738370912046e-05, "loss": 1.9767, "step": 12697500 }, { "epoch": 62.91, "learning_rate": 1.8556147599867224e-05, "loss": 2.0084, "step": 12698000 }, { "epoch": 62.91, "learning_rate": 1.855490901344114e-05, "loss": 1.9928, "step": 12698500 }, { "epoch": 62.92, "learning_rate": 1.8553670427015058e-05, "loss": 1.9871, "step": 12699000 }, { "epoch": 62.92, "learning_rate": 1.8552431840588975e-05, "loss": 1.9603, "step": 12699500 }, { "epoch": 62.92, "learning_rate": 1.8551193254162892e-05, "loss": 1.9794, "step": 12700000 }, { "epoch": 62.92, "learning_rate": 1.854995466773681e-05, "loss": 1.9793, "step": 12700500 }, { "epoch": 62.93, "learning_rate": 1.8548716081310722e-05, "loss": 1.9997, "step": 12701000 }, { "epoch": 62.93, "learning_rate": 1.854747749488464e-05, "loss": 1.9682, "step": 12701500 }, { "epoch": 62.93, "learning_rate": 1.8546238908458556e-05, "loss": 1.982, "step": 12702000 }, { "epoch": 62.93, "learning_rate": 1.8545002799205325e-05, "loss": 1.9732, "step": 12702500 }, { "epoch": 62.94, "learning_rate": 1.8543764212779242e-05, "loss": 1.9606, "step": 12703000 }, { "epoch": 62.94, "learning_rate": 1.854252562635316e-05, "loss": 1.969, "step": 12703500 }, { "epoch": 62.94, "learning_rate": 1.8541287039927072e-05, "loss": 2.0214, "step": 12704000 }, { "epoch": 62.94, "learning_rate": 1.854004845350099e-05, "loss": 1.977, "step": 12704500 }, { "epoch": 62.94, "learning_rate": 1.8538809867074906e-05, "loss": 1.9719, "step": 12705000 }, { "epoch": 62.95, "learning_rate": 1.8537571280648823e-05, "loss": 1.9911, "step": 12705500 }, { "epoch": 62.95, "learning_rate": 1.8536332694222737e-05, "loss": 1.974, "step": 12706000 }, { "epoch": 62.95, "learning_rate": 1.8535094107796654e-05, "loss": 1.9919, "step": 12706500 }, { "epoch": 62.95, "learning_rate": 1.8533857998543426e-05, "loss": 1.9967, "step": 12707000 }, { "epoch": 62.96, "learning_rate": 1.853261941211734e-05, "loss": 1.987, "step": 12707500 }, { "epoch": 62.96, "learning_rate": 1.8531380825691256e-05, "loss": 1.9668, "step": 12708000 }, { "epoch": 62.96, "learning_rate": 1.8530142239265173e-05, "loss": 1.9688, "step": 12708500 }, { "epoch": 62.96, "learning_rate": 1.852890365283909e-05, "loss": 1.9766, "step": 12709000 }, { "epoch": 62.97, "learning_rate": 1.8527670020758707e-05, "loss": 2.0038, "step": 12709500 }, { "epoch": 62.97, "learning_rate": 1.8526431434332624e-05, "loss": 1.9806, "step": 12710000 }, { "epoch": 62.97, "learning_rate": 1.852519284790654e-05, "loss": 1.9554, "step": 12710500 }, { "epoch": 62.97, "learning_rate": 1.8523954261480458e-05, "loss": 1.9733, "step": 12711000 }, { "epoch": 62.98, "learning_rate": 1.8522715675054375e-05, "loss": 1.9653, "step": 12711500 }, { "epoch": 62.98, "learning_rate": 1.8521477088628292e-05, "loss": 1.9816, "step": 12712000 }, { "epoch": 62.98, "learning_rate": 1.852023850220221e-05, "loss": 1.9949, "step": 12712500 }, { "epoch": 62.98, "learning_rate": 1.8518999915776126e-05, "loss": 1.9773, "step": 12713000 }, { "epoch": 62.99, "learning_rate": 1.851776132935004e-05, "loss": 1.963, "step": 12713500 }, { "epoch": 62.99, "learning_rate": 1.851652522009681e-05, "loss": 1.991, "step": 12714000 }, { "epoch": 62.99, "learning_rate": 1.8515286633670725e-05, "loss": 1.9834, "step": 12714500 }, { "epoch": 62.99, "learning_rate": 1.8514048047244642e-05, "loss": 1.9903, "step": 12715000 }, { "epoch": 63.0, "learning_rate": 1.851280946081856e-05, "loss": 1.9887, "step": 12715500 }, { "epoch": 63.0, "learning_rate": 1.8511570874392476e-05, "loss": 1.9981, "step": 12716000 }, { "epoch": 63.0, "eval_accuracy": 0.674643994406117, "eval_accuracy_mlm": 0.63403299312109, "eval_accuracy_nsp": 0.8659313850462231, "eval_loss": 2.3052685260772705, "eval_runtime": 147.0673, "eval_samples_per_second": 1733.621, "eval_steps_per_second": 72.239, "step": 12716109 }, { "epoch": 63.0, "learning_rate": 1.851033476513924e-05, "loss": 1.971, "step": 12716500 }, { "epoch": 63.0, "learning_rate": 1.850909617871316e-05, "loss": 1.9599, "step": 12717000 }, { "epoch": 63.01, "learning_rate": 1.8507857592287075e-05, "loss": 1.9613, "step": 12717500 }, { "epoch": 63.01, "learning_rate": 1.8506619005860992e-05, "loss": 1.9589, "step": 12718000 }, { "epoch": 63.01, "learning_rate": 1.850538041943491e-05, "loss": 1.9863, "step": 12718500 }, { "epoch": 63.01, "learning_rate": 1.8504141833008826e-05, "loss": 1.9505, "step": 12719000 }, { "epoch": 63.02, "learning_rate": 1.850290572375559e-05, "loss": 1.9948, "step": 12719500 }, { "epoch": 63.02, "learning_rate": 1.850166713732951e-05, "loss": 1.9713, "step": 12720000 }, { "epoch": 63.02, "learning_rate": 1.8500428550903425e-05, "loss": 1.9646, "step": 12720500 }, { "epoch": 63.02, "learning_rate": 1.8499189964477342e-05, "loss": 1.9447, "step": 12721000 }, { "epoch": 63.03, "learning_rate": 1.8497953855224108e-05, "loss": 1.9378, "step": 12721500 }, { "epoch": 63.03, "learning_rate": 1.8496715268798025e-05, "loss": 1.9652, "step": 12722000 }, { "epoch": 63.03, "learning_rate": 1.849547668237194e-05, "loss": 1.9719, "step": 12722500 }, { "epoch": 63.03, "learning_rate": 1.849423809594586e-05, "loss": 1.9854, "step": 12723000 }, { "epoch": 63.04, "learning_rate": 1.849300198669263e-05, "loss": 1.9442, "step": 12723500 }, { "epoch": 63.04, "learning_rate": 1.8491763400266544e-05, "loss": 1.9327, "step": 12724000 }, { "epoch": 63.04, "learning_rate": 1.849052481384046e-05, "loss": 1.9827, "step": 12724500 }, { "epoch": 63.04, "learning_rate": 1.848928870458723e-05, "loss": 1.9692, "step": 12725000 }, { "epoch": 63.05, "learning_rate": 1.8488050118161147e-05, "loss": 1.9757, "step": 12725500 }, { "epoch": 63.05, "learning_rate": 1.8486811531735064e-05, "loss": 1.975, "step": 12726000 }, { "epoch": 63.05, "learning_rate": 1.848557294530898e-05, "loss": 1.9572, "step": 12726500 }, { "epoch": 63.05, "learning_rate": 1.8484334358882894e-05, "loss": 1.9825, "step": 12727000 }, { "epoch": 63.06, "learning_rate": 1.848309577245681e-05, "loss": 1.9537, "step": 12727500 }, { "epoch": 63.06, "learning_rate": 1.848185966320358e-05, "loss": 1.9546, "step": 12728000 }, { "epoch": 63.06, "learning_rate": 1.848062355395035e-05, "loss": 1.9537, "step": 12728500 }, { "epoch": 63.06, "learning_rate": 1.8479387444697118e-05, "loss": 1.9587, "step": 12729000 }, { "epoch": 63.07, "learning_rate": 1.847814885827103e-05, "loss": 1.9678, "step": 12729500 }, { "epoch": 63.07, "learning_rate": 1.8476910271844948e-05, "loss": 1.9612, "step": 12730000 }, { "epoch": 63.07, "learning_rate": 1.8475671685418865e-05, "loss": 1.955, "step": 12730500 }, { "epoch": 63.07, "learning_rate": 1.8474433098992782e-05, "loss": 1.9664, "step": 12731000 }, { "epoch": 63.08, "learning_rate": 1.84731945125667e-05, "loss": 1.9594, "step": 12731500 }, { "epoch": 63.08, "learning_rate": 1.8471955926140616e-05, "loss": 1.9688, "step": 12732000 }, { "epoch": 63.08, "learning_rate": 1.8470717339714533e-05, "loss": 1.9475, "step": 12732500 }, { "epoch": 63.08, "learning_rate": 1.8469478753288446e-05, "loss": 1.9526, "step": 12733000 }, { "epoch": 63.09, "learning_rate": 1.8468240166862363e-05, "loss": 1.9763, "step": 12733500 }, { "epoch": 63.09, "learning_rate": 1.8467004057609132e-05, "loss": 1.9649, "step": 12734000 }, { "epoch": 63.09, "learning_rate": 1.846576547118305e-05, "loss": 1.9762, "step": 12734500 }, { "epoch": 63.09, "learning_rate": 1.8464526884756966e-05, "loss": 1.9792, "step": 12735000 }, { "epoch": 63.1, "learning_rate": 1.8463288298330883e-05, "loss": 1.944, "step": 12735500 }, { "epoch": 63.1, "learning_rate": 1.8462049711904796e-05, "loss": 1.9608, "step": 12736000 }, { "epoch": 63.1, "learning_rate": 1.8460811125478713e-05, "loss": 1.9705, "step": 12736500 }, { "epoch": 63.1, "learning_rate": 1.845957253905263e-05, "loss": 1.9806, "step": 12737000 }, { "epoch": 63.11, "learning_rate": 1.84583364297994e-05, "loss": 1.9514, "step": 12737500 }, { "epoch": 63.11, "learning_rate": 1.8457097843373316e-05, "loss": 1.985, "step": 12738000 }, { "epoch": 63.11, "learning_rate": 1.8455859256947233e-05, "loss": 1.9814, "step": 12738500 }, { "epoch": 63.11, "learning_rate": 1.8454620670521146e-05, "loss": 1.944, "step": 12739000 }, { "epoch": 63.12, "learning_rate": 1.8453382084095063e-05, "loss": 1.9591, "step": 12739500 }, { "epoch": 63.12, "learning_rate": 1.845214349766898e-05, "loss": 1.9651, "step": 12740000 }, { "epoch": 63.12, "learning_rate": 1.8450904911242897e-05, "loss": 1.9525, "step": 12740500 }, { "epoch": 63.12, "learning_rate": 1.8449666324816814e-05, "loss": 1.9726, "step": 12741000 }, { "epoch": 63.13, "learning_rate": 1.8448430215563583e-05, "loss": 1.9661, "step": 12741500 }, { "epoch": 63.13, "learning_rate": 1.84471916291375e-05, "loss": 1.9452, "step": 12742000 }, { "epoch": 63.13, "learning_rate": 1.8445953042711413e-05, "loss": 1.9576, "step": 12742500 }, { "epoch": 63.13, "learning_rate": 1.844471445628533e-05, "loss": 1.9695, "step": 12743000 }, { "epoch": 63.14, "learning_rate": 1.84434783470321e-05, "loss": 1.9601, "step": 12743500 }, { "epoch": 63.14, "learning_rate": 1.8442239760606016e-05, "loss": 1.9769, "step": 12744000 }, { "epoch": 63.14, "learning_rate": 1.8441001174179933e-05, "loss": 1.959, "step": 12744500 }, { "epoch": 63.14, "learning_rate": 1.8439765064926702e-05, "loss": 1.9583, "step": 12745000 }, { "epoch": 63.15, "learning_rate": 1.843852647850062e-05, "loss": 1.9775, "step": 12745500 }, { "epoch": 63.15, "learning_rate": 1.8437287892074532e-05, "loss": 1.999, "step": 12746000 }, { "epoch": 63.15, "learning_rate": 1.843604930564845e-05, "loss": 1.9709, "step": 12746500 }, { "epoch": 63.15, "learning_rate": 1.8434810719222366e-05, "loss": 1.9682, "step": 12747000 }, { "epoch": 63.16, "learning_rate": 1.8433572132796283e-05, "loss": 1.9855, "step": 12747500 }, { "epoch": 63.16, "learning_rate": 1.84323335463702e-05, "loss": 1.9811, "step": 12748000 }, { "epoch": 63.16, "learning_rate": 1.8431094959944114e-05, "loss": 1.9542, "step": 12748500 }, { "epoch": 63.16, "learning_rate": 1.842985637351803e-05, "loss": 1.956, "step": 12749000 }, { "epoch": 63.17, "learning_rate": 1.8428617787091947e-05, "loss": 1.9596, "step": 12749500 }, { "epoch": 63.17, "learning_rate": 1.8427379200665864e-05, "loss": 1.9572, "step": 12750000 }, { "epoch": 63.17, "learning_rate": 1.8426143091412633e-05, "loss": 1.9764, "step": 12750500 }, { "epoch": 63.17, "learning_rate": 1.842490450498655e-05, "loss": 1.9595, "step": 12751000 }, { "epoch": 63.18, "learning_rate": 1.842366839573332e-05, "loss": 1.9588, "step": 12751500 }, { "epoch": 63.18, "learning_rate": 1.8422429809307236e-05, "loss": 1.9851, "step": 12752000 }, { "epoch": 63.18, "learning_rate": 1.8421191222881153e-05, "loss": 1.9483, "step": 12752500 }, { "epoch": 63.18, "learning_rate": 1.8419952636455066e-05, "loss": 1.9868, "step": 12753000 }, { "epoch": 63.19, "learning_rate": 1.8418714050028983e-05, "loss": 1.9362, "step": 12753500 }, { "epoch": 63.19, "learning_rate": 1.84174754636029e-05, "loss": 1.9923, "step": 12754000 }, { "epoch": 63.19, "learning_rate": 1.8416236877176817e-05, "loss": 1.9794, "step": 12754500 }, { "epoch": 63.19, "learning_rate": 1.841499829075073e-05, "loss": 1.979, "step": 12755000 }, { "epoch": 63.2, "learning_rate": 1.8413759704324648e-05, "loss": 1.9715, "step": 12755500 }, { "epoch": 63.2, "learning_rate": 1.8412521117898564e-05, "loss": 1.9526, "step": 12756000 }, { "epoch": 63.2, "learning_rate": 1.841128253147248e-05, "loss": 1.9672, "step": 12756500 }, { "epoch": 63.2, "learning_rate": 1.84100439450464e-05, "loss": 1.947, "step": 12757000 }, { "epoch": 63.21, "learning_rate": 1.8408805358620315e-05, "loss": 2.0009, "step": 12757500 }, { "epoch": 63.21, "learning_rate": 1.8407566772194232e-05, "loss": 1.9554, "step": 12758000 }, { "epoch": 63.21, "learning_rate": 1.840632818576815e-05, "loss": 1.9503, "step": 12758500 }, { "epoch": 63.21, "learning_rate": 1.8405089599342063e-05, "loss": 1.9486, "step": 12759000 }, { "epoch": 63.21, "learning_rate": 1.840385349008883e-05, "loss": 1.9514, "step": 12759500 }, { "epoch": 63.22, "learning_rate": 1.840261490366275e-05, "loss": 1.9807, "step": 12760000 }, { "epoch": 63.22, "learning_rate": 1.8401376317236665e-05, "loss": 1.992, "step": 12760500 }, { "epoch": 63.22, "learning_rate": 1.8400137730810582e-05, "loss": 1.9643, "step": 12761000 }, { "epoch": 63.22, "learning_rate": 1.83988991443845e-05, "loss": 1.9278, "step": 12761500 }, { "epoch": 63.23, "learning_rate": 1.8397663035131265e-05, "loss": 1.9634, "step": 12762000 }, { "epoch": 63.23, "learning_rate": 1.839642444870518e-05, "loss": 1.9628, "step": 12762500 }, { "epoch": 63.23, "learning_rate": 1.83951858622791e-05, "loss": 1.9598, "step": 12763000 }, { "epoch": 63.23, "learning_rate": 1.8393947275853015e-05, "loss": 1.9813, "step": 12763500 }, { "epoch": 63.24, "learning_rate": 1.8392711166599784e-05, "loss": 1.9758, "step": 12764000 }, { "epoch": 63.24, "learning_rate": 1.8391472580173698e-05, "loss": 1.9823, "step": 12764500 }, { "epoch": 63.24, "learning_rate": 1.8390233993747615e-05, "loss": 1.9572, "step": 12765000 }, { "epoch": 63.24, "learning_rate": 1.838899540732153e-05, "loss": 1.9624, "step": 12765500 }, { "epoch": 63.25, "learning_rate": 1.8387759298068304e-05, "loss": 1.9627, "step": 12766000 }, { "epoch": 63.25, "learning_rate": 1.8386520711642217e-05, "loss": 1.9574, "step": 12766500 }, { "epoch": 63.25, "learning_rate": 1.8385282125216134e-05, "loss": 1.9695, "step": 12767000 }, { "epoch": 63.25, "learning_rate": 1.8384043538790048e-05, "loss": 1.9687, "step": 12767500 }, { "epoch": 63.26, "learning_rate": 1.8382804952363965e-05, "loss": 1.9756, "step": 12768000 }, { "epoch": 63.26, "learning_rate": 1.838156636593788e-05, "loss": 1.9699, "step": 12768500 }, { "epoch": 63.26, "learning_rate": 1.83803277795118e-05, "loss": 1.9828, "step": 12769000 }, { "epoch": 63.26, "learning_rate": 1.8379089193085716e-05, "loss": 1.9467, "step": 12769500 }, { "epoch": 63.27, "learning_rate": 1.8377850606659632e-05, "loss": 1.9594, "step": 12770000 }, { "epoch": 63.27, "learning_rate": 1.837661202023355e-05, "loss": 1.9748, "step": 12770500 }, { "epoch": 63.27, "learning_rate": 1.8375373433807466e-05, "loss": 1.9797, "step": 12771000 }, { "epoch": 63.27, "learning_rate": 1.837413484738138e-05, "loss": 1.9755, "step": 12771500 }, { "epoch": 63.28, "learning_rate": 1.8372896260955297e-05, "loss": 1.9551, "step": 12772000 }, { "epoch": 63.28, "learning_rate": 1.8371657674529214e-05, "loss": 1.9969, "step": 12772500 }, { "epoch": 63.28, "learning_rate": 1.8370421565275982e-05, "loss": 1.9673, "step": 12773000 }, { "epoch": 63.28, "learning_rate": 1.83691829788499e-05, "loss": 1.9613, "step": 12773500 }, { "epoch": 63.29, "learning_rate": 1.8367946869596668e-05, "loss": 1.9772, "step": 12774000 }, { "epoch": 63.29, "learning_rate": 1.8366708283170582e-05, "loss": 1.9759, "step": 12774500 }, { "epoch": 63.29, "learning_rate": 1.8365472173917354e-05, "loss": 1.966, "step": 12775000 }, { "epoch": 63.29, "learning_rate": 1.836423358749127e-05, "loss": 1.9599, "step": 12775500 }, { "epoch": 63.3, "learning_rate": 1.8362995001065188e-05, "loss": 1.9571, "step": 12776000 }, { "epoch": 63.3, "learning_rate": 1.83617564146391e-05, "loss": 1.9812, "step": 12776500 }, { "epoch": 63.3, "learning_rate": 1.8360517828213018e-05, "loss": 1.9737, "step": 12777000 }, { "epoch": 63.3, "learning_rate": 1.8359279241786935e-05, "loss": 1.9466, "step": 12777500 }, { "epoch": 63.31, "learning_rate": 1.835804065536085e-05, "loss": 1.9698, "step": 12778000 }, { "epoch": 63.31, "learning_rate": 1.8356802068934766e-05, "loss": 1.9769, "step": 12778500 }, { "epoch": 63.31, "learning_rate": 1.8355563482508683e-05, "loss": 1.9393, "step": 12779000 }, { "epoch": 63.31, "learning_rate": 1.83543248960826e-05, "loss": 1.9775, "step": 12779500 }, { "epoch": 63.32, "learning_rate": 1.8353086309656516e-05, "loss": 1.9754, "step": 12780000 }, { "epoch": 63.32, "learning_rate": 1.8351850200403285e-05, "loss": 1.9652, "step": 12780500 }, { "epoch": 63.32, "learning_rate": 1.8350611613977202e-05, "loss": 1.9766, "step": 12781000 }, { "epoch": 63.32, "learning_rate": 1.8349373027551116e-05, "loss": 1.9569, "step": 12781500 }, { "epoch": 63.33, "learning_rate": 1.8348134441125033e-05, "loss": 1.9754, "step": 12782000 }, { "epoch": 63.33, "learning_rate": 1.8346898331871805e-05, "loss": 1.9882, "step": 12782500 }, { "epoch": 63.33, "learning_rate": 1.834565974544572e-05, "loss": 1.9601, "step": 12783000 }, { "epoch": 63.33, "learning_rate": 1.8344421159019635e-05, "loss": 1.9691, "step": 12783500 }, { "epoch": 63.34, "learning_rate": 1.8343182572593552e-05, "loss": 1.954, "step": 12784000 }, { "epoch": 63.34, "learning_rate": 1.8341943986167466e-05, "loss": 1.996, "step": 12784500 }, { "epoch": 63.34, "learning_rate": 1.8340705399741383e-05, "loss": 1.9849, "step": 12785000 }, { "epoch": 63.34, "learning_rate": 1.8339469290488155e-05, "loss": 1.9615, "step": 12785500 }, { "epoch": 63.35, "learning_rate": 1.833823070406207e-05, "loss": 1.9612, "step": 12786000 }, { "epoch": 63.35, "learning_rate": 1.8336992117635985e-05, "loss": 1.9708, "step": 12786500 }, { "epoch": 63.35, "learning_rate": 1.8335753531209902e-05, "loss": 1.9593, "step": 12787000 }, { "epoch": 63.35, "learning_rate": 1.833451494478382e-05, "loss": 1.9556, "step": 12787500 }, { "epoch": 63.36, "learning_rate": 1.8333276358357733e-05, "loss": 1.992, "step": 12788000 }, { "epoch": 63.36, "learning_rate": 1.833203777193165e-05, "loss": 1.9656, "step": 12788500 }, { "epoch": 63.36, "learning_rate": 1.8330799185505567e-05, "loss": 1.9596, "step": 12789000 }, { "epoch": 63.36, "learning_rate": 1.8329560599079484e-05, "loss": 1.9627, "step": 12789500 }, { "epoch": 63.37, "learning_rate": 1.8328324489826252e-05, "loss": 1.9743, "step": 12790000 }, { "epoch": 63.37, "learning_rate": 1.832708590340017e-05, "loss": 1.9749, "step": 12790500 }, { "epoch": 63.37, "learning_rate": 1.8325847316974086e-05, "loss": 1.9598, "step": 12791000 }, { "epoch": 63.37, "learning_rate": 1.8324608730548e-05, "loss": 1.9689, "step": 12791500 }, { "epoch": 63.38, "learning_rate": 1.8323370144121917e-05, "loss": 1.9703, "step": 12792000 }, { "epoch": 63.38, "learning_rate": 1.8322131557695834e-05, "loss": 1.9682, "step": 12792500 }, { "epoch": 63.38, "learning_rate": 1.832089297126975e-05, "loss": 1.9636, "step": 12793000 }, { "epoch": 63.38, "learning_rate": 1.8319654384843664e-05, "loss": 1.9714, "step": 12793500 }, { "epoch": 63.39, "learning_rate": 1.8318418275590436e-05, "loss": 1.9598, "step": 12794000 }, { "epoch": 63.39, "learning_rate": 1.8317179689164353e-05, "loss": 1.9474, "step": 12794500 }, { "epoch": 63.39, "learning_rate": 1.8315941102738267e-05, "loss": 1.99, "step": 12795000 }, { "epoch": 63.39, "learning_rate": 1.8314702516312184e-05, "loss": 1.9502, "step": 12795500 }, { "epoch": 63.4, "learning_rate": 1.8313466407058953e-05, "loss": 2.0098, "step": 12796000 }, { "epoch": 63.4, "learning_rate": 1.831222782063287e-05, "loss": 1.9747, "step": 12796500 }, { "epoch": 63.4, "learning_rate": 1.8310989234206786e-05, "loss": 1.9802, "step": 12797000 }, { "epoch": 63.4, "learning_rate": 1.8309753124953555e-05, "loss": 1.9662, "step": 12797500 }, { "epoch": 63.41, "learning_rate": 1.8308514538527472e-05, "loss": 1.9624, "step": 12798000 }, { "epoch": 63.41, "learning_rate": 1.8307275952101386e-05, "loss": 1.9356, "step": 12798500 }, { "epoch": 63.41, "learning_rate": 1.8306037365675303e-05, "loss": 1.9952, "step": 12799000 }, { "epoch": 63.41, "learning_rate": 1.830479877924922e-05, "loss": 1.9742, "step": 12799500 }, { "epoch": 63.42, "learning_rate": 1.8303560192823136e-05, "loss": 1.9817, "step": 12800000 }, { "epoch": 63.42, "learning_rate": 1.8302321606397053e-05, "loss": 1.9599, "step": 12800500 }, { "epoch": 63.42, "learning_rate": 1.830108301997097e-05, "loss": 1.9618, "step": 12801000 }, { "epoch": 63.42, "learning_rate": 1.8299846910717736e-05, "loss": 1.9764, "step": 12801500 }, { "epoch": 63.43, "learning_rate": 1.8298608324291653e-05, "loss": 1.9812, "step": 12802000 }, { "epoch": 63.43, "learning_rate": 1.829737221503842e-05, "loss": 1.9816, "step": 12802500 }, { "epoch": 63.43, "learning_rate": 1.829613362861234e-05, "loss": 1.9706, "step": 12803000 }, { "epoch": 63.43, "learning_rate": 1.8294895042186255e-05, "loss": 1.982, "step": 12803500 }, { "epoch": 63.44, "learning_rate": 1.8293656455760172e-05, "loss": 1.991, "step": 12804000 }, { "epoch": 63.44, "learning_rate": 1.829241786933409e-05, "loss": 1.9832, "step": 12804500 }, { "epoch": 63.44, "learning_rate": 1.8291179282908003e-05, "loss": 1.9897, "step": 12805000 }, { "epoch": 63.44, "learning_rate": 1.828994069648192e-05, "loss": 1.9635, "step": 12805500 }, { "epoch": 63.45, "learning_rate": 1.8288702110055837e-05, "loss": 1.9897, "step": 12806000 }, { "epoch": 63.45, "learning_rate": 1.8287463523629753e-05, "loss": 1.9554, "step": 12806500 }, { "epoch": 63.45, "learning_rate": 1.828622493720367e-05, "loss": 1.9739, "step": 12807000 }, { "epoch": 63.45, "learning_rate": 1.8284986350777587e-05, "loss": 1.9605, "step": 12807500 }, { "epoch": 63.46, "learning_rate": 1.8283747764351504e-05, "loss": 1.96, "step": 12808000 }, { "epoch": 63.46, "learning_rate": 1.8282509177925418e-05, "loss": 1.9916, "step": 12808500 }, { "epoch": 63.46, "learning_rate": 1.8281273068672187e-05, "loss": 1.9722, "step": 12809000 }, { "epoch": 63.46, "learning_rate": 1.8280034482246104e-05, "loss": 1.9857, "step": 12809500 }, { "epoch": 63.47, "learning_rate": 1.827879589582002e-05, "loss": 1.9811, "step": 12810000 }, { "epoch": 63.47, "learning_rate": 1.8277557309393937e-05, "loss": 1.9688, "step": 12810500 }, { "epoch": 63.47, "learning_rate": 1.8276318722967854e-05, "loss": 1.986, "step": 12811000 }, { "epoch": 63.47, "learning_rate": 1.8275080136541768e-05, "loss": 1.9457, "step": 12811500 }, { "epoch": 63.48, "learning_rate": 1.8273841550115685e-05, "loss": 1.9644, "step": 12812000 }, { "epoch": 63.48, "learning_rate": 1.8272605440862454e-05, "loss": 1.9794, "step": 12812500 }, { "epoch": 63.48, "learning_rate": 1.827136685443637e-05, "loss": 1.9434, "step": 12813000 }, { "epoch": 63.48, "learning_rate": 1.8270128268010287e-05, "loss": 1.9919, "step": 12813500 }, { "epoch": 63.48, "learning_rate": 1.8268889681584204e-05, "loss": 1.9365, "step": 12814000 }, { "epoch": 63.49, "learning_rate": 1.826765109515812e-05, "loss": 1.9789, "step": 12814500 }, { "epoch": 63.49, "learning_rate": 1.826641746307774e-05, "loss": 1.9966, "step": 12815000 }, { "epoch": 63.49, "learning_rate": 1.8265178876651656e-05, "loss": 1.9463, "step": 12815500 }, { "epoch": 63.49, "learning_rate": 1.8263940290225572e-05, "loss": 1.9935, "step": 12816000 }, { "epoch": 63.5, "learning_rate": 1.826270170379949e-05, "loss": 1.9769, "step": 12816500 }, { "epoch": 63.5, "learning_rate": 1.8261463117373406e-05, "loss": 1.9923, "step": 12817000 }, { "epoch": 63.5, "learning_rate": 1.826022453094732e-05, "loss": 1.9695, "step": 12817500 }, { "epoch": 63.5, "learning_rate": 1.8258985944521237e-05, "loss": 1.9656, "step": 12818000 }, { "epoch": 63.51, "learning_rate": 1.8257749835268006e-05, "loss": 1.9645, "step": 12818500 }, { "epoch": 63.51, "learning_rate": 1.8256511248841923e-05, "loss": 1.9393, "step": 12819000 }, { "epoch": 63.51, "learning_rate": 1.825527266241584e-05, "loss": 1.9691, "step": 12819500 }, { "epoch": 63.51, "learning_rate": 1.8254034075989756e-05, "loss": 1.9765, "step": 12820000 }, { "epoch": 63.52, "learning_rate": 1.825279548956367e-05, "loss": 1.969, "step": 12820500 }, { "epoch": 63.52, "learning_rate": 1.8251556903137587e-05, "loss": 1.9624, "step": 12821000 }, { "epoch": 63.52, "learning_rate": 1.8250320793884356e-05, "loss": 1.9629, "step": 12821500 }, { "epoch": 63.52, "learning_rate": 1.8249082207458273e-05, "loss": 1.9475, "step": 12822000 }, { "epoch": 63.53, "learning_rate": 1.824784362103219e-05, "loss": 1.9865, "step": 12822500 }, { "epoch": 63.53, "learning_rate": 1.8246605034606106e-05, "loss": 1.9505, "step": 12823000 }, { "epoch": 63.53, "learning_rate": 1.8245368925352875e-05, "loss": 1.9898, "step": 12823500 }, { "epoch": 63.53, "learning_rate": 1.824413033892679e-05, "loss": 1.9564, "step": 12824000 }, { "epoch": 63.54, "learning_rate": 1.8242891752500706e-05, "loss": 1.9672, "step": 12824500 }, { "epoch": 63.54, "learning_rate": 1.8241653166074623e-05, "loss": 1.9798, "step": 12825000 }, { "epoch": 63.54, "learning_rate": 1.824041705682139e-05, "loss": 1.9616, "step": 12825500 }, { "epoch": 63.54, "learning_rate": 1.823917847039531e-05, "loss": 1.9704, "step": 12826000 }, { "epoch": 63.55, "learning_rate": 1.8237939883969225e-05, "loss": 1.9716, "step": 12826500 }, { "epoch": 63.55, "learning_rate": 1.8236701297543142e-05, "loss": 1.9506, "step": 12827000 }, { "epoch": 63.55, "learning_rate": 1.8235462711117056e-05, "loss": 1.9771, "step": 12827500 }, { "epoch": 63.55, "learning_rate": 1.8234226601863828e-05, "loss": 1.9544, "step": 12828000 }, { "epoch": 63.56, "learning_rate": 1.823298801543774e-05, "loss": 1.9699, "step": 12828500 }, { "epoch": 63.56, "learning_rate": 1.823175190618451e-05, "loss": 1.9778, "step": 12829000 }, { "epoch": 63.56, "learning_rate": 1.8230513319758427e-05, "loss": 1.9459, "step": 12829500 }, { "epoch": 63.56, "learning_rate": 1.8229274733332344e-05, "loss": 2.0041, "step": 12830000 }, { "epoch": 63.57, "learning_rate": 1.822803614690626e-05, "loss": 1.9766, "step": 12830500 }, { "epoch": 63.57, "learning_rate": 1.8226797560480178e-05, "loss": 1.9652, "step": 12831000 }, { "epoch": 63.57, "learning_rate": 1.822555897405409e-05, "loss": 1.9845, "step": 12831500 }, { "epoch": 63.57, "learning_rate": 1.822432038762801e-05, "loss": 1.9862, "step": 12832000 }, { "epoch": 63.58, "learning_rate": 1.8223081801201925e-05, "loss": 1.9779, "step": 12832500 }, { "epoch": 63.58, "learning_rate": 1.8221843214775842e-05, "loss": 1.9978, "step": 12833000 }, { "epoch": 63.58, "learning_rate": 1.822060462834976e-05, "loss": 1.9689, "step": 12833500 }, { "epoch": 63.58, "learning_rate": 1.8219370996269377e-05, "loss": 1.9886, "step": 12834000 }, { "epoch": 63.59, "learning_rate": 1.8218132409843294e-05, "loss": 1.9804, "step": 12834500 }, { "epoch": 63.59, "learning_rate": 1.821689382341721e-05, "loss": 1.9813, "step": 12835000 }, { "epoch": 63.59, "learning_rate": 1.821565771416398e-05, "loss": 1.9615, "step": 12835500 }, { "epoch": 63.59, "learning_rate": 1.8214419127737896e-05, "loss": 1.9619, "step": 12836000 }, { "epoch": 63.6, "learning_rate": 1.821318054131181e-05, "loss": 1.9997, "step": 12836500 }, { "epoch": 63.6, "learning_rate": 1.8211941954885727e-05, "loss": 1.9768, "step": 12837000 }, { "epoch": 63.6, "learning_rate": 1.8210703368459644e-05, "loss": 1.9518, "step": 12837500 }, { "epoch": 63.6, "learning_rate": 1.820946478203356e-05, "loss": 1.9705, "step": 12838000 }, { "epoch": 63.61, "learning_rate": 1.8208226195607477e-05, "loss": 1.9841, "step": 12838500 }, { "epoch": 63.61, "learning_rate": 1.8206987609181394e-05, "loss": 1.9459, "step": 12839000 }, { "epoch": 63.61, "learning_rate": 1.820574902275531e-05, "loss": 1.9573, "step": 12839500 }, { "epoch": 63.61, "learning_rate": 1.8204510436329228e-05, "loss": 1.9862, "step": 12840000 }, { "epoch": 63.62, "learning_rate": 1.8203274327075994e-05, "loss": 1.9733, "step": 12840500 }, { "epoch": 63.62, "learning_rate": 1.820203574064991e-05, "loss": 1.9637, "step": 12841000 }, { "epoch": 63.62, "learning_rate": 1.8200797154223828e-05, "loss": 1.9632, "step": 12841500 }, { "epoch": 63.62, "learning_rate": 1.8199558567797744e-05, "loss": 2.0074, "step": 12842000 }, { "epoch": 63.63, "learning_rate": 1.819831998137166e-05, "loss": 1.9684, "step": 12842500 }, { "epoch": 63.63, "learning_rate": 1.819708139494558e-05, "loss": 1.9715, "step": 12843000 }, { "epoch": 63.63, "learning_rate": 1.8195842808519495e-05, "loss": 1.9678, "step": 12843500 }, { "epoch": 63.63, "learning_rate": 1.819460422209341e-05, "loss": 1.9756, "step": 12844000 }, { "epoch": 63.64, "learning_rate": 1.8193365635667326e-05, "loss": 1.9435, "step": 12844500 }, { "epoch": 63.64, "learning_rate": 1.8192127049241243e-05, "loss": 2.0006, "step": 12845000 }, { "epoch": 63.64, "learning_rate": 1.819088846281516e-05, "loss": 1.9501, "step": 12845500 }, { "epoch": 63.64, "learning_rate": 1.8189649876389076e-05, "loss": 1.9876, "step": 12846000 }, { "epoch": 63.65, "learning_rate": 1.8188413767135845e-05, "loss": 1.9986, "step": 12846500 }, { "epoch": 63.65, "learning_rate": 1.8187175180709762e-05, "loss": 1.9806, "step": 12847000 }, { "epoch": 63.65, "learning_rate": 1.8185939071456528e-05, "loss": 1.9522, "step": 12847500 }, { "epoch": 63.65, "learning_rate": 1.8184700485030445e-05, "loss": 1.9702, "step": 12848000 }, { "epoch": 63.66, "learning_rate": 1.818346189860436e-05, "loss": 1.9602, "step": 12848500 }, { "epoch": 63.66, "learning_rate": 1.818222331217828e-05, "loss": 1.9737, "step": 12849000 }, { "epoch": 63.66, "learning_rate": 1.8180984725752195e-05, "loss": 1.9584, "step": 12849500 }, { "epoch": 63.66, "learning_rate": 1.8179746139326112e-05, "loss": 1.9819, "step": 12850000 }, { "epoch": 63.67, "learning_rate": 1.8178510030072878e-05, "loss": 1.9762, "step": 12850500 }, { "epoch": 63.67, "learning_rate": 1.8177271443646795e-05, "loss": 1.9585, "step": 12851000 }, { "epoch": 63.67, "learning_rate": 1.817603285722071e-05, "loss": 1.9525, "step": 12851500 }, { "epoch": 63.67, "learning_rate": 1.817479427079463e-05, "loss": 1.9773, "step": 12852000 }, { "epoch": 63.68, "learning_rate": 1.8173558161541397e-05, "loss": 1.9756, "step": 12852500 }, { "epoch": 63.68, "learning_rate": 1.817231957511531e-05, "loss": 1.9702, "step": 12853000 }, { "epoch": 63.68, "learning_rate": 1.8171080988689228e-05, "loss": 1.984, "step": 12853500 }, { "epoch": 63.68, "learning_rate": 1.8169842402263145e-05, "loss": 1.9746, "step": 12854000 }, { "epoch": 63.69, "learning_rate": 1.816860381583706e-05, "loss": 1.9914, "step": 12854500 }, { "epoch": 63.69, "learning_rate": 1.816736522941098e-05, "loss": 1.9835, "step": 12855000 }, { "epoch": 63.69, "learning_rate": 1.8166126642984895e-05, "loss": 1.9872, "step": 12855500 }, { "epoch": 63.69, "learning_rate": 1.8164888056558812e-05, "loss": 1.9872, "step": 12856000 }, { "epoch": 63.7, "learning_rate": 1.8163651947305578e-05, "loss": 1.978, "step": 12856500 }, { "epoch": 63.7, "learning_rate": 1.8162413360879495e-05, "loss": 1.9795, "step": 12857000 }, { "epoch": 63.7, "learning_rate": 1.8161177251626267e-05, "loss": 1.9974, "step": 12857500 }, { "epoch": 63.7, "learning_rate": 1.8159938665200184e-05, "loss": 1.9806, "step": 12858000 }, { "epoch": 63.71, "learning_rate": 1.8158700078774097e-05, "loss": 1.9699, "step": 12858500 }, { "epoch": 63.71, "learning_rate": 1.8157461492348014e-05, "loss": 1.968, "step": 12859000 }, { "epoch": 63.71, "learning_rate": 1.815622290592193e-05, "loss": 1.9856, "step": 12859500 }, { "epoch": 63.71, "learning_rate": 1.8154984319495845e-05, "loss": 1.976, "step": 12860000 }, { "epoch": 63.72, "learning_rate": 1.8153745733069762e-05, "loss": 1.9902, "step": 12860500 }, { "epoch": 63.72, "learning_rate": 1.8152509623816534e-05, "loss": 1.9791, "step": 12861000 }, { "epoch": 63.72, "learning_rate": 1.8151271037390447e-05, "loss": 1.9604, "step": 12861500 }, { "epoch": 63.72, "learning_rate": 1.8150032450964364e-05, "loss": 1.9836, "step": 12862000 }, { "epoch": 63.73, "learning_rate": 1.814879386453828e-05, "loss": 1.9698, "step": 12862500 }, { "epoch": 63.73, "learning_rate": 1.8147555278112195e-05, "loss": 1.975, "step": 12863000 }, { "epoch": 63.73, "learning_rate": 1.8146316691686112e-05, "loss": 1.971, "step": 12863500 }, { "epoch": 63.73, "learning_rate": 1.814507810526003e-05, "loss": 1.9841, "step": 12864000 }, { "epoch": 63.74, "learning_rate": 1.8143839518833946e-05, "loss": 1.9737, "step": 12864500 }, { "epoch": 63.74, "learning_rate": 1.8142600932407863e-05, "loss": 1.9694, "step": 12865000 }, { "epoch": 63.74, "learning_rate": 1.814136234598178e-05, "loss": 1.9887, "step": 12865500 }, { "epoch": 63.74, "learning_rate": 1.8140123759555693e-05, "loss": 1.974, "step": 12866000 }, { "epoch": 63.75, "learning_rate": 1.813888517312961e-05, "loss": 1.9743, "step": 12866500 }, { "epoch": 63.75, "learning_rate": 1.8137646586703527e-05, "loss": 1.9583, "step": 12867000 }, { "epoch": 63.75, "learning_rate": 1.8136408000277444e-05, "loss": 1.9712, "step": 12867500 }, { "epoch": 63.75, "learning_rate": 1.813516941385136e-05, "loss": 1.9677, "step": 12868000 }, { "epoch": 63.75, "learning_rate": 1.8133930827425278e-05, "loss": 1.9674, "step": 12868500 }, { "epoch": 63.76, "learning_rate": 1.8132692240999195e-05, "loss": 2.0035, "step": 12869000 }, { "epoch": 63.76, "learning_rate": 1.813145613174596e-05, "loss": 1.972, "step": 12869500 }, { "epoch": 63.76, "learning_rate": 1.813022002249273e-05, "loss": 1.9697, "step": 12870000 }, { "epoch": 63.76, "learning_rate": 1.8128981436066646e-05, "loss": 1.962, "step": 12870500 }, { "epoch": 63.77, "learning_rate": 1.8127742849640563e-05, "loss": 1.9854, "step": 12871000 }, { "epoch": 63.77, "learning_rate": 1.812650426321448e-05, "loss": 1.9369, "step": 12871500 }, { "epoch": 63.77, "learning_rate": 1.8125265676788397e-05, "loss": 1.95, "step": 12872000 }, { "epoch": 63.77, "learning_rate": 1.812402709036231e-05, "loss": 1.9985, "step": 12872500 }, { "epoch": 63.78, "learning_rate": 1.8122788503936227e-05, "loss": 1.9636, "step": 12873000 }, { "epoch": 63.78, "learning_rate": 1.812155487185585e-05, "loss": 1.99, "step": 12873500 }, { "epoch": 63.78, "learning_rate": 1.8120316285429765e-05, "loss": 1.9713, "step": 12874000 }, { "epoch": 63.78, "learning_rate": 1.811907769900368e-05, "loss": 1.967, "step": 12874500 }, { "epoch": 63.79, "learning_rate": 1.81178391125776e-05, "loss": 1.9776, "step": 12875000 }, { "epoch": 63.79, "learning_rate": 1.8116600526151515e-05, "loss": 1.9679, "step": 12875500 }, { "epoch": 63.79, "learning_rate": 1.8115364416898284e-05, "loss": 1.9952, "step": 12876000 }, { "epoch": 63.79, "learning_rate": 1.811412830764505e-05, "loss": 1.9765, "step": 12876500 }, { "epoch": 63.8, "learning_rate": 1.8112889721218967e-05, "loss": 1.972, "step": 12877000 }, { "epoch": 63.8, "learning_rate": 1.8111651134792884e-05, "loss": 1.9847, "step": 12877500 }, { "epoch": 63.8, "learning_rate": 1.81104125483668e-05, "loss": 2.018, "step": 12878000 }, { "epoch": 63.8, "learning_rate": 1.8109173961940717e-05, "loss": 1.967, "step": 12878500 }, { "epoch": 63.81, "learning_rate": 1.8107935375514634e-05, "loss": 1.9822, "step": 12879000 }, { "epoch": 63.81, "learning_rate": 1.810669678908855e-05, "loss": 1.9623, "step": 12879500 }, { "epoch": 63.81, "learning_rate": 1.8105458202662468e-05, "loss": 1.9919, "step": 12880000 }, { "epoch": 63.81, "learning_rate": 1.8104219616236382e-05, "loss": 1.947, "step": 12880500 }, { "epoch": 63.82, "learning_rate": 1.81029810298103e-05, "loss": 1.994, "step": 12881000 }, { "epoch": 63.82, "learning_rate": 1.8101742443384216e-05, "loss": 1.9537, "step": 12881500 }, { "epoch": 63.82, "learning_rate": 1.8100503856958133e-05, "loss": 1.9813, "step": 12882000 }, { "epoch": 63.82, "learning_rate": 1.809926527053205e-05, "loss": 1.9755, "step": 12882500 }, { "epoch": 63.83, "learning_rate": 1.8098029161278818e-05, "loss": 1.9748, "step": 12883000 }, { "epoch": 63.83, "learning_rate": 1.8096790574852732e-05, "loss": 1.9848, "step": 12883500 }, { "epoch": 63.83, "learning_rate": 1.809555198842665e-05, "loss": 1.9779, "step": 12884000 }, { "epoch": 63.83, "learning_rate": 1.8094313402000566e-05, "loss": 2.0059, "step": 12884500 }, { "epoch": 63.84, "learning_rate": 1.8093074815574483e-05, "loss": 1.973, "step": 12885000 }, { "epoch": 63.84, "learning_rate": 1.80918362291484e-05, "loss": 1.9777, "step": 12885500 }, { "epoch": 63.84, "learning_rate": 1.8090597642722316e-05, "loss": 1.9743, "step": 12886000 }, { "epoch": 63.84, "learning_rate": 1.8089359056296233e-05, "loss": 1.9666, "step": 12886500 }, { "epoch": 63.85, "learning_rate": 1.8088122947043e-05, "loss": 1.9749, "step": 12887000 }, { "epoch": 63.85, "learning_rate": 1.8086884360616916e-05, "loss": 1.9726, "step": 12887500 }, { "epoch": 63.85, "learning_rate": 1.8085648251363685e-05, "loss": 1.9753, "step": 12888000 }, { "epoch": 63.85, "learning_rate": 1.80844096649376e-05, "loss": 1.9934, "step": 12888500 }, { "epoch": 63.86, "learning_rate": 1.808317107851152e-05, "loss": 1.9639, "step": 12889000 }, { "epoch": 63.86, "learning_rate": 1.8081932492085435e-05, "loss": 1.9778, "step": 12889500 }, { "epoch": 63.86, "learning_rate": 1.808069390565935e-05, "loss": 1.969, "step": 12890000 }, { "epoch": 63.86, "learning_rate": 1.8079457796406118e-05, "loss": 1.9861, "step": 12890500 }, { "epoch": 63.87, "learning_rate": 1.8078219209980035e-05, "loss": 1.9945, "step": 12891000 }, { "epoch": 63.87, "learning_rate": 1.807698062355395e-05, "loss": 1.9905, "step": 12891500 }, { "epoch": 63.87, "learning_rate": 1.807574203712787e-05, "loss": 1.9747, "step": 12892000 }, { "epoch": 63.87, "learning_rate": 1.8074503450701785e-05, "loss": 1.9761, "step": 12892500 }, { "epoch": 63.88, "learning_rate": 1.80732648642757e-05, "loss": 1.9965, "step": 12893000 }, { "epoch": 63.88, "learning_rate": 1.8072026277849616e-05, "loss": 1.9489, "step": 12893500 }, { "epoch": 63.88, "learning_rate": 1.8070787691423533e-05, "loss": 1.9622, "step": 12894000 }, { "epoch": 63.88, "learning_rate": 1.806954910499745e-05, "loss": 1.9999, "step": 12894500 }, { "epoch": 63.89, "learning_rate": 1.806831299574422e-05, "loss": 1.9934, "step": 12895000 }, { "epoch": 63.89, "learning_rate": 1.8067076886490987e-05, "loss": 1.9769, "step": 12895500 }, { "epoch": 63.89, "learning_rate": 1.80658383000649e-05, "loss": 1.9781, "step": 12896000 }, { "epoch": 63.89, "learning_rate": 1.8064599713638818e-05, "loss": 1.9876, "step": 12896500 }, { "epoch": 63.9, "learning_rate": 1.8063361127212735e-05, "loss": 1.9956, "step": 12897000 }, { "epoch": 63.9, "learning_rate": 1.806212254078665e-05, "loss": 1.9941, "step": 12897500 }, { "epoch": 63.9, "learning_rate": 1.806088395436057e-05, "loss": 1.9425, "step": 12898000 }, { "epoch": 63.9, "learning_rate": 1.8059647845107337e-05, "loss": 1.9615, "step": 12898500 }, { "epoch": 63.91, "learning_rate": 1.805840925868125e-05, "loss": 1.9845, "step": 12899000 }, { "epoch": 63.91, "learning_rate": 1.8057170672255168e-05, "loss": 1.9843, "step": 12899500 }, { "epoch": 63.91, "learning_rate": 1.8055932085829085e-05, "loss": 1.9557, "step": 12900000 }, { "epoch": 63.91, "learning_rate": 1.8054693499403002e-05, "loss": 1.9588, "step": 12900500 }, { "epoch": 63.92, "learning_rate": 1.805345491297692e-05, "loss": 1.9467, "step": 12901000 }, { "epoch": 63.92, "learning_rate": 1.8052216326550836e-05, "loss": 1.9666, "step": 12901500 }, { "epoch": 63.92, "learning_rate": 1.8050977740124752e-05, "loss": 1.968, "step": 12902000 }, { "epoch": 63.92, "learning_rate": 1.8049741630871518e-05, "loss": 1.9703, "step": 12902500 }, { "epoch": 63.93, "learning_rate": 1.8048503044445435e-05, "loss": 1.9714, "step": 12903000 }, { "epoch": 63.93, "learning_rate": 1.8047264458019352e-05, "loss": 1.9749, "step": 12903500 }, { "epoch": 63.93, "learning_rate": 1.804602587159327e-05, "loss": 1.9945, "step": 12904000 }, { "epoch": 63.93, "learning_rate": 1.804479223951289e-05, "loss": 1.9613, "step": 12904500 }, { "epoch": 63.94, "learning_rate": 1.8043556130259658e-05, "loss": 1.9814, "step": 12905000 }, { "epoch": 63.94, "learning_rate": 1.8042317543833575e-05, "loss": 1.9728, "step": 12905500 }, { "epoch": 63.94, "learning_rate": 1.804107895740749e-05, "loss": 1.9925, "step": 12906000 }, { "epoch": 63.94, "learning_rate": 1.8039840370981406e-05, "loss": 1.9643, "step": 12906500 }, { "epoch": 63.95, "learning_rate": 1.8038601784555323e-05, "loss": 1.9877, "step": 12907000 }, { "epoch": 63.95, "learning_rate": 1.803736319812924e-05, "loss": 2.0082, "step": 12907500 }, { "epoch": 63.95, "learning_rate": 1.8036124611703156e-05, "loss": 1.9799, "step": 12908000 }, { "epoch": 63.95, "learning_rate": 1.8034886025277073e-05, "loss": 1.9742, "step": 12908500 }, { "epoch": 63.96, "learning_rate": 1.803364991602384e-05, "loss": 1.9501, "step": 12909000 }, { "epoch": 63.96, "learning_rate": 1.8032411329597756e-05, "loss": 2.0078, "step": 12909500 }, { "epoch": 63.96, "learning_rate": 1.8031172743171673e-05, "loss": 1.9511, "step": 12910000 }, { "epoch": 63.96, "learning_rate": 1.802993415674559e-05, "loss": 1.9952, "step": 12910500 }, { "epoch": 63.97, "learning_rate": 1.8028695570319506e-05, "loss": 1.9826, "step": 12911000 }, { "epoch": 63.97, "learning_rate": 1.8027456983893423e-05, "loss": 1.9852, "step": 12911500 }, { "epoch": 63.97, "learning_rate": 1.802621839746734e-05, "loss": 1.9711, "step": 12912000 }, { "epoch": 63.97, "learning_rate": 1.8024982288214106e-05, "loss": 1.9616, "step": 12912500 }, { "epoch": 63.98, "learning_rate": 1.8023743701788023e-05, "loss": 1.9807, "step": 12913000 }, { "epoch": 63.98, "learning_rate": 1.802250511536194e-05, "loss": 1.9756, "step": 12913500 }, { "epoch": 63.98, "learning_rate": 1.8021266528935856e-05, "loss": 1.9511, "step": 12914000 }, { "epoch": 63.98, "learning_rate": 1.8020030419682625e-05, "loss": 1.9889, "step": 12914500 }, { "epoch": 63.99, "learning_rate": 1.8018791833256542e-05, "loss": 1.971, "step": 12915000 }, { "epoch": 63.99, "learning_rate": 1.8017553246830456e-05, "loss": 1.9771, "step": 12915500 }, { "epoch": 63.99, "learning_rate": 1.8016314660404373e-05, "loss": 1.9651, "step": 12916000 }, { "epoch": 63.99, "learning_rate": 1.801507607397829e-05, "loss": 1.9698, "step": 12916500 }, { "epoch": 64.0, "learning_rate": 1.801383996472506e-05, "loss": 1.9779, "step": 12917000 }, { "epoch": 64.0, "learning_rate": 1.8012601378298975e-05, "loss": 1.9945, "step": 12917500 }, { "epoch": 64.0, "eval_accuracy": 0.6756957684658117, "eval_accuracy_mlm": 0.6351408521528233, "eval_accuracy_nsp": 0.867072745029593, "eval_loss": 2.2677247524261475, "eval_runtime": 147.0633, "eval_samples_per_second": 1733.668, "eval_steps_per_second": 72.241, "step": 12917952 }, { "epoch": 64.0, "learning_rate": 1.8011362791872892e-05, "loss": 1.9975, "step": 12918000 }, { "epoch": 64.0, "learning_rate": 1.8010124205446806e-05, "loss": 1.9452, "step": 12918500 }, { "epoch": 64.01, "learning_rate": 1.8008885619020723e-05, "loss": 1.9524, "step": 12919000 }, { "epoch": 64.01, "learning_rate": 1.800764703259464e-05, "loss": 1.9611, "step": 12919500 }, { "epoch": 64.01, "learning_rate": 1.8006410923341412e-05, "loss": 1.9687, "step": 12920000 }, { "epoch": 64.01, "learning_rate": 1.8005172336915325e-05, "loss": 1.9673, "step": 12920500 }, { "epoch": 64.02, "learning_rate": 1.8003933750489242e-05, "loss": 1.979, "step": 12921000 }, { "epoch": 64.02, "learning_rate": 1.8002695164063156e-05, "loss": 1.9616, "step": 12921500 }, { "epoch": 64.02, "learning_rate": 1.8001456577637073e-05, "loss": 1.9505, "step": 12922000 }, { "epoch": 64.02, "learning_rate": 1.800021799121099e-05, "loss": 1.9559, "step": 12922500 }, { "epoch": 64.03, "learning_rate": 1.7998981881957762e-05, "loss": 1.9299, "step": 12923000 }, { "epoch": 64.03, "learning_rate": 1.7997743295531675e-05, "loss": 1.9445, "step": 12923500 }, { "epoch": 64.03, "learning_rate": 1.7996504709105592e-05, "loss": 1.9257, "step": 12924000 }, { "epoch": 64.03, "learning_rate": 1.799526612267951e-05, "loss": 1.9283, "step": 12924500 }, { "epoch": 64.03, "learning_rate": 1.7994027536253423e-05, "loss": 1.9431, "step": 12925000 }, { "epoch": 64.04, "learning_rate": 1.799278894982734e-05, "loss": 1.968, "step": 12925500 }, { "epoch": 64.04, "learning_rate": 1.7991550363401257e-05, "loss": 1.9701, "step": 12926000 }, { "epoch": 64.04, "learning_rate": 1.7990311776975174e-05, "loss": 1.9714, "step": 12926500 }, { "epoch": 64.04, "learning_rate": 1.798907319054909e-05, "loss": 1.9614, "step": 12927000 }, { "epoch": 64.05, "learning_rate": 1.798783708129586e-05, "loss": 1.9336, "step": 12927500 }, { "epoch": 64.05, "learning_rate": 1.7986598494869776e-05, "loss": 1.9621, "step": 12928000 }, { "epoch": 64.05, "learning_rate": 1.798535990844369e-05, "loss": 1.956, "step": 12928500 }, { "epoch": 64.05, "learning_rate": 1.7984121322017607e-05, "loss": 1.9375, "step": 12929000 }, { "epoch": 64.06, "learning_rate": 1.7982882735591524e-05, "loss": 1.9581, "step": 12929500 }, { "epoch": 64.06, "learning_rate": 1.798164414916544e-05, "loss": 1.9729, "step": 12930000 }, { "epoch": 64.06, "learning_rate": 1.7980405562739358e-05, "loss": 1.959, "step": 12930500 }, { "epoch": 64.06, "learning_rate": 1.7979166976313275e-05, "loss": 1.9482, "step": 12931000 }, { "epoch": 64.07, "learning_rate": 1.797793086706004e-05, "loss": 1.9448, "step": 12931500 }, { "epoch": 64.07, "learning_rate": 1.7976692280633957e-05, "loss": 1.9705, "step": 12932000 }, { "epoch": 64.07, "learning_rate": 1.7975453694207874e-05, "loss": 1.9425, "step": 12932500 }, { "epoch": 64.07, "learning_rate": 1.797421510778179e-05, "loss": 1.9469, "step": 12933000 }, { "epoch": 64.08, "learning_rate": 1.7972976521355708e-05, "loss": 1.9385, "step": 12933500 }, { "epoch": 64.08, "learning_rate": 1.7971737934929625e-05, "loss": 1.9592, "step": 12934000 }, { "epoch": 64.08, "learning_rate": 1.797049934850354e-05, "loss": 1.953, "step": 12934500 }, { "epoch": 64.08, "learning_rate": 1.796926076207746e-05, "loss": 1.9584, "step": 12935000 }, { "epoch": 64.09, "learning_rate": 1.7968022175651372e-05, "loss": 1.9391, "step": 12935500 }, { "epoch": 64.09, "learning_rate": 1.796678606639814e-05, "loss": 1.9703, "step": 12936000 }, { "epoch": 64.09, "learning_rate": 1.7965547479972058e-05, "loss": 1.9622, "step": 12936500 }, { "epoch": 64.09, "learning_rate": 1.7964308893545975e-05, "loss": 1.9337, "step": 12937000 }, { "epoch": 64.1, "learning_rate": 1.796307030711989e-05, "loss": 1.9666, "step": 12937500 }, { "epoch": 64.1, "learning_rate": 1.796183419786666e-05, "loss": 1.953, "step": 12938000 }, { "epoch": 64.1, "learning_rate": 1.796059808861343e-05, "loss": 1.939, "step": 12938500 }, { "epoch": 64.1, "learning_rate": 1.7959359502187346e-05, "loss": 1.9365, "step": 12939000 }, { "epoch": 64.11, "learning_rate": 1.7958120915761263e-05, "loss": 1.9596, "step": 12939500 }, { "epoch": 64.11, "learning_rate": 1.795688232933518e-05, "loss": 1.9244, "step": 12940000 }, { "epoch": 64.11, "learning_rate": 1.7955643742909094e-05, "loss": 1.9599, "step": 12940500 }, { "epoch": 64.11, "learning_rate": 1.795440515648301e-05, "loss": 1.9407, "step": 12941000 }, { "epoch": 64.12, "learning_rate": 1.7953166570056927e-05, "loss": 1.9441, "step": 12941500 }, { "epoch": 64.12, "learning_rate": 1.795192798363084e-05, "loss": 1.9481, "step": 12942000 }, { "epoch": 64.12, "learning_rate": 1.7950689397204758e-05, "loss": 1.9491, "step": 12942500 }, { "epoch": 64.12, "learning_rate": 1.794945328795153e-05, "loss": 1.9479, "step": 12943000 }, { "epoch": 64.13, "learning_rate": 1.7948214701525444e-05, "loss": 1.9648, "step": 12943500 }, { "epoch": 64.13, "learning_rate": 1.794697611509936e-05, "loss": 1.9343, "step": 12944000 }, { "epoch": 64.13, "learning_rate": 1.794574000584613e-05, "loss": 1.9412, "step": 12944500 }, { "epoch": 64.13, "learning_rate": 1.7944501419420046e-05, "loss": 1.9564, "step": 12945000 }, { "epoch": 64.14, "learning_rate": 1.7943262832993963e-05, "loss": 1.9586, "step": 12945500 }, { "epoch": 64.14, "learning_rate": 1.794202424656788e-05, "loss": 1.957, "step": 12946000 }, { "epoch": 64.14, "learning_rate": 1.7940785660141794e-05, "loss": 1.9627, "step": 12946500 }, { "epoch": 64.14, "learning_rate": 1.793954707371571e-05, "loss": 1.9579, "step": 12947000 }, { "epoch": 64.15, "learning_rate": 1.7938308487289627e-05, "loss": 1.9543, "step": 12947500 }, { "epoch": 64.15, "learning_rate": 1.7937069900863544e-05, "loss": 1.9527, "step": 12948000 }, { "epoch": 64.15, "learning_rate": 1.7935831314437458e-05, "loss": 1.944, "step": 12948500 }, { "epoch": 64.15, "learning_rate": 1.793459520518423e-05, "loss": 1.9532, "step": 12949000 }, { "epoch": 64.16, "learning_rate": 1.7933359095930996e-05, "loss": 1.9323, "step": 12949500 }, { "epoch": 64.16, "learning_rate": 1.7932120509504913e-05, "loss": 1.9355, "step": 12950000 }, { "epoch": 64.16, "learning_rate": 1.793088192307883e-05, "loss": 1.9625, "step": 12950500 }, { "epoch": 64.16, "learning_rate": 1.7929645813825598e-05, "loss": 1.9509, "step": 12951000 }, { "epoch": 64.17, "learning_rate": 1.7928407227399515e-05, "loss": 1.9706, "step": 12951500 }, { "epoch": 64.17, "learning_rate": 1.792716864097343e-05, "loss": 1.9587, "step": 12952000 }, { "epoch": 64.17, "learning_rate": 1.7925930054547346e-05, "loss": 1.9702, "step": 12952500 }, { "epoch": 64.17, "learning_rate": 1.7924691468121263e-05, "loss": 1.9274, "step": 12953000 }, { "epoch": 64.18, "learning_rate": 1.792345288169518e-05, "loss": 1.9278, "step": 12953500 }, { "epoch": 64.18, "learning_rate": 1.7922214295269096e-05, "loss": 1.9463, "step": 12954000 }, { "epoch": 64.18, "learning_rate": 1.7920978186015865e-05, "loss": 1.9633, "step": 12954500 }, { "epoch": 64.18, "learning_rate": 1.791973959958978e-05, "loss": 1.964, "step": 12955000 }, { "epoch": 64.19, "learning_rate": 1.7918501013163696e-05, "loss": 1.9673, "step": 12955500 }, { "epoch": 64.19, "learning_rate": 1.7917262426737613e-05, "loss": 1.9484, "step": 12956000 }, { "epoch": 64.19, "learning_rate": 1.791602384031153e-05, "loss": 1.9615, "step": 12956500 }, { "epoch": 64.19, "learning_rate": 1.79147877310583e-05, "loss": 1.9718, "step": 12957000 }, { "epoch": 64.2, "learning_rate": 1.7913551621805067e-05, "loss": 1.9678, "step": 12957500 }, { "epoch": 64.2, "learning_rate": 1.7912313035378984e-05, "loss": 1.9363, "step": 12958000 }, { "epoch": 64.2, "learning_rate": 1.79110744489529e-05, "loss": 1.9608, "step": 12958500 }, { "epoch": 64.2, "learning_rate": 1.7909835862526818e-05, "loss": 1.98, "step": 12959000 }, { "epoch": 64.21, "learning_rate": 1.790859727610073e-05, "loss": 1.9498, "step": 12959500 }, { "epoch": 64.21, "learning_rate": 1.790735868967465e-05, "loss": 1.9684, "step": 12960000 }, { "epoch": 64.21, "learning_rate": 1.7906122580421417e-05, "loss": 1.9478, "step": 12960500 }, { "epoch": 64.21, "learning_rate": 1.7904883993995334e-05, "loss": 1.9521, "step": 12961000 }, { "epoch": 64.22, "learning_rate": 1.790364540756925e-05, "loss": 1.9695, "step": 12961500 }, { "epoch": 64.22, "learning_rate": 1.7902406821143168e-05, "loss": 1.9616, "step": 12962000 }, { "epoch": 64.22, "learning_rate": 1.7901168234717085e-05, "loss": 1.9725, "step": 12962500 }, { "epoch": 64.22, "learning_rate": 1.7899929648291e-05, "loss": 1.9689, "step": 12963000 }, { "epoch": 64.23, "learning_rate": 1.7898691061864915e-05, "loss": 1.9374, "step": 12963500 }, { "epoch": 64.23, "learning_rate": 1.7897452475438832e-05, "loss": 1.9772, "step": 12964000 }, { "epoch": 64.23, "learning_rate": 1.7896213889012746e-05, "loss": 1.9704, "step": 12964500 }, { "epoch": 64.23, "learning_rate": 1.7894975302586663e-05, "loss": 1.9684, "step": 12965000 }, { "epoch": 64.24, "learning_rate": 1.789373671616058e-05, "loss": 1.9611, "step": 12965500 }, { "epoch": 64.24, "learning_rate": 1.7892498129734497e-05, "loss": 1.9471, "step": 12966000 }, { "epoch": 64.24, "learning_rate": 1.7891264497654117e-05, "loss": 1.9491, "step": 12966500 }, { "epoch": 64.24, "learning_rate": 1.7890025911228034e-05, "loss": 1.9882, "step": 12967000 }, { "epoch": 64.25, "learning_rate": 1.788878732480195e-05, "loss": 1.98, "step": 12967500 }, { "epoch": 64.25, "learning_rate": 1.7887548738375868e-05, "loss": 1.9713, "step": 12968000 }, { "epoch": 64.25, "learning_rate": 1.7886310151949785e-05, "loss": 1.9545, "step": 12968500 }, { "epoch": 64.25, "learning_rate": 1.7885071565523702e-05, "loss": 1.9328, "step": 12969000 }, { "epoch": 64.26, "learning_rate": 1.7883832979097616e-05, "loss": 1.9489, "step": 12969500 }, { "epoch": 64.26, "learning_rate": 1.7882596869844384e-05, "loss": 1.9744, "step": 12970000 }, { "epoch": 64.26, "learning_rate": 1.78813582834183e-05, "loss": 1.9576, "step": 12970500 }, { "epoch": 64.26, "learning_rate": 1.7880119696992218e-05, "loss": 1.9435, "step": 12971000 }, { "epoch": 64.27, "learning_rate": 1.7878881110566135e-05, "loss": 1.9712, "step": 12971500 }, { "epoch": 64.27, "learning_rate": 1.7877642524140052e-05, "loss": 1.962, "step": 12972000 }, { "epoch": 64.27, "learning_rate": 1.787640393771397e-05, "loss": 1.9826, "step": 12972500 }, { "epoch": 64.27, "learning_rate": 1.7875165351287883e-05, "loss": 1.9796, "step": 12973000 }, { "epoch": 64.28, "learning_rate": 1.78739267648618e-05, "loss": 1.9711, "step": 12973500 }, { "epoch": 64.28, "learning_rate": 1.7872688178435716e-05, "loss": 1.9668, "step": 12974000 }, { "epoch": 64.28, "learning_rate": 1.787144959200963e-05, "loss": 1.9395, "step": 12974500 }, { "epoch": 64.28, "learning_rate": 1.7870215959929254e-05, "loss": 1.9904, "step": 12975000 }, { "epoch": 64.29, "learning_rate": 1.7868977373503168e-05, "loss": 1.9533, "step": 12975500 }, { "epoch": 64.29, "learning_rate": 1.7867741264249936e-05, "loss": 1.9694, "step": 12976000 }, { "epoch": 64.29, "learning_rate": 1.7866502677823853e-05, "loss": 1.9708, "step": 12976500 }, { "epoch": 64.29, "learning_rate": 1.786526409139777e-05, "loss": 1.9401, "step": 12977000 }, { "epoch": 64.3, "learning_rate": 1.7864025504971687e-05, "loss": 1.9421, "step": 12977500 }, { "epoch": 64.3, "learning_rate": 1.7862786918545604e-05, "loss": 1.9658, "step": 12978000 }, { "epoch": 64.3, "learning_rate": 1.7861548332119518e-05, "loss": 1.9679, "step": 12978500 }, { "epoch": 64.3, "learning_rate": 1.7860309745693435e-05, "loss": 1.9424, "step": 12979000 }, { "epoch": 64.3, "learning_rate": 1.785907115926735e-05, "loss": 1.9597, "step": 12979500 }, { "epoch": 64.31, "learning_rate": 1.785783257284127e-05, "loss": 1.9529, "step": 12980000 }, { "epoch": 64.31, "learning_rate": 1.7856593986415185e-05, "loss": 1.962, "step": 12980500 }, { "epoch": 64.31, "learning_rate": 1.7855357877161954e-05, "loss": 1.991, "step": 12981000 }, { "epoch": 64.31, "learning_rate": 1.785411929073587e-05, "loss": 1.9734, "step": 12981500 }, { "epoch": 64.32, "learning_rate": 1.7852883181482636e-05, "loss": 1.9445, "step": 12982000 }, { "epoch": 64.32, "learning_rate": 1.7851644595056553e-05, "loss": 1.9457, "step": 12982500 }, { "epoch": 64.32, "learning_rate": 1.785040600863047e-05, "loss": 1.9815, "step": 12983000 }, { "epoch": 64.32, "learning_rate": 1.784916989937724e-05, "loss": 1.9724, "step": 12983500 }, { "epoch": 64.33, "learning_rate": 1.7847931312951156e-05, "loss": 1.9483, "step": 12984000 }, { "epoch": 64.33, "learning_rate": 1.7846692726525073e-05, "loss": 1.9554, "step": 12984500 }, { "epoch": 64.33, "learning_rate": 1.784545414009899e-05, "loss": 1.9552, "step": 12985000 }, { "epoch": 64.33, "learning_rate": 1.7844215553672903e-05, "loss": 1.9588, "step": 12985500 }, { "epoch": 64.34, "learning_rate": 1.784297696724682e-05, "loss": 1.9873, "step": 12986000 }, { "epoch": 64.34, "learning_rate": 1.7841738380820737e-05, "loss": 1.9546, "step": 12986500 }, { "epoch": 64.34, "learning_rate": 1.7840499794394654e-05, "loss": 1.9677, "step": 12987000 }, { "epoch": 64.34, "learning_rate": 1.783926120796857e-05, "loss": 1.9355, "step": 12987500 }, { "epoch": 64.35, "learning_rate": 1.7838022621542485e-05, "loss": 1.9582, "step": 12988000 }, { "epoch": 64.35, "learning_rate": 1.78367840351164e-05, "loss": 1.9794, "step": 12988500 }, { "epoch": 64.35, "learning_rate": 1.783554792586317e-05, "loss": 1.9489, "step": 12989000 }, { "epoch": 64.35, "learning_rate": 1.7834309339437087e-05, "loss": 1.9876, "step": 12989500 }, { "epoch": 64.36, "learning_rate": 1.7833070753011004e-05, "loss": 1.9553, "step": 12990000 }, { "epoch": 64.36, "learning_rate": 1.783183216658492e-05, "loss": 1.9779, "step": 12990500 }, { "epoch": 64.36, "learning_rate": 1.7830593580158835e-05, "loss": 1.9652, "step": 12991000 }, { "epoch": 64.36, "learning_rate": 1.7829354993732752e-05, "loss": 1.9749, "step": 12991500 }, { "epoch": 64.37, "learning_rate": 1.782811640730667e-05, "loss": 1.9865, "step": 12992000 }, { "epoch": 64.37, "learning_rate": 1.7826877820880586e-05, "loss": 1.9561, "step": 12992500 }, { "epoch": 64.37, "learning_rate": 1.7825639234454502e-05, "loss": 1.9781, "step": 12993000 }, { "epoch": 64.37, "learning_rate": 1.782440064802842e-05, "loss": 1.9663, "step": 12993500 }, { "epoch": 64.38, "learning_rate": 1.7823162061602336e-05, "loss": 1.9632, "step": 12994000 }, { "epoch": 64.38, "learning_rate": 1.7821923475176253e-05, "loss": 1.9896, "step": 12994500 }, { "epoch": 64.38, "learning_rate": 1.782068488875017e-05, "loss": 1.9525, "step": 12995000 }, { "epoch": 64.38, "learning_rate": 1.7819446302324084e-05, "loss": 1.9406, "step": 12995500 }, { "epoch": 64.39, "learning_rate": 1.7818210193070853e-05, "loss": 1.9333, "step": 12996000 }, { "epoch": 64.39, "learning_rate": 1.781697160664477e-05, "loss": 1.9609, "step": 12996500 }, { "epoch": 64.39, "learning_rate": 1.7815733020218686e-05, "loss": 1.9579, "step": 12997000 }, { "epoch": 64.39, "learning_rate": 1.7814494433792603e-05, "loss": 1.9762, "step": 12997500 }, { "epoch": 64.4, "learning_rate": 1.781325584736652e-05, "loss": 1.9513, "step": 12998000 }, { "epoch": 64.4, "learning_rate": 1.7812017260940434e-05, "loss": 1.9542, "step": 12998500 }, { "epoch": 64.4, "learning_rate": 1.781077867451435e-05, "loss": 1.9701, "step": 12999000 }, { "epoch": 64.4, "learning_rate": 1.7809540088088268e-05, "loss": 1.9756, "step": 12999500 }, { "epoch": 64.41, "learning_rate": 1.7808301501662185e-05, "loss": 1.953, "step": 13000000 }, { "epoch": 64.41, "learning_rate": 1.7807067869581802e-05, "loss": 1.9571, "step": 13000500 }, { "epoch": 64.41, "learning_rate": 1.7805831760328574e-05, "loss": 1.9842, "step": 13001000 }, { "epoch": 64.41, "learning_rate": 1.7804595651075343e-05, "loss": 1.9782, "step": 13001500 }, { "epoch": 64.42, "learning_rate": 1.780335706464926e-05, "loss": 1.9728, "step": 13002000 }, { "epoch": 64.42, "learning_rate": 1.7802118478223173e-05, "loss": 1.9793, "step": 13002500 }, { "epoch": 64.42, "learning_rate": 1.780087989179709e-05, "loss": 1.9772, "step": 13003000 }, { "epoch": 64.42, "learning_rate": 1.7799641305371007e-05, "loss": 1.9539, "step": 13003500 }, { "epoch": 64.43, "learning_rate": 1.7798402718944924e-05, "loss": 1.9636, "step": 13004000 }, { "epoch": 64.43, "learning_rate": 1.779716413251884e-05, "loss": 1.9662, "step": 13004500 }, { "epoch": 64.43, "learning_rate": 1.7795925546092758e-05, "loss": 1.947, "step": 13005000 }, { "epoch": 64.43, "learning_rate": 1.779468695966667e-05, "loss": 1.9743, "step": 13005500 }, { "epoch": 64.44, "learning_rate": 1.779344837324059e-05, "loss": 1.9706, "step": 13006000 }, { "epoch": 64.44, "learning_rate": 1.7792209786814505e-05, "loss": 1.9645, "step": 13006500 }, { "epoch": 64.44, "learning_rate": 1.779097120038842e-05, "loss": 1.954, "step": 13007000 }, { "epoch": 64.44, "learning_rate": 1.778973509113519e-05, "loss": 1.9476, "step": 13007500 }, { "epoch": 64.45, "learning_rate": 1.7788496504709108e-05, "loss": 1.9606, "step": 13008000 }, { "epoch": 64.45, "learning_rate": 1.7787257918283025e-05, "loss": 1.9706, "step": 13008500 }, { "epoch": 64.45, "learning_rate": 1.778601933185694e-05, "loss": 1.9613, "step": 13009000 }, { "epoch": 64.45, "learning_rate": 1.7784783222603707e-05, "loss": 1.9509, "step": 13009500 }, { "epoch": 64.46, "learning_rate": 1.7783547113350476e-05, "loss": 1.9659, "step": 13010000 }, { "epoch": 64.46, "learning_rate": 1.7782308526924393e-05, "loss": 1.9641, "step": 13010500 }, { "epoch": 64.46, "learning_rate": 1.778106994049831e-05, "loss": 1.9564, "step": 13011000 }, { "epoch": 64.46, "learning_rate": 1.7779831354072227e-05, "loss": 1.9505, "step": 13011500 }, { "epoch": 64.47, "learning_rate": 1.777859276764614e-05, "loss": 1.9681, "step": 13012000 }, { "epoch": 64.47, "learning_rate": 1.7777354181220057e-05, "loss": 1.974, "step": 13012500 }, { "epoch": 64.47, "learning_rate": 1.7776115594793974e-05, "loss": 1.9705, "step": 13013000 }, { "epoch": 64.47, "learning_rate": 1.777487700836789e-05, "loss": 1.9484, "step": 13013500 }, { "epoch": 64.48, "learning_rate": 1.7773638421941808e-05, "loss": 1.9731, "step": 13014000 }, { "epoch": 64.48, "learning_rate": 1.7772404789861426e-05, "loss": 1.9781, "step": 13014500 }, { "epoch": 64.48, "learning_rate": 1.7771166203435342e-05, "loss": 1.9654, "step": 13015000 }, { "epoch": 64.48, "learning_rate": 1.776992761700926e-05, "loss": 1.9871, "step": 13015500 }, { "epoch": 64.49, "learning_rate": 1.7768689030583176e-05, "loss": 1.9515, "step": 13016000 }, { "epoch": 64.49, "learning_rate": 1.7767450444157093e-05, "loss": 1.9723, "step": 13016500 }, { "epoch": 64.49, "learning_rate": 1.776621185773101e-05, "loss": 1.978, "step": 13017000 }, { "epoch": 64.49, "learning_rate": 1.7764973271304927e-05, "loss": 1.9684, "step": 13017500 }, { "epoch": 64.5, "learning_rate": 1.776373468487884e-05, "loss": 1.9575, "step": 13018000 }, { "epoch": 64.5, "learning_rate": 1.7762496098452758e-05, "loss": 1.9531, "step": 13018500 }, { "epoch": 64.5, "learning_rate": 1.7761259989199526e-05, "loss": 1.9682, "step": 13019000 }, { "epoch": 64.5, "learning_rate": 1.77600238799463e-05, "loss": 1.9824, "step": 13019500 }, { "epoch": 64.51, "learning_rate": 1.7758785293520212e-05, "loss": 1.9486, "step": 13020000 }, { "epoch": 64.51, "learning_rate": 1.775754918426698e-05, "loss": 1.9495, "step": 13020500 }, { "epoch": 64.51, "learning_rate": 1.7756310597840898e-05, "loss": 1.9696, "step": 13021000 }, { "epoch": 64.51, "learning_rate": 1.7755072011414815e-05, "loss": 1.9394, "step": 13021500 }, { "epoch": 64.52, "learning_rate": 1.775383342498873e-05, "loss": 1.9738, "step": 13022000 }, { "epoch": 64.52, "learning_rate": 1.775259483856265e-05, "loss": 1.9536, "step": 13022500 }, { "epoch": 64.52, "learning_rate": 1.7751356252136562e-05, "loss": 2.0046, "step": 13023000 }, { "epoch": 64.52, "learning_rate": 1.775011766571048e-05, "loss": 1.945, "step": 13023500 }, { "epoch": 64.53, "learning_rate": 1.7748879079284396e-05, "loss": 1.9833, "step": 13024000 }, { "epoch": 64.53, "learning_rate": 1.774764049285831e-05, "loss": 1.969, "step": 13024500 }, { "epoch": 64.53, "learning_rate": 1.7746404383605082e-05, "loss": 1.9435, "step": 13025000 }, { "epoch": 64.53, "learning_rate": 1.7745165797179e-05, "loss": 1.9721, "step": 13025500 }, { "epoch": 64.54, "learning_rate": 1.7743927210752912e-05, "loss": 1.958, "step": 13026000 }, { "epoch": 64.54, "learning_rate": 1.774268862432683e-05, "loss": 1.9746, "step": 13026500 }, { "epoch": 64.54, "learning_rate": 1.7741450037900746e-05, "loss": 1.9597, "step": 13027000 }, { "epoch": 64.54, "learning_rate": 1.7740211451474663e-05, "loss": 1.9888, "step": 13027500 }, { "epoch": 64.55, "learning_rate": 1.7738972865048577e-05, "loss": 1.9725, "step": 13028000 }, { "epoch": 64.55, "learning_rate": 1.7737739232968197e-05, "loss": 1.9769, "step": 13028500 }, { "epoch": 64.55, "learning_rate": 1.7736500646542114e-05, "loss": 1.9824, "step": 13029000 }, { "epoch": 64.55, "learning_rate": 1.773526206011603e-05, "loss": 1.9731, "step": 13029500 }, { "epoch": 64.56, "learning_rate": 1.7734023473689948e-05, "loss": 1.9692, "step": 13030000 }, { "epoch": 64.56, "learning_rate": 1.7732784887263865e-05, "loss": 1.9645, "step": 13030500 }, { "epoch": 64.56, "learning_rate": 1.7731546300837782e-05, "loss": 1.9493, "step": 13031000 }, { "epoch": 64.56, "learning_rate": 1.77303077144117e-05, "loss": 1.9627, "step": 13031500 }, { "epoch": 64.57, "learning_rate": 1.7729069127985616e-05, "loss": 1.9608, "step": 13032000 }, { "epoch": 64.57, "learning_rate": 1.772783301873238e-05, "loss": 1.9618, "step": 13032500 }, { "epoch": 64.57, "learning_rate": 1.772659690947915e-05, "loss": 1.9566, "step": 13033000 }, { "epoch": 64.57, "learning_rate": 1.7725358323053067e-05, "loss": 1.9506, "step": 13033500 }, { "epoch": 64.57, "learning_rate": 1.7724119736626984e-05, "loss": 1.9529, "step": 13034000 }, { "epoch": 64.58, "learning_rate": 1.7722881150200897e-05, "loss": 1.9671, "step": 13034500 }, { "epoch": 64.58, "learning_rate": 1.7721642563774814e-05, "loss": 1.9686, "step": 13035000 }, { "epoch": 64.58, "learning_rate": 1.772040397734873e-05, "loss": 1.9892, "step": 13035500 }, { "epoch": 64.58, "learning_rate": 1.7719165390922648e-05, "loss": 1.9742, "step": 13036000 }, { "epoch": 64.59, "learning_rate": 1.7717926804496565e-05, "loss": 1.9702, "step": 13036500 }, { "epoch": 64.59, "learning_rate": 1.7716688218070482e-05, "loss": 1.9656, "step": 13037000 }, { "epoch": 64.59, "learning_rate": 1.7715452108817247e-05, "loss": 1.9828, "step": 13037500 }, { "epoch": 64.59, "learning_rate": 1.7714213522391164e-05, "loss": 1.9822, "step": 13038000 }, { "epoch": 64.6, "learning_rate": 1.771297493596508e-05, "loss": 1.9488, "step": 13038500 }, { "epoch": 64.6, "learning_rate": 1.7711736349538998e-05, "loss": 1.9746, "step": 13039000 }, { "epoch": 64.6, "learning_rate": 1.7710497763112915e-05, "loss": 1.9754, "step": 13039500 }, { "epoch": 64.6, "learning_rate": 1.7709259176686832e-05, "loss": 1.9647, "step": 13040000 }, { "epoch": 64.61, "learning_rate": 1.770802059026075e-05, "loss": 1.9504, "step": 13040500 }, { "epoch": 64.61, "learning_rate": 1.7706784481007514e-05, "loss": 1.9759, "step": 13041000 }, { "epoch": 64.61, "learning_rate": 1.770554589458143e-05, "loss": 1.963, "step": 13041500 }, { "epoch": 64.61, "learning_rate": 1.7704307308155348e-05, "loss": 1.9746, "step": 13042000 }, { "epoch": 64.62, "learning_rate": 1.7703068721729265e-05, "loss": 1.9684, "step": 13042500 }, { "epoch": 64.62, "learning_rate": 1.7701830135303182e-05, "loss": 1.9894, "step": 13043000 }, { "epoch": 64.62, "learning_rate": 1.77005915488771e-05, "loss": 1.9592, "step": 13043500 }, { "epoch": 64.62, "learning_rate": 1.7699352962451016e-05, "loss": 1.9467, "step": 13044000 }, { "epoch": 64.63, "learning_rate": 1.7698114376024933e-05, "loss": 1.9822, "step": 13044500 }, { "epoch": 64.63, "learning_rate": 1.7696875789598846e-05, "loss": 1.9801, "step": 13045000 }, { "epoch": 64.63, "learning_rate": 1.7695637203172763e-05, "loss": 1.9652, "step": 13045500 }, { "epoch": 64.63, "learning_rate": 1.769439861674668e-05, "loss": 1.9467, "step": 13046000 }, { "epoch": 64.64, "learning_rate": 1.769316250749345e-05, "loss": 1.9467, "step": 13046500 }, { "epoch": 64.64, "learning_rate": 1.7691923921067366e-05, "loss": 1.9844, "step": 13047000 }, { "epoch": 64.64, "learning_rate": 1.7690685334641283e-05, "loss": 1.98, "step": 13047500 }, { "epoch": 64.64, "learning_rate": 1.7689446748215197e-05, "loss": 1.9879, "step": 13048000 }, { "epoch": 64.65, "learning_rate": 1.7688208161789113e-05, "loss": 1.9518, "step": 13048500 }, { "epoch": 64.65, "learning_rate": 1.7686972052535882e-05, "loss": 1.9859, "step": 13049000 }, { "epoch": 64.65, "learning_rate": 1.7685738420455503e-05, "loss": 1.9835, "step": 13049500 }, { "epoch": 64.65, "learning_rate": 1.7684502311202272e-05, "loss": 1.9835, "step": 13050000 }, { "epoch": 64.66, "learning_rate": 1.768326372477619e-05, "loss": 1.9587, "step": 13050500 }, { "epoch": 64.66, "learning_rate": 1.7682025138350106e-05, "loss": 1.9613, "step": 13051000 }, { "epoch": 64.66, "learning_rate": 1.7680786551924023e-05, "loss": 1.9904, "step": 13051500 }, { "epoch": 64.66, "learning_rate": 1.7679547965497936e-05, "loss": 1.9804, "step": 13052000 }, { "epoch": 64.67, "learning_rate": 1.7678309379071853e-05, "loss": 1.9759, "step": 13052500 }, { "epoch": 64.67, "learning_rate": 1.767707079264577e-05, "loss": 1.9678, "step": 13053000 }, { "epoch": 64.67, "learning_rate": 1.7675832206219687e-05, "loss": 1.9606, "step": 13053500 }, { "epoch": 64.67, "learning_rate": 1.7674593619793604e-05, "loss": 1.9728, "step": 13054000 }, { "epoch": 64.68, "learning_rate": 1.767335503336752e-05, "loss": 1.9747, "step": 13054500 }, { "epoch": 64.68, "learning_rate": 1.7672116446941438e-05, "loss": 1.9619, "step": 13055000 }, { "epoch": 64.68, "learning_rate": 1.7670880337688203e-05, "loss": 1.9651, "step": 13055500 }, { "epoch": 64.68, "learning_rate": 1.766964175126212e-05, "loss": 1.9336, "step": 13056000 }, { "epoch": 64.69, "learning_rate": 1.7668403164836037e-05, "loss": 1.9865, "step": 13056500 }, { "epoch": 64.69, "learning_rate": 1.7667164578409954e-05, "loss": 1.9603, "step": 13057000 }, { "epoch": 64.69, "learning_rate": 1.7665928469156723e-05, "loss": 1.9855, "step": 13057500 }, { "epoch": 64.69, "learning_rate": 1.7664689882730636e-05, "loss": 1.9884, "step": 13058000 }, { "epoch": 64.7, "learning_rate": 1.7663451296304553e-05, "loss": 1.9669, "step": 13058500 }, { "epoch": 64.7, "learning_rate": 1.766221270987847e-05, "loss": 1.9786, "step": 13059000 }, { "epoch": 64.7, "learning_rate": 1.766097660062524e-05, "loss": 1.967, "step": 13059500 }, { "epoch": 64.7, "learning_rate": 1.7659738014199156e-05, "loss": 1.9618, "step": 13060000 }, { "epoch": 64.71, "learning_rate": 1.7658499427773073e-05, "loss": 1.9521, "step": 13060500 }, { "epoch": 64.71, "learning_rate": 1.7657260841346986e-05, "loss": 1.9743, "step": 13061000 }, { "epoch": 64.71, "learning_rate": 1.7656022254920903e-05, "loss": 1.9684, "step": 13061500 }, { "epoch": 64.71, "learning_rate": 1.765478366849482e-05, "loss": 1.975, "step": 13062000 }, { "epoch": 64.72, "learning_rate": 1.7653545082068737e-05, "loss": 1.9786, "step": 13062500 }, { "epoch": 64.72, "learning_rate": 1.7652306495642654e-05, "loss": 1.9729, "step": 13063000 }, { "epoch": 64.72, "learning_rate": 1.765106790921657e-05, "loss": 1.9581, "step": 13063500 }, { "epoch": 64.72, "learning_rate": 1.7649829322790488e-05, "loss": 1.9763, "step": 13064000 }, { "epoch": 64.73, "learning_rate": 1.7648590736364405e-05, "loss": 1.951, "step": 13064500 }, { "epoch": 64.73, "learning_rate": 1.764735214993832e-05, "loss": 1.9658, "step": 13065000 }, { "epoch": 64.73, "learning_rate": 1.7646113563512235e-05, "loss": 1.982, "step": 13065500 }, { "epoch": 64.73, "learning_rate": 1.7644877454259004e-05, "loss": 1.9676, "step": 13066000 }, { "epoch": 64.74, "learning_rate": 1.7643641345005773e-05, "loss": 1.9734, "step": 13066500 }, { "epoch": 64.74, "learning_rate": 1.764240275857969e-05, "loss": 1.9498, "step": 13067000 }, { "epoch": 64.74, "learning_rate": 1.7641164172153603e-05, "loss": 1.9465, "step": 13067500 }, { "epoch": 64.74, "learning_rate": 1.763992558572752e-05, "loss": 1.9706, "step": 13068000 }, { "epoch": 64.75, "learning_rate": 1.7638686999301437e-05, "loss": 1.9789, "step": 13068500 }, { "epoch": 64.75, "learning_rate": 1.7637448412875354e-05, "loss": 1.9819, "step": 13069000 }, { "epoch": 64.75, "learning_rate": 1.763620982644927e-05, "loss": 1.9548, "step": 13069500 }, { "epoch": 64.75, "learning_rate": 1.7634971240023188e-05, "loss": 1.9497, "step": 13070000 }, { "epoch": 64.76, "learning_rate": 1.7633732653597105e-05, "loss": 1.9767, "step": 13070500 }, { "epoch": 64.76, "learning_rate": 1.7632494067171022e-05, "loss": 1.9686, "step": 13071000 }, { "epoch": 64.76, "learning_rate": 1.763125548074494e-05, "loss": 1.9572, "step": 13071500 }, { "epoch": 64.76, "learning_rate": 1.7630016894318852e-05, "loss": 1.9652, "step": 13072000 }, { "epoch": 64.77, "learning_rate": 1.762877830789277e-05, "loss": 1.9412, "step": 13072500 }, { "epoch": 64.77, "learning_rate": 1.7627542198639538e-05, "loss": 1.9687, "step": 13073000 }, { "epoch": 64.77, "learning_rate": 1.7626303612213455e-05, "loss": 1.9766, "step": 13073500 }, { "epoch": 64.77, "learning_rate": 1.7625065025787372e-05, "loss": 1.9613, "step": 13074000 }, { "epoch": 64.78, "learning_rate": 1.7623828916534137e-05, "loss": 1.9624, "step": 13074500 }, { "epoch": 64.78, "learning_rate": 1.7622590330108054e-05, "loss": 1.9824, "step": 13075000 }, { "epoch": 64.78, "learning_rate": 1.7621354220854823e-05, "loss": 1.9875, "step": 13075500 }, { "epoch": 64.78, "learning_rate": 1.762011563442874e-05, "loss": 1.9963, "step": 13076000 }, { "epoch": 64.79, "learning_rate": 1.7618877048002657e-05, "loss": 1.9574, "step": 13076500 }, { "epoch": 64.79, "learning_rate": 1.761763846157657e-05, "loss": 1.9596, "step": 13077000 }, { "epoch": 64.79, "learning_rate": 1.7616399875150487e-05, "loss": 1.9564, "step": 13077500 }, { "epoch": 64.79, "learning_rate": 1.761516376589726e-05, "loss": 1.9379, "step": 13078000 }, { "epoch": 64.8, "learning_rate": 1.7613925179471173e-05, "loss": 1.9607, "step": 13078500 }, { "epoch": 64.8, "learning_rate": 1.761268659304509e-05, "loss": 1.9547, "step": 13079000 }, { "epoch": 64.8, "learning_rate": 1.7611448006619007e-05, "loss": 1.9484, "step": 13079500 }, { "epoch": 64.8, "learning_rate": 1.761020942019292e-05, "loss": 1.9529, "step": 13080000 }, { "epoch": 64.81, "learning_rate": 1.7608970833766837e-05, "loss": 1.9739, "step": 13080500 }, { "epoch": 64.81, "learning_rate": 1.7607732247340754e-05, "loss": 1.9652, "step": 13081000 }, { "epoch": 64.81, "learning_rate": 1.760649366091467e-05, "loss": 1.9808, "step": 13081500 }, { "epoch": 64.81, "learning_rate": 1.7605255074488588e-05, "loss": 1.9692, "step": 13082000 }, { "epoch": 64.82, "learning_rate": 1.7604016488062505e-05, "loss": 1.9846, "step": 13082500 }, { "epoch": 64.82, "learning_rate": 1.760278037880927e-05, "loss": 1.9441, "step": 13083000 }, { "epoch": 64.82, "learning_rate": 1.7601541792383187e-05, "loss": 1.9576, "step": 13083500 }, { "epoch": 64.82, "learning_rate": 1.7600303205957104e-05, "loss": 1.9495, "step": 13084000 }, { "epoch": 64.83, "learning_rate": 1.759906461953102e-05, "loss": 1.9764, "step": 13084500 }, { "epoch": 64.83, "learning_rate": 1.7597826033104938e-05, "loss": 1.9718, "step": 13085000 }, { "epoch": 64.83, "learning_rate": 1.7596587446678855e-05, "loss": 1.9677, "step": 13085500 }, { "epoch": 64.83, "learning_rate": 1.7595348860252772e-05, "loss": 1.9472, "step": 13086000 }, { "epoch": 64.84, "learning_rate": 1.759411027382669e-05, "loss": 1.9792, "step": 13086500 }, { "epoch": 64.84, "learning_rate": 1.7592871687400606e-05, "loss": 1.952, "step": 13087000 }, { "epoch": 64.84, "learning_rate": 1.759163310097452e-05, "loss": 1.9672, "step": 13087500 }, { "epoch": 64.84, "learning_rate": 1.759039699172129e-05, "loss": 1.967, "step": 13088000 }, { "epoch": 64.84, "learning_rate": 1.7589160882468057e-05, "loss": 1.9595, "step": 13088500 }, { "epoch": 64.85, "learning_rate": 1.7587922296041974e-05, "loss": 1.9609, "step": 13089000 }, { "epoch": 64.85, "learning_rate": 1.7586683709615888e-05, "loss": 1.9768, "step": 13089500 }, { "epoch": 64.85, "learning_rate": 1.7585445123189805e-05, "loss": 1.9838, "step": 13090000 }, { "epoch": 64.85, "learning_rate": 1.758420653676372e-05, "loss": 1.9657, "step": 13090500 }, { "epoch": 64.86, "learning_rate": 1.7582970427510494e-05, "loss": 1.9843, "step": 13091000 }, { "epoch": 64.86, "learning_rate": 1.758173184108441e-05, "loss": 1.9696, "step": 13091500 }, { "epoch": 64.86, "learning_rate": 1.7580493254658324e-05, "loss": 1.9636, "step": 13092000 }, { "epoch": 64.86, "learning_rate": 1.757925466823224e-05, "loss": 1.9611, "step": 13092500 }, { "epoch": 64.87, "learning_rate": 1.757801855897901e-05, "loss": 1.9885, "step": 13093000 }, { "epoch": 64.87, "learning_rate": 1.7576779972552927e-05, "loss": 1.9605, "step": 13093500 }, { "epoch": 64.87, "learning_rate": 1.7575543863299696e-05, "loss": 1.9882, "step": 13094000 }, { "epoch": 64.87, "learning_rate": 1.757430527687361e-05, "loss": 1.98, "step": 13094500 }, { "epoch": 64.88, "learning_rate": 1.7573066690447526e-05, "loss": 1.9715, "step": 13095000 }, { "epoch": 64.88, "learning_rate": 1.7571830581194295e-05, "loss": 1.973, "step": 13095500 }, { "epoch": 64.88, "learning_rate": 1.7570591994768212e-05, "loss": 1.9397, "step": 13096000 }, { "epoch": 64.88, "learning_rate": 1.756935340834213e-05, "loss": 1.9651, "step": 13096500 }, { "epoch": 64.89, "learning_rate": 1.7568114821916046e-05, "loss": 1.9559, "step": 13097000 }, { "epoch": 64.89, "learning_rate": 1.756687623548996e-05, "loss": 1.9787, "step": 13097500 }, { "epoch": 64.89, "learning_rate": 1.7565637649063876e-05, "loss": 1.9756, "step": 13098000 }, { "epoch": 64.89, "learning_rate": 1.7564399062637793e-05, "loss": 1.9665, "step": 13098500 }, { "epoch": 64.9, "learning_rate": 1.756316047621171e-05, "loss": 1.9448, "step": 13099000 }, { "epoch": 64.9, "learning_rate": 1.7561921889785627e-05, "loss": 1.9607, "step": 13099500 }, { "epoch": 64.9, "learning_rate": 1.7560683303359544e-05, "loss": 1.9538, "step": 13100000 }, { "epoch": 64.9, "learning_rate": 1.755944471693346e-05, "loss": 1.968, "step": 13100500 }, { "epoch": 64.91, "learning_rate": 1.7558206130507378e-05, "loss": 1.961, "step": 13101000 }, { "epoch": 64.91, "learning_rate": 1.7556967544081295e-05, "loss": 1.9602, "step": 13101500 }, { "epoch": 64.91, "learning_rate": 1.755573143482806e-05, "loss": 1.9652, "step": 13102000 }, { "epoch": 64.91, "learning_rate": 1.7554492848401977e-05, "loss": 1.9571, "step": 13102500 }, { "epoch": 64.92, "learning_rate": 1.7553254261975894e-05, "loss": 1.988, "step": 13103000 }, { "epoch": 64.92, "learning_rate": 1.755201567554981e-05, "loss": 1.9788, "step": 13103500 }, { "epoch": 64.92, "learning_rate": 1.7550777089123728e-05, "loss": 1.9771, "step": 13104000 }, { "epoch": 64.92, "learning_rate": 1.7549540979870493e-05, "loss": 1.9734, "step": 13104500 }, { "epoch": 64.93, "learning_rate": 1.754830239344441e-05, "loss": 1.9863, "step": 13105000 }, { "epoch": 64.93, "learning_rate": 1.754706628419118e-05, "loss": 1.954, "step": 13105500 }, { "epoch": 64.93, "learning_rate": 1.7545827697765096e-05, "loss": 1.9631, "step": 13106000 }, { "epoch": 64.93, "learning_rate": 1.7544591588511865e-05, "loss": 1.9858, "step": 13106500 }, { "epoch": 64.94, "learning_rate": 1.754335300208578e-05, "loss": 1.9646, "step": 13107000 }, { "epoch": 64.94, "learning_rate": 1.7542114415659695e-05, "loss": 1.9804, "step": 13107500 }, { "epoch": 64.94, "learning_rate": 1.7540875829233612e-05, "loss": 1.9662, "step": 13108000 }, { "epoch": 64.94, "learning_rate": 1.753963724280753e-05, "loss": 1.9881, "step": 13108500 }, { "epoch": 64.95, "learning_rate": 1.7538398656381446e-05, "loss": 1.9684, "step": 13109000 }, { "epoch": 64.95, "learning_rate": 1.7537160069955363e-05, "loss": 1.9541, "step": 13109500 }, { "epoch": 64.95, "learning_rate": 1.7535921483529276e-05, "loss": 1.9775, "step": 13110000 }, { "epoch": 64.95, "learning_rate": 1.753468537427605e-05, "loss": 1.9983, "step": 13110500 }, { "epoch": 64.96, "learning_rate": 1.7533446787849962e-05, "loss": 1.9812, "step": 13111000 }, { "epoch": 64.96, "learning_rate": 1.7532210678596734e-05, "loss": 1.9886, "step": 13111500 }, { "epoch": 64.96, "learning_rate": 1.7530972092170648e-05, "loss": 1.9676, "step": 13112000 }, { "epoch": 64.96, "learning_rate": 1.7529733505744565e-05, "loss": 1.9511, "step": 13112500 }, { "epoch": 64.97, "learning_rate": 1.7528494919318482e-05, "loss": 1.964, "step": 13113000 }, { "epoch": 64.97, "learning_rate": 1.75272563328924e-05, "loss": 1.9767, "step": 13113500 }, { "epoch": 64.97, "learning_rate": 1.7526017746466316e-05, "loss": 1.9744, "step": 13114000 }, { "epoch": 64.97, "learning_rate": 1.752477916004023e-05, "loss": 1.9893, "step": 13114500 }, { "epoch": 64.98, "learning_rate": 1.7523540573614146e-05, "loss": 1.9746, "step": 13115000 }, { "epoch": 64.98, "learning_rate": 1.7522301987188063e-05, "loss": 1.9496, "step": 13115500 }, { "epoch": 64.98, "learning_rate": 1.7521063400761976e-05, "loss": 1.9796, "step": 13116000 }, { "epoch": 64.98, "learning_rate": 1.7519824814335893e-05, "loss": 1.9575, "step": 13116500 }, { "epoch": 64.99, "learning_rate": 1.751858622790981e-05, "loss": 1.9637, "step": 13117000 }, { "epoch": 64.99, "learning_rate": 1.7517347641483727e-05, "loss": 1.9639, "step": 13117500 }, { "epoch": 64.99, "learning_rate": 1.7516109055057644e-05, "loss": 1.9888, "step": 13118000 }, { "epoch": 64.99, "learning_rate": 1.751487046863156e-05, "loss": 1.9494, "step": 13118500 }, { "epoch": 65.0, "learning_rate": 1.751363435937833e-05, "loss": 1.9539, "step": 13119000 }, { "epoch": 65.0, "learning_rate": 1.7512395772952243e-05, "loss": 1.9773, "step": 13119500 }, { "epoch": 65.0, "eval_accuracy": 0.6750307160492472, "eval_accuracy_mlm": 0.6348378879033679, "eval_accuracy_nsp": 0.8646370592918862, "eval_loss": 2.2951533794403076, "eval_runtime": 147.451, "eval_samples_per_second": 1729.109, "eval_steps_per_second": 72.051, "step": 13119795 }, { "epoch": 65.0, "learning_rate": 1.751115718652616e-05, "loss": 1.9451, "step": 13120000 }, { "epoch": 65.0, "learning_rate": 1.7509918600100077e-05, "loss": 1.9145, "step": 13120500 }, { "epoch": 65.01, "learning_rate": 1.7508680013673994e-05, "loss": 1.9327, "step": 13121000 }, { "epoch": 65.01, "learning_rate": 1.750744142724791e-05, "loss": 1.9486, "step": 13121500 }, { "epoch": 65.01, "learning_rate": 1.7506202840821828e-05, "loss": 1.9453, "step": 13122000 }, { "epoch": 65.01, "learning_rate": 1.7504964254395745e-05, "loss": 1.93, "step": 13122500 }, { "epoch": 65.02, "learning_rate": 1.7503725667969662e-05, "loss": 1.9478, "step": 13123000 }, { "epoch": 65.02, "learning_rate": 1.7502487081543576e-05, "loss": 1.9578, "step": 13123500 }, { "epoch": 65.02, "learning_rate": 1.7501250972290344e-05, "loss": 1.9489, "step": 13124000 }, { "epoch": 65.02, "learning_rate": 1.7500014863037113e-05, "loss": 1.956, "step": 13124500 }, { "epoch": 65.03, "learning_rate": 1.749877627661103e-05, "loss": 1.9393, "step": 13125000 }, { "epoch": 65.03, "learning_rate": 1.7497537690184944e-05, "loss": 1.9584, "step": 13125500 }, { "epoch": 65.03, "learning_rate": 1.749629910375886e-05, "loss": 1.9641, "step": 13126000 }, { "epoch": 65.03, "learning_rate": 1.7495060517332777e-05, "loss": 1.9215, "step": 13126500 }, { "epoch": 65.04, "learning_rate": 1.7493821930906694e-05, "loss": 1.9493, "step": 13127000 }, { "epoch": 65.04, "learning_rate": 1.749258334448061e-05, "loss": 1.9317, "step": 13127500 }, { "epoch": 65.04, "learning_rate": 1.749134723522738e-05, "loss": 1.9367, "step": 13128000 }, { "epoch": 65.04, "learning_rate": 1.7490108648801297e-05, "loss": 1.9474, "step": 13128500 }, { "epoch": 65.05, "learning_rate": 1.748887006237521e-05, "loss": 1.9491, "step": 13129000 }, { "epoch": 65.05, "learning_rate": 1.7487631475949128e-05, "loss": 1.9181, "step": 13129500 }, { "epoch": 65.05, "learning_rate": 1.7486392889523044e-05, "loss": 1.9588, "step": 13130000 }, { "epoch": 65.05, "learning_rate": 1.748515430309696e-05, "loss": 1.9497, "step": 13130500 }, { "epoch": 65.06, "learning_rate": 1.748391571667088e-05, "loss": 1.9349, "step": 13131000 }, { "epoch": 65.06, "learning_rate": 1.7482679607417647e-05, "loss": 1.9391, "step": 13131500 }, { "epoch": 65.06, "learning_rate": 1.7481441020991564e-05, "loss": 1.9689, "step": 13132000 }, { "epoch": 65.06, "learning_rate": 1.7480202434565478e-05, "loss": 1.9587, "step": 13132500 }, { "epoch": 65.07, "learning_rate": 1.7478963848139395e-05, "loss": 1.9176, "step": 13133000 }, { "epoch": 65.07, "learning_rate": 1.7477727738886167e-05, "loss": 1.9389, "step": 13133500 }, { "epoch": 65.07, "learning_rate": 1.7476489152460084e-05, "loss": 1.9358, "step": 13134000 }, { "epoch": 65.07, "learning_rate": 1.7475250566033997e-05, "loss": 1.9428, "step": 13134500 }, { "epoch": 65.08, "learning_rate": 1.7474011979607914e-05, "loss": 1.9394, "step": 13135000 }, { "epoch": 65.08, "learning_rate": 1.747277339318183e-05, "loss": 1.9468, "step": 13135500 }, { "epoch": 65.08, "learning_rate": 1.74715372839286e-05, "loss": 1.9212, "step": 13136000 }, { "epoch": 65.08, "learning_rate": 1.7470298697502517e-05, "loss": 1.9432, "step": 13136500 }, { "epoch": 65.09, "learning_rate": 1.7469060111076434e-05, "loss": 1.9745, "step": 13137000 }, { "epoch": 65.09, "learning_rate": 1.746782152465035e-05, "loss": 1.9626, "step": 13137500 }, { "epoch": 65.09, "learning_rate": 1.7466582938224264e-05, "loss": 1.9617, "step": 13138000 }, { "epoch": 65.09, "learning_rate": 1.746534435179818e-05, "loss": 1.9685, "step": 13138500 }, { "epoch": 65.1, "learning_rate": 1.7464105765372095e-05, "loss": 1.9507, "step": 13139000 }, { "epoch": 65.1, "learning_rate": 1.746286717894601e-05, "loss": 1.9397, "step": 13139500 }, { "epoch": 65.1, "learning_rate": 1.746162859251993e-05, "loss": 1.943, "step": 13140000 }, { "epoch": 65.1, "learning_rate": 1.7460390006093845e-05, "loss": 1.9264, "step": 13140500 }, { "epoch": 65.11, "learning_rate": 1.7459151419667762e-05, "loss": 1.946, "step": 13141000 }, { "epoch": 65.11, "learning_rate": 1.745791283324168e-05, "loss": 1.9378, "step": 13141500 }, { "epoch": 65.11, "learning_rate": 1.7456674246815596e-05, "loss": 1.9575, "step": 13142000 }, { "epoch": 65.11, "learning_rate": 1.745543813756236e-05, "loss": 1.9578, "step": 13142500 }, { "epoch": 65.11, "learning_rate": 1.7454202028309134e-05, "loss": 1.9632, "step": 13143000 }, { "epoch": 65.12, "learning_rate": 1.745296344188305e-05, "loss": 1.9506, "step": 13143500 }, { "epoch": 65.12, "learning_rate": 1.7451724855456968e-05, "loss": 1.9501, "step": 13144000 }, { "epoch": 65.12, "learning_rate": 1.745048626903088e-05, "loss": 1.9793, "step": 13144500 }, { "epoch": 65.12, "learning_rate": 1.7449247682604798e-05, "loss": 1.9645, "step": 13145000 }, { "epoch": 65.13, "learning_rate": 1.7448009096178715e-05, "loss": 1.9517, "step": 13145500 }, { "epoch": 65.13, "learning_rate": 1.744677050975263e-05, "loss": 1.9297, "step": 13146000 }, { "epoch": 65.13, "learning_rate": 1.7445531923326546e-05, "loss": 1.9323, "step": 13146500 }, { "epoch": 65.13, "learning_rate": 1.7444293336900462e-05, "loss": 1.9556, "step": 13147000 }, { "epoch": 65.14, "learning_rate": 1.744305475047438e-05, "loss": 1.9711, "step": 13147500 }, { "epoch": 65.14, "learning_rate": 1.7441818641221148e-05, "loss": 1.9629, "step": 13148000 }, { "epoch": 65.14, "learning_rate": 1.7440580054795065e-05, "loss": 1.9458, "step": 13148500 }, { "epoch": 65.14, "learning_rate": 1.7439341468368982e-05, "loss": 1.9475, "step": 13149000 }, { "epoch": 65.15, "learning_rate": 1.7438102881942896e-05, "loss": 1.9609, "step": 13149500 }, { "epoch": 65.15, "learning_rate": 1.7436866772689668e-05, "loss": 1.9528, "step": 13150000 }, { "epoch": 65.15, "learning_rate": 1.743562818626358e-05, "loss": 1.9648, "step": 13150500 }, { "epoch": 65.15, "learning_rate": 1.7434389599837498e-05, "loss": 1.9401, "step": 13151000 }, { "epoch": 65.16, "learning_rate": 1.7433151013411415e-05, "loss": 1.9684, "step": 13151500 }, { "epoch": 65.16, "learning_rate": 1.7431914904158184e-05, "loss": 1.9377, "step": 13152000 }, { "epoch": 65.16, "learning_rate": 1.74306763177321e-05, "loss": 1.9719, "step": 13152500 }, { "epoch": 65.16, "learning_rate": 1.7429437731306018e-05, "loss": 1.9593, "step": 13153000 }, { "epoch": 65.17, "learning_rate": 1.7428201622052783e-05, "loss": 1.9573, "step": 13153500 }, { "epoch": 65.17, "learning_rate": 1.74269630356267e-05, "loss": 1.961, "step": 13154000 }, { "epoch": 65.17, "learning_rate": 1.7425724449200617e-05, "loss": 1.967, "step": 13154500 }, { "epoch": 65.17, "learning_rate": 1.7424485862774534e-05, "loss": 1.9486, "step": 13155000 }, { "epoch": 65.18, "learning_rate": 1.742324727634845e-05, "loss": 1.9505, "step": 13155500 }, { "epoch": 65.18, "learning_rate": 1.7422008689922368e-05, "loss": 1.9538, "step": 13156000 }, { "epoch": 65.18, "learning_rate": 1.7420770103496285e-05, "loss": 1.9614, "step": 13156500 }, { "epoch": 65.18, "learning_rate": 1.74195315170702e-05, "loss": 1.9652, "step": 13157000 }, { "epoch": 65.19, "learning_rate": 1.7418292930644115e-05, "loss": 1.9684, "step": 13157500 }, { "epoch": 65.19, "learning_rate": 1.7417054344218032e-05, "loss": 1.9637, "step": 13158000 }, { "epoch": 65.19, "learning_rate": 1.74158182349648e-05, "loss": 1.9555, "step": 13158500 }, { "epoch": 65.19, "learning_rate": 1.7414582125711566e-05, "loss": 1.9464, "step": 13159000 }, { "epoch": 65.2, "learning_rate": 1.7413343539285483e-05, "loss": 1.9621, "step": 13159500 }, { "epoch": 65.2, "learning_rate": 1.74121049528594e-05, "loss": 1.9587, "step": 13160000 }, { "epoch": 65.2, "learning_rate": 1.741086884360617e-05, "loss": 1.9487, "step": 13160500 }, { "epoch": 65.2, "learning_rate": 1.7409630257180086e-05, "loss": 1.9497, "step": 13161000 }, { "epoch": 65.21, "learning_rate": 1.7408391670754003e-05, "loss": 1.9594, "step": 13161500 }, { "epoch": 65.21, "learning_rate": 1.7407153084327917e-05, "loss": 1.9534, "step": 13162000 }, { "epoch": 65.21, "learning_rate": 1.7405914497901833e-05, "loss": 1.9407, "step": 13162500 }, { "epoch": 65.21, "learning_rate": 1.740467591147575e-05, "loss": 1.9744, "step": 13163000 }, { "epoch": 65.22, "learning_rate": 1.7403437325049667e-05, "loss": 1.9606, "step": 13163500 }, { "epoch": 65.22, "learning_rate": 1.7402198738623584e-05, "loss": 1.9675, "step": 13164000 }, { "epoch": 65.22, "learning_rate": 1.74009601521975e-05, "loss": 1.9726, "step": 13164500 }, { "epoch": 65.22, "learning_rate": 1.7399721565771418e-05, "loss": 1.9471, "step": 13165000 }, { "epoch": 65.23, "learning_rate": 1.7398482979345335e-05, "loss": 1.9542, "step": 13165500 }, { "epoch": 65.23, "learning_rate": 1.739724439291925e-05, "loss": 1.9539, "step": 13166000 }, { "epoch": 65.23, "learning_rate": 1.7396005806493166e-05, "loss": 1.9547, "step": 13166500 }, { "epoch": 65.23, "learning_rate": 1.7394767220067082e-05, "loss": 1.9809, "step": 13167000 }, { "epoch": 65.24, "learning_rate": 1.739353111081385e-05, "loss": 1.9657, "step": 13167500 }, { "epoch": 65.24, "learning_rate": 1.739229500156062e-05, "loss": 1.9207, "step": 13168000 }, { "epoch": 65.24, "learning_rate": 1.7391056415134534e-05, "loss": 1.9617, "step": 13168500 }, { "epoch": 65.24, "learning_rate": 1.738981782870845e-05, "loss": 1.9518, "step": 13169000 }, { "epoch": 65.25, "learning_rate": 1.7388579242282367e-05, "loss": 1.9679, "step": 13169500 }, { "epoch": 65.25, "learning_rate": 1.7387340655856284e-05, "loss": 1.9424, "step": 13170000 }, { "epoch": 65.25, "learning_rate": 1.73861020694302e-05, "loss": 1.9715, "step": 13170500 }, { "epoch": 65.25, "learning_rate": 1.7384863483004118e-05, "loss": 1.9596, "step": 13171000 }, { "epoch": 65.26, "learning_rate": 1.7383624896578035e-05, "loss": 1.9538, "step": 13171500 }, { "epoch": 65.26, "learning_rate": 1.7382386310151952e-05, "loss": 1.9576, "step": 13172000 }, { "epoch": 65.26, "learning_rate": 1.7381147723725866e-05, "loss": 1.9202, "step": 13172500 }, { "epoch": 65.26, "learning_rate": 1.7379911614472634e-05, "loss": 1.9485, "step": 13173000 }, { "epoch": 65.27, "learning_rate": 1.737867302804655e-05, "loss": 1.9653, "step": 13173500 }, { "epoch": 65.27, "learning_rate": 1.737743444162047e-05, "loss": 1.9548, "step": 13174000 }, { "epoch": 65.27, "learning_rate": 1.7376195855194385e-05, "loss": 1.9806, "step": 13174500 }, { "epoch": 65.27, "learning_rate": 1.7374957268768302e-05, "loss": 1.9818, "step": 13175000 }, { "epoch": 65.28, "learning_rate": 1.7373721159515068e-05, "loss": 1.9476, "step": 13175500 }, { "epoch": 65.28, "learning_rate": 1.7372482573088985e-05, "loss": 1.9595, "step": 13176000 }, { "epoch": 65.28, "learning_rate": 1.73712439866629e-05, "loss": 1.9441, "step": 13176500 }, { "epoch": 65.28, "learning_rate": 1.737000540023682e-05, "loss": 1.9996, "step": 13177000 }, { "epoch": 65.29, "learning_rate": 1.7368769290983587e-05, "loss": 1.9685, "step": 13177500 }, { "epoch": 65.29, "learning_rate": 1.7367530704557504e-05, "loss": 1.962, "step": 13178000 }, { "epoch": 65.29, "learning_rate": 1.7366292118131418e-05, "loss": 1.9419, "step": 13178500 }, { "epoch": 65.29, "learning_rate": 1.7365053531705335e-05, "loss": 1.9835, "step": 13179000 }, { "epoch": 65.3, "learning_rate": 1.736381494527925e-05, "loss": 1.9603, "step": 13179500 }, { "epoch": 65.3, "learning_rate": 1.736257635885317e-05, "loss": 1.9567, "step": 13180000 }, { "epoch": 65.3, "learning_rate": 1.7361340249599937e-05, "loss": 1.9727, "step": 13180500 }, { "epoch": 65.3, "learning_rate": 1.7360101663173854e-05, "loss": 1.944, "step": 13181000 }, { "epoch": 65.31, "learning_rate": 1.735886307674777e-05, "loss": 1.9689, "step": 13181500 }, { "epoch": 65.31, "learning_rate": 1.7357624490321685e-05, "loss": 1.964, "step": 13182000 }, { "epoch": 65.31, "learning_rate": 1.7356388381068457e-05, "loss": 1.9435, "step": 13182500 }, { "epoch": 65.31, "learning_rate": 1.7355152271815222e-05, "loss": 1.963, "step": 13183000 }, { "epoch": 65.32, "learning_rate": 1.735391368538914e-05, "loss": 1.9639, "step": 13183500 }, { "epoch": 65.32, "learning_rate": 1.7352675098963056e-05, "loss": 1.9438, "step": 13184000 }, { "epoch": 65.32, "learning_rate": 1.7351438989709825e-05, "loss": 1.9284, "step": 13184500 }, { "epoch": 65.32, "learning_rate": 1.7350200403283742e-05, "loss": 1.9554, "step": 13185000 }, { "epoch": 65.33, "learning_rate": 1.7348961816857655e-05, "loss": 1.9645, "step": 13185500 }, { "epoch": 65.33, "learning_rate": 1.7347723230431572e-05, "loss": 1.9501, "step": 13186000 }, { "epoch": 65.33, "learning_rate": 1.734648464400549e-05, "loss": 1.9601, "step": 13186500 }, { "epoch": 65.33, "learning_rate": 1.7345246057579406e-05, "loss": 1.9517, "step": 13187000 }, { "epoch": 65.34, "learning_rate": 1.7344007471153323e-05, "loss": 1.9398, "step": 13187500 }, { "epoch": 65.34, "learning_rate": 1.734276888472724e-05, "loss": 1.9604, "step": 13188000 }, { "epoch": 65.34, "learning_rate": 1.7341530298301157e-05, "loss": 1.9458, "step": 13188500 }, { "epoch": 65.34, "learning_rate": 1.7340291711875074e-05, "loss": 1.9423, "step": 13189000 }, { "epoch": 65.35, "learning_rate": 1.733905312544899e-05, "loss": 1.9342, "step": 13189500 }, { "epoch": 65.35, "learning_rate": 1.7337814539022904e-05, "loss": 1.9761, "step": 13190000 }, { "epoch": 65.35, "learning_rate": 1.7336578429769673e-05, "loss": 1.9431, "step": 13190500 }, { "epoch": 65.35, "learning_rate": 1.733533984334359e-05, "loss": 1.9627, "step": 13191000 }, { "epoch": 65.36, "learning_rate": 1.7334101256917507e-05, "loss": 1.9494, "step": 13191500 }, { "epoch": 65.36, "learning_rate": 1.7332865147664272e-05, "loss": 1.9258, "step": 13192000 }, { "epoch": 65.36, "learning_rate": 1.733162656123819e-05, "loss": 1.9514, "step": 13192500 }, { "epoch": 65.36, "learning_rate": 1.7330387974812106e-05, "loss": 1.9495, "step": 13193000 }, { "epoch": 65.37, "learning_rate": 1.7329149388386023e-05, "loss": 1.9325, "step": 13193500 }, { "epoch": 65.37, "learning_rate": 1.7327913279132792e-05, "loss": 1.9587, "step": 13194000 }, { "epoch": 65.37, "learning_rate": 1.732667469270671e-05, "loss": 2.0048, "step": 13194500 }, { "epoch": 65.37, "learning_rate": 1.7325438583453478e-05, "loss": 1.9466, "step": 13195000 }, { "epoch": 65.38, "learning_rate": 1.7324199997027395e-05, "loss": 1.942, "step": 13195500 }, { "epoch": 65.38, "learning_rate": 1.732296141060131e-05, "loss": 1.9787, "step": 13196000 }, { "epoch": 65.38, "learning_rate": 1.7321722824175225e-05, "loss": 1.9591, "step": 13196500 }, { "epoch": 65.38, "learning_rate": 1.7320484237749142e-05, "loss": 1.976, "step": 13197000 }, { "epoch": 65.38, "learning_rate": 1.731924565132306e-05, "loss": 1.962, "step": 13197500 }, { "epoch": 65.39, "learning_rate": 1.7318007064896973e-05, "loss": 1.9582, "step": 13198000 }, { "epoch": 65.39, "learning_rate": 1.731676847847089e-05, "loss": 1.9564, "step": 13198500 }, { "epoch": 65.39, "learning_rate": 1.7315529892044806e-05, "loss": 1.9858, "step": 13199000 }, { "epoch": 65.39, "learning_rate": 1.7314291305618723e-05, "loss": 1.9576, "step": 13199500 }, { "epoch": 65.4, "learning_rate": 1.731305271919264e-05, "loss": 1.9576, "step": 13200000 }, { "epoch": 65.4, "learning_rate": 1.7311814132766557e-05, "loss": 1.9273, "step": 13200500 }, { "epoch": 65.4, "learning_rate": 1.7310578023513326e-05, "loss": 1.9736, "step": 13201000 }, { "epoch": 65.4, "learning_rate": 1.7309341914260095e-05, "loss": 1.9516, "step": 13201500 }, { "epoch": 65.41, "learning_rate": 1.7308103327834012e-05, "loss": 1.9744, "step": 13202000 }, { "epoch": 65.41, "learning_rate": 1.730686474140793e-05, "loss": 1.9587, "step": 13202500 }, { "epoch": 65.41, "learning_rate": 1.7305626154981842e-05, "loss": 1.9572, "step": 13203000 }, { "epoch": 65.41, "learning_rate": 1.730438756855576e-05, "loss": 1.9568, "step": 13203500 }, { "epoch": 65.42, "learning_rate": 1.7303148982129676e-05, "loss": 1.933, "step": 13204000 }, { "epoch": 65.42, "learning_rate": 1.730191039570359e-05, "loss": 1.9629, "step": 13204500 }, { "epoch": 65.42, "learning_rate": 1.7300674286450362e-05, "loss": 1.9493, "step": 13205000 }, { "epoch": 65.42, "learning_rate": 1.729943570002428e-05, "loss": 1.9615, "step": 13205500 }, { "epoch": 65.43, "learning_rate": 1.7298197113598196e-05, "loss": 1.9497, "step": 13206000 }, { "epoch": 65.43, "learning_rate": 1.729695852717211e-05, "loss": 1.9356, "step": 13206500 }, { "epoch": 65.43, "learning_rate": 1.7295719940746026e-05, "loss": 1.9511, "step": 13207000 }, { "epoch": 65.43, "learning_rate": 1.729448135431994e-05, "loss": 1.9731, "step": 13207500 }, { "epoch": 65.44, "learning_rate": 1.7293242767893857e-05, "loss": 1.9746, "step": 13208000 }, { "epoch": 65.44, "learning_rate": 1.7292004181467774e-05, "loss": 1.961, "step": 13208500 }, { "epoch": 65.44, "learning_rate": 1.729076559504169e-05, "loss": 1.9602, "step": 13209000 }, { "epoch": 65.44, "learning_rate": 1.7289527008615607e-05, "loss": 1.9477, "step": 13209500 }, { "epoch": 65.45, "learning_rate": 1.7288288422189524e-05, "loss": 1.9795, "step": 13210000 }, { "epoch": 65.45, "learning_rate": 1.728704983576344e-05, "loss": 1.9747, "step": 13210500 }, { "epoch": 65.45, "learning_rate": 1.7285811249337358e-05, "loss": 1.9734, "step": 13211000 }, { "epoch": 65.45, "learning_rate": 1.7284572662911275e-05, "loss": 1.9828, "step": 13211500 }, { "epoch": 65.46, "learning_rate": 1.728333407648519e-05, "loss": 1.9624, "step": 13212000 }, { "epoch": 65.46, "learning_rate": 1.7282095490059106e-05, "loss": 1.944, "step": 13212500 }, { "epoch": 65.46, "learning_rate": 1.7280856903633023e-05, "loss": 1.955, "step": 13213000 }, { "epoch": 65.46, "learning_rate": 1.727962079437979e-05, "loss": 1.9661, "step": 13213500 }, { "epoch": 65.47, "learning_rate": 1.7278382207953708e-05, "loss": 1.9707, "step": 13214000 }, { "epoch": 65.47, "learning_rate": 1.7277143621527625e-05, "loss": 1.9503, "step": 13214500 }, { "epoch": 65.47, "learning_rate": 1.727590503510154e-05, "loss": 1.9487, "step": 13215000 }, { "epoch": 65.47, "learning_rate": 1.7274666448675456e-05, "loss": 1.9626, "step": 13215500 }, { "epoch": 65.48, "learning_rate": 1.7273427862249373e-05, "loss": 1.9771, "step": 13216000 }, { "epoch": 65.48, "learning_rate": 1.727218927582329e-05, "loss": 1.9533, "step": 13216500 }, { "epoch": 65.48, "learning_rate": 1.7270950689397206e-05, "loss": 1.9534, "step": 13217000 }, { "epoch": 65.48, "learning_rate": 1.7269712102971123e-05, "loss": 1.9487, "step": 13217500 }, { "epoch": 65.49, "learning_rate": 1.726847599371789e-05, "loss": 1.9754, "step": 13218000 }, { "epoch": 65.49, "learning_rate": 1.7267237407291806e-05, "loss": 1.9896, "step": 13218500 }, { "epoch": 65.49, "learning_rate": 1.7265998820865723e-05, "loss": 1.9718, "step": 13219000 }, { "epoch": 65.49, "learning_rate": 1.726476023443964e-05, "loss": 1.9711, "step": 13219500 }, { "epoch": 65.5, "learning_rate": 1.726352412518641e-05, "loss": 1.9761, "step": 13220000 }, { "epoch": 65.5, "learning_rate": 1.7262288015933177e-05, "loss": 1.9529, "step": 13220500 }, { "epoch": 65.5, "learning_rate": 1.7261051906679946e-05, "loss": 1.9681, "step": 13221000 }, { "epoch": 65.5, "learning_rate": 1.7259813320253863e-05, "loss": 1.9455, "step": 13221500 }, { "epoch": 65.51, "learning_rate": 1.725857473382778e-05, "loss": 1.9652, "step": 13222000 }, { "epoch": 65.51, "learning_rate": 1.7257336147401697e-05, "loss": 1.9225, "step": 13222500 }, { "epoch": 65.51, "learning_rate": 1.725609756097561e-05, "loss": 1.9582, "step": 13223000 }, { "epoch": 65.51, "learning_rate": 1.7254858974549527e-05, "loss": 1.9874, "step": 13223500 }, { "epoch": 65.52, "learning_rate": 1.7253622865296296e-05, "loss": 1.9675, "step": 13224000 }, { "epoch": 65.52, "learning_rate": 1.7252386756043065e-05, "loss": 1.9489, "step": 13224500 }, { "epoch": 65.52, "learning_rate": 1.725114816961698e-05, "loss": 1.9471, "step": 13225000 }, { "epoch": 65.52, "learning_rate": 1.7249909583190895e-05, "loss": 1.9774, "step": 13225500 }, { "epoch": 65.53, "learning_rate": 1.7248670996764812e-05, "loss": 1.9678, "step": 13226000 }, { "epoch": 65.53, "learning_rate": 1.724743241033873e-05, "loss": 1.9652, "step": 13226500 }, { "epoch": 65.53, "learning_rate": 1.7246193823912646e-05, "loss": 1.949, "step": 13227000 }, { "epoch": 65.53, "learning_rate": 1.7244957714659415e-05, "loss": 1.9816, "step": 13227500 }, { "epoch": 65.54, "learning_rate": 1.724371912823333e-05, "loss": 1.9942, "step": 13228000 }, { "epoch": 65.54, "learning_rate": 1.7242480541807245e-05, "loss": 1.9566, "step": 13228500 }, { "epoch": 65.54, "learning_rate": 1.7241241955381162e-05, "loss": 1.9575, "step": 13229000 }, { "epoch": 65.54, "learning_rate": 1.724000336895508e-05, "loss": 1.9582, "step": 13229500 }, { "epoch": 65.55, "learning_rate": 1.7238764782528996e-05, "loss": 1.9534, "step": 13230000 }, { "epoch": 65.55, "learning_rate": 1.7237528673275765e-05, "loss": 1.9477, "step": 13230500 }, { "epoch": 65.55, "learning_rate": 1.7236290086849682e-05, "loss": 1.9878, "step": 13231000 }, { "epoch": 65.55, "learning_rate": 1.7235051500423595e-05, "loss": 1.9761, "step": 13231500 }, { "epoch": 65.56, "learning_rate": 1.7233812913997512e-05, "loss": 1.9658, "step": 13232000 }, { "epoch": 65.56, "learning_rate": 1.723257432757143e-05, "loss": 1.9478, "step": 13232500 }, { "epoch": 65.56, "learning_rate": 1.7231335741145346e-05, "loss": 1.9371, "step": 13233000 }, { "epoch": 65.56, "learning_rate": 1.7230097154719263e-05, "loss": 1.9725, "step": 13233500 }, { "epoch": 65.57, "learning_rate": 1.7228861045466032e-05, "loss": 1.9662, "step": 13234000 }, { "epoch": 65.57, "learning_rate": 1.7227622459039946e-05, "loss": 1.9606, "step": 13234500 }, { "epoch": 65.57, "learning_rate": 1.7226383872613862e-05, "loss": 1.9674, "step": 13235000 }, { "epoch": 65.57, "learning_rate": 1.722514528618778e-05, "loss": 1.9729, "step": 13235500 }, { "epoch": 65.58, "learning_rate": 1.7223906699761696e-05, "loss": 1.9744, "step": 13236000 }, { "epoch": 65.58, "learning_rate": 1.7222668113335613e-05, "loss": 1.9631, "step": 13236500 }, { "epoch": 65.58, "learning_rate": 1.722142952690953e-05, "loss": 1.9562, "step": 13237000 }, { "epoch": 65.58, "learning_rate": 1.7220190940483447e-05, "loss": 1.9717, "step": 13237500 }, { "epoch": 65.59, "learning_rate": 1.7218952354057364e-05, "loss": 1.9601, "step": 13238000 }, { "epoch": 65.59, "learning_rate": 1.721771376763128e-05, "loss": 1.9498, "step": 13238500 }, { "epoch": 65.59, "learning_rate": 1.7216475181205194e-05, "loss": 1.9629, "step": 13239000 }, { "epoch": 65.59, "learning_rate": 1.721523659477911e-05, "loss": 1.9478, "step": 13239500 }, { "epoch": 65.6, "learning_rate": 1.721399800835303e-05, "loss": 1.968, "step": 13240000 }, { "epoch": 65.6, "learning_rate": 1.7212759421926945e-05, "loss": 1.95, "step": 13240500 }, { "epoch": 65.6, "learning_rate": 1.7211523312673714e-05, "loss": 1.9517, "step": 13241000 }, { "epoch": 65.6, "learning_rate": 1.721028720342048e-05, "loss": 1.9594, "step": 13241500 }, { "epoch": 65.61, "learning_rate": 1.7209048616994396e-05, "loss": 1.9557, "step": 13242000 }, { "epoch": 65.61, "learning_rate": 1.7207810030568313e-05, "loss": 1.9563, "step": 13242500 }, { "epoch": 65.61, "learning_rate": 1.720657144414223e-05, "loss": 1.958, "step": 13243000 }, { "epoch": 65.61, "learning_rate": 1.7205332857716147e-05, "loss": 1.9688, "step": 13243500 }, { "epoch": 65.62, "learning_rate": 1.7204094271290064e-05, "loss": 1.9768, "step": 13244000 }, { "epoch": 65.62, "learning_rate": 1.720285816203683e-05, "loss": 1.9797, "step": 13244500 }, { "epoch": 65.62, "learning_rate": 1.7201619575610746e-05, "loss": 1.9543, "step": 13245000 }, { "epoch": 65.62, "learning_rate": 1.7200383466357515e-05, "loss": 1.9568, "step": 13245500 }, { "epoch": 65.63, "learning_rate": 1.7199144879931432e-05, "loss": 1.9613, "step": 13246000 }, { "epoch": 65.63, "learning_rate": 1.719790629350535e-05, "loss": 1.9701, "step": 13246500 }, { "epoch": 65.63, "learning_rate": 1.7196667707079263e-05, "loss": 1.9822, "step": 13247000 }, { "epoch": 65.63, "learning_rate": 1.719542912065318e-05, "loss": 1.9587, "step": 13247500 }, { "epoch": 65.64, "learning_rate": 1.7194190534227097e-05, "loss": 1.9635, "step": 13248000 }, { "epoch": 65.64, "learning_rate": 1.719295442497387e-05, "loss": 1.9423, "step": 13248500 }, { "epoch": 65.64, "learning_rate": 1.7191715838547782e-05, "loss": 1.9637, "step": 13249000 }, { "epoch": 65.64, "learning_rate": 1.71904772521217e-05, "loss": 1.9576, "step": 13249500 }, { "epoch": 65.65, "learning_rate": 1.7189238665695616e-05, "loss": 1.9429, "step": 13250000 }, { "epoch": 65.65, "learning_rate": 1.718800007926953e-05, "loss": 1.9626, "step": 13250500 }, { "epoch": 65.65, "learning_rate": 1.7186761492843447e-05, "loss": 1.9494, "step": 13251000 }, { "epoch": 65.65, "learning_rate": 1.7185522906417364e-05, "loss": 1.9699, "step": 13251500 }, { "epoch": 65.65, "learning_rate": 1.718428431999128e-05, "loss": 1.9475, "step": 13252000 }, { "epoch": 65.66, "learning_rate": 1.718304821073805e-05, "loss": 1.9622, "step": 13252500 }, { "epoch": 65.66, "learning_rate": 1.7181809624311966e-05, "loss": 1.9766, "step": 13253000 }, { "epoch": 65.66, "learning_rate": 1.718057103788588e-05, "loss": 1.9658, "step": 13253500 }, { "epoch": 65.66, "learning_rate": 1.7179332451459797e-05, "loss": 1.9612, "step": 13254000 }, { "epoch": 65.67, "learning_rate": 1.7178093865033714e-05, "loss": 1.9514, "step": 13254500 }, { "epoch": 65.67, "learning_rate": 1.7176860232953334e-05, "loss": 1.951, "step": 13255000 }, { "epoch": 65.67, "learning_rate": 1.717562164652725e-05, "loss": 1.9825, "step": 13255500 }, { "epoch": 65.67, "learning_rate": 1.7174383060101168e-05, "loss": 1.9655, "step": 13256000 }, { "epoch": 65.68, "learning_rate": 1.7173144473675085e-05, "loss": 1.9611, "step": 13256500 }, { "epoch": 65.68, "learning_rate": 1.7171905887249002e-05, "loss": 1.9617, "step": 13257000 }, { "epoch": 65.68, "learning_rate": 1.717066730082292e-05, "loss": 1.9638, "step": 13257500 }, { "epoch": 65.68, "learning_rate": 1.7169431191569684e-05, "loss": 1.9559, "step": 13258000 }, { "epoch": 65.69, "learning_rate": 1.71681926051436e-05, "loss": 1.9674, "step": 13258500 }, { "epoch": 65.69, "learning_rate": 1.7166954018717518e-05, "loss": 1.9586, "step": 13259000 }, { "epoch": 65.69, "learning_rate": 1.7165715432291435e-05, "loss": 1.9557, "step": 13259500 }, { "epoch": 65.69, "learning_rate": 1.7164479323038204e-05, "loss": 1.9574, "step": 13260000 }, { "epoch": 65.7, "learning_rate": 1.716324073661212e-05, "loss": 1.9529, "step": 13260500 }, { "epoch": 65.7, "learning_rate": 1.7162002150186038e-05, "loss": 1.9656, "step": 13261000 }, { "epoch": 65.7, "learning_rate": 1.716076356375995e-05, "loss": 1.9423, "step": 13261500 }, { "epoch": 65.7, "learning_rate": 1.715952745450672e-05, "loss": 1.9188, "step": 13262000 }, { "epoch": 65.71, "learning_rate": 1.7158288868080637e-05, "loss": 1.9568, "step": 13262500 }, { "epoch": 65.71, "learning_rate": 1.7157052758827406e-05, "loss": 1.9529, "step": 13263000 }, { "epoch": 65.71, "learning_rate": 1.7155814172401323e-05, "loss": 1.943, "step": 13263500 }, { "epoch": 65.71, "learning_rate": 1.715457806314809e-05, "loss": 1.9592, "step": 13264000 }, { "epoch": 65.72, "learning_rate": 1.715333947672201e-05, "loss": 1.9384, "step": 13264500 }, { "epoch": 65.72, "learning_rate": 1.7152100890295925e-05, "loss": 1.9558, "step": 13265000 }, { "epoch": 65.72, "learning_rate": 1.7150862303869842e-05, "loss": 1.9638, "step": 13265500 }, { "epoch": 65.72, "learning_rate": 1.714962371744376e-05, "loss": 1.9752, "step": 13266000 }, { "epoch": 65.73, "learning_rate": 1.7148387608190525e-05, "loss": 1.967, "step": 13266500 }, { "epoch": 65.73, "learning_rate": 1.714714902176444e-05, "loss": 1.9486, "step": 13267000 }, { "epoch": 65.73, "learning_rate": 1.714591043533836e-05, "loss": 1.9618, "step": 13267500 }, { "epoch": 65.73, "learning_rate": 1.7144671848912276e-05, "loss": 1.9491, "step": 13268000 }, { "epoch": 65.74, "learning_rate": 1.7143433262486192e-05, "loss": 1.9426, "step": 13268500 }, { "epoch": 65.74, "learning_rate": 1.714219467606011e-05, "loss": 1.9502, "step": 13269000 }, { "epoch": 65.74, "learning_rate": 1.7140956089634023e-05, "loss": 1.9484, "step": 13269500 }, { "epoch": 65.74, "learning_rate": 1.713971750320794e-05, "loss": 1.9353, "step": 13270000 }, { "epoch": 65.75, "learning_rate": 1.7138478916781857e-05, "loss": 1.9867, "step": 13270500 }, { "epoch": 65.75, "learning_rate": 1.7137240330355774e-05, "loss": 1.9675, "step": 13271000 }, { "epoch": 65.75, "learning_rate": 1.7136001743929687e-05, "loss": 1.9311, "step": 13271500 }, { "epoch": 65.75, "learning_rate": 1.7134763157503604e-05, "loss": 1.9531, "step": 13272000 }, { "epoch": 65.76, "learning_rate": 1.713352457107752e-05, "loss": 1.962, "step": 13272500 }, { "epoch": 65.76, "learning_rate": 1.713228846182429e-05, "loss": 1.9484, "step": 13273000 }, { "epoch": 65.76, "learning_rate": 1.7131049875398207e-05, "loss": 1.9774, "step": 13273500 }, { "epoch": 65.76, "learning_rate": 1.7129811288972124e-05, "loss": 1.9508, "step": 13274000 }, { "epoch": 65.77, "learning_rate": 1.712857270254604e-05, "loss": 1.942, "step": 13274500 }, { "epoch": 65.77, "learning_rate": 1.7127334116119954e-05, "loss": 1.9499, "step": 13275000 }, { "epoch": 65.77, "learning_rate": 1.712609552969387e-05, "loss": 1.9655, "step": 13275500 }, { "epoch": 65.77, "learning_rate": 1.7124856943267788e-05, "loss": 1.9661, "step": 13276000 }, { "epoch": 65.78, "learning_rate": 1.7123620834014557e-05, "loss": 1.9415, "step": 13276500 }, { "epoch": 65.78, "learning_rate": 1.7122382247588474e-05, "loss": 1.9445, "step": 13277000 }, { "epoch": 65.78, "learning_rate": 1.712114366116239e-05, "loss": 1.9712, "step": 13277500 }, { "epoch": 65.78, "learning_rate": 1.7119905074736304e-05, "loss": 1.9409, "step": 13278000 }, { "epoch": 65.79, "learning_rate": 1.711866648831022e-05, "loss": 1.9674, "step": 13278500 }, { "epoch": 65.79, "learning_rate": 1.7117427901884138e-05, "loss": 1.964, "step": 13279000 }, { "epoch": 65.79, "learning_rate": 1.7116189315458055e-05, "loss": 1.9642, "step": 13279500 }, { "epoch": 65.79, "learning_rate": 1.711495072903197e-05, "loss": 1.9612, "step": 13280000 }, { "epoch": 65.8, "learning_rate": 1.7113712142605886e-05, "loss": 1.9371, "step": 13280500 }, { "epoch": 65.8, "learning_rate": 1.7112473556179802e-05, "loss": 1.9352, "step": 13281000 }, { "epoch": 65.8, "learning_rate": 1.711123744692657e-05, "loss": 1.9306, "step": 13281500 }, { "epoch": 65.8, "learning_rate": 1.7109998860500488e-05, "loss": 1.9525, "step": 13282000 }, { "epoch": 65.81, "learning_rate": 1.7108760274074405e-05, "loss": 1.9286, "step": 13282500 }, { "epoch": 65.81, "learning_rate": 1.710752168764832e-05, "loss": 1.979, "step": 13283000 }, { "epoch": 65.81, "learning_rate": 1.710628557839509e-05, "loss": 1.9598, "step": 13283500 }, { "epoch": 65.81, "learning_rate": 1.7105046991969008e-05, "loss": 1.9753, "step": 13284000 }, { "epoch": 65.82, "learning_rate": 1.7103808405542925e-05, "loss": 1.9304, "step": 13284500 }, { "epoch": 65.82, "learning_rate": 1.7102569819116838e-05, "loss": 1.9781, "step": 13285000 }, { "epoch": 65.82, "learning_rate": 1.7101331232690755e-05, "loss": 1.9382, "step": 13285500 }, { "epoch": 65.82, "learning_rate": 1.7100092646264672e-05, "loss": 1.9728, "step": 13286000 }, { "epoch": 65.83, "learning_rate": 1.709885653701144e-05, "loss": 1.9544, "step": 13286500 }, { "epoch": 65.83, "learning_rate": 1.7097617950585358e-05, "loss": 1.9687, "step": 13287000 }, { "epoch": 65.83, "learning_rate": 1.7096379364159275e-05, "loss": 1.9513, "step": 13287500 }, { "epoch": 65.83, "learning_rate": 1.7095140777733192e-05, "loss": 1.9656, "step": 13288000 }, { "epoch": 65.84, "learning_rate": 1.7093902191307105e-05, "loss": 1.9552, "step": 13288500 }, { "epoch": 65.84, "learning_rate": 1.7092663604881022e-05, "loss": 1.9674, "step": 13289000 }, { "epoch": 65.84, "learning_rate": 1.7091425018454936e-05, "loss": 1.9383, "step": 13289500 }, { "epoch": 65.84, "learning_rate": 1.7090186432028853e-05, "loss": 1.9591, "step": 13290000 }, { "epoch": 65.85, "learning_rate": 1.708894784560277e-05, "loss": 1.9765, "step": 13290500 }, { "epoch": 65.85, "learning_rate": 1.7087709259176687e-05, "loss": 1.9519, "step": 13291000 }, { "epoch": 65.85, "learning_rate": 1.7086470672750603e-05, "loss": 1.962, "step": 13291500 }, { "epoch": 65.85, "learning_rate": 1.708523208632452e-05, "loss": 1.9754, "step": 13292000 }, { "epoch": 65.86, "learning_rate": 1.7083993499898437e-05, "loss": 1.957, "step": 13292500 }, { "epoch": 65.86, "learning_rate": 1.7082757390645203e-05, "loss": 1.9197, "step": 13293000 }, { "epoch": 65.86, "learning_rate": 1.708151880421912e-05, "loss": 1.9458, "step": 13293500 }, { "epoch": 65.86, "learning_rate": 1.7080280217793037e-05, "loss": 1.9483, "step": 13294000 }, { "epoch": 65.87, "learning_rate": 1.7079041631366954e-05, "loss": 1.9522, "step": 13294500 }, { "epoch": 65.87, "learning_rate": 1.7077805522113722e-05, "loss": 1.951, "step": 13295000 }, { "epoch": 65.87, "learning_rate": 1.707656693568764e-05, "loss": 1.9587, "step": 13295500 }, { "epoch": 65.87, "learning_rate": 1.7075328349261556e-05, "loss": 1.9654, "step": 13296000 }, { "epoch": 65.88, "learning_rate": 1.7074092240008325e-05, "loss": 1.964, "step": 13296500 }, { "epoch": 65.88, "learning_rate": 1.7072853653582242e-05, "loss": 1.9735, "step": 13297000 }, { "epoch": 65.88, "learning_rate": 1.7071617544329007e-05, "loss": 1.9584, "step": 13297500 }, { "epoch": 65.88, "learning_rate": 1.7070378957902924e-05, "loss": 1.964, "step": 13298000 }, { "epoch": 65.89, "learning_rate": 1.706914037147684e-05, "loss": 1.9433, "step": 13298500 }, { "epoch": 65.89, "learning_rate": 1.7067901785050758e-05, "loss": 1.9486, "step": 13299000 }, { "epoch": 65.89, "learning_rate": 1.7066663198624675e-05, "loss": 1.9528, "step": 13299500 }, { "epoch": 65.89, "learning_rate": 1.7065424612198592e-05, "loss": 1.9594, "step": 13300000 }, { "epoch": 65.9, "learning_rate": 1.706418602577251e-05, "loss": 1.9786, "step": 13300500 }, { "epoch": 65.9, "learning_rate": 1.7062947439346426e-05, "loss": 1.9656, "step": 13301000 }, { "epoch": 65.9, "learning_rate": 1.7061708852920343e-05, "loss": 1.9587, "step": 13301500 }, { "epoch": 65.9, "learning_rate": 1.7060472743667108e-05, "loss": 1.9674, "step": 13302000 }, { "epoch": 65.91, "learning_rate": 1.7059234157241025e-05, "loss": 1.9748, "step": 13302500 }, { "epoch": 65.91, "learning_rate": 1.7057995570814942e-05, "loss": 1.9513, "step": 13303000 }, { "epoch": 65.91, "learning_rate": 1.705675946156171e-05, "loss": 1.957, "step": 13303500 }, { "epoch": 65.91, "learning_rate": 1.7055520875135624e-05, "loss": 1.9566, "step": 13304000 }, { "epoch": 65.92, "learning_rate": 1.705428228870954e-05, "loss": 1.9617, "step": 13304500 }, { "epoch": 65.92, "learning_rate": 1.7053043702283458e-05, "loss": 1.9844, "step": 13305000 }, { "epoch": 65.92, "learning_rate": 1.7051805115857375e-05, "loss": 1.9639, "step": 13305500 }, { "epoch": 65.92, "learning_rate": 1.7050569006604144e-05, "loss": 1.9519, "step": 13306000 }, { "epoch": 65.93, "learning_rate": 1.704933042017806e-05, "loss": 1.954, "step": 13306500 }, { "epoch": 65.93, "learning_rate": 1.7048091833751974e-05, "loss": 1.953, "step": 13307000 }, { "epoch": 65.93, "learning_rate": 1.704685324732589e-05, "loss": 1.9667, "step": 13307500 }, { "epoch": 65.93, "learning_rate": 1.704561466089981e-05, "loss": 1.9446, "step": 13308000 }, { "epoch": 65.93, "learning_rate": 1.7044376074473725e-05, "loss": 1.9474, "step": 13308500 }, { "epoch": 65.94, "learning_rate": 1.7043137488047642e-05, "loss": 1.9599, "step": 13309000 }, { "epoch": 65.94, "learning_rate": 1.704190137879441e-05, "loss": 1.9558, "step": 13309500 }, { "epoch": 65.94, "learning_rate": 1.7040662792368325e-05, "loss": 1.9368, "step": 13310000 }, { "epoch": 65.94, "learning_rate": 1.703942420594224e-05, "loss": 1.9677, "step": 13310500 }, { "epoch": 65.95, "learning_rate": 1.703818561951616e-05, "loss": 1.9592, "step": 13311000 }, { "epoch": 65.95, "learning_rate": 1.7036947033090075e-05, "loss": 1.9585, "step": 13311500 }, { "epoch": 65.95, "learning_rate": 1.7035708446663992e-05, "loss": 1.9444, "step": 13312000 }, { "epoch": 65.95, "learning_rate": 1.703446986023791e-05, "loss": 1.9703, "step": 13312500 }, { "epoch": 65.96, "learning_rate": 1.7033231273811826e-05, "loss": 1.9364, "step": 13313000 }, { "epoch": 65.96, "learning_rate": 1.7031992687385743e-05, "loss": 1.9348, "step": 13313500 }, { "epoch": 65.96, "learning_rate": 1.703075410095966e-05, "loss": 1.966, "step": 13314000 }, { "epoch": 65.96, "learning_rate": 1.7029515514533573e-05, "loss": 1.9522, "step": 13314500 }, { "epoch": 65.97, "learning_rate": 1.702827692810749e-05, "loss": 1.9641, "step": 13315000 }, { "epoch": 65.97, "learning_rate": 1.702704081885426e-05, "loss": 1.9972, "step": 13315500 }, { "epoch": 65.97, "learning_rate": 1.7025802232428176e-05, "loss": 1.9693, "step": 13316000 }, { "epoch": 65.97, "learning_rate": 1.7024563646002093e-05, "loss": 1.9937, "step": 13316500 }, { "epoch": 65.98, "learning_rate": 1.702332505957601e-05, "loss": 1.9891, "step": 13317000 }, { "epoch": 65.98, "learning_rate": 1.7022088950322775e-05, "loss": 1.9458, "step": 13317500 }, { "epoch": 65.98, "learning_rate": 1.7020850363896692e-05, "loss": 1.9687, "step": 13318000 }, { "epoch": 65.98, "learning_rate": 1.701961177747061e-05, "loss": 1.96, "step": 13318500 }, { "epoch": 65.99, "learning_rate": 1.7018373191044526e-05, "loss": 1.9404, "step": 13319000 }, { "epoch": 65.99, "learning_rate": 1.7017134604618443e-05, "loss": 1.9869, "step": 13319500 }, { "epoch": 65.99, "learning_rate": 1.701589601819236e-05, "loss": 1.9902, "step": 13320000 }, { "epoch": 65.99, "learning_rate": 1.7014657431766274e-05, "loss": 1.9583, "step": 13320500 }, { "epoch": 66.0, "learning_rate": 1.701341884534019e-05, "loss": 1.9536, "step": 13321000 }, { "epoch": 66.0, "learning_rate": 1.7012180258914107e-05, "loss": 1.9802, "step": 13321500 }, { "epoch": 66.0, "eval_accuracy": 0.6756721338470035, "eval_accuracy_mlm": 0.635687710702305, "eval_accuracy_nsp": 0.8642605281633517, "eval_loss": 2.295546293258667, "eval_runtime": 147.102, "eval_samples_per_second": 1733.213, "eval_steps_per_second": 72.222, "step": 13321638 }, { "epoch": 66.0, "learning_rate": 1.7010941672488024e-05, "loss": 1.938, "step": 13322000 }, { "epoch": 66.0, "learning_rate": 1.700970308606194e-05, "loss": 1.9669, "step": 13322500 }, { "epoch": 66.01, "learning_rate": 1.7008464499635858e-05, "loss": 1.9409, "step": 13323000 }, { "epoch": 66.01, "learning_rate": 1.7007225913209772e-05, "loss": 1.9515, "step": 13323500 }, { "epoch": 66.01, "learning_rate": 1.700598980395654e-05, "loss": 1.9528, "step": 13324000 }, { "epoch": 66.01, "learning_rate": 1.7004751217530458e-05, "loss": 1.9605, "step": 13324500 }, { "epoch": 66.02, "learning_rate": 1.7003512631104374e-05, "loss": 1.9169, "step": 13325000 }, { "epoch": 66.02, "learning_rate": 1.700227404467829e-05, "loss": 1.9411, "step": 13325500 }, { "epoch": 66.02, "learning_rate": 1.700103793542506e-05, "loss": 1.9356, "step": 13326000 }, { "epoch": 66.02, "learning_rate": 1.6999799348998977e-05, "loss": 1.9256, "step": 13326500 }, { "epoch": 66.03, "learning_rate": 1.6998563239745743e-05, "loss": 1.9278, "step": 13327000 }, { "epoch": 66.03, "learning_rate": 1.699732465331966e-05, "loss": 1.937, "step": 13327500 }, { "epoch": 66.03, "learning_rate": 1.6996086066893576e-05, "loss": 1.9551, "step": 13328000 }, { "epoch": 66.03, "learning_rate": 1.6994847480467493e-05, "loss": 1.9532, "step": 13328500 }, { "epoch": 66.04, "learning_rate": 1.699360889404141e-05, "loss": 1.9338, "step": 13329000 }, { "epoch": 66.04, "learning_rate": 1.6992370307615327e-05, "loss": 1.9162, "step": 13329500 }, { "epoch": 66.04, "learning_rate": 1.699113172118924e-05, "loss": 1.9362, "step": 13330000 }, { "epoch": 66.04, "learning_rate": 1.6989893134763158e-05, "loss": 1.9326, "step": 13330500 }, { "epoch": 66.05, "learning_rate": 1.6988654548337075e-05, "loss": 1.9661, "step": 13331000 }, { "epoch": 66.05, "learning_rate": 1.698741596191099e-05, "loss": 1.9454, "step": 13331500 }, { "epoch": 66.05, "learning_rate": 1.698617985265776e-05, "loss": 1.9171, "step": 13332000 }, { "epoch": 66.05, "learning_rate": 1.6984941266231677e-05, "loss": 1.9073, "step": 13332500 }, { "epoch": 66.06, "learning_rate": 1.6983705156978443e-05, "loss": 1.9384, "step": 13333000 }, { "epoch": 66.06, "learning_rate": 1.698246657055236e-05, "loss": 1.9473, "step": 13333500 }, { "epoch": 66.06, "learning_rate": 1.6981227984126277e-05, "loss": 1.9494, "step": 13334000 }, { "epoch": 66.06, "learning_rate": 1.6979989397700193e-05, "loss": 1.9086, "step": 13334500 }, { "epoch": 66.07, "learning_rate": 1.697875081127411e-05, "loss": 1.9506, "step": 13335000 }, { "epoch": 66.07, "learning_rate": 1.6977512224848027e-05, "loss": 1.9418, "step": 13335500 }, { "epoch": 66.07, "learning_rate": 1.6976273638421944e-05, "loss": 1.921, "step": 13336000 }, { "epoch": 66.07, "learning_rate": 1.6975035051995858e-05, "loss": 1.9283, "step": 13336500 }, { "epoch": 66.08, "learning_rate": 1.6973796465569775e-05, "loss": 1.9646, "step": 13337000 }, { "epoch": 66.08, "learning_rate": 1.697255787914369e-05, "loss": 1.9095, "step": 13337500 }, { "epoch": 66.08, "learning_rate": 1.697132176989046e-05, "loss": 1.9265, "step": 13338000 }, { "epoch": 66.08, "learning_rate": 1.6970083183464377e-05, "loss": 1.9532, "step": 13338500 }, { "epoch": 66.09, "learning_rate": 1.6968844597038294e-05, "loss": 1.9632, "step": 13339000 }, { "epoch": 66.09, "learning_rate": 1.6967606010612208e-05, "loss": 1.9286, "step": 13339500 }, { "epoch": 66.09, "learning_rate": 1.6966367424186125e-05, "loss": 1.9429, "step": 13340000 }, { "epoch": 66.09, "learning_rate": 1.6965128837760042e-05, "loss": 1.9401, "step": 13340500 }, { "epoch": 66.1, "learning_rate": 1.696389272850681e-05, "loss": 1.9367, "step": 13341000 }, { "epoch": 66.1, "learning_rate": 1.6962654142080727e-05, "loss": 1.9371, "step": 13341500 }, { "epoch": 66.1, "learning_rate": 1.6961415555654644e-05, "loss": 1.9485, "step": 13342000 }, { "epoch": 66.1, "learning_rate": 1.696017944640141e-05, "loss": 1.9532, "step": 13342500 }, { "epoch": 66.11, "learning_rate": 1.6958940859975327e-05, "loss": 1.9538, "step": 13343000 }, { "epoch": 66.11, "learning_rate": 1.6957702273549244e-05, "loss": 1.9354, "step": 13343500 }, { "epoch": 66.11, "learning_rate": 1.695646368712316e-05, "loss": 1.9593, "step": 13344000 }, { "epoch": 66.11, "learning_rate": 1.6955225100697078e-05, "loss": 1.9458, "step": 13344500 }, { "epoch": 66.12, "learning_rate": 1.6953988991443846e-05, "loss": 1.9465, "step": 13345000 }, { "epoch": 66.12, "learning_rate": 1.6952750405017763e-05, "loss": 1.9414, "step": 13345500 }, { "epoch": 66.12, "learning_rate": 1.6951511818591677e-05, "loss": 1.9596, "step": 13346000 }, { "epoch": 66.12, "learning_rate": 1.6950273232165594e-05, "loss": 1.9377, "step": 13346500 }, { "epoch": 66.13, "learning_rate": 1.694903464573951e-05, "loss": 1.926, "step": 13347000 }, { "epoch": 66.13, "learning_rate": 1.694779853648628e-05, "loss": 1.9567, "step": 13347500 }, { "epoch": 66.13, "learning_rate": 1.6946559950060196e-05, "loss": 1.9366, "step": 13348000 }, { "epoch": 66.13, "learning_rate": 1.6945321363634113e-05, "loss": 1.9534, "step": 13348500 }, { "epoch": 66.14, "learning_rate": 1.6944082777208027e-05, "loss": 1.95, "step": 13349000 }, { "epoch": 66.14, "learning_rate": 1.6942844190781944e-05, "loss": 1.9443, "step": 13349500 }, { "epoch": 66.14, "learning_rate": 1.694160560435586e-05, "loss": 1.9586, "step": 13350000 }, { "epoch": 66.14, "learning_rate": 1.6940367017929778e-05, "loss": 1.9302, "step": 13350500 }, { "epoch": 66.15, "learning_rate": 1.6939128431503695e-05, "loss": 1.9774, "step": 13351000 }, { "epoch": 66.15, "learning_rate": 1.693788984507761e-05, "loss": 1.9226, "step": 13351500 }, { "epoch": 66.15, "learning_rate": 1.693665373582438e-05, "loss": 1.9343, "step": 13352000 }, { "epoch": 66.15, "learning_rate": 1.6935415149398294e-05, "loss": 1.9537, "step": 13352500 }, { "epoch": 66.16, "learning_rate": 1.693417656297221e-05, "loss": 1.9384, "step": 13353000 }, { "epoch": 66.16, "learning_rate": 1.6932937976546128e-05, "loss": 1.9409, "step": 13353500 }, { "epoch": 66.16, "learning_rate": 1.6931699390120045e-05, "loss": 1.952, "step": 13354000 }, { "epoch": 66.16, "learning_rate": 1.6930465758039665e-05, "loss": 1.962, "step": 13354500 }, { "epoch": 66.17, "learning_rate": 1.6929227171613582e-05, "loss": 1.9236, "step": 13355000 }, { "epoch": 66.17, "learning_rate": 1.69279885851875e-05, "loss": 1.9762, "step": 13355500 }, { "epoch": 66.17, "learning_rate": 1.6926749998761416e-05, "loss": 1.9455, "step": 13356000 }, { "epoch": 66.17, "learning_rate": 1.6925511412335333e-05, "loss": 1.9596, "step": 13356500 }, { "epoch": 66.18, "learning_rate": 1.6924272825909247e-05, "loss": 1.9504, "step": 13357000 }, { "epoch": 66.18, "learning_rate": 1.6923034239483163e-05, "loss": 1.9512, "step": 13357500 }, { "epoch": 66.18, "learning_rate": 1.692179565305708e-05, "loss": 1.9469, "step": 13358000 }, { "epoch": 66.18, "learning_rate": 1.692055954380385e-05, "loss": 1.9464, "step": 13358500 }, { "epoch": 66.19, "learning_rate": 1.6919320957377766e-05, "loss": 1.9514, "step": 13359000 }, { "epoch": 66.19, "learning_rate": 1.6918082370951683e-05, "loss": 1.9447, "step": 13359500 }, { "epoch": 66.19, "learning_rate": 1.6916843784525597e-05, "loss": 1.9433, "step": 13360000 }, { "epoch": 66.19, "learning_rate": 1.6915605198099514e-05, "loss": 1.9316, "step": 13360500 }, { "epoch": 66.2, "learning_rate": 1.691436661167343e-05, "loss": 1.9415, "step": 13361000 }, { "epoch": 66.2, "learning_rate": 1.69131305024202e-05, "loss": 1.9712, "step": 13361500 }, { "epoch": 66.2, "learning_rate": 1.6911891915994116e-05, "loss": 1.942, "step": 13362000 }, { "epoch": 66.2, "learning_rate": 1.6910653329568033e-05, "loss": 1.941, "step": 13362500 }, { "epoch": 66.2, "learning_rate": 1.6909414743141947e-05, "loss": 1.9498, "step": 13363000 }, { "epoch": 66.21, "learning_rate": 1.6908178633888715e-05, "loss": 1.94, "step": 13363500 }, { "epoch": 66.21, "learning_rate": 1.6906940047462632e-05, "loss": 1.9678, "step": 13364000 }, { "epoch": 66.21, "learning_rate": 1.690570146103655e-05, "loss": 1.9648, "step": 13364500 }, { "epoch": 66.21, "learning_rate": 1.6904462874610466e-05, "loss": 1.9577, "step": 13365000 }, { "epoch": 66.22, "learning_rate": 1.6903226765357232e-05, "loss": 1.9344, "step": 13365500 }, { "epoch": 66.22, "learning_rate": 1.690198817893115e-05, "loss": 1.9567, "step": 13366000 }, { "epoch": 66.22, "learning_rate": 1.6900749592505066e-05, "loss": 1.9398, "step": 13366500 }, { "epoch": 66.22, "learning_rate": 1.6899511006078982e-05, "loss": 1.9298, "step": 13367000 }, { "epoch": 66.23, "learning_rate": 1.68982724196529e-05, "loss": 1.9435, "step": 13367500 }, { "epoch": 66.23, "learning_rate": 1.6897033833226816e-05, "loss": 1.9406, "step": 13368000 }, { "epoch": 66.23, "learning_rate": 1.6895797723973582e-05, "loss": 1.9653, "step": 13368500 }, { "epoch": 66.23, "learning_rate": 1.68945591375475e-05, "loss": 1.9383, "step": 13369000 }, { "epoch": 66.24, "learning_rate": 1.6893320551121416e-05, "loss": 1.9423, "step": 13369500 }, { "epoch": 66.24, "learning_rate": 1.6892081964695333e-05, "loss": 1.9464, "step": 13370000 }, { "epoch": 66.24, "learning_rate": 1.689084337826925e-05, "loss": 1.9494, "step": 13370500 }, { "epoch": 66.24, "learning_rate": 1.6889604791843166e-05, "loss": 1.9338, "step": 13371000 }, { "epoch": 66.25, "learning_rate": 1.6888366205417083e-05, "loss": 1.952, "step": 13371500 }, { "epoch": 66.25, "learning_rate": 1.688713009616385e-05, "loss": 1.9575, "step": 13372000 }, { "epoch": 66.25, "learning_rate": 1.6885891509737766e-05, "loss": 1.9316, "step": 13372500 }, { "epoch": 66.25, "learning_rate": 1.6884652923311683e-05, "loss": 1.9289, "step": 13373000 }, { "epoch": 66.26, "learning_rate": 1.68834143368856e-05, "loss": 1.9578, "step": 13373500 }, { "epoch": 66.26, "learning_rate": 1.6882175750459516e-05, "loss": 1.9643, "step": 13374000 }, { "epoch": 66.26, "learning_rate": 1.6880937164033433e-05, "loss": 1.9536, "step": 13374500 }, { "epoch": 66.26, "learning_rate": 1.687969857760735e-05, "loss": 1.953, "step": 13375000 }, { "epoch": 66.27, "learning_rate": 1.6878459991181264e-05, "loss": 1.9532, "step": 13375500 }, { "epoch": 66.27, "learning_rate": 1.6877223881928033e-05, "loss": 1.9459, "step": 13376000 }, { "epoch": 66.27, "learning_rate": 1.687598529550195e-05, "loss": 1.9453, "step": 13376500 }, { "epoch": 66.27, "learning_rate": 1.6874746709075867e-05, "loss": 1.9266, "step": 13377000 }, { "epoch": 66.28, "learning_rate": 1.6873508122649783e-05, "loss": 1.9688, "step": 13377500 }, { "epoch": 66.28, "learning_rate": 1.68722695362237e-05, "loss": 1.9363, "step": 13378000 }, { "epoch": 66.28, "learning_rate": 1.6871038381316173e-05, "loss": 1.961, "step": 13378500 }, { "epoch": 66.28, "learning_rate": 1.686979979489009e-05, "loss": 1.917, "step": 13379000 }, { "epoch": 66.29, "learning_rate": 1.6868561208464003e-05, "loss": 1.9499, "step": 13379500 }, { "epoch": 66.29, "learning_rate": 1.686732262203792e-05, "loss": 1.9247, "step": 13380000 }, { "epoch": 66.29, "learning_rate": 1.6866084035611837e-05, "loss": 1.9575, "step": 13380500 }, { "epoch": 66.29, "learning_rate": 1.6864847926358606e-05, "loss": 1.9678, "step": 13381000 }, { "epoch": 66.3, "learning_rate": 1.6863609339932523e-05, "loss": 1.9438, "step": 13381500 }, { "epoch": 66.3, "learning_rate": 1.686237075350644e-05, "loss": 1.9456, "step": 13382000 }, { "epoch": 66.3, "learning_rate": 1.6861132167080353e-05, "loss": 1.9449, "step": 13382500 }, { "epoch": 66.3, "learning_rate": 1.685989358065427e-05, "loss": 1.9691, "step": 13383000 }, { "epoch": 66.31, "learning_rate": 1.6858654994228187e-05, "loss": 1.9353, "step": 13383500 }, { "epoch": 66.31, "learning_rate": 1.6857416407802104e-05, "loss": 1.9554, "step": 13384000 }, { "epoch": 66.31, "learning_rate": 1.685617782137602e-05, "loss": 1.9076, "step": 13384500 }, { "epoch": 66.31, "learning_rate": 1.6854939234949938e-05, "loss": 1.9588, "step": 13385000 }, { "epoch": 66.32, "learning_rate": 1.6853703125696704e-05, "loss": 1.9489, "step": 13385500 }, { "epoch": 66.32, "learning_rate": 1.685246453927062e-05, "loss": 1.9439, "step": 13386000 }, { "epoch": 66.32, "learning_rate": 1.6851225952844537e-05, "loss": 1.9507, "step": 13386500 }, { "epoch": 66.32, "learning_rate": 1.6849989843591306e-05, "loss": 1.956, "step": 13387000 }, { "epoch": 66.33, "learning_rate": 1.6848751257165223e-05, "loss": 1.956, "step": 13387500 }, { "epoch": 66.33, "learning_rate": 1.684751267073914e-05, "loss": 1.9476, "step": 13388000 }, { "epoch": 66.33, "learning_rate": 1.6846274084313057e-05, "loss": 1.9375, "step": 13388500 }, { "epoch": 66.33, "learning_rate": 1.684503549788697e-05, "loss": 1.9432, "step": 13389000 }, { "epoch": 66.34, "learning_rate": 1.6843796911460887e-05, "loss": 1.9391, "step": 13389500 }, { "epoch": 66.34, "learning_rate": 1.6842558325034804e-05, "loss": 1.9714, "step": 13390000 }, { "epoch": 66.34, "learning_rate": 1.684131973860872e-05, "loss": 1.948, "step": 13390500 }, { "epoch": 66.34, "learning_rate": 1.6840081152182638e-05, "loss": 1.9595, "step": 13391000 }, { "epoch": 66.35, "learning_rate": 1.6838842565756555e-05, "loss": 1.9525, "step": 13391500 }, { "epoch": 66.35, "learning_rate": 1.6837603979330472e-05, "loss": 1.9461, "step": 13392000 }, { "epoch": 66.35, "learning_rate": 1.6836367870077238e-05, "loss": 1.9345, "step": 13392500 }, { "epoch": 66.35, "learning_rate": 1.6835131760824006e-05, "loss": 1.933, "step": 13393000 }, { "epoch": 66.36, "learning_rate": 1.683389565157078e-05, "loss": 1.9293, "step": 13393500 }, { "epoch": 66.36, "learning_rate": 1.6832659542317544e-05, "loss": 1.9559, "step": 13394000 }, { "epoch": 66.36, "learning_rate": 1.683142095589146e-05, "loss": 1.9517, "step": 13394500 }, { "epoch": 66.36, "learning_rate": 1.6830182369465378e-05, "loss": 1.9477, "step": 13395000 }, { "epoch": 66.37, "learning_rate": 1.6828943783039295e-05, "loss": 1.9667, "step": 13395500 }, { "epoch": 66.37, "learning_rate": 1.682770519661321e-05, "loss": 1.964, "step": 13396000 }, { "epoch": 66.37, "learning_rate": 1.682646661018713e-05, "loss": 1.9678, "step": 13396500 }, { "epoch": 66.37, "learning_rate": 1.6825228023761042e-05, "loss": 1.9628, "step": 13397000 }, { "epoch": 66.38, "learning_rate": 1.682398943733496e-05, "loss": 1.9416, "step": 13397500 }, { "epoch": 66.38, "learning_rate": 1.6822750850908876e-05, "loss": 1.963, "step": 13398000 }, { "epoch": 66.38, "learning_rate": 1.6821514741655645e-05, "loss": 1.9639, "step": 13398500 }, { "epoch": 66.38, "learning_rate": 1.682027863240241e-05, "loss": 1.9077, "step": 13399000 }, { "epoch": 66.39, "learning_rate": 1.6819040045976327e-05, "loss": 1.9643, "step": 13399500 }, { "epoch": 66.39, "learning_rate": 1.6817801459550244e-05, "loss": 1.9601, "step": 13400000 }, { "epoch": 66.39, "learning_rate": 1.681656287312416e-05, "loss": 1.9474, "step": 13400500 }, { "epoch": 66.39, "learning_rate": 1.6815324286698078e-05, "loss": 1.9707, "step": 13401000 }, { "epoch": 66.4, "learning_rate": 1.6814085700271995e-05, "loss": 1.9523, "step": 13401500 }, { "epoch": 66.4, "learning_rate": 1.681284959101876e-05, "loss": 1.9417, "step": 13402000 }, { "epoch": 66.4, "learning_rate": 1.6811611004592677e-05, "loss": 1.9591, "step": 13402500 }, { "epoch": 66.4, "learning_rate": 1.6810372418166594e-05, "loss": 1.9408, "step": 13403000 }, { "epoch": 66.41, "learning_rate": 1.680913383174051e-05, "loss": 1.9303, "step": 13403500 }, { "epoch": 66.41, "learning_rate": 1.6807895245314428e-05, "loss": 1.9674, "step": 13404000 }, { "epoch": 66.41, "learning_rate": 1.6806659136061197e-05, "loss": 1.9402, "step": 13404500 }, { "epoch": 66.41, "learning_rate": 1.680542054963511e-05, "loss": 1.9431, "step": 13405000 }, { "epoch": 66.42, "learning_rate": 1.6804181963209027e-05, "loss": 1.9845, "step": 13405500 }, { "epoch": 66.42, "learning_rate": 1.6802943376782944e-05, "loss": 1.9653, "step": 13406000 }, { "epoch": 66.42, "learning_rate": 1.680170479035686e-05, "loss": 1.9773, "step": 13406500 }, { "epoch": 66.42, "learning_rate": 1.6800466203930778e-05, "loss": 1.9674, "step": 13407000 }, { "epoch": 66.43, "learning_rate": 1.6799227617504695e-05, "loss": 1.9532, "step": 13407500 }, { "epoch": 66.43, "learning_rate": 1.6797989031078612e-05, "loss": 1.9647, "step": 13408000 }, { "epoch": 66.43, "learning_rate": 1.679675044465253e-05, "loss": 1.9414, "step": 13408500 }, { "epoch": 66.43, "learning_rate": 1.6795511858226446e-05, "loss": 1.9708, "step": 13409000 }, { "epoch": 66.44, "learning_rate": 1.679427327180036e-05, "loss": 1.9329, "step": 13409500 }, { "epoch": 66.44, "learning_rate": 1.6793037162547128e-05, "loss": 1.9639, "step": 13410000 }, { "epoch": 66.44, "learning_rate": 1.6791798576121045e-05, "loss": 1.95, "step": 13410500 }, { "epoch": 66.44, "learning_rate": 1.6790559989694962e-05, "loss": 1.9433, "step": 13411000 }, { "epoch": 66.45, "learning_rate": 1.678932140326888e-05, "loss": 1.9267, "step": 13411500 }, { "epoch": 66.45, "learning_rate": 1.6788082816842796e-05, "loss": 1.975, "step": 13412000 }, { "epoch": 66.45, "learning_rate": 1.678684423041671e-05, "loss": 1.9506, "step": 13412500 }, { "epoch": 66.45, "learning_rate": 1.6785605643990626e-05, "loss": 1.9376, "step": 13413000 }, { "epoch": 66.46, "learning_rate": 1.6784367057564543e-05, "loss": 1.9575, "step": 13413500 }, { "epoch": 66.46, "learning_rate": 1.678312847113846e-05, "loss": 1.9337, "step": 13414000 }, { "epoch": 66.46, "learning_rate": 1.6781889884712377e-05, "loss": 1.9601, "step": 13414500 }, { "epoch": 66.46, "learning_rate": 1.6780651298286294e-05, "loss": 1.9567, "step": 13415000 }, { "epoch": 66.47, "learning_rate": 1.677941271186021e-05, "loss": 1.9339, "step": 13415500 }, { "epoch": 66.47, "learning_rate": 1.6778174125434128e-05, "loss": 1.979, "step": 13416000 }, { "epoch": 66.47, "learning_rate": 1.677693553900804e-05, "loss": 1.9574, "step": 13416500 }, { "epoch": 66.47, "learning_rate": 1.677569695258196e-05, "loss": 1.945, "step": 13417000 }, { "epoch": 66.47, "learning_rate": 1.6774458366155872e-05, "loss": 1.9584, "step": 13417500 }, { "epoch": 66.48, "learning_rate": 1.6773222256902644e-05, "loss": 1.94, "step": 13418000 }, { "epoch": 66.48, "learning_rate": 1.677198367047656e-05, "loss": 1.9635, "step": 13418500 }, { "epoch": 66.48, "learning_rate": 1.6770745084050478e-05, "loss": 1.9555, "step": 13419000 }, { "epoch": 66.48, "learning_rate": 1.6769506497624395e-05, "loss": 1.9345, "step": 13419500 }, { "epoch": 66.49, "learning_rate": 1.676826791119831e-05, "loss": 1.9507, "step": 13420000 }, { "epoch": 66.49, "learning_rate": 1.6767031801945077e-05, "loss": 1.9391, "step": 13420500 }, { "epoch": 66.49, "learning_rate": 1.6765793215518994e-05, "loss": 1.957, "step": 13421000 }, { "epoch": 66.49, "learning_rate": 1.676455462909291e-05, "loss": 1.9253, "step": 13421500 }, { "epoch": 66.5, "learning_rate": 1.6763316042666828e-05, "loss": 1.9723, "step": 13422000 }, { "epoch": 66.5, "learning_rate": 1.6762079933413593e-05, "loss": 1.9479, "step": 13422500 }, { "epoch": 66.5, "learning_rate": 1.676084134698751e-05, "loss": 1.965, "step": 13423000 }, { "epoch": 66.5, "learning_rate": 1.675960523773428e-05, "loss": 1.9689, "step": 13423500 }, { "epoch": 66.51, "learning_rate": 1.6758366651308196e-05, "loss": 1.9556, "step": 13424000 }, { "epoch": 66.51, "learning_rate": 1.6757128064882113e-05, "loss": 1.9467, "step": 13424500 }, { "epoch": 66.51, "learning_rate": 1.6755889478456027e-05, "loss": 1.9501, "step": 13425000 }, { "epoch": 66.51, "learning_rate": 1.6754650892029943e-05, "loss": 1.9575, "step": 13425500 }, { "epoch": 66.52, "learning_rate": 1.675341230560386e-05, "loss": 1.9444, "step": 13426000 }, { "epoch": 66.52, "learning_rate": 1.675217619635063e-05, "loss": 1.9273, "step": 13426500 }, { "epoch": 66.52, "learning_rate": 1.6750937609924546e-05, "loss": 1.9584, "step": 13427000 }, { "epoch": 66.52, "learning_rate": 1.6749699023498463e-05, "loss": 1.9431, "step": 13427500 }, { "epoch": 66.53, "learning_rate": 1.674846043707238e-05, "loss": 1.9576, "step": 13428000 }, { "epoch": 66.53, "learning_rate": 1.6747224327819145e-05, "loss": 1.9229, "step": 13428500 }, { "epoch": 66.53, "learning_rate": 1.6745985741393062e-05, "loss": 1.9769, "step": 13429000 }, { "epoch": 66.53, "learning_rate": 1.6744749632139835e-05, "loss": 1.9718, "step": 13429500 }, { "epoch": 66.54, "learning_rate": 1.6743511045713748e-05, "loss": 1.9589, "step": 13430000 }, { "epoch": 66.54, "learning_rate": 1.6742272459287665e-05, "loss": 1.9621, "step": 13430500 }, { "epoch": 66.54, "learning_rate": 1.6741033872861582e-05, "loss": 1.9311, "step": 13431000 }, { "epoch": 66.54, "learning_rate": 1.67397952864355e-05, "loss": 1.9355, "step": 13431500 }, { "epoch": 66.55, "learning_rate": 1.6738556700009412e-05, "loss": 1.9677, "step": 13432000 }, { "epoch": 66.55, "learning_rate": 1.673731811358333e-05, "loss": 1.9477, "step": 13432500 }, { "epoch": 66.55, "learning_rate": 1.6736079527157246e-05, "loss": 1.9786, "step": 13433000 }, { "epoch": 66.55, "learning_rate": 1.6734843417904015e-05, "loss": 1.9646, "step": 13433500 }, { "epoch": 66.56, "learning_rate": 1.6733604831477932e-05, "loss": 1.9432, "step": 13434000 }, { "epoch": 66.56, "learning_rate": 1.673236624505185e-05, "loss": 1.9595, "step": 13434500 }, { "epoch": 66.56, "learning_rate": 1.6731127658625766e-05, "loss": 1.9918, "step": 13435000 }, { "epoch": 66.56, "learning_rate": 1.672988907219968e-05, "loss": 1.943, "step": 13435500 }, { "epoch": 66.57, "learning_rate": 1.672865296294645e-05, "loss": 1.9799, "step": 13436000 }, { "epoch": 66.57, "learning_rate": 1.6727414376520365e-05, "loss": 1.9602, "step": 13436500 }, { "epoch": 66.57, "learning_rate": 1.6726175790094282e-05, "loss": 1.9454, "step": 13437000 }, { "epoch": 66.57, "learning_rate": 1.67249372036682e-05, "loss": 1.9853, "step": 13437500 }, { "epoch": 66.58, "learning_rate": 1.6723698617242116e-05, "loss": 1.9265, "step": 13438000 }, { "epoch": 66.58, "learning_rate": 1.6722460030816033e-05, "loss": 1.9638, "step": 13438500 }, { "epoch": 66.58, "learning_rate": 1.6721221444389946e-05, "loss": 1.9412, "step": 13439000 }, { "epoch": 66.58, "learning_rate": 1.6719982857963863e-05, "loss": 1.9678, "step": 13439500 }, { "epoch": 66.59, "learning_rate": 1.671874427153778e-05, "loss": 1.9497, "step": 13440000 }, { "epoch": 66.59, "learning_rate": 1.6717505685111697e-05, "loss": 1.9823, "step": 13440500 }, { "epoch": 66.59, "learning_rate": 1.6716269575858466e-05, "loss": 1.9514, "step": 13441000 }, { "epoch": 66.59, "learning_rate": 1.6715030989432383e-05, "loss": 1.9215, "step": 13441500 }, { "epoch": 66.6, "learning_rate": 1.6713792403006296e-05, "loss": 1.9422, "step": 13442000 }, { "epoch": 66.6, "learning_rate": 1.6712553816580213e-05, "loss": 1.9369, "step": 13442500 }, { "epoch": 66.6, "learning_rate": 1.671131523015413e-05, "loss": 1.9453, "step": 13443000 }, { "epoch": 66.6, "learning_rate": 1.6710076643728047e-05, "loss": 1.9622, "step": 13443500 }, { "epoch": 66.61, "learning_rate": 1.6708840534474816e-05, "loss": 1.9491, "step": 13444000 }, { "epoch": 66.61, "learning_rate": 1.6707601948048733e-05, "loss": 1.9739, "step": 13444500 }, { "epoch": 66.61, "learning_rate": 1.670636336162265e-05, "loss": 1.9503, "step": 13445000 }, { "epoch": 66.61, "learning_rate": 1.6705124775196563e-05, "loss": 1.963, "step": 13445500 }, { "epoch": 66.62, "learning_rate": 1.670388618877048e-05, "loss": 1.9509, "step": 13446000 }, { "epoch": 66.62, "learning_rate": 1.6702647602344397e-05, "loss": 1.9601, "step": 13446500 }, { "epoch": 66.62, "learning_rate": 1.670140901591831e-05, "loss": 1.9629, "step": 13447000 }, { "epoch": 66.62, "learning_rate": 1.6700170429492228e-05, "loss": 1.9285, "step": 13447500 }, { "epoch": 66.63, "learning_rate": 1.6698931843066145e-05, "loss": 1.9676, "step": 13448000 }, { "epoch": 66.63, "learning_rate": 1.669769325664006e-05, "loss": 1.9544, "step": 13448500 }, { "epoch": 66.63, "learning_rate": 1.669645714738683e-05, "loss": 1.9433, "step": 13449000 }, { "epoch": 66.63, "learning_rate": 1.6695218560960747e-05, "loss": 1.9353, "step": 13449500 }, { "epoch": 66.64, "learning_rate": 1.669397997453466e-05, "loss": 1.9656, "step": 13450000 }, { "epoch": 66.64, "learning_rate": 1.6692741388108578e-05, "loss": 1.9527, "step": 13450500 }, { "epoch": 66.64, "learning_rate": 1.6691502801682495e-05, "loss": 1.9701, "step": 13451000 }, { "epoch": 66.64, "learning_rate": 1.669026421525641e-05, "loss": 1.9586, "step": 13451500 }, { "epoch": 66.65, "learning_rate": 1.668902562883033e-05, "loss": 1.9341, "step": 13452000 }, { "epoch": 66.65, "learning_rate": 1.6687787042404246e-05, "loss": 1.9381, "step": 13452500 }, { "epoch": 66.65, "learning_rate": 1.6686548455978162e-05, "loss": 1.9722, "step": 13453000 }, { "epoch": 66.65, "learning_rate": 1.668530986955208e-05, "loss": 1.957, "step": 13453500 }, { "epoch": 66.66, "learning_rate": 1.6684073760298845e-05, "loss": 1.9518, "step": 13454000 }, { "epoch": 66.66, "learning_rate": 1.6682835173872762e-05, "loss": 1.9366, "step": 13454500 }, { "epoch": 66.66, "learning_rate": 1.6681599064619534e-05, "loss": 1.9689, "step": 13455000 }, { "epoch": 66.66, "learning_rate": 1.6680360478193447e-05, "loss": 1.9194, "step": 13455500 }, { "epoch": 66.67, "learning_rate": 1.6679121891767364e-05, "loss": 1.9388, "step": 13456000 }, { "epoch": 66.67, "learning_rate": 1.667788330534128e-05, "loss": 1.957, "step": 13456500 }, { "epoch": 66.67, "learning_rate": 1.6676644718915195e-05, "loss": 1.9406, "step": 13457000 }, { "epoch": 66.67, "learning_rate": 1.6675406132489112e-05, "loss": 1.949, "step": 13457500 }, { "epoch": 66.68, "learning_rate": 1.667416754606303e-05, "loss": 1.9572, "step": 13458000 }, { "epoch": 66.68, "learning_rate": 1.6672928959636946e-05, "loss": 1.947, "step": 13458500 }, { "epoch": 66.68, "learning_rate": 1.6671690373210863e-05, "loss": 1.9508, "step": 13459000 }, { "epoch": 66.68, "learning_rate": 1.667045178678478e-05, "loss": 1.98, "step": 13459500 }, { "epoch": 66.69, "learning_rate": 1.6669213200358696e-05, "loss": 1.9425, "step": 13460000 }, { "epoch": 66.69, "learning_rate": 1.6667977091105462e-05, "loss": 1.947, "step": 13460500 }, { "epoch": 66.69, "learning_rate": 1.666673850467938e-05, "loss": 1.9525, "step": 13461000 }, { "epoch": 66.69, "learning_rate": 1.6665499918253296e-05, "loss": 1.9443, "step": 13461500 }, { "epoch": 66.7, "learning_rate": 1.6664261331827213e-05, "loss": 1.9599, "step": 13462000 }, { "epoch": 66.7, "learning_rate": 1.666302274540113e-05, "loss": 1.9426, "step": 13462500 }, { "epoch": 66.7, "learning_rate": 1.6661784158975047e-05, "loss": 1.9475, "step": 13463000 }, { "epoch": 66.7, "learning_rate": 1.6660545572548963e-05, "loss": 1.9334, "step": 13463500 }, { "epoch": 66.71, "learning_rate": 1.6659306986122877e-05, "loss": 1.9481, "step": 13464000 }, { "epoch": 66.71, "learning_rate": 1.6658068399696794e-05, "loss": 1.9525, "step": 13464500 }, { "epoch": 66.71, "learning_rate": 1.6656832290443563e-05, "loss": 1.9444, "step": 13465000 }, { "epoch": 66.71, "learning_rate": 1.665559370401748e-05, "loss": 1.9624, "step": 13465500 }, { "epoch": 66.72, "learning_rate": 1.6654355117591397e-05, "loss": 1.9504, "step": 13466000 }, { "epoch": 66.72, "learning_rate": 1.6653119008338165e-05, "loss": 1.9431, "step": 13466500 }, { "epoch": 66.72, "learning_rate": 1.665188042191208e-05, "loss": 1.9485, "step": 13467000 }, { "epoch": 66.72, "learning_rate": 1.665064431265885e-05, "loss": 1.9445, "step": 13467500 }, { "epoch": 66.73, "learning_rate": 1.6649405726232768e-05, "loss": 1.9548, "step": 13468000 }, { "epoch": 66.73, "learning_rate": 1.6648167139806685e-05, "loss": 1.9836, "step": 13468500 }, { "epoch": 66.73, "learning_rate": 1.66469285533806e-05, "loss": 1.9671, "step": 13469000 }, { "epoch": 66.73, "learning_rate": 1.6645689966954515e-05, "loss": 1.9624, "step": 13469500 }, { "epoch": 66.74, "learning_rate": 1.6644451380528432e-05, "loss": 1.9595, "step": 13470000 }, { "epoch": 66.74, "learning_rate": 1.66432152712752e-05, "loss": 1.9599, "step": 13470500 }, { "epoch": 66.74, "learning_rate": 1.6641979162021967e-05, "loss": 1.965, "step": 13471000 }, { "epoch": 66.74, "learning_rate": 1.6640740575595884e-05, "loss": 1.9519, "step": 13471500 }, { "epoch": 66.74, "learning_rate": 1.66395019891698e-05, "loss": 1.9496, "step": 13472000 }, { "epoch": 66.75, "learning_rate": 1.6638263402743717e-05, "loss": 1.9469, "step": 13472500 }, { "epoch": 66.75, "learning_rate": 1.6637024816317634e-05, "loss": 1.9628, "step": 13473000 }, { "epoch": 66.75, "learning_rate": 1.663578622989155e-05, "loss": 1.9351, "step": 13473500 }, { "epoch": 66.75, "learning_rate": 1.6634550120638317e-05, "loss": 1.9298, "step": 13474000 }, { "epoch": 66.76, "learning_rate": 1.6633311534212234e-05, "loss": 1.9352, "step": 13474500 }, { "epoch": 66.76, "learning_rate": 1.663207294778615e-05, "loss": 1.9558, "step": 13475000 }, { "epoch": 66.76, "learning_rate": 1.6630834361360067e-05, "loss": 1.9704, "step": 13475500 }, { "epoch": 66.76, "learning_rate": 1.6629595774933984e-05, "loss": 1.9493, "step": 13476000 }, { "epoch": 66.77, "learning_rate": 1.66283571885079e-05, "loss": 1.9384, "step": 13476500 }, { "epoch": 66.77, "learning_rate": 1.6627118602081818e-05, "loss": 1.9333, "step": 13477000 }, { "epoch": 66.77, "learning_rate": 1.6625880015655735e-05, "loss": 1.9527, "step": 13477500 }, { "epoch": 66.77, "learning_rate": 1.66246439064025e-05, "loss": 1.9491, "step": 13478000 }, { "epoch": 66.78, "learning_rate": 1.6623405319976418e-05, "loss": 1.9686, "step": 13478500 }, { "epoch": 66.78, "learning_rate": 1.6622166733550334e-05, "loss": 1.9734, "step": 13479000 }, { "epoch": 66.78, "learning_rate": 1.6620930624297103e-05, "loss": 1.9658, "step": 13479500 }, { "epoch": 66.78, "learning_rate": 1.6619692037871017e-05, "loss": 1.9376, "step": 13480000 }, { "epoch": 66.79, "learning_rate": 1.6618453451444934e-05, "loss": 1.9556, "step": 13480500 }, { "epoch": 66.79, "learning_rate": 1.661721486501885e-05, "loss": 1.9401, "step": 13481000 }, { "epoch": 66.79, "learning_rate": 1.6615976278592768e-05, "loss": 1.9632, "step": 13481500 }, { "epoch": 66.79, "learning_rate": 1.6614737692166685e-05, "loss": 1.964, "step": 13482000 }, { "epoch": 66.8, "learning_rate": 1.66134991057406e-05, "loss": 1.9549, "step": 13482500 }, { "epoch": 66.8, "learning_rate": 1.661226051931452e-05, "loss": 1.966, "step": 13483000 }, { "epoch": 66.8, "learning_rate": 1.6611021932888435e-05, "loss": 1.9616, "step": 13483500 }, { "epoch": 66.8, "learning_rate": 1.6609783346462352e-05, "loss": 1.9563, "step": 13484000 }, { "epoch": 66.81, "learning_rate": 1.6608547237209118e-05, "loss": 1.9731, "step": 13484500 }, { "epoch": 66.81, "learning_rate": 1.6607308650783035e-05, "loss": 1.9674, "step": 13485000 }, { "epoch": 66.81, "learning_rate": 1.660607006435695e-05, "loss": 1.9807, "step": 13485500 }, { "epoch": 66.81, "learning_rate": 1.660483395510372e-05, "loss": 1.9476, "step": 13486000 }, { "epoch": 66.82, "learning_rate": 1.6603595368677634e-05, "loss": 1.9649, "step": 13486500 }, { "epoch": 66.82, "learning_rate": 1.660235678225155e-05, "loss": 1.9769, "step": 13487000 }, { "epoch": 66.82, "learning_rate": 1.6601118195825468e-05, "loss": 1.9502, "step": 13487500 }, { "epoch": 66.82, "learning_rate": 1.6599879609399385e-05, "loss": 1.9484, "step": 13488000 }, { "epoch": 66.83, "learning_rate": 1.65986410229733e-05, "loss": 1.9349, "step": 13488500 }, { "epoch": 66.83, "learning_rate": 1.659740243654722e-05, "loss": 1.9325, "step": 13489000 }, { "epoch": 66.83, "learning_rate": 1.6596163850121135e-05, "loss": 1.933, "step": 13489500 }, { "epoch": 66.83, "learning_rate": 1.6594925263695052e-05, "loss": 1.9562, "step": 13490000 }, { "epoch": 66.84, "learning_rate": 1.659368667726897e-05, "loss": 1.9533, "step": 13490500 }, { "epoch": 66.84, "learning_rate": 1.6592450568015735e-05, "loss": 1.9519, "step": 13491000 }, { "epoch": 66.84, "learning_rate": 1.6591214458762504e-05, "loss": 1.9563, "step": 13491500 }, { "epoch": 66.84, "learning_rate": 1.658997587233642e-05, "loss": 1.949, "step": 13492000 }, { "epoch": 66.85, "learning_rate": 1.6588737285910337e-05, "loss": 1.9582, "step": 13492500 }, { "epoch": 66.85, "learning_rate": 1.658749869948425e-05, "loss": 1.9409, "step": 13493000 }, { "epoch": 66.85, "learning_rate": 1.6586260113058168e-05, "loss": 1.9468, "step": 13493500 }, { "epoch": 66.85, "learning_rate": 1.6585021526632085e-05, "loss": 1.9487, "step": 13494000 }, { "epoch": 66.86, "learning_rate": 1.6583782940206e-05, "loss": 1.9446, "step": 13494500 }, { "epoch": 66.86, "learning_rate": 1.658254435377992e-05, "loss": 1.9497, "step": 13495000 }, { "epoch": 66.86, "learning_rate": 1.6581305767353836e-05, "loss": 1.9752, "step": 13495500 }, { "epoch": 66.86, "learning_rate": 1.6580067180927752e-05, "loss": 1.9387, "step": 13496000 }, { "epoch": 66.87, "learning_rate": 1.657882859450167e-05, "loss": 1.9755, "step": 13496500 }, { "epoch": 66.87, "learning_rate": 1.6577590008075583e-05, "loss": 1.9689, "step": 13497000 }, { "epoch": 66.87, "learning_rate": 1.65763514216495e-05, "loss": 1.9478, "step": 13497500 }, { "epoch": 66.87, "learning_rate": 1.6575112835223417e-05, "loss": 1.9719, "step": 13498000 }, { "epoch": 66.88, "learning_rate": 1.6573874248797334e-05, "loss": 1.9597, "step": 13498500 }, { "epoch": 66.88, "learning_rate": 1.657263566237125e-05, "loss": 1.9567, "step": 13499000 }, { "epoch": 66.88, "learning_rate": 1.6571397075945168e-05, "loss": 1.9384, "step": 13499500 }, { "epoch": 66.88, "learning_rate": 1.6570160966691933e-05, "loss": 1.9508, "step": 13500000 }, { "epoch": 66.89, "learning_rate": 1.656892238026585e-05, "loss": 1.9606, "step": 13500500 }, { "epoch": 66.89, "learning_rate": 1.656768627101262e-05, "loss": 1.9357, "step": 13501000 }, { "epoch": 66.89, "learning_rate": 1.6566447684586536e-05, "loss": 1.923, "step": 13501500 }, { "epoch": 66.89, "learning_rate": 1.6565211575333304e-05, "loss": 1.946, "step": 13502000 }, { "epoch": 66.9, "learning_rate": 1.656397298890722e-05, "loss": 1.9666, "step": 13502500 }, { "epoch": 66.9, "learning_rate": 1.6562734402481135e-05, "loss": 1.9497, "step": 13503000 }, { "epoch": 66.9, "learning_rate": 1.6561495816055052e-05, "loss": 1.9633, "step": 13503500 }, { "epoch": 66.9, "learning_rate": 1.656025722962897e-05, "loss": 1.9276, "step": 13504000 }, { "epoch": 66.91, "learning_rate": 1.6559018643202886e-05, "loss": 1.9686, "step": 13504500 }, { "epoch": 66.91, "learning_rate": 1.6557780056776803e-05, "loss": 1.9658, "step": 13505000 }, { "epoch": 66.91, "learning_rate": 1.655654147035072e-05, "loss": 1.9536, "step": 13505500 }, { "epoch": 66.91, "learning_rate": 1.6555302883924637e-05, "loss": 1.9356, "step": 13506000 }, { "epoch": 66.92, "learning_rate": 1.655406429749855e-05, "loss": 1.9395, "step": 13506500 }, { "epoch": 66.92, "learning_rate": 1.6552825711072467e-05, "loss": 1.9602, "step": 13507000 }, { "epoch": 66.92, "learning_rate": 1.6551587124646384e-05, "loss": 1.938, "step": 13507500 }, { "epoch": 66.92, "learning_rate": 1.65503485382203e-05, "loss": 1.9264, "step": 13508000 }, { "epoch": 66.93, "learning_rate": 1.6549109951794218e-05, "loss": 1.9303, "step": 13508500 }, { "epoch": 66.93, "learning_rate": 1.6547871365368135e-05, "loss": 1.9353, "step": 13509000 }, { "epoch": 66.93, "learning_rate": 1.654663277894205e-05, "loss": 1.9648, "step": 13509500 }, { "epoch": 66.93, "learning_rate": 1.654539419251597e-05, "loss": 1.9392, "step": 13510000 }, { "epoch": 66.94, "learning_rate": 1.6544158083262734e-05, "loss": 1.9531, "step": 13510500 }, { "epoch": 66.94, "learning_rate": 1.654291949683665e-05, "loss": 1.9546, "step": 13511000 }, { "epoch": 66.94, "learning_rate": 1.6541680910410568e-05, "loss": 1.9484, "step": 13511500 }, { "epoch": 66.94, "learning_rate": 1.6540444801157337e-05, "loss": 1.9476, "step": 13512000 }, { "epoch": 66.95, "learning_rate": 1.653920621473125e-05, "loss": 1.9539, "step": 13512500 }, { "epoch": 66.95, "learning_rate": 1.653797010547802e-05, "loss": 1.9619, "step": 13513000 }, { "epoch": 66.95, "learning_rate": 1.6536731519051936e-05, "loss": 1.9476, "step": 13513500 }, { "epoch": 66.95, "learning_rate": 1.6535492932625853e-05, "loss": 1.9515, "step": 13514000 }, { "epoch": 66.96, "learning_rate": 1.653425434619977e-05, "loss": 1.9296, "step": 13514500 }, { "epoch": 66.96, "learning_rate": 1.6533015759773687e-05, "loss": 1.9372, "step": 13515000 }, { "epoch": 66.96, "learning_rate": 1.6531779650520456e-05, "loss": 1.9663, "step": 13515500 }, { "epoch": 66.96, "learning_rate": 1.6530543541267224e-05, "loss": 1.9524, "step": 13516000 }, { "epoch": 66.97, "learning_rate": 1.652930495484114e-05, "loss": 1.9795, "step": 13516500 }, { "epoch": 66.97, "learning_rate": 1.6528066368415058e-05, "loss": 1.9433, "step": 13517000 }, { "epoch": 66.97, "learning_rate": 1.6526827781988972e-05, "loss": 1.9558, "step": 13517500 }, { "epoch": 66.97, "learning_rate": 1.652558919556289e-05, "loss": 1.9878, "step": 13518000 }, { "epoch": 66.98, "learning_rate": 1.6524350609136806e-05, "loss": 1.9524, "step": 13518500 }, { "epoch": 66.98, "learning_rate": 1.6523112022710722e-05, "loss": 1.9496, "step": 13519000 }, { "epoch": 66.98, "learning_rate": 1.652187343628464e-05, "loss": 1.9501, "step": 13519500 }, { "epoch": 66.98, "learning_rate": 1.6520634849858553e-05, "loss": 1.961, "step": 13520000 }, { "epoch": 66.99, "learning_rate": 1.6519398740605325e-05, "loss": 1.9459, "step": 13520500 }, { "epoch": 66.99, "learning_rate": 1.651816015417924e-05, "loss": 1.9533, "step": 13521000 }, { "epoch": 66.99, "learning_rate": 1.6516921567753156e-05, "loss": 1.9379, "step": 13521500 }, { "epoch": 66.99, "learning_rate": 1.6515682981327073e-05, "loss": 1.9328, "step": 13522000 }, { "epoch": 67.0, "learning_rate": 1.651444439490099e-05, "loss": 1.933, "step": 13522500 }, { "epoch": 67.0, "learning_rate": 1.6513205808474906e-05, "loss": 1.9564, "step": 13523000 }, { "epoch": 67.0, "eval_accuracy": 0.6764576765250033, "eval_accuracy_mlm": 0.6361046052565824, "eval_accuracy_nsp": 0.866484415141258, "eval_loss": 2.281733989715576, "eval_runtime": 146.88, "eval_samples_per_second": 1735.832, "eval_steps_per_second": 72.331, "step": 13523481 }, { "epoch": 67.0, "learning_rate": 1.651196722204882e-05, "loss": 1.9792, "step": 13523500 }, { "epoch": 67.0, "learning_rate": 1.6510728635622737e-05, "loss": 1.9149, "step": 13524000 }, { "epoch": 67.01, "learning_rate": 1.6509490049196654e-05, "loss": 1.9298, "step": 13524500 }, { "epoch": 67.01, "learning_rate": 1.6508253939943423e-05, "loss": 1.9108, "step": 13525000 }, { "epoch": 67.01, "learning_rate": 1.650701783069019e-05, "loss": 1.9462, "step": 13525500 }, { "epoch": 67.01, "learning_rate": 1.650577924426411e-05, "loss": 1.9349, "step": 13526000 }, { "epoch": 67.01, "learning_rate": 1.6504540657838025e-05, "loss": 1.9297, "step": 13526500 }, { "epoch": 67.02, "learning_rate": 1.650330207141194e-05, "loss": 1.9531, "step": 13527000 }, { "epoch": 67.02, "learning_rate": 1.6502063484985856e-05, "loss": 1.9347, "step": 13527500 }, { "epoch": 67.02, "learning_rate": 1.6500824898559773e-05, "loss": 1.9398, "step": 13528000 }, { "epoch": 67.02, "learning_rate": 1.649958631213369e-05, "loss": 1.9184, "step": 13528500 }, { "epoch": 67.03, "learning_rate": 1.6498347725707607e-05, "loss": 1.9268, "step": 13529000 }, { "epoch": 67.03, "learning_rate": 1.6497109139281523e-05, "loss": 1.9282, "step": 13529500 }, { "epoch": 67.03, "learning_rate": 1.649587303002829e-05, "loss": 1.9188, "step": 13530000 }, { "epoch": 67.03, "learning_rate": 1.6494634443602206e-05, "loss": 1.9662, "step": 13530500 }, { "epoch": 67.04, "learning_rate": 1.6493395857176123e-05, "loss": 1.9289, "step": 13531000 }, { "epoch": 67.04, "learning_rate": 1.649215974792289e-05, "loss": 1.9144, "step": 13531500 }, { "epoch": 67.04, "learning_rate": 1.649092116149681e-05, "loss": 1.9578, "step": 13532000 }, { "epoch": 67.04, "learning_rate": 1.6489682575070725e-05, "loss": 1.9375, "step": 13532500 }, { "epoch": 67.05, "learning_rate": 1.6488443988644642e-05, "loss": 1.942, "step": 13533000 }, { "epoch": 67.05, "learning_rate": 1.6487207879391408e-05, "loss": 1.939, "step": 13533500 }, { "epoch": 67.05, "learning_rate": 1.6485969292965325e-05, "loss": 1.9311, "step": 13534000 }, { "epoch": 67.05, "learning_rate": 1.648473070653924e-05, "loss": 1.9585, "step": 13534500 }, { "epoch": 67.06, "learning_rate": 1.648349212011316e-05, "loss": 1.9327, "step": 13535000 }, { "epoch": 67.06, "learning_rate": 1.648225848803278e-05, "loss": 1.9236, "step": 13535500 }, { "epoch": 67.06, "learning_rate": 1.6481022378779548e-05, "loss": 1.9517, "step": 13536000 }, { "epoch": 67.06, "learning_rate": 1.6479783792353465e-05, "loss": 1.9266, "step": 13536500 }, { "epoch": 67.07, "learning_rate": 1.647854520592738e-05, "loss": 1.945, "step": 13537000 }, { "epoch": 67.07, "learning_rate": 1.6477306619501295e-05, "loss": 1.9431, "step": 13537500 }, { "epoch": 67.07, "learning_rate": 1.6476068033075212e-05, "loss": 1.9466, "step": 13538000 }, { "epoch": 67.07, "learning_rate": 1.647482944664913e-05, "loss": 1.9238, "step": 13538500 }, { "epoch": 67.08, "learning_rate": 1.6473590860223046e-05, "loss": 1.9457, "step": 13539000 }, { "epoch": 67.08, "learning_rate": 1.6472352273796963e-05, "loss": 1.9574, "step": 13539500 }, { "epoch": 67.08, "learning_rate": 1.647111368737088e-05, "loss": 1.9377, "step": 13540000 }, { "epoch": 67.08, "learning_rate": 1.6469877578117646e-05, "loss": 1.9376, "step": 13540500 }, { "epoch": 67.09, "learning_rate": 1.6468638991691562e-05, "loss": 1.9436, "step": 13541000 }, { "epoch": 67.09, "learning_rate": 1.646740040526548e-05, "loss": 1.9582, "step": 13541500 }, { "epoch": 67.09, "learning_rate": 1.6466161818839396e-05, "loss": 1.932, "step": 13542000 }, { "epoch": 67.09, "learning_rate": 1.6464923232413313e-05, "loss": 1.9341, "step": 13542500 }, { "epoch": 67.1, "learning_rate": 1.646368464598723e-05, "loss": 1.91, "step": 13543000 }, { "epoch": 67.1, "learning_rate": 1.6462446059561147e-05, "loss": 1.9548, "step": 13543500 }, { "epoch": 67.1, "learning_rate": 1.6461207473135064e-05, "loss": 1.9318, "step": 13544000 }, { "epoch": 67.1, "learning_rate": 1.6459968886708978e-05, "loss": 1.9579, "step": 13544500 }, { "epoch": 67.11, "learning_rate": 1.6458732777455746e-05, "loss": 1.9086, "step": 13545000 }, { "epoch": 67.11, "learning_rate": 1.6457496668202515e-05, "loss": 1.9509, "step": 13545500 }, { "epoch": 67.11, "learning_rate": 1.6456258081776432e-05, "loss": 1.9348, "step": 13546000 }, { "epoch": 67.11, "learning_rate": 1.6455019495350346e-05, "loss": 1.9072, "step": 13546500 }, { "epoch": 67.12, "learning_rate": 1.6453780908924263e-05, "loss": 1.9182, "step": 13547000 }, { "epoch": 67.12, "learning_rate": 1.645254232249818e-05, "loss": 1.942, "step": 13547500 }, { "epoch": 67.12, "learning_rate": 1.6451303736072096e-05, "loss": 1.904, "step": 13548000 }, { "epoch": 67.12, "learning_rate": 1.6450065149646013e-05, "loss": 1.9247, "step": 13548500 }, { "epoch": 67.13, "learning_rate": 1.644882656321993e-05, "loss": 1.9364, "step": 13549000 }, { "epoch": 67.13, "learning_rate": 1.6447590453966696e-05, "loss": 1.9336, "step": 13549500 }, { "epoch": 67.13, "learning_rate": 1.6446351867540613e-05, "loss": 1.9315, "step": 13550000 }, { "epoch": 67.13, "learning_rate": 1.644511328111453e-05, "loss": 1.9296, "step": 13550500 }, { "epoch": 67.14, "learning_rate": 1.6443874694688446e-05, "loss": 1.9295, "step": 13551000 }, { "epoch": 67.14, "learning_rate": 1.6442636108262363e-05, "loss": 1.9398, "step": 13551500 }, { "epoch": 67.14, "learning_rate": 1.6441399999009132e-05, "loss": 1.9328, "step": 13552000 }, { "epoch": 67.14, "learning_rate": 1.6440161412583046e-05, "loss": 1.929, "step": 13552500 }, { "epoch": 67.15, "learning_rate": 1.6438922826156963e-05, "loss": 1.9374, "step": 13553000 }, { "epoch": 67.15, "learning_rate": 1.643768423973088e-05, "loss": 1.9318, "step": 13553500 }, { "epoch": 67.15, "learning_rate": 1.6436445653304797e-05, "loss": 1.9392, "step": 13554000 }, { "epoch": 67.15, "learning_rate": 1.6435209544051565e-05, "loss": 1.9443, "step": 13554500 }, { "epoch": 67.16, "learning_rate": 1.6433970957625482e-05, "loss": 1.9293, "step": 13555000 }, { "epoch": 67.16, "learning_rate": 1.64327323711994e-05, "loss": 1.9561, "step": 13555500 }, { "epoch": 67.16, "learning_rate": 1.6431496261946168e-05, "loss": 1.9282, "step": 13556000 }, { "epoch": 67.16, "learning_rate": 1.643025767552008e-05, "loss": 1.9387, "step": 13556500 }, { "epoch": 67.17, "learning_rate": 1.6429019089094e-05, "loss": 1.953, "step": 13557000 }, { "epoch": 67.17, "learning_rate": 1.6427780502667915e-05, "loss": 1.9331, "step": 13557500 }, { "epoch": 67.17, "learning_rate": 1.6426544393414684e-05, "loss": 1.9096, "step": 13558000 }, { "epoch": 67.17, "learning_rate": 1.64253058069886e-05, "loss": 1.9563, "step": 13558500 }, { "epoch": 67.18, "learning_rate": 1.6424067220562518e-05, "loss": 1.939, "step": 13559000 }, { "epoch": 67.18, "learning_rate": 1.6422828634136435e-05, "loss": 1.9608, "step": 13559500 }, { "epoch": 67.18, "learning_rate": 1.642159004771035e-05, "loss": 1.9272, "step": 13560000 }, { "epoch": 67.18, "learning_rate": 1.642035393845712e-05, "loss": 1.9368, "step": 13560500 }, { "epoch": 67.19, "learning_rate": 1.6419117829203886e-05, "loss": 1.9264, "step": 13561000 }, { "epoch": 67.19, "learning_rate": 1.6417879242777803e-05, "loss": 1.9177, "step": 13561500 }, { "epoch": 67.19, "learning_rate": 1.641664065635172e-05, "loss": 1.9497, "step": 13562000 }, { "epoch": 67.19, "learning_rate": 1.6415402069925637e-05, "loss": 1.9689, "step": 13562500 }, { "epoch": 67.2, "learning_rate": 1.6414163483499554e-05, "loss": 1.9396, "step": 13563000 }, { "epoch": 67.2, "learning_rate": 1.641292489707347e-05, "loss": 1.9509, "step": 13563500 }, { "epoch": 67.2, "learning_rate": 1.6411686310647384e-05, "loss": 1.9421, "step": 13564000 }, { "epoch": 67.2, "learning_rate": 1.6410450201394153e-05, "loss": 1.9356, "step": 13564500 }, { "epoch": 67.21, "learning_rate": 1.640921161496807e-05, "loss": 1.9468, "step": 13565000 }, { "epoch": 67.21, "learning_rate": 1.6407973028541987e-05, "loss": 1.942, "step": 13565500 }, { "epoch": 67.21, "learning_rate": 1.6406734442115904e-05, "loss": 1.9536, "step": 13566000 }, { "epoch": 67.21, "learning_rate": 1.640549585568982e-05, "loss": 1.94, "step": 13566500 }, { "epoch": 67.22, "learning_rate": 1.6404257269263734e-05, "loss": 1.9651, "step": 13567000 }, { "epoch": 67.22, "learning_rate": 1.6403021160010503e-05, "loss": 1.9577, "step": 13567500 }, { "epoch": 67.22, "learning_rate": 1.640178257358442e-05, "loss": 1.9615, "step": 13568000 }, { "epoch": 67.22, "learning_rate": 1.6400543987158337e-05, "loss": 1.9285, "step": 13568500 }, { "epoch": 67.23, "learning_rate": 1.6399305400732254e-05, "loss": 1.9474, "step": 13569000 }, { "epoch": 67.23, "learning_rate": 1.639806929147902e-05, "loss": 1.9317, "step": 13569500 }, { "epoch": 67.23, "learning_rate": 1.6396830705052936e-05, "loss": 1.9311, "step": 13570000 }, { "epoch": 67.23, "learning_rate": 1.6395592118626853e-05, "loss": 1.9443, "step": 13570500 }, { "epoch": 67.24, "learning_rate": 1.639435353220077e-05, "loss": 1.952, "step": 13571000 }, { "epoch": 67.24, "learning_rate": 1.6393114945774687e-05, "loss": 1.9232, "step": 13571500 }, { "epoch": 67.24, "learning_rate": 1.6391878836521456e-05, "loss": 1.9896, "step": 13572000 }, { "epoch": 67.24, "learning_rate": 1.639064025009537e-05, "loss": 1.9317, "step": 13572500 }, { "epoch": 67.25, "learning_rate": 1.6389401663669286e-05, "loss": 1.9427, "step": 13573000 }, { "epoch": 67.25, "learning_rate": 1.6388163077243203e-05, "loss": 1.9398, "step": 13573500 }, { "epoch": 67.25, "learning_rate": 1.638692449081712e-05, "loss": 1.9512, "step": 13574000 }, { "epoch": 67.25, "learning_rate": 1.6385685904391037e-05, "loss": 1.9524, "step": 13574500 }, { "epoch": 67.26, "learning_rate": 1.6384447317964954e-05, "loss": 1.9385, "step": 13575000 }, { "epoch": 67.26, "learning_rate": 1.638320873153887e-05, "loss": 1.9396, "step": 13575500 }, { "epoch": 67.26, "learning_rate": 1.6381970145112788e-05, "loss": 1.9545, "step": 13576000 }, { "epoch": 67.26, "learning_rate": 1.63807315586867e-05, "loss": 1.9298, "step": 13576500 }, { "epoch": 67.27, "learning_rate": 1.637949297226062e-05, "loss": 1.9392, "step": 13577000 }, { "epoch": 67.27, "learning_rate": 1.6378254385834535e-05, "loss": 1.957, "step": 13577500 }, { "epoch": 67.27, "learning_rate": 1.6377015799408452e-05, "loss": 1.9634, "step": 13578000 }, { "epoch": 67.27, "learning_rate": 1.637577969015522e-05, "loss": 1.9426, "step": 13578500 }, { "epoch": 67.28, "learning_rate": 1.6374541103729138e-05, "loss": 1.9405, "step": 13579000 }, { "epoch": 67.28, "learning_rate": 1.637330251730305e-05, "loss": 1.9492, "step": 13579500 }, { "epoch": 67.28, "learning_rate": 1.637206393087697e-05, "loss": 1.9401, "step": 13580000 }, { "epoch": 67.28, "learning_rate": 1.6370825344450885e-05, "loss": 1.9047, "step": 13580500 }, { "epoch": 67.28, "learning_rate": 1.6369589235197654e-05, "loss": 1.9297, "step": 13581000 }, { "epoch": 67.29, "learning_rate": 1.6368355603117275e-05, "loss": 1.9452, "step": 13581500 }, { "epoch": 67.29, "learning_rate": 1.6367117016691192e-05, "loss": 1.946, "step": 13582000 }, { "epoch": 67.29, "learning_rate": 1.636587843026511e-05, "loss": 1.945, "step": 13582500 }, { "epoch": 67.29, "learning_rate": 1.6364639843839026e-05, "loss": 1.9338, "step": 13583000 }, { "epoch": 67.3, "learning_rate": 1.6363401257412943e-05, "loss": 1.9262, "step": 13583500 }, { "epoch": 67.3, "learning_rate": 1.6362165148159708e-05, "loss": 1.9511, "step": 13584000 }, { "epoch": 67.3, "learning_rate": 1.6360926561733625e-05, "loss": 1.9771, "step": 13584500 }, { "epoch": 67.3, "learning_rate": 1.6359687975307542e-05, "loss": 1.9271, "step": 13585000 }, { "epoch": 67.31, "learning_rate": 1.635844938888146e-05, "loss": 1.9361, "step": 13585500 }, { "epoch": 67.31, "learning_rate": 1.6357210802455376e-05, "loss": 1.9259, "step": 13586000 }, { "epoch": 67.31, "learning_rate": 1.635597469320214e-05, "loss": 1.9209, "step": 13586500 }, { "epoch": 67.31, "learning_rate": 1.6354736106776058e-05, "loss": 1.9344, "step": 13587000 }, { "epoch": 67.32, "learning_rate": 1.6353497520349975e-05, "loss": 1.9223, "step": 13587500 }, { "epoch": 67.32, "learning_rate": 1.6352258933923892e-05, "loss": 1.9431, "step": 13588000 }, { "epoch": 67.32, "learning_rate": 1.635102034749781e-05, "loss": 1.9122, "step": 13588500 }, { "epoch": 67.32, "learning_rate": 1.6349781761071726e-05, "loss": 1.9214, "step": 13589000 }, { "epoch": 67.33, "learning_rate": 1.6348543174645643e-05, "loss": 1.9686, "step": 13589500 }, { "epoch": 67.33, "learning_rate": 1.634730458821956e-05, "loss": 1.9228, "step": 13590000 }, { "epoch": 67.33, "learning_rate": 1.6346066001793477e-05, "loss": 1.9527, "step": 13590500 }, { "epoch": 67.33, "learning_rate": 1.634482741536739e-05, "loss": 1.9228, "step": 13591000 }, { "epoch": 67.34, "learning_rate": 1.6343588828941307e-05, "loss": 1.9462, "step": 13591500 }, { "epoch": 67.34, "learning_rate": 1.6342350242515224e-05, "loss": 1.9354, "step": 13592000 }, { "epoch": 67.34, "learning_rate": 1.6341114133261993e-05, "loss": 1.9536, "step": 13592500 }, { "epoch": 67.34, "learning_rate": 1.6339878024008758e-05, "loss": 1.9476, "step": 13593000 }, { "epoch": 67.35, "learning_rate": 1.6338639437582675e-05, "loss": 1.9227, "step": 13593500 }, { "epoch": 67.35, "learning_rate": 1.6337400851156592e-05, "loss": 1.9571, "step": 13594000 }, { "epoch": 67.35, "learning_rate": 1.633616226473051e-05, "loss": 1.9464, "step": 13594500 }, { "epoch": 67.35, "learning_rate": 1.6334923678304426e-05, "loss": 1.9448, "step": 13595000 }, { "epoch": 67.36, "learning_rate": 1.6333685091878343e-05, "loss": 1.9539, "step": 13595500 }, { "epoch": 67.36, "learning_rate": 1.633244650545226e-05, "loss": 1.9438, "step": 13596000 }, { "epoch": 67.36, "learning_rate": 1.6331207919026177e-05, "loss": 1.9388, "step": 13596500 }, { "epoch": 67.36, "learning_rate": 1.6329971809772942e-05, "loss": 1.9541, "step": 13597000 }, { "epoch": 67.37, "learning_rate": 1.632873322334686e-05, "loss": 1.9374, "step": 13597500 }, { "epoch": 67.37, "learning_rate": 1.6327494636920776e-05, "loss": 1.964, "step": 13598000 }, { "epoch": 67.37, "learning_rate": 1.6326256050494693e-05, "loss": 1.943, "step": 13598500 }, { "epoch": 67.37, "learning_rate": 1.632501746406861e-05, "loss": 1.9428, "step": 13599000 }, { "epoch": 67.38, "learning_rate": 1.6323778877642527e-05, "loss": 1.9373, "step": 13599500 }, { "epoch": 67.38, "learning_rate": 1.632254029121644e-05, "loss": 1.9254, "step": 13600000 }, { "epoch": 67.38, "learning_rate": 1.6321301704790357e-05, "loss": 1.9198, "step": 13600500 }, { "epoch": 67.38, "learning_rate": 1.6320063118364274e-05, "loss": 1.9331, "step": 13601000 }, { "epoch": 67.39, "learning_rate": 1.6318827009111043e-05, "loss": 1.931, "step": 13601500 }, { "epoch": 67.39, "learning_rate": 1.631758842268496e-05, "loss": 1.9429, "step": 13602000 }, { "epoch": 67.39, "learning_rate": 1.6316349836258877e-05, "loss": 1.9498, "step": 13602500 }, { "epoch": 67.39, "learning_rate": 1.6315111249832794e-05, "loss": 1.9228, "step": 13603000 }, { "epoch": 67.4, "learning_rate": 1.6313872663406707e-05, "loss": 1.9368, "step": 13603500 }, { "epoch": 67.4, "learning_rate": 1.6312634076980624e-05, "loss": 1.945, "step": 13604000 }, { "epoch": 67.4, "learning_rate": 1.631139549055454e-05, "loss": 1.9665, "step": 13604500 }, { "epoch": 67.4, "learning_rate": 1.6310156904128458e-05, "loss": 1.9435, "step": 13605000 }, { "epoch": 67.41, "learning_rate": 1.6308918317702375e-05, "loss": 1.9308, "step": 13605500 }, { "epoch": 67.41, "learning_rate": 1.630767973127629e-05, "loss": 1.9465, "step": 13606000 }, { "epoch": 67.41, "learning_rate": 1.6306441144850206e-05, "loss": 1.9466, "step": 13606500 }, { "epoch": 67.41, "learning_rate": 1.6305202558424122e-05, "loss": 1.9615, "step": 13607000 }, { "epoch": 67.42, "learning_rate": 1.6303963971998036e-05, "loss": 1.9488, "step": 13607500 }, { "epoch": 67.42, "learning_rate": 1.6302725385571953e-05, "loss": 1.9235, "step": 13608000 }, { "epoch": 67.42, "learning_rate": 1.6301491753491577e-05, "loss": 1.9564, "step": 13608500 }, { "epoch": 67.42, "learning_rate": 1.6300253167065494e-05, "loss": 1.924, "step": 13609000 }, { "epoch": 67.43, "learning_rate": 1.6299014580639407e-05, "loss": 1.9439, "step": 13609500 }, { "epoch": 67.43, "learning_rate": 1.6297778471386176e-05, "loss": 1.9491, "step": 13610000 }, { "epoch": 67.43, "learning_rate": 1.6296539884960093e-05, "loss": 1.94, "step": 13610500 }, { "epoch": 67.43, "learning_rate": 1.629530129853401e-05, "loss": 1.9607, "step": 13611000 }, { "epoch": 67.44, "learning_rate": 1.6294062712107927e-05, "loss": 1.9459, "step": 13611500 }, { "epoch": 67.44, "learning_rate": 1.6292824125681844e-05, "loss": 1.959, "step": 13612000 }, { "epoch": 67.44, "learning_rate": 1.6291585539255758e-05, "loss": 1.9476, "step": 13612500 }, { "epoch": 67.44, "learning_rate": 1.6290346952829674e-05, "loss": 1.9262, "step": 13613000 }, { "epoch": 67.45, "learning_rate": 1.628910836640359e-05, "loss": 1.9229, "step": 13613500 }, { "epoch": 67.45, "learning_rate": 1.628786977997751e-05, "loss": 1.9517, "step": 13614000 }, { "epoch": 67.45, "learning_rate": 1.6286631193551425e-05, "loss": 1.9336, "step": 13614500 }, { "epoch": 67.45, "learning_rate": 1.6285392607125342e-05, "loss": 1.9442, "step": 13615000 }, { "epoch": 67.46, "learning_rate": 1.628415402069926e-05, "loss": 1.946, "step": 13615500 }, { "epoch": 67.46, "learning_rate": 1.6282915434273176e-05, "loss": 1.9334, "step": 13616000 }, { "epoch": 67.46, "learning_rate": 1.628167932501994e-05, "loss": 1.9523, "step": 13616500 }, { "epoch": 67.46, "learning_rate": 1.628044073859386e-05, "loss": 1.9572, "step": 13617000 }, { "epoch": 67.47, "learning_rate": 1.6279202152167775e-05, "loss": 1.9344, "step": 13617500 }, { "epoch": 67.47, "learning_rate": 1.6277966042914544e-05, "loss": 1.9547, "step": 13618000 }, { "epoch": 67.47, "learning_rate": 1.627672745648846e-05, "loss": 1.9713, "step": 13618500 }, { "epoch": 67.47, "learning_rate": 1.6275491347235226e-05, "loss": 1.9231, "step": 13619000 }, { "epoch": 67.48, "learning_rate": 1.6274252760809143e-05, "loss": 1.945, "step": 13619500 }, { "epoch": 67.48, "learning_rate": 1.627301417438306e-05, "loss": 1.9567, "step": 13620000 }, { "epoch": 67.48, "learning_rate": 1.6271775587956977e-05, "loss": 1.9309, "step": 13620500 }, { "epoch": 67.48, "learning_rate": 1.6270537001530894e-05, "loss": 1.9259, "step": 13621000 }, { "epoch": 67.49, "learning_rate": 1.626929841510481e-05, "loss": 1.9251, "step": 13621500 }, { "epoch": 67.49, "learning_rate": 1.6268059828678725e-05, "loss": 1.9353, "step": 13622000 }, { "epoch": 67.49, "learning_rate": 1.626682124225264e-05, "loss": 1.9651, "step": 13622500 }, { "epoch": 67.49, "learning_rate": 1.626558265582656e-05, "loss": 1.952, "step": 13623000 }, { "epoch": 67.5, "learning_rate": 1.6264344069400475e-05, "loss": 1.9281, "step": 13623500 }, { "epoch": 67.5, "learning_rate": 1.6263105482974392e-05, "loss": 1.9636, "step": 13624000 }, { "epoch": 67.5, "learning_rate": 1.626186689654831e-05, "loss": 1.9327, "step": 13624500 }, { "epoch": 67.5, "learning_rate": 1.6260628310122226e-05, "loss": 1.93, "step": 13625000 }, { "epoch": 67.51, "learning_rate": 1.6259389723696143e-05, "loss": 1.9479, "step": 13625500 }, { "epoch": 67.51, "learning_rate": 1.625815361444291e-05, "loss": 1.9564, "step": 13626000 }, { "epoch": 67.51, "learning_rate": 1.6256915028016825e-05, "loss": 1.9234, "step": 13626500 }, { "epoch": 67.51, "learning_rate": 1.6255676441590742e-05, "loss": 1.9644, "step": 13627000 }, { "epoch": 67.52, "learning_rate": 1.625443785516466e-05, "loss": 1.9354, "step": 13627500 }, { "epoch": 67.52, "learning_rate": 1.6253199268738576e-05, "loss": 1.9576, "step": 13628000 }, { "epoch": 67.52, "learning_rate": 1.6251960682312493e-05, "loss": 1.949, "step": 13628500 }, { "epoch": 67.52, "learning_rate": 1.625072209588641e-05, "loss": 1.9545, "step": 13629000 }, { "epoch": 67.53, "learning_rate": 1.6249483509460324e-05, "loss": 1.941, "step": 13629500 }, { "epoch": 67.53, "learning_rate": 1.6248247400207092e-05, "loss": 1.94, "step": 13630000 }, { "epoch": 67.53, "learning_rate": 1.624701129095386e-05, "loss": 1.9666, "step": 13630500 }, { "epoch": 67.53, "learning_rate": 1.624577518170063e-05, "loss": 1.9899, "step": 13631000 }, { "epoch": 67.54, "learning_rate": 1.6244536595274547e-05, "loss": 1.9381, "step": 13631500 }, { "epoch": 67.54, "learning_rate": 1.624329800884846e-05, "loss": 1.9471, "step": 13632000 }, { "epoch": 67.54, "learning_rate": 1.6242061899595233e-05, "loss": 1.9362, "step": 13632500 }, { "epoch": 67.54, "learning_rate": 1.6240825790341998e-05, "loss": 1.9646, "step": 13633000 }, { "epoch": 67.55, "learning_rate": 1.6239587203915915e-05, "loss": 1.9516, "step": 13633500 }, { "epoch": 67.55, "learning_rate": 1.6238348617489832e-05, "loss": 1.9441, "step": 13634000 }, { "epoch": 67.55, "learning_rate": 1.623711003106375e-05, "loss": 1.9279, "step": 13634500 }, { "epoch": 67.55, "learning_rate": 1.6235871444637666e-05, "loss": 1.9313, "step": 13635000 }, { "epoch": 67.55, "learning_rate": 1.623463533538443e-05, "loss": 1.935, "step": 13635500 }, { "epoch": 67.56, "learning_rate": 1.6233396748958348e-05, "loss": 1.9279, "step": 13636000 }, { "epoch": 67.56, "learning_rate": 1.6232158162532265e-05, "loss": 1.9436, "step": 13636500 }, { "epoch": 67.56, "learning_rate": 1.6230919576106182e-05, "loss": 1.9353, "step": 13637000 }, { "epoch": 67.56, "learning_rate": 1.62296809896801e-05, "loss": 1.9541, "step": 13637500 }, { "epoch": 67.57, "learning_rate": 1.6228442403254016e-05, "loss": 1.952, "step": 13638000 }, { "epoch": 67.57, "learning_rate": 1.6227203816827933e-05, "loss": 1.9453, "step": 13638500 }, { "epoch": 67.57, "learning_rate": 1.622596523040185e-05, "loss": 1.9322, "step": 13639000 }, { "epoch": 67.57, "learning_rate": 1.6224726643975763e-05, "loss": 1.9343, "step": 13639500 }, { "epoch": 67.58, "learning_rate": 1.622348805754968e-05, "loss": 1.9519, "step": 13640000 }, { "epoch": 67.58, "learning_rate": 1.6222249471123597e-05, "loss": 1.9282, "step": 13640500 }, { "epoch": 67.58, "learning_rate": 1.6221015839043218e-05, "loss": 1.9446, "step": 13641000 }, { "epoch": 67.58, "learning_rate": 1.621977725261713e-05, "loss": 1.9235, "step": 13641500 }, { "epoch": 67.59, "learning_rate": 1.621853866619105e-05, "loss": 1.9271, "step": 13642000 }, { "epoch": 67.59, "learning_rate": 1.6217300079764965e-05, "loss": 1.9465, "step": 13642500 }, { "epoch": 67.59, "learning_rate": 1.6216061493338882e-05, "loss": 1.951, "step": 13643000 }, { "epoch": 67.59, "learning_rate": 1.62148229069128e-05, "loss": 1.9282, "step": 13643500 }, { "epoch": 67.6, "learning_rate": 1.6213584320486716e-05, "loss": 1.942, "step": 13644000 }, { "epoch": 67.6, "learning_rate": 1.6212345734060633e-05, "loss": 1.9495, "step": 13644500 }, { "epoch": 67.6, "learning_rate": 1.621110714763455e-05, "loss": 1.9512, "step": 13645000 }, { "epoch": 67.6, "learning_rate": 1.6209868561208467e-05, "loss": 1.9379, "step": 13645500 }, { "epoch": 67.61, "learning_rate": 1.620862997478238e-05, "loss": 1.9514, "step": 13646000 }, { "epoch": 67.61, "learning_rate": 1.620739386552915e-05, "loss": 1.9538, "step": 13646500 }, { "epoch": 67.61, "learning_rate": 1.6206155279103066e-05, "loss": 1.9278, "step": 13647000 }, { "epoch": 67.61, "learning_rate": 1.6204916692676983e-05, "loss": 1.9326, "step": 13647500 }, { "epoch": 67.62, "learning_rate": 1.62036781062509e-05, "loss": 1.9151, "step": 13648000 }, { "epoch": 67.62, "learning_rate": 1.6202439519824817e-05, "loss": 1.9262, "step": 13648500 }, { "epoch": 67.62, "learning_rate": 1.620120093339873e-05, "loss": 1.9353, "step": 13649000 }, { "epoch": 67.62, "learning_rate": 1.6199962346972647e-05, "loss": 1.9454, "step": 13649500 }, { "epoch": 67.63, "learning_rate": 1.6198723760546564e-05, "loss": 1.9299, "step": 13650000 }, { "epoch": 67.63, "learning_rate": 1.6197487651293333e-05, "loss": 1.9428, "step": 13650500 }, { "epoch": 67.63, "learning_rate": 1.61962515420401e-05, "loss": 1.9433, "step": 13651000 }, { "epoch": 67.63, "learning_rate": 1.6195012955614015e-05, "loss": 1.9406, "step": 13651500 }, { "epoch": 67.64, "learning_rate": 1.6193774369187932e-05, "loss": 1.9551, "step": 13652000 }, { "epoch": 67.64, "learning_rate": 1.619253578276185e-05, "loss": 1.965, "step": 13652500 }, { "epoch": 67.64, "learning_rate": 1.6191299673508618e-05, "loss": 1.9434, "step": 13653000 }, { "epoch": 67.64, "learning_rate": 1.6190063564255387e-05, "loss": 1.9389, "step": 13653500 }, { "epoch": 67.65, "learning_rate": 1.6188824977829304e-05, "loss": 1.9526, "step": 13654000 }, { "epoch": 67.65, "learning_rate": 1.618758639140322e-05, "loss": 1.9483, "step": 13654500 }, { "epoch": 67.65, "learning_rate": 1.6186347804977138e-05, "loss": 1.9304, "step": 13655000 }, { "epoch": 67.65, "learning_rate": 1.6185109218551055e-05, "loss": 1.9261, "step": 13655500 }, { "epoch": 67.66, "learning_rate": 1.618387063212497e-05, "loss": 1.9358, "step": 13656000 }, { "epoch": 67.66, "learning_rate": 1.6182632045698885e-05, "loss": 1.947, "step": 13656500 }, { "epoch": 67.66, "learning_rate": 1.6181393459272802e-05, "loss": 1.9431, "step": 13657000 }, { "epoch": 67.66, "learning_rate": 1.6180154872846716e-05, "loss": 1.9359, "step": 13657500 }, { "epoch": 67.67, "learning_rate": 1.6178916286420633e-05, "loss": 1.9467, "step": 13658000 }, { "epoch": 67.67, "learning_rate": 1.617767769999455e-05, "loss": 1.9521, "step": 13658500 }, { "epoch": 67.67, "learning_rate": 1.6176439113568466e-05, "loss": 1.9326, "step": 13659000 }, { "epoch": 67.67, "learning_rate": 1.6175200527142383e-05, "loss": 1.9281, "step": 13659500 }, { "epoch": 67.68, "learning_rate": 1.61739619407163e-05, "loss": 1.9467, "step": 13660000 }, { "epoch": 67.68, "learning_rate": 1.6172723354290217e-05, "loss": 1.9066, "step": 13660500 }, { "epoch": 67.68, "learning_rate": 1.6171484767864134e-05, "loss": 1.9692, "step": 13661000 }, { "epoch": 67.68, "learning_rate": 1.6170246181438048e-05, "loss": 1.9307, "step": 13661500 }, { "epoch": 67.69, "learning_rate": 1.6169007595011965e-05, "loss": 1.9491, "step": 13662000 }, { "epoch": 67.69, "learning_rate": 1.616776900858588e-05, "loss": 1.949, "step": 13662500 }, { "epoch": 67.69, "learning_rate": 1.61665304221598e-05, "loss": 1.9368, "step": 13663000 }, { "epoch": 67.69, "learning_rate": 1.6165291835733715e-05, "loss": 1.932, "step": 13663500 }, { "epoch": 67.7, "learning_rate": 1.6164053249307632e-05, "loss": 1.9363, "step": 13664000 }, { "epoch": 67.7, "learning_rate": 1.616281466288155e-05, "loss": 1.9366, "step": 13664500 }, { "epoch": 67.7, "learning_rate": 1.6161576076455466e-05, "loss": 1.9365, "step": 13665000 }, { "epoch": 67.7, "learning_rate": 1.6160342444375083e-05, "loss": 1.926, "step": 13665500 }, { "epoch": 67.71, "learning_rate": 1.6159103857949e-05, "loss": 1.9773, "step": 13666000 }, { "epoch": 67.71, "learning_rate": 1.615786774869577e-05, "loss": 1.9238, "step": 13666500 }, { "epoch": 67.71, "learning_rate": 1.6156629162269686e-05, "loss": 1.9431, "step": 13667000 }, { "epoch": 67.71, "learning_rate": 1.6155393053016455e-05, "loss": 1.9703, "step": 13667500 }, { "epoch": 67.72, "learning_rate": 1.6154154466590372e-05, "loss": 1.9487, "step": 13668000 }, { "epoch": 67.72, "learning_rate": 1.615291588016429e-05, "loss": 1.9486, "step": 13668500 }, { "epoch": 67.72, "learning_rate": 1.6151677293738206e-05, "loss": 1.9194, "step": 13669000 }, { "epoch": 67.72, "learning_rate": 1.615043870731212e-05, "loss": 1.9603, "step": 13669500 }, { "epoch": 67.73, "learning_rate": 1.6149202598058888e-05, "loss": 1.9583, "step": 13670000 }, { "epoch": 67.73, "learning_rate": 1.6147964011632805e-05, "loss": 1.9444, "step": 13670500 }, { "epoch": 67.73, "learning_rate": 1.6146725425206722e-05, "loss": 1.9621, "step": 13671000 }, { "epoch": 67.73, "learning_rate": 1.614548683878064e-05, "loss": 1.9372, "step": 13671500 }, { "epoch": 67.74, "learning_rate": 1.6144248252354556e-05, "loss": 1.9373, "step": 13672000 }, { "epoch": 67.74, "learning_rate": 1.6143009665928473e-05, "loss": 1.9517, "step": 13672500 }, { "epoch": 67.74, "learning_rate": 1.6141771079502386e-05, "loss": 1.9306, "step": 13673000 }, { "epoch": 67.74, "learning_rate": 1.6140532493076303e-05, "loss": 1.9403, "step": 13673500 }, { "epoch": 67.75, "learning_rate": 1.613929390665022e-05, "loss": 1.9399, "step": 13674000 }, { "epoch": 67.75, "learning_rate": 1.6138055320224134e-05, "loss": 1.9615, "step": 13674500 }, { "epoch": 67.75, "learning_rate": 1.613681673379805e-05, "loss": 1.9489, "step": 13675000 }, { "epoch": 67.75, "learning_rate": 1.6135578147371967e-05, "loss": 1.9507, "step": 13675500 }, { "epoch": 67.76, "learning_rate": 1.6134339560945884e-05, "loss": 1.9302, "step": 13676000 }, { "epoch": 67.76, "learning_rate": 1.6133103451692653e-05, "loss": 1.9253, "step": 13676500 }, { "epoch": 67.76, "learning_rate": 1.613186486526657e-05, "loss": 1.9471, "step": 13677000 }, { "epoch": 67.76, "learning_rate": 1.613062875601334e-05, "loss": 1.9595, "step": 13677500 }, { "epoch": 67.77, "learning_rate": 1.6129390169587256e-05, "loss": 1.9233, "step": 13678000 }, { "epoch": 67.77, "learning_rate": 1.6128151583161173e-05, "loss": 1.9515, "step": 13678500 }, { "epoch": 67.77, "learning_rate": 1.6126912996735086e-05, "loss": 1.9419, "step": 13679000 }, { "epoch": 67.77, "learning_rate": 1.6125674410309003e-05, "loss": 1.9441, "step": 13679500 }, { "epoch": 67.78, "learning_rate": 1.612443582388292e-05, "loss": 1.9588, "step": 13680000 }, { "epoch": 67.78, "learning_rate": 1.6123197237456837e-05, "loss": 1.9483, "step": 13680500 }, { "epoch": 67.78, "learning_rate": 1.6121958651030754e-05, "loss": 1.9541, "step": 13681000 }, { "epoch": 67.78, "learning_rate": 1.6120720064604668e-05, "loss": 1.9553, "step": 13681500 }, { "epoch": 67.79, "learning_rate": 1.6119483955351436e-05, "loss": 1.9542, "step": 13682000 }, { "epoch": 67.79, "learning_rate": 1.6118245368925353e-05, "loss": 1.9675, "step": 13682500 }, { "epoch": 67.79, "learning_rate": 1.611700678249927e-05, "loss": 1.9623, "step": 13683000 }, { "epoch": 67.79, "learning_rate": 1.6115768196073187e-05, "loss": 1.944, "step": 13683500 }, { "epoch": 67.8, "learning_rate": 1.6114529609647104e-05, "loss": 1.9347, "step": 13684000 }, { "epoch": 67.8, "learning_rate": 1.6113291023221018e-05, "loss": 1.9717, "step": 13684500 }, { "epoch": 67.8, "learning_rate": 1.611205491396779e-05, "loss": 1.9633, "step": 13685000 }, { "epoch": 67.8, "learning_rate": 1.6110816327541703e-05, "loss": 1.97, "step": 13685500 }, { "epoch": 67.81, "learning_rate": 1.610957774111562e-05, "loss": 1.9238, "step": 13686000 }, { "epoch": 67.81, "learning_rate": 1.6108339154689537e-05, "loss": 1.9451, "step": 13686500 }, { "epoch": 67.81, "learning_rate": 1.6107100568263454e-05, "loss": 1.9778, "step": 13687000 }, { "epoch": 67.81, "learning_rate": 1.6105864459010223e-05, "loss": 1.9546, "step": 13687500 }, { "epoch": 67.82, "learning_rate": 1.610462587258414e-05, "loss": 1.9344, "step": 13688000 }, { "epoch": 67.82, "learning_rate": 1.6103387286158053e-05, "loss": 1.9292, "step": 13688500 }, { "epoch": 67.82, "learning_rate": 1.6102151176904822e-05, "loss": 1.9267, "step": 13689000 }, { "epoch": 67.82, "learning_rate": 1.610091259047874e-05, "loss": 1.9567, "step": 13689500 }, { "epoch": 67.82, "learning_rate": 1.6099674004052656e-05, "loss": 1.9368, "step": 13690000 }, { "epoch": 67.83, "learning_rate": 1.6098435417626573e-05, "loss": 1.9731, "step": 13690500 }, { "epoch": 67.83, "learning_rate": 1.609719683120049e-05, "loss": 1.9255, "step": 13691000 }, { "epoch": 67.83, "learning_rate": 1.6095958244774404e-05, "loss": 1.9537, "step": 13691500 }, { "epoch": 67.83, "learning_rate": 1.609471965834832e-05, "loss": 1.9545, "step": 13692000 }, { "epoch": 67.84, "learning_rate": 1.6093481071922237e-05, "loss": 1.9378, "step": 13692500 }, { "epoch": 67.84, "learning_rate": 1.6092242485496154e-05, "loss": 1.9374, "step": 13693000 }, { "epoch": 67.84, "learning_rate": 1.609100389907007e-05, "loss": 1.9387, "step": 13693500 }, { "epoch": 67.84, "learning_rate": 1.6089765312643988e-05, "loss": 1.9681, "step": 13694000 }, { "epoch": 67.85, "learning_rate": 1.6088529203390754e-05, "loss": 1.9539, "step": 13694500 }, { "epoch": 67.85, "learning_rate": 1.608729061696467e-05, "loss": 1.962, "step": 13695000 }, { "epoch": 67.85, "learning_rate": 1.6086052030538587e-05, "loss": 1.9346, "step": 13695500 }, { "epoch": 67.85, "learning_rate": 1.6084813444112504e-05, "loss": 1.9498, "step": 13696000 }, { "epoch": 67.86, "learning_rate": 1.608357485768642e-05, "loss": 1.9485, "step": 13696500 }, { "epoch": 67.86, "learning_rate": 1.608233874843319e-05, "loss": 1.9627, "step": 13697000 }, { "epoch": 67.86, "learning_rate": 1.6081100162007107e-05, "loss": 1.9376, "step": 13697500 }, { "epoch": 67.86, "learning_rate": 1.607986157558102e-05, "loss": 1.9315, "step": 13698000 }, { "epoch": 67.87, "learning_rate": 1.607862546632779e-05, "loss": 1.9678, "step": 13698500 }, { "epoch": 67.87, "learning_rate": 1.6077386879901706e-05, "loss": 1.9506, "step": 13699000 }, { "epoch": 67.87, "learning_rate": 1.6076148293475623e-05, "loss": 1.9486, "step": 13699500 }, { "epoch": 67.87, "learning_rate": 1.607490970704954e-05, "loss": 1.9573, "step": 13700000 }, { "epoch": 67.88, "learning_rate": 1.6073671120623457e-05, "loss": 1.9465, "step": 13700500 }, { "epoch": 67.88, "learning_rate": 1.6072435011370223e-05, "loss": 1.9496, "step": 13701000 }, { "epoch": 67.88, "learning_rate": 1.607119642494414e-05, "loss": 1.9304, "step": 13701500 }, { "epoch": 67.88, "learning_rate": 1.6069957838518056e-05, "loss": 1.9366, "step": 13702000 }, { "epoch": 67.89, "learning_rate": 1.6068719252091973e-05, "loss": 1.9497, "step": 13702500 }, { "epoch": 67.89, "learning_rate": 1.606748066566589e-05, "loss": 1.962, "step": 13703000 }, { "epoch": 67.89, "learning_rate": 1.6066242079239807e-05, "loss": 1.9362, "step": 13703500 }, { "epoch": 67.89, "learning_rate": 1.606500349281372e-05, "loss": 1.9541, "step": 13704000 }, { "epoch": 67.9, "learning_rate": 1.606376738356049e-05, "loss": 1.9643, "step": 13704500 }, { "epoch": 67.9, "learning_rate": 1.6062531274307262e-05, "loss": 1.9299, "step": 13705000 }, { "epoch": 67.9, "learning_rate": 1.606129268788118e-05, "loss": 1.9708, "step": 13705500 }, { "epoch": 67.9, "learning_rate": 1.6060054101455092e-05, "loss": 1.925, "step": 13706000 }, { "epoch": 67.91, "learning_rate": 1.605881551502901e-05, "loss": 1.9419, "step": 13706500 }, { "epoch": 67.91, "learning_rate": 1.6057576928602923e-05, "loss": 1.9375, "step": 13707000 }, { "epoch": 67.91, "learning_rate": 1.605633834217684e-05, "loss": 1.9492, "step": 13707500 }, { "epoch": 67.91, "learning_rate": 1.6055099755750757e-05, "loss": 1.939, "step": 13708000 }, { "epoch": 67.92, "learning_rate": 1.605386364649753e-05, "loss": 1.9485, "step": 13708500 }, { "epoch": 67.92, "learning_rate": 1.6052625060071442e-05, "loss": 1.9173, "step": 13709000 }, { "epoch": 67.92, "learning_rate": 1.605138895081821e-05, "loss": 1.9449, "step": 13709500 }, { "epoch": 67.92, "learning_rate": 1.605015284156498e-05, "loss": 1.9378, "step": 13710000 }, { "epoch": 67.93, "learning_rate": 1.6048914255138897e-05, "loss": 1.9553, "step": 13710500 }, { "epoch": 67.93, "learning_rate": 1.604767566871281e-05, "loss": 1.9513, "step": 13711000 }, { "epoch": 67.93, "learning_rate": 1.6046437082286727e-05, "loss": 1.971, "step": 13711500 }, { "epoch": 67.93, "learning_rate": 1.6045198495860644e-05, "loss": 1.9126, "step": 13712000 }, { "epoch": 67.94, "learning_rate": 1.604395990943456e-05, "loss": 1.9334, "step": 13712500 }, { "epoch": 67.94, "learning_rate": 1.6042721323008478e-05, "loss": 1.9428, "step": 13713000 }, { "epoch": 67.94, "learning_rate": 1.6041482736582395e-05, "loss": 1.9502, "step": 13713500 }, { "epoch": 67.94, "learning_rate": 1.6040244150156312e-05, "loss": 1.9316, "step": 13714000 }, { "epoch": 67.95, "learning_rate": 1.603900556373023e-05, "loss": 1.9419, "step": 13714500 }, { "epoch": 67.95, "learning_rate": 1.6037766977304146e-05, "loss": 1.9209, "step": 13715000 }, { "epoch": 67.95, "learning_rate": 1.603652839087806e-05, "loss": 1.9305, "step": 13715500 }, { "epoch": 67.95, "learning_rate": 1.6035289804451976e-05, "loss": 1.9414, "step": 13716000 }, { "epoch": 67.96, "learning_rate": 1.6034051218025893e-05, "loss": 1.9713, "step": 13716500 }, { "epoch": 67.96, "learning_rate": 1.6032815108772662e-05, "loss": 1.9292, "step": 13717000 }, { "epoch": 67.96, "learning_rate": 1.603157652234658e-05, "loss": 1.9117, "step": 13717500 }, { "epoch": 67.96, "learning_rate": 1.6030337935920496e-05, "loss": 1.9454, "step": 13718000 }, { "epoch": 67.97, "learning_rate": 1.602910182666726e-05, "loss": 1.9325, "step": 13718500 }, { "epoch": 67.97, "learning_rate": 1.6027863240241178e-05, "loss": 1.9336, "step": 13719000 }, { "epoch": 67.97, "learning_rate": 1.6026624653815095e-05, "loss": 1.9592, "step": 13719500 }, { "epoch": 67.97, "learning_rate": 1.6025386067389012e-05, "loss": 1.9571, "step": 13720000 }, { "epoch": 67.98, "learning_rate": 1.602414748096293e-05, "loss": 1.9634, "step": 13720500 }, { "epoch": 67.98, "learning_rate": 1.6022908894536846e-05, "loss": 1.9276, "step": 13721000 }, { "epoch": 67.98, "learning_rate": 1.602167278528361e-05, "loss": 1.9526, "step": 13721500 }, { "epoch": 67.98, "learning_rate": 1.6020434198857528e-05, "loss": 1.9497, "step": 13722000 }, { "epoch": 67.99, "learning_rate": 1.6019195612431445e-05, "loss": 1.9566, "step": 13722500 }, { "epoch": 67.99, "learning_rate": 1.6017957026005362e-05, "loss": 1.9495, "step": 13723000 }, { "epoch": 67.99, "learning_rate": 1.601671843957928e-05, "loss": 1.9457, "step": 13723500 }, { "epoch": 67.99, "learning_rate": 1.6015479853153196e-05, "loss": 1.9309, "step": 13724000 }, { "epoch": 68.0, "learning_rate": 1.601424126672711e-05, "loss": 1.9329, "step": 13724500 }, { "epoch": 68.0, "learning_rate": 1.6013002680301026e-05, "loss": 1.9375, "step": 13725000 }, { "epoch": 68.0, "eval_accuracy": 0.6765789572900922, "eval_accuracy_mlm": 0.6364056233573344, "eval_accuracy_nsp": 0.8662647719829463, "eval_loss": 2.3042352199554443, "eval_runtime": 146.9325, "eval_samples_per_second": 1735.211, "eval_steps_per_second": 72.305, "step": 13725324 }, { "epoch": 68.0, "learning_rate": 1.6011764093874943e-05, "loss": 1.9257, "step": 13725500 }, { "epoch": 68.0, "learning_rate": 1.601052550744886e-05, "loss": 1.9236, "step": 13726000 }, { "epoch": 68.01, "learning_rate": 1.6009286921022777e-05, "loss": 1.9134, "step": 13726500 }, { "epoch": 68.01, "learning_rate": 1.6008048334596694e-05, "loss": 1.923, "step": 13727000 }, { "epoch": 68.01, "learning_rate": 1.6006809748170608e-05, "loss": 1.9587, "step": 13727500 }, { "epoch": 68.01, "learning_rate": 1.6005573638917376e-05, "loss": 1.928, "step": 13728000 }, { "epoch": 68.02, "learning_rate": 1.6004335052491293e-05, "loss": 1.9338, "step": 13728500 }, { "epoch": 68.02, "learning_rate": 1.600309646606521e-05, "loss": 1.9367, "step": 13729000 }, { "epoch": 68.02, "learning_rate": 1.6001857879639127e-05, "loss": 1.9087, "step": 13729500 }, { "epoch": 68.02, "learning_rate": 1.6000619293213044e-05, "loss": 1.9491, "step": 13730000 }, { "epoch": 68.03, "learning_rate": 1.599938070678696e-05, "loss": 1.9392, "step": 13730500 }, { "epoch": 68.03, "learning_rate": 1.5998144597533727e-05, "loss": 1.9382, "step": 13731000 }, { "epoch": 68.03, "learning_rate": 1.5996906011107643e-05, "loss": 1.9113, "step": 13731500 }, { "epoch": 68.03, "learning_rate": 1.599566742468156e-05, "loss": 1.9276, "step": 13732000 }, { "epoch": 68.04, "learning_rate": 1.5994428838255477e-05, "loss": 1.9217, "step": 13732500 }, { "epoch": 68.04, "learning_rate": 1.5993190251829394e-05, "loss": 1.9225, "step": 13733000 }, { "epoch": 68.04, "learning_rate": 1.599195166540331e-05, "loss": 1.935, "step": 13733500 }, { "epoch": 68.04, "learning_rate": 1.5990713078977225e-05, "loss": 1.9246, "step": 13734000 }, { "epoch": 68.05, "learning_rate": 1.598947449255114e-05, "loss": 1.9276, "step": 13734500 }, { "epoch": 68.05, "learning_rate": 1.598823590612506e-05, "loss": 1.9499, "step": 13735000 }, { "epoch": 68.05, "learning_rate": 1.5986999796871827e-05, "loss": 1.9377, "step": 13735500 }, { "epoch": 68.05, "learning_rate": 1.5985761210445744e-05, "loss": 1.931, "step": 13736000 }, { "epoch": 68.06, "learning_rate": 1.598452262401966e-05, "loss": 1.9344, "step": 13736500 }, { "epoch": 68.06, "learning_rate": 1.5983284037593578e-05, "loss": 1.9289, "step": 13737000 }, { "epoch": 68.06, "learning_rate": 1.5982045451167492e-05, "loss": 1.9087, "step": 13737500 }, { "epoch": 68.06, "learning_rate": 1.598080686474141e-05, "loss": 1.9133, "step": 13738000 }, { "epoch": 68.07, "learning_rate": 1.5979568278315322e-05, "loss": 1.9255, "step": 13738500 }, { "epoch": 68.07, "learning_rate": 1.597832969188924e-05, "loss": 1.9399, "step": 13739000 }, { "epoch": 68.07, "learning_rate": 1.5977091105463156e-05, "loss": 1.9203, "step": 13739500 }, { "epoch": 68.07, "learning_rate": 1.5975852519037073e-05, "loss": 1.9256, "step": 13740000 }, { "epoch": 68.08, "learning_rate": 1.5974616409783845e-05, "loss": 1.9401, "step": 13740500 }, { "epoch": 68.08, "learning_rate": 1.597338030053061e-05, "loss": 1.9193, "step": 13741000 }, { "epoch": 68.08, "learning_rate": 1.5972141714104528e-05, "loss": 1.9406, "step": 13741500 }, { "epoch": 68.08, "learning_rate": 1.5970903127678444e-05, "loss": 1.9319, "step": 13742000 }, { "epoch": 68.09, "learning_rate": 1.596966454125236e-05, "loss": 1.9293, "step": 13742500 }, { "epoch": 68.09, "learning_rate": 1.5968425954826278e-05, "loss": 1.9165, "step": 13743000 }, { "epoch": 68.09, "learning_rate": 1.5967187368400195e-05, "loss": 1.9149, "step": 13743500 }, { "epoch": 68.09, "learning_rate": 1.5965948781974112e-05, "loss": 1.94, "step": 13744000 }, { "epoch": 68.1, "learning_rate": 1.5964712672720878e-05, "loss": 1.9205, "step": 13744500 }, { "epoch": 68.1, "learning_rate": 1.5963474086294795e-05, "loss": 1.9447, "step": 13745000 }, { "epoch": 68.1, "learning_rate": 1.596223549986871e-05, "loss": 1.915, "step": 13745500 }, { "epoch": 68.1, "learning_rate": 1.596099691344263e-05, "loss": 1.9659, "step": 13746000 }, { "epoch": 68.1, "learning_rate": 1.5959760804189394e-05, "loss": 1.9338, "step": 13746500 }, { "epoch": 68.11, "learning_rate": 1.595852221776331e-05, "loss": 1.9387, "step": 13747000 }, { "epoch": 68.11, "learning_rate": 1.5957283631337228e-05, "loss": 1.9108, "step": 13747500 }, { "epoch": 68.11, "learning_rate": 1.5956047522083996e-05, "loss": 1.9436, "step": 13748000 }, { "epoch": 68.11, "learning_rate": 1.5954808935657913e-05, "loss": 1.9326, "step": 13748500 }, { "epoch": 68.12, "learning_rate": 1.595357034923183e-05, "loss": 1.9437, "step": 13749000 }, { "epoch": 68.12, "learning_rate": 1.5952331762805744e-05, "loss": 1.9515, "step": 13749500 }, { "epoch": 68.12, "learning_rate": 1.595109317637966e-05, "loss": 1.9244, "step": 13750000 }, { "epoch": 68.12, "learning_rate": 1.5949854589953578e-05, "loss": 1.9437, "step": 13750500 }, { "epoch": 68.13, "learning_rate": 1.5948616003527495e-05, "loss": 1.9237, "step": 13751000 }, { "epoch": 68.13, "learning_rate": 1.594737741710141e-05, "loss": 1.9203, "step": 13751500 }, { "epoch": 68.13, "learning_rate": 1.594613883067533e-05, "loss": 1.9323, "step": 13752000 }, { "epoch": 68.13, "learning_rate": 1.5944900244249245e-05, "loss": 1.9435, "step": 13752500 }, { "epoch": 68.14, "learning_rate": 1.5943666612168863e-05, "loss": 1.9453, "step": 13753000 }, { "epoch": 68.14, "learning_rate": 1.594242802574278e-05, "loss": 1.9214, "step": 13753500 }, { "epoch": 68.14, "learning_rate": 1.5941189439316697e-05, "loss": 1.9349, "step": 13754000 }, { "epoch": 68.14, "learning_rate": 1.5939950852890614e-05, "loss": 1.9179, "step": 13754500 }, { "epoch": 68.15, "learning_rate": 1.593871226646453e-05, "loss": 1.9265, "step": 13755000 }, { "epoch": 68.15, "learning_rate": 1.5937473680038447e-05, "loss": 1.902, "step": 13755500 }, { "epoch": 68.15, "learning_rate": 1.593623509361236e-05, "loss": 1.9428, "step": 13756000 }, { "epoch": 68.15, "learning_rate": 1.5934996507186278e-05, "loss": 1.9458, "step": 13756500 }, { "epoch": 68.16, "learning_rate": 1.5933757920760195e-05, "loss": 1.924, "step": 13757000 }, { "epoch": 68.16, "learning_rate": 1.593251933433411e-05, "loss": 1.9307, "step": 13757500 }, { "epoch": 68.16, "learning_rate": 1.593128322508088e-05, "loss": 1.931, "step": 13758000 }, { "epoch": 68.16, "learning_rate": 1.593004711582765e-05, "loss": 1.935, "step": 13758500 }, { "epoch": 68.17, "learning_rate": 1.5928808529401566e-05, "loss": 1.9279, "step": 13759000 }, { "epoch": 68.17, "learning_rate": 1.5927572420148335e-05, "loss": 1.9268, "step": 13759500 }, { "epoch": 68.17, "learning_rate": 1.5926333833722252e-05, "loss": 1.9388, "step": 13760000 }, { "epoch": 68.17, "learning_rate": 1.592509524729617e-05, "loss": 1.9574, "step": 13760500 }, { "epoch": 68.18, "learning_rate": 1.5923856660870082e-05, "loss": 1.9247, "step": 13761000 }, { "epoch": 68.18, "learning_rate": 1.5922618074444e-05, "loss": 1.9306, "step": 13761500 }, { "epoch": 68.18, "learning_rate": 1.5921379488017916e-05, "loss": 1.9305, "step": 13762000 }, { "epoch": 68.18, "learning_rate": 1.5920140901591833e-05, "loss": 1.8979, "step": 13762500 }, { "epoch": 68.19, "learning_rate": 1.591890231516575e-05, "loss": 1.9461, "step": 13763000 }, { "epoch": 68.19, "learning_rate": 1.5917663728739664e-05, "loss": 1.8983, "step": 13763500 }, { "epoch": 68.19, "learning_rate": 1.591642514231358e-05, "loss": 1.9564, "step": 13764000 }, { "epoch": 68.19, "learning_rate": 1.5915186555887498e-05, "loss": 1.9257, "step": 13764500 }, { "epoch": 68.2, "learning_rate": 1.5913947969461414e-05, "loss": 1.9291, "step": 13765000 }, { "epoch": 68.2, "learning_rate": 1.5912711860208183e-05, "loss": 1.9367, "step": 13765500 }, { "epoch": 68.2, "learning_rate": 1.59114732737821e-05, "loss": 1.8946, "step": 13766000 }, { "epoch": 68.2, "learning_rate": 1.591023716452887e-05, "loss": 1.9498, "step": 13766500 }, { "epoch": 68.21, "learning_rate": 1.5908998578102783e-05, "loss": 1.9457, "step": 13767000 }, { "epoch": 68.21, "learning_rate": 1.59077599916767e-05, "loss": 1.9445, "step": 13767500 }, { "epoch": 68.21, "learning_rate": 1.5906521405250616e-05, "loss": 1.9434, "step": 13768000 }, { "epoch": 68.21, "learning_rate": 1.5905282818824533e-05, "loss": 1.937, "step": 13768500 }, { "epoch": 68.22, "learning_rate": 1.590404423239845e-05, "loss": 1.9073, "step": 13769000 }, { "epoch": 68.22, "learning_rate": 1.5902805645972367e-05, "loss": 1.9459, "step": 13769500 }, { "epoch": 68.22, "learning_rate": 1.590156705954628e-05, "loss": 1.9504, "step": 13770000 }, { "epoch": 68.22, "learning_rate": 1.5900328473120198e-05, "loss": 1.9203, "step": 13770500 }, { "epoch": 68.23, "learning_rate": 1.5899089886694115e-05, "loss": 1.9373, "step": 13771000 }, { "epoch": 68.23, "learning_rate": 1.5897851300268028e-05, "loss": 1.9414, "step": 13771500 }, { "epoch": 68.23, "learning_rate": 1.58966151910148e-05, "loss": 1.9316, "step": 13772000 }, { "epoch": 68.23, "learning_rate": 1.5895376604588717e-05, "loss": 1.9339, "step": 13772500 }, { "epoch": 68.24, "learning_rate": 1.5894138018162634e-05, "loss": 1.9487, "step": 13773000 }, { "epoch": 68.24, "learning_rate": 1.5892899431736548e-05, "loss": 1.9376, "step": 13773500 }, { "epoch": 68.24, "learning_rate": 1.5891660845310465e-05, "loss": 1.93, "step": 13774000 }, { "epoch": 68.24, "learning_rate": 1.5890422258884378e-05, "loss": 1.9406, "step": 13774500 }, { "epoch": 68.25, "learning_rate": 1.5889183672458295e-05, "loss": 1.9118, "step": 13775000 }, { "epoch": 68.25, "learning_rate": 1.5887945086032212e-05, "loss": 1.9338, "step": 13775500 }, { "epoch": 68.25, "learning_rate": 1.588670649960613e-05, "loss": 1.9525, "step": 13776000 }, { "epoch": 68.25, "learning_rate": 1.5885467913180046e-05, "loss": 1.9337, "step": 13776500 }, { "epoch": 68.26, "learning_rate": 1.5884229326753963e-05, "loss": 1.9378, "step": 13777000 }, { "epoch": 68.26, "learning_rate": 1.588299074032788e-05, "loss": 1.9307, "step": 13777500 }, { "epoch": 68.26, "learning_rate": 1.5881754631074645e-05, "loss": 1.9394, "step": 13778000 }, { "epoch": 68.26, "learning_rate": 1.5880516044648562e-05, "loss": 1.9376, "step": 13778500 }, { "epoch": 68.27, "learning_rate": 1.587927745822248e-05, "loss": 1.9264, "step": 13779000 }, { "epoch": 68.27, "learning_rate": 1.5878038871796396e-05, "loss": 1.9457, "step": 13779500 }, { "epoch": 68.27, "learning_rate": 1.5876802762543165e-05, "loss": 1.9148, "step": 13780000 }, { "epoch": 68.27, "learning_rate": 1.5875564176117082e-05, "loss": 1.9377, "step": 13780500 }, { "epoch": 68.28, "learning_rate": 1.5874325589691e-05, "loss": 1.9713, "step": 13781000 }, { "epoch": 68.28, "learning_rate": 1.5873089480437767e-05, "loss": 1.9562, "step": 13781500 }, { "epoch": 68.28, "learning_rate": 1.5871850894011684e-05, "loss": 1.9576, "step": 13782000 }, { "epoch": 68.28, "learning_rate": 1.58706123075856e-05, "loss": 1.939, "step": 13782500 }, { "epoch": 68.29, "learning_rate": 1.5869376198332367e-05, "loss": 1.93, "step": 13783000 }, { "epoch": 68.29, "learning_rate": 1.5868137611906284e-05, "loss": 1.943, "step": 13783500 }, { "epoch": 68.29, "learning_rate": 1.58668990254802e-05, "loss": 1.9473, "step": 13784000 }, { "epoch": 68.29, "learning_rate": 1.586566291622697e-05, "loss": 1.9234, "step": 13784500 }, { "epoch": 68.3, "learning_rate": 1.5864424329800886e-05, "loss": 1.9115, "step": 13785000 }, { "epoch": 68.3, "learning_rate": 1.5863185743374803e-05, "loss": 1.9151, "step": 13785500 }, { "epoch": 68.3, "learning_rate": 1.5861947156948717e-05, "loss": 1.9356, "step": 13786000 }, { "epoch": 68.3, "learning_rate": 1.5860708570522634e-05, "loss": 1.9468, "step": 13786500 }, { "epoch": 68.31, "learning_rate": 1.585946998409655e-05, "loss": 1.9346, "step": 13787000 }, { "epoch": 68.31, "learning_rate": 1.5858231397670468e-05, "loss": 1.9395, "step": 13787500 }, { "epoch": 68.31, "learning_rate": 1.5856992811244385e-05, "loss": 1.9375, "step": 13788000 }, { "epoch": 68.31, "learning_rate": 1.58557542248183e-05, "loss": 1.9379, "step": 13788500 }, { "epoch": 68.32, "learning_rate": 1.585451563839222e-05, "loss": 1.9477, "step": 13789000 }, { "epoch": 68.32, "learning_rate": 1.5853277051966135e-05, "loss": 1.9319, "step": 13789500 }, { "epoch": 68.32, "learning_rate": 1.58520409427129e-05, "loss": 1.939, "step": 13790000 }, { "epoch": 68.32, "learning_rate": 1.5850802356286818e-05, "loss": 1.9339, "step": 13790500 }, { "epoch": 68.33, "learning_rate": 1.5849563769860735e-05, "loss": 1.9461, "step": 13791000 }, { "epoch": 68.33, "learning_rate": 1.584832518343465e-05, "loss": 1.9152, "step": 13791500 }, { "epoch": 68.33, "learning_rate": 1.584708659700857e-05, "loss": 1.9433, "step": 13792000 }, { "epoch": 68.33, "learning_rate": 1.5845848010582485e-05, "loss": 1.937, "step": 13792500 }, { "epoch": 68.34, "learning_rate": 1.5844609424156402e-05, "loss": 1.9372, "step": 13793000 }, { "epoch": 68.34, "learning_rate": 1.5843370837730316e-05, "loss": 1.9493, "step": 13793500 }, { "epoch": 68.34, "learning_rate": 1.5842132251304233e-05, "loss": 1.9591, "step": 13794000 }, { "epoch": 68.34, "learning_rate": 1.5840896142051e-05, "loss": 1.95, "step": 13794500 }, { "epoch": 68.35, "learning_rate": 1.583965755562492e-05, "loss": 1.9387, "step": 13795000 }, { "epoch": 68.35, "learning_rate": 1.5838418969198835e-05, "loss": 1.9362, "step": 13795500 }, { "epoch": 68.35, "learning_rate": 1.5837180382772752e-05, "loss": 1.9372, "step": 13796000 }, { "epoch": 68.35, "learning_rate": 1.5835941796346666e-05, "loss": 1.919, "step": 13796500 }, { "epoch": 68.36, "learning_rate": 1.5834705687093435e-05, "loss": 1.9574, "step": 13797000 }, { "epoch": 68.36, "learning_rate": 1.583346710066735e-05, "loss": 1.9544, "step": 13797500 }, { "epoch": 68.36, "learning_rate": 1.583222851424127e-05, "loss": 1.9291, "step": 13798000 }, { "epoch": 68.36, "learning_rate": 1.5830989927815185e-05, "loss": 1.9309, "step": 13798500 }, { "epoch": 68.37, "learning_rate": 1.5829751341389102e-05, "loss": 1.9433, "step": 13799000 }, { "epoch": 68.37, "learning_rate": 1.5828512754963016e-05, "loss": 1.9301, "step": 13799500 }, { "epoch": 68.37, "learning_rate": 1.5827274168536933e-05, "loss": 1.9417, "step": 13800000 }, { "epoch": 68.37, "learning_rate": 1.582603558211085e-05, "loss": 1.9311, "step": 13800500 }, { "epoch": 68.37, "learning_rate": 1.5824796995684767e-05, "loss": 1.9514, "step": 13801000 }, { "epoch": 68.38, "learning_rate": 1.5823560886431536e-05, "loss": 1.9362, "step": 13801500 }, { "epoch": 68.38, "learning_rate": 1.5822322300005452e-05, "loss": 1.936, "step": 13802000 }, { "epoch": 68.38, "learning_rate": 1.582108371357937e-05, "loss": 1.936, "step": 13802500 }, { "epoch": 68.38, "learning_rate": 1.5819845127153283e-05, "loss": 1.9235, "step": 13803000 }, { "epoch": 68.39, "learning_rate": 1.58186065407272e-05, "loss": 1.9508, "step": 13803500 }, { "epoch": 68.39, "learning_rate": 1.5817367954301117e-05, "loss": 1.9016, "step": 13804000 }, { "epoch": 68.39, "learning_rate": 1.5816131845047886e-05, "loss": 1.9421, "step": 13804500 }, { "epoch": 68.39, "learning_rate": 1.5814893258621803e-05, "loss": 1.9148, "step": 13805000 }, { "epoch": 68.4, "learning_rate": 1.581365467219572e-05, "loss": 1.9384, "step": 13805500 }, { "epoch": 68.4, "learning_rate": 1.5812416085769633e-05, "loss": 1.897, "step": 13806000 }, { "epoch": 68.4, "learning_rate": 1.581117749934355e-05, "loss": 1.915, "step": 13806500 }, { "epoch": 68.4, "learning_rate": 1.5809938912917467e-05, "loss": 1.9269, "step": 13807000 }, { "epoch": 68.41, "learning_rate": 1.5808700326491384e-05, "loss": 1.9349, "step": 13807500 }, { "epoch": 68.41, "learning_rate": 1.58074617400653e-05, "loss": 1.9384, "step": 13808000 }, { "epoch": 68.41, "learning_rate": 1.5806223153639214e-05, "loss": 1.9105, "step": 13808500 }, { "epoch": 68.41, "learning_rate": 1.5804987044385983e-05, "loss": 1.9433, "step": 13809000 }, { "epoch": 68.42, "learning_rate": 1.58037484579599e-05, "loss": 1.9659, "step": 13809500 }, { "epoch": 68.42, "learning_rate": 1.5802509871533817e-05, "loss": 1.9167, "step": 13810000 }, { "epoch": 68.42, "learning_rate": 1.5801271285107734e-05, "loss": 1.9159, "step": 13810500 }, { "epoch": 68.42, "learning_rate": 1.5800035175854503e-05, "loss": 1.9379, "step": 13811000 }, { "epoch": 68.43, "learning_rate": 1.579879658942842e-05, "loss": 1.9205, "step": 13811500 }, { "epoch": 68.43, "learning_rate": 1.5797558003002333e-05, "loss": 1.9335, "step": 13812000 }, { "epoch": 68.43, "learning_rate": 1.579631941657625e-05, "loss": 1.9608, "step": 13812500 }, { "epoch": 68.43, "learning_rate": 1.5795080830150167e-05, "loss": 1.9417, "step": 13813000 }, { "epoch": 68.44, "learning_rate": 1.5793844720896936e-05, "loss": 1.9464, "step": 13813500 }, { "epoch": 68.44, "learning_rate": 1.5792606134470853e-05, "loss": 1.9615, "step": 13814000 }, { "epoch": 68.44, "learning_rate": 1.5791372502390473e-05, "loss": 1.9444, "step": 13814500 }, { "epoch": 68.44, "learning_rate": 1.579013391596439e-05, "loss": 1.9189, "step": 13815000 }, { "epoch": 68.45, "learning_rate": 1.5788895329538307e-05, "loss": 1.9434, "step": 13815500 }, { "epoch": 68.45, "learning_rate": 1.578765674311222e-05, "loss": 1.9347, "step": 13816000 }, { "epoch": 68.45, "learning_rate": 1.5786418156686138e-05, "loss": 1.9453, "step": 13816500 }, { "epoch": 68.45, "learning_rate": 1.5785182047432907e-05, "loss": 1.9395, "step": 13817000 }, { "epoch": 68.46, "learning_rate": 1.5783945938179675e-05, "loss": 1.9422, "step": 13817500 }, { "epoch": 68.46, "learning_rate": 1.5782707351753592e-05, "loss": 1.9527, "step": 13818000 }, { "epoch": 68.46, "learning_rate": 1.578146876532751e-05, "loss": 1.9215, "step": 13818500 }, { "epoch": 68.46, "learning_rate": 1.5780230178901423e-05, "loss": 1.9302, "step": 13819000 }, { "epoch": 68.47, "learning_rate": 1.577899159247534e-05, "loss": 1.9246, "step": 13819500 }, { "epoch": 68.47, "learning_rate": 1.5777753006049257e-05, "loss": 1.9224, "step": 13820000 }, { "epoch": 68.47, "learning_rate": 1.5776514419623174e-05, "loss": 1.9311, "step": 13820500 }, { "epoch": 68.47, "learning_rate": 1.577527583319709e-05, "loss": 1.9231, "step": 13821000 }, { "epoch": 68.48, "learning_rate": 1.5774037246771007e-05, "loss": 1.9415, "step": 13821500 }, { "epoch": 68.48, "learning_rate": 1.5772798660344924e-05, "loss": 1.9483, "step": 13822000 }, { "epoch": 68.48, "learning_rate": 1.577156007391884e-05, "loss": 1.9264, "step": 13822500 }, { "epoch": 68.48, "learning_rate": 1.5770321487492755e-05, "loss": 1.9288, "step": 13823000 }, { "epoch": 68.49, "learning_rate": 1.5769082901066672e-05, "loss": 1.9271, "step": 13823500 }, { "epoch": 68.49, "learning_rate": 1.5767844314640585e-05, "loss": 1.9427, "step": 13824000 }, { "epoch": 68.49, "learning_rate": 1.5766608205387357e-05, "loss": 1.947, "step": 13824500 }, { "epoch": 68.49, "learning_rate": 1.5765369618961274e-05, "loss": 1.9451, "step": 13825000 }, { "epoch": 68.5, "learning_rate": 1.576413103253519e-05, "loss": 1.9033, "step": 13825500 }, { "epoch": 68.5, "learning_rate": 1.5762892446109105e-05, "loss": 1.9732, "step": 13826000 }, { "epoch": 68.5, "learning_rate": 1.5761653859683022e-05, "loss": 1.9286, "step": 13826500 }, { "epoch": 68.5, "learning_rate": 1.576041527325694e-05, "loss": 1.9479, "step": 13827000 }, { "epoch": 68.51, "learning_rate": 1.5759179164003708e-05, "loss": 1.9354, "step": 13827500 }, { "epoch": 68.51, "learning_rate": 1.5757940577577624e-05, "loss": 1.948, "step": 13828000 }, { "epoch": 68.51, "learning_rate": 1.575670199115154e-05, "loss": 1.9583, "step": 13828500 }, { "epoch": 68.51, "learning_rate": 1.5755463404725458e-05, "loss": 1.9486, "step": 13829000 }, { "epoch": 68.52, "learning_rate": 1.5754224818299372e-05, "loss": 1.9259, "step": 13829500 }, { "epoch": 68.52, "learning_rate": 1.575298870904614e-05, "loss": 1.9196, "step": 13830000 }, { "epoch": 68.52, "learning_rate": 1.5751750122620058e-05, "loss": 1.9261, "step": 13830500 }, { "epoch": 68.52, "learning_rate": 1.5750511536193975e-05, "loss": 1.9595, "step": 13831000 }, { "epoch": 68.53, "learning_rate": 1.574927294976789e-05, "loss": 1.9136, "step": 13831500 }, { "epoch": 68.53, "learning_rate": 1.574803436334181e-05, "loss": 1.921, "step": 13832000 }, { "epoch": 68.53, "learning_rate": 1.5746795776915725e-05, "loss": 1.9546, "step": 13832500 }, { "epoch": 68.53, "learning_rate": 1.574555719048964e-05, "loss": 1.9535, "step": 13833000 }, { "epoch": 68.54, "learning_rate": 1.5744318604063556e-05, "loss": 1.9251, "step": 13833500 }, { "epoch": 68.54, "learning_rate": 1.574308001763747e-05, "loss": 1.9447, "step": 13834000 }, { "epoch": 68.54, "learning_rate": 1.574184390838424e-05, "loss": 1.9404, "step": 13834500 }, { "epoch": 68.54, "learning_rate": 1.5740607799131007e-05, "loss": 1.952, "step": 13835000 }, { "epoch": 68.55, "learning_rate": 1.5739369212704924e-05, "loss": 1.9296, "step": 13835500 }, { "epoch": 68.55, "learning_rate": 1.573813062627884e-05, "loss": 1.9302, "step": 13836000 }, { "epoch": 68.55, "learning_rate": 1.5736892039852758e-05, "loss": 1.9424, "step": 13836500 }, { "epoch": 68.55, "learning_rate": 1.5735653453426675e-05, "loss": 1.9422, "step": 13837000 }, { "epoch": 68.56, "learning_rate": 1.573441486700059e-05, "loss": 1.9413, "step": 13837500 }, { "epoch": 68.56, "learning_rate": 1.5733178757747357e-05, "loss": 1.9189, "step": 13838000 }, { "epoch": 68.56, "learning_rate": 1.5731940171321274e-05, "loss": 1.9503, "step": 13838500 }, { "epoch": 68.56, "learning_rate": 1.573070158489519e-05, "loss": 1.9587, "step": 13839000 }, { "epoch": 68.57, "learning_rate": 1.5729462998469108e-05, "loss": 1.9323, "step": 13839500 }, { "epoch": 68.57, "learning_rate": 1.5728224412043025e-05, "loss": 1.9459, "step": 13840000 }, { "epoch": 68.57, "learning_rate": 1.572698582561694e-05, "loss": 1.9274, "step": 13840500 }, { "epoch": 68.57, "learning_rate": 1.572574723919086e-05, "loss": 1.9192, "step": 13841000 }, { "epoch": 68.58, "learning_rate": 1.5724511129937624e-05, "loss": 1.9412, "step": 13841500 }, { "epoch": 68.58, "learning_rate": 1.572327254351154e-05, "loss": 1.9469, "step": 13842000 }, { "epoch": 68.58, "learning_rate": 1.5722033957085458e-05, "loss": 1.9552, "step": 13842500 }, { "epoch": 68.58, "learning_rate": 1.5720795370659375e-05, "loss": 1.9327, "step": 13843000 }, { "epoch": 68.59, "learning_rate": 1.5719559261406144e-05, "loss": 1.9525, "step": 13843500 }, { "epoch": 68.59, "learning_rate": 1.5718320674980057e-05, "loss": 1.9258, "step": 13844000 }, { "epoch": 68.59, "learning_rate": 1.5717082088553974e-05, "loss": 1.9363, "step": 13844500 }, { "epoch": 68.59, "learning_rate": 1.571584350212789e-05, "loss": 1.9236, "step": 13845000 }, { "epoch": 68.6, "learning_rate": 1.5714604915701808e-05, "loss": 1.9435, "step": 13845500 }, { "epoch": 68.6, "learning_rate": 1.5713366329275725e-05, "loss": 1.912, "step": 13846000 }, { "epoch": 68.6, "learning_rate": 1.5712127742849642e-05, "loss": 1.9194, "step": 13846500 }, { "epoch": 68.6, "learning_rate": 1.571088915642356e-05, "loss": 1.9542, "step": 13847000 }, { "epoch": 68.61, "learning_rate": 1.5709653047170324e-05, "loss": 1.9326, "step": 13847500 }, { "epoch": 68.61, "learning_rate": 1.570841446074424e-05, "loss": 1.9602, "step": 13848000 }, { "epoch": 68.61, "learning_rate": 1.5707175874318158e-05, "loss": 1.9206, "step": 13848500 }, { "epoch": 68.61, "learning_rate": 1.5705937287892075e-05, "loss": 1.9331, "step": 13849000 }, { "epoch": 68.62, "learning_rate": 1.5704698701465992e-05, "loss": 1.9286, "step": 13849500 }, { "epoch": 68.62, "learning_rate": 1.570346259221276e-05, "loss": 1.9404, "step": 13850000 }, { "epoch": 68.62, "learning_rate": 1.5702224005786674e-05, "loss": 1.9394, "step": 13850500 }, { "epoch": 68.62, "learning_rate": 1.570098541936059e-05, "loss": 1.9313, "step": 13851000 }, { "epoch": 68.63, "learning_rate": 1.5699746832934508e-05, "loss": 1.9529, "step": 13851500 }, { "epoch": 68.63, "learning_rate": 1.5698508246508425e-05, "loss": 1.9436, "step": 13852000 }, { "epoch": 68.63, "learning_rate": 1.5697272137255194e-05, "loss": 1.9256, "step": 13852500 }, { "epoch": 68.63, "learning_rate": 1.569603355082911e-05, "loss": 1.9472, "step": 13853000 }, { "epoch": 68.64, "learning_rate": 1.5694794964403024e-05, "loss": 1.966, "step": 13853500 }, { "epoch": 68.64, "learning_rate": 1.569355637797694e-05, "loss": 1.9529, "step": 13854000 }, { "epoch": 68.64, "learning_rate": 1.5692320268723713e-05, "loss": 1.9282, "step": 13854500 }, { "epoch": 68.64, "learning_rate": 1.569108168229763e-05, "loss": 1.9337, "step": 13855000 }, { "epoch": 68.64, "learning_rate": 1.5689843095871544e-05, "loss": 1.9623, "step": 13855500 }, { "epoch": 68.65, "learning_rate": 1.568860450944546e-05, "loss": 1.9272, "step": 13856000 }, { "epoch": 68.65, "learning_rate": 1.5687365923019374e-05, "loss": 1.9325, "step": 13856500 }, { "epoch": 68.65, "learning_rate": 1.568612733659329e-05, "loss": 1.9489, "step": 13857000 }, { "epoch": 68.65, "learning_rate": 1.5684888750167208e-05, "loss": 1.9231, "step": 13857500 }, { "epoch": 68.66, "learning_rate": 1.5683650163741125e-05, "loss": 1.9476, "step": 13858000 }, { "epoch": 68.66, "learning_rate": 1.5682411577315042e-05, "loss": 1.9202, "step": 13858500 }, { "epoch": 68.66, "learning_rate": 1.568117299088896e-05, "loss": 1.9193, "step": 13859000 }, { "epoch": 68.66, "learning_rate": 1.5679934404462876e-05, "loss": 1.9352, "step": 13859500 }, { "epoch": 68.67, "learning_rate": 1.567869829520964e-05, "loss": 1.9159, "step": 13860000 }, { "epoch": 68.67, "learning_rate": 1.5677459708783558e-05, "loss": 1.9337, "step": 13860500 }, { "epoch": 68.67, "learning_rate": 1.5676221122357475e-05, "loss": 1.9365, "step": 13861000 }, { "epoch": 68.67, "learning_rate": 1.5674985013104247e-05, "loss": 1.9399, "step": 13861500 }, { "epoch": 68.68, "learning_rate": 1.567374642667816e-05, "loss": 1.967, "step": 13862000 }, { "epoch": 68.68, "learning_rate": 1.5672507840252078e-05, "loss": 1.9369, "step": 13862500 }, { "epoch": 68.68, "learning_rate": 1.5671269253825995e-05, "loss": 1.9117, "step": 13863000 }, { "epoch": 68.68, "learning_rate": 1.5670033144572764e-05, "loss": 1.9055, "step": 13863500 }, { "epoch": 68.69, "learning_rate": 1.566879455814668e-05, "loss": 1.9505, "step": 13864000 }, { "epoch": 68.69, "learning_rate": 1.5667555971720597e-05, "loss": 1.9538, "step": 13864500 }, { "epoch": 68.69, "learning_rate": 1.5666317385294514e-05, "loss": 1.9033, "step": 13865000 }, { "epoch": 68.69, "learning_rate": 1.5665078798868428e-05, "loss": 1.9276, "step": 13865500 }, { "epoch": 68.7, "learning_rate": 1.5663842689615197e-05, "loss": 1.9374, "step": 13866000 }, { "epoch": 68.7, "learning_rate": 1.5662604103189114e-05, "loss": 1.9412, "step": 13866500 }, { "epoch": 68.7, "learning_rate": 1.566136551676303e-05, "loss": 1.9438, "step": 13867000 }, { "epoch": 68.7, "learning_rate": 1.5660126930336947e-05, "loss": 1.9174, "step": 13867500 }, { "epoch": 68.71, "learning_rate": 1.5658888343910864e-05, "loss": 1.9401, "step": 13868000 }, { "epoch": 68.71, "learning_rate": 1.565764975748478e-05, "loss": 1.9349, "step": 13868500 }, { "epoch": 68.71, "learning_rate": 1.5656411171058695e-05, "loss": 1.9407, "step": 13869000 }, { "epoch": 68.71, "learning_rate": 1.5655172584632612e-05, "loss": 1.9407, "step": 13869500 }, { "epoch": 68.72, "learning_rate": 1.5653933998206525e-05, "loss": 1.9168, "step": 13870000 }, { "epoch": 68.72, "learning_rate": 1.5652695411780442e-05, "loss": 1.9234, "step": 13870500 }, { "epoch": 68.72, "learning_rate": 1.565145682535436e-05, "loss": 1.9844, "step": 13871000 }, { "epoch": 68.72, "learning_rate": 1.5650218238928276e-05, "loss": 1.9412, "step": 13871500 }, { "epoch": 68.73, "learning_rate": 1.5648982129675045e-05, "loss": 1.9241, "step": 13872000 }, { "epoch": 68.73, "learning_rate": 1.5647746020421814e-05, "loss": 1.9449, "step": 13872500 }, { "epoch": 68.73, "learning_rate": 1.564650743399573e-05, "loss": 1.9556, "step": 13873000 }, { "epoch": 68.73, "learning_rate": 1.5645268847569648e-05, "loss": 1.956, "step": 13873500 }, { "epoch": 68.74, "learning_rate": 1.5644030261143564e-05, "loss": 1.9297, "step": 13874000 }, { "epoch": 68.74, "learning_rate": 1.564279415189033e-05, "loss": 1.9261, "step": 13874500 }, { "epoch": 68.74, "learning_rate": 1.5641555565464247e-05, "loss": 1.9427, "step": 13875000 }, { "epoch": 68.74, "learning_rate": 1.5640319456211016e-05, "loss": 1.9494, "step": 13875500 }, { "epoch": 68.75, "learning_rate": 1.5639080869784933e-05, "loss": 1.929, "step": 13876000 }, { "epoch": 68.75, "learning_rate": 1.563784228335885e-05, "loss": 1.9508, "step": 13876500 }, { "epoch": 68.75, "learning_rate": 1.5636603696932763e-05, "loss": 1.9418, "step": 13877000 }, { "epoch": 68.75, "learning_rate": 1.563536511050668e-05, "loss": 1.9178, "step": 13877500 }, { "epoch": 68.76, "learning_rate": 1.5634126524080597e-05, "loss": 1.9533, "step": 13878000 }, { "epoch": 68.76, "learning_rate": 1.5632887937654514e-05, "loss": 1.9414, "step": 13878500 }, { "epoch": 68.76, "learning_rate": 1.563164935122843e-05, "loss": 1.9576, "step": 13879000 }, { "epoch": 68.76, "learning_rate": 1.56304132419752e-05, "loss": 1.9442, "step": 13879500 }, { "epoch": 68.77, "learning_rate": 1.5629174655549117e-05, "loss": 1.9338, "step": 13880000 }, { "epoch": 68.77, "learning_rate": 1.562793606912303e-05, "loss": 1.942, "step": 13880500 }, { "epoch": 68.77, "learning_rate": 1.56266999598698e-05, "loss": 1.9121, "step": 13881000 }, { "epoch": 68.77, "learning_rate": 1.5625461373443716e-05, "loss": 1.9401, "step": 13881500 }, { "epoch": 68.78, "learning_rate": 1.5624222787017633e-05, "loss": 1.9424, "step": 13882000 }, { "epoch": 68.78, "learning_rate": 1.56229866777644e-05, "loss": 1.9443, "step": 13882500 }, { "epoch": 68.78, "learning_rate": 1.562174809133832e-05, "loss": 1.9504, "step": 13883000 }, { "epoch": 68.78, "learning_rate": 1.5620509504912235e-05, "loss": 1.9368, "step": 13883500 }, { "epoch": 68.79, "learning_rate": 1.5619270918486152e-05, "loss": 1.944, "step": 13884000 }, { "epoch": 68.79, "learning_rate": 1.561803480923292e-05, "loss": 1.9297, "step": 13884500 }, { "epoch": 68.79, "learning_rate": 1.5616796222806838e-05, "loss": 1.939, "step": 13885000 }, { "epoch": 68.79, "learning_rate": 1.561555763638075e-05, "loss": 1.8975, "step": 13885500 }, { "epoch": 68.8, "learning_rate": 1.561431904995467e-05, "loss": 1.953, "step": 13886000 }, { "epoch": 68.8, "learning_rate": 1.5613080463528585e-05, "loss": 1.9346, "step": 13886500 }, { "epoch": 68.8, "learning_rate": 1.5611841877102502e-05, "loss": 1.9672, "step": 13887000 }, { "epoch": 68.8, "learning_rate": 1.561060329067642e-05, "loss": 1.9648, "step": 13887500 }, { "epoch": 68.81, "learning_rate": 1.5609364704250333e-05, "loss": 1.9318, "step": 13888000 }, { "epoch": 68.81, "learning_rate": 1.56081285949971e-05, "loss": 1.9377, "step": 13888500 }, { "epoch": 68.81, "learning_rate": 1.560689000857102e-05, "loss": 1.9252, "step": 13889000 }, { "epoch": 68.81, "learning_rate": 1.5605651422144935e-05, "loss": 1.9232, "step": 13889500 }, { "epoch": 68.82, "learning_rate": 1.5604412835718852e-05, "loss": 1.9517, "step": 13890000 }, { "epoch": 68.82, "learning_rate": 1.560317424929277e-05, "loss": 1.9317, "step": 13890500 }, { "epoch": 68.82, "learning_rate": 1.5601935662866686e-05, "loss": 1.9393, "step": 13891000 }, { "epoch": 68.82, "learning_rate": 1.56006970764406e-05, "loss": 1.9395, "step": 13891500 }, { "epoch": 68.83, "learning_rate": 1.5599458490014517e-05, "loss": 1.9264, "step": 13892000 }, { "epoch": 68.83, "learning_rate": 1.5598219903588434e-05, "loss": 1.9411, "step": 13892500 }, { "epoch": 68.83, "learning_rate": 1.5596981317162347e-05, "loss": 1.9357, "step": 13893000 }, { "epoch": 68.83, "learning_rate": 1.5595742730736264e-05, "loss": 1.9629, "step": 13893500 }, { "epoch": 68.84, "learning_rate": 1.559450414431018e-05, "loss": 1.9541, "step": 13894000 }, { "epoch": 68.84, "learning_rate": 1.5593265557884098e-05, "loss": 1.9577, "step": 13894500 }, { "epoch": 68.84, "learning_rate": 1.5592026971458015e-05, "loss": 1.9339, "step": 13895000 }, { "epoch": 68.84, "learning_rate": 1.5590788385031932e-05, "loss": 1.9266, "step": 13895500 }, { "epoch": 68.85, "learning_rate": 1.558954979860585e-05, "loss": 1.942, "step": 13896000 }, { "epoch": 68.85, "learning_rate": 1.5588311212179766e-05, "loss": 1.9402, "step": 13896500 }, { "epoch": 68.85, "learning_rate": 1.558707510292653e-05, "loss": 1.9283, "step": 13897000 }, { "epoch": 68.85, "learning_rate": 1.5585841470846155e-05, "loss": 1.9448, "step": 13897500 }, { "epoch": 68.86, "learning_rate": 1.558460288442007e-05, "loss": 1.9357, "step": 13898000 }, { "epoch": 68.86, "learning_rate": 1.5583364297993986e-05, "loss": 1.9448, "step": 13898500 }, { "epoch": 68.86, "learning_rate": 1.5582125711567903e-05, "loss": 1.9644, "step": 13899000 }, { "epoch": 68.86, "learning_rate": 1.558088712514182e-05, "loss": 1.9374, "step": 13899500 }, { "epoch": 68.87, "learning_rate": 1.557965101588859e-05, "loss": 1.9529, "step": 13900000 }, { "epoch": 68.87, "learning_rate": 1.5578412429462505e-05, "loss": 1.938, "step": 13900500 }, { "epoch": 68.87, "learning_rate": 1.557717384303642e-05, "loss": 1.9675, "step": 13901000 }, { "epoch": 68.87, "learning_rate": 1.5575935256610336e-05, "loss": 1.9503, "step": 13901500 }, { "epoch": 68.88, "learning_rate": 1.5574699147357105e-05, "loss": 1.9323, "step": 13902000 }, { "epoch": 68.88, "learning_rate": 1.557346056093102e-05, "loss": 1.9452, "step": 13902500 }, { "epoch": 68.88, "learning_rate": 1.557222197450494e-05, "loss": 1.9333, "step": 13903000 }, { "epoch": 68.88, "learning_rate": 1.5570983388078855e-05, "loss": 1.95, "step": 13903500 }, { "epoch": 68.89, "learning_rate": 1.556974480165277e-05, "loss": 1.9376, "step": 13904000 }, { "epoch": 68.89, "learning_rate": 1.5568506215226686e-05, "loss": 1.9527, "step": 13904500 }, { "epoch": 68.89, "learning_rate": 1.5567267628800603e-05, "loss": 1.9637, "step": 13905000 }, { "epoch": 68.89, "learning_rate": 1.556602904237452e-05, "loss": 1.9408, "step": 13905500 }, { "epoch": 68.9, "learning_rate": 1.5564790455948437e-05, "loss": 1.9377, "step": 13906000 }, { "epoch": 68.9, "learning_rate": 1.5563551869522354e-05, "loss": 1.9408, "step": 13906500 }, { "epoch": 68.9, "learning_rate": 1.556231328309627e-05, "loss": 1.9239, "step": 13907000 }, { "epoch": 68.9, "learning_rate": 1.5561077173843036e-05, "loss": 1.9522, "step": 13907500 }, { "epoch": 68.91, "learning_rate": 1.5559838587416953e-05, "loss": 1.9485, "step": 13908000 }, { "epoch": 68.91, "learning_rate": 1.555860000099087e-05, "loss": 1.9267, "step": 13908500 }, { "epoch": 68.91, "learning_rate": 1.5557361414564787e-05, "loss": 1.924, "step": 13909000 }, { "epoch": 68.91, "learning_rate": 1.5556125305311555e-05, "loss": 1.9136, "step": 13909500 }, { "epoch": 68.91, "learning_rate": 1.5554886718885472e-05, "loss": 1.9466, "step": 13910000 }, { "epoch": 68.92, "learning_rate": 1.5553650609632238e-05, "loss": 1.9283, "step": 13910500 }, { "epoch": 68.92, "learning_rate": 1.5552412023206155e-05, "loss": 1.9565, "step": 13911000 }, { "epoch": 68.92, "learning_rate": 1.555117343678007e-05, "loss": 1.9436, "step": 13911500 }, { "epoch": 68.92, "learning_rate": 1.5549937327526844e-05, "loss": 1.9408, "step": 13912000 }, { "epoch": 68.93, "learning_rate": 1.5548698741100757e-05, "loss": 1.8924, "step": 13912500 }, { "epoch": 68.93, "learning_rate": 1.5547460154674674e-05, "loss": 1.9468, "step": 13913000 }, { "epoch": 68.93, "learning_rate": 1.5546224045421443e-05, "loss": 1.947, "step": 13913500 }, { "epoch": 68.93, "learning_rate": 1.554498545899536e-05, "loss": 1.9351, "step": 13914000 }, { "epoch": 68.94, "learning_rate": 1.5543746872569277e-05, "loss": 1.9566, "step": 13914500 }, { "epoch": 68.94, "learning_rate": 1.5542508286143194e-05, "loss": 1.9562, "step": 13915000 }, { "epoch": 68.94, "learning_rate": 1.5541269699717107e-05, "loss": 1.9371, "step": 13915500 }, { "epoch": 68.94, "learning_rate": 1.5540031113291024e-05, "loss": 1.9267, "step": 13916000 }, { "epoch": 68.95, "learning_rate": 1.553879252686494e-05, "loss": 1.9463, "step": 13916500 }, { "epoch": 68.95, "learning_rate": 1.5537553940438855e-05, "loss": 1.952, "step": 13917000 }, { "epoch": 68.95, "learning_rate": 1.5536315354012772e-05, "loss": 1.9358, "step": 13917500 }, { "epoch": 68.95, "learning_rate": 1.553507676758669e-05, "loss": 1.9469, "step": 13918000 }, { "epoch": 68.96, "learning_rate": 1.5533838181160606e-05, "loss": 1.9259, "step": 13918500 }, { "epoch": 68.96, "learning_rate": 1.5532599594734523e-05, "loss": 1.9106, "step": 13919000 }, { "epoch": 68.96, "learning_rate": 1.553136100830844e-05, "loss": 1.9357, "step": 13919500 }, { "epoch": 68.96, "learning_rate": 1.5530122421882353e-05, "loss": 1.9531, "step": 13920000 }, { "epoch": 68.97, "learning_rate": 1.552888383545627e-05, "loss": 1.9228, "step": 13920500 }, { "epoch": 68.97, "learning_rate": 1.552764772620304e-05, "loss": 1.9356, "step": 13921000 }, { "epoch": 68.97, "learning_rate": 1.5526409139776956e-05, "loss": 1.9373, "step": 13921500 }, { "epoch": 68.97, "learning_rate": 1.5525173030523725e-05, "loss": 1.9505, "step": 13922000 }, { "epoch": 68.98, "learning_rate": 1.552393444409764e-05, "loss": 1.9624, "step": 13922500 }, { "epoch": 68.98, "learning_rate": 1.552269585767156e-05, "loss": 1.939, "step": 13923000 }, { "epoch": 68.98, "learning_rate": 1.5521457271245475e-05, "loss": 1.9103, "step": 13923500 }, { "epoch": 68.98, "learning_rate": 1.552021868481939e-05, "loss": 1.9427, "step": 13924000 }, { "epoch": 68.99, "learning_rate": 1.5518980098393306e-05, "loss": 1.9529, "step": 13924500 }, { "epoch": 68.99, "learning_rate": 1.5517741511967223e-05, "loss": 1.9712, "step": 13925000 }, { "epoch": 68.99, "learning_rate": 1.551650292554114e-05, "loss": 1.9312, "step": 13925500 }, { "epoch": 68.99, "learning_rate": 1.5515264339115053e-05, "loss": 1.935, "step": 13926000 }, { "epoch": 69.0, "learning_rate": 1.551402575268897e-05, "loss": 1.962, "step": 13926500 }, { "epoch": 69.0, "learning_rate": 1.5512787166262887e-05, "loss": 1.9352, "step": 13927000 }, { "epoch": 69.0, "eval_accuracy": 0.6763926275182169, "eval_accuracy_mlm": 0.6363743656320076, "eval_accuracy_nsp": 0.8652410779772434, "eval_loss": 2.3032867908477783, "eval_runtime": 146.9546, "eval_samples_per_second": 1734.951, "eval_steps_per_second": 72.294, "step": 13927167 }, { "epoch": 69.0, "learning_rate": 1.5511548579836804e-05, "loss": 1.9236, "step": 13927500 }, { "epoch": 69.0, "learning_rate": 1.551030999341072e-05, "loss": 1.936, "step": 13928000 }, { "epoch": 69.01, "learning_rate": 1.550907388415749e-05, "loss": 1.9136, "step": 13928500 }, { "epoch": 69.01, "learning_rate": 1.5507835297731403e-05, "loss": 1.9086, "step": 13929000 }, { "epoch": 69.01, "learning_rate": 1.550659671130532e-05, "loss": 1.9244, "step": 13929500 }, { "epoch": 69.01, "learning_rate": 1.5505358124879237e-05, "loss": 1.8997, "step": 13930000 }, { "epoch": 69.02, "learning_rate": 1.5504119538453154e-05, "loss": 1.9238, "step": 13930500 }, { "epoch": 69.02, "learning_rate": 1.5502883429199923e-05, "loss": 1.9198, "step": 13931000 }, { "epoch": 69.02, "learning_rate": 1.550164484277384e-05, "loss": 1.9185, "step": 13931500 }, { "epoch": 69.02, "learning_rate": 1.5500406256347757e-05, "loss": 1.9226, "step": 13932000 }, { "epoch": 69.03, "learning_rate": 1.549916766992167e-05, "loss": 1.9301, "step": 13932500 }, { "epoch": 69.03, "learning_rate": 1.5497931560668442e-05, "loss": 1.9073, "step": 13933000 }, { "epoch": 69.03, "learning_rate": 1.549669297424236e-05, "loss": 1.9099, "step": 13933500 }, { "epoch": 69.03, "learning_rate": 1.5495454387816273e-05, "loss": 1.8793, "step": 13934000 }, { "epoch": 69.04, "learning_rate": 1.549421580139019e-05, "loss": 1.9357, "step": 13934500 }, { "epoch": 69.04, "learning_rate": 1.5492977214964107e-05, "loss": 1.9428, "step": 13935000 }, { "epoch": 69.04, "learning_rate": 1.549173862853802e-05, "loss": 1.9508, "step": 13935500 }, { "epoch": 69.04, "learning_rate": 1.5490500042111937e-05, "loss": 1.9208, "step": 13936000 }, { "epoch": 69.05, "learning_rate": 1.5489261455685854e-05, "loss": 1.9168, "step": 13936500 }, { "epoch": 69.05, "learning_rate": 1.548802286925977e-05, "loss": 1.9077, "step": 13937000 }, { "epoch": 69.05, "learning_rate": 1.5486784282833688e-05, "loss": 1.9244, "step": 13937500 }, { "epoch": 69.05, "learning_rate": 1.5485545696407605e-05, "loss": 1.9208, "step": 13938000 }, { "epoch": 69.06, "learning_rate": 1.5484307109981522e-05, "loss": 1.9166, "step": 13938500 }, { "epoch": 69.06, "learning_rate": 1.548306852355544e-05, "loss": 1.9332, "step": 13939000 }, { "epoch": 69.06, "learning_rate": 1.5481832414302204e-05, "loss": 1.8993, "step": 13939500 }, { "epoch": 69.06, "learning_rate": 1.548059382787612e-05, "loss": 1.915, "step": 13940000 }, { "epoch": 69.07, "learning_rate": 1.5479355241450038e-05, "loss": 1.9388, "step": 13940500 }, { "epoch": 69.07, "learning_rate": 1.5478116655023955e-05, "loss": 1.9511, "step": 13941000 }, { "epoch": 69.07, "learning_rate": 1.5476880545770724e-05, "loss": 1.9334, "step": 13941500 }, { "epoch": 69.07, "learning_rate": 1.5475641959344637e-05, "loss": 1.9158, "step": 13942000 }, { "epoch": 69.08, "learning_rate": 1.5474403372918554e-05, "loss": 1.9325, "step": 13942500 }, { "epoch": 69.08, "learning_rate": 1.547316478649247e-05, "loss": 1.8984, "step": 13943000 }, { "epoch": 69.08, "learning_rate": 1.5471928677239243e-05, "loss": 1.9282, "step": 13943500 }, { "epoch": 69.08, "learning_rate": 1.5470690090813157e-05, "loss": 1.9179, "step": 13944000 }, { "epoch": 69.09, "learning_rate": 1.5469451504387074e-05, "loss": 1.9283, "step": 13944500 }, { "epoch": 69.09, "learning_rate": 1.546821291796099e-05, "loss": 1.9293, "step": 13945000 }, { "epoch": 69.09, "learning_rate": 1.546697680870776e-05, "loss": 1.914, "step": 13945500 }, { "epoch": 69.09, "learning_rate": 1.5465738222281677e-05, "loss": 1.9109, "step": 13946000 }, { "epoch": 69.1, "learning_rate": 1.5464499635855593e-05, "loss": 1.9127, "step": 13946500 }, { "epoch": 69.1, "learning_rate": 1.546326352660236e-05, "loss": 1.8958, "step": 13947000 }, { "epoch": 69.1, "learning_rate": 1.5462024940176276e-05, "loss": 1.9127, "step": 13947500 }, { "epoch": 69.1, "learning_rate": 1.5460786353750193e-05, "loss": 1.9285, "step": 13948000 }, { "epoch": 69.11, "learning_rate": 1.545954776732411e-05, "loss": 1.9217, "step": 13948500 }, { "epoch": 69.11, "learning_rate": 1.5458309180898027e-05, "loss": 1.962, "step": 13949000 }, { "epoch": 69.11, "learning_rate": 1.5457070594471944e-05, "loss": 1.9578, "step": 13949500 }, { "epoch": 69.11, "learning_rate": 1.545583200804586e-05, "loss": 1.9214, "step": 13950000 }, { "epoch": 69.12, "learning_rate": 1.5454593421619777e-05, "loss": 1.909, "step": 13950500 }, { "epoch": 69.12, "learning_rate": 1.5453357312366543e-05, "loss": 1.9214, "step": 13951000 }, { "epoch": 69.12, "learning_rate": 1.545211872594046e-05, "loss": 1.9015, "step": 13951500 }, { "epoch": 69.12, "learning_rate": 1.5450880139514377e-05, "loss": 1.9277, "step": 13952000 }, { "epoch": 69.13, "learning_rate": 1.5449641553088294e-05, "loss": 1.9221, "step": 13952500 }, { "epoch": 69.13, "learning_rate": 1.544840296666221e-05, "loss": 1.9544, "step": 13953000 }, { "epoch": 69.13, "learning_rate": 1.5447164380236127e-05, "loss": 1.9257, "step": 13953500 }, { "epoch": 69.13, "learning_rate": 1.544592579381004e-05, "loss": 1.9377, "step": 13954000 }, { "epoch": 69.14, "learning_rate": 1.544468968455681e-05, "loss": 1.9109, "step": 13954500 }, { "epoch": 69.14, "learning_rate": 1.5443451098130727e-05, "loss": 1.9297, "step": 13955000 }, { "epoch": 69.14, "learning_rate": 1.5442214988877496e-05, "loss": 1.9163, "step": 13955500 }, { "epoch": 69.14, "learning_rate": 1.544097640245141e-05, "loss": 1.9177, "step": 13956000 }, { "epoch": 69.15, "learning_rate": 1.5439737816025326e-05, "loss": 1.9373, "step": 13956500 }, { "epoch": 69.15, "learning_rate": 1.5438499229599243e-05, "loss": 1.9337, "step": 13957000 }, { "epoch": 69.15, "learning_rate": 1.543726064317316e-05, "loss": 1.9318, "step": 13957500 }, { "epoch": 69.15, "learning_rate": 1.5436022056747077e-05, "loss": 1.9303, "step": 13958000 }, { "epoch": 69.16, "learning_rate": 1.5434783470320994e-05, "loss": 1.9435, "step": 13958500 }, { "epoch": 69.16, "learning_rate": 1.543354488389491e-05, "loss": 1.9347, "step": 13959000 }, { "epoch": 69.16, "learning_rate": 1.5432306297468828e-05, "loss": 1.936, "step": 13959500 }, { "epoch": 69.16, "learning_rate": 1.5431067711042744e-05, "loss": 1.903, "step": 13960000 }, { "epoch": 69.17, "learning_rate": 1.5429829124616658e-05, "loss": 1.9201, "step": 13960500 }, { "epoch": 69.17, "learning_rate": 1.5428590538190575e-05, "loss": 1.9131, "step": 13961000 }, { "epoch": 69.17, "learning_rate": 1.5427354428937344e-05, "loss": 1.9158, "step": 13961500 }, { "epoch": 69.17, "learning_rate": 1.542611584251126e-05, "loss": 1.9287, "step": 13962000 }, { "epoch": 69.18, "learning_rate": 1.5424877256085178e-05, "loss": 1.9373, "step": 13962500 }, { "epoch": 69.18, "learning_rate": 1.5423638669659095e-05, "loss": 1.9338, "step": 13963000 }, { "epoch": 69.18, "learning_rate": 1.5422400083233008e-05, "loss": 1.9286, "step": 13963500 }, { "epoch": 69.18, "learning_rate": 1.5421163973979777e-05, "loss": 1.8918, "step": 13964000 }, { "epoch": 69.18, "learning_rate": 1.5419925387553694e-05, "loss": 1.9371, "step": 13964500 }, { "epoch": 69.19, "learning_rate": 1.541868680112761e-05, "loss": 1.9252, "step": 13965000 }, { "epoch": 69.19, "learning_rate": 1.5417448214701528e-05, "loss": 1.9096, "step": 13965500 }, { "epoch": 69.19, "learning_rate": 1.5416209628275445e-05, "loss": 1.9229, "step": 13966000 }, { "epoch": 69.19, "learning_rate": 1.5414971041849358e-05, "loss": 1.9043, "step": 13966500 }, { "epoch": 69.2, "learning_rate": 1.5413732455423275e-05, "loss": 1.9391, "step": 13967000 }, { "epoch": 69.2, "learning_rate": 1.5412493868997192e-05, "loss": 1.9199, "step": 13967500 }, { "epoch": 69.2, "learning_rate": 1.541125775974396e-05, "loss": 1.9292, "step": 13968000 }, { "epoch": 69.2, "learning_rate": 1.5410021650490726e-05, "loss": 1.9397, "step": 13968500 }, { "epoch": 69.21, "learning_rate": 1.5408783064064643e-05, "loss": 1.9335, "step": 13969000 }, { "epoch": 69.21, "learning_rate": 1.540754447763856e-05, "loss": 1.9024, "step": 13969500 }, { "epoch": 69.21, "learning_rate": 1.5406310845558184e-05, "loss": 1.935, "step": 13970000 }, { "epoch": 69.21, "learning_rate": 1.5405072259132098e-05, "loss": 1.9428, "step": 13970500 }, { "epoch": 69.22, "learning_rate": 1.5403833672706015e-05, "loss": 1.9439, "step": 13971000 }, { "epoch": 69.22, "learning_rate": 1.540259508627993e-05, "loss": 1.9321, "step": 13971500 }, { "epoch": 69.22, "learning_rate": 1.540135649985385e-05, "loss": 1.9047, "step": 13972000 }, { "epoch": 69.22, "learning_rate": 1.5400117913427765e-05, "loss": 1.9178, "step": 13972500 }, { "epoch": 69.23, "learning_rate": 1.5398879327001682e-05, "loss": 1.9156, "step": 13973000 }, { "epoch": 69.23, "learning_rate": 1.5397640740575596e-05, "loss": 1.9173, "step": 13973500 }, { "epoch": 69.23, "learning_rate": 1.5396402154149513e-05, "loss": 1.9635, "step": 13974000 }, { "epoch": 69.23, "learning_rate": 1.539516356772343e-05, "loss": 1.9418, "step": 13974500 }, { "epoch": 69.24, "learning_rate": 1.53939274584702e-05, "loss": 1.9364, "step": 13975000 }, { "epoch": 69.24, "learning_rate": 1.5392688872044115e-05, "loss": 1.9253, "step": 13975500 }, { "epoch": 69.24, "learning_rate": 1.5391450285618032e-05, "loss": 1.9427, "step": 13976000 }, { "epoch": 69.24, "learning_rate": 1.5390211699191946e-05, "loss": 1.9059, "step": 13976500 }, { "epoch": 69.25, "learning_rate": 1.5388973112765863e-05, "loss": 1.9177, "step": 13977000 }, { "epoch": 69.25, "learning_rate": 1.538773452633978e-05, "loss": 1.9421, "step": 13977500 }, { "epoch": 69.25, "learning_rate": 1.5386495939913693e-05, "loss": 1.9345, "step": 13978000 }, { "epoch": 69.25, "learning_rate": 1.538525735348761e-05, "loss": 1.9274, "step": 13978500 }, { "epoch": 69.26, "learning_rate": 1.5384018767061527e-05, "loss": 1.9267, "step": 13979000 }, { "epoch": 69.26, "learning_rate": 1.538278513498115e-05, "loss": 1.9295, "step": 13979500 }, { "epoch": 69.26, "learning_rate": 1.5381546548555065e-05, "loss": 1.9315, "step": 13980000 }, { "epoch": 69.26, "learning_rate": 1.5380307962128982e-05, "loss": 1.9032, "step": 13980500 }, { "epoch": 69.27, "learning_rate": 1.53790693757029e-05, "loss": 1.9415, "step": 13981000 }, { "epoch": 69.27, "learning_rate": 1.5377830789276816e-05, "loss": 1.9211, "step": 13981500 }, { "epoch": 69.27, "learning_rate": 1.5376594680023584e-05, "loss": 1.9123, "step": 13982000 }, { "epoch": 69.27, "learning_rate": 1.53753560935975e-05, "loss": 1.9016, "step": 13982500 }, { "epoch": 69.28, "learning_rate": 1.5374117507171415e-05, "loss": 1.9475, "step": 13983000 }, { "epoch": 69.28, "learning_rate": 1.5372878920745332e-05, "loss": 1.9419, "step": 13983500 }, { "epoch": 69.28, "learning_rate": 1.53716428114921e-05, "loss": 1.944, "step": 13984000 }, { "epoch": 69.28, "learning_rate": 1.5370404225066018e-05, "loss": 1.9273, "step": 13984500 }, { "epoch": 69.29, "learning_rate": 1.5369165638639934e-05, "loss": 1.9331, "step": 13985000 }, { "epoch": 69.29, "learning_rate": 1.536792705221385e-05, "loss": 1.9338, "step": 13985500 }, { "epoch": 69.29, "learning_rate": 1.5366688465787765e-05, "loss": 1.9327, "step": 13986000 }, { "epoch": 69.29, "learning_rate": 1.5365449879361682e-05, "loss": 1.9283, "step": 13986500 }, { "epoch": 69.3, "learning_rate": 1.53642112929356e-05, "loss": 1.9328, "step": 13987000 }, { "epoch": 69.3, "learning_rate": 1.5362972706509516e-05, "loss": 1.9284, "step": 13987500 }, { "epoch": 69.3, "learning_rate": 1.5361734120083433e-05, "loss": 1.9138, "step": 13988000 }, { "epoch": 69.3, "learning_rate": 1.536049553365735e-05, "loss": 1.9341, "step": 13988500 }, { "epoch": 69.31, "learning_rate": 1.5359256947231267e-05, "loss": 1.9128, "step": 13989000 }, { "epoch": 69.31, "learning_rate": 1.5358018360805183e-05, "loss": 1.9366, "step": 13989500 }, { "epoch": 69.31, "learning_rate": 1.535678225155195e-05, "loss": 1.9161, "step": 13990000 }, { "epoch": 69.31, "learning_rate": 1.5355546142298718e-05, "loss": 1.9249, "step": 13990500 }, { "epoch": 69.32, "learning_rate": 1.5354307555872635e-05, "loss": 1.9481, "step": 13991000 }, { "epoch": 69.32, "learning_rate": 1.535306896944655e-05, "loss": 1.9269, "step": 13991500 }, { "epoch": 69.32, "learning_rate": 1.535183286019332e-05, "loss": 1.9329, "step": 13992000 }, { "epoch": 69.32, "learning_rate": 1.535059675094009e-05, "loss": 1.9418, "step": 13992500 }, { "epoch": 69.33, "learning_rate": 1.5349358164514006e-05, "loss": 1.9338, "step": 13993000 }, { "epoch": 69.33, "learning_rate": 1.5348119578087923e-05, "loss": 1.9302, "step": 13993500 }, { "epoch": 69.33, "learning_rate": 1.5346880991661837e-05, "loss": 1.9659, "step": 13994000 }, { "epoch": 69.33, "learning_rate": 1.5345642405235753e-05, "loss": 1.947, "step": 13994500 }, { "epoch": 69.34, "learning_rate": 1.534440381880967e-05, "loss": 1.9133, "step": 13995000 }, { "epoch": 69.34, "learning_rate": 1.5343165232383584e-05, "loss": 1.9249, "step": 13995500 }, { "epoch": 69.34, "learning_rate": 1.53419266459575e-05, "loss": 1.9627, "step": 13996000 }, { "epoch": 69.34, "learning_rate": 1.5340688059531418e-05, "loss": 1.9135, "step": 13996500 }, { "epoch": 69.35, "learning_rate": 1.5339449473105335e-05, "loss": 1.9193, "step": 13997000 }, { "epoch": 69.35, "learning_rate": 1.533821088667925e-05, "loss": 1.9481, "step": 13997500 }, { "epoch": 69.35, "learning_rate": 1.533697230025317e-05, "loss": 1.9136, "step": 13998000 }, { "epoch": 69.35, "learning_rate": 1.5335733713827082e-05, "loss": 1.9057, "step": 13998500 }, { "epoch": 69.36, "learning_rate": 1.5334495127401e-05, "loss": 1.9356, "step": 13999000 }, { "epoch": 69.36, "learning_rate": 1.5333256540974916e-05, "loss": 1.932, "step": 13999500 }, { "epoch": 69.36, "learning_rate": 1.5332020431721685e-05, "loss": 1.9355, "step": 14000000 }, { "epoch": 69.36, "learning_rate": 1.5330784322468454e-05, "loss": 1.9214, "step": 14000500 }, { "epoch": 69.37, "learning_rate": 1.532954573604237e-05, "loss": 1.9473, "step": 14001000 }, { "epoch": 69.37, "learning_rate": 1.5328307149616287e-05, "loss": 1.9431, "step": 14001500 }, { "epoch": 69.37, "learning_rate": 1.5327068563190204e-05, "loss": 1.9142, "step": 14002000 }, { "epoch": 69.37, "learning_rate": 1.5325829976764118e-05, "loss": 1.9223, "step": 14002500 }, { "epoch": 69.38, "learning_rate": 1.5324591390338035e-05, "loss": 1.9313, "step": 14003000 }, { "epoch": 69.38, "learning_rate": 1.5323352803911952e-05, "loss": 1.901, "step": 14003500 }, { "epoch": 69.38, "learning_rate": 1.532211421748587e-05, "loss": 1.9366, "step": 14004000 }, { "epoch": 69.38, "learning_rate": 1.5320878108232638e-05, "loss": 1.9304, "step": 14004500 }, { "epoch": 69.39, "learning_rate": 1.5319639521806554e-05, "loss": 1.9005, "step": 14005000 }, { "epoch": 69.39, "learning_rate": 1.531840093538047e-05, "loss": 1.9058, "step": 14005500 }, { "epoch": 69.39, "learning_rate": 1.5317162348954385e-05, "loss": 1.9328, "step": 14006000 }, { "epoch": 69.39, "learning_rate": 1.5315923762528302e-05, "loss": 1.9223, "step": 14006500 }, { "epoch": 69.4, "learning_rate": 1.531468517610222e-05, "loss": 1.9334, "step": 14007000 }, { "epoch": 69.4, "learning_rate": 1.5313446589676136e-05, "loss": 1.9183, "step": 14007500 }, { "epoch": 69.4, "learning_rate": 1.5312210480422905e-05, "loss": 1.9483, "step": 14008000 }, { "epoch": 69.4, "learning_rate": 1.531097189399682e-05, "loss": 1.9477, "step": 14008500 }, { "epoch": 69.41, "learning_rate": 1.5309733307570735e-05, "loss": 1.896, "step": 14009000 }, { "epoch": 69.41, "learning_rate": 1.5308494721144652e-05, "loss": 1.9379, "step": 14009500 }, { "epoch": 69.41, "learning_rate": 1.530725613471857e-05, "loss": 1.9393, "step": 14010000 }, { "epoch": 69.41, "learning_rate": 1.5306017548292486e-05, "loss": 1.9141, "step": 14010500 }, { "epoch": 69.42, "learning_rate": 1.53047789618664e-05, "loss": 1.9056, "step": 14011000 }, { "epoch": 69.42, "learning_rate": 1.5303540375440316e-05, "loss": 1.91, "step": 14011500 }, { "epoch": 69.42, "learning_rate": 1.5302301789014233e-05, "loss": 1.9065, "step": 14012000 }, { "epoch": 69.42, "learning_rate": 1.530106320258815e-05, "loss": 1.9285, "step": 14012500 }, { "epoch": 69.43, "learning_rate": 1.5299824616162067e-05, "loss": 1.9248, "step": 14013000 }, { "epoch": 69.43, "learning_rate": 1.5298588506908836e-05, "loss": 1.9304, "step": 14013500 }, { "epoch": 69.43, "learning_rate": 1.529734992048275e-05, "loss": 1.9309, "step": 14014000 }, { "epoch": 69.43, "learning_rate": 1.5296111334056666e-05, "loss": 1.9258, "step": 14014500 }, { "epoch": 69.44, "learning_rate": 1.5294872747630583e-05, "loss": 1.9269, "step": 14015000 }, { "epoch": 69.44, "learning_rate": 1.52936341612045e-05, "loss": 1.9375, "step": 14015500 }, { "epoch": 69.44, "learning_rate": 1.5292395574778417e-05, "loss": 1.9113, "step": 14016000 }, { "epoch": 69.44, "learning_rate": 1.5291159465525186e-05, "loss": 1.9321, "step": 14016500 }, { "epoch": 69.45, "learning_rate": 1.5289920879099103e-05, "loss": 1.9019, "step": 14017000 }, { "epoch": 69.45, "learning_rate": 1.528868476984587e-05, "loss": 1.9308, "step": 14017500 }, { "epoch": 69.45, "learning_rate": 1.528744618341979e-05, "loss": 1.9278, "step": 14018000 }, { "epoch": 69.45, "learning_rate": 1.5286207596993705e-05, "loss": 1.9554, "step": 14018500 }, { "epoch": 69.45, "learning_rate": 1.5284969010567622e-05, "loss": 1.9398, "step": 14019000 }, { "epoch": 69.46, "learning_rate": 1.5283730424141536e-05, "loss": 1.9443, "step": 14019500 }, { "epoch": 69.46, "learning_rate": 1.5282494314888305e-05, "loss": 1.9308, "step": 14020000 }, { "epoch": 69.46, "learning_rate": 1.528125572846222e-05, "loss": 1.9475, "step": 14020500 }, { "epoch": 69.46, "learning_rate": 1.528001714203614e-05, "loss": 1.9122, "step": 14021000 }, { "epoch": 69.47, "learning_rate": 1.5278778555610056e-05, "loss": 1.9453, "step": 14021500 }, { "epoch": 69.47, "learning_rate": 1.5277539969183972e-05, "loss": 1.9081, "step": 14022000 }, { "epoch": 69.47, "learning_rate": 1.5276301382757886e-05, "loss": 1.9194, "step": 14022500 }, { "epoch": 69.47, "learning_rate": 1.5275062796331803e-05, "loss": 1.9127, "step": 14023000 }, { "epoch": 69.48, "learning_rate": 1.527382420990572e-05, "loss": 1.9315, "step": 14023500 }, { "epoch": 69.48, "learning_rate": 1.5272585623479633e-05, "loss": 1.954, "step": 14024000 }, { "epoch": 69.48, "learning_rate": 1.527134703705355e-05, "loss": 1.917, "step": 14024500 }, { "epoch": 69.48, "learning_rate": 1.5270108450627467e-05, "loss": 1.9277, "step": 14025000 }, { "epoch": 69.49, "learning_rate": 1.5268869864201384e-05, "loss": 1.9252, "step": 14025500 }, { "epoch": 69.49, "learning_rate": 1.52676312777753e-05, "loss": 1.9367, "step": 14026000 }, { "epoch": 69.49, "learning_rate": 1.526639516852207e-05, "loss": 1.9317, "step": 14026500 }, { "epoch": 69.49, "learning_rate": 1.5265156582095987e-05, "loss": 1.9141, "step": 14027000 }, { "epoch": 69.5, "learning_rate": 1.5263920472842756e-05, "loss": 1.9517, "step": 14027500 }, { "epoch": 69.5, "learning_rate": 1.5262681886416673e-05, "loss": 1.9298, "step": 14028000 }, { "epoch": 69.5, "learning_rate": 1.526144329999059e-05, "loss": 1.936, "step": 14028500 }, { "epoch": 69.5, "learning_rate": 1.5260204713564506e-05, "loss": 1.9136, "step": 14029000 }, { "epoch": 69.51, "learning_rate": 1.525896612713842e-05, "loss": 1.9444, "step": 14029500 }, { "epoch": 69.51, "learning_rate": 1.5257727540712335e-05, "loss": 1.9337, "step": 14030000 }, { "epoch": 69.51, "learning_rate": 1.5256488954286252e-05, "loss": 1.9338, "step": 14030500 }, { "epoch": 69.51, "learning_rate": 1.5255252845033021e-05, "loss": 1.9129, "step": 14031000 }, { "epoch": 69.52, "learning_rate": 1.5254014258606938e-05, "loss": 1.9431, "step": 14031500 }, { "epoch": 69.52, "learning_rate": 1.5252775672180855e-05, "loss": 1.9239, "step": 14032000 }, { "epoch": 69.52, "learning_rate": 1.5251539562927622e-05, "loss": 1.9425, "step": 14032500 }, { "epoch": 69.52, "learning_rate": 1.5250300976501539e-05, "loss": 1.9216, "step": 14033000 }, { "epoch": 69.53, "learning_rate": 1.5249062390075456e-05, "loss": 1.9357, "step": 14033500 }, { "epoch": 69.53, "learning_rate": 1.5247823803649373e-05, "loss": 1.9609, "step": 14034000 }, { "epoch": 69.53, "learning_rate": 1.5246585217223288e-05, "loss": 1.9441, "step": 14034500 }, { "epoch": 69.53, "learning_rate": 1.5245346630797205e-05, "loss": 1.9381, "step": 14035000 }, { "epoch": 69.54, "learning_rate": 1.5244108044371122e-05, "loss": 1.9327, "step": 14035500 }, { "epoch": 69.54, "learning_rate": 1.5242869457945035e-05, "loss": 1.9177, "step": 14036000 }, { "epoch": 69.54, "learning_rate": 1.5241630871518952e-05, "loss": 1.9249, "step": 14036500 }, { "epoch": 69.54, "learning_rate": 1.5240394762265723e-05, "loss": 1.9486, "step": 14037000 }, { "epoch": 69.55, "learning_rate": 1.523915617583964e-05, "loss": 1.939, "step": 14037500 }, { "epoch": 69.55, "learning_rate": 1.5237917589413555e-05, "loss": 1.9318, "step": 14038000 }, { "epoch": 69.55, "learning_rate": 1.5236679002987472e-05, "loss": 1.9427, "step": 14038500 }, { "epoch": 69.55, "learning_rate": 1.5235440416561385e-05, "loss": 1.9206, "step": 14039000 }, { "epoch": 69.56, "learning_rate": 1.5234201830135302e-05, "loss": 1.937, "step": 14039500 }, { "epoch": 69.56, "learning_rate": 1.523296324370922e-05, "loss": 1.9231, "step": 14040000 }, { "epoch": 69.56, "learning_rate": 1.5231729611628842e-05, "loss": 1.9199, "step": 14040500 }, { "epoch": 69.56, "learning_rate": 1.5230491025202757e-05, "loss": 1.9329, "step": 14041000 }, { "epoch": 69.57, "learning_rate": 1.5229252438776672e-05, "loss": 1.9259, "step": 14041500 }, { "epoch": 69.57, "learning_rate": 1.5228013852350589e-05, "loss": 1.9243, "step": 14042000 }, { "epoch": 69.57, "learning_rate": 1.5226775265924506e-05, "loss": 1.9507, "step": 14042500 }, { "epoch": 69.57, "learning_rate": 1.5225536679498423e-05, "loss": 1.9493, "step": 14043000 }, { "epoch": 69.58, "learning_rate": 1.522429809307234e-05, "loss": 1.9629, "step": 14043500 }, { "epoch": 69.58, "learning_rate": 1.5223059506646257e-05, "loss": 1.9335, "step": 14044000 }, { "epoch": 69.58, "learning_rate": 1.5221820920220172e-05, "loss": 1.9018, "step": 14044500 }, { "epoch": 69.58, "learning_rate": 1.5220582333794089e-05, "loss": 1.9191, "step": 14045000 }, { "epoch": 69.59, "learning_rate": 1.5219346224540856e-05, "loss": 1.9426, "step": 14045500 }, { "epoch": 69.59, "learning_rate": 1.5218110115287627e-05, "loss": 1.9358, "step": 14046000 }, { "epoch": 69.59, "learning_rate": 1.5216871528861543e-05, "loss": 1.9442, "step": 14046500 }, { "epoch": 69.59, "learning_rate": 1.5215632942435459e-05, "loss": 1.9408, "step": 14047000 }, { "epoch": 69.6, "learning_rate": 1.5214394356009374e-05, "loss": 1.938, "step": 14047500 }, { "epoch": 69.6, "learning_rate": 1.5213158246756143e-05, "loss": 1.9439, "step": 14048000 }, { "epoch": 69.6, "learning_rate": 1.521191966033006e-05, "loss": 1.9355, "step": 14048500 }, { "epoch": 69.6, "learning_rate": 1.5210681073903977e-05, "loss": 1.9345, "step": 14049000 }, { "epoch": 69.61, "learning_rate": 1.5209442487477894e-05, "loss": 1.93, "step": 14049500 }, { "epoch": 69.61, "learning_rate": 1.5208206378224659e-05, "loss": 1.9237, "step": 14050000 }, { "epoch": 69.61, "learning_rate": 1.5206967791798576e-05, "loss": 1.925, "step": 14050500 }, { "epoch": 69.61, "learning_rate": 1.5205729205372493e-05, "loss": 1.9218, "step": 14051000 }, { "epoch": 69.62, "learning_rate": 1.520449061894641e-05, "loss": 1.9608, "step": 14051500 }, { "epoch": 69.62, "learning_rate": 1.5203252032520327e-05, "loss": 1.9209, "step": 14052000 }, { "epoch": 69.62, "learning_rate": 1.5202013446094244e-05, "loss": 1.9373, "step": 14052500 }, { "epoch": 69.62, "learning_rate": 1.520077485966816e-05, "loss": 1.9433, "step": 14053000 }, { "epoch": 69.63, "learning_rate": 1.5199538750414926e-05, "loss": 1.9504, "step": 14053500 }, { "epoch": 69.63, "learning_rate": 1.5198300163988843e-05, "loss": 1.9371, "step": 14054000 }, { "epoch": 69.63, "learning_rate": 1.519706157756276e-05, "loss": 1.9203, "step": 14054500 }, { "epoch": 69.63, "learning_rate": 1.5195822991136677e-05, "loss": 1.9413, "step": 14055000 }, { "epoch": 69.64, "learning_rate": 1.5194584404710594e-05, "loss": 1.8996, "step": 14055500 }, { "epoch": 69.64, "learning_rate": 1.519334581828451e-05, "loss": 1.9279, "step": 14056000 }, { "epoch": 69.64, "learning_rate": 1.5192107231858424e-05, "loss": 1.9221, "step": 14056500 }, { "epoch": 69.64, "learning_rate": 1.5190871122605193e-05, "loss": 1.9079, "step": 14057000 }, { "epoch": 69.65, "learning_rate": 1.518963253617911e-05, "loss": 1.9058, "step": 14057500 }, { "epoch": 69.65, "learning_rate": 1.5188393949753027e-05, "loss": 1.9286, "step": 14058000 }, { "epoch": 69.65, "learning_rate": 1.5187155363326944e-05, "loss": 1.9206, "step": 14058500 }, { "epoch": 69.65, "learning_rate": 1.518591677690086e-05, "loss": 1.9393, "step": 14059000 }, { "epoch": 69.66, "learning_rate": 1.5184678190474778e-05, "loss": 1.8993, "step": 14059500 }, { "epoch": 69.66, "learning_rate": 1.5183439604048691e-05, "loss": 1.943, "step": 14060000 }, { "epoch": 69.66, "learning_rate": 1.5182201017622608e-05, "loss": 1.9562, "step": 14060500 }, { "epoch": 69.66, "learning_rate": 1.5180962431196525e-05, "loss": 1.9271, "step": 14061000 }, { "epoch": 69.67, "learning_rate": 1.517972384477044e-05, "loss": 1.9131, "step": 14061500 }, { "epoch": 69.67, "learning_rate": 1.5178485258344357e-05, "loss": 1.9465, "step": 14062000 }, { "epoch": 69.67, "learning_rate": 1.5177246671918274e-05, "loss": 1.9064, "step": 14062500 }, { "epoch": 69.67, "learning_rate": 1.5176008085492191e-05, "loss": 1.9284, "step": 14063000 }, { "epoch": 69.68, "learning_rate": 1.5174769499066108e-05, "loss": 1.9342, "step": 14063500 }, { "epoch": 69.68, "learning_rate": 1.5173530912640021e-05, "loss": 1.9172, "step": 14064000 }, { "epoch": 69.68, "learning_rate": 1.5172292326213938e-05, "loss": 1.9114, "step": 14064500 }, { "epoch": 69.68, "learning_rate": 1.5171056216960707e-05, "loss": 1.9243, "step": 14065000 }, { "epoch": 69.69, "learning_rate": 1.5169817630534624e-05, "loss": 1.9263, "step": 14065500 }, { "epoch": 69.69, "learning_rate": 1.5168579044108541e-05, "loss": 1.9397, "step": 14066000 }, { "epoch": 69.69, "learning_rate": 1.5167342934855308e-05, "loss": 1.9509, "step": 14066500 }, { "epoch": 69.69, "learning_rate": 1.5166104348429225e-05, "loss": 1.8919, "step": 14067000 }, { "epoch": 69.7, "learning_rate": 1.5164865762003142e-05, "loss": 1.9355, "step": 14067500 }, { "epoch": 69.7, "learning_rate": 1.5163627175577059e-05, "loss": 1.9501, "step": 14068000 }, { "epoch": 69.7, "learning_rate": 1.5162388589150974e-05, "loss": 1.9275, "step": 14068500 }, { "epoch": 69.7, "learning_rate": 1.5161150002724891e-05, "loss": 1.942, "step": 14069000 }, { "epoch": 69.71, "learning_rate": 1.5159913893471658e-05, "loss": 1.9307, "step": 14069500 }, { "epoch": 69.71, "learning_rate": 1.5158675307045575e-05, "loss": 1.9283, "step": 14070000 }, { "epoch": 69.71, "learning_rate": 1.5157439197792344e-05, "loss": 1.9406, "step": 14070500 }, { "epoch": 69.71, "learning_rate": 1.5156200611366261e-05, "loss": 1.9243, "step": 14071000 }, { "epoch": 69.72, "learning_rate": 1.5154962024940178e-05, "loss": 1.9373, "step": 14071500 }, { "epoch": 69.72, "learning_rate": 1.5153723438514095e-05, "loss": 1.9343, "step": 14072000 }, { "epoch": 69.72, "learning_rate": 1.5152484852088008e-05, "loss": 1.9298, "step": 14072500 }, { "epoch": 69.72, "learning_rate": 1.5151246265661925e-05, "loss": 1.9628, "step": 14073000 }, { "epoch": 69.72, "learning_rate": 1.5150007679235842e-05, "loss": 1.9398, "step": 14073500 }, { "epoch": 69.73, "learning_rate": 1.5148769092809759e-05, "loss": 1.9132, "step": 14074000 }, { "epoch": 69.73, "learning_rate": 1.5147530506383676e-05, "loss": 1.9541, "step": 14074500 }, { "epoch": 69.73, "learning_rate": 1.5146294397130445e-05, "loss": 1.9306, "step": 14075000 }, { "epoch": 69.73, "learning_rate": 1.5145055810704358e-05, "loss": 1.9397, "step": 14075500 }, { "epoch": 69.74, "learning_rate": 1.5143817224278275e-05, "loss": 1.9486, "step": 14076000 }, { "epoch": 69.74, "learning_rate": 1.5142578637852192e-05, "loss": 1.9552, "step": 14076500 }, { "epoch": 69.74, "learning_rate": 1.5141340051426109e-05, "loss": 1.9416, "step": 14077000 }, { "epoch": 69.74, "learning_rate": 1.5140101465000026e-05, "loss": 1.9184, "step": 14077500 }, { "epoch": 69.75, "learning_rate": 1.5138865355746795e-05, "loss": 1.9335, "step": 14078000 }, { "epoch": 69.75, "learning_rate": 1.5137626769320708e-05, "loss": 1.9402, "step": 14078500 }, { "epoch": 69.75, "learning_rate": 1.5136388182894625e-05, "loss": 1.9586, "step": 14079000 }, { "epoch": 69.75, "learning_rate": 1.5135152073641396e-05, "loss": 1.951, "step": 14079500 }, { "epoch": 69.76, "learning_rate": 1.5133915964388165e-05, "loss": 1.9487, "step": 14080000 }, { "epoch": 69.76, "learning_rate": 1.5132677377962078e-05, "loss": 1.9065, "step": 14080500 }, { "epoch": 69.76, "learning_rate": 1.5131438791535995e-05, "loss": 1.9154, "step": 14081000 }, { "epoch": 69.76, "learning_rate": 1.5130200205109912e-05, "loss": 1.9338, "step": 14081500 }, { "epoch": 69.77, "learning_rate": 1.5128961618683829e-05, "loss": 1.9291, "step": 14082000 }, { "epoch": 69.77, "learning_rate": 1.5127723032257746e-05, "loss": 1.9314, "step": 14082500 }, { "epoch": 69.77, "learning_rate": 1.5126484445831663e-05, "loss": 1.9514, "step": 14083000 }, { "epoch": 69.77, "learning_rate": 1.512524585940558e-05, "loss": 1.9381, "step": 14083500 }, { "epoch": 69.78, "learning_rate": 1.5124007272979495e-05, "loss": 1.9195, "step": 14084000 }, { "epoch": 69.78, "learning_rate": 1.5122768686553412e-05, "loss": 1.9474, "step": 14084500 }, { "epoch": 69.78, "learning_rate": 1.5121530100127325e-05, "loss": 1.9292, "step": 14085000 }, { "epoch": 69.78, "learning_rate": 1.5120291513701242e-05, "loss": 1.9337, "step": 14085500 }, { "epoch": 69.79, "learning_rate": 1.511905292727516e-05, "loss": 1.935, "step": 14086000 }, { "epoch": 69.79, "learning_rate": 1.511781681802193e-05, "loss": 1.9318, "step": 14086500 }, { "epoch": 69.79, "learning_rate": 1.5116578231595847e-05, "loss": 1.9222, "step": 14087000 }, { "epoch": 69.79, "learning_rate": 1.5115339645169762e-05, "loss": 1.9215, "step": 14087500 }, { "epoch": 69.8, "learning_rate": 1.5114101058743677e-05, "loss": 1.9228, "step": 14088000 }, { "epoch": 69.8, "learning_rate": 1.5112862472317592e-05, "loss": 1.898, "step": 14088500 }, { "epoch": 69.8, "learning_rate": 1.511162388589151e-05, "loss": 1.9448, "step": 14089000 }, { "epoch": 69.8, "learning_rate": 1.5110385299465426e-05, "loss": 1.926, "step": 14089500 }, { "epoch": 69.81, "learning_rate": 1.5109149190212197e-05, "loss": 1.929, "step": 14090000 }, { "epoch": 69.81, "learning_rate": 1.5107910603786112e-05, "loss": 1.9627, "step": 14090500 }, { "epoch": 69.81, "learning_rate": 1.5106672017360027e-05, "loss": 1.9301, "step": 14091000 }, { "epoch": 69.81, "learning_rate": 1.5105433430933944e-05, "loss": 1.9222, "step": 14091500 }, { "epoch": 69.82, "learning_rate": 1.5104197321680713e-05, "loss": 1.925, "step": 14092000 }, { "epoch": 69.82, "learning_rate": 1.510295873525463e-05, "loss": 1.9279, "step": 14092500 }, { "epoch": 69.82, "learning_rate": 1.5101720148828547e-05, "loss": 1.9546, "step": 14093000 }, { "epoch": 69.82, "learning_rate": 1.5100481562402464e-05, "loss": 1.9525, "step": 14093500 }, { "epoch": 69.83, "learning_rate": 1.5099242975976377e-05, "loss": 1.9295, "step": 14094000 }, { "epoch": 69.83, "learning_rate": 1.5098004389550294e-05, "loss": 1.9351, "step": 14094500 }, { "epoch": 69.83, "learning_rate": 1.5096765803124211e-05, "loss": 1.9346, "step": 14095000 }, { "epoch": 69.83, "learning_rate": 1.5095527216698126e-05, "loss": 1.9415, "step": 14095500 }, { "epoch": 69.84, "learning_rate": 1.5094291107444897e-05, "loss": 1.938, "step": 14096000 }, { "epoch": 69.84, "learning_rate": 1.5093052521018814e-05, "loss": 1.9377, "step": 14096500 }, { "epoch": 69.84, "learning_rate": 1.509181393459273e-05, "loss": 1.9223, "step": 14097000 }, { "epoch": 69.84, "learning_rate": 1.5090575348166644e-05, "loss": 1.9362, "step": 14097500 }, { "epoch": 69.85, "learning_rate": 1.5089339238913413e-05, "loss": 1.9433, "step": 14098000 }, { "epoch": 69.85, "learning_rate": 1.5088103129660184e-05, "loss": 1.9571, "step": 14098500 }, { "epoch": 69.85, "learning_rate": 1.5086864543234097e-05, "loss": 1.9254, "step": 14099000 }, { "epoch": 69.85, "learning_rate": 1.5085625956808014e-05, "loss": 1.9327, "step": 14099500 }, { "epoch": 69.86, "learning_rate": 1.5084387370381931e-05, "loss": 1.9366, "step": 14100000 }, { "epoch": 69.86, "learning_rate": 1.5083148783955848e-05, "loss": 1.9165, "step": 14100500 }, { "epoch": 69.86, "learning_rate": 1.5081910197529763e-05, "loss": 1.9315, "step": 14101000 }, { "epoch": 69.86, "learning_rate": 1.508067161110368e-05, "loss": 1.9327, "step": 14101500 }, { "epoch": 69.87, "learning_rate": 1.5079433024677597e-05, "loss": 1.9248, "step": 14102000 }, { "epoch": 69.87, "learning_rate": 1.5078194438251514e-05, "loss": 1.9156, "step": 14102500 }, { "epoch": 69.87, "learning_rate": 1.5076955851825431e-05, "loss": 1.9216, "step": 14103000 }, { "epoch": 69.87, "learning_rate": 1.5075717265399344e-05, "loss": 1.9205, "step": 14103500 }, { "epoch": 69.88, "learning_rate": 1.5074478678973261e-05, "loss": 1.9154, "step": 14104000 }, { "epoch": 69.88, "learning_rate": 1.5073240092547178e-05, "loss": 1.9401, "step": 14104500 }, { "epoch": 69.88, "learning_rate": 1.5072003983293947e-05, "loss": 1.9324, "step": 14105000 }, { "epoch": 69.88, "learning_rate": 1.5070765396867864e-05, "loss": 1.9567, "step": 14105500 }, { "epoch": 69.89, "learning_rate": 1.5069529287614631e-05, "loss": 1.9567, "step": 14106000 }, { "epoch": 69.89, "learning_rate": 1.5068290701188548e-05, "loss": 1.9274, "step": 14106500 }, { "epoch": 69.89, "learning_rate": 1.5067052114762465e-05, "loss": 1.9372, "step": 14107000 }, { "epoch": 69.89, "learning_rate": 1.506581352833638e-05, "loss": 1.9097, "step": 14107500 }, { "epoch": 69.9, "learning_rate": 1.5064574941910297e-05, "loss": 1.923, "step": 14108000 }, { "epoch": 69.9, "learning_rate": 1.5063338832657064e-05, "loss": 1.9334, "step": 14108500 }, { "epoch": 69.9, "learning_rate": 1.5062100246230981e-05, "loss": 1.926, "step": 14109000 }, { "epoch": 69.9, "learning_rate": 1.5060861659804898e-05, "loss": 1.9266, "step": 14109500 }, { "epoch": 69.91, "learning_rate": 1.5059623073378815e-05, "loss": 1.9205, "step": 14110000 }, { "epoch": 69.91, "learning_rate": 1.5058386964125584e-05, "loss": 1.9282, "step": 14110500 }, { "epoch": 69.91, "learning_rate": 1.50571483776995e-05, "loss": 1.9397, "step": 14111000 }, { "epoch": 69.91, "learning_rate": 1.5055909791273414e-05, "loss": 1.933, "step": 14111500 }, { "epoch": 69.92, "learning_rate": 1.5054671204847331e-05, "loss": 1.9513, "step": 14112000 }, { "epoch": 69.92, "learning_rate": 1.5053432618421248e-05, "loss": 1.9499, "step": 14112500 }, { "epoch": 69.92, "learning_rate": 1.5052194031995165e-05, "loss": 1.9349, "step": 14113000 }, { "epoch": 69.92, "learning_rate": 1.5050955445569082e-05, "loss": 1.9212, "step": 14113500 }, { "epoch": 69.93, "learning_rate": 1.5049719336315851e-05, "loss": 1.9314, "step": 14114000 }, { "epoch": 69.93, "learning_rate": 1.5048480749889768e-05, "loss": 1.9303, "step": 14114500 }, { "epoch": 69.93, "learning_rate": 1.5047242163463681e-05, "loss": 1.936, "step": 14115000 }, { "epoch": 69.93, "learning_rate": 1.5046003577037598e-05, "loss": 1.9318, "step": 14115500 }, { "epoch": 69.94, "learning_rate": 1.5044764990611515e-05, "loss": 1.9206, "step": 14116000 }, { "epoch": 69.94, "learning_rate": 1.5043526404185432e-05, "loss": 1.9419, "step": 14116500 }, { "epoch": 69.94, "learning_rate": 1.5042287817759349e-05, "loss": 1.9322, "step": 14117000 }, { "epoch": 69.94, "learning_rate": 1.5041051708506118e-05, "loss": 1.929, "step": 14117500 }, { "epoch": 69.95, "learning_rate": 1.5039815599252885e-05, "loss": 1.924, "step": 14118000 }, { "epoch": 69.95, "learning_rate": 1.5038577012826802e-05, "loss": 1.9128, "step": 14118500 }, { "epoch": 69.95, "learning_rate": 1.5037338426400719e-05, "loss": 1.9211, "step": 14119000 }, { "epoch": 69.95, "learning_rate": 1.5036099839974636e-05, "loss": 1.9311, "step": 14119500 }, { "epoch": 69.96, "learning_rate": 1.5034863730721401e-05, "loss": 1.9503, "step": 14120000 }, { "epoch": 69.96, "learning_rate": 1.5033625144295318e-05, "loss": 1.9222, "step": 14120500 }, { "epoch": 69.96, "learning_rate": 1.5032386557869235e-05, "loss": 1.9337, "step": 14121000 }, { "epoch": 69.96, "learning_rate": 1.5031147971443152e-05, "loss": 1.9428, "step": 14121500 }, { "epoch": 69.97, "learning_rate": 1.5029909385017069e-05, "loss": 1.9351, "step": 14122000 }, { "epoch": 69.97, "learning_rate": 1.5028673275763838e-05, "loss": 1.9113, "step": 14122500 }, { "epoch": 69.97, "learning_rate": 1.5027434689337753e-05, "loss": 1.9459, "step": 14123000 }, { "epoch": 69.97, "learning_rate": 1.5026196102911668e-05, "loss": 1.9659, "step": 14123500 }, { "epoch": 69.98, "learning_rate": 1.5024957516485585e-05, "loss": 1.9277, "step": 14124000 }, { "epoch": 69.98, "learning_rate": 1.5023718930059502e-05, "loss": 1.9408, "step": 14124500 }, { "epoch": 69.98, "learning_rate": 1.5022480343633419e-05, "loss": 1.9385, "step": 14125000 }, { "epoch": 69.98, "learning_rate": 1.5021241757207336e-05, "loss": 1.9317, "step": 14125500 }, { "epoch": 69.99, "learning_rate": 1.5020003170781253e-05, "loss": 1.9205, "step": 14126000 }, { "epoch": 69.99, "learning_rate": 1.5018764584355168e-05, "loss": 1.9355, "step": 14126500 }, { "epoch": 69.99, "learning_rate": 1.5017525997929085e-05, "loss": 1.9405, "step": 14127000 }, { "epoch": 69.99, "learning_rate": 1.5016287411503e-05, "loss": 1.9355, "step": 14127500 }, { "epoch": 69.99, "learning_rate": 1.5015051302249769e-05, "loss": 1.9334, "step": 14128000 }, { "epoch": 70.0, "learning_rate": 1.5013812715823686e-05, "loss": 1.925, "step": 14128500 }, { "epoch": 70.0, "learning_rate": 1.5012574129397603e-05, "loss": 1.9628, "step": 14129000 }, { "epoch": 70.0, "eval_accuracy": 0.6768668471826128, "eval_accuracy_mlm": 0.6369453300232284, "eval_accuracy_nsp": 0.865072423409254, "eval_loss": 2.270502805709839, "eval_runtime": 146.8085, "eval_samples_per_second": 1736.678, "eval_steps_per_second": 72.366, "step": 14129010 } ], "max_steps": 20184300, "num_train_epochs": 100, "total_flos": 1.8292334964832614e+19, "trial_name": null, "trial_params": null }