{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 2018430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9998768845092475e-05, "loss": 3.3856, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999753025866639e-05, "loss": 3.1861, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.999629167224031e-05, "loss": 3.1416, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9995053085814226e-05, "loss": 3.0688, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.999381449938814e-05, "loss": 3.0467, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.999257591296206e-05, "loss": 3.0166, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.999133980370883e-05, "loss": 3.0395, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.9990101217282745e-05, "loss": 2.9982, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.9988862630856655e-05, "loss": 2.9531, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.9987626521603424e-05, "loss": 2.985, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.998638793517734e-05, "loss": 2.9528, "step": 5500 }, { "epoch": 0.03, "learning_rate": 4.998514934875126e-05, "loss": 2.9339, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.9983910762325175e-05, "loss": 2.9353, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.998267217589909e-05, "loss": 2.9251, "step": 7000 }, { "epoch": 0.04, "learning_rate": 4.998143358947301e-05, "loss": 2.9249, "step": 7500 }, { "epoch": 0.04, "learning_rate": 4.9980195003046926e-05, "loss": 2.922, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.997895641662084e-05, "loss": 2.9129, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.997771783019476e-05, "loss": 2.8805, "step": 9000 }, { "epoch": 0.05, "learning_rate": 4.997647924376868e-05, "loss": 2.8891, "step": 9500 }, { "epoch": 0.05, "learning_rate": 4.9975240657342594e-05, "loss": 2.8833, "step": 10000 }, { "epoch": 0.05, "learning_rate": 4.997400207091651e-05, "loss": 2.9069, "step": 10500 }, { "epoch": 0.05, "learning_rate": 4.997276596166328e-05, "loss": 2.8774, "step": 11000 }, { "epoch": 0.06, "learning_rate": 4.997152737523719e-05, "loss": 2.8608, "step": 11500 }, { "epoch": 0.06, "learning_rate": 4.9970288788811106e-05, "loss": 2.8853, "step": 12000 }, { "epoch": 0.06, "learning_rate": 4.996905020238502e-05, "loss": 2.854, "step": 12500 }, { "epoch": 0.06, "learning_rate": 4.996781161595894e-05, "loss": 2.8447, "step": 13000 }, { "epoch": 0.07, "learning_rate": 4.996657550670571e-05, "loss": 2.8466, "step": 13500 }, { "epoch": 0.07, "learning_rate": 4.9965336920279626e-05, "loss": 2.8537, "step": 14000 }, { "epoch": 0.07, "learning_rate": 4.996409833385354e-05, "loss": 2.8332, "step": 14500 }, { "epoch": 0.07, "learning_rate": 4.996285974742746e-05, "loss": 2.8452, "step": 15000 }, { "epoch": 0.08, "learning_rate": 4.996162363817423e-05, "loss": 2.8432, "step": 15500 }, { "epoch": 0.08, "learning_rate": 4.9960385051748146e-05, "loss": 2.846, "step": 16000 }, { "epoch": 0.08, "learning_rate": 4.995914646532206e-05, "loss": 2.8418, "step": 16500 }, { "epoch": 0.08, "learning_rate": 4.995790787889598e-05, "loss": 2.8401, "step": 17000 }, { "epoch": 0.09, "learning_rate": 4.9956669292469896e-05, "loss": 2.8253, "step": 17500 }, { "epoch": 0.09, "learning_rate": 4.9955430706043807e-05, "loss": 2.8343, "step": 18000 }, { "epoch": 0.09, "learning_rate": 4.9954192119617723e-05, "loss": 2.8231, "step": 18500 }, { "epoch": 0.09, "learning_rate": 4.995295353319164e-05, "loss": 2.7981, "step": 19000 }, { "epoch": 0.1, "learning_rate": 4.995171742393841e-05, "loss": 2.8358, "step": 19500 }, { "epoch": 0.1, "learning_rate": 4.9950478837512326e-05, "loss": 2.825, "step": 20000 }, { "epoch": 0.1, "learning_rate": 4.994924025108624e-05, "loss": 2.7965, "step": 20500 }, { "epoch": 0.1, "learning_rate": 4.994800166466016e-05, "loss": 2.8224, "step": 21000 }, { "epoch": 0.11, "learning_rate": 4.9946768032579774e-05, "loss": 2.8108, "step": 21500 }, { "epoch": 0.11, "learning_rate": 4.994552944615369e-05, "loss": 2.8076, "step": 22000 }, { "epoch": 0.11, "learning_rate": 4.994429085972761e-05, "loss": 2.8013, "step": 22500 }, { "epoch": 0.11, "learning_rate": 4.9943052273301525e-05, "loss": 2.818, "step": 23000 }, { "epoch": 0.12, "learning_rate": 4.99418161640483e-05, "loss": 2.8436, "step": 23500 }, { "epoch": 0.12, "learning_rate": 4.994057757762222e-05, "loss": 2.8163, "step": 24000 }, { "epoch": 0.12, "learning_rate": 4.993933899119613e-05, "loss": 2.7991, "step": 24500 }, { "epoch": 0.12, "learning_rate": 4.9938100404770044e-05, "loss": 2.7893, "step": 25000 }, { "epoch": 0.13, "learning_rate": 4.993686181834396e-05, "loss": 2.7931, "step": 25500 }, { "epoch": 0.13, "learning_rate": 4.993562323191788e-05, "loss": 2.8015, "step": 26000 }, { "epoch": 0.13, "learning_rate": 4.9934384645491795e-05, "loss": 2.7764, "step": 26500 }, { "epoch": 0.13, "learning_rate": 4.993314605906571e-05, "loss": 2.7875, "step": 27000 }, { "epoch": 0.14, "learning_rate": 4.993190747263963e-05, "loss": 2.8176, "step": 27500 }, { "epoch": 0.14, "learning_rate": 4.9930668886213546e-05, "loss": 2.7744, "step": 28000 }, { "epoch": 0.14, "learning_rate": 4.992943029978746e-05, "loss": 2.8011, "step": 28500 }, { "epoch": 0.14, "learning_rate": 4.9928194190534225e-05, "loss": 2.7897, "step": 29000 }, { "epoch": 0.15, "learning_rate": 4.992695560410814e-05, "loss": 2.7773, "step": 29500 }, { "epoch": 0.15, "learning_rate": 4.992571701768206e-05, "loss": 2.7895, "step": 30000 }, { "epoch": 0.15, "learning_rate": 4.9924478431255976e-05, "loss": 2.795, "step": 30500 }, { "epoch": 0.15, "learning_rate": 4.992323984482989e-05, "loss": 2.7796, "step": 31000 }, { "epoch": 0.16, "learning_rate": 4.992200125840381e-05, "loss": 2.7868, "step": 31500 }, { "epoch": 0.16, "learning_rate": 4.9920762671977726e-05, "loss": 2.8024, "step": 32000 }, { "epoch": 0.16, "learning_rate": 4.991952408555164e-05, "loss": 2.7609, "step": 32500 }, { "epoch": 0.16, "learning_rate": 4.9918290453471264e-05, "loss": 2.7815, "step": 33000 }, { "epoch": 0.17, "learning_rate": 4.991705186704518e-05, "loss": 2.7409, "step": 33500 }, { "epoch": 0.17, "learning_rate": 4.99158132806191e-05, "loss": 2.7607, "step": 34000 }, { "epoch": 0.17, "learning_rate": 4.9914574694193015e-05, "loss": 2.7929, "step": 34500 }, { "epoch": 0.17, "learning_rate": 4.9913336107766925e-05, "loss": 2.782, "step": 35000 }, { "epoch": 0.18, "learning_rate": 4.99120999985137e-05, "loss": 2.785, "step": 35500 }, { "epoch": 0.18, "learning_rate": 4.991086141208762e-05, "loss": 2.7697, "step": 36000 }, { "epoch": 0.18, "learning_rate": 4.9909622825661534e-05, "loss": 2.7626, "step": 36500 }, { "epoch": 0.18, "learning_rate": 4.99083867164083e-05, "loss": 2.7679, "step": 37000 }, { "epoch": 0.19, "learning_rate": 4.990714812998222e-05, "loss": 2.7565, "step": 37500 }, { "epoch": 0.19, "learning_rate": 4.990590954355614e-05, "loss": 2.7848, "step": 38000 }, { "epoch": 0.19, "learning_rate": 4.99046734343029e-05, "loss": 2.7555, "step": 38500 }, { "epoch": 0.19, "learning_rate": 4.9903434847876816e-05, "loss": 2.7742, "step": 39000 }, { "epoch": 0.2, "learning_rate": 4.990219626145073e-05, "loss": 2.7821, "step": 39500 }, { "epoch": 0.2, "learning_rate": 4.990095767502465e-05, "loss": 2.7616, "step": 40000 }, { "epoch": 0.2, "learning_rate": 4.989971908859857e-05, "loss": 2.7711, "step": 40500 }, { "epoch": 0.2, "learning_rate": 4.9898480502172484e-05, "loss": 2.7499, "step": 41000 }, { "epoch": 0.21, "learning_rate": 4.98972419157464e-05, "loss": 2.7549, "step": 41500 }, { "epoch": 0.21, "learning_rate": 4.989600332932032e-05, "loss": 2.7782, "step": 42000 }, { "epoch": 0.21, "learning_rate": 4.9894764742894234e-05, "loss": 2.748, "step": 42500 }, { "epoch": 0.21, "learning_rate": 4.9893528633641e-05, "loss": 2.7325, "step": 43000 }, { "epoch": 0.22, "learning_rate": 4.989229004721492e-05, "loss": 2.7488, "step": 43500 }, { "epoch": 0.22, "learning_rate": 4.989105146078884e-05, "loss": 2.7606, "step": 44000 }, { "epoch": 0.22, "learning_rate": 4.9889812874362754e-05, "loss": 2.7445, "step": 44500 }, { "epoch": 0.22, "learning_rate": 4.988857428793667e-05, "loss": 2.76, "step": 45000 }, { "epoch": 0.23, "learning_rate": 4.988733570151059e-05, "loss": 2.7375, "step": 45500 }, { "epoch": 0.23, "learning_rate": 4.98860971150845e-05, "loss": 2.7624, "step": 46000 }, { "epoch": 0.23, "learning_rate": 4.9884858528658415e-05, "loss": 2.7588, "step": 46500 }, { "epoch": 0.23, "learning_rate": 4.9883622419405184e-05, "loss": 2.7508, "step": 47000 }, { "epoch": 0.24, "learning_rate": 4.988238631015195e-05, "loss": 2.7562, "step": 47500 }, { "epoch": 0.24, "learning_rate": 4.988114772372587e-05, "loss": 2.7546, "step": 48000 }, { "epoch": 0.24, "learning_rate": 4.9879909137299787e-05, "loss": 2.7657, "step": 48500 }, { "epoch": 0.24, "learning_rate": 4.9878670550873703e-05, "loss": 2.741, "step": 49000 }, { "epoch": 0.25, "learning_rate": 4.9877434441620465e-05, "loss": 2.7813, "step": 49500 }, { "epoch": 0.25, "learning_rate": 4.987619585519438e-05, "loss": 2.7575, "step": 50000 }, { "epoch": 0.25, "learning_rate": 4.98749572687683e-05, "loss": 2.7419, "step": 50500 }, { "epoch": 0.25, "learning_rate": 4.9873718682342216e-05, "loss": 2.7422, "step": 51000 }, { "epoch": 0.26, "learning_rate": 4.987248009591613e-05, "loss": 2.7333, "step": 51500 }, { "epoch": 0.26, "learning_rate": 4.987124150949005e-05, "loss": 2.743, "step": 52000 }, { "epoch": 0.26, "learning_rate": 4.987000540023682e-05, "loss": 2.7217, "step": 52500 }, { "epoch": 0.26, "learning_rate": 4.9868766813810736e-05, "loss": 2.7355, "step": 53000 }, { "epoch": 0.27, "learning_rate": 4.986752822738465e-05, "loss": 2.7393, "step": 53500 }, { "epoch": 0.27, "learning_rate": 4.986628964095857e-05, "loss": 2.7279, "step": 54000 }, { "epoch": 0.27, "learning_rate": 4.986505105453249e-05, "loss": 2.7535, "step": 54500 }, { "epoch": 0.27, "learning_rate": 4.9863812468106404e-05, "loss": 2.7389, "step": 55000 }, { "epoch": 0.27, "learning_rate": 4.986257635885317e-05, "loss": 2.7488, "step": 55500 }, { "epoch": 0.28, "learning_rate": 4.986134024959994e-05, "loss": 2.7452, "step": 56000 }, { "epoch": 0.28, "learning_rate": 4.986010166317385e-05, "loss": 2.7503, "step": 56500 }, { "epoch": 0.28, "learning_rate": 4.985886307674777e-05, "loss": 2.7213, "step": 57000 }, { "epoch": 0.28, "learning_rate": 4.9857624490321685e-05, "loss": 2.7421, "step": 57500 }, { "epoch": 0.29, "learning_rate": 4.98563859038956e-05, "loss": 2.7295, "step": 58000 }, { "epoch": 0.29, "learning_rate": 4.985514731746952e-05, "loss": 2.7164, "step": 58500 }, { "epoch": 0.29, "learning_rate": 4.9853908731043436e-05, "loss": 2.7262, "step": 59000 }, { "epoch": 0.29, "learning_rate": 4.985267014461735e-05, "loss": 2.734, "step": 59500 }, { "epoch": 0.3, "learning_rate": 4.985143155819127e-05, "loss": 2.7186, "step": 60000 }, { "epoch": 0.3, "learning_rate": 4.985019297176519e-05, "loss": 2.739, "step": 60500 }, { "epoch": 0.3, "learning_rate": 4.9848954385339104e-05, "loss": 2.7356, "step": 61000 }, { "epoch": 0.3, "learning_rate": 4.984771579891302e-05, "loss": 2.7406, "step": 61500 }, { "epoch": 0.31, "learning_rate": 4.984647968965979e-05, "loss": 2.7167, "step": 62000 }, { "epoch": 0.31, "learning_rate": 4.9845241103233706e-05, "loss": 2.6958, "step": 62500 }, { "epoch": 0.31, "learning_rate": 4.984400499398047e-05, "loss": 2.7223, "step": 63000 }, { "epoch": 0.31, "learning_rate": 4.9842766407554385e-05, "loss": 2.7279, "step": 63500 }, { "epoch": 0.32, "learning_rate": 4.98415278211283e-05, "loss": 2.7186, "step": 64000 }, { "epoch": 0.32, "learning_rate": 4.984029171187508e-05, "loss": 2.7359, "step": 64500 }, { "epoch": 0.32, "learning_rate": 4.9839053125448995e-05, "loss": 2.7288, "step": 65000 }, { "epoch": 0.32, "learning_rate": 4.983781453902291e-05, "loss": 2.7157, "step": 65500 }, { "epoch": 0.33, "learning_rate": 4.983657595259682e-05, "loss": 2.7194, "step": 66000 }, { "epoch": 0.33, "learning_rate": 4.983533736617074e-05, "loss": 2.7106, "step": 66500 }, { "epoch": 0.33, "learning_rate": 4.9834098779744656e-05, "loss": 2.7313, "step": 67000 }, { "epoch": 0.33, "learning_rate": 4.983286019331857e-05, "loss": 2.7072, "step": 67500 }, { "epoch": 0.34, "learning_rate": 4.983162160689249e-05, "loss": 2.7392, "step": 68000 }, { "epoch": 0.34, "learning_rate": 4.9830383020466406e-05, "loss": 2.7083, "step": 68500 }, { "epoch": 0.34, "learning_rate": 4.982914691121317e-05, "loss": 2.7407, "step": 69000 }, { "epoch": 0.34, "learning_rate": 4.9827908324787085e-05, "loss": 2.7374, "step": 69500 }, { "epoch": 0.35, "learning_rate": 4.9826669738361e-05, "loss": 2.7151, "step": 70000 }, { "epoch": 0.35, "learning_rate": 4.982543115193492e-05, "loss": 2.7057, "step": 70500 }, { "epoch": 0.35, "learning_rate": 4.9824192565508836e-05, "loss": 2.7337, "step": 71000 }, { "epoch": 0.35, "learning_rate": 4.982295397908275e-05, "loss": 2.7017, "step": 71500 }, { "epoch": 0.36, "learning_rate": 4.982171786982952e-05, "loss": 2.7105, "step": 72000 }, { "epoch": 0.36, "learning_rate": 4.982047928340344e-05, "loss": 2.7399, "step": 72500 }, { "epoch": 0.36, "learning_rate": 4.981924565132306e-05, "loss": 2.707, "step": 73000 }, { "epoch": 0.36, "learning_rate": 4.981800954206983e-05, "loss": 2.7199, "step": 73500 }, { "epoch": 0.37, "learning_rate": 4.9816770955643745e-05, "loss": 2.7107, "step": 74000 }, { "epoch": 0.37, "learning_rate": 4.981553236921766e-05, "loss": 2.7136, "step": 74500 }, { "epoch": 0.37, "learning_rate": 4.981429378279158e-05, "loss": 2.711, "step": 75000 }, { "epoch": 0.37, "learning_rate": 4.9813055196365496e-05, "loss": 2.705, "step": 75500 }, { "epoch": 0.38, "learning_rate": 4.981181908711226e-05, "loss": 2.672, "step": 76000 }, { "epoch": 0.38, "learning_rate": 4.9810580500686175e-05, "loss": 2.7155, "step": 76500 }, { "epoch": 0.38, "learning_rate": 4.980934191426009e-05, "loss": 2.7097, "step": 77000 }, { "epoch": 0.38, "learning_rate": 4.980810332783401e-05, "loss": 2.7019, "step": 77500 }, { "epoch": 0.39, "learning_rate": 4.9806864741407926e-05, "loss": 2.7046, "step": 78000 }, { "epoch": 0.39, "learning_rate": 4.980562615498184e-05, "loss": 2.7231, "step": 78500 }, { "epoch": 0.39, "learning_rate": 4.980438756855576e-05, "loss": 2.7074, "step": 79000 }, { "epoch": 0.39, "learning_rate": 4.980314898212968e-05, "loss": 2.7098, "step": 79500 }, { "epoch": 0.4, "learning_rate": 4.9801910395703594e-05, "loss": 2.7095, "step": 80000 }, { "epoch": 0.4, "learning_rate": 4.980067180927751e-05, "loss": 2.6981, "step": 80500 }, { "epoch": 0.4, "learning_rate": 4.979943322285143e-05, "loss": 2.7144, "step": 81000 }, { "epoch": 0.4, "learning_rate": 4.9798194636425344e-05, "loss": 2.7018, "step": 81500 }, { "epoch": 0.41, "learning_rate": 4.979695604999926e-05, "loss": 2.7057, "step": 82000 }, { "epoch": 0.41, "learning_rate": 4.979571746357318e-05, "loss": 2.6865, "step": 82500 }, { "epoch": 0.41, "learning_rate": 4.9794478877147095e-05, "loss": 2.7394, "step": 83000 }, { "epoch": 0.41, "learning_rate": 4.979324029072101e-05, "loss": 2.7101, "step": 83500 }, { "epoch": 0.42, "learning_rate": 4.979200170429493e-05, "loss": 2.7016, "step": 84000 }, { "epoch": 0.42, "learning_rate": 4.979076311786884e-05, "loss": 2.6752, "step": 84500 }, { "epoch": 0.42, "learning_rate": 4.9789524531442756e-05, "loss": 2.7344, "step": 85000 }, { "epoch": 0.42, "learning_rate": 4.978828594501667e-05, "loss": 2.7173, "step": 85500 }, { "epoch": 0.43, "learning_rate": 4.978704735859059e-05, "loss": 2.7205, "step": 86000 }, { "epoch": 0.43, "learning_rate": 4.978581124933736e-05, "loss": 2.6739, "step": 86500 }, { "epoch": 0.43, "learning_rate": 4.9784572662911276e-05, "loss": 2.7142, "step": 87000 }, { "epoch": 0.43, "learning_rate": 4.978333407648519e-05, "loss": 2.6928, "step": 87500 }, { "epoch": 0.44, "learning_rate": 4.97820954900591e-05, "loss": 2.722, "step": 88000 }, { "epoch": 0.44, "learning_rate": 4.978085690363302e-05, "loss": 2.7314, "step": 88500 }, { "epoch": 0.44, "learning_rate": 4.9779620794379795e-05, "loss": 2.7161, "step": 89000 }, { "epoch": 0.44, "learning_rate": 4.977838220795371e-05, "loss": 2.7042, "step": 89500 }, { "epoch": 0.45, "learning_rate": 4.977714362152763e-05, "loss": 2.7171, "step": 90000 }, { "epoch": 0.45, "learning_rate": 4.9775905035101546e-05, "loss": 2.7061, "step": 90500 }, { "epoch": 0.45, "learning_rate": 4.977466892584831e-05, "loss": 2.6724, "step": 91000 }, { "epoch": 0.45, "learning_rate": 4.9773430339422225e-05, "loss": 2.6936, "step": 91500 }, { "epoch": 0.46, "learning_rate": 4.977219175299614e-05, "loss": 2.709, "step": 92000 }, { "epoch": 0.46, "learning_rate": 4.977095316657006e-05, "loss": 2.7078, "step": 92500 }, { "epoch": 0.46, "learning_rate": 4.976971705731683e-05, "loss": 2.6722, "step": 93000 }, { "epoch": 0.46, "learning_rate": 4.9768478470890745e-05, "loss": 2.6995, "step": 93500 }, { "epoch": 0.47, "learning_rate": 4.976723988446466e-05, "loss": 2.6995, "step": 94000 }, { "epoch": 0.47, "learning_rate": 4.976600129803858e-05, "loss": 2.6942, "step": 94500 }, { "epoch": 0.47, "learning_rate": 4.9764762711612495e-05, "loss": 2.69, "step": 95000 }, { "epoch": 0.47, "learning_rate": 4.9763526602359264e-05, "loss": 2.7196, "step": 95500 }, { "epoch": 0.48, "learning_rate": 4.976228801593318e-05, "loss": 2.6655, "step": 96000 }, { "epoch": 0.48, "learning_rate": 4.97610494295071e-05, "loss": 2.7035, "step": 96500 }, { "epoch": 0.48, "learning_rate": 4.9759810843081015e-05, "loss": 2.7225, "step": 97000 }, { "epoch": 0.48, "learning_rate": 4.9758572256654925e-05, "loss": 2.6971, "step": 97500 }, { "epoch": 0.49, "learning_rate": 4.975733367022884e-05, "loss": 2.7039, "step": 98000 }, { "epoch": 0.49, "learning_rate": 4.975609508380276e-05, "loss": 2.6814, "step": 98500 }, { "epoch": 0.49, "learning_rate": 4.9754856497376676e-05, "loss": 2.7068, "step": 99000 }, { "epoch": 0.49, "learning_rate": 4.975361791095059e-05, "loss": 2.6989, "step": 99500 }, { "epoch": 0.5, "learning_rate": 4.975237932452451e-05, "loss": 2.6904, "step": 100000 }, { "epoch": 0.5, "learning_rate": 4.975114073809842e-05, "loss": 2.6863, "step": 100500 }, { "epoch": 0.5, "learning_rate": 4.974990215167234e-05, "loss": 2.6785, "step": 101000 }, { "epoch": 0.5, "learning_rate": 4.974866604241911e-05, "loss": 2.6996, "step": 101500 }, { "epoch": 0.51, "learning_rate": 4.974742993316588e-05, "loss": 2.7096, "step": 102000 }, { "epoch": 0.51, "learning_rate": 4.97461913467398e-05, "loss": 2.6979, "step": 102500 }, { "epoch": 0.51, "learning_rate": 4.9744952760313715e-05, "loss": 2.6932, "step": 103000 }, { "epoch": 0.51, "learning_rate": 4.974371417388763e-05, "loss": 2.6986, "step": 103500 }, { "epoch": 0.52, "learning_rate": 4.974247558746155e-05, "loss": 2.6888, "step": 104000 }, { "epoch": 0.52, "learning_rate": 4.974123700103546e-05, "loss": 2.6854, "step": 104500 }, { "epoch": 0.52, "learning_rate": 4.974000089178223e-05, "loss": 2.697, "step": 105000 }, { "epoch": 0.52, "learning_rate": 4.9738762305356145e-05, "loss": 2.6931, "step": 105500 }, { "epoch": 0.53, "learning_rate": 4.973752371893006e-05, "loss": 2.7098, "step": 106000 }, { "epoch": 0.53, "learning_rate": 4.973628513250398e-05, "loss": 2.6989, "step": 106500 }, { "epoch": 0.53, "learning_rate": 4.9735046546077896e-05, "loss": 2.7059, "step": 107000 }, { "epoch": 0.53, "learning_rate": 4.9733810436824664e-05, "loss": 2.6612, "step": 107500 }, { "epoch": 0.54, "learning_rate": 4.973257185039858e-05, "loss": 2.6945, "step": 108000 }, { "epoch": 0.54, "learning_rate": 4.97313332639725e-05, "loss": 2.6951, "step": 108500 }, { "epoch": 0.54, "learning_rate": 4.9730094677546415e-05, "loss": 2.6871, "step": 109000 }, { "epoch": 0.54, "learning_rate": 4.972885609112033e-05, "loss": 2.6909, "step": 109500 }, { "epoch": 0.54, "learning_rate": 4.972761750469425e-05, "loss": 2.6948, "step": 110000 }, { "epoch": 0.55, "learning_rate": 4.972638139544101e-05, "loss": 2.7037, "step": 110500 }, { "epoch": 0.55, "learning_rate": 4.972514280901493e-05, "loss": 2.6804, "step": 111000 }, { "epoch": 0.55, "learning_rate": 4.9723904222588845e-05, "loss": 2.6895, "step": 111500 }, { "epoch": 0.55, "learning_rate": 4.972266563616276e-05, "loss": 2.6878, "step": 112000 }, { "epoch": 0.56, "learning_rate": 4.972142704973668e-05, "loss": 2.6916, "step": 112500 }, { "epoch": 0.56, "learning_rate": 4.9720188463310596e-05, "loss": 2.6843, "step": 113000 }, { "epoch": 0.56, "learning_rate": 4.9718952354057365e-05, "loss": 2.6903, "step": 113500 }, { "epoch": 0.56, "learning_rate": 4.971771376763128e-05, "loss": 2.6777, "step": 114000 }, { "epoch": 0.57, "learning_rate": 4.97164751812052e-05, "loss": 2.7029, "step": 114500 }, { "epoch": 0.57, "learning_rate": 4.9715236594779115e-05, "loss": 2.7026, "step": 115000 }, { "epoch": 0.57, "learning_rate": 4.971399800835303e-05, "loss": 2.6807, "step": 115500 }, { "epoch": 0.57, "learning_rate": 4.971275942192695e-05, "loss": 2.6868, "step": 116000 }, { "epoch": 0.58, "learning_rate": 4.9711520835500866e-05, "loss": 2.6976, "step": 116500 }, { "epoch": 0.58, "learning_rate": 4.971028472624763e-05, "loss": 2.6817, "step": 117000 }, { "epoch": 0.58, "learning_rate": 4.9709046139821545e-05, "loss": 2.6925, "step": 117500 }, { "epoch": 0.58, "learning_rate": 4.970780755339546e-05, "loss": 2.6972, "step": 118000 }, { "epoch": 0.59, "learning_rate": 4.970656896696938e-05, "loss": 2.6853, "step": 118500 }, { "epoch": 0.59, "learning_rate": 4.9705330380543296e-05, "loss": 2.6735, "step": 119000 }, { "epoch": 0.59, "learning_rate": 4.970409179411721e-05, "loss": 2.702, "step": 119500 }, { "epoch": 0.59, "learning_rate": 4.970285320769113e-05, "loss": 2.6876, "step": 120000 }, { "epoch": 0.6, "learning_rate": 4.970161462126505e-05, "loss": 2.6829, "step": 120500 }, { "epoch": 0.6, "learning_rate": 4.9700378512011815e-05, "loss": 2.6596, "step": 121000 }, { "epoch": 0.6, "learning_rate": 4.969913992558573e-05, "loss": 2.6907, "step": 121500 }, { "epoch": 0.6, "learning_rate": 4.969790133915965e-05, "loss": 2.6968, "step": 122000 }, { "epoch": 0.61, "learning_rate": 4.9696662752733566e-05, "loss": 2.6804, "step": 122500 }, { "epoch": 0.61, "learning_rate": 4.969542664348033e-05, "loss": 2.6914, "step": 123000 }, { "epoch": 0.61, "learning_rate": 4.96941905342271e-05, "loss": 2.6812, "step": 123500 }, { "epoch": 0.61, "learning_rate": 4.9692951947801014e-05, "loss": 2.6607, "step": 124000 }, { "epoch": 0.62, "learning_rate": 4.969171336137493e-05, "loss": 2.686, "step": 124500 }, { "epoch": 0.62, "learning_rate": 4.969047477494885e-05, "loss": 2.6591, "step": 125000 }, { "epoch": 0.62, "learning_rate": 4.9689236188522765e-05, "loss": 2.6824, "step": 125500 }, { "epoch": 0.62, "learning_rate": 4.968799760209668e-05, "loss": 2.694, "step": 126000 }, { "epoch": 0.63, "learning_rate": 4.968676149284345e-05, "loss": 2.6881, "step": 126500 }, { "epoch": 0.63, "learning_rate": 4.968552290641737e-05, "loss": 2.7259, "step": 127000 }, { "epoch": 0.63, "learning_rate": 4.9684284319991284e-05, "loss": 2.68, "step": 127500 }, { "epoch": 0.63, "learning_rate": 4.9683045733565195e-05, "loss": 2.6683, "step": 128000 }, { "epoch": 0.64, "learning_rate": 4.968180714713911e-05, "loss": 2.6481, "step": 128500 }, { "epoch": 0.64, "learning_rate": 4.968056856071303e-05, "loss": 2.6635, "step": 129000 }, { "epoch": 0.64, "learning_rate": 4.96793324514598e-05, "loss": 2.6736, "step": 129500 }, { "epoch": 0.64, "learning_rate": 4.9678093865033714e-05, "loss": 2.7035, "step": 130000 }, { "epoch": 0.65, "learning_rate": 4.967685527860763e-05, "loss": 2.6893, "step": 130500 }, { "epoch": 0.65, "learning_rate": 4.967561669218155e-05, "loss": 2.6946, "step": 131000 }, { "epoch": 0.65, "learning_rate": 4.9674378105755465e-05, "loss": 2.6781, "step": 131500 }, { "epoch": 0.65, "learning_rate": 4.967313951932938e-05, "loss": 2.6633, "step": 132000 }, { "epoch": 0.66, "learning_rate": 4.96719009329033e-05, "loss": 2.6894, "step": 132500 }, { "epoch": 0.66, "learning_rate": 4.9670662346477216e-05, "loss": 2.6659, "step": 133000 }, { "epoch": 0.66, "learning_rate": 4.966942376005113e-05, "loss": 2.668, "step": 133500 }, { "epoch": 0.66, "learning_rate": 4.966818517362505e-05, "loss": 2.6745, "step": 134000 }, { "epoch": 0.67, "learning_rate": 4.9666946587198966e-05, "loss": 2.6407, "step": 134500 }, { "epoch": 0.67, "learning_rate": 4.966571047794573e-05, "loss": 2.6832, "step": 135000 }, { "epoch": 0.67, "learning_rate": 4.96644743686925e-05, "loss": 2.6761, "step": 135500 }, { "epoch": 0.67, "learning_rate": 4.9663235782266414e-05, "loss": 2.7023, "step": 136000 }, { "epoch": 0.68, "learning_rate": 4.966199719584033e-05, "loss": 2.6568, "step": 136500 }, { "epoch": 0.68, "learning_rate": 4.966075860941425e-05, "loss": 2.6889, "step": 137000 }, { "epoch": 0.68, "learning_rate": 4.9659520022988165e-05, "loss": 2.6811, "step": 137500 }, { "epoch": 0.68, "learning_rate": 4.965828391373494e-05, "loss": 2.6648, "step": 138000 }, { "epoch": 0.69, "learning_rate": 4.965704532730885e-05, "loss": 2.668, "step": 138500 }, { "epoch": 0.69, "learning_rate": 4.965580674088277e-05, "loss": 2.6594, "step": 139000 }, { "epoch": 0.69, "learning_rate": 4.9654568154456685e-05, "loss": 2.6537, "step": 139500 }, { "epoch": 0.69, "learning_rate": 4.96533295680306e-05, "loss": 2.7002, "step": 140000 }, { "epoch": 0.7, "learning_rate": 4.965209345877737e-05, "loss": 2.679, "step": 140500 }, { "epoch": 0.7, "learning_rate": 4.965085487235129e-05, "loss": 2.6813, "step": 141000 }, { "epoch": 0.7, "learning_rate": 4.9649618763098056e-05, "loss": 2.6977, "step": 141500 }, { "epoch": 0.7, "learning_rate": 4.964838017667197e-05, "loss": 2.6734, "step": 142000 }, { "epoch": 0.71, "learning_rate": 4.964714159024589e-05, "loss": 2.6827, "step": 142500 }, { "epoch": 0.71, "learning_rate": 4.964590300381981e-05, "loss": 2.6551, "step": 143000 }, { "epoch": 0.71, "learning_rate": 4.9644664417393724e-05, "loss": 2.6671, "step": 143500 }, { "epoch": 0.71, "learning_rate": 4.964342583096764e-05, "loss": 2.6576, "step": 144000 }, { "epoch": 0.72, "learning_rate": 4.96421897217144e-05, "loss": 2.6725, "step": 144500 }, { "epoch": 0.72, "learning_rate": 4.964095113528832e-05, "loss": 2.6767, "step": 145000 }, { "epoch": 0.72, "learning_rate": 4.963971254886224e-05, "loss": 2.6836, "step": 145500 }, { "epoch": 0.72, "learning_rate": 4.9638473962436154e-05, "loss": 2.6378, "step": 146000 }, { "epoch": 0.73, "learning_rate": 4.963723537601007e-05, "loss": 2.6721, "step": 146500 }, { "epoch": 0.73, "learning_rate": 4.963599678958399e-05, "loss": 2.7062, "step": 147000 }, { "epoch": 0.73, "learning_rate": 4.9634758203157904e-05, "loss": 2.6502, "step": 147500 }, { "epoch": 0.73, "learning_rate": 4.9633519616731815e-05, "loss": 2.6751, "step": 148000 }, { "epoch": 0.74, "learning_rate": 4.963228103030573e-05, "loss": 2.6675, "step": 148500 }, { "epoch": 0.74, "learning_rate": 4.963104244387965e-05, "loss": 2.6572, "step": 149000 }, { "epoch": 0.74, "learning_rate": 4.9629803857453565e-05, "loss": 2.6735, "step": 149500 }, { "epoch": 0.74, "learning_rate": 4.962856774820034e-05, "loss": 2.6621, "step": 150000 }, { "epoch": 0.75, "learning_rate": 4.962732916177426e-05, "loss": 2.6808, "step": 150500 }, { "epoch": 0.75, "learning_rate": 4.962609057534817e-05, "loss": 2.6627, "step": 151000 }, { "epoch": 0.75, "learning_rate": 4.9624851988922085e-05, "loss": 2.6605, "step": 151500 }, { "epoch": 0.75, "learning_rate": 4.9623613402496e-05, "loss": 2.6853, "step": 152000 }, { "epoch": 0.76, "learning_rate": 4.962237481606992e-05, "loss": 2.6536, "step": 152500 }, { "epoch": 0.76, "learning_rate": 4.9621136229643836e-05, "loss": 2.6741, "step": 153000 }, { "epoch": 0.76, "learning_rate": 4.961989764321775e-05, "loss": 2.6637, "step": 153500 }, { "epoch": 0.76, "learning_rate": 4.9618666488310225e-05, "loss": 2.65, "step": 154000 }, { "epoch": 0.77, "learning_rate": 4.961742790188414e-05, "loss": 2.6598, "step": 154500 }, { "epoch": 0.77, "learning_rate": 4.961618931545806e-05, "loss": 2.6615, "step": 155000 }, { "epoch": 0.77, "learning_rate": 4.9614950729031976e-05, "loss": 2.6592, "step": 155500 }, { "epoch": 0.77, "learning_rate": 4.9613712142605886e-05, "loss": 2.6707, "step": 156000 }, { "epoch": 0.78, "learning_rate": 4.96124735561798e-05, "loss": 2.6291, "step": 156500 }, { "epoch": 0.78, "learning_rate": 4.961123496975372e-05, "loss": 2.6811, "step": 157000 }, { "epoch": 0.78, "learning_rate": 4.960999638332764e-05, "loss": 2.6534, "step": 157500 }, { "epoch": 0.78, "learning_rate": 4.9608757796901554e-05, "loss": 2.6623, "step": 158000 }, { "epoch": 0.79, "learning_rate": 4.960751921047547e-05, "loss": 2.6498, "step": 158500 }, { "epoch": 0.79, "learning_rate": 4.960628062404939e-05, "loss": 2.6617, "step": 159000 }, { "epoch": 0.79, "learning_rate": 4.9605042037623305e-05, "loss": 2.6545, "step": 159500 }, { "epoch": 0.79, "learning_rate": 4.960380345119722e-05, "loss": 2.6516, "step": 160000 }, { "epoch": 0.8, "learning_rate": 4.960256734194399e-05, "loss": 2.6722, "step": 160500 }, { "epoch": 0.8, "learning_rate": 4.960132875551791e-05, "loss": 2.6724, "step": 161000 }, { "epoch": 0.8, "learning_rate": 4.9600090169091824e-05, "loss": 2.6304, "step": 161500 }, { "epoch": 0.8, "learning_rate": 4.959885158266574e-05, "loss": 2.6624, "step": 162000 }, { "epoch": 0.81, "learning_rate": 4.959761299623966e-05, "loss": 2.6667, "step": 162500 }, { "epoch": 0.81, "learning_rate": 4.9596374409813575e-05, "loss": 2.6568, "step": 163000 }, { "epoch": 0.81, "learning_rate": 4.959513830056034e-05, "loss": 2.6541, "step": 163500 }, { "epoch": 0.81, "learning_rate": 4.9593899714134254e-05, "loss": 2.6696, "step": 164000 }, { "epoch": 0.81, "learning_rate": 4.959266112770817e-05, "loss": 2.6907, "step": 164500 }, { "epoch": 0.82, "learning_rate": 4.959142254128209e-05, "loss": 2.656, "step": 165000 }, { "epoch": 0.82, "learning_rate": 4.9590183954856005e-05, "loss": 2.6893, "step": 165500 }, { "epoch": 0.82, "learning_rate": 4.9588950322775625e-05, "loss": 2.6747, "step": 166000 }, { "epoch": 0.82, "learning_rate": 4.958771173634954e-05, "loss": 2.6528, "step": 166500 }, { "epoch": 0.83, "learning_rate": 4.958647314992346e-05, "loss": 2.6685, "step": 167000 }, { "epoch": 0.83, "learning_rate": 4.9585234563497376e-05, "loss": 2.6476, "step": 167500 }, { "epoch": 0.83, "learning_rate": 4.958399597707129e-05, "loss": 2.66, "step": 168000 }, { "epoch": 0.83, "learning_rate": 4.958275739064521e-05, "loss": 2.6539, "step": 168500 }, { "epoch": 0.84, "learning_rate": 4.958151880421913e-05, "loss": 2.6832, "step": 169000 }, { "epoch": 0.84, "learning_rate": 4.958028021779304e-05, "loss": 2.6409, "step": 169500 }, { "epoch": 0.84, "learning_rate": 4.9579044108539806e-05, "loss": 2.6694, "step": 170000 }, { "epoch": 0.84, "learning_rate": 4.957780552211372e-05, "loss": 2.6599, "step": 170500 }, { "epoch": 0.85, "learning_rate": 4.957656693568764e-05, "loss": 2.6693, "step": 171000 }, { "epoch": 0.85, "learning_rate": 4.957532834926156e-05, "loss": 2.6476, "step": 171500 }, { "epoch": 0.85, "learning_rate": 4.9574089762835474e-05, "loss": 2.6592, "step": 172000 }, { "epoch": 0.85, "learning_rate": 4.957285117640939e-05, "loss": 2.6631, "step": 172500 }, { "epoch": 0.86, "learning_rate": 4.957161258998331e-05, "loss": 2.6674, "step": 173000 }, { "epoch": 0.86, "learning_rate": 4.9570374003557224e-05, "loss": 2.6603, "step": 173500 }, { "epoch": 0.86, "learning_rate": 4.956913541713114e-05, "loss": 2.6461, "step": 174000 }, { "epoch": 0.86, "learning_rate": 4.956789930787791e-05, "loss": 2.6766, "step": 174500 }, { "epoch": 0.87, "learning_rate": 4.956666072145183e-05, "loss": 2.6398, "step": 175000 }, { "epoch": 0.87, "learning_rate": 4.956542461219859e-05, "loss": 2.6699, "step": 175500 }, { "epoch": 0.87, "learning_rate": 4.9564186025772506e-05, "loss": 2.6702, "step": 176000 }, { "epoch": 0.87, "learning_rate": 4.956294743934642e-05, "loss": 2.6357, "step": 176500 }, { "epoch": 0.88, "learning_rate": 4.956170885292034e-05, "loss": 2.6474, "step": 177000 }, { "epoch": 0.88, "learning_rate": 4.956047026649426e-05, "loss": 2.6496, "step": 177500 }, { "epoch": 0.88, "learning_rate": 4.9559231680068174e-05, "loss": 2.664, "step": 178000 }, { "epoch": 0.88, "learning_rate": 4.955799557081494e-05, "loss": 2.6964, "step": 178500 }, { "epoch": 0.89, "learning_rate": 4.955675698438886e-05, "loss": 2.6707, "step": 179000 }, { "epoch": 0.89, "learning_rate": 4.9555518397962776e-05, "loss": 2.654, "step": 179500 }, { "epoch": 0.89, "learning_rate": 4.955427981153669e-05, "loss": 2.6789, "step": 180000 }, { "epoch": 0.89, "learning_rate": 4.9553043702283455e-05, "loss": 2.6482, "step": 180500 }, { "epoch": 0.9, "learning_rate": 4.955180511585737e-05, "loss": 2.6471, "step": 181000 }, { "epoch": 0.9, "learning_rate": 4.955056652943129e-05, "loss": 2.6439, "step": 181500 }, { "epoch": 0.9, "learning_rate": 4.9549327943005206e-05, "loss": 2.6462, "step": 182000 }, { "epoch": 0.9, "learning_rate": 4.954808935657912e-05, "loss": 2.6688, "step": 182500 }, { "epoch": 0.91, "learning_rate": 4.954685077015304e-05, "loss": 2.6614, "step": 183000 }, { "epoch": 0.91, "learning_rate": 4.954561218372696e-05, "loss": 2.673, "step": 183500 }, { "epoch": 0.91, "learning_rate": 4.9544373597300874e-05, "loss": 2.6636, "step": 184000 }, { "epoch": 0.91, "learning_rate": 4.954313748804764e-05, "loss": 2.6533, "step": 184500 }, { "epoch": 0.92, "learning_rate": 4.954189890162156e-05, "loss": 2.6585, "step": 185000 }, { "epoch": 0.92, "learning_rate": 4.9540660315195477e-05, "loss": 2.6397, "step": 185500 }, { "epoch": 0.92, "learning_rate": 4.9539421728769394e-05, "loss": 2.6475, "step": 186000 }, { "epoch": 0.92, "learning_rate": 4.953818314234331e-05, "loss": 2.658, "step": 186500 }, { "epoch": 0.93, "learning_rate": 4.953694455591723e-05, "loss": 2.6754, "step": 187000 }, { "epoch": 0.93, "learning_rate": 4.9535705969491144e-05, "loss": 2.6488, "step": 187500 }, { "epoch": 0.93, "learning_rate": 4.953446738306506e-05, "loss": 2.6816, "step": 188000 }, { "epoch": 0.93, "learning_rate": 4.953322879663898e-05, "loss": 2.6558, "step": 188500 }, { "epoch": 0.94, "learning_rate": 4.953199268738574e-05, "loss": 2.6288, "step": 189000 }, { "epoch": 0.94, "learning_rate": 4.953075410095966e-05, "loss": 2.6406, "step": 189500 }, { "epoch": 0.94, "learning_rate": 4.9529517991706426e-05, "loss": 2.6695, "step": 190000 }, { "epoch": 0.94, "learning_rate": 4.952827940528034e-05, "loss": 2.6565, "step": 190500 }, { "epoch": 0.95, "learning_rate": 4.952704081885426e-05, "loss": 2.6577, "step": 191000 }, { "epoch": 0.95, "learning_rate": 4.952580470960103e-05, "loss": 2.6499, "step": 191500 }, { "epoch": 0.95, "learning_rate": 4.95245686003478e-05, "loss": 2.6423, "step": 192000 }, { "epoch": 0.95, "learning_rate": 4.9523330013921714e-05, "loss": 2.6735, "step": 192500 }, { "epoch": 0.96, "learning_rate": 4.952209142749563e-05, "loss": 2.6441, "step": 193000 }, { "epoch": 0.96, "learning_rate": 4.952085284106955e-05, "loss": 2.6782, "step": 193500 }, { "epoch": 0.96, "learning_rate": 4.9519614254643465e-05, "loss": 2.6542, "step": 194000 }, { "epoch": 0.96, "learning_rate": 4.951837566821738e-05, "loss": 2.6564, "step": 194500 }, { "epoch": 0.97, "learning_rate": 4.95171370817913e-05, "loss": 2.6682, "step": 195000 }, { "epoch": 0.97, "learning_rate": 4.951589849536521e-05, "loss": 2.6803, "step": 195500 }, { "epoch": 0.97, "learning_rate": 4.9514659908939126e-05, "loss": 2.6768, "step": 196000 }, { "epoch": 0.97, "learning_rate": 4.951342132251304e-05, "loss": 2.652, "step": 196500 }, { "epoch": 0.98, "learning_rate": 4.951218273608696e-05, "loss": 2.6467, "step": 197000 }, { "epoch": 0.98, "learning_rate": 4.951094414966088e-05, "loss": 2.631, "step": 197500 }, { "epoch": 0.98, "learning_rate": 4.9509705563234794e-05, "loss": 2.6418, "step": 198000 }, { "epoch": 0.98, "learning_rate": 4.950846697680871e-05, "loss": 2.6617, "step": 198500 }, { "epoch": 0.99, "learning_rate": 4.950722839038263e-05, "loss": 2.6553, "step": 199000 }, { "epoch": 0.99, "learning_rate": 4.9505989803956545e-05, "loss": 2.6369, "step": 199500 }, { "epoch": 0.99, "learning_rate": 4.9504753694703307e-05, "loss": 2.6282, "step": 200000 }, { "epoch": 0.99, "learning_rate": 4.9503515108277224e-05, "loss": 2.65, "step": 200500 }, { "epoch": 1.0, "learning_rate": 4.950227652185114e-05, "loss": 2.6651, "step": 201000 }, { "epoch": 1.0, "learning_rate": 4.950103793542506e-05, "loss": 2.6763, "step": 201500 }, { "epoch": 1.0, "eval_accuracy": 0.6236883143982033, "eval_accuracy_mlm": 0.5761070598072291, "eval_accuracy_nsp": 0.8479990900497727, "eval_loss": 2.567748546600342, "eval_runtime": 145.8951, "eval_samples_per_second": 1747.55, "eval_steps_per_second": 72.819, "step": 201843 }, { "epoch": 1.0, "learning_rate": 4.9499801826171826e-05, "loss": 2.6422, "step": 202000 }, { "epoch": 1.0, "learning_rate": 4.949856323974574e-05, "loss": 2.6314, "step": 202500 }, { "epoch": 1.01, "learning_rate": 4.949732465331966e-05, "loss": 2.6069, "step": 203000 }, { "epoch": 1.01, "learning_rate": 4.949608606689358e-05, "loss": 2.621, "step": 203500 }, { "epoch": 1.01, "learning_rate": 4.9494847480467494e-05, "loss": 2.6315, "step": 204000 }, { "epoch": 1.01, "learning_rate": 4.949361137121426e-05, "loss": 2.6192, "step": 204500 }, { "epoch": 1.02, "learning_rate": 4.949237278478818e-05, "loss": 2.6241, "step": 205000 }, { "epoch": 1.02, "learning_rate": 4.949113667553495e-05, "loss": 2.6318, "step": 205500 }, { "epoch": 1.02, "learning_rate": 4.9489898089108865e-05, "loss": 2.6222, "step": 206000 }, { "epoch": 1.02, "learning_rate": 4.948865950268278e-05, "loss": 2.6478, "step": 206500 }, { "epoch": 1.03, "learning_rate": 4.94874209162567e-05, "loss": 2.6204, "step": 207000 }, { "epoch": 1.03, "learning_rate": 4.9486182329830616e-05, "loss": 2.6316, "step": 207500 }, { "epoch": 1.03, "learning_rate": 4.9484946220577385e-05, "loss": 2.6242, "step": 208000 }, { "epoch": 1.03, "learning_rate": 4.94837076341513e-05, "loss": 2.5915, "step": 208500 }, { "epoch": 1.04, "learning_rate": 4.948246904772522e-05, "loss": 2.6172, "step": 209000 }, { "epoch": 1.04, "learning_rate": 4.9481230461299136e-05, "loss": 2.6155, "step": 209500 }, { "epoch": 1.04, "learning_rate": 4.947999187487305e-05, "loss": 2.6064, "step": 210000 }, { "epoch": 1.04, "learning_rate": 4.947875328844697e-05, "loss": 2.6333, "step": 210500 }, { "epoch": 1.05, "learning_rate": 4.947751470202088e-05, "loss": 2.6295, "step": 211000 }, { "epoch": 1.05, "learning_rate": 4.94762761155948e-05, "loss": 2.6262, "step": 211500 }, { "epoch": 1.05, "learning_rate": 4.9475037529168714e-05, "loss": 2.598, "step": 212000 }, { "epoch": 1.05, "learning_rate": 4.947379894274263e-05, "loss": 2.6367, "step": 212500 }, { "epoch": 1.06, "learning_rate": 4.947256035631655e-05, "loss": 2.6183, "step": 213000 }, { "epoch": 1.06, "learning_rate": 4.947132176989046e-05, "loss": 2.6535, "step": 213500 }, { "epoch": 1.06, "learning_rate": 4.9470083183464375e-05, "loss": 2.6075, "step": 214000 }, { "epoch": 1.06, "learning_rate": 4.946884707421114e-05, "loss": 2.5924, "step": 214500 }, { "epoch": 1.07, "learning_rate": 4.946760848778506e-05, "loss": 2.6268, "step": 215000 }, { "epoch": 1.07, "learning_rate": 4.946636990135898e-05, "loss": 2.6207, "step": 215500 }, { "epoch": 1.07, "learning_rate": 4.9465131314932894e-05, "loss": 2.5849, "step": 216000 }, { "epoch": 1.07, "learning_rate": 4.946389520567967e-05, "loss": 2.6141, "step": 216500 }, { "epoch": 1.08, "learning_rate": 4.946265661925359e-05, "loss": 2.6114, "step": 217000 }, { "epoch": 1.08, "learning_rate": 4.94614180328275e-05, "loss": 2.6018, "step": 217500 }, { "epoch": 1.08, "learning_rate": 4.9460179446401414e-05, "loss": 2.6197, "step": 218000 }, { "epoch": 1.08, "learning_rate": 4.945894085997533e-05, "loss": 2.6252, "step": 218500 }, { "epoch": 1.09, "learning_rate": 4.945770227354925e-05, "loss": 2.6027, "step": 219000 }, { "epoch": 1.09, "learning_rate": 4.9456463687123165e-05, "loss": 2.6315, "step": 219500 }, { "epoch": 1.09, "learning_rate": 4.9455225100697075e-05, "loss": 2.6094, "step": 220000 }, { "epoch": 1.09, "learning_rate": 4.9453988991443843e-05, "loss": 2.6246, "step": 220500 }, { "epoch": 1.09, "learning_rate": 4.945275040501776e-05, "loss": 2.6303, "step": 221000 }, { "epoch": 1.1, "learning_rate": 4.945151181859168e-05, "loss": 2.6081, "step": 221500 }, { "epoch": 1.1, "learning_rate": 4.9450273232165594e-05, "loss": 2.6129, "step": 222000 }, { "epoch": 1.1, "learning_rate": 4.944903464573951e-05, "loss": 2.6229, "step": 222500 }, { "epoch": 1.1, "learning_rate": 4.944780101365913e-05, "loss": 2.594, "step": 223000 }, { "epoch": 1.11, "learning_rate": 4.944656242723305e-05, "loss": 2.621, "step": 223500 }, { "epoch": 1.11, "learning_rate": 4.9445323840806966e-05, "loss": 2.6097, "step": 224000 }, { "epoch": 1.11, "learning_rate": 4.944408525438088e-05, "loss": 2.6152, "step": 224500 }, { "epoch": 1.11, "learning_rate": 4.94428466679548e-05, "loss": 2.6206, "step": 225000 }, { "epoch": 1.12, "learning_rate": 4.944161055870157e-05, "loss": 2.6391, "step": 225500 }, { "epoch": 1.12, "learning_rate": 4.9440371972275485e-05, "loss": 2.6487, "step": 226000 }, { "epoch": 1.12, "learning_rate": 4.94391333858494e-05, "loss": 2.5981, "step": 226500 }, { "epoch": 1.12, "learning_rate": 4.943789479942332e-05, "loss": 2.6175, "step": 227000 }, { "epoch": 1.13, "learning_rate": 4.9436656212997236e-05, "loss": 2.6332, "step": 227500 }, { "epoch": 1.13, "learning_rate": 4.9435420103744e-05, "loss": 2.6234, "step": 228000 }, { "epoch": 1.13, "learning_rate": 4.9434181517317915e-05, "loss": 2.6201, "step": 228500 }, { "epoch": 1.13, "learning_rate": 4.943294293089183e-05, "loss": 2.6235, "step": 229000 }, { "epoch": 1.14, "learning_rate": 4.943170434446575e-05, "loss": 2.6339, "step": 229500 }, { "epoch": 1.14, "learning_rate": 4.9430465758039666e-05, "loss": 2.5977, "step": 230000 }, { "epoch": 1.14, "learning_rate": 4.942922717161358e-05, "loss": 2.612, "step": 230500 }, { "epoch": 1.14, "learning_rate": 4.942799106236035e-05, "loss": 2.6263, "step": 231000 }, { "epoch": 1.15, "learning_rate": 4.942675247593427e-05, "loss": 2.6239, "step": 231500 }, { "epoch": 1.15, "learning_rate": 4.9425513889508185e-05, "loss": 2.6085, "step": 232000 }, { "epoch": 1.15, "learning_rate": 4.94242753030821e-05, "loss": 2.6082, "step": 232500 }, { "epoch": 1.15, "learning_rate": 4.942303671665602e-05, "loss": 2.6257, "step": 233000 }, { "epoch": 1.16, "learning_rate": 4.9421798130229936e-05, "loss": 2.6365, "step": 233500 }, { "epoch": 1.16, "learning_rate": 4.942055954380385e-05, "loss": 2.6226, "step": 234000 }, { "epoch": 1.16, "learning_rate": 4.9419323434550615e-05, "loss": 2.6333, "step": 234500 }, { "epoch": 1.16, "learning_rate": 4.941808484812453e-05, "loss": 2.6214, "step": 235000 }, { "epoch": 1.17, "learning_rate": 4.941684626169845e-05, "loss": 2.614, "step": 235500 }, { "epoch": 1.17, "learning_rate": 4.941561015244522e-05, "loss": 2.5987, "step": 236000 }, { "epoch": 1.17, "learning_rate": 4.9414371566019135e-05, "loss": 2.6155, "step": 236500 }, { "epoch": 1.17, "learning_rate": 4.941313297959305e-05, "loss": 2.6424, "step": 237000 }, { "epoch": 1.18, "learning_rate": 4.941189439316697e-05, "loss": 2.6057, "step": 237500 }, { "epoch": 1.18, "learning_rate": 4.9410655806740886e-05, "loss": 2.6237, "step": 238000 }, { "epoch": 1.18, "learning_rate": 4.94094172203148e-05, "loss": 2.6184, "step": 238500 }, { "epoch": 1.18, "learning_rate": 4.940818111106157e-05, "loss": 2.6616, "step": 239000 }, { "epoch": 1.19, "learning_rate": 4.940694252463549e-05, "loss": 2.6191, "step": 239500 }, { "epoch": 1.19, "learning_rate": 4.9405703938209405e-05, "loss": 2.5849, "step": 240000 }, { "epoch": 1.19, "learning_rate": 4.940446535178332e-05, "loss": 2.6178, "step": 240500 }, { "epoch": 1.19, "learning_rate": 4.940322676535723e-05, "loss": 2.6466, "step": 241000 }, { "epoch": 1.2, "learning_rate": 4.940198817893115e-05, "loss": 2.6025, "step": 241500 }, { "epoch": 1.2, "learning_rate": 4.940075206967792e-05, "loss": 2.5923, "step": 242000 }, { "epoch": 1.2, "learning_rate": 4.9399513483251835e-05, "loss": 2.6381, "step": 242500 }, { "epoch": 1.2, "learning_rate": 4.939827489682575e-05, "loss": 2.5819, "step": 243000 }, { "epoch": 1.21, "learning_rate": 4.939703631039967e-05, "loss": 2.6115, "step": 243500 }, { "epoch": 1.21, "learning_rate": 4.9395797723973586e-05, "loss": 2.6289, "step": 244000 }, { "epoch": 1.21, "learning_rate": 4.93945591375475e-05, "loss": 2.5913, "step": 244500 }, { "epoch": 1.21, "learning_rate": 4.939332302829427e-05, "loss": 2.6194, "step": 245000 }, { "epoch": 1.22, "learning_rate": 4.939208444186819e-05, "loss": 2.6232, "step": 245500 }, { "epoch": 1.22, "learning_rate": 4.9390845855442105e-05, "loss": 2.6176, "step": 246000 }, { "epoch": 1.22, "learning_rate": 4.938960726901602e-05, "loss": 2.6158, "step": 246500 }, { "epoch": 1.22, "learning_rate": 4.938836868258994e-05, "loss": 2.6155, "step": 247000 }, { "epoch": 1.23, "learning_rate": 4.9387130096163856e-05, "loss": 2.6407, "step": 247500 }, { "epoch": 1.23, "learning_rate": 4.9385891509737766e-05, "loss": 2.6334, "step": 248000 }, { "epoch": 1.23, "learning_rate": 4.938465292331168e-05, "loss": 2.62, "step": 248500 }, { "epoch": 1.23, "learning_rate": 4.93834143368856e-05, "loss": 2.6266, "step": 249000 }, { "epoch": 1.24, "learning_rate": 4.938217575045952e-05, "loss": 2.612, "step": 249500 }, { "epoch": 1.24, "learning_rate": 4.9380937164033434e-05, "loss": 2.6358, "step": 250000 }, { "epoch": 1.24, "learning_rate": 4.937969857760735e-05, "loss": 2.6289, "step": 250500 }, { "epoch": 1.24, "learning_rate": 4.937846246835412e-05, "loss": 2.6102, "step": 251000 }, { "epoch": 1.25, "learning_rate": 4.9377223881928037e-05, "loss": 2.6004, "step": 251500 }, { "epoch": 1.25, "learning_rate": 4.9375985295501954e-05, "loss": 2.607, "step": 252000 }, { "epoch": 1.25, "learning_rate": 4.937475166342157e-05, "loss": 2.6224, "step": 252500 }, { "epoch": 1.25, "learning_rate": 4.9373513076995484e-05, "loss": 2.6319, "step": 253000 }, { "epoch": 1.26, "learning_rate": 4.93722744905694e-05, "loss": 2.6201, "step": 253500 }, { "epoch": 1.26, "learning_rate": 4.937103590414332e-05, "loss": 2.605, "step": 254000 }, { "epoch": 1.26, "learning_rate": 4.9369797317717235e-05, "loss": 2.6106, "step": 254500 }, { "epoch": 1.26, "learning_rate": 4.936855873129115e-05, "loss": 2.5989, "step": 255000 }, { "epoch": 1.27, "learning_rate": 4.936732014486507e-05, "loss": 2.6324, "step": 255500 }, { "epoch": 1.27, "learning_rate": 4.9366081558438986e-05, "loss": 2.615, "step": 256000 }, { "epoch": 1.27, "learning_rate": 4.9364845449185755e-05, "loss": 2.6136, "step": 256500 }, { "epoch": 1.27, "learning_rate": 4.936360686275967e-05, "loss": 2.6346, "step": 257000 }, { "epoch": 1.28, "learning_rate": 4.936236827633359e-05, "loss": 2.6109, "step": 257500 }, { "epoch": 1.28, "learning_rate": 4.9361129689907506e-05, "loss": 2.629, "step": 258000 }, { "epoch": 1.28, "learning_rate": 4.935989110348142e-05, "loss": 2.6382, "step": 258500 }, { "epoch": 1.28, "learning_rate": 4.935865251705534e-05, "loss": 2.652, "step": 259000 }, { "epoch": 1.29, "learning_rate": 4.9357413930629256e-05, "loss": 2.5949, "step": 259500 }, { "epoch": 1.29, "learning_rate": 4.935617534420317e-05, "loss": 2.6228, "step": 260000 }, { "epoch": 1.29, "learning_rate": 4.935493675777709e-05, "loss": 2.5946, "step": 260500 }, { "epoch": 1.29, "learning_rate": 4.935369817135101e-05, "loss": 2.6195, "step": 261000 }, { "epoch": 1.3, "learning_rate": 4.935245958492492e-05, "loss": 2.6184, "step": 261500 }, { "epoch": 1.3, "learning_rate": 4.9351223475671686e-05, "loss": 2.6138, "step": 262000 }, { "epoch": 1.3, "learning_rate": 4.93499848892456e-05, "loss": 2.5975, "step": 262500 }, { "epoch": 1.3, "learning_rate": 4.934874630281952e-05, "loss": 2.6182, "step": 263000 }, { "epoch": 1.31, "learning_rate": 4.934750771639344e-05, "loss": 2.6013, "step": 263500 }, { "epoch": 1.31, "learning_rate": 4.9346269129967354e-05, "loss": 2.6424, "step": 264000 }, { "epoch": 1.31, "learning_rate": 4.934503302071412e-05, "loss": 2.6397, "step": 264500 }, { "epoch": 1.31, "learning_rate": 4.934379443428804e-05, "loss": 2.6183, "step": 265000 }, { "epoch": 1.32, "learning_rate": 4.9342555847861956e-05, "loss": 2.6236, "step": 265500 }, { "epoch": 1.32, "learning_rate": 4.934131726143587e-05, "loss": 2.6169, "step": 266000 }, { "epoch": 1.32, "learning_rate": 4.934007867500979e-05, "loss": 2.6104, "step": 266500 }, { "epoch": 1.32, "learning_rate": 4.933884008858371e-05, "loss": 2.6056, "step": 267000 }, { "epoch": 1.33, "learning_rate": 4.9337601502157624e-05, "loss": 2.6044, "step": 267500 }, { "epoch": 1.33, "learning_rate": 4.9336362915731534e-05, "loss": 2.5988, "step": 268000 }, { "epoch": 1.33, "learning_rate": 4.933512432930545e-05, "loss": 2.64, "step": 268500 }, { "epoch": 1.33, "learning_rate": 4.933388822005222e-05, "loss": 2.6459, "step": 269000 }, { "epoch": 1.34, "learning_rate": 4.933264963362614e-05, "loss": 2.6069, "step": 269500 }, { "epoch": 1.34, "learning_rate": 4.9331413524372906e-05, "loss": 2.6332, "step": 270000 }, { "epoch": 1.34, "learning_rate": 4.933017493794682e-05, "loss": 2.6211, "step": 270500 }, { "epoch": 1.34, "learning_rate": 4.932893635152074e-05, "loss": 2.637, "step": 271000 }, { "epoch": 1.35, "learning_rate": 4.9327697765094657e-05, "loss": 2.6399, "step": 271500 }, { "epoch": 1.35, "learning_rate": 4.9326459178668573e-05, "loss": 2.6234, "step": 272000 }, { "epoch": 1.35, "learning_rate": 4.9325223069415336e-05, "loss": 2.6221, "step": 272500 }, { "epoch": 1.35, "learning_rate": 4.932398448298925e-05, "loss": 2.6133, "step": 273000 }, { "epoch": 1.36, "learning_rate": 4.932274589656317e-05, "loss": 2.614, "step": 273500 }, { "epoch": 1.36, "learning_rate": 4.9321507310137086e-05, "loss": 2.6026, "step": 274000 }, { "epoch": 1.36, "learning_rate": 4.9320268723711e-05, "loss": 2.5999, "step": 274500 }, { "epoch": 1.36, "learning_rate": 4.931903013728492e-05, "loss": 2.6374, "step": 275000 }, { "epoch": 1.36, "learning_rate": 4.931779155085884e-05, "loss": 2.6182, "step": 275500 }, { "epoch": 1.37, "learning_rate": 4.9316552964432754e-05, "loss": 2.6048, "step": 276000 }, { "epoch": 1.37, "learning_rate": 4.931531685517952e-05, "loss": 2.6308, "step": 276500 }, { "epoch": 1.37, "learning_rate": 4.931408074592629e-05, "loss": 2.6062, "step": 277000 }, { "epoch": 1.37, "learning_rate": 4.931284215950021e-05, "loss": 2.6164, "step": 277500 }, { "epoch": 1.38, "learning_rate": 4.9311603573074126e-05, "loss": 2.6278, "step": 278000 }, { "epoch": 1.38, "learning_rate": 4.9310364986648036e-05, "loss": 2.6067, "step": 278500 }, { "epoch": 1.38, "learning_rate": 4.930912640022195e-05, "loss": 2.6246, "step": 279000 }, { "epoch": 1.38, "learning_rate": 4.930788781379587e-05, "loss": 2.6262, "step": 279500 }, { "epoch": 1.39, "learning_rate": 4.9306649227369786e-05, "loss": 2.6116, "step": 280000 }, { "epoch": 1.39, "learning_rate": 4.93054106409437e-05, "loss": 2.6325, "step": 280500 }, { "epoch": 1.39, "learning_rate": 4.930417453169047e-05, "loss": 2.5979, "step": 281000 }, { "epoch": 1.39, "learning_rate": 4.930293594526439e-05, "loss": 2.6106, "step": 281500 }, { "epoch": 1.4, "learning_rate": 4.9301699836011165e-05, "loss": 2.6094, "step": 282000 }, { "epoch": 1.4, "learning_rate": 4.9300461249585075e-05, "loss": 2.6065, "step": 282500 }, { "epoch": 1.4, "learning_rate": 4.929922266315899e-05, "loss": 2.5977, "step": 283000 }, { "epoch": 1.4, "learning_rate": 4.929798407673291e-05, "loss": 2.6309, "step": 283500 }, { "epoch": 1.41, "learning_rate": 4.9296745490306826e-05, "loss": 2.6134, "step": 284000 }, { "epoch": 1.41, "learning_rate": 4.929550690388074e-05, "loss": 2.6412, "step": 284500 }, { "epoch": 1.41, "learning_rate": 4.929426831745465e-05, "loss": 2.6118, "step": 285000 }, { "epoch": 1.41, "learning_rate": 4.929302973102857e-05, "loss": 2.5835, "step": 285500 }, { "epoch": 1.42, "learning_rate": 4.9291791144602487e-05, "loss": 2.5993, "step": 286000 }, { "epoch": 1.42, "learning_rate": 4.9290555035349255e-05, "loss": 2.614, "step": 286500 }, { "epoch": 1.42, "learning_rate": 4.928931892609603e-05, "loss": 2.5988, "step": 287000 }, { "epoch": 1.42, "learning_rate": 4.928808033966995e-05, "loss": 2.6119, "step": 287500 }, { "epoch": 1.43, "learning_rate": 4.9286841753243865e-05, "loss": 2.6121, "step": 288000 }, { "epoch": 1.43, "learning_rate": 4.928560316681778e-05, "loss": 2.6205, "step": 288500 }, { "epoch": 1.43, "learning_rate": 4.92843645803917e-05, "loss": 2.6136, "step": 289000 }, { "epoch": 1.43, "learning_rate": 4.928312847113846e-05, "loss": 2.6125, "step": 289500 }, { "epoch": 1.44, "learning_rate": 4.928188988471238e-05, "loss": 2.6199, "step": 290000 }, { "epoch": 1.44, "learning_rate": 4.9280651298286295e-05, "loss": 2.612, "step": 290500 }, { "epoch": 1.44, "learning_rate": 4.927941271186021e-05, "loss": 2.5957, "step": 291000 }, { "epoch": 1.44, "learning_rate": 4.927817412543413e-05, "loss": 2.5938, "step": 291500 }, { "epoch": 1.45, "learning_rate": 4.9276935539008045e-05, "loss": 2.6138, "step": 292000 }, { "epoch": 1.45, "learning_rate": 4.9275699429754814e-05, "loss": 2.6162, "step": 292500 }, { "epoch": 1.45, "learning_rate": 4.927446084332873e-05, "loss": 2.6214, "step": 293000 }, { "epoch": 1.45, "learning_rate": 4.927322225690265e-05, "loss": 2.6009, "step": 293500 }, { "epoch": 1.46, "learning_rate": 4.9271983670476565e-05, "loss": 2.6031, "step": 294000 }, { "epoch": 1.46, "learning_rate": 4.927074508405048e-05, "loss": 2.5915, "step": 294500 }, { "epoch": 1.46, "learning_rate": 4.92695064976244e-05, "loss": 2.6289, "step": 295000 }, { "epoch": 1.46, "learning_rate": 4.9268267911198316e-05, "loss": 2.6141, "step": 295500 }, { "epoch": 1.47, "learning_rate": 4.9267029324772226e-05, "loss": 2.6233, "step": 296000 }, { "epoch": 1.47, "learning_rate": 4.926579073834614e-05, "loss": 2.6347, "step": 296500 }, { "epoch": 1.47, "learning_rate": 4.926455215192006e-05, "loss": 2.619, "step": 297000 }, { "epoch": 1.47, "learning_rate": 4.926331604266683e-05, "loss": 2.5973, "step": 297500 }, { "epoch": 1.48, "learning_rate": 4.9262077456240745e-05, "loss": 2.5999, "step": 298000 }, { "epoch": 1.48, "learning_rate": 4.926083886981466e-05, "loss": 2.6199, "step": 298500 }, { "epoch": 1.48, "learning_rate": 4.925960028338857e-05, "loss": 2.6028, "step": 299000 }, { "epoch": 1.48, "learning_rate": 4.925836417413535e-05, "loss": 2.5981, "step": 299500 }, { "epoch": 1.49, "learning_rate": 4.925712806488211e-05, "loss": 2.6159, "step": 300000 }, { "epoch": 1.49, "learning_rate": 4.925588947845603e-05, "loss": 2.6048, "step": 300500 }, { "epoch": 1.49, "learning_rate": 4.9254650892029944e-05, "loss": 2.5982, "step": 301000 }, { "epoch": 1.49, "learning_rate": 4.925341230560386e-05, "loss": 2.6199, "step": 301500 }, { "epoch": 1.5, "learning_rate": 4.925217619635063e-05, "loss": 2.6351, "step": 302000 }, { "epoch": 1.5, "learning_rate": 4.925093760992455e-05, "loss": 2.6151, "step": 302500 }, { "epoch": 1.5, "learning_rate": 4.9249701500671315e-05, "loss": 2.5925, "step": 303000 }, { "epoch": 1.5, "learning_rate": 4.924846291424523e-05, "loss": 2.5977, "step": 303500 }, { "epoch": 1.51, "learning_rate": 4.924722432781915e-05, "loss": 2.6201, "step": 304000 }, { "epoch": 1.51, "learning_rate": 4.9245985741393066e-05, "loss": 2.6132, "step": 304500 }, { "epoch": 1.51, "learning_rate": 4.924474715496698e-05, "loss": 2.6258, "step": 305000 }, { "epoch": 1.51, "learning_rate": 4.92435085685409e-05, "loss": 2.6223, "step": 305500 }, { "epoch": 1.52, "learning_rate": 4.924226998211482e-05, "loss": 2.6127, "step": 306000 }, { "epoch": 1.52, "learning_rate": 4.924103139568873e-05, "loss": 2.6071, "step": 306500 }, { "epoch": 1.52, "learning_rate": 4.9239795286435496e-05, "loss": 2.6147, "step": 307000 }, { "epoch": 1.52, "learning_rate": 4.923855670000941e-05, "loss": 2.6054, "step": 307500 }, { "epoch": 1.53, "learning_rate": 4.923731811358333e-05, "loss": 2.597, "step": 308000 }, { "epoch": 1.53, "learning_rate": 4.9236082004330105e-05, "loss": 2.599, "step": 308500 }, { "epoch": 1.53, "learning_rate": 4.9234843417904016e-05, "loss": 2.5992, "step": 309000 }, { "epoch": 1.53, "learning_rate": 4.923360483147793e-05, "loss": 2.6026, "step": 309500 }, { "epoch": 1.54, "learning_rate": 4.923236624505185e-05, "loss": 2.5938, "step": 310000 }, { "epoch": 1.54, "learning_rate": 4.9231127658625766e-05, "loss": 2.6023, "step": 310500 }, { "epoch": 1.54, "learning_rate": 4.922988907219968e-05, "loss": 2.586, "step": 311000 }, { "epoch": 1.54, "learning_rate": 4.92286504857736e-05, "loss": 2.6111, "step": 311500 }, { "epoch": 1.55, "learning_rate": 4.922741189934752e-05, "loss": 2.6103, "step": 312000 }, { "epoch": 1.55, "learning_rate": 4.922617579009428e-05, "loss": 2.598, "step": 312500 }, { "epoch": 1.55, "learning_rate": 4.9224937203668196e-05, "loss": 2.5982, "step": 313000 }, { "epoch": 1.55, "learning_rate": 4.922369861724211e-05, "loss": 2.6226, "step": 313500 }, { "epoch": 1.56, "learning_rate": 4.922246250798889e-05, "loss": 2.6292, "step": 314000 }, { "epoch": 1.56, "learning_rate": 4.9221223921562806e-05, "loss": 2.6045, "step": 314500 }, { "epoch": 1.56, "learning_rate": 4.921998533513672e-05, "loss": 2.596, "step": 315000 }, { "epoch": 1.56, "learning_rate": 4.921874674871063e-05, "loss": 2.5956, "step": 315500 }, { "epoch": 1.57, "learning_rate": 4.921750816228455e-05, "loss": 2.5923, "step": 316000 }, { "epoch": 1.57, "learning_rate": 4.9216269575858467e-05, "loss": 2.5815, "step": 316500 }, { "epoch": 1.57, "learning_rate": 4.9215030989432383e-05, "loss": 2.6302, "step": 317000 }, { "epoch": 1.57, "learning_rate": 4.92137924030063e-05, "loss": 2.5967, "step": 317500 }, { "epoch": 1.58, "learning_rate": 4.921255381658022e-05, "loss": 2.611, "step": 318000 }, { "epoch": 1.58, "learning_rate": 4.9211315230154134e-05, "loss": 2.5912, "step": 318500 }, { "epoch": 1.58, "learning_rate": 4.921007664372805e-05, "loss": 2.6341, "step": 319000 }, { "epoch": 1.58, "learning_rate": 4.920883805730197e-05, "loss": 2.5869, "step": 319500 }, { "epoch": 1.59, "learning_rate": 4.920759947087588e-05, "loss": 2.648, "step": 320000 }, { "epoch": 1.59, "learning_rate": 4.9206360884449795e-05, "loss": 2.6136, "step": 320500 }, { "epoch": 1.59, "learning_rate": 4.9205124775196564e-05, "loss": 2.5998, "step": 321000 }, { "epoch": 1.59, "learning_rate": 4.920388618877048e-05, "loss": 2.5955, "step": 321500 }, { "epoch": 1.6, "learning_rate": 4.920265007951725e-05, "loss": 2.5852, "step": 322000 }, { "epoch": 1.6, "learning_rate": 4.920141149309117e-05, "loss": 2.6069, "step": 322500 }, { "epoch": 1.6, "learning_rate": 4.9200172906665084e-05, "loss": 2.5923, "step": 323000 }, { "epoch": 1.6, "learning_rate": 4.9198934320239e-05, "loss": 2.6257, "step": 323500 }, { "epoch": 1.61, "learning_rate": 4.919769573381292e-05, "loss": 2.6118, "step": 324000 }, { "epoch": 1.61, "learning_rate": 4.9196459624559686e-05, "loss": 2.6192, "step": 324500 }, { "epoch": 1.61, "learning_rate": 4.9195221038133596e-05, "loss": 2.6028, "step": 325000 }, { "epoch": 1.61, "learning_rate": 4.919398245170751e-05, "loss": 2.6064, "step": 325500 }, { "epoch": 1.62, "learning_rate": 4.919274386528143e-05, "loss": 2.6158, "step": 326000 }, { "epoch": 1.62, "learning_rate": 4.919150527885535e-05, "loss": 2.5847, "step": 326500 }, { "epoch": 1.62, "learning_rate": 4.9190266692429264e-05, "loss": 2.5866, "step": 327000 }, { "epoch": 1.62, "learning_rate": 4.918902810600318e-05, "loss": 2.6172, "step": 327500 }, { "epoch": 1.63, "learning_rate": 4.91877895195771e-05, "loss": 2.6314, "step": 328000 }, { "epoch": 1.63, "learning_rate": 4.918655341032387e-05, "loss": 2.6196, "step": 328500 }, { "epoch": 1.63, "learning_rate": 4.9185317301070636e-05, "loss": 2.6346, "step": 329000 }, { "epoch": 1.63, "learning_rate": 4.918407871464455e-05, "loss": 2.6118, "step": 329500 }, { "epoch": 1.63, "learning_rate": 4.918284012821847e-05, "loss": 2.5716, "step": 330000 }, { "epoch": 1.64, "learning_rate": 4.9181601541792386e-05, "loss": 2.626, "step": 330500 }, { "epoch": 1.64, "learning_rate": 4.9180362955366297e-05, "loss": 2.6039, "step": 331000 }, { "epoch": 1.64, "learning_rate": 4.9179124368940213e-05, "loss": 2.5957, "step": 331500 }, { "epoch": 1.64, "learning_rate": 4.917788825968699e-05, "loss": 2.6115, "step": 332000 }, { "epoch": 1.65, "learning_rate": 4.9176649673260906e-05, "loss": 2.6013, "step": 332500 }, { "epoch": 1.65, "learning_rate": 4.917541108683482e-05, "loss": 2.6131, "step": 333000 }, { "epoch": 1.65, "learning_rate": 4.917417250040874e-05, "loss": 2.5955, "step": 333500 }, { "epoch": 1.65, "learning_rate": 4.917293391398265e-05, "loss": 2.615, "step": 334000 }, { "epoch": 1.66, "learning_rate": 4.917169780472942e-05, "loss": 2.5883, "step": 334500 }, { "epoch": 1.66, "learning_rate": 4.9170459218303336e-05, "loss": 2.6009, "step": 335000 }, { "epoch": 1.66, "learning_rate": 4.916922063187725e-05, "loss": 2.6009, "step": 335500 }, { "epoch": 1.66, "learning_rate": 4.916798204545117e-05, "loss": 2.6285, "step": 336000 }, { "epoch": 1.67, "learning_rate": 4.916674593619794e-05, "loss": 2.6007, "step": 336500 }, { "epoch": 1.67, "learning_rate": 4.9165507349771855e-05, "loss": 2.5952, "step": 337000 }, { "epoch": 1.67, "learning_rate": 4.916426876334577e-05, "loss": 2.5738, "step": 337500 }, { "epoch": 1.67, "learning_rate": 4.916303017691969e-05, "loss": 2.6101, "step": 338000 }, { "epoch": 1.68, "learning_rate": 4.916179406766646e-05, "loss": 2.5811, "step": 338500 }, { "epoch": 1.68, "learning_rate": 4.9160555481240375e-05, "loss": 2.5833, "step": 339000 }, { "epoch": 1.68, "learning_rate": 4.915931689481429e-05, "loss": 2.6013, "step": 339500 }, { "epoch": 1.68, "learning_rate": 4.915807830838821e-05, "loss": 2.6163, "step": 340000 }, { "epoch": 1.69, "learning_rate": 4.9156839721962126e-05, "loss": 2.5969, "step": 340500 }, { "epoch": 1.69, "learning_rate": 4.9155601135536036e-05, "loss": 2.5872, "step": 341000 }, { "epoch": 1.69, "learning_rate": 4.9154365026282805e-05, "loss": 2.5693, "step": 341500 }, { "epoch": 1.69, "learning_rate": 4.915312643985672e-05, "loss": 2.5994, "step": 342000 }, { "epoch": 1.7, "learning_rate": 4.915188785343064e-05, "loss": 2.6187, "step": 342500 }, { "epoch": 1.7, "learning_rate": 4.9150649267004555e-05, "loss": 2.5982, "step": 343000 }, { "epoch": 1.7, "learning_rate": 4.914941068057847e-05, "loss": 2.5968, "step": 343500 }, { "epoch": 1.7, "learning_rate": 4.914817209415239e-05, "loss": 2.5756, "step": 344000 }, { "epoch": 1.71, "learning_rate": 4.9146933507726306e-05, "loss": 2.5962, "step": 344500 }, { "epoch": 1.71, "learning_rate": 4.914569492130022e-05, "loss": 2.6133, "step": 345000 }, { "epoch": 1.71, "learning_rate": 4.914445881204699e-05, "loss": 2.6052, "step": 345500 }, { "epoch": 1.71, "learning_rate": 4.914322022562091e-05, "loss": 2.6143, "step": 346000 }, { "epoch": 1.72, "learning_rate": 4.9141981639194826e-05, "loss": 2.6077, "step": 346500 }, { "epoch": 1.72, "learning_rate": 4.914074552994159e-05, "loss": 2.6198, "step": 347000 }, { "epoch": 1.72, "learning_rate": 4.9139506943515505e-05, "loss": 2.6435, "step": 347500 }, { "epoch": 1.72, "learning_rate": 4.913826835708942e-05, "loss": 2.6145, "step": 348000 }, { "epoch": 1.73, "learning_rate": 4.913702977066334e-05, "loss": 2.5931, "step": 348500 }, { "epoch": 1.73, "learning_rate": 4.9135791184237256e-05, "loss": 2.6031, "step": 349000 }, { "epoch": 1.73, "learning_rate": 4.913455259781117e-05, "loss": 2.599, "step": 349500 }, { "epoch": 1.73, "learning_rate": 4.913331401138509e-05, "loss": 2.6163, "step": 350000 }, { "epoch": 1.74, "learning_rate": 4.9132075424959006e-05, "loss": 2.6104, "step": 350500 }, { "epoch": 1.74, "learning_rate": 4.913083683853292e-05, "loss": 2.6251, "step": 351000 }, { "epoch": 1.74, "learning_rate": 4.912959825210684e-05, "loss": 2.6044, "step": 351500 }, { "epoch": 1.74, "learning_rate": 4.912835966568076e-05, "loss": 2.6131, "step": 352000 }, { "epoch": 1.75, "learning_rate": 4.9127121079254674e-05, "loss": 2.5796, "step": 352500 }, { "epoch": 1.75, "learning_rate": 4.9125882492828584e-05, "loss": 2.6012, "step": 353000 }, { "epoch": 1.75, "learning_rate": 4.912464638357536e-05, "loss": 2.6166, "step": 353500 }, { "epoch": 1.75, "learning_rate": 4.912340779714928e-05, "loss": 2.6058, "step": 354000 }, { "epoch": 1.76, "learning_rate": 4.912216921072319e-05, "loss": 2.6076, "step": 354500 }, { "epoch": 1.76, "learning_rate": 4.9120930624297104e-05, "loss": 2.5742, "step": 355000 }, { "epoch": 1.76, "learning_rate": 4.911969203787102e-05, "loss": 2.5899, "step": 355500 }, { "epoch": 1.76, "learning_rate": 4.911845592861779e-05, "loss": 2.6106, "step": 356000 }, { "epoch": 1.77, "learning_rate": 4.9117217342191706e-05, "loss": 2.6205, "step": 356500 }, { "epoch": 1.77, "learning_rate": 4.9115978755765623e-05, "loss": 2.6254, "step": 357000 }, { "epoch": 1.77, "learning_rate": 4.911474016933954e-05, "loss": 2.6239, "step": 357500 }, { "epoch": 1.77, "learning_rate": 4.911350158291346e-05, "loss": 2.6001, "step": 358000 }, { "epoch": 1.78, "learning_rate": 4.9112262996487374e-05, "loss": 2.5861, "step": 358500 }, { "epoch": 1.78, "learning_rate": 4.911102688723414e-05, "loss": 2.6104, "step": 359000 }, { "epoch": 1.78, "learning_rate": 4.910978830080806e-05, "loss": 2.5978, "step": 359500 }, { "epoch": 1.78, "learning_rate": 4.910854971438198e-05, "loss": 2.5799, "step": 360000 }, { "epoch": 1.79, "learning_rate": 4.910731360512874e-05, "loss": 2.6136, "step": 360500 }, { "epoch": 1.79, "learning_rate": 4.9106075018702656e-05, "loss": 2.6105, "step": 361000 }, { "epoch": 1.79, "learning_rate": 4.910483643227657e-05, "loss": 2.5947, "step": 361500 }, { "epoch": 1.79, "learning_rate": 4.910359784585049e-05, "loss": 2.5731, "step": 362000 }, { "epoch": 1.8, "learning_rate": 4.910236421377011e-05, "loss": 2.6102, "step": 362500 }, { "epoch": 1.8, "learning_rate": 4.910112562734403e-05, "loss": 2.6048, "step": 363000 }, { "epoch": 1.8, "learning_rate": 4.9099887040917944e-05, "loss": 2.6181, "step": 363500 }, { "epoch": 1.8, "learning_rate": 4.909864845449186e-05, "loss": 2.5967, "step": 364000 }, { "epoch": 1.81, "learning_rate": 4.909740986806577e-05, "loss": 2.6098, "step": 364500 }, { "epoch": 1.81, "learning_rate": 4.909617128163969e-05, "loss": 2.6129, "step": 365000 }, { "epoch": 1.81, "learning_rate": 4.9094932695213605e-05, "loss": 2.5773, "step": 365500 }, { "epoch": 1.81, "learning_rate": 4.9093696585960374e-05, "loss": 2.6096, "step": 366000 }, { "epoch": 1.82, "learning_rate": 4.909245799953429e-05, "loss": 2.6181, "step": 366500 }, { "epoch": 1.82, "learning_rate": 4.909121941310821e-05, "loss": 2.5896, "step": 367000 }, { "epoch": 1.82, "learning_rate": 4.9089980826682125e-05, "loss": 2.5851, "step": 367500 }, { "epoch": 1.82, "learning_rate": 4.90887447174289e-05, "loss": 2.6158, "step": 368000 }, { "epoch": 1.83, "learning_rate": 4.908750613100282e-05, "loss": 2.5965, "step": 368500 }, { "epoch": 1.83, "learning_rate": 4.908626754457673e-05, "loss": 2.6232, "step": 369000 }, { "epoch": 1.83, "learning_rate": 4.9085028958150644e-05, "loss": 2.5897, "step": 369500 }, { "epoch": 1.83, "learning_rate": 4.908379037172456e-05, "loss": 2.577, "step": 370000 }, { "epoch": 1.84, "learning_rate": 4.908255178529848e-05, "loss": 2.6109, "step": 370500 }, { "epoch": 1.84, "learning_rate": 4.9081313198872395e-05, "loss": 2.602, "step": 371000 }, { "epoch": 1.84, "learning_rate": 4.9080074612446305e-05, "loss": 2.5674, "step": 371500 }, { "epoch": 1.84, "learning_rate": 4.907883602602022e-05, "loss": 2.5986, "step": 372000 }, { "epoch": 1.85, "learning_rate": 4.907759743959414e-05, "loss": 2.6051, "step": 372500 }, { "epoch": 1.85, "learning_rate": 4.9076358853168056e-05, "loss": 2.5999, "step": 373000 }, { "epoch": 1.85, "learning_rate": 4.907512026674197e-05, "loss": 2.6057, "step": 373500 }, { "epoch": 1.85, "learning_rate": 4.907388168031589e-05, "loss": 2.5942, "step": 374000 }, { "epoch": 1.86, "learning_rate": 4.907264309388981e-05, "loss": 2.6297, "step": 374500 }, { "epoch": 1.86, "learning_rate": 4.9071404507463724e-05, "loss": 2.5977, "step": 375000 }, { "epoch": 1.86, "learning_rate": 4.907016592103764e-05, "loss": 2.5997, "step": 375500 }, { "epoch": 1.86, "learning_rate": 4.906892733461156e-05, "loss": 2.6261, "step": 376000 }, { "epoch": 1.87, "learning_rate": 4.906769370253118e-05, "loss": 2.6139, "step": 376500 }, { "epoch": 1.87, "learning_rate": 4.9066455116105095e-05, "loss": 2.6229, "step": 377000 }, { "epoch": 1.87, "learning_rate": 4.906521652967901e-05, "loss": 2.6, "step": 377500 }, { "epoch": 1.87, "learning_rate": 4.906397794325292e-05, "loss": 2.5966, "step": 378000 }, { "epoch": 1.88, "learning_rate": 4.906273935682684e-05, "loss": 2.5734, "step": 378500 }, { "epoch": 1.88, "learning_rate": 4.9061500770400756e-05, "loss": 2.6094, "step": 379000 }, { "epoch": 1.88, "learning_rate": 4.906026218397467e-05, "loss": 2.5933, "step": 379500 }, { "epoch": 1.88, "learning_rate": 4.905902359754859e-05, "loss": 2.6042, "step": 380000 }, { "epoch": 1.89, "learning_rate": 4.905778501112251e-05, "loss": 2.6221, "step": 380500 }, { "epoch": 1.89, "learning_rate": 4.9056548901869276e-05, "loss": 2.5897, "step": 381000 }, { "epoch": 1.89, "learning_rate": 4.905531279261605e-05, "loss": 2.5985, "step": 381500 }, { "epoch": 1.89, "learning_rate": 4.9054076683362813e-05, "loss": 2.6131, "step": 382000 }, { "epoch": 1.9, "learning_rate": 4.905283809693673e-05, "loss": 2.6016, "step": 382500 }, { "epoch": 1.9, "learning_rate": 4.905159951051065e-05, "loss": 2.6079, "step": 383000 }, { "epoch": 1.9, "learning_rate": 4.9050360924084564e-05, "loss": 2.6093, "step": 383500 }, { "epoch": 1.9, "learning_rate": 4.904912233765848e-05, "loss": 2.6203, "step": 384000 }, { "epoch": 1.9, "learning_rate": 4.90478837512324e-05, "loss": 2.6072, "step": 384500 }, { "epoch": 1.91, "learning_rate": 4.904664516480631e-05, "loss": 2.5929, "step": 385000 }, { "epoch": 1.91, "learning_rate": 4.9045406578380225e-05, "loss": 2.6029, "step": 385500 }, { "epoch": 1.91, "learning_rate": 4.9044170469127e-05, "loss": 2.6103, "step": 386000 }, { "epoch": 1.91, "learning_rate": 4.904293188270092e-05, "loss": 2.5676, "step": 386500 }, { "epoch": 1.92, "learning_rate": 4.9041693296274835e-05, "loss": 2.6016, "step": 387000 }, { "epoch": 1.92, "learning_rate": 4.904045470984875e-05, "loss": 2.5875, "step": 387500 }, { "epoch": 1.92, "learning_rate": 4.903921612342266e-05, "loss": 2.6147, "step": 388000 }, { "epoch": 1.92, "learning_rate": 4.903797753699658e-05, "loss": 2.573, "step": 388500 }, { "epoch": 1.93, "learning_rate": 4.903674142774335e-05, "loss": 2.6102, "step": 389000 }, { "epoch": 1.93, "learning_rate": 4.9035502841317264e-05, "loss": 2.6018, "step": 389500 }, { "epoch": 1.93, "learning_rate": 4.903426425489118e-05, "loss": 2.5944, "step": 390000 }, { "epoch": 1.93, "learning_rate": 4.90330256684651e-05, "loss": 2.6172, "step": 390500 }, { "epoch": 1.94, "learning_rate": 4.903178708203901e-05, "loss": 2.5899, "step": 391000 }, { "epoch": 1.94, "learning_rate": 4.9030548495612925e-05, "loss": 2.5876, "step": 391500 }, { "epoch": 1.94, "learning_rate": 4.902930990918684e-05, "loss": 2.5859, "step": 392000 }, { "epoch": 1.94, "learning_rate": 4.902807132276076e-05, "loss": 2.586, "step": 392500 }, { "epoch": 1.95, "learning_rate": 4.9026832736334676e-05, "loss": 2.6084, "step": 393000 }, { "epoch": 1.95, "learning_rate": 4.902559662708145e-05, "loss": 2.6038, "step": 393500 }, { "epoch": 1.95, "learning_rate": 4.9024360517828214e-05, "loss": 2.5836, "step": 394000 }, { "epoch": 1.95, "learning_rate": 4.902312193140213e-05, "loss": 2.5929, "step": 394500 }, { "epoch": 1.96, "learning_rate": 4.90218858221489e-05, "loss": 2.5957, "step": 395000 }, { "epoch": 1.96, "learning_rate": 4.9020647235722816e-05, "loss": 2.582, "step": 395500 }, { "epoch": 1.96, "learning_rate": 4.901940864929673e-05, "loss": 2.6287, "step": 396000 }, { "epoch": 1.96, "learning_rate": 4.901817006287065e-05, "loss": 2.6059, "step": 396500 }, { "epoch": 1.97, "learning_rate": 4.901693147644457e-05, "loss": 2.5855, "step": 397000 }, { "epoch": 1.97, "learning_rate": 4.9015692890018484e-05, "loss": 2.5951, "step": 397500 }, { "epoch": 1.97, "learning_rate": 4.90144543035924e-05, "loss": 2.5817, "step": 398000 }, { "epoch": 1.97, "learning_rate": 4.901321571716632e-05, "loss": 2.6157, "step": 398500 }, { "epoch": 1.98, "learning_rate": 4.9011977130740235e-05, "loss": 2.596, "step": 399000 }, { "epoch": 1.98, "learning_rate": 4.901073854431415e-05, "loss": 2.5786, "step": 399500 }, { "epoch": 1.98, "learning_rate": 4.900949995788807e-05, "loss": 2.5946, "step": 400000 }, { "epoch": 1.98, "learning_rate": 4.900826384863483e-05, "loss": 2.6155, "step": 400500 }, { "epoch": 1.99, "learning_rate": 4.900702526220875e-05, "loss": 2.5889, "step": 401000 }, { "epoch": 1.99, "learning_rate": 4.9005786675782665e-05, "loss": 2.5796, "step": 401500 }, { "epoch": 1.99, "learning_rate": 4.900454808935658e-05, "loss": 2.5993, "step": 402000 }, { "epoch": 1.99, "learning_rate": 4.900331198010335e-05, "loss": 2.5894, "step": 402500 }, { "epoch": 2.0, "learning_rate": 4.900207339367727e-05, "loss": 2.6215, "step": 403000 }, { "epoch": 2.0, "learning_rate": 4.9000834807251184e-05, "loss": 2.5994, "step": 403500 }, { "epoch": 2.0, "eval_accuracy": 0.6286475772528078, "eval_accuracy_mlm": 0.5813388926339267, "eval_accuracy_nsp": 0.8516075133648939, "eval_loss": 2.524798631668091, "eval_runtime": 146.063, "eval_samples_per_second": 1745.542, "eval_steps_per_second": 72.736, "step": 403686 }, { "epoch": 2.0, "learning_rate": 4.89995962208251e-05, "loss": 2.5887, "step": 404000 }, { "epoch": 2.0, "learning_rate": 4.899835763439902e-05, "loss": 2.5834, "step": 404500 }, { "epoch": 2.01, "learning_rate": 4.8997119047972935e-05, "loss": 2.5753, "step": 405000 }, { "epoch": 2.01, "learning_rate": 4.8995882938719704e-05, "loss": 2.5542, "step": 405500 }, { "epoch": 2.01, "learning_rate": 4.8994644352293614e-05, "loss": 2.5592, "step": 406000 }, { "epoch": 2.01, "learning_rate": 4.899340576586753e-05, "loss": 2.5818, "step": 406500 }, { "epoch": 2.02, "learning_rate": 4.899216717944145e-05, "loss": 2.5571, "step": 407000 }, { "epoch": 2.02, "learning_rate": 4.8990928593015365e-05, "loss": 2.5746, "step": 407500 }, { "epoch": 2.02, "learning_rate": 4.8989692483762133e-05, "loss": 2.5663, "step": 408000 }, { "epoch": 2.02, "learning_rate": 4.898845389733605e-05, "loss": 2.5716, "step": 408500 }, { "epoch": 2.03, "learning_rate": 4.898721778808282e-05, "loss": 2.5595, "step": 409000 }, { "epoch": 2.03, "learning_rate": 4.8985979201656736e-05, "loss": 2.5811, "step": 409500 }, { "epoch": 2.03, "learning_rate": 4.898474061523065e-05, "loss": 2.5488, "step": 410000 }, { "epoch": 2.03, "learning_rate": 4.898350202880457e-05, "loss": 2.5651, "step": 410500 }, { "epoch": 2.04, "learning_rate": 4.898226344237849e-05, "loss": 2.5839, "step": 411000 }, { "epoch": 2.04, "learning_rate": 4.8981024855952404e-05, "loss": 2.577, "step": 411500 }, { "epoch": 2.04, "learning_rate": 4.897978626952632e-05, "loss": 2.5854, "step": 412000 }, { "epoch": 2.04, "learning_rate": 4.897855016027308e-05, "loss": 2.5607, "step": 412500 }, { "epoch": 2.05, "learning_rate": 4.8977311573847e-05, "loss": 2.5672, "step": 413000 }, { "epoch": 2.05, "learning_rate": 4.897607298742092e-05, "loss": 2.5866, "step": 413500 }, { "epoch": 2.05, "learning_rate": 4.8974834400994834e-05, "loss": 2.6007, "step": 414000 }, { "epoch": 2.05, "learning_rate": 4.897359581456875e-05, "loss": 2.5648, "step": 414500 }, { "epoch": 2.06, "learning_rate": 4.897235722814267e-05, "loss": 2.5832, "step": 415000 }, { "epoch": 2.06, "learning_rate": 4.8971118641716584e-05, "loss": 2.5968, "step": 415500 }, { "epoch": 2.06, "learning_rate": 4.896988253246335e-05, "loss": 2.5548, "step": 416000 }, { "epoch": 2.06, "learning_rate": 4.896864394603727e-05, "loss": 2.5688, "step": 416500 }, { "epoch": 2.07, "learning_rate": 4.896740535961119e-05, "loss": 2.5836, "step": 417000 }, { "epoch": 2.07, "learning_rate": 4.8966166773185104e-05, "loss": 2.571, "step": 417500 }, { "epoch": 2.07, "learning_rate": 4.896492818675902e-05, "loss": 2.5721, "step": 418000 }, { "epoch": 2.07, "learning_rate": 4.896368960033294e-05, "loss": 2.561, "step": 418500 }, { "epoch": 2.08, "learning_rate": 4.8962451013906855e-05, "loss": 2.5737, "step": 419000 }, { "epoch": 2.08, "learning_rate": 4.8961212427480765e-05, "loss": 2.559, "step": 419500 }, { "epoch": 2.08, "learning_rate": 4.895997384105468e-05, "loss": 2.5928, "step": 420000 }, { "epoch": 2.08, "learning_rate": 4.89587352546286e-05, "loss": 2.5754, "step": 420500 }, { "epoch": 2.09, "learning_rate": 4.895749914537537e-05, "loss": 2.5631, "step": 421000 }, { "epoch": 2.09, "learning_rate": 4.8956260558949285e-05, "loss": 2.5715, "step": 421500 }, { "epoch": 2.09, "learning_rate": 4.89550219725232e-05, "loss": 2.5942, "step": 422000 }, { "epoch": 2.09, "learning_rate": 4.895378338609712e-05, "loss": 2.5579, "step": 422500 }, { "epoch": 2.1, "learning_rate": 4.8952544799671035e-05, "loss": 2.5612, "step": 423000 }, { "epoch": 2.1, "learning_rate": 4.895130621324495e-05, "loss": 2.5945, "step": 423500 }, { "epoch": 2.1, "learning_rate": 4.895007010399172e-05, "loss": 2.547, "step": 424000 }, { "epoch": 2.1, "learning_rate": 4.894883151756564e-05, "loss": 2.5908, "step": 424500 }, { "epoch": 2.11, "learning_rate": 4.8947592931139555e-05, "loss": 2.5855, "step": 425000 }, { "epoch": 2.11, "learning_rate": 4.894635434471347e-05, "loss": 2.5796, "step": 425500 }, { "epoch": 2.11, "learning_rate": 4.894511575828739e-05, "loss": 2.5654, "step": 426000 }, { "epoch": 2.11, "learning_rate": 4.89438771718613e-05, "loss": 2.5478, "step": 426500 }, { "epoch": 2.12, "learning_rate": 4.8942638585435216e-05, "loss": 2.5889, "step": 427000 }, { "epoch": 2.12, "learning_rate": 4.894139999900913e-05, "loss": 2.5634, "step": 427500 }, { "epoch": 2.12, "learning_rate": 4.894016141258305e-05, "loss": 2.5461, "step": 428000 }, { "epoch": 2.12, "learning_rate": 4.893892530332982e-05, "loss": 2.5366, "step": 428500 }, { "epoch": 2.13, "learning_rate": 4.893768919407659e-05, "loss": 2.5651, "step": 429000 }, { "epoch": 2.13, "learning_rate": 4.8936450607650504e-05, "loss": 2.5911, "step": 429500 }, { "epoch": 2.13, "learning_rate": 4.893521202122442e-05, "loss": 2.5801, "step": 430000 }, { "epoch": 2.13, "learning_rate": 4.893397591197118e-05, "loss": 2.5771, "step": 430500 }, { "epoch": 2.14, "learning_rate": 4.89327373255451e-05, "loss": 2.5864, "step": 431000 }, { "epoch": 2.14, "learning_rate": 4.8931501216291876e-05, "loss": 2.5512, "step": 431500 }, { "epoch": 2.14, "learning_rate": 4.893026262986579e-05, "loss": 2.5651, "step": 432000 }, { "epoch": 2.14, "learning_rate": 4.89290240434397e-05, "loss": 2.5588, "step": 432500 }, { "epoch": 2.15, "learning_rate": 4.892778545701362e-05, "loss": 2.5744, "step": 433000 }, { "epoch": 2.15, "learning_rate": 4.892654687058754e-05, "loss": 2.5913, "step": 433500 }, { "epoch": 2.15, "learning_rate": 4.8925308284161454e-05, "loss": 2.5761, "step": 434000 }, { "epoch": 2.15, "learning_rate": 4.892406969773537e-05, "loss": 2.5819, "step": 434500 }, { "epoch": 2.16, "learning_rate": 4.892283111130929e-05, "loss": 2.5598, "step": 435000 }, { "epoch": 2.16, "learning_rate": 4.8921592524883204e-05, "loss": 2.574, "step": 435500 }, { "epoch": 2.16, "learning_rate": 4.892035393845712e-05, "loss": 2.5842, "step": 436000 }, { "epoch": 2.16, "learning_rate": 4.891911535203104e-05, "loss": 2.5627, "step": 436500 }, { "epoch": 2.17, "learning_rate": 4.8917876765604955e-05, "loss": 2.5587, "step": 437000 }, { "epoch": 2.17, "learning_rate": 4.891663817917887e-05, "loss": 2.5872, "step": 437500 }, { "epoch": 2.17, "learning_rate": 4.891539959275279e-05, "loss": 2.5496, "step": 438000 }, { "epoch": 2.17, "learning_rate": 4.891416348349955e-05, "loss": 2.5884, "step": 438500 }, { "epoch": 2.17, "learning_rate": 4.891292489707347e-05, "loss": 2.5475, "step": 439000 }, { "epoch": 2.18, "learning_rate": 4.8911686310647385e-05, "loss": 2.5432, "step": 439500 }, { "epoch": 2.18, "learning_rate": 4.8910450201394154e-05, "loss": 2.5509, "step": 440000 }, { "epoch": 2.18, "learning_rate": 4.890921161496807e-05, "loss": 2.5689, "step": 440500 }, { "epoch": 2.18, "learning_rate": 4.890797302854199e-05, "loss": 2.5687, "step": 441000 }, { "epoch": 2.19, "learning_rate": 4.8906734442115904e-05, "loss": 2.5651, "step": 441500 }, { "epoch": 2.19, "learning_rate": 4.890549585568982e-05, "loss": 2.5761, "step": 442000 }, { "epoch": 2.19, "learning_rate": 4.8904267177955146e-05, "loss": 2.5642, "step": 442500 }, { "epoch": 2.19, "learning_rate": 4.890302859152906e-05, "loss": 2.5557, "step": 443000 }, { "epoch": 2.2, "learning_rate": 4.890179000510298e-05, "loss": 2.575, "step": 443500 }, { "epoch": 2.2, "learning_rate": 4.890055141867689e-05, "loss": 2.5907, "step": 444000 }, { "epoch": 2.2, "learning_rate": 4.889931283225081e-05, "loss": 2.5731, "step": 444500 }, { "epoch": 2.2, "learning_rate": 4.8898074245824724e-05, "loss": 2.5548, "step": 445000 }, { "epoch": 2.21, "learning_rate": 4.889683565939864e-05, "loss": 2.5467, "step": 445500 }, { "epoch": 2.21, "learning_rate": 4.889559707297256e-05, "loss": 2.592, "step": 446000 }, { "epoch": 2.21, "learning_rate": 4.8894358486546475e-05, "loss": 2.5996, "step": 446500 }, { "epoch": 2.21, "learning_rate": 4.889311990012039e-05, "loss": 2.5768, "step": 447000 }, { "epoch": 2.22, "learning_rate": 4.889188131369431e-05, "loss": 2.5667, "step": 447500 }, { "epoch": 2.22, "learning_rate": 4.8890642727268225e-05, "loss": 2.5699, "step": 448000 }, { "epoch": 2.22, "learning_rate": 4.888940414084214e-05, "loss": 2.5794, "step": 448500 }, { "epoch": 2.22, "learning_rate": 4.888816555441606e-05, "loss": 2.5778, "step": 449000 }, { "epoch": 2.23, "learning_rate": 4.8886926967989976e-05, "loss": 2.5842, "step": 449500 }, { "epoch": 2.23, "learning_rate": 4.888568838156389e-05, "loss": 2.5859, "step": 450000 }, { "epoch": 2.23, "learning_rate": 4.888444979513781e-05, "loss": 2.5658, "step": 450500 }, { "epoch": 2.23, "learning_rate": 4.888321120871172e-05, "loss": 2.5744, "step": 451000 }, { "epoch": 2.24, "learning_rate": 4.888197262228564e-05, "loss": 2.5542, "step": 451500 }, { "epoch": 2.24, "learning_rate": 4.8880734035859554e-05, "loss": 2.5659, "step": 452000 }, { "epoch": 2.24, "learning_rate": 4.887949544943347e-05, "loss": 2.5667, "step": 452500 }, { "epoch": 2.24, "learning_rate": 4.887825686300739e-05, "loss": 2.5916, "step": 453000 }, { "epoch": 2.25, "learning_rate": 4.887702075375416e-05, "loss": 2.5724, "step": 453500 }, { "epoch": 2.25, "learning_rate": 4.8875782167328074e-05, "loss": 2.5604, "step": 454000 }, { "epoch": 2.25, "learning_rate": 4.887454358090199e-05, "loss": 2.556, "step": 454500 }, { "epoch": 2.25, "learning_rate": 4.887330499447591e-05, "loss": 2.569, "step": 455000 }, { "epoch": 2.26, "learning_rate": 4.8872066408049824e-05, "loss": 2.5667, "step": 455500 }, { "epoch": 2.26, "learning_rate": 4.887082782162374e-05, "loss": 2.5849, "step": 456000 }, { "epoch": 2.26, "learning_rate": 4.886958923519765e-05, "loss": 2.5702, "step": 456500 }, { "epoch": 2.26, "learning_rate": 4.886835064877157e-05, "loss": 2.5687, "step": 457000 }, { "epoch": 2.27, "learning_rate": 4.886711453951834e-05, "loss": 2.5787, "step": 457500 }, { "epoch": 2.27, "learning_rate": 4.8865875953092254e-05, "loss": 2.5678, "step": 458000 }, { "epoch": 2.27, "learning_rate": 4.886463736666617e-05, "loss": 2.5795, "step": 458500 }, { "epoch": 2.27, "learning_rate": 4.8863401257412947e-05, "loss": 2.5772, "step": 459000 }, { "epoch": 2.28, "learning_rate": 4.8862162670986864e-05, "loss": 2.5546, "step": 459500 }, { "epoch": 2.28, "learning_rate": 4.886092408456078e-05, "loss": 2.5824, "step": 460000 }, { "epoch": 2.28, "learning_rate": 4.885968549813469e-05, "loss": 2.5854, "step": 460500 }, { "epoch": 2.28, "learning_rate": 4.885844691170861e-05, "loss": 2.5993, "step": 461000 }, { "epoch": 2.29, "learning_rate": 4.8857208325282524e-05, "loss": 2.5536, "step": 461500 }, { "epoch": 2.29, "learning_rate": 4.885596973885644e-05, "loss": 2.5607, "step": 462000 }, { "epoch": 2.29, "learning_rate": 4.885473362960321e-05, "loss": 2.5777, "step": 462500 }, { "epoch": 2.29, "learning_rate": 4.885349504317713e-05, "loss": 2.5489, "step": 463000 }, { "epoch": 2.3, "learning_rate": 4.885225645675104e-05, "loss": 2.5935, "step": 463500 }, { "epoch": 2.3, "learning_rate": 4.8851017870324954e-05, "loss": 2.5599, "step": 464000 }, { "epoch": 2.3, "learning_rate": 4.884978176107173e-05, "loss": 2.5644, "step": 464500 }, { "epoch": 2.3, "learning_rate": 4.884854317464565e-05, "loss": 2.5661, "step": 465000 }, { "epoch": 2.31, "learning_rate": 4.8847304588219564e-05, "loss": 2.5732, "step": 465500 }, { "epoch": 2.31, "learning_rate": 4.884606600179348e-05, "loss": 2.5702, "step": 466000 }, { "epoch": 2.31, "learning_rate": 4.88448274153674e-05, "loss": 2.5586, "step": 466500 }, { "epoch": 2.31, "learning_rate": 4.884359130611416e-05, "loss": 2.574, "step": 467000 }, { "epoch": 2.32, "learning_rate": 4.8842352719688076e-05, "loss": 2.5618, "step": 467500 }, { "epoch": 2.32, "learning_rate": 4.884111413326199e-05, "loss": 2.5526, "step": 468000 }, { "epoch": 2.32, "learning_rate": 4.883987554683591e-05, "loss": 2.5749, "step": 468500 }, { "epoch": 2.32, "learning_rate": 4.883863696040983e-05, "loss": 2.5694, "step": 469000 }, { "epoch": 2.33, "learning_rate": 4.8837398373983744e-05, "loss": 2.5888, "step": 469500 }, { "epoch": 2.33, "learning_rate": 4.8836159787557654e-05, "loss": 2.5595, "step": 470000 }, { "epoch": 2.33, "learning_rate": 4.883492120113157e-05, "loss": 2.5967, "step": 470500 }, { "epoch": 2.33, "learning_rate": 4.883368261470549e-05, "loss": 2.5484, "step": 471000 }, { "epoch": 2.34, "learning_rate": 4.8832444028279405e-05, "loss": 2.5715, "step": 471500 }, { "epoch": 2.34, "learning_rate": 4.883120544185332e-05, "loss": 2.5756, "step": 472000 }, { "epoch": 2.34, "learning_rate": 4.882996685542724e-05, "loss": 2.5513, "step": 472500 }, { "epoch": 2.34, "learning_rate": 4.8828728269001156e-05, "loss": 2.5952, "step": 473000 }, { "epoch": 2.35, "learning_rate": 4.8827492159747925e-05, "loss": 2.5728, "step": 473500 }, { "epoch": 2.35, "learning_rate": 4.882625357332184e-05, "loss": 2.577, "step": 474000 }, { "epoch": 2.35, "learning_rate": 4.882501498689576e-05, "loss": 2.6014, "step": 474500 }, { "epoch": 2.35, "learning_rate": 4.882377887764253e-05, "loss": 2.5732, "step": 475000 }, { "epoch": 2.36, "learning_rate": 4.8822540291216444e-05, "loss": 2.559, "step": 475500 }, { "epoch": 2.36, "learning_rate": 4.8821301704790354e-05, "loss": 2.5574, "step": 476000 }, { "epoch": 2.36, "learning_rate": 4.882006311836427e-05, "loss": 2.5518, "step": 476500 }, { "epoch": 2.36, "learning_rate": 4.881882453193819e-05, "loss": 2.5777, "step": 477000 }, { "epoch": 2.37, "learning_rate": 4.8817585945512105e-05, "loss": 2.5795, "step": 477500 }, { "epoch": 2.37, "learning_rate": 4.881634735908602e-05, "loss": 2.5723, "step": 478000 }, { "epoch": 2.37, "learning_rate": 4.881510877265994e-05, "loss": 2.5972, "step": 478500 }, { "epoch": 2.37, "learning_rate": 4.8813872663406715e-05, "loss": 2.5405, "step": 479000 }, { "epoch": 2.38, "learning_rate": 4.8812634076980625e-05, "loss": 2.6003, "step": 479500 }, { "epoch": 2.38, "learning_rate": 4.881139549055454e-05, "loss": 2.5426, "step": 480000 }, { "epoch": 2.38, "learning_rate": 4.881015690412846e-05, "loss": 2.587, "step": 480500 }, { "epoch": 2.38, "learning_rate": 4.8808918317702376e-05, "loss": 2.5803, "step": 481000 }, { "epoch": 2.39, "learning_rate": 4.880767973127629e-05, "loss": 2.5914, "step": 481500 }, { "epoch": 2.39, "learning_rate": 4.880644114485021e-05, "loss": 2.5622, "step": 482000 }, { "epoch": 2.39, "learning_rate": 4.8805202558424126e-05, "loss": 2.5813, "step": 482500 }, { "epoch": 2.39, "learning_rate": 4.880396397199804e-05, "loss": 2.5929, "step": 483000 }, { "epoch": 2.4, "learning_rate": 4.8802727862744805e-05, "loss": 2.5919, "step": 483500 }, { "epoch": 2.4, "learning_rate": 4.880148927631872e-05, "loss": 2.5565, "step": 484000 }, { "epoch": 2.4, "learning_rate": 4.88002531670655e-05, "loss": 2.5604, "step": 484500 }, { "epoch": 2.4, "learning_rate": 4.8799014580639415e-05, "loss": 2.5575, "step": 485000 }, { "epoch": 2.41, "learning_rate": 4.8797775994213325e-05, "loss": 2.5634, "step": 485500 }, { "epoch": 2.41, "learning_rate": 4.879653740778724e-05, "loss": 2.5762, "step": 486000 }, { "epoch": 2.41, "learning_rate": 4.879529882136116e-05, "loss": 2.5648, "step": 486500 }, { "epoch": 2.41, "learning_rate": 4.8794060234935076e-05, "loss": 2.5592, "step": 487000 }, { "epoch": 2.42, "learning_rate": 4.879282164850899e-05, "loss": 2.563, "step": 487500 }, { "epoch": 2.42, "learning_rate": 4.879158553925576e-05, "loss": 2.6136, "step": 488000 }, { "epoch": 2.42, "learning_rate": 4.879034695282967e-05, "loss": 2.5676, "step": 488500 }, { "epoch": 2.42, "learning_rate": 4.878910836640359e-05, "loss": 2.5787, "step": 489000 }, { "epoch": 2.43, "learning_rate": 4.8787872257150364e-05, "loss": 2.5805, "step": 489500 }, { "epoch": 2.43, "learning_rate": 4.878663367072428e-05, "loss": 2.5883, "step": 490000 }, { "epoch": 2.43, "learning_rate": 4.87853950842982e-05, "loss": 2.5649, "step": 490500 }, { "epoch": 2.43, "learning_rate": 4.878415897504497e-05, "loss": 2.5689, "step": 491000 }, { "epoch": 2.44, "learning_rate": 4.878292038861888e-05, "loss": 2.5621, "step": 491500 }, { "epoch": 2.44, "learning_rate": 4.8781681802192794e-05, "loss": 2.586, "step": 492000 }, { "epoch": 2.44, "learning_rate": 4.878044321576671e-05, "loss": 2.5845, "step": 492500 }, { "epoch": 2.44, "learning_rate": 4.877920462934063e-05, "loss": 2.5749, "step": 493000 }, { "epoch": 2.44, "learning_rate": 4.8777966042914545e-05, "loss": 2.5925, "step": 493500 }, { "epoch": 2.45, "learning_rate": 4.877672745648846e-05, "loss": 2.5829, "step": 494000 }, { "epoch": 2.45, "learning_rate": 4.877549134723523e-05, "loss": 2.5584, "step": 494500 }, { "epoch": 2.45, "learning_rate": 4.877425276080915e-05, "loss": 2.562, "step": 495000 }, { "epoch": 2.45, "learning_rate": 4.8773014174383064e-05, "loss": 2.5678, "step": 495500 }, { "epoch": 2.46, "learning_rate": 4.877177558795698e-05, "loss": 2.5516, "step": 496000 }, { "epoch": 2.46, "learning_rate": 4.87705370015309e-05, "loss": 2.567, "step": 496500 }, { "epoch": 2.46, "learning_rate": 4.8769298415104815e-05, "loss": 2.5885, "step": 497000 }, { "epoch": 2.46, "learning_rate": 4.876805982867873e-05, "loss": 2.5813, "step": 497500 }, { "epoch": 2.47, "learning_rate": 4.876682124225264e-05, "loss": 2.5801, "step": 498000 }, { "epoch": 2.47, "learning_rate": 4.876558265582656e-05, "loss": 2.567, "step": 498500 }, { "epoch": 2.47, "learning_rate": 4.8764344069400476e-05, "loss": 2.5659, "step": 499000 }, { "epoch": 2.47, "learning_rate": 4.876310548297439e-05, "loss": 2.572, "step": 499500 }, { "epoch": 2.48, "learning_rate": 4.876186689654831e-05, "loss": 2.5771, "step": 500000 }, { "epoch": 2.48, "learning_rate": 4.876062831012223e-05, "loss": 2.5942, "step": 500500 }, { "epoch": 2.48, "learning_rate": 4.8759389723696144e-05, "loss": 2.58, "step": 501000 }, { "epoch": 2.48, "learning_rate": 4.875815113727006e-05, "loss": 2.5474, "step": 501500 }, { "epoch": 2.49, "learning_rate": 4.875691502801682e-05, "loss": 2.5899, "step": 502000 }, { "epoch": 2.49, "learning_rate": 4.87556789187636e-05, "loss": 2.581, "step": 502500 }, { "epoch": 2.49, "learning_rate": 4.8754440332337515e-05, "loss": 2.5766, "step": 503000 }, { "epoch": 2.49, "learning_rate": 4.875320174591143e-05, "loss": 2.5461, "step": 503500 }, { "epoch": 2.5, "learning_rate": 4.875196315948535e-05, "loss": 2.5437, "step": 504000 }, { "epoch": 2.5, "learning_rate": 4.875072457305926e-05, "loss": 2.566, "step": 504500 }, { "epoch": 2.5, "learning_rate": 4.8749485986633176e-05, "loss": 2.5592, "step": 505000 }, { "epoch": 2.5, "learning_rate": 4.8748249877379945e-05, "loss": 2.5718, "step": 505500 }, { "epoch": 2.51, "learning_rate": 4.874701129095386e-05, "loss": 2.554, "step": 506000 }, { "epoch": 2.51, "learning_rate": 4.874577270452778e-05, "loss": 2.5343, "step": 506500 }, { "epoch": 2.51, "learning_rate": 4.8744534118101696e-05, "loss": 2.5618, "step": 507000 }, { "epoch": 2.51, "learning_rate": 4.8743295531675606e-05, "loss": 2.5622, "step": 507500 }, { "epoch": 2.52, "learning_rate": 4.874205694524952e-05, "loss": 2.5814, "step": 508000 }, { "epoch": 2.52, "learning_rate": 4.874081835882344e-05, "loss": 2.5951, "step": 508500 }, { "epoch": 2.52, "learning_rate": 4.8739582249570215e-05, "loss": 2.5568, "step": 509000 }, { "epoch": 2.52, "learning_rate": 4.873834366314413e-05, "loss": 2.5576, "step": 509500 }, { "epoch": 2.53, "learning_rate": 4.873710507671805e-05, "loss": 2.5658, "step": 510000 }, { "epoch": 2.53, "learning_rate": 4.873586649029196e-05, "loss": 2.5628, "step": 510500 }, { "epoch": 2.53, "learning_rate": 4.8734630381038735e-05, "loss": 2.5771, "step": 511000 }, { "epoch": 2.53, "learning_rate": 4.8733391794612645e-05, "loss": 2.5623, "step": 511500 }, { "epoch": 2.54, "learning_rate": 4.873215320818656e-05, "loss": 2.552, "step": 512000 }, { "epoch": 2.54, "learning_rate": 4.873091462176048e-05, "loss": 2.5771, "step": 512500 }, { "epoch": 2.54, "learning_rate": 4.8729676035334396e-05, "loss": 2.5683, "step": 513000 }, { "epoch": 2.54, "learning_rate": 4.872843744890831e-05, "loss": 2.5772, "step": 513500 }, { "epoch": 2.55, "learning_rate": 4.872719886248222e-05, "loss": 2.5589, "step": 514000 }, { "epoch": 2.55, "learning_rate": 4.872596027605614e-05, "loss": 2.5793, "step": 514500 }, { "epoch": 2.55, "learning_rate": 4.872472168963006e-05, "loss": 2.5705, "step": 515000 }, { "epoch": 2.55, "learning_rate": 4.8723483103203974e-05, "loss": 2.5768, "step": 515500 }, { "epoch": 2.56, "learning_rate": 4.872224451677789e-05, "loss": 2.5792, "step": 516000 }, { "epoch": 2.56, "learning_rate": 4.872100593035181e-05, "loss": 2.538, "step": 516500 }, { "epoch": 2.56, "learning_rate": 4.8719767343925724e-05, "loss": 2.5786, "step": 517000 }, { "epoch": 2.56, "learning_rate": 4.871852875749964e-05, "loss": 2.5611, "step": 517500 }, { "epoch": 2.57, "learning_rate": 4.871729017107356e-05, "loss": 2.5601, "step": 518000 }, { "epoch": 2.57, "learning_rate": 4.8716051584647475e-05, "loss": 2.5772, "step": 518500 }, { "epoch": 2.57, "learning_rate": 4.8714817952567096e-05, "loss": 2.5865, "step": 519000 }, { "epoch": 2.57, "learning_rate": 4.871357936614101e-05, "loss": 2.5592, "step": 519500 }, { "epoch": 2.58, "learning_rate": 4.871234077971493e-05, "loss": 2.5937, "step": 520000 }, { "epoch": 2.58, "learning_rate": 4.871110219328885e-05, "loss": 2.5601, "step": 520500 }, { "epoch": 2.58, "learning_rate": 4.870986360686276e-05, "loss": 2.5464, "step": 521000 }, { "epoch": 2.58, "learning_rate": 4.8708625020436674e-05, "loss": 2.5565, "step": 521500 }, { "epoch": 2.59, "learning_rate": 4.870738643401059e-05, "loss": 2.5702, "step": 522000 }, { "epoch": 2.59, "learning_rate": 4.870614784758451e-05, "loss": 2.5839, "step": 522500 }, { "epoch": 2.59, "learning_rate": 4.8704911738331276e-05, "loss": 2.5712, "step": 523000 }, { "epoch": 2.59, "learning_rate": 4.8703673151905193e-05, "loss": 2.5777, "step": 523500 }, { "epoch": 2.6, "learning_rate": 4.870243704265197e-05, "loss": 2.56, "step": 524000 }, { "epoch": 2.6, "learning_rate": 4.8701198456225886e-05, "loss": 2.5688, "step": 524500 }, { "epoch": 2.6, "learning_rate": 4.8699959869799796e-05, "loss": 2.5656, "step": 525000 }, { "epoch": 2.6, "learning_rate": 4.8698723760546565e-05, "loss": 2.5649, "step": 525500 }, { "epoch": 2.61, "learning_rate": 4.869748517412048e-05, "loss": 2.5436, "step": 526000 }, { "epoch": 2.61, "learning_rate": 4.86962465876944e-05, "loss": 2.5476, "step": 526500 }, { "epoch": 2.61, "learning_rate": 4.8695008001268316e-05, "loss": 2.5696, "step": 527000 }, { "epoch": 2.61, "learning_rate": 4.869376941484223e-05, "loss": 2.5495, "step": 527500 }, { "epoch": 2.62, "learning_rate": 4.869253082841615e-05, "loss": 2.5808, "step": 528000 }, { "epoch": 2.62, "learning_rate": 4.8691292241990066e-05, "loss": 2.5641, "step": 528500 }, { "epoch": 2.62, "learning_rate": 4.869005365556398e-05, "loss": 2.5714, "step": 529000 }, { "epoch": 2.62, "learning_rate": 4.8688815069137894e-05, "loss": 2.5913, "step": 529500 }, { "epoch": 2.63, "learning_rate": 4.868757648271181e-05, "loss": 2.5783, "step": 530000 }, { "epoch": 2.63, "learning_rate": 4.868633789628573e-05, "loss": 2.57, "step": 530500 }, { "epoch": 2.63, "learning_rate": 4.86851017870325e-05, "loss": 2.5547, "step": 531000 }, { "epoch": 2.63, "learning_rate": 4.868386320060642e-05, "loss": 2.5468, "step": 531500 }, { "epoch": 2.64, "learning_rate": 4.868262461418033e-05, "loss": 2.5864, "step": 532000 }, { "epoch": 2.64, "learning_rate": 4.868138602775425e-05, "loss": 2.5843, "step": 532500 }, { "epoch": 2.64, "learning_rate": 4.8680147441328164e-05, "loss": 2.5719, "step": 533000 }, { "epoch": 2.64, "learning_rate": 4.867891133207493e-05, "loss": 2.5732, "step": 533500 }, { "epoch": 2.65, "learning_rate": 4.867767274564885e-05, "loss": 2.5765, "step": 534000 }, { "epoch": 2.65, "learning_rate": 4.8676434159222767e-05, "loss": 2.5757, "step": 534500 }, { "epoch": 2.65, "learning_rate": 4.8675195572796683e-05, "loss": 2.5774, "step": 535000 }, { "epoch": 2.65, "learning_rate": 4.8673956986370594e-05, "loss": 2.554, "step": 535500 }, { "epoch": 2.66, "learning_rate": 4.867272087711737e-05, "loss": 2.5533, "step": 536000 }, { "epoch": 2.66, "learning_rate": 4.8671482290691286e-05, "loss": 2.5772, "step": 536500 }, { "epoch": 2.66, "learning_rate": 4.86702437042652e-05, "loss": 2.5513, "step": 537000 }, { "epoch": 2.66, "learning_rate": 4.866900511783912e-05, "loss": 2.5708, "step": 537500 }, { "epoch": 2.67, "learning_rate": 4.866776653141304e-05, "loss": 2.5684, "step": 538000 }, { "epoch": 2.67, "learning_rate": 4.866652794498695e-05, "loss": 2.5832, "step": 538500 }, { "epoch": 2.67, "learning_rate": 4.8665291835733716e-05, "loss": 2.5732, "step": 539000 }, { "epoch": 2.67, "learning_rate": 4.866405324930763e-05, "loss": 2.5757, "step": 539500 }, { "epoch": 2.68, "learning_rate": 4.86628171400544e-05, "loss": 2.5808, "step": 540000 }, { "epoch": 2.68, "learning_rate": 4.866157855362832e-05, "loss": 2.5977, "step": 540500 }, { "epoch": 2.68, "learning_rate": 4.8660339967202236e-05, "loss": 2.5599, "step": 541000 }, { "epoch": 2.68, "learning_rate": 4.865910138077615e-05, "loss": 2.5493, "step": 541500 }, { "epoch": 2.69, "learning_rate": 4.865786279435007e-05, "loss": 2.5864, "step": 542000 }, { "epoch": 2.69, "learning_rate": 4.8656624207923986e-05, "loss": 2.5611, "step": 542500 }, { "epoch": 2.69, "learning_rate": 4.86553856214979e-05, "loss": 2.5601, "step": 543000 }, { "epoch": 2.69, "learning_rate": 4.865414703507182e-05, "loss": 2.5715, "step": 543500 }, { "epoch": 2.7, "learning_rate": 4.865291092581858e-05, "loss": 2.5968, "step": 544000 }, { "epoch": 2.7, "learning_rate": 4.86516723393925e-05, "loss": 2.561, "step": 544500 }, { "epoch": 2.7, "learning_rate": 4.8650433752966416e-05, "loss": 2.593, "step": 545000 }, { "epoch": 2.7, "learning_rate": 4.864919516654033e-05, "loss": 2.5692, "step": 545500 }, { "epoch": 2.71, "learning_rate": 4.864795658011425e-05, "loss": 2.5501, "step": 546000 }, { "epoch": 2.71, "learning_rate": 4.864671799368817e-05, "loss": 2.5733, "step": 546500 }, { "epoch": 2.71, "learning_rate": 4.8645481884434936e-05, "loss": 2.5761, "step": 547000 }, { "epoch": 2.71, "learning_rate": 4.864424329800885e-05, "loss": 2.5604, "step": 547500 }, { "epoch": 2.71, "learning_rate": 4.864300471158277e-05, "loss": 2.5876, "step": 548000 }, { "epoch": 2.72, "learning_rate": 4.8641766125156686e-05, "loss": 2.5696, "step": 548500 }, { "epoch": 2.72, "learning_rate": 4.86405275387306e-05, "loss": 2.5717, "step": 549000 }, { "epoch": 2.72, "learning_rate": 4.863928895230452e-05, "loss": 2.5383, "step": 549500 }, { "epoch": 2.72, "learning_rate": 4.863805284305128e-05, "loss": 2.5773, "step": 550000 }, { "epoch": 2.73, "learning_rate": 4.863681673379805e-05, "loss": 2.5818, "step": 550500 }, { "epoch": 2.73, "learning_rate": 4.863557814737197e-05, "loss": 2.5148, "step": 551000 }, { "epoch": 2.73, "learning_rate": 4.8634339560945885e-05, "loss": 2.5935, "step": 551500 }, { "epoch": 2.73, "learning_rate": 4.86331009745198e-05, "loss": 2.5585, "step": 552000 }, { "epoch": 2.74, "learning_rate": 4.863186238809372e-05, "loss": 2.5496, "step": 552500 }, { "epoch": 2.74, "learning_rate": 4.8630623801667636e-05, "loss": 2.5555, "step": 553000 }, { "epoch": 2.74, "learning_rate": 4.862938521524155e-05, "loss": 2.547, "step": 553500 }, { "epoch": 2.74, "learning_rate": 4.862814662881547e-05, "loss": 2.567, "step": 554000 }, { "epoch": 2.75, "learning_rate": 4.8626908042389387e-05, "loss": 2.554, "step": 554500 }, { "epoch": 2.75, "learning_rate": 4.8625669455963303e-05, "loss": 2.5619, "step": 555000 }, { "epoch": 2.75, "learning_rate": 4.862443582388292e-05, "loss": 2.5697, "step": 555500 }, { "epoch": 2.75, "learning_rate": 4.8623197237456834e-05, "loss": 2.5523, "step": 556000 }, { "epoch": 2.76, "learning_rate": 4.862195865103075e-05, "loss": 2.568, "step": 556500 }, { "epoch": 2.76, "learning_rate": 4.862072006460467e-05, "loss": 2.5945, "step": 557000 }, { "epoch": 2.76, "learning_rate": 4.861948643252429e-05, "loss": 2.5638, "step": 557500 }, { "epoch": 2.76, "learning_rate": 4.8618247846098206e-05, "loss": 2.56, "step": 558000 }, { "epoch": 2.77, "learning_rate": 4.861700925967212e-05, "loss": 2.5682, "step": 558500 }, { "epoch": 2.77, "learning_rate": 4.861577067324604e-05, "loss": 2.5404, "step": 559000 }, { "epoch": 2.77, "learning_rate": 4.8614532086819957e-05, "loss": 2.5794, "step": 559500 }, { "epoch": 2.77, "learning_rate": 4.8613293500393873e-05, "loss": 2.5669, "step": 560000 }, { "epoch": 2.78, "learning_rate": 4.861205491396779e-05, "loss": 2.5423, "step": 560500 }, { "epoch": 2.78, "learning_rate": 4.86108163275417e-05, "loss": 2.5711, "step": 561000 }, { "epoch": 2.78, "learning_rate": 4.860957774111562e-05, "loss": 2.5841, "step": 561500 }, { "epoch": 2.78, "learning_rate": 4.8608339154689534e-05, "loss": 2.563, "step": 562000 }, { "epoch": 2.79, "learning_rate": 4.860710056826345e-05, "loss": 2.5802, "step": 562500 }, { "epoch": 2.79, "learning_rate": 4.860586198183737e-05, "loss": 2.5727, "step": 563000 }, { "epoch": 2.79, "learning_rate": 4.8604625872584144e-05, "loss": 2.5798, "step": 563500 }, { "epoch": 2.79, "learning_rate": 4.860338728615806e-05, "loss": 2.5647, "step": 564000 }, { "epoch": 2.8, "learning_rate": 4.860214869973197e-05, "loss": 2.5512, "step": 564500 }, { "epoch": 2.8, "learning_rate": 4.860091011330589e-05, "loss": 2.5654, "step": 565000 }, { "epoch": 2.8, "learning_rate": 4.8599671526879805e-05, "loss": 2.5424, "step": 565500 }, { "epoch": 2.8, "learning_rate": 4.859843294045372e-05, "loss": 2.554, "step": 566000 }, { "epoch": 2.81, "learning_rate": 4.859719435402764e-05, "loss": 2.5894, "step": 566500 }, { "epoch": 2.81, "learning_rate": 4.8595955767601556e-05, "loss": 2.551, "step": 567000 }, { "epoch": 2.81, "learning_rate": 4.859471965834832e-05, "loss": 2.5805, "step": 567500 }, { "epoch": 2.81, "learning_rate": 4.8593481071922235e-05, "loss": 2.5897, "step": 568000 }, { "epoch": 2.82, "learning_rate": 4.859224248549615e-05, "loss": 2.5625, "step": 568500 }, { "epoch": 2.82, "learning_rate": 4.859100389907007e-05, "loss": 2.5616, "step": 569000 }, { "epoch": 2.82, "learning_rate": 4.8589765312643985e-05, "loss": 2.5768, "step": 569500 }, { "epoch": 2.82, "learning_rate": 4.858852920339076e-05, "loss": 2.5554, "step": 570000 }, { "epoch": 2.83, "learning_rate": 4.858729061696467e-05, "loss": 2.5912, "step": 570500 }, { "epoch": 2.83, "learning_rate": 4.858605203053859e-05, "loss": 2.5492, "step": 571000 }, { "epoch": 2.83, "learning_rate": 4.858481592128536e-05, "loss": 2.5554, "step": 571500 }, { "epoch": 2.83, "learning_rate": 4.8583577334859274e-05, "loss": 2.5641, "step": 572000 }, { "epoch": 2.84, "learning_rate": 4.858233874843319e-05, "loss": 2.5709, "step": 572500 }, { "epoch": 2.84, "learning_rate": 4.858110016200711e-05, "loss": 2.5975, "step": 573000 }, { "epoch": 2.84, "learning_rate": 4.857986157558102e-05, "loss": 2.5723, "step": 573500 }, { "epoch": 2.84, "learning_rate": 4.8578622989154935e-05, "loss": 2.5557, "step": 574000 }, { "epoch": 2.85, "learning_rate": 4.857738440272885e-05, "loss": 2.538, "step": 574500 }, { "epoch": 2.85, "learning_rate": 4.857614581630277e-05, "loss": 2.5816, "step": 575000 }, { "epoch": 2.85, "learning_rate": 4.8574907229876685e-05, "loss": 2.5707, "step": 575500 }, { "epoch": 2.85, "learning_rate": 4.85736686434506e-05, "loss": 2.5592, "step": 576000 }, { "epoch": 2.86, "learning_rate": 4.857243005702452e-05, "loss": 2.5493, "step": 576500 }, { "epoch": 2.86, "learning_rate": 4.8571191470598436e-05, "loss": 2.5495, "step": 577000 }, { "epoch": 2.86, "learning_rate": 4.8569955361345205e-05, "loss": 2.5573, "step": 577500 }, { "epoch": 2.86, "learning_rate": 4.8568719252091974e-05, "loss": 2.5596, "step": 578000 }, { "epoch": 2.87, "learning_rate": 4.856748066566589e-05, "loss": 2.5427, "step": 578500 }, { "epoch": 2.87, "learning_rate": 4.856624207923981e-05, "loss": 2.5828, "step": 579000 }, { "epoch": 2.87, "learning_rate": 4.8565003492813725e-05, "loss": 2.5622, "step": 579500 }, { "epoch": 2.87, "learning_rate": 4.8563764906387635e-05, "loss": 2.5681, "step": 580000 }, { "epoch": 2.88, "learning_rate": 4.856252631996155e-05, "loss": 2.5767, "step": 580500 }, { "epoch": 2.88, "learning_rate": 4.856128773353547e-05, "loss": 2.5359, "step": 581000 }, { "epoch": 2.88, "learning_rate": 4.8560049147109386e-05, "loss": 2.5966, "step": 581500 }, { "epoch": 2.88, "learning_rate": 4.85588105606833e-05, "loss": 2.5783, "step": 582000 }, { "epoch": 2.89, "learning_rate": 4.855757445143008e-05, "loss": 2.563, "step": 582500 }, { "epoch": 2.89, "learning_rate": 4.855633586500399e-05, "loss": 2.5594, "step": 583000 }, { "epoch": 2.89, "learning_rate": 4.855509975575076e-05, "loss": 2.5744, "step": 583500 }, { "epoch": 2.89, "learning_rate": 4.8553861169324674e-05, "loss": 2.5556, "step": 584000 }, { "epoch": 2.9, "learning_rate": 4.855262258289859e-05, "loss": 2.5552, "step": 584500 }, { "epoch": 2.9, "learning_rate": 4.855138399647251e-05, "loss": 2.5402, "step": 585000 }, { "epoch": 2.9, "learning_rate": 4.8550145410046425e-05, "loss": 2.553, "step": 585500 }, { "epoch": 2.9, "learning_rate": 4.8548906823620335e-05, "loss": 2.5668, "step": 586000 }, { "epoch": 2.91, "learning_rate": 4.854766823719425e-05, "loss": 2.5855, "step": 586500 }, { "epoch": 2.91, "learning_rate": 4.854643212794103e-05, "loss": 2.5822, "step": 587000 }, { "epoch": 2.91, "learning_rate": 4.8545193541514944e-05, "loss": 2.5603, "step": 587500 }, { "epoch": 2.91, "learning_rate": 4.854395495508886e-05, "loss": 2.5564, "step": 588000 }, { "epoch": 2.92, "learning_rate": 4.854271884583563e-05, "loss": 2.5659, "step": 588500 }, { "epoch": 2.92, "learning_rate": 4.854148025940955e-05, "loss": 2.5704, "step": 589000 }, { "epoch": 2.92, "learning_rate": 4.8540241672983464e-05, "loss": 2.584, "step": 589500 }, { "epoch": 2.92, "learning_rate": 4.8539003086557374e-05, "loss": 2.5611, "step": 590000 }, { "epoch": 2.93, "learning_rate": 4.853776697730414e-05, "loss": 2.5538, "step": 590500 }, { "epoch": 2.93, "learning_rate": 4.853652839087806e-05, "loss": 2.5419, "step": 591000 }, { "epoch": 2.93, "learning_rate": 4.853528980445198e-05, "loss": 2.5637, "step": 591500 }, { "epoch": 2.93, "learning_rate": 4.8534051218025894e-05, "loss": 2.5852, "step": 592000 }, { "epoch": 2.94, "learning_rate": 4.853281263159981e-05, "loss": 2.5701, "step": 592500 }, { "epoch": 2.94, "learning_rate": 4.853157404517373e-05, "loss": 2.5697, "step": 593000 }, { "epoch": 2.94, "learning_rate": 4.8530335458747644e-05, "loss": 2.5454, "step": 593500 }, { "epoch": 2.94, "learning_rate": 4.852909687232156e-05, "loss": 2.5414, "step": 594000 }, { "epoch": 2.95, "learning_rate": 4.852785828589548e-05, "loss": 2.5856, "step": 594500 }, { "epoch": 2.95, "learning_rate": 4.8526619699469395e-05, "loss": 2.5922, "step": 595000 }, { "epoch": 2.95, "learning_rate": 4.8525381113043305e-05, "loss": 2.5762, "step": 595500 }, { "epoch": 2.95, "learning_rate": 4.852414252661722e-05, "loss": 2.5752, "step": 596000 }, { "epoch": 2.96, "learning_rate": 4.852290394019114e-05, "loss": 2.5849, "step": 596500 }, { "epoch": 2.96, "learning_rate": 4.852166783093791e-05, "loss": 2.5798, "step": 597000 }, { "epoch": 2.96, "learning_rate": 4.8520429244511825e-05, "loss": 2.5716, "step": 597500 }, { "epoch": 2.96, "learning_rate": 4.8519193135258594e-05, "loss": 2.559, "step": 598000 }, { "epoch": 2.97, "learning_rate": 4.851795454883251e-05, "loss": 2.5591, "step": 598500 }, { "epoch": 2.97, "learning_rate": 4.851671596240643e-05, "loss": 2.582, "step": 599000 }, { "epoch": 2.97, "learning_rate": 4.8515477375980345e-05, "loss": 2.581, "step": 599500 }, { "epoch": 2.97, "learning_rate": 4.851423878955426e-05, "loss": 2.5779, "step": 600000 }, { "epoch": 2.98, "learning_rate": 4.851300268030103e-05, "loss": 2.5746, "step": 600500 }, { "epoch": 2.98, "learning_rate": 4.851176409387495e-05, "loss": 2.5805, "step": 601000 }, { "epoch": 2.98, "learning_rate": 4.8510525507448864e-05, "loss": 2.5754, "step": 601500 }, { "epoch": 2.98, "learning_rate": 4.850928692102278e-05, "loss": 2.5709, "step": 602000 }, { "epoch": 2.98, "learning_rate": 4.85080483345967e-05, "loss": 2.5627, "step": 602500 }, { "epoch": 2.99, "learning_rate": 4.8506809748170615e-05, "loss": 2.5808, "step": 603000 }, { "epoch": 2.99, "learning_rate": 4.8505571161744525e-05, "loss": 2.5847, "step": 603500 }, { "epoch": 2.99, "learning_rate": 4.850433257531844e-05, "loss": 2.5929, "step": 604000 }, { "epoch": 2.99, "learning_rate": 4.850309398889236e-05, "loss": 2.5613, "step": 604500 }, { "epoch": 3.0, "learning_rate": 4.850185787963913e-05, "loss": 2.5579, "step": 605000 }, { "epoch": 3.0, "learning_rate": 4.8500619293213045e-05, "loss": 2.5798, "step": 605500 }, { "epoch": 3.0, "eval_accuracy": 0.6309935793243545, "eval_accuracy_mlm": 0.583263169880887, "eval_accuracy_nsp": 0.856231786287207, "eval_loss": 2.503361225128174, "eval_runtime": 145.9652, "eval_samples_per_second": 1746.711, "eval_steps_per_second": 72.784, "step": 605529 }, { "epoch": 3.0, "learning_rate": 4.8499383183959814e-05, "loss": 2.5333, "step": 606000 }, { "epoch": 3.0, "learning_rate": 4.849814459753373e-05, "loss": 2.5139, "step": 606500 }, { "epoch": 3.01, "learning_rate": 4.849690601110765e-05, "loss": 2.5247, "step": 607000 }, { "epoch": 3.01, "learning_rate": 4.849566990185441e-05, "loss": 2.5196, "step": 607500 }, { "epoch": 3.01, "learning_rate": 4.8494431315428326e-05, "loss": 2.5171, "step": 608000 }, { "epoch": 3.01, "learning_rate": 4.849319272900224e-05, "loss": 2.5036, "step": 608500 }, { "epoch": 3.02, "learning_rate": 4.849195414257616e-05, "loss": 2.5135, "step": 609000 }, { "epoch": 3.02, "learning_rate": 4.849071555615008e-05, "loss": 2.5322, "step": 609500 }, { "epoch": 3.02, "learning_rate": 4.8489476969723994e-05, "loss": 2.5542, "step": 610000 }, { "epoch": 3.02, "learning_rate": 4.848823838329791e-05, "loss": 2.5499, "step": 610500 }, { "epoch": 3.03, "learning_rate": 4.848699979687183e-05, "loss": 2.5478, "step": 611000 }, { "epoch": 3.03, "learning_rate": 4.84857636876186e-05, "loss": 2.5333, "step": 611500 }, { "epoch": 3.03, "learning_rate": 4.8484525101192514e-05, "loss": 2.552, "step": 612000 }, { "epoch": 3.03, "learning_rate": 4.848328651476643e-05, "loss": 2.5408, "step": 612500 }, { "epoch": 3.04, "learning_rate": 4.848204792834035e-05, "loss": 2.5539, "step": 613000 }, { "epoch": 3.04, "learning_rate": 4.8480809341914264e-05, "loss": 2.5355, "step": 613500 }, { "epoch": 3.04, "learning_rate": 4.847957075548818e-05, "loss": 2.5418, "step": 614000 }, { "epoch": 3.04, "learning_rate": 4.84783321690621e-05, "loss": 2.5373, "step": 614500 }, { "epoch": 3.05, "learning_rate": 4.847709605980886e-05, "loss": 2.539, "step": 615000 }, { "epoch": 3.05, "learning_rate": 4.847585747338278e-05, "loss": 2.5402, "step": 615500 }, { "epoch": 3.05, "learning_rate": 4.8474618886956694e-05, "loss": 2.5299, "step": 616000 }, { "epoch": 3.05, "learning_rate": 4.847338030053061e-05, "loss": 2.5361, "step": 616500 }, { "epoch": 3.06, "learning_rate": 4.847214171410453e-05, "loss": 2.5464, "step": 617000 }, { "epoch": 3.06, "learning_rate": 4.8470903127678445e-05, "loss": 2.5522, "step": 617500 }, { "epoch": 3.06, "learning_rate": 4.846966454125236e-05, "loss": 2.5553, "step": 618000 }, { "epoch": 3.06, "learning_rate": 4.846842595482628e-05, "loss": 2.5591, "step": 618500 }, { "epoch": 3.07, "learning_rate": 4.8467187368400196e-05, "loss": 2.54, "step": 619000 }, { "epoch": 3.07, "learning_rate": 4.846594878197411e-05, "loss": 2.5405, "step": 619500 }, { "epoch": 3.07, "learning_rate": 4.846471267272088e-05, "loss": 2.5232, "step": 620000 }, { "epoch": 3.07, "learning_rate": 4.84634740862948e-05, "loss": 2.5347, "step": 620500 }, { "epoch": 3.08, "learning_rate": 4.8462235499868715e-05, "loss": 2.528, "step": 621000 }, { "epoch": 3.08, "learning_rate": 4.846099939061548e-05, "loss": 2.5322, "step": 621500 }, { "epoch": 3.08, "learning_rate": 4.8459760804189394e-05, "loss": 2.5317, "step": 622000 }, { "epoch": 3.08, "learning_rate": 4.845852221776331e-05, "loss": 2.5383, "step": 622500 }, { "epoch": 3.09, "learning_rate": 4.845728363133723e-05, "loss": 2.5493, "step": 623000 }, { "epoch": 3.09, "learning_rate": 4.8456045044911145e-05, "loss": 2.538, "step": 623500 }, { "epoch": 3.09, "learning_rate": 4.845480645848506e-05, "loss": 2.5492, "step": 624000 }, { "epoch": 3.09, "learning_rate": 4.845356787205898e-05, "loss": 2.5289, "step": 624500 }, { "epoch": 3.1, "learning_rate": 4.8452329285632896e-05, "loss": 2.566, "step": 625000 }, { "epoch": 3.1, "learning_rate": 4.8451093176379665e-05, "loss": 2.5407, "step": 625500 }, { "epoch": 3.1, "learning_rate": 4.844985458995358e-05, "loss": 2.5053, "step": 626000 }, { "epoch": 3.1, "learning_rate": 4.84486160035275e-05, "loss": 2.5606, "step": 626500 }, { "epoch": 3.11, "learning_rate": 4.844737989427427e-05, "loss": 2.5307, "step": 627000 }, { "epoch": 3.11, "learning_rate": 4.844614130784818e-05, "loss": 2.5339, "step": 627500 }, { "epoch": 3.11, "learning_rate": 4.8444902721422094e-05, "loss": 2.5584, "step": 628000 }, { "epoch": 3.11, "learning_rate": 4.844366413499601e-05, "loss": 2.5237, "step": 628500 }, { "epoch": 3.12, "learning_rate": 4.844242554856993e-05, "loss": 2.5553, "step": 629000 }, { "epoch": 3.12, "learning_rate": 4.84411894393167e-05, "loss": 2.5506, "step": 629500 }, { "epoch": 3.12, "learning_rate": 4.8439950852890614e-05, "loss": 2.5469, "step": 630000 }, { "epoch": 3.12, "learning_rate": 4.843871226646453e-05, "loss": 2.551, "step": 630500 }, { "epoch": 3.13, "learning_rate": 4.843747368003845e-05, "loss": 2.5408, "step": 631000 }, { "epoch": 3.13, "learning_rate": 4.8436235093612365e-05, "loss": 2.5641, "step": 631500 }, { "epoch": 3.13, "learning_rate": 4.843499650718628e-05, "loss": 2.5317, "step": 632000 }, { "epoch": 3.13, "learning_rate": 4.84337579207602e-05, "loss": 2.528, "step": 632500 }, { "epoch": 3.14, "learning_rate": 4.8432519334334116e-05, "loss": 2.5114, "step": 633000 }, { "epoch": 3.14, "learning_rate": 4.843128074790803e-05, "loss": 2.5307, "step": 633500 }, { "epoch": 3.14, "learning_rate": 4.843004216148195e-05, "loss": 2.5363, "step": 634000 }, { "epoch": 3.14, "learning_rate": 4.8428803575055866e-05, "loss": 2.5329, "step": 634500 }, { "epoch": 3.15, "learning_rate": 4.842756498862978e-05, "loss": 2.5189, "step": 635000 }, { "epoch": 3.15, "learning_rate": 4.8426328879376545e-05, "loss": 2.5262, "step": 635500 }, { "epoch": 3.15, "learning_rate": 4.842509029295046e-05, "loss": 2.5593, "step": 636000 }, { "epoch": 3.15, "learning_rate": 4.842385418369723e-05, "loss": 2.5337, "step": 636500 }, { "epoch": 3.16, "learning_rate": 4.842261559727115e-05, "loss": 2.5343, "step": 637000 }, { "epoch": 3.16, "learning_rate": 4.8421377010845065e-05, "loss": 2.5407, "step": 637500 }, { "epoch": 3.16, "learning_rate": 4.842013842441898e-05, "loss": 2.5609, "step": 638000 }, { "epoch": 3.16, "learning_rate": 4.84188998379929e-05, "loss": 2.5219, "step": 638500 }, { "epoch": 3.17, "learning_rate": 4.8417661251566816e-05, "loss": 2.5684, "step": 639000 }, { "epoch": 3.17, "learning_rate": 4.841642266514073e-05, "loss": 2.5422, "step": 639500 }, { "epoch": 3.17, "learning_rate": 4.841518407871465e-05, "loss": 2.5736, "step": 640000 }, { "epoch": 3.17, "learning_rate": 4.841394796946142e-05, "loss": 2.5538, "step": 640500 }, { "epoch": 3.18, "learning_rate": 4.841270938303533e-05, "loss": 2.5404, "step": 641000 }, { "epoch": 3.18, "learning_rate": 4.8411470796609245e-05, "loss": 2.5472, "step": 641500 }, { "epoch": 3.18, "learning_rate": 4.841023221018316e-05, "loss": 2.5434, "step": 642000 }, { "epoch": 3.18, "learning_rate": 4.840899610092993e-05, "loss": 2.5179, "step": 642500 }, { "epoch": 3.19, "learning_rate": 4.840775751450385e-05, "loss": 2.5491, "step": 643000 }, { "epoch": 3.19, "learning_rate": 4.8406518928077765e-05, "loss": 2.5372, "step": 643500 }, { "epoch": 3.19, "learning_rate": 4.840528034165168e-05, "loss": 2.544, "step": 644000 }, { "epoch": 3.19, "learning_rate": 4.84040417552256e-05, "loss": 2.5429, "step": 644500 }, { "epoch": 3.2, "learning_rate": 4.8402803168799516e-05, "loss": 2.5307, "step": 645000 }, { "epoch": 3.2, "learning_rate": 4.840156458237343e-05, "loss": 2.553, "step": 645500 }, { "epoch": 3.2, "learning_rate": 4.84003284731202e-05, "loss": 2.5597, "step": 646000 }, { "epoch": 3.2, "learning_rate": 4.839908988669412e-05, "loss": 2.5398, "step": 646500 }, { "epoch": 3.21, "learning_rate": 4.8397851300268035e-05, "loss": 2.5258, "step": 647000 }, { "epoch": 3.21, "learning_rate": 4.8396612713841946e-05, "loss": 2.525, "step": 647500 }, { "epoch": 3.21, "learning_rate": 4.839537412741586e-05, "loss": 2.5487, "step": 648000 }, { "epoch": 3.21, "learning_rate": 4.839413801816263e-05, "loss": 2.5557, "step": 648500 }, { "epoch": 3.22, "learning_rate": 4.839289943173655e-05, "loss": 2.5434, "step": 649000 }, { "epoch": 3.22, "learning_rate": 4.8391660845310465e-05, "loss": 2.5495, "step": 649500 }, { "epoch": 3.22, "learning_rate": 4.839042225888438e-05, "loss": 2.5504, "step": 650000 }, { "epoch": 3.22, "learning_rate": 4.838918614963115e-05, "loss": 2.5101, "step": 650500 }, { "epoch": 3.23, "learning_rate": 4.838794756320507e-05, "loss": 2.5414, "step": 651000 }, { "epoch": 3.23, "learning_rate": 4.8386708976778985e-05, "loss": 2.5399, "step": 651500 }, { "epoch": 3.23, "learning_rate": 4.83854703903529e-05, "loss": 2.5554, "step": 652000 }, { "epoch": 3.23, "learning_rate": 4.8384234281099664e-05, "loss": 2.5515, "step": 652500 }, { "epoch": 3.24, "learning_rate": 4.838299569467358e-05, "loss": 2.5307, "step": 653000 }, { "epoch": 3.24, "learning_rate": 4.83817571082475e-05, "loss": 2.539, "step": 653500 }, { "epoch": 3.24, "learning_rate": 4.8380518521821415e-05, "loss": 2.5284, "step": 654000 }, { "epoch": 3.24, "learning_rate": 4.837928241256819e-05, "loss": 2.5652, "step": 654500 }, { "epoch": 3.25, "learning_rate": 4.837804382614211e-05, "loss": 2.5605, "step": 655000 }, { "epoch": 3.25, "learning_rate": 4.837680523971602e-05, "loss": 2.5348, "step": 655500 }, { "epoch": 3.25, "learning_rate": 4.8375569130462786e-05, "loss": 2.5298, "step": 656000 }, { "epoch": 3.25, "learning_rate": 4.83743305440367e-05, "loss": 2.5437, "step": 656500 }, { "epoch": 3.26, "learning_rate": 4.837309195761062e-05, "loss": 2.5365, "step": 657000 }, { "epoch": 3.26, "learning_rate": 4.837185337118454e-05, "loss": 2.5457, "step": 657500 }, { "epoch": 3.26, "learning_rate": 4.8370614784758454e-05, "loss": 2.5365, "step": 658000 }, { "epoch": 3.26, "learning_rate": 4.836937619833237e-05, "loss": 2.5297, "step": 658500 }, { "epoch": 3.26, "learning_rate": 4.836813761190628e-05, "loss": 2.5214, "step": 659000 }, { "epoch": 3.27, "learning_rate": 4.83668990254802e-05, "loss": 2.5343, "step": 659500 }, { "epoch": 3.27, "learning_rate": 4.8365660439054115e-05, "loss": 2.535, "step": 660000 }, { "epoch": 3.27, "learning_rate": 4.836442432980089e-05, "loss": 2.5523, "step": 660500 }, { "epoch": 3.27, "learning_rate": 4.836318574337481e-05, "loss": 2.5577, "step": 661000 }, { "epoch": 3.28, "learning_rate": 4.8361947156948724e-05, "loss": 2.5796, "step": 661500 }, { "epoch": 3.28, "learning_rate": 4.8360708570522634e-05, "loss": 2.5263, "step": 662000 }, { "epoch": 3.28, "learning_rate": 4.835946998409655e-05, "loss": 2.535, "step": 662500 }, { "epoch": 3.28, "learning_rate": 4.835823139767047e-05, "loss": 2.5555, "step": 663000 }, { "epoch": 3.29, "learning_rate": 4.835699528841724e-05, "loss": 2.5393, "step": 663500 }, { "epoch": 3.29, "learning_rate": 4.8355756701991154e-05, "loss": 2.5416, "step": 664000 }, { "epoch": 3.29, "learning_rate": 4.835451811556507e-05, "loss": 2.5387, "step": 664500 }, { "epoch": 3.29, "learning_rate": 4.835327952913898e-05, "loss": 2.5604, "step": 665000 }, { "epoch": 3.3, "learning_rate": 4.83520409427129e-05, "loss": 2.565, "step": 665500 }, { "epoch": 3.3, "learning_rate": 4.8350802356286815e-05, "loss": 2.5364, "step": 666000 }, { "epoch": 3.3, "learning_rate": 4.834956376986073e-05, "loss": 2.5461, "step": 666500 }, { "epoch": 3.3, "learning_rate": 4.834832518343465e-05, "loss": 2.5692, "step": 667000 }, { "epoch": 3.31, "learning_rate": 4.8347089074181424e-05, "loss": 2.5424, "step": 667500 }, { "epoch": 3.31, "learning_rate": 4.834585296492819e-05, "loss": 2.557, "step": 668000 }, { "epoch": 3.31, "learning_rate": 4.834461437850211e-05, "loss": 2.5454, "step": 668500 }, { "epoch": 3.31, "learning_rate": 4.834337579207602e-05, "loss": 2.5403, "step": 669000 }, { "epoch": 3.32, "learning_rate": 4.834213720564994e-05, "loss": 2.5301, "step": 669500 }, { "epoch": 3.32, "learning_rate": 4.8340898619223854e-05, "loss": 2.5211, "step": 670000 }, { "epoch": 3.32, "learning_rate": 4.833966003279777e-05, "loss": 2.5401, "step": 670500 }, { "epoch": 3.32, "learning_rate": 4.833842144637169e-05, "loss": 2.541, "step": 671000 }, { "epoch": 3.33, "learning_rate": 4.833718533711846e-05, "loss": 2.5565, "step": 671500 }, { "epoch": 3.33, "learning_rate": 4.8335946750692374e-05, "loss": 2.5404, "step": 672000 }, { "epoch": 3.33, "learning_rate": 4.833470816426629e-05, "loss": 2.567, "step": 672500 }, { "epoch": 3.33, "learning_rate": 4.833346957784021e-05, "loss": 2.5351, "step": 673000 }, { "epoch": 3.34, "learning_rate": 4.8332230991414124e-05, "loss": 2.5467, "step": 673500 }, { "epoch": 3.34, "learning_rate": 4.833099240498804e-05, "loss": 2.5485, "step": 674000 }, { "epoch": 3.34, "learning_rate": 4.832975381856195e-05, "loss": 2.5353, "step": 674500 }, { "epoch": 3.34, "learning_rate": 4.832851523213587e-05, "loss": 2.5495, "step": 675000 }, { "epoch": 3.35, "learning_rate": 4.832728160005549e-05, "loss": 2.5532, "step": 675500 }, { "epoch": 3.35, "learning_rate": 4.8326043013629406e-05, "loss": 2.5324, "step": 676000 }, { "epoch": 3.35, "learning_rate": 4.832480442720332e-05, "loss": 2.538, "step": 676500 }, { "epoch": 3.35, "learning_rate": 4.832356584077724e-05, "loss": 2.5226, "step": 677000 }, { "epoch": 3.36, "learning_rate": 4.832232973152401e-05, "loss": 2.5422, "step": 677500 }, { "epoch": 3.36, "learning_rate": 4.8321091145097926e-05, "loss": 2.5271, "step": 678000 }, { "epoch": 3.36, "learning_rate": 4.831985255867184e-05, "loss": 2.5118, "step": 678500 }, { "epoch": 3.36, "learning_rate": 4.831861397224576e-05, "loss": 2.5408, "step": 679000 }, { "epoch": 3.37, "learning_rate": 4.831737786299252e-05, "loss": 2.5397, "step": 679500 }, { "epoch": 3.37, "learning_rate": 4.831613927656644e-05, "loss": 2.5553, "step": 680000 }, { "epoch": 3.37, "learning_rate": 4.8314900690140355e-05, "loss": 2.5429, "step": 680500 }, { "epoch": 3.37, "learning_rate": 4.831366210371427e-05, "loss": 2.5535, "step": 681000 }, { "epoch": 3.38, "learning_rate": 4.831242351728819e-05, "loss": 2.5647, "step": 681500 }, { "epoch": 3.38, "learning_rate": 4.8311184930862106e-05, "loss": 2.5757, "step": 682000 }, { "epoch": 3.38, "learning_rate": 4.830994634443602e-05, "loss": 2.5574, "step": 682500 }, { "epoch": 3.38, "learning_rate": 4.830870775800994e-05, "loss": 2.53, "step": 683000 }, { "epoch": 3.39, "learning_rate": 4.830746917158386e-05, "loss": 2.5279, "step": 683500 }, { "epoch": 3.39, "learning_rate": 4.8306233062330626e-05, "loss": 2.5323, "step": 684000 }, { "epoch": 3.39, "learning_rate": 4.830499447590454e-05, "loss": 2.5433, "step": 684500 }, { "epoch": 3.39, "learning_rate": 4.830375588947846e-05, "loss": 2.5779, "step": 685000 }, { "epoch": 3.4, "learning_rate": 4.8302517303052376e-05, "loss": 2.5304, "step": 685500 }, { "epoch": 3.4, "learning_rate": 4.830128119379914e-05, "loss": 2.5579, "step": 686000 }, { "epoch": 3.4, "learning_rate": 4.8300045084545914e-05, "loss": 2.5429, "step": 686500 }, { "epoch": 3.4, "learning_rate": 4.829880649811983e-05, "loss": 2.5222, "step": 687000 }, { "epoch": 3.41, "learning_rate": 4.829756791169374e-05, "loss": 2.5562, "step": 687500 }, { "epoch": 3.41, "learning_rate": 4.829632932526766e-05, "loss": 2.579, "step": 688000 }, { "epoch": 3.41, "learning_rate": 4.8295090738841575e-05, "loss": 2.5623, "step": 688500 }, { "epoch": 3.41, "learning_rate": 4.829385215241549e-05, "loss": 2.543, "step": 689000 }, { "epoch": 3.42, "learning_rate": 4.829261356598941e-05, "loss": 2.5659, "step": 689500 }, { "epoch": 3.42, "learning_rate": 4.8291374979563326e-05, "loss": 2.5464, "step": 690000 }, { "epoch": 3.42, "learning_rate": 4.829013639313724e-05, "loss": 2.529, "step": 690500 }, { "epoch": 3.42, "learning_rate": 4.828889780671116e-05, "loss": 2.5534, "step": 691000 }, { "epoch": 3.43, "learning_rate": 4.828765922028508e-05, "loss": 2.5394, "step": 691500 }, { "epoch": 3.43, "learning_rate": 4.8286420633858994e-05, "loss": 2.5359, "step": 692000 }, { "epoch": 3.43, "learning_rate": 4.8285184524605756e-05, "loss": 2.5425, "step": 692500 }, { "epoch": 3.43, "learning_rate": 4.828394593817967e-05, "loss": 2.5602, "step": 693000 }, { "epoch": 3.44, "learning_rate": 4.828270735175359e-05, "loss": 2.5164, "step": 693500 }, { "epoch": 3.44, "learning_rate": 4.8281468765327506e-05, "loss": 2.5582, "step": 694000 }, { "epoch": 3.44, "learning_rate": 4.8280232656074275e-05, "loss": 2.5522, "step": 694500 }, { "epoch": 3.44, "learning_rate": 4.827899406964819e-05, "loss": 2.5528, "step": 695000 }, { "epoch": 3.45, "learning_rate": 4.827775548322211e-05, "loss": 2.5459, "step": 695500 }, { "epoch": 3.45, "learning_rate": 4.8276516896796026e-05, "loss": 2.5493, "step": 696000 }, { "epoch": 3.45, "learning_rate": 4.827527831036994e-05, "loss": 2.5516, "step": 696500 }, { "epoch": 3.45, "learning_rate": 4.827403972394386e-05, "loss": 2.5535, "step": 697000 }, { "epoch": 3.46, "learning_rate": 4.827280113751778e-05, "loss": 2.5477, "step": 697500 }, { "epoch": 3.46, "learning_rate": 4.8271562551091694e-05, "loss": 2.5291, "step": 698000 }, { "epoch": 3.46, "learning_rate": 4.827032396466561e-05, "loss": 2.5557, "step": 698500 }, { "epoch": 3.46, "learning_rate": 4.826908537823953e-05, "loss": 2.5559, "step": 699000 }, { "epoch": 3.47, "learning_rate": 4.8267846791813444e-05, "loss": 2.5445, "step": 699500 }, { "epoch": 3.47, "learning_rate": 4.8266610682560206e-05, "loss": 2.5345, "step": 700000 }, { "epoch": 3.47, "learning_rate": 4.8265374573306975e-05, "loss": 2.563, "step": 700500 }, { "epoch": 3.47, "learning_rate": 4.826413598688089e-05, "loss": 2.5561, "step": 701000 }, { "epoch": 3.48, "learning_rate": 4.826289740045481e-05, "loss": 2.5687, "step": 701500 }, { "epoch": 3.48, "learning_rate": 4.8261658814028726e-05, "loss": 2.544, "step": 702000 }, { "epoch": 3.48, "learning_rate": 4.826042022760264e-05, "loss": 2.5362, "step": 702500 }, { "epoch": 3.48, "learning_rate": 4.825918164117656e-05, "loss": 2.5309, "step": 703000 }, { "epoch": 3.49, "learning_rate": 4.825794305475048e-05, "loss": 2.5421, "step": 703500 }, { "epoch": 3.49, "learning_rate": 4.8256704468324394e-05, "loss": 2.5295, "step": 704000 }, { "epoch": 3.49, "learning_rate": 4.825546588189831e-05, "loss": 2.5354, "step": 704500 }, { "epoch": 3.49, "learning_rate": 4.825422977264508e-05, "loss": 2.5441, "step": 705000 }, { "epoch": 3.5, "learning_rate": 4.8252991186218996e-05, "loss": 2.5388, "step": 705500 }, { "epoch": 3.5, "learning_rate": 4.8251752599792907e-05, "loss": 2.5489, "step": 706000 }, { "epoch": 3.5, "learning_rate": 4.8250514013366824e-05, "loss": 2.5435, "step": 706500 }, { "epoch": 3.5, "learning_rate": 4.824927790411359e-05, "loss": 2.5567, "step": 707000 }, { "epoch": 3.51, "learning_rate": 4.824803931768751e-05, "loss": 2.5394, "step": 707500 }, { "epoch": 3.51, "learning_rate": 4.8246800731261426e-05, "loss": 2.5111, "step": 708000 }, { "epoch": 3.51, "learning_rate": 4.824556214483534e-05, "loss": 2.5513, "step": 708500 }, { "epoch": 3.51, "learning_rate": 4.824432355840926e-05, "loss": 2.5387, "step": 709000 }, { "epoch": 3.52, "learning_rate": 4.824308497198318e-05, "loss": 2.5602, "step": 709500 }, { "epoch": 3.52, "learning_rate": 4.8241846385557094e-05, "loss": 2.549, "step": 710000 }, { "epoch": 3.52, "learning_rate": 4.824060779913101e-05, "loss": 2.5252, "step": 710500 }, { "epoch": 3.52, "learning_rate": 4.823937416705063e-05, "loss": 2.5328, "step": 711000 }, { "epoch": 3.53, "learning_rate": 4.82381380577974e-05, "loss": 2.522, "step": 711500 }, { "epoch": 3.53, "learning_rate": 4.823689947137132e-05, "loss": 2.5387, "step": 712000 }, { "epoch": 3.53, "learning_rate": 4.8235660884945234e-05, "loss": 2.5568, "step": 712500 }, { "epoch": 3.53, "learning_rate": 4.823442229851915e-05, "loss": 2.5318, "step": 713000 }, { "epoch": 3.53, "learning_rate": 4.823318371209307e-05, "loss": 2.5472, "step": 713500 }, { "epoch": 3.54, "learning_rate": 4.8231945125666985e-05, "loss": 2.5452, "step": 714000 }, { "epoch": 3.54, "learning_rate": 4.823070901641375e-05, "loss": 2.5537, "step": 714500 }, { "epoch": 3.54, "learning_rate": 4.8229470429987664e-05, "loss": 2.5685, "step": 715000 }, { "epoch": 3.54, "learning_rate": 4.822823184356158e-05, "loss": 2.5505, "step": 715500 }, { "epoch": 3.55, "learning_rate": 4.82269932571355e-05, "loss": 2.5497, "step": 716000 }, { "epoch": 3.55, "learning_rate": 4.8225754670709415e-05, "loss": 2.529, "step": 716500 }, { "epoch": 3.55, "learning_rate": 4.822451608428333e-05, "loss": 2.5528, "step": 717000 }, { "epoch": 3.55, "learning_rate": 4.822327749785725e-05, "loss": 2.5036, "step": 717500 }, { "epoch": 3.56, "learning_rate": 4.8222038911431166e-05, "loss": 2.5104, "step": 718000 }, { "epoch": 3.56, "learning_rate": 4.822080032500508e-05, "loss": 2.5557, "step": 718500 }, { "epoch": 3.56, "learning_rate": 4.821956173857899e-05, "loss": 2.5172, "step": 719000 }, { "epoch": 3.56, "learning_rate": 4.821832562932577e-05, "loss": 2.5314, "step": 719500 }, { "epoch": 3.57, "learning_rate": 4.821708952007254e-05, "loss": 2.5631, "step": 720000 }, { "epoch": 3.57, "learning_rate": 4.821585093364645e-05, "loss": 2.5495, "step": 720500 }, { "epoch": 3.57, "learning_rate": 4.8214612347220364e-05, "loss": 2.5225, "step": 721000 }, { "epoch": 3.57, "learning_rate": 4.821337376079428e-05, "loss": 2.5358, "step": 721500 }, { "epoch": 3.58, "learning_rate": 4.82121351743682e-05, "loss": 2.5573, "step": 722000 }, { "epoch": 3.58, "learning_rate": 4.8210896587942115e-05, "loss": 2.5512, "step": 722500 }, { "epoch": 3.58, "learning_rate": 4.820965800151603e-05, "loss": 2.5373, "step": 723000 }, { "epoch": 3.58, "learning_rate": 4.820841941508995e-05, "loss": 2.5567, "step": 723500 }, { "epoch": 3.59, "learning_rate": 4.8207180828663866e-05, "loss": 2.5367, "step": 724000 }, { "epoch": 3.59, "learning_rate": 4.820594224223778e-05, "loss": 2.558, "step": 724500 }, { "epoch": 3.59, "learning_rate": 4.820470613298455e-05, "loss": 2.5263, "step": 725000 }, { "epoch": 3.59, "learning_rate": 4.820346754655847e-05, "loss": 2.5248, "step": 725500 }, { "epoch": 3.6, "learning_rate": 4.8202228960132385e-05, "loss": 2.5235, "step": 726000 }, { "epoch": 3.6, "learning_rate": 4.82009903737063e-05, "loss": 2.5575, "step": 726500 }, { "epoch": 3.6, "learning_rate": 4.819975178728022e-05, "loss": 2.5402, "step": 727000 }, { "epoch": 3.6, "learning_rate": 4.8198513200854136e-05, "loss": 2.5331, "step": 727500 }, { "epoch": 3.61, "learning_rate": 4.8197274614428046e-05, "loss": 2.5494, "step": 728000 }, { "epoch": 3.61, "learning_rate": 4.819603602800196e-05, "loss": 2.5456, "step": 728500 }, { "epoch": 3.61, "learning_rate": 4.819479991874873e-05, "loss": 2.5465, "step": 729000 }, { "epoch": 3.61, "learning_rate": 4.819356133232265e-05, "loss": 2.5497, "step": 729500 }, { "epoch": 3.62, "learning_rate": 4.8192322745896566e-05, "loss": 2.5535, "step": 730000 }, { "epoch": 3.62, "learning_rate": 4.819108415947048e-05, "loss": 2.5379, "step": 730500 }, { "epoch": 3.62, "learning_rate": 4.81898455730444e-05, "loss": 2.5503, "step": 731000 }, { "epoch": 3.62, "learning_rate": 4.818860698661831e-05, "loss": 2.5508, "step": 731500 }, { "epoch": 3.63, "learning_rate": 4.818736840019223e-05, "loss": 2.5412, "step": 732000 }, { "epoch": 3.63, "learning_rate": 4.8186129813766144e-05, "loss": 2.5646, "step": 732500 }, { "epoch": 3.63, "learning_rate": 4.818489370451292e-05, "loss": 2.554, "step": 733000 }, { "epoch": 3.63, "learning_rate": 4.8183655118086836e-05, "loss": 2.5507, "step": 733500 }, { "epoch": 3.64, "learning_rate": 4.818241653166075e-05, "loss": 2.5684, "step": 734000 }, { "epoch": 3.64, "learning_rate": 4.818117794523466e-05, "loss": 2.5564, "step": 734500 }, { "epoch": 3.64, "learning_rate": 4.817993935880858e-05, "loss": 2.5357, "step": 735000 }, { "epoch": 3.64, "learning_rate": 4.81787057267282e-05, "loss": 2.5358, "step": 735500 }, { "epoch": 3.65, "learning_rate": 4.817746961747497e-05, "loss": 2.5506, "step": 736000 }, { "epoch": 3.65, "learning_rate": 4.8176231031048887e-05, "loss": 2.5443, "step": 736500 }, { "epoch": 3.65, "learning_rate": 4.8174992444622804e-05, "loss": 2.5271, "step": 737000 }, { "epoch": 3.65, "learning_rate": 4.817375633536957e-05, "loss": 2.5738, "step": 737500 }, { "epoch": 3.66, "learning_rate": 4.817251774894348e-05, "loss": 2.5456, "step": 738000 }, { "epoch": 3.66, "learning_rate": 4.81712791625174e-05, "loss": 2.5431, "step": 738500 }, { "epoch": 3.66, "learning_rate": 4.8170040576091316e-05, "loss": 2.5176, "step": 739000 }, { "epoch": 3.66, "learning_rate": 4.816880198966523e-05, "loss": 2.5471, "step": 739500 }, { "epoch": 3.67, "learning_rate": 4.816756340323915e-05, "loss": 2.5422, "step": 740000 }, { "epoch": 3.67, "learning_rate": 4.816632481681307e-05, "loss": 2.5587, "step": 740500 }, { "epoch": 3.67, "learning_rate": 4.8165086230386984e-05, "loss": 2.5371, "step": 741000 }, { "epoch": 3.67, "learning_rate": 4.81638476439609e-05, "loss": 2.5467, "step": 741500 }, { "epoch": 3.68, "learning_rate": 4.816260905753482e-05, "loss": 2.5384, "step": 742000 }, { "epoch": 3.68, "learning_rate": 4.8161370471108735e-05, "loss": 2.5456, "step": 742500 }, { "epoch": 3.68, "learning_rate": 4.816013188468265e-05, "loss": 2.5356, "step": 743000 }, { "epoch": 3.68, "learning_rate": 4.815889329825657e-05, "loss": 2.5309, "step": 743500 }, { "epoch": 3.69, "learning_rate": 4.815765718900334e-05, "loss": 2.5272, "step": 744000 }, { "epoch": 3.69, "learning_rate": 4.8156418602577254e-05, "loss": 2.5665, "step": 744500 }, { "epoch": 3.69, "learning_rate": 4.8155182493324016e-05, "loss": 2.5413, "step": 745000 }, { "epoch": 3.69, "learning_rate": 4.815394390689793e-05, "loss": 2.568, "step": 745500 }, { "epoch": 3.7, "learning_rate": 4.815270532047185e-05, "loss": 2.5461, "step": 746000 }, { "epoch": 3.7, "learning_rate": 4.815146673404577e-05, "loss": 2.5391, "step": 746500 }, { "epoch": 3.7, "learning_rate": 4.8150228147619684e-05, "loss": 2.5244, "step": 747000 }, { "epoch": 3.7, "learning_rate": 4.81489895611936e-05, "loss": 2.544, "step": 747500 }, { "epoch": 3.71, "learning_rate": 4.814775097476752e-05, "loss": 2.5462, "step": 748000 }, { "epoch": 3.71, "learning_rate": 4.8146512388341435e-05, "loss": 2.5465, "step": 748500 }, { "epoch": 3.71, "learning_rate": 4.814527380191535e-05, "loss": 2.5428, "step": 749000 }, { "epoch": 3.71, "learning_rate": 4.814403769266212e-05, "loss": 2.555, "step": 749500 }, { "epoch": 3.72, "learning_rate": 4.814280158340889e-05, "loss": 2.563, "step": 750000 }, { "epoch": 3.72, "learning_rate": 4.8141562996982806e-05, "loss": 2.5506, "step": 750500 }, { "epoch": 3.72, "learning_rate": 4.8140324410556717e-05, "loss": 2.5451, "step": 751000 }, { "epoch": 3.72, "learning_rate": 4.8139085824130633e-05, "loss": 2.544, "step": 751500 }, { "epoch": 3.73, "learning_rate": 4.813784971487741e-05, "loss": 2.5365, "step": 752000 }, { "epoch": 3.73, "learning_rate": 4.8136611128451326e-05, "loss": 2.5606, "step": 752500 }, { "epoch": 3.73, "learning_rate": 4.813537254202524e-05, "loss": 2.5512, "step": 753000 }, { "epoch": 3.73, "learning_rate": 4.813413395559916e-05, "loss": 2.5219, "step": 753500 }, { "epoch": 3.74, "learning_rate": 4.813289784634593e-05, "loss": 2.5419, "step": 754000 }, { "epoch": 3.74, "learning_rate": 4.813166173709269e-05, "loss": 2.5459, "step": 754500 }, { "epoch": 3.74, "learning_rate": 4.813042315066661e-05, "loss": 2.5357, "step": 755000 }, { "epoch": 3.74, "learning_rate": 4.8129184564240525e-05, "loss": 2.5289, "step": 755500 }, { "epoch": 3.75, "learning_rate": 4.812794597781444e-05, "loss": 2.5534, "step": 756000 }, { "epoch": 3.75, "learning_rate": 4.812670739138836e-05, "loss": 2.5353, "step": 756500 }, { "epoch": 3.75, "learning_rate": 4.8125468804962275e-05, "loss": 2.5421, "step": 757000 }, { "epoch": 3.75, "learning_rate": 4.8124232695709044e-05, "loss": 2.5251, "step": 757500 }, { "epoch": 3.76, "learning_rate": 4.812299410928296e-05, "loss": 2.5294, "step": 758000 }, { "epoch": 3.76, "learning_rate": 4.812175552285688e-05, "loss": 2.5252, "step": 758500 }, { "epoch": 3.76, "learning_rate": 4.8120516936430795e-05, "loss": 2.5331, "step": 759000 }, { "epoch": 3.76, "learning_rate": 4.811927835000471e-05, "loss": 2.5705, "step": 759500 }, { "epoch": 3.77, "learning_rate": 4.811803976357863e-05, "loss": 2.5623, "step": 760000 }, { "epoch": 3.77, "learning_rate": 4.8116801177152546e-05, "loss": 2.5405, "step": 760500 }, { "epoch": 3.77, "learning_rate": 4.811556259072646e-05, "loss": 2.525, "step": 761000 }, { "epoch": 3.77, "learning_rate": 4.811432400430038e-05, "loss": 2.5315, "step": 761500 }, { "epoch": 3.78, "learning_rate": 4.811308541787429e-05, "loss": 2.5486, "step": 762000 }, { "epoch": 3.78, "learning_rate": 4.811184683144821e-05, "loss": 2.549, "step": 762500 }, { "epoch": 3.78, "learning_rate": 4.8110608245022124e-05, "loss": 2.5359, "step": 763000 }, { "epoch": 3.78, "learning_rate": 4.810936965859604e-05, "loss": 2.5533, "step": 763500 }, { "epoch": 3.79, "learning_rate": 4.810813354934281e-05, "loss": 2.5451, "step": 764000 }, { "epoch": 3.79, "learning_rate": 4.8106894962916726e-05, "loss": 2.5645, "step": 764500 }, { "epoch": 3.79, "learning_rate": 4.810565637649064e-05, "loss": 2.5328, "step": 765000 }, { "epoch": 3.79, "learning_rate": 4.810441779006456e-05, "loss": 2.538, "step": 765500 }, { "epoch": 3.8, "learning_rate": 4.810318168081133e-05, "loss": 2.5277, "step": 766000 }, { "epoch": 3.8, "learning_rate": 4.8101943094385246e-05, "loss": 2.5509, "step": 766500 }, { "epoch": 3.8, "learning_rate": 4.810070450795916e-05, "loss": 2.558, "step": 767000 }, { "epoch": 3.8, "learning_rate": 4.809946592153308e-05, "loss": 2.5482, "step": 767500 }, { "epoch": 3.8, "learning_rate": 4.8098227335107e-05, "loss": 2.5564, "step": 768000 }, { "epoch": 3.81, "learning_rate": 4.809699122585376e-05, "loss": 2.5368, "step": 768500 }, { "epoch": 3.81, "learning_rate": 4.809575511660053e-05, "loss": 2.5808, "step": 769000 }, { "epoch": 3.81, "learning_rate": 4.8094516530174444e-05, "loss": 2.5585, "step": 769500 }, { "epoch": 3.81, "learning_rate": 4.809327794374836e-05, "loss": 2.5754, "step": 770000 }, { "epoch": 3.82, "learning_rate": 4.809203935732228e-05, "loss": 2.5389, "step": 770500 }, { "epoch": 3.82, "learning_rate": 4.8090800770896195e-05, "loss": 2.5298, "step": 771000 }, { "epoch": 3.82, "learning_rate": 4.808956218447011e-05, "loss": 2.5495, "step": 771500 }, { "epoch": 3.82, "learning_rate": 4.808832359804403e-05, "loss": 2.5579, "step": 772000 }, { "epoch": 3.83, "learning_rate": 4.808708748879079e-05, "loss": 2.5362, "step": 772500 }, { "epoch": 3.83, "learning_rate": 4.808584890236471e-05, "loss": 2.5724, "step": 773000 }, { "epoch": 3.83, "learning_rate": 4.8084610315938625e-05, "loss": 2.5317, "step": 773500 }, { "epoch": 3.83, "learning_rate": 4.808337172951254e-05, "loss": 2.5207, "step": 774000 }, { "epoch": 3.84, "learning_rate": 4.808213314308646e-05, "loss": 2.5329, "step": 774500 }, { "epoch": 3.84, "learning_rate": 4.8080894556660376e-05, "loss": 2.5449, "step": 775000 }, { "epoch": 3.84, "learning_rate": 4.807965597023429e-05, "loss": 2.5359, "step": 775500 }, { "epoch": 3.84, "learning_rate": 4.807841986098106e-05, "loss": 2.5733, "step": 776000 }, { "epoch": 3.85, "learning_rate": 4.807718375172783e-05, "loss": 2.543, "step": 776500 }, { "epoch": 3.85, "learning_rate": 4.807594516530175e-05, "loss": 2.5573, "step": 777000 }, { "epoch": 3.85, "learning_rate": 4.8074706578875664e-05, "loss": 2.5554, "step": 777500 }, { "epoch": 3.85, "learning_rate": 4.807346799244958e-05, "loss": 2.5289, "step": 778000 }, { "epoch": 3.86, "learning_rate": 4.807222940602349e-05, "loss": 2.5436, "step": 778500 }, { "epoch": 3.86, "learning_rate": 4.807099081959741e-05, "loss": 2.5695, "step": 779000 }, { "epoch": 3.86, "learning_rate": 4.8069752233171325e-05, "loss": 2.5339, "step": 779500 }, { "epoch": 3.86, "learning_rate": 4.806851364674524e-05, "loss": 2.5585, "step": 780000 }, { "epoch": 3.87, "learning_rate": 4.806727506031916e-05, "loss": 2.5488, "step": 780500 }, { "epoch": 3.87, "learning_rate": 4.8066036473893076e-05, "loss": 2.55, "step": 781000 }, { "epoch": 3.87, "learning_rate": 4.806479788746699e-05, "loss": 2.524, "step": 781500 }, { "epoch": 3.87, "learning_rate": 4.806355930104091e-05, "loss": 2.5214, "step": 782000 }, { "epoch": 3.88, "learning_rate": 4.806232071461483e-05, "loss": 2.5534, "step": 782500 }, { "epoch": 3.88, "learning_rate": 4.8061082128188744e-05, "loss": 2.5238, "step": 783000 }, { "epoch": 3.88, "learning_rate": 4.805984354176266e-05, "loss": 2.5444, "step": 783500 }, { "epoch": 3.88, "learning_rate": 4.805860495533658e-05, "loss": 2.5237, "step": 784000 }, { "epoch": 3.89, "learning_rate": 4.8057366368910494e-05, "loss": 2.5258, "step": 784500 }, { "epoch": 3.89, "learning_rate": 4.8056127782484404e-05, "loss": 2.529, "step": 785000 }, { "epoch": 3.89, "learning_rate": 4.805488919605832e-05, "loss": 2.5469, "step": 785500 }, { "epoch": 3.89, "learning_rate": 4.805365060963224e-05, "loss": 2.5389, "step": 786000 }, { "epoch": 3.9, "learning_rate": 4.8052414500379014e-05, "loss": 2.5637, "step": 786500 }, { "epoch": 3.9, "learning_rate": 4.805117591395293e-05, "loss": 2.5438, "step": 787000 }, { "epoch": 3.9, "learning_rate": 4.804993732752685e-05, "loss": 2.5465, "step": 787500 }, { "epoch": 3.9, "learning_rate": 4.804869874110076e-05, "loss": 2.5289, "step": 788000 }, { "epoch": 3.91, "learning_rate": 4.804746263184753e-05, "loss": 2.5684, "step": 788500 }, { "epoch": 3.91, "learning_rate": 4.8046226522594296e-05, "loss": 2.5539, "step": 789000 }, { "epoch": 3.91, "learning_rate": 4.804498793616821e-05, "loss": 2.5358, "step": 789500 }, { "epoch": 3.91, "learning_rate": 4.804374934974213e-05, "loss": 2.5412, "step": 790000 }, { "epoch": 3.92, "learning_rate": 4.8042510763316046e-05, "loss": 2.5593, "step": 790500 }, { "epoch": 3.92, "learning_rate": 4.804127217688996e-05, "loss": 2.5245, "step": 791000 }, { "epoch": 3.92, "learning_rate": 4.804003359046388e-05, "loss": 2.5548, "step": 791500 }, { "epoch": 3.92, "learning_rate": 4.80387950040378e-05, "loss": 2.5387, "step": 792000 }, { "epoch": 3.93, "learning_rate": 4.8037556417611714e-05, "loss": 2.529, "step": 792500 }, { "epoch": 3.93, "learning_rate": 4.8036320308358476e-05, "loss": 2.5248, "step": 793000 }, { "epoch": 3.93, "learning_rate": 4.803508172193239e-05, "loss": 2.5407, "step": 793500 }, { "epoch": 3.93, "learning_rate": 4.803384313550631e-05, "loss": 2.5353, "step": 794000 }, { "epoch": 3.94, "learning_rate": 4.803260454908023e-05, "loss": 2.5249, "step": 794500 }, { "epoch": 3.94, "learning_rate": 4.8031368439826996e-05, "loss": 2.5362, "step": 795000 }, { "epoch": 3.94, "learning_rate": 4.803012985340091e-05, "loss": 2.5309, "step": 795500 }, { "epoch": 3.94, "learning_rate": 4.802889126697483e-05, "loss": 2.5317, "step": 796000 }, { "epoch": 3.95, "learning_rate": 4.8027652680548746e-05, "loss": 2.5299, "step": 796500 }, { "epoch": 3.95, "learning_rate": 4.8026416571295515e-05, "loss": 2.5359, "step": 797000 }, { "epoch": 3.95, "learning_rate": 4.8025182939215136e-05, "loss": 2.5481, "step": 797500 }, { "epoch": 3.95, "learning_rate": 4.802394435278905e-05, "loss": 2.5442, "step": 798000 }, { "epoch": 3.96, "learning_rate": 4.802270576636297e-05, "loss": 2.5507, "step": 798500 }, { "epoch": 3.96, "learning_rate": 4.802146717993689e-05, "loss": 2.5488, "step": 799000 }, { "epoch": 3.96, "learning_rate": 4.8020228593510804e-05, "loss": 2.5537, "step": 799500 }, { "epoch": 3.96, "learning_rate": 4.801899000708472e-05, "loss": 2.5368, "step": 800000 }, { "epoch": 3.97, "learning_rate": 4.801775142065864e-05, "loss": 2.5376, "step": 800500 }, { "epoch": 3.97, "learning_rate": 4.8016512834232554e-05, "loss": 2.5168, "step": 801000 }, { "epoch": 3.97, "learning_rate": 4.8015276724979317e-05, "loss": 2.5168, "step": 801500 }, { "epoch": 3.97, "learning_rate": 4.8014038138553233e-05, "loss": 2.5698, "step": 802000 }, { "epoch": 3.98, "learning_rate": 4.801279955212715e-05, "loss": 2.5456, "step": 802500 }, { "epoch": 3.98, "learning_rate": 4.801156096570107e-05, "loss": 2.5533, "step": 803000 }, { "epoch": 3.98, "learning_rate": 4.8010322379274984e-05, "loss": 2.5442, "step": 803500 }, { "epoch": 3.98, "learning_rate": 4.80090837928489e-05, "loss": 2.5533, "step": 804000 }, { "epoch": 3.99, "learning_rate": 4.800784768359567e-05, "loss": 2.5392, "step": 804500 }, { "epoch": 3.99, "learning_rate": 4.800661157434244e-05, "loss": 2.5661, "step": 805000 }, { "epoch": 3.99, "learning_rate": 4.8005372987916356e-05, "loss": 2.5666, "step": 805500 }, { "epoch": 3.99, "learning_rate": 4.800413440149027e-05, "loss": 2.5285, "step": 806000 }, { "epoch": 4.0, "learning_rate": 4.800289581506418e-05, "loss": 2.5249, "step": 806500 }, { "epoch": 4.0, "learning_rate": 4.800165970581095e-05, "loss": 2.5377, "step": 807000 }, { "epoch": 4.0, "eval_accuracy": 0.6340747980022936, "eval_accuracy_mlm": 0.5867373422775914, "eval_accuracy_nsp": 0.8572672468906766, "eval_loss": 2.4791698455810547, "eval_runtime": 145.9611, "eval_samples_per_second": 1746.76, "eval_steps_per_second": 72.787, "step": 807372 }, { "epoch": 4.0, "learning_rate": 4.800042111938487e-05, "loss": 2.5294, "step": 807500 }, { "epoch": 4.0, "learning_rate": 4.7999182532958785e-05, "loss": 2.4976, "step": 808000 }, { "epoch": 4.01, "learning_rate": 4.79979439465327e-05, "loss": 2.5014, "step": 808500 }, { "epoch": 4.01, "learning_rate": 4.799670536010662e-05, "loss": 2.5063, "step": 809000 }, { "epoch": 4.01, "learning_rate": 4.7995466773680536e-05, "loss": 2.5132, "step": 809500 }, { "epoch": 4.01, "learning_rate": 4.799422818725445e-05, "loss": 2.5018, "step": 810000 }, { "epoch": 4.02, "learning_rate": 4.799298960082837e-05, "loss": 2.5117, "step": 810500 }, { "epoch": 4.02, "learning_rate": 4.799175101440229e-05, "loss": 2.5232, "step": 811000 }, { "epoch": 4.02, "learning_rate": 4.7990514905149056e-05, "loss": 2.5043, "step": 811500 }, { "epoch": 4.02, "learning_rate": 4.798927631872297e-05, "loss": 2.5114, "step": 812000 }, { "epoch": 4.03, "learning_rate": 4.798803773229689e-05, "loss": 2.5253, "step": 812500 }, { "epoch": 4.03, "learning_rate": 4.7986799145870807e-05, "loss": 2.5181, "step": 813000 }, { "epoch": 4.03, "learning_rate": 4.798556055944472e-05, "loss": 2.5291, "step": 813500 }, { "epoch": 4.03, "learning_rate": 4.7984321973018634e-05, "loss": 2.499, "step": 814000 }, { "epoch": 4.04, "learning_rate": 4.798308338659255e-05, "loss": 2.5222, "step": 814500 }, { "epoch": 4.04, "learning_rate": 4.798184480016647e-05, "loss": 2.5236, "step": 815000 }, { "epoch": 4.04, "learning_rate": 4.7980606213740384e-05, "loss": 2.5314, "step": 815500 }, { "epoch": 4.04, "learning_rate": 4.797937010448715e-05, "loss": 2.5234, "step": 816000 }, { "epoch": 4.05, "learning_rate": 4.797813151806107e-05, "loss": 2.4951, "step": 816500 }, { "epoch": 4.05, "learning_rate": 4.797689293163499e-05, "loss": 2.529, "step": 817000 }, { "epoch": 4.05, "learning_rate": 4.7975654345208904e-05, "loss": 2.5055, "step": 817500 }, { "epoch": 4.05, "learning_rate": 4.797441575878282e-05, "loss": 2.4981, "step": 818000 }, { "epoch": 4.06, "learning_rate": 4.797317964952959e-05, "loss": 2.512, "step": 818500 }, { "epoch": 4.06, "learning_rate": 4.797194106310351e-05, "loss": 2.515, "step": 819000 }, { "epoch": 4.06, "learning_rate": 4.7970702476677424e-05, "loss": 2.4995, "step": 819500 }, { "epoch": 4.06, "learning_rate": 4.7969463890251334e-05, "loss": 2.5222, "step": 820000 }, { "epoch": 4.07, "learning_rate": 4.79682277809981e-05, "loss": 2.5096, "step": 820500 }, { "epoch": 4.07, "learning_rate": 4.796698919457202e-05, "loss": 2.5198, "step": 821000 }, { "epoch": 4.07, "learning_rate": 4.796575308531879e-05, "loss": 2.5011, "step": 821500 }, { "epoch": 4.07, "learning_rate": 4.7964514498892705e-05, "loss": 2.5134, "step": 822000 }, { "epoch": 4.07, "learning_rate": 4.796327591246662e-05, "loss": 2.486, "step": 822500 }, { "epoch": 4.08, "learning_rate": 4.796203732604054e-05, "loss": 2.5452, "step": 823000 }, { "epoch": 4.08, "learning_rate": 4.7960798739614456e-05, "loss": 2.5249, "step": 823500 }, { "epoch": 4.08, "learning_rate": 4.795956015318837e-05, "loss": 2.5277, "step": 824000 }, { "epoch": 4.08, "learning_rate": 4.795832156676229e-05, "loss": 2.517, "step": 824500 }, { "epoch": 4.09, "learning_rate": 4.795708298033621e-05, "loss": 2.5276, "step": 825000 }, { "epoch": 4.09, "learning_rate": 4.7955844393910124e-05, "loss": 2.5128, "step": 825500 }, { "epoch": 4.09, "learning_rate": 4.7954610761829744e-05, "loss": 2.5389, "step": 826000 }, { "epoch": 4.09, "learning_rate": 4.795337217540366e-05, "loss": 2.5206, "step": 826500 }, { "epoch": 4.1, "learning_rate": 4.795213358897757e-05, "loss": 2.5004, "step": 827000 }, { "epoch": 4.1, "learning_rate": 4.795089747972435e-05, "loss": 2.5165, "step": 827500 }, { "epoch": 4.1, "learning_rate": 4.794965889329826e-05, "loss": 2.5113, "step": 828000 }, { "epoch": 4.1, "learning_rate": 4.7948420306872174e-05, "loss": 2.4997, "step": 828500 }, { "epoch": 4.11, "learning_rate": 4.794718172044609e-05, "loss": 2.5323, "step": 829000 }, { "epoch": 4.11, "learning_rate": 4.794594313402001e-05, "loss": 2.5239, "step": 829500 }, { "epoch": 4.11, "learning_rate": 4.794470454759392e-05, "loss": 2.5154, "step": 830000 }, { "epoch": 4.11, "learning_rate": 4.7943465961167835e-05, "loss": 2.5174, "step": 830500 }, { "epoch": 4.12, "learning_rate": 4.794222737474175e-05, "loss": 2.5047, "step": 831000 }, { "epoch": 4.12, "learning_rate": 4.794098878831567e-05, "loss": 2.5309, "step": 831500 }, { "epoch": 4.12, "learning_rate": 4.7939750201889586e-05, "loss": 2.5058, "step": 832000 }, { "epoch": 4.12, "learning_rate": 4.793851409263636e-05, "loss": 2.5134, "step": 832500 }, { "epoch": 4.13, "learning_rate": 4.793727550621028e-05, "loss": 2.5142, "step": 833000 }, { "epoch": 4.13, "learning_rate": 4.793603691978419e-05, "loss": 2.5138, "step": 833500 }, { "epoch": 4.13, "learning_rate": 4.7934798333358106e-05, "loss": 2.5339, "step": 834000 }, { "epoch": 4.13, "learning_rate": 4.793355974693202e-05, "loss": 2.5123, "step": 834500 }, { "epoch": 4.14, "learning_rate": 4.793232363767879e-05, "loss": 2.5296, "step": 835000 }, { "epoch": 4.14, "learning_rate": 4.793108752842556e-05, "loss": 2.5339, "step": 835500 }, { "epoch": 4.14, "learning_rate": 4.792984894199948e-05, "loss": 2.538, "step": 836000 }, { "epoch": 4.14, "learning_rate": 4.7928610355573394e-05, "loss": 2.5208, "step": 836500 }, { "epoch": 4.15, "learning_rate": 4.792737176914731e-05, "loss": 2.4823, "step": 837000 }, { "epoch": 4.15, "learning_rate": 4.792613318272123e-05, "loss": 2.5293, "step": 837500 }, { "epoch": 4.15, "learning_rate": 4.7924894596295145e-05, "loss": 2.5136, "step": 838000 }, { "epoch": 4.15, "learning_rate": 4.792365600986906e-05, "loss": 2.497, "step": 838500 }, { "epoch": 4.16, "learning_rate": 4.792241742344298e-05, "loss": 2.5301, "step": 839000 }, { "epoch": 4.16, "learning_rate": 4.792117883701689e-05, "loss": 2.5154, "step": 839500 }, { "epoch": 4.16, "learning_rate": 4.7919940250590806e-05, "loss": 2.5086, "step": 840000 }, { "epoch": 4.16, "learning_rate": 4.791870166416472e-05, "loss": 2.506, "step": 840500 }, { "epoch": 4.17, "learning_rate": 4.791746307773864e-05, "loss": 2.5198, "step": 841000 }, { "epoch": 4.17, "learning_rate": 4.7916224491312556e-05, "loss": 2.5042, "step": 841500 }, { "epoch": 4.17, "learning_rate": 4.791499085923218e-05, "loss": 2.5122, "step": 842000 }, { "epoch": 4.17, "learning_rate": 4.7913752272806094e-05, "loss": 2.5176, "step": 842500 }, { "epoch": 4.18, "learning_rate": 4.791251368638001e-05, "loss": 2.5354, "step": 843000 }, { "epoch": 4.18, "learning_rate": 4.791127509995393e-05, "loss": 2.515, "step": 843500 }, { "epoch": 4.18, "learning_rate": 4.7910036513527845e-05, "loss": 2.5099, "step": 844000 }, { "epoch": 4.18, "learning_rate": 4.790879792710176e-05, "loss": 2.5267, "step": 844500 }, { "epoch": 4.19, "learning_rate": 4.790755934067568e-05, "loss": 2.5115, "step": 845000 }, { "epoch": 4.19, "learning_rate": 4.7906320754249596e-05, "loss": 2.5051, "step": 845500 }, { "epoch": 4.19, "learning_rate": 4.7905082167823506e-05, "loss": 2.5294, "step": 846000 }, { "epoch": 4.19, "learning_rate": 4.790384605857028e-05, "loss": 2.5381, "step": 846500 }, { "epoch": 4.2, "learning_rate": 4.79026074721442e-05, "loss": 2.509, "step": 847000 }, { "epoch": 4.2, "learning_rate": 4.7901368885718115e-05, "loss": 2.5301, "step": 847500 }, { "epoch": 4.2, "learning_rate": 4.7900130299292025e-05, "loss": 2.5011, "step": 848000 }, { "epoch": 4.2, "learning_rate": 4.789889171286594e-05, "loss": 2.5179, "step": 848500 }, { "epoch": 4.21, "learning_rate": 4.789765312643986e-05, "loss": 2.5437, "step": 849000 }, { "epoch": 4.21, "learning_rate": 4.789641701718663e-05, "loss": 2.5317, "step": 849500 }, { "epoch": 4.21, "learning_rate": 4.78951809079334e-05, "loss": 2.5221, "step": 850000 }, { "epoch": 4.21, "learning_rate": 4.7893942321507314e-05, "loss": 2.5152, "step": 850500 }, { "epoch": 4.22, "learning_rate": 4.789270621225408e-05, "loss": 2.5534, "step": 851000 }, { "epoch": 4.22, "learning_rate": 4.789146762582799e-05, "loss": 2.5463, "step": 851500 }, { "epoch": 4.22, "learning_rate": 4.789022903940191e-05, "loss": 2.528, "step": 852000 }, { "epoch": 4.22, "learning_rate": 4.7888990452975827e-05, "loss": 2.5066, "step": 852500 }, { "epoch": 4.23, "learning_rate": 4.7887751866549744e-05, "loss": 2.5129, "step": 853000 }, { "epoch": 4.23, "learning_rate": 4.788651328012366e-05, "loss": 2.5264, "step": 853500 }, { "epoch": 4.23, "learning_rate": 4.788527469369758e-05, "loss": 2.5151, "step": 854000 }, { "epoch": 4.23, "learning_rate": 4.7884036107271494e-05, "loss": 2.5214, "step": 854500 }, { "epoch": 4.24, "learning_rate": 4.788279752084541e-05, "loss": 2.5097, "step": 855000 }, { "epoch": 4.24, "learning_rate": 4.788155893441933e-05, "loss": 2.5109, "step": 855500 }, { "epoch": 4.24, "learning_rate": 4.7880320347993245e-05, "loss": 2.522, "step": 856000 }, { "epoch": 4.24, "learning_rate": 4.7879084238740014e-05, "loss": 2.5233, "step": 856500 }, { "epoch": 4.25, "learning_rate": 4.787784565231393e-05, "loss": 2.5345, "step": 857000 }, { "epoch": 4.25, "learning_rate": 4.787660706588785e-05, "loss": 2.5216, "step": 857500 }, { "epoch": 4.25, "learning_rate": 4.7875368479461765e-05, "loss": 2.565, "step": 858000 }, { "epoch": 4.25, "learning_rate": 4.787412989303568e-05, "loss": 2.5278, "step": 858500 }, { "epoch": 4.26, "learning_rate": 4.7872893783782444e-05, "loss": 2.5257, "step": 859000 }, { "epoch": 4.26, "learning_rate": 4.787165519735636e-05, "loss": 2.5161, "step": 859500 }, { "epoch": 4.26, "learning_rate": 4.787041661093028e-05, "loss": 2.5183, "step": 860000 }, { "epoch": 4.26, "learning_rate": 4.7869178024504194e-05, "loss": 2.52, "step": 860500 }, { "epoch": 4.27, "learning_rate": 4.786793943807811e-05, "loss": 2.5174, "step": 861000 }, { "epoch": 4.27, "learning_rate": 4.786670085165203e-05, "loss": 2.5502, "step": 861500 }, { "epoch": 4.27, "learning_rate": 4.78654647423988e-05, "loss": 2.5172, "step": 862000 }, { "epoch": 4.27, "learning_rate": 4.7864226155972714e-05, "loss": 2.5307, "step": 862500 }, { "epoch": 4.28, "learning_rate": 4.786298756954663e-05, "loss": 2.5203, "step": 863000 }, { "epoch": 4.28, "learning_rate": 4.786174898312055e-05, "loss": 2.528, "step": 863500 }, { "epoch": 4.28, "learning_rate": 4.7860510396694465e-05, "loss": 2.5293, "step": 864000 }, { "epoch": 4.28, "learning_rate": 4.785927181026838e-05, "loss": 2.5237, "step": 864500 }, { "epoch": 4.29, "learning_rate": 4.78580332238423e-05, "loss": 2.5579, "step": 865000 }, { "epoch": 4.29, "learning_rate": 4.7856794637416216e-05, "loss": 2.5067, "step": 865500 }, { "epoch": 4.29, "learning_rate": 4.785555605099013e-05, "loss": 2.5152, "step": 866000 }, { "epoch": 4.29, "learning_rate": 4.785431746456405e-05, "loss": 2.5302, "step": 866500 }, { "epoch": 4.3, "learning_rate": 4.785308135531081e-05, "loss": 2.5225, "step": 867000 }, { "epoch": 4.3, "learning_rate": 4.785184524605758e-05, "loss": 2.507, "step": 867500 }, { "epoch": 4.3, "learning_rate": 4.78506066596315e-05, "loss": 2.5427, "step": 868000 }, { "epoch": 4.3, "learning_rate": 4.7849370550378266e-05, "loss": 2.508, "step": 868500 }, { "epoch": 4.31, "learning_rate": 4.784813196395218e-05, "loss": 2.5365, "step": 869000 }, { "epoch": 4.31, "learning_rate": 4.78468933775261e-05, "loss": 2.5121, "step": 869500 }, { "epoch": 4.31, "learning_rate": 4.784565479110002e-05, "loss": 2.519, "step": 870000 }, { "epoch": 4.31, "learning_rate": 4.7844416204673934e-05, "loss": 2.5295, "step": 870500 }, { "epoch": 4.32, "learning_rate": 4.784317761824785e-05, "loss": 2.5398, "step": 871000 }, { "epoch": 4.32, "learning_rate": 4.784193903182176e-05, "loss": 2.5191, "step": 871500 }, { "epoch": 4.32, "learning_rate": 4.784070044539568e-05, "loss": 2.523, "step": 872000 }, { "epoch": 4.32, "learning_rate": 4.7839461858969595e-05, "loss": 2.5589, "step": 872500 }, { "epoch": 4.33, "learning_rate": 4.783822327254351e-05, "loss": 2.5345, "step": 873000 }, { "epoch": 4.33, "learning_rate": 4.783698468611743e-05, "loss": 2.5171, "step": 873500 }, { "epoch": 4.33, "learning_rate": 4.7835746099691345e-05, "loss": 2.5048, "step": 874000 }, { "epoch": 4.33, "learning_rate": 4.783450751326526e-05, "loss": 2.5152, "step": 874500 }, { "epoch": 4.34, "learning_rate": 4.783327140401203e-05, "loss": 2.5189, "step": 875000 }, { "epoch": 4.34, "learning_rate": 4.783203281758595e-05, "loss": 2.506, "step": 875500 }, { "epoch": 4.34, "learning_rate": 4.7830794231159865e-05, "loss": 2.5281, "step": 876000 }, { "epoch": 4.34, "learning_rate": 4.782955564473378e-05, "loss": 2.5282, "step": 876500 }, { "epoch": 4.34, "learning_rate": 4.782831953548055e-05, "loss": 2.4956, "step": 877000 }, { "epoch": 4.35, "learning_rate": 4.782708094905447e-05, "loss": 2.5356, "step": 877500 }, { "epoch": 4.35, "learning_rate": 4.7825842362628385e-05, "loss": 2.5127, "step": 878000 }, { "epoch": 4.35, "learning_rate": 4.7824603776202295e-05, "loss": 2.536, "step": 878500 }, { "epoch": 4.35, "learning_rate": 4.782336518977621e-05, "loss": 2.5432, "step": 879000 }, { "epoch": 4.36, "learning_rate": 4.782212660335013e-05, "loss": 2.5164, "step": 879500 }, { "epoch": 4.36, "learning_rate": 4.78208904940969e-05, "loss": 2.5096, "step": 880000 }, { "epoch": 4.36, "learning_rate": 4.7819651907670814e-05, "loss": 2.5279, "step": 880500 }, { "epoch": 4.36, "learning_rate": 4.781841332124473e-05, "loss": 2.5284, "step": 881000 }, { "epoch": 4.37, "learning_rate": 4.781717473481865e-05, "loss": 2.496, "step": 881500 }, { "epoch": 4.37, "learning_rate": 4.7815936148392565e-05, "loss": 2.5169, "step": 882000 }, { "epoch": 4.37, "learning_rate": 4.781469756196648e-05, "loss": 2.5015, "step": 882500 }, { "epoch": 4.37, "learning_rate": 4.781346145271325e-05, "loss": 2.5389, "step": 883000 }, { "epoch": 4.38, "learning_rate": 4.781222286628717e-05, "loss": 2.5235, "step": 883500 }, { "epoch": 4.38, "learning_rate": 4.7810984279861085e-05, "loss": 2.5245, "step": 884000 }, { "epoch": 4.38, "learning_rate": 4.780974817060785e-05, "loss": 2.5214, "step": 884500 }, { "epoch": 4.38, "learning_rate": 4.780851206135462e-05, "loss": 2.5422, "step": 885000 }, { "epoch": 4.39, "learning_rate": 4.780727347492854e-05, "loss": 2.5327, "step": 885500 }, { "epoch": 4.39, "learning_rate": 4.7806034888502456e-05, "loss": 2.5164, "step": 886000 }, { "epoch": 4.39, "learning_rate": 4.780479630207637e-05, "loss": 2.5537, "step": 886500 }, { "epoch": 4.39, "learning_rate": 4.780355771565028e-05, "loss": 2.513, "step": 887000 }, { "epoch": 4.4, "learning_rate": 4.78023191292242e-05, "loss": 2.5196, "step": 887500 }, { "epoch": 4.4, "learning_rate": 4.780108054279812e-05, "loss": 2.5014, "step": 888000 }, { "epoch": 4.4, "learning_rate": 4.7799841956372034e-05, "loss": 2.5319, "step": 888500 }, { "epoch": 4.4, "learning_rate": 4.779860336994595e-05, "loss": 2.5317, "step": 889000 }, { "epoch": 4.41, "learning_rate": 4.779736478351987e-05, "loss": 2.5447, "step": 889500 }, { "epoch": 4.41, "learning_rate": 4.7796126197093785e-05, "loss": 2.513, "step": 890000 }, { "epoch": 4.41, "learning_rate": 4.77948876106677e-05, "loss": 2.5189, "step": 890500 }, { "epoch": 4.41, "learning_rate": 4.7793651501414464e-05, "loss": 2.5308, "step": 891000 }, { "epoch": 4.42, "learning_rate": 4.779241291498838e-05, "loss": 2.5264, "step": 891500 }, { "epoch": 4.42, "learning_rate": 4.77911743285623e-05, "loss": 2.5147, "step": 892000 }, { "epoch": 4.42, "learning_rate": 4.7789935742136215e-05, "loss": 2.5411, "step": 892500 }, { "epoch": 4.42, "learning_rate": 4.7788702110055835e-05, "loss": 2.496, "step": 893000 }, { "epoch": 4.43, "learning_rate": 4.778746352362975e-05, "loss": 2.5043, "step": 893500 }, { "epoch": 4.43, "learning_rate": 4.778622493720367e-05, "loss": 2.51, "step": 894000 }, { "epoch": 4.43, "learning_rate": 4.7784986350777586e-05, "loss": 2.518, "step": 894500 }, { "epoch": 4.43, "learning_rate": 4.77837477643515e-05, "loss": 2.5077, "step": 895000 }, { "epoch": 4.44, "learning_rate": 4.778250917792542e-05, "loss": 2.512, "step": 895500 }, { "epoch": 4.44, "learning_rate": 4.778127306867219e-05, "loss": 2.5042, "step": 896000 }, { "epoch": 4.44, "learning_rate": 4.7780034482246106e-05, "loss": 2.5105, "step": 896500 }, { "epoch": 4.44, "learning_rate": 4.777879589582002e-05, "loss": 2.5393, "step": 897000 }, { "epoch": 4.45, "learning_rate": 4.777755730939394e-05, "loss": 2.5374, "step": 897500 }, { "epoch": 4.45, "learning_rate": 4.7776318722967856e-05, "loss": 2.4974, "step": 898000 }, { "epoch": 4.45, "learning_rate": 4.7775080136541773e-05, "loss": 2.5217, "step": 898500 }, { "epoch": 4.45, "learning_rate": 4.777384155011569e-05, "loss": 2.5425, "step": 899000 }, { "epoch": 4.46, "learning_rate": 4.77726029636896e-05, "loss": 2.5461, "step": 899500 }, { "epoch": 4.46, "learning_rate": 4.777136933160922e-05, "loss": 2.5432, "step": 900000 }, { "epoch": 4.46, "learning_rate": 4.777013074518314e-05, "loss": 2.5105, "step": 900500 }, { "epoch": 4.46, "learning_rate": 4.7768892158757055e-05, "loss": 2.5198, "step": 901000 }, { "epoch": 4.47, "learning_rate": 4.7767656049503824e-05, "loss": 2.5226, "step": 901500 }, { "epoch": 4.47, "learning_rate": 4.776641746307774e-05, "loss": 2.5397, "step": 902000 }, { "epoch": 4.47, "learning_rate": 4.776517887665166e-05, "loss": 2.5173, "step": 902500 }, { "epoch": 4.47, "learning_rate": 4.7763940290225575e-05, "loss": 2.5123, "step": 903000 }, { "epoch": 4.48, "learning_rate": 4.776270170379949e-05, "loss": 2.5164, "step": 903500 }, { "epoch": 4.48, "learning_rate": 4.776146311737341e-05, "loss": 2.5277, "step": 904000 }, { "epoch": 4.48, "learning_rate": 4.7760224530947325e-05, "loss": 2.5085, "step": 904500 }, { "epoch": 4.48, "learning_rate": 4.775898594452124e-05, "loss": 2.5297, "step": 905000 }, { "epoch": 4.49, "learning_rate": 4.775774735809516e-05, "loss": 2.5393, "step": 905500 }, { "epoch": 4.49, "learning_rate": 4.775651124884192e-05, "loss": 2.5383, "step": 906000 }, { "epoch": 4.49, "learning_rate": 4.775527266241584e-05, "loss": 2.5199, "step": 906500 }, { "epoch": 4.49, "learning_rate": 4.7754034075989755e-05, "loss": 2.5323, "step": 907000 }, { "epoch": 4.5, "learning_rate": 4.775279548956367e-05, "loss": 2.5211, "step": 907500 }, { "epoch": 4.5, "learning_rate": 4.775155690313759e-05, "loss": 2.5245, "step": 908000 }, { "epoch": 4.5, "learning_rate": 4.7750318316711506e-05, "loss": 2.5016, "step": 908500 }, { "epoch": 4.5, "learning_rate": 4.774907973028542e-05, "loss": 2.5191, "step": 909000 }, { "epoch": 4.51, "learning_rate": 4.774784114385934e-05, "loss": 2.5471, "step": 909500 }, { "epoch": 4.51, "learning_rate": 4.774660255743326e-05, "loss": 2.5372, "step": 910000 }, { "epoch": 4.51, "learning_rate": 4.7745363971007174e-05, "loss": 2.52, "step": 910500 }, { "epoch": 4.51, "learning_rate": 4.774412538458109e-05, "loss": 2.512, "step": 911000 }, { "epoch": 4.52, "learning_rate": 4.774288679815501e-05, "loss": 2.5325, "step": 911500 }, { "epoch": 4.52, "learning_rate": 4.774164821172892e-05, "loss": 2.5194, "step": 912000 }, { "epoch": 4.52, "learning_rate": 4.7740409625302835e-05, "loss": 2.5473, "step": 912500 }, { "epoch": 4.52, "learning_rate": 4.773917103887675e-05, "loss": 2.5345, "step": 913000 }, { "epoch": 4.53, "learning_rate": 4.773793245245067e-05, "loss": 2.5285, "step": 913500 }, { "epoch": 4.53, "learning_rate": 4.7736693866024585e-05, "loss": 2.5442, "step": 914000 }, { "epoch": 4.53, "learning_rate": 4.7735457756771354e-05, "loss": 2.5358, "step": 914500 }, { "epoch": 4.53, "learning_rate": 4.773421917034527e-05, "loss": 2.5193, "step": 915000 }, { "epoch": 4.54, "learning_rate": 4.773298306109204e-05, "loss": 2.5213, "step": 915500 }, { "epoch": 4.54, "learning_rate": 4.773174447466596e-05, "loss": 2.5143, "step": 916000 }, { "epoch": 4.54, "learning_rate": 4.7730505888239874e-05, "loss": 2.507, "step": 916500 }, { "epoch": 4.54, "learning_rate": 4.772926977898664e-05, "loss": 2.5088, "step": 917000 }, { "epoch": 4.55, "learning_rate": 4.772803119256056e-05, "loss": 2.538, "step": 917500 }, { "epoch": 4.55, "learning_rate": 4.7726792606134476e-05, "loss": 2.5344, "step": 918000 }, { "epoch": 4.55, "learning_rate": 4.7725554019708393e-05, "loss": 2.5217, "step": 918500 }, { "epoch": 4.55, "learning_rate": 4.772431543328231e-05, "loss": 2.5035, "step": 919000 }, { "epoch": 4.56, "learning_rate": 4.772307684685623e-05, "loss": 2.5022, "step": 919500 }, { "epoch": 4.56, "learning_rate": 4.772183826043014e-05, "loss": 2.535, "step": 920000 }, { "epoch": 4.56, "learning_rate": 4.7720599674004054e-05, "loss": 2.481, "step": 920500 }, { "epoch": 4.56, "learning_rate": 4.771936356475082e-05, "loss": 2.5258, "step": 921000 }, { "epoch": 4.57, "learning_rate": 4.771812497832474e-05, "loss": 2.5533, "step": 921500 }, { "epoch": 4.57, "learning_rate": 4.771688639189866e-05, "loss": 2.5407, "step": 922000 }, { "epoch": 4.57, "learning_rate": 4.7715647805472574e-05, "loss": 2.5222, "step": 922500 }, { "epoch": 4.57, "learning_rate": 4.771440921904649e-05, "loss": 2.5202, "step": 923000 }, { "epoch": 4.58, "learning_rate": 4.7713178064138963e-05, "loss": 2.5326, "step": 923500 }, { "epoch": 4.58, "learning_rate": 4.771193947771288e-05, "loss": 2.5148, "step": 924000 }, { "epoch": 4.58, "learning_rate": 4.77107008912868e-05, "loss": 2.5138, "step": 924500 }, { "epoch": 4.58, "learning_rate": 4.770946230486071e-05, "loss": 2.4793, "step": 925000 }, { "epoch": 4.59, "learning_rate": 4.7708223718434624e-05, "loss": 2.5133, "step": 925500 }, { "epoch": 4.59, "learning_rate": 4.770698513200854e-05, "loss": 2.5307, "step": 926000 }, { "epoch": 4.59, "learning_rate": 4.770574902275532e-05, "loss": 2.5068, "step": 926500 }, { "epoch": 4.59, "learning_rate": 4.7704510436329234e-05, "loss": 2.528, "step": 927000 }, { "epoch": 4.6, "learning_rate": 4.7703271849903144e-05, "loss": 2.5122, "step": 927500 }, { "epoch": 4.6, "learning_rate": 4.770203326347706e-05, "loss": 2.5372, "step": 928000 }, { "epoch": 4.6, "learning_rate": 4.770079467705098e-05, "loss": 2.5282, "step": 928500 }, { "epoch": 4.6, "learning_rate": 4.7699556090624895e-05, "loss": 2.5162, "step": 929000 }, { "epoch": 4.61, "learning_rate": 4.769831750419881e-05, "loss": 2.5279, "step": 929500 }, { "epoch": 4.61, "learning_rate": 4.769707891777272e-05, "loss": 2.5302, "step": 930000 }, { "epoch": 4.61, "learning_rate": 4.769584033134664e-05, "loss": 2.5265, "step": 930500 }, { "epoch": 4.61, "learning_rate": 4.7694601744920556e-05, "loss": 2.5167, "step": 931000 }, { "epoch": 4.61, "learning_rate": 4.769336315849447e-05, "loss": 2.5033, "step": 931500 }, { "epoch": 4.62, "learning_rate": 4.769212457206839e-05, "loss": 2.5239, "step": 932000 }, { "epoch": 4.62, "learning_rate": 4.769088846281516e-05, "loss": 2.5485, "step": 932500 }, { "epoch": 4.62, "learning_rate": 4.7689649876389075e-05, "loss": 2.5165, "step": 933000 }, { "epoch": 4.62, "learning_rate": 4.768841376713585e-05, "loss": 2.5295, "step": 933500 }, { "epoch": 4.63, "learning_rate": 4.768717518070977e-05, "loss": 2.5282, "step": 934000 }, { "epoch": 4.63, "learning_rate": 4.768593659428368e-05, "loss": 2.505, "step": 934500 }, { "epoch": 4.63, "learning_rate": 4.768470048503045e-05, "loss": 2.5145, "step": 935000 }, { "epoch": 4.63, "learning_rate": 4.7683461898604364e-05, "loss": 2.5239, "step": 935500 }, { "epoch": 4.64, "learning_rate": 4.768222331217828e-05, "loss": 2.512, "step": 936000 }, { "epoch": 4.64, "learning_rate": 4.76809847257522e-05, "loss": 2.5265, "step": 936500 }, { "epoch": 4.64, "learning_rate": 4.7679746139326114e-05, "loss": 2.528, "step": 937000 }, { "epoch": 4.64, "learning_rate": 4.7678507552900025e-05, "loss": 2.5295, "step": 937500 }, { "epoch": 4.65, "learning_rate": 4.767726896647394e-05, "loss": 2.4952, "step": 938000 }, { "epoch": 4.65, "learning_rate": 4.767603038004786e-05, "loss": 2.5174, "step": 938500 }, { "epoch": 4.65, "learning_rate": 4.7674791793621775e-05, "loss": 2.5244, "step": 939000 }, { "epoch": 4.65, "learning_rate": 4.767355320719569e-05, "loss": 2.5318, "step": 939500 }, { "epoch": 4.66, "learning_rate": 4.767231462076961e-05, "loss": 2.5375, "step": 940000 }, { "epoch": 4.66, "learning_rate": 4.7671078511516385e-05, "loss": 2.5271, "step": 940500 }, { "epoch": 4.66, "learning_rate": 4.7669839925090295e-05, "loss": 2.5511, "step": 941000 }, { "epoch": 4.66, "learning_rate": 4.766860133866421e-05, "loss": 2.5225, "step": 941500 }, { "epoch": 4.67, "learning_rate": 4.766736275223813e-05, "loss": 2.5055, "step": 942000 }, { "epoch": 4.67, "learning_rate": 4.7666124165812046e-05, "loss": 2.5206, "step": 942500 }, { "epoch": 4.67, "learning_rate": 4.766488557938596e-05, "loss": 2.5182, "step": 943000 }, { "epoch": 4.67, "learning_rate": 4.766364947013273e-05, "loss": 2.5254, "step": 943500 }, { "epoch": 4.68, "learning_rate": 4.766241088370664e-05, "loss": 2.4945, "step": 944000 }, { "epoch": 4.68, "learning_rate": 4.766117229728056e-05, "loss": 2.5185, "step": 944500 }, { "epoch": 4.68, "learning_rate": 4.7659933710854476e-05, "loss": 2.5111, "step": 945000 }, { "epoch": 4.68, "learning_rate": 4.765869512442839e-05, "loss": 2.5186, "step": 945500 }, { "epoch": 4.69, "learning_rate": 4.765745653800231e-05, "loss": 2.5001, "step": 946000 }, { "epoch": 4.69, "learning_rate": 4.7656217951576226e-05, "loss": 2.5132, "step": 946500 }, { "epoch": 4.69, "learning_rate": 4.7654981842322995e-05, "loss": 2.5195, "step": 947000 }, { "epoch": 4.69, "learning_rate": 4.765374325589691e-05, "loss": 2.5179, "step": 947500 }, { "epoch": 4.7, "learning_rate": 4.765250466947083e-05, "loss": 2.5338, "step": 948000 }, { "epoch": 4.7, "learning_rate": 4.7651266083044746e-05, "loss": 2.5614, "step": 948500 }, { "epoch": 4.7, "learning_rate": 4.7650029973791515e-05, "loss": 2.5336, "step": 949000 }, { "epoch": 4.7, "learning_rate": 4.764879138736543e-05, "loss": 2.4901, "step": 949500 }, { "epoch": 4.71, "learning_rate": 4.76475552781122e-05, "loss": 2.5283, "step": 950000 }, { "epoch": 4.71, "learning_rate": 4.764631669168612e-05, "loss": 2.5171, "step": 950500 }, { "epoch": 4.71, "learning_rate": 4.764508058243288e-05, "loss": 2.5353, "step": 951000 }, { "epoch": 4.71, "learning_rate": 4.7643841996006796e-05, "loss": 2.5246, "step": 951500 }, { "epoch": 4.72, "learning_rate": 4.764260340958071e-05, "loss": 2.5173, "step": 952000 }, { "epoch": 4.72, "learning_rate": 4.764136482315463e-05, "loss": 2.5125, "step": 952500 }, { "epoch": 4.72, "learning_rate": 4.764012623672855e-05, "loss": 2.5237, "step": 953000 }, { "epoch": 4.72, "learning_rate": 4.7638887650302464e-05, "loss": 2.5033, "step": 953500 }, { "epoch": 4.73, "learning_rate": 4.763764906387638e-05, "loss": 2.5233, "step": 954000 }, { "epoch": 4.73, "learning_rate": 4.76364104774503e-05, "loss": 2.5408, "step": 954500 }, { "epoch": 4.73, "learning_rate": 4.7635171891024215e-05, "loss": 2.5335, "step": 955000 }, { "epoch": 4.73, "learning_rate": 4.763393330459813e-05, "loss": 2.5481, "step": 955500 }, { "epoch": 4.74, "learning_rate": 4.763269471817205e-05, "loss": 2.5219, "step": 956000 }, { "epoch": 4.74, "learning_rate": 4.763145613174596e-05, "loss": 2.5032, "step": 956500 }, { "epoch": 4.74, "learning_rate": 4.7630217545319876e-05, "loss": 2.5114, "step": 957000 }, { "epoch": 4.74, "learning_rate": 4.762897895889379e-05, "loss": 2.5286, "step": 957500 }, { "epoch": 4.75, "learning_rate": 4.762774284964057e-05, "loss": 2.5163, "step": 958000 }, { "epoch": 4.75, "learning_rate": 4.7626504263214485e-05, "loss": 2.5127, "step": 958500 }, { "epoch": 4.75, "learning_rate": 4.76252656767884e-05, "loss": 2.5466, "step": 959000 }, { "epoch": 4.75, "learning_rate": 4.762402709036231e-05, "loss": 2.5094, "step": 959500 }, { "epoch": 4.76, "learning_rate": 4.762278850393623e-05, "loss": 2.5276, "step": 960000 }, { "epoch": 4.76, "learning_rate": 4.7621549917510146e-05, "loss": 2.5316, "step": 960500 }, { "epoch": 4.76, "learning_rate": 4.762031133108406e-05, "loss": 2.5051, "step": 961000 }, { "epoch": 4.76, "learning_rate": 4.761907522183083e-05, "loss": 2.495, "step": 961500 }, { "epoch": 4.77, "learning_rate": 4.761783663540475e-05, "loss": 2.5125, "step": 962000 }, { "epoch": 4.77, "learning_rate": 4.761659804897866e-05, "loss": 2.5293, "step": 962500 }, { "epoch": 4.77, "learning_rate": 4.7615359462552576e-05, "loss": 2.5661, "step": 963000 }, { "epoch": 4.77, "learning_rate": 4.761412087612649e-05, "loss": 2.5403, "step": 963500 }, { "epoch": 4.78, "learning_rate": 4.761288228970041e-05, "loss": 2.4998, "step": 964000 }, { "epoch": 4.78, "learning_rate": 4.761164370327433e-05, "loss": 2.5193, "step": 964500 }, { "epoch": 4.78, "learning_rate": 4.7610405116848244e-05, "loss": 2.5213, "step": 965000 }, { "epoch": 4.78, "learning_rate": 4.760916653042216e-05, "loss": 2.5163, "step": 965500 }, { "epoch": 4.79, "learning_rate": 4.760792794399608e-05, "loss": 2.5139, "step": 966000 }, { "epoch": 4.79, "learning_rate": 4.7606689357569994e-05, "loss": 2.5008, "step": 966500 }, { "epoch": 4.79, "learning_rate": 4.760545324831676e-05, "loss": 2.5123, "step": 967000 }, { "epoch": 4.79, "learning_rate": 4.760421466189068e-05, "loss": 2.519, "step": 967500 }, { "epoch": 4.8, "learning_rate": 4.76029760754646e-05, "loss": 2.5064, "step": 968000 }, { "epoch": 4.8, "learning_rate": 4.7601739966211366e-05, "loss": 2.5199, "step": 968500 }, { "epoch": 4.8, "learning_rate": 4.7600501379785276e-05, "loss": 2.5431, "step": 969000 }, { "epoch": 4.8, "learning_rate": 4.759926279335919e-05, "loss": 2.5357, "step": 969500 }, { "epoch": 4.81, "learning_rate": 4.759802420693311e-05, "loss": 2.5141, "step": 970000 }, { "epoch": 4.81, "learning_rate": 4.759678562050703e-05, "loss": 2.5247, "step": 970500 }, { "epoch": 4.81, "learning_rate": 4.7595547034080944e-05, "loss": 2.5246, "step": 971000 }, { "epoch": 4.81, "learning_rate": 4.759430844765486e-05, "loss": 2.5307, "step": 971500 }, { "epoch": 4.82, "learning_rate": 4.759307233840163e-05, "loss": 2.5291, "step": 972000 }, { "epoch": 4.82, "learning_rate": 4.7591833751975546e-05, "loss": 2.5249, "step": 972500 }, { "epoch": 4.82, "learning_rate": 4.759059516554946e-05, "loss": 2.5034, "step": 973000 }, { "epoch": 4.82, "learning_rate": 4.758935657912338e-05, "loss": 2.4944, "step": 973500 }, { "epoch": 4.83, "learning_rate": 4.75881179926973e-05, "loss": 2.5173, "step": 974000 }, { "epoch": 4.83, "learning_rate": 4.7586881883444066e-05, "loss": 2.5288, "step": 974500 }, { "epoch": 4.83, "learning_rate": 4.7585643297017976e-05, "loss": 2.5382, "step": 975000 }, { "epoch": 4.83, "learning_rate": 4.758440471059189e-05, "loss": 2.5082, "step": 975500 }, { "epoch": 4.84, "learning_rate": 4.758316612416581e-05, "loss": 2.4987, "step": 976000 }, { "epoch": 4.84, "learning_rate": 4.758192753773973e-05, "loss": 2.5288, "step": 976500 }, { "epoch": 4.84, "learning_rate": 4.7580693905659354e-05, "loss": 2.5066, "step": 977000 }, { "epoch": 4.84, "learning_rate": 4.757945531923327e-05, "loss": 2.5299, "step": 977500 }, { "epoch": 4.85, "learning_rate": 4.757821673280718e-05, "loss": 2.5367, "step": 978000 }, { "epoch": 4.85, "learning_rate": 4.75769781463811e-05, "loss": 2.5034, "step": 978500 }, { "epoch": 4.85, "learning_rate": 4.7575739559955015e-05, "loss": 2.5142, "step": 979000 }, { "epoch": 4.85, "learning_rate": 4.757450097352893e-05, "loss": 2.5349, "step": 979500 }, { "epoch": 4.86, "learning_rate": 4.757326238710285e-05, "loss": 2.5567, "step": 980000 }, { "epoch": 4.86, "learning_rate": 4.7572023800676766e-05, "loss": 2.5057, "step": 980500 }, { "epoch": 4.86, "learning_rate": 4.757078521425068e-05, "loss": 2.5152, "step": 981000 }, { "epoch": 4.86, "learning_rate": 4.756954662782459e-05, "loss": 2.5161, "step": 981500 }, { "epoch": 4.87, "learning_rate": 4.756830804139851e-05, "loss": 2.4941, "step": 982000 }, { "epoch": 4.87, "learning_rate": 4.756706945497243e-05, "loss": 2.5139, "step": 982500 }, { "epoch": 4.87, "learning_rate": 4.7565830868546344e-05, "loss": 2.5407, "step": 983000 }, { "epoch": 4.87, "learning_rate": 4.756459475929312e-05, "loss": 2.5312, "step": 983500 }, { "epoch": 4.88, "learning_rate": 4.7563356172867036e-05, "loss": 2.5106, "step": 984000 }, { "epoch": 4.88, "learning_rate": 4.756211758644095e-05, "loss": 2.5268, "step": 984500 }, { "epoch": 4.88, "learning_rate": 4.7560879000014864e-05, "loss": 2.537, "step": 985000 }, { "epoch": 4.88, "learning_rate": 4.755964041358878e-05, "loss": 2.5343, "step": 985500 }, { "epoch": 4.88, "learning_rate": 4.75584018271627e-05, "loss": 2.5492, "step": 986000 }, { "epoch": 4.89, "learning_rate": 4.7557163240736614e-05, "loss": 2.5359, "step": 986500 }, { "epoch": 4.89, "learning_rate": 4.755592465431053e-05, "loss": 2.5285, "step": 987000 }, { "epoch": 4.89, "learning_rate": 4.755468854505729e-05, "loss": 2.5059, "step": 987500 }, { "epoch": 4.89, "learning_rate": 4.755344995863121e-05, "loss": 2.5226, "step": 988000 }, { "epoch": 4.9, "learning_rate": 4.755221137220513e-05, "loss": 2.5191, "step": 988500 }, { "epoch": 4.9, "learning_rate": 4.75509752629519e-05, "loss": 2.5028, "step": 989000 }, { "epoch": 4.9, "learning_rate": 4.754973915369867e-05, "loss": 2.5224, "step": 989500 }, { "epoch": 4.9, "learning_rate": 4.7548503044445434e-05, "loss": 2.5351, "step": 990000 }, { "epoch": 4.91, "learning_rate": 4.754726445801935e-05, "loss": 2.5295, "step": 990500 }, { "epoch": 4.91, "learning_rate": 4.754602587159327e-05, "loss": 2.5256, "step": 991000 }, { "epoch": 4.91, "learning_rate": 4.7544787285167184e-05, "loss": 2.5175, "step": 991500 }, { "epoch": 4.91, "learning_rate": 4.75435486987411e-05, "loss": 2.5368, "step": 992000 }, { "epoch": 4.92, "learning_rate": 4.754231011231502e-05, "loss": 2.5184, "step": 992500 }, { "epoch": 4.92, "learning_rate": 4.7541071525888935e-05, "loss": 2.5307, "step": 993000 }, { "epoch": 4.92, "learning_rate": 4.753983293946285e-05, "loss": 2.5217, "step": 993500 }, { "epoch": 4.92, "learning_rate": 4.753859435303677e-05, "loss": 2.5294, "step": 994000 }, { "epoch": 4.93, "learning_rate": 4.753735824378354e-05, "loss": 2.505, "step": 994500 }, { "epoch": 4.93, "learning_rate": 4.7536119657357455e-05, "loss": 2.5285, "step": 995000 }, { "epoch": 4.93, "learning_rate": 4.753488107093137e-05, "loss": 2.5311, "step": 995500 }, { "epoch": 4.93, "learning_rate": 4.753364248450529e-05, "loss": 2.5233, "step": 996000 }, { "epoch": 4.94, "learning_rate": 4.7532403898079206e-05, "loss": 2.5115, "step": 996500 }, { "epoch": 4.94, "learning_rate": 4.753116531165312e-05, "loss": 2.5477, "step": 997000 }, { "epoch": 4.94, "learning_rate": 4.7529929202399885e-05, "loss": 2.5459, "step": 997500 }, { "epoch": 4.94, "learning_rate": 4.75286906159738e-05, "loss": 2.5032, "step": 998000 }, { "epoch": 4.95, "learning_rate": 4.752745202954772e-05, "loss": 2.506, "step": 998500 }, { "epoch": 4.95, "learning_rate": 4.7526213443121635e-05, "loss": 2.5271, "step": 999000 }, { "epoch": 4.95, "learning_rate": 4.7524977333868404e-05, "loss": 2.5142, "step": 999500 }, { "epoch": 4.95, "learning_rate": 4.752373874744232e-05, "loss": 2.5061, "step": 1000000 }, { "epoch": 4.96, "learning_rate": 4.752250016101624e-05, "loss": 2.5104, "step": 1000500 }, { "epoch": 4.96, "learning_rate": 4.7521261574590155e-05, "loss": 2.5104, "step": 1001000 }, { "epoch": 4.96, "learning_rate": 4.752002298816407e-05, "loss": 2.5243, "step": 1001500 }, { "epoch": 4.96, "learning_rate": 4.751878440173799e-05, "loss": 2.5186, "step": 1002000 }, { "epoch": 4.97, "learning_rate": 4.7517545815311906e-05, "loss": 2.52, "step": 1002500 }, { "epoch": 4.97, "learning_rate": 4.751630722888582e-05, "loss": 2.5164, "step": 1003000 }, { "epoch": 4.97, "learning_rate": 4.751506864245974e-05, "loss": 2.5276, "step": 1003500 }, { "epoch": 4.97, "learning_rate": 4.7513830056033656e-05, "loss": 2.5109, "step": 1004000 }, { "epoch": 4.98, "learning_rate": 4.751259146960757e-05, "loss": 2.5262, "step": 1004500 }, { "epoch": 4.98, "learning_rate": 4.7511352883181484e-05, "loss": 2.5065, "step": 1005000 }, { "epoch": 4.98, "learning_rate": 4.751011677392825e-05, "loss": 2.5033, "step": 1005500 }, { "epoch": 4.98, "learning_rate": 4.750887818750217e-05, "loss": 2.5218, "step": 1006000 }, { "epoch": 4.99, "learning_rate": 4.7507639601076086e-05, "loss": 2.5046, "step": 1006500 }, { "epoch": 4.99, "learning_rate": 4.750640101465e-05, "loss": 2.5356, "step": 1007000 }, { "epoch": 4.99, "learning_rate": 4.750516490539677e-05, "loss": 2.5308, "step": 1007500 }, { "epoch": 4.99, "learning_rate": 4.750392879614354e-05, "loss": 2.5266, "step": 1008000 }, { "epoch": 5.0, "learning_rate": 4.750269020971745e-05, "loss": 2.5191, "step": 1008500 }, { "epoch": 5.0, "learning_rate": 4.750145162329137e-05, "loss": 2.5071, "step": 1009000 }, { "epoch": 5.0, "eval_accuracy": 0.635423503873417, "eval_accuracy_mlm": 0.588502945831283, "eval_accuracy_nsp": 0.8568240383747975, "eval_loss": 2.4675350189208984, "eval_runtime": 146.0196, "eval_samples_per_second": 1746.06, "eval_steps_per_second": 72.757, "step": 1009215 }, { "epoch": 5.0, "learning_rate": 4.7500213036865285e-05, "loss": 2.5021, "step": 1009500 }, { "epoch": 5.0, "learning_rate": 4.74989744504392e-05, "loss": 2.4848, "step": 1010000 }, { "epoch": 5.01, "learning_rate": 4.749773586401312e-05, "loss": 2.4841, "step": 1010500 }, { "epoch": 5.01, "learning_rate": 4.7496497277587036e-05, "loss": 2.4968, "step": 1011000 }, { "epoch": 5.01, "learning_rate": 4.749525869116095e-05, "loss": 2.4742, "step": 1011500 }, { "epoch": 5.01, "learning_rate": 4.749402010473487e-05, "loss": 2.5161, "step": 1012000 }, { "epoch": 5.02, "learning_rate": 4.7492781518308786e-05, "loss": 2.496, "step": 1012500 }, { "epoch": 5.02, "learning_rate": 4.74915429318827e-05, "loss": 2.4761, "step": 1013000 }, { "epoch": 5.02, "learning_rate": 4.749030434545662e-05, "loss": 2.514, "step": 1013500 }, { "epoch": 5.02, "learning_rate": 4.748906823620339e-05, "loss": 2.4858, "step": 1014000 }, { "epoch": 5.03, "learning_rate": 4.7487829649777306e-05, "loss": 2.5104, "step": 1014500 }, { "epoch": 5.03, "learning_rate": 4.7486593540524075e-05, "loss": 2.4841, "step": 1015000 }, { "epoch": 5.03, "learning_rate": 4.7485354954097985e-05, "loss": 2.4851, "step": 1015500 }, { "epoch": 5.03, "learning_rate": 4.748411884484476e-05, "loss": 2.4796, "step": 1016000 }, { "epoch": 5.04, "learning_rate": 4.748288025841867e-05, "loss": 2.512, "step": 1016500 }, { "epoch": 5.04, "learning_rate": 4.7481644149165446e-05, "loss": 2.4949, "step": 1017000 }, { "epoch": 5.04, "learning_rate": 4.748040556273936e-05, "loss": 2.4787, "step": 1017500 }, { "epoch": 5.04, "learning_rate": 4.747916697631328e-05, "loss": 2.4854, "step": 1018000 }, { "epoch": 5.05, "learning_rate": 4.74779283898872e-05, "loss": 2.505, "step": 1018500 }, { "epoch": 5.05, "learning_rate": 4.7476689803461114e-05, "loss": 2.4824, "step": 1019000 }, { "epoch": 5.05, "learning_rate": 4.7475451217035024e-05, "loss": 2.5006, "step": 1019500 }, { "epoch": 5.05, "learning_rate": 4.747421263060894e-05, "loss": 2.4915, "step": 1020000 }, { "epoch": 5.06, "learning_rate": 4.747297652135571e-05, "loss": 2.5021, "step": 1020500 }, { "epoch": 5.06, "learning_rate": 4.747173793492963e-05, "loss": 2.4845, "step": 1021000 }, { "epoch": 5.06, "learning_rate": 4.7470499348503544e-05, "loss": 2.4846, "step": 1021500 }, { "epoch": 5.06, "learning_rate": 4.746926076207746e-05, "loss": 2.4703, "step": 1022000 }, { "epoch": 5.07, "learning_rate": 4.746802465282423e-05, "loss": 2.5154, "step": 1022500 }, { "epoch": 5.07, "learning_rate": 4.7466786066398146e-05, "loss": 2.4895, "step": 1023000 }, { "epoch": 5.07, "learning_rate": 4.746554747997206e-05, "loss": 2.4909, "step": 1023500 }, { "epoch": 5.07, "learning_rate": 4.746430889354598e-05, "loss": 2.4931, "step": 1024000 }, { "epoch": 5.08, "learning_rate": 4.74630703071199e-05, "loss": 2.4837, "step": 1024500 }, { "epoch": 5.08, "learning_rate": 4.746183667503951e-05, "loss": 2.499, "step": 1025000 }, { "epoch": 5.08, "learning_rate": 4.746059808861343e-05, "loss": 2.5291, "step": 1025500 }, { "epoch": 5.08, "learning_rate": 4.7459359502187345e-05, "loss": 2.4858, "step": 1026000 }, { "epoch": 5.09, "learning_rate": 4.745812091576126e-05, "loss": 2.4969, "step": 1026500 }, { "epoch": 5.09, "learning_rate": 4.745688232933518e-05, "loss": 2.4516, "step": 1027000 }, { "epoch": 5.09, "learning_rate": 4.7455643742909096e-05, "loss": 2.5106, "step": 1027500 }, { "epoch": 5.09, "learning_rate": 4.745440515648301e-05, "loss": 2.4804, "step": 1028000 }, { "epoch": 5.1, "learning_rate": 4.745316657005693e-05, "loss": 2.4969, "step": 1028500 }, { "epoch": 5.1, "learning_rate": 4.7451927983630846e-05, "loss": 2.5142, "step": 1029000 }, { "epoch": 5.1, "learning_rate": 4.745068939720476e-05, "loss": 2.4816, "step": 1029500 }, { "epoch": 5.1, "learning_rate": 4.744945081077868e-05, "loss": 2.4882, "step": 1030000 }, { "epoch": 5.11, "learning_rate": 4.74482122243526e-05, "loss": 2.4912, "step": 1030500 }, { "epoch": 5.11, "learning_rate": 4.7446973637926514e-05, "loss": 2.4795, "step": 1031000 }, { "epoch": 5.11, "learning_rate": 4.744573505150043e-05, "loss": 2.498, "step": 1031500 }, { "epoch": 5.11, "learning_rate": 4.744449646507434e-05, "loss": 2.5025, "step": 1032000 }, { "epoch": 5.12, "learning_rate": 4.744325787864826e-05, "loss": 2.4867, "step": 1032500 }, { "epoch": 5.12, "learning_rate": 4.7442019292222175e-05, "loss": 2.5047, "step": 1033000 }, { "epoch": 5.12, "learning_rate": 4.744078070579609e-05, "loss": 2.493, "step": 1033500 }, { "epoch": 5.12, "learning_rate": 4.743954211937001e-05, "loss": 2.4766, "step": 1034000 }, { "epoch": 5.13, "learning_rate": 4.7438303532943926e-05, "loss": 2.4811, "step": 1034500 }, { "epoch": 5.13, "learning_rate": 4.743706742369069e-05, "loss": 2.4792, "step": 1035000 }, { "epoch": 5.13, "learning_rate": 4.7435828837264605e-05, "loss": 2.4694, "step": 1035500 }, { "epoch": 5.13, "learning_rate": 4.743459272801138e-05, "loss": 2.4567, "step": 1036000 }, { "epoch": 5.14, "learning_rate": 4.7433359095930994e-05, "loss": 2.4914, "step": 1036500 }, { "epoch": 5.14, "learning_rate": 4.743212050950491e-05, "loss": 2.4963, "step": 1037000 }, { "epoch": 5.14, "learning_rate": 4.743088192307883e-05, "loss": 2.5135, "step": 1037500 }, { "epoch": 5.14, "learning_rate": 4.7429643336652745e-05, "loss": 2.5065, "step": 1038000 }, { "epoch": 5.15, "learning_rate": 4.742840475022666e-05, "loss": 2.5155, "step": 1038500 }, { "epoch": 5.15, "learning_rate": 4.742716616380058e-05, "loss": 2.5172, "step": 1039000 }, { "epoch": 5.15, "learning_rate": 4.7425927577374496e-05, "loss": 2.4787, "step": 1039500 }, { "epoch": 5.15, "learning_rate": 4.742468899094841e-05, "loss": 2.4847, "step": 1040000 }, { "epoch": 5.15, "learning_rate": 4.742345040452233e-05, "loss": 2.5204, "step": 1040500 }, { "epoch": 5.16, "learning_rate": 4.742221181809625e-05, "loss": 2.4876, "step": 1041000 }, { "epoch": 5.16, "learning_rate": 4.7420973231670164e-05, "loss": 2.5044, "step": 1041500 }, { "epoch": 5.16, "learning_rate": 4.741973464524408e-05, "loss": 2.4813, "step": 1042000 }, { "epoch": 5.16, "learning_rate": 4.7418496058818e-05, "loss": 2.4927, "step": 1042500 }, { "epoch": 5.17, "learning_rate": 4.7417257472391914e-05, "loss": 2.5097, "step": 1043000 }, { "epoch": 5.17, "learning_rate": 4.741601888596583e-05, "loss": 2.5111, "step": 1043500 }, { "epoch": 5.17, "learning_rate": 4.741478277671259e-05, "loss": 2.5181, "step": 1044000 }, { "epoch": 5.17, "learning_rate": 4.741354419028651e-05, "loss": 2.5167, "step": 1044500 }, { "epoch": 5.18, "learning_rate": 4.741230560386043e-05, "loss": 2.5013, "step": 1045000 }, { "epoch": 5.18, "learning_rate": 4.7411067017434344e-05, "loss": 2.516, "step": 1045500 }, { "epoch": 5.18, "learning_rate": 4.740982843100826e-05, "loss": 2.4898, "step": 1046000 }, { "epoch": 5.18, "learning_rate": 4.740858984458218e-05, "loss": 2.5233, "step": 1046500 }, { "epoch": 5.19, "learning_rate": 4.7407351258156095e-05, "loss": 2.5021, "step": 1047000 }, { "epoch": 5.19, "learning_rate": 4.7406115148902864e-05, "loss": 2.4977, "step": 1047500 }, { "epoch": 5.19, "learning_rate": 4.740487656247678e-05, "loss": 2.5229, "step": 1048000 }, { "epoch": 5.19, "learning_rate": 4.74036379760507e-05, "loss": 2.5149, "step": 1048500 }, { "epoch": 5.2, "learning_rate": 4.7402399389624615e-05, "loss": 2.5227, "step": 1049000 }, { "epoch": 5.2, "learning_rate": 4.740116328037138e-05, "loss": 2.5052, "step": 1049500 }, { "epoch": 5.2, "learning_rate": 4.7399924693945293e-05, "loss": 2.5153, "step": 1050000 }, { "epoch": 5.2, "learning_rate": 4.739868610751921e-05, "loss": 2.5106, "step": 1050500 }, { "epoch": 5.21, "learning_rate": 4.739744752109313e-05, "loss": 2.4979, "step": 1051000 }, { "epoch": 5.21, "learning_rate": 4.7396208934667044e-05, "loss": 2.4952, "step": 1051500 }, { "epoch": 5.21, "learning_rate": 4.739497034824096e-05, "loss": 2.5124, "step": 1052000 }, { "epoch": 5.21, "learning_rate": 4.739373176181488e-05, "loss": 2.4948, "step": 1052500 }, { "epoch": 5.22, "learning_rate": 4.7392493175388795e-05, "loss": 2.5208, "step": 1053000 }, { "epoch": 5.22, "learning_rate": 4.7391257066135564e-05, "loss": 2.4947, "step": 1053500 }, { "epoch": 5.22, "learning_rate": 4.7390023434055185e-05, "loss": 2.505, "step": 1054000 }, { "epoch": 5.22, "learning_rate": 4.7388784847629095e-05, "loss": 2.5084, "step": 1054500 }, { "epoch": 5.23, "learning_rate": 4.738754626120301e-05, "loss": 2.5115, "step": 1055000 }, { "epoch": 5.23, "learning_rate": 4.738630767477693e-05, "loss": 2.5227, "step": 1055500 }, { "epoch": 5.23, "learning_rate": 4.7385069088350846e-05, "loss": 2.5089, "step": 1056000 }, { "epoch": 5.23, "learning_rate": 4.738383050192476e-05, "loss": 2.5291, "step": 1056500 }, { "epoch": 5.24, "learning_rate": 4.738259191549868e-05, "loss": 2.5091, "step": 1057000 }, { "epoch": 5.24, "learning_rate": 4.7381353329072596e-05, "loss": 2.4981, "step": 1057500 }, { "epoch": 5.24, "learning_rate": 4.738011474264651e-05, "loss": 2.4948, "step": 1058000 }, { "epoch": 5.24, "learning_rate": 4.737887615622043e-05, "loss": 2.5134, "step": 1058500 }, { "epoch": 5.25, "learning_rate": 4.737763756979435e-05, "loss": 2.528, "step": 1059000 }, { "epoch": 5.25, "learning_rate": 4.7376398983368264e-05, "loss": 2.5002, "step": 1059500 }, { "epoch": 5.25, "learning_rate": 4.737516287411503e-05, "loss": 2.4989, "step": 1060000 }, { "epoch": 5.25, "learning_rate": 4.737392428768895e-05, "loss": 2.5081, "step": 1060500 }, { "epoch": 5.26, "learning_rate": 4.737268570126287e-05, "loss": 2.4678, "step": 1061000 }, { "epoch": 5.26, "learning_rate": 4.7371447114836784e-05, "loss": 2.5176, "step": 1061500 }, { "epoch": 5.26, "learning_rate": 4.73702085284107e-05, "loss": 2.5324, "step": 1062000 }, { "epoch": 5.26, "learning_rate": 4.736897489633032e-05, "loss": 2.4977, "step": 1062500 }, { "epoch": 5.27, "learning_rate": 4.736773630990424e-05, "loss": 2.4846, "step": 1063000 }, { "epoch": 5.27, "learning_rate": 4.7366497723478155e-05, "loss": 2.4992, "step": 1063500 }, { "epoch": 5.27, "learning_rate": 4.7365259137052065e-05, "loss": 2.5183, "step": 1064000 }, { "epoch": 5.27, "learning_rate": 4.736402055062598e-05, "loss": 2.5053, "step": 1064500 }, { "epoch": 5.28, "learning_rate": 4.73627819641999e-05, "loss": 2.5262, "step": 1065000 }, { "epoch": 5.28, "learning_rate": 4.7361543377773816e-05, "loss": 2.515, "step": 1065500 }, { "epoch": 5.28, "learning_rate": 4.736030479134773e-05, "loss": 2.4938, "step": 1066000 }, { "epoch": 5.28, "learning_rate": 4.735906620492165e-05, "loss": 2.5149, "step": 1066500 }, { "epoch": 5.29, "learning_rate": 4.735782761849557e-05, "loss": 2.5019, "step": 1067000 }, { "epoch": 5.29, "learning_rate": 4.735659150924233e-05, "loss": 2.5112, "step": 1067500 }, { "epoch": 5.29, "learning_rate": 4.7355352922816246e-05, "loss": 2.5042, "step": 1068000 }, { "epoch": 5.29, "learning_rate": 4.735411681356302e-05, "loss": 2.5022, "step": 1068500 }, { "epoch": 5.3, "learning_rate": 4.735287822713694e-05, "loss": 2.5294, "step": 1069000 }, { "epoch": 5.3, "learning_rate": 4.7351639640710855e-05, "loss": 2.4906, "step": 1069500 }, { "epoch": 5.3, "learning_rate": 4.735040105428477e-05, "loss": 2.497, "step": 1070000 }, { "epoch": 5.3, "learning_rate": 4.734916246785868e-05, "loss": 2.5093, "step": 1070500 }, { "epoch": 5.31, "learning_rate": 4.73479238814326e-05, "loss": 2.4948, "step": 1071000 }, { "epoch": 5.31, "learning_rate": 4.7346685295006516e-05, "loss": 2.5087, "step": 1071500 }, { "epoch": 5.31, "learning_rate": 4.734544670858043e-05, "loss": 2.5076, "step": 1072000 }, { "epoch": 5.31, "learning_rate": 4.734420812215435e-05, "loss": 2.4856, "step": 1072500 }, { "epoch": 5.32, "learning_rate": 4.734296953572827e-05, "loss": 2.4979, "step": 1073000 }, { "epoch": 5.32, "learning_rate": 4.7341730949302184e-05, "loss": 2.4767, "step": 1073500 }, { "epoch": 5.32, "learning_rate": 4.7340494840048946e-05, "loss": 2.4792, "step": 1074000 }, { "epoch": 5.32, "learning_rate": 4.733925625362286e-05, "loss": 2.4931, "step": 1074500 }, { "epoch": 5.33, "learning_rate": 4.733801766719678e-05, "loss": 2.4973, "step": 1075000 }, { "epoch": 5.33, "learning_rate": 4.73367790807707e-05, "loss": 2.4988, "step": 1075500 }, { "epoch": 5.33, "learning_rate": 4.7335540494344614e-05, "loss": 2.4931, "step": 1076000 }, { "epoch": 5.33, "learning_rate": 4.733430190791853e-05, "loss": 2.5197, "step": 1076500 }, { "epoch": 5.34, "learning_rate": 4.733306332149245e-05, "loss": 2.4846, "step": 1077000 }, { "epoch": 5.34, "learning_rate": 4.7331824735066364e-05, "loss": 2.5041, "step": 1077500 }, { "epoch": 5.34, "learning_rate": 4.733058862581313e-05, "loss": 2.4894, "step": 1078000 }, { "epoch": 5.34, "learning_rate": 4.732935003938705e-05, "loss": 2.5004, "step": 1078500 }, { "epoch": 5.35, "learning_rate": 4.732811145296097e-05, "loss": 2.5042, "step": 1079000 }, { "epoch": 5.35, "learning_rate": 4.7326875343707736e-05, "loss": 2.4997, "step": 1079500 }, { "epoch": 5.35, "learning_rate": 4.732563675728165e-05, "loss": 2.5063, "step": 1080000 }, { "epoch": 5.35, "learning_rate": 4.732439817085556e-05, "loss": 2.5086, "step": 1080500 }, { "epoch": 5.36, "learning_rate": 4.732315958442948e-05, "loss": 2.4812, "step": 1081000 }, { "epoch": 5.36, "learning_rate": 4.73219209980034e-05, "loss": 2.4943, "step": 1081500 }, { "epoch": 5.36, "learning_rate": 4.7320682411577314e-05, "loss": 2.5031, "step": 1082000 }, { "epoch": 5.36, "learning_rate": 4.731944382515123e-05, "loss": 2.5015, "step": 1082500 }, { "epoch": 5.37, "learning_rate": 4.731820523872515e-05, "loss": 2.5043, "step": 1083000 }, { "epoch": 5.37, "learning_rate": 4.7316966652299064e-05, "loss": 2.5235, "step": 1083500 }, { "epoch": 5.37, "learning_rate": 4.731572806587298e-05, "loss": 2.491, "step": 1084000 }, { "epoch": 5.37, "learning_rate": 4.73144894794469e-05, "loss": 2.4956, "step": 1084500 }, { "epoch": 5.38, "learning_rate": 4.7313250893020815e-05, "loss": 2.5086, "step": 1085000 }, { "epoch": 5.38, "learning_rate": 4.731201230659473e-05, "loss": 2.534, "step": 1085500 }, { "epoch": 5.38, "learning_rate": 4.73107761973415e-05, "loss": 2.5147, "step": 1086000 }, { "epoch": 5.38, "learning_rate": 4.730953761091542e-05, "loss": 2.5247, "step": 1086500 }, { "epoch": 5.39, "learning_rate": 4.7308299024489335e-05, "loss": 2.5079, "step": 1087000 }, { "epoch": 5.39, "learning_rate": 4.730706043806325e-05, "loss": 2.5156, "step": 1087500 }, { "epoch": 5.39, "learning_rate": 4.730582185163717e-05, "loss": 2.4809, "step": 1088000 }, { "epoch": 5.39, "learning_rate": 4.730458574238393e-05, "loss": 2.5121, "step": 1088500 }, { "epoch": 5.4, "learning_rate": 4.730334715595785e-05, "loss": 2.5155, "step": 1089000 }, { "epoch": 5.4, "learning_rate": 4.7302108569531765e-05, "loss": 2.4882, "step": 1089500 }, { "epoch": 5.4, "learning_rate": 4.730086998310568e-05, "loss": 2.4941, "step": 1090000 }, { "epoch": 5.4, "learning_rate": 4.72996313966796e-05, "loss": 2.4765, "step": 1090500 }, { "epoch": 5.41, "learning_rate": 4.7298392810253515e-05, "loss": 2.4786, "step": 1091000 }, { "epoch": 5.41, "learning_rate": 4.729715422382743e-05, "loss": 2.5199, "step": 1091500 }, { "epoch": 5.41, "learning_rate": 4.72959181145742e-05, "loss": 2.4885, "step": 1092000 }, { "epoch": 5.41, "learning_rate": 4.729468200532097e-05, "loss": 2.523, "step": 1092500 }, { "epoch": 5.42, "learning_rate": 4.729344589606774e-05, "loss": 2.5936, "step": 1093000 }, { "epoch": 5.42, "learning_rate": 4.7292207309641656e-05, "loss": 2.5146, "step": 1093500 }, { "epoch": 5.42, "learning_rate": 4.729096872321557e-05, "loss": 2.5291, "step": 1094000 }, { "epoch": 5.42, "learning_rate": 4.728973013678949e-05, "loss": 2.5103, "step": 1094500 }, { "epoch": 5.43, "learning_rate": 4.7288491550363406e-05, "loss": 2.5062, "step": 1095000 }, { "epoch": 5.43, "learning_rate": 4.728725791828302e-05, "loss": 2.5114, "step": 1095500 }, { "epoch": 5.43, "learning_rate": 4.728602180902979e-05, "loss": 2.5239, "step": 1096000 }, { "epoch": 5.43, "learning_rate": 4.7284783222603706e-05, "loss": 2.5374, "step": 1096500 }, { "epoch": 5.43, "learning_rate": 4.728354463617762e-05, "loss": 2.5018, "step": 1097000 }, { "epoch": 5.44, "learning_rate": 4.728230604975154e-05, "loss": 2.4786, "step": 1097500 }, { "epoch": 5.44, "learning_rate": 4.7281069940498316e-05, "loss": 2.5172, "step": 1098000 }, { "epoch": 5.44, "learning_rate": 4.727983135407223e-05, "loss": 2.5365, "step": 1098500 }, { "epoch": 5.44, "learning_rate": 4.727859276764614e-05, "loss": 2.6215, "step": 1099000 }, { "epoch": 5.45, "learning_rate": 4.727735418122006e-05, "loss": 2.5575, "step": 1099500 }, { "epoch": 5.45, "learning_rate": 4.7276115594793977e-05, "loss": 2.549, "step": 1100000 }, { "epoch": 5.45, "learning_rate": 4.7274877008367893e-05, "loss": 2.538, "step": 1100500 }, { "epoch": 5.45, "learning_rate": 4.727364089911466e-05, "loss": 2.698, "step": 1101000 }, { "epoch": 5.46, "learning_rate": 4.727240231268858e-05, "loss": 2.6845, "step": 1101500 }, { "epoch": 5.46, "learning_rate": 4.727116372626249e-05, "loss": 2.6379, "step": 1102000 }, { "epoch": 5.46, "learning_rate": 4.7269925139836406e-05, "loss": 2.5858, "step": 1102500 }, { "epoch": 5.46, "learning_rate": 4.726868655341032e-05, "loss": 2.5711, "step": 1103000 }, { "epoch": 5.47, "learning_rate": 4.7267452921329944e-05, "loss": 2.6267, "step": 1103500 }, { "epoch": 5.47, "learning_rate": 4.726621681207671e-05, "loss": 2.6757, "step": 1104000 }, { "epoch": 5.47, "learning_rate": 4.726497822565063e-05, "loss": 2.6265, "step": 1104500 }, { "epoch": 5.47, "learning_rate": 4.7263739639224547e-05, "loss": 2.6267, "step": 1105000 }, { "epoch": 5.48, "learning_rate": 4.7262501052798463e-05, "loss": 2.6607, "step": 1105500 }, { "epoch": 5.48, "learning_rate": 4.726126246637238e-05, "loss": 2.5848, "step": 1106000 }, { "epoch": 5.48, "learning_rate": 4.72600238799463e-05, "loss": 2.5908, "step": 1106500 }, { "epoch": 5.48, "learning_rate": 4.7258785293520214e-05, "loss": 2.6656, "step": 1107000 }, { "epoch": 5.49, "learning_rate": 4.725754918426698e-05, "loss": 2.5915, "step": 1107500 }, { "epoch": 5.49, "learning_rate": 4.72563105978409e-05, "loss": 2.6518, "step": 1108000 }, { "epoch": 5.49, "learning_rate": 4.725507201141482e-05, "loss": 2.617, "step": 1108500 }, { "epoch": 5.49, "learning_rate": 4.725383342498873e-05, "loss": 2.6337, "step": 1109000 }, { "epoch": 5.5, "learning_rate": 4.7252594838562644e-05, "loss": 2.5885, "step": 1109500 }, { "epoch": 5.5, "learning_rate": 4.725135625213656e-05, "loss": 2.5465, "step": 1110000 }, { "epoch": 5.5, "learning_rate": 4.725011766571048e-05, "loss": 2.5822, "step": 1110500 }, { "epoch": 5.5, "learning_rate": 4.7248879079284395e-05, "loss": 2.6102, "step": 1111000 }, { "epoch": 5.51, "learning_rate": 4.724764049285831e-05, "loss": 2.6242, "step": 1111500 }, { "epoch": 5.51, "learning_rate": 4.724640438360508e-05, "loss": 2.6015, "step": 1112000 }, { "epoch": 5.51, "learning_rate": 4.7245165797179e-05, "loss": 2.5667, "step": 1112500 }, { "epoch": 5.51, "learning_rate": 4.7243927210752914e-05, "loss": 2.5967, "step": 1113000 }, { "epoch": 5.52, "learning_rate": 4.724268862432683e-05, "loss": 2.591, "step": 1113500 }, { "epoch": 5.52, "learning_rate": 4.72414525150736e-05, "loss": 2.6024, "step": 1114000 }, { "epoch": 5.52, "learning_rate": 4.724021392864752e-05, "loss": 2.6239, "step": 1114500 }, { "epoch": 5.52, "learning_rate": 4.7238975342221434e-05, "loss": 2.6354, "step": 1115000 }, { "epoch": 5.53, "learning_rate": 4.723773675579535e-05, "loss": 2.6408, "step": 1115500 }, { "epoch": 5.53, "learning_rate": 4.723649816936926e-05, "loss": 2.6031, "step": 1116000 }, { "epoch": 5.53, "learning_rate": 4.723525958294318e-05, "loss": 2.6564, "step": 1116500 }, { "epoch": 5.53, "learning_rate": 4.7234020996517095e-05, "loss": 2.5826, "step": 1117000 }, { "epoch": 5.54, "learning_rate": 4.723278241009101e-05, "loss": 2.583, "step": 1117500 }, { "epoch": 5.54, "learning_rate": 4.723154382366493e-05, "loss": 2.5424, "step": 1118000 }, { "epoch": 5.54, "learning_rate": 4.7230305237238846e-05, "loss": 2.5366, "step": 1118500 }, { "epoch": 5.54, "learning_rate": 4.722906665081276e-05, "loss": 2.5275, "step": 1119000 }, { "epoch": 5.55, "learning_rate": 4.722782806438668e-05, "loss": 2.546, "step": 1119500 }, { "epoch": 5.55, "learning_rate": 4.7226589477960596e-05, "loss": 2.5238, "step": 1120000 }, { "epoch": 5.55, "learning_rate": 4.7225350891534513e-05, "loss": 2.535, "step": 1120500 }, { "epoch": 5.55, "learning_rate": 4.7224112305108424e-05, "loss": 2.52, "step": 1121000 }, { "epoch": 5.56, "learning_rate": 4.722287371868234e-05, "loss": 2.5252, "step": 1121500 }, { "epoch": 5.56, "learning_rate": 4.722163513225626e-05, "loss": 2.544, "step": 1122000 }, { "epoch": 5.56, "learning_rate": 4.7220396545830174e-05, "loss": 2.5424, "step": 1122500 }, { "epoch": 5.56, "learning_rate": 4.721915795940409e-05, "loss": 2.5489, "step": 1123000 }, { "epoch": 5.57, "learning_rate": 4.721791937297801e-05, "loss": 2.5057, "step": 1123500 }, { "epoch": 5.57, "learning_rate": 4.7216680786551925e-05, "loss": 2.5233, "step": 1124000 }, { "epoch": 5.57, "learning_rate": 4.721544220012584e-05, "loss": 2.53, "step": 1124500 }, { "epoch": 5.57, "learning_rate": 4.721420361369976e-05, "loss": 2.5091, "step": 1125000 }, { "epoch": 5.58, "learning_rate": 4.7212965027273676e-05, "loss": 2.5253, "step": 1125500 }, { "epoch": 5.58, "learning_rate": 4.7211728918020445e-05, "loss": 2.5188, "step": 1126000 }, { "epoch": 5.58, "learning_rate": 4.721049033159436e-05, "loss": 2.522, "step": 1126500 }, { "epoch": 5.58, "learning_rate": 4.720925669951398e-05, "loss": 2.5627, "step": 1127000 }, { "epoch": 5.59, "learning_rate": 4.72080181130879e-05, "loss": 2.526, "step": 1127500 }, { "epoch": 5.59, "learning_rate": 4.7206779526661816e-05, "loss": 2.5394, "step": 1128000 }, { "epoch": 5.59, "learning_rate": 4.720554094023573e-05, "loss": 2.5182, "step": 1128500 }, { "epoch": 5.59, "learning_rate": 4.720430235380965e-05, "loss": 2.568, "step": 1129000 }, { "epoch": 5.6, "learning_rate": 4.720306624455641e-05, "loss": 2.5795, "step": 1129500 }, { "epoch": 5.6, "learning_rate": 4.720182765813033e-05, "loss": 2.6119, "step": 1130000 }, { "epoch": 5.6, "learning_rate": 4.7200589071704246e-05, "loss": 2.5682, "step": 1130500 }, { "epoch": 5.6, "learning_rate": 4.719935048527816e-05, "loss": 2.5436, "step": 1131000 }, { "epoch": 5.61, "learning_rate": 4.719811189885208e-05, "loss": 2.5482, "step": 1131500 }, { "epoch": 5.61, "learning_rate": 4.7196873312426e-05, "loss": 2.5595, "step": 1132000 }, { "epoch": 5.61, "learning_rate": 4.7195634725999914e-05, "loss": 2.5303, "step": 1132500 }, { "epoch": 5.61, "learning_rate": 4.719439861674668e-05, "loss": 2.5571, "step": 1133000 }, { "epoch": 5.62, "learning_rate": 4.71931600303206e-05, "loss": 2.5494, "step": 1133500 }, { "epoch": 5.62, "learning_rate": 4.7191921443894516e-05, "loss": 2.5194, "step": 1134000 }, { "epoch": 5.62, "learning_rate": 4.719068285746843e-05, "loss": 2.5303, "step": 1134500 }, { "epoch": 5.62, "learning_rate": 4.718944427104235e-05, "loss": 2.5406, "step": 1135000 }, { "epoch": 5.63, "learning_rate": 4.718820568461627e-05, "loss": 2.5231, "step": 1135500 }, { "epoch": 5.63, "learning_rate": 4.7186967098190184e-05, "loss": 2.5372, "step": 1136000 }, { "epoch": 5.63, "learning_rate": 4.7185728511764094e-05, "loss": 2.5714, "step": 1136500 }, { "epoch": 5.63, "learning_rate": 4.718448992533801e-05, "loss": 2.5142, "step": 1137000 }, { "epoch": 5.64, "learning_rate": 4.718325381608478e-05, "loss": 2.5527, "step": 1137500 }, { "epoch": 5.64, "learning_rate": 4.71820152296587e-05, "loss": 2.574, "step": 1138000 }, { "epoch": 5.64, "learning_rate": 4.7180776643232614e-05, "loss": 2.5483, "step": 1138500 }, { "epoch": 5.64, "learning_rate": 4.717953805680653e-05, "loss": 2.5275, "step": 1139000 }, { "epoch": 5.65, "learning_rate": 4.717829947038044e-05, "loss": 2.5176, "step": 1139500 }, { "epoch": 5.65, "learning_rate": 4.717706088395436e-05, "loss": 2.5255, "step": 1140000 }, { "epoch": 5.65, "learning_rate": 4.717582477470113e-05, "loss": 2.5277, "step": 1140500 }, { "epoch": 5.65, "learning_rate": 4.717458618827505e-05, "loss": 2.5264, "step": 1141000 }, { "epoch": 5.66, "learning_rate": 4.717334760184897e-05, "loss": 2.5172, "step": 1141500 }, { "epoch": 5.66, "learning_rate": 4.7172109015422884e-05, "loss": 2.5394, "step": 1142000 }, { "epoch": 5.66, "learning_rate": 4.71708704289968e-05, "loss": 2.4941, "step": 1142500 }, { "epoch": 5.66, "learning_rate": 4.716963184257071e-05, "loss": 2.5234, "step": 1143000 }, { "epoch": 5.67, "learning_rate": 4.716839325614463e-05, "loss": 2.5233, "step": 1143500 }, { "epoch": 5.67, "learning_rate": 4.71671571468914e-05, "loss": 2.5376, "step": 1144000 }, { "epoch": 5.67, "learning_rate": 4.7165918560465314e-05, "loss": 2.5352, "step": 1144500 }, { "epoch": 5.67, "learning_rate": 4.716467997403923e-05, "loss": 2.5087, "step": 1145000 }, { "epoch": 5.68, "learning_rate": 4.716344138761315e-05, "loss": 2.5188, "step": 1145500 }, { "epoch": 5.68, "learning_rate": 4.716220280118706e-05, "loss": 2.5193, "step": 1146000 }, { "epoch": 5.68, "learning_rate": 4.7160964214760975e-05, "loss": 2.5462, "step": 1146500 }, { "epoch": 5.68, "learning_rate": 4.71597305826806e-05, "loss": 2.5272, "step": 1147000 }, { "epoch": 5.69, "learning_rate": 4.715849199625452e-05, "loss": 2.5139, "step": 1147500 }, { "epoch": 5.69, "learning_rate": 4.7157253409828436e-05, "loss": 2.5406, "step": 1148000 }, { "epoch": 5.69, "learning_rate": 4.715601482340235e-05, "loss": 2.5199, "step": 1148500 }, { "epoch": 5.69, "learning_rate": 4.715477623697627e-05, "loss": 2.4929, "step": 1149000 }, { "epoch": 5.7, "learning_rate": 4.715354012772303e-05, "loss": 2.5136, "step": 1149500 }, { "epoch": 5.7, "learning_rate": 4.715230154129695e-05, "loss": 2.5388, "step": 1150000 }, { "epoch": 5.7, "learning_rate": 4.7151062954870866e-05, "loss": 2.5158, "step": 1150500 }, { "epoch": 5.7, "learning_rate": 4.714982436844478e-05, "loss": 2.4921, "step": 1151000 }, { "epoch": 5.7, "learning_rate": 4.71485857820187e-05, "loss": 2.5272, "step": 1151500 }, { "epoch": 5.71, "learning_rate": 4.714734719559262e-05, "loss": 2.5284, "step": 1152000 }, { "epoch": 5.71, "learning_rate": 4.7146108609166534e-05, "loss": 2.5297, "step": 1152500 }, { "epoch": 5.71, "learning_rate": 4.714487002274045e-05, "loss": 2.5186, "step": 1153000 }, { "epoch": 5.71, "learning_rate": 4.714363143631437e-05, "loss": 2.4969, "step": 1153500 }, { "epoch": 5.72, "learning_rate": 4.7142392849888284e-05, "loss": 2.5141, "step": 1154000 }, { "epoch": 5.72, "learning_rate": 4.71411542634622e-05, "loss": 2.5465, "step": 1154500 }, { "epoch": 5.72, "learning_rate": 4.713991567703612e-05, "loss": 2.5263, "step": 1155000 }, { "epoch": 5.72, "learning_rate": 4.713867709061003e-05, "loss": 2.5282, "step": 1155500 }, { "epoch": 5.73, "learning_rate": 4.713744345852965e-05, "loss": 2.5718, "step": 1156000 }, { "epoch": 5.73, "learning_rate": 4.7136204872103566e-05, "loss": 2.5614, "step": 1156500 }, { "epoch": 5.73, "learning_rate": 4.7134968762850335e-05, "loss": 2.5299, "step": 1157000 }, { "epoch": 5.73, "learning_rate": 4.713373017642425e-05, "loss": 2.5281, "step": 1157500 }, { "epoch": 5.74, "learning_rate": 4.713249158999817e-05, "loss": 2.503, "step": 1158000 }, { "epoch": 5.74, "learning_rate": 4.7131253003572086e-05, "loss": 2.5163, "step": 1158500 }, { "epoch": 5.74, "learning_rate": 4.7130014417146e-05, "loss": 2.5324, "step": 1159000 }, { "epoch": 5.74, "learning_rate": 4.712877583071992e-05, "loss": 2.522, "step": 1159500 }, { "epoch": 5.75, "learning_rate": 4.7127537244293836e-05, "loss": 2.55, "step": 1160000 }, { "epoch": 5.75, "learning_rate": 4.712629865786775e-05, "loss": 2.5301, "step": 1160500 }, { "epoch": 5.75, "learning_rate": 4.712506007144167e-05, "loss": 2.5052, "step": 1161000 }, { "epoch": 5.75, "learning_rate": 4.712382148501559e-05, "loss": 2.4877, "step": 1161500 }, { "epoch": 5.76, "learning_rate": 4.7122582898589504e-05, "loss": 2.5126, "step": 1162000 }, { "epoch": 5.76, "learning_rate": 4.712134431216342e-05, "loss": 2.5242, "step": 1162500 }, { "epoch": 5.76, "learning_rate": 4.712010572573734e-05, "loss": 2.5206, "step": 1163000 }, { "epoch": 5.76, "learning_rate": 4.711886713931125e-05, "loss": 2.5255, "step": 1163500 }, { "epoch": 5.77, "learning_rate": 4.711763103005802e-05, "loss": 2.5042, "step": 1164000 }, { "epoch": 5.77, "learning_rate": 4.7116392443631934e-05, "loss": 2.5521, "step": 1164500 }, { "epoch": 5.77, "learning_rate": 4.711515385720585e-05, "loss": 2.5334, "step": 1165000 }, { "epoch": 5.77, "learning_rate": 4.711391527077977e-05, "loss": 2.536, "step": 1165500 }, { "epoch": 5.78, "learning_rate": 4.7112676684353685e-05, "loss": 2.5293, "step": 1166000 }, { "epoch": 5.78, "learning_rate": 4.71114380979276e-05, "loss": 2.5166, "step": 1166500 }, { "epoch": 5.78, "learning_rate": 4.711019951150152e-05, "loss": 2.4953, "step": 1167000 }, { "epoch": 5.78, "learning_rate": 4.7108960925075435e-05, "loss": 2.4882, "step": 1167500 }, { "epoch": 5.79, "learning_rate": 4.7107724815822204e-05, "loss": 2.5227, "step": 1168000 }, { "epoch": 5.79, "learning_rate": 4.710648622939612e-05, "loss": 2.514, "step": 1168500 }, { "epoch": 5.79, "learning_rate": 4.710524764297004e-05, "loss": 2.5371, "step": 1169000 }, { "epoch": 5.79, "learning_rate": 4.7104009056543955e-05, "loss": 2.5194, "step": 1169500 }, { "epoch": 5.8, "learning_rate": 4.7102770470117865e-05, "loss": 2.5025, "step": 1170000 }, { "epoch": 5.8, "learning_rate": 4.710153188369178e-05, "loss": 2.5248, "step": 1170500 }, { "epoch": 5.8, "learning_rate": 4.71002932972657e-05, "loss": 2.4913, "step": 1171000 }, { "epoch": 5.8, "learning_rate": 4.7099054710839616e-05, "loss": 2.5103, "step": 1171500 }, { "epoch": 5.81, "learning_rate": 4.709781612441353e-05, "loss": 2.4856, "step": 1172000 }, { "epoch": 5.81, "learning_rate": 4.709657753798744e-05, "loss": 2.536, "step": 1172500 }, { "epoch": 5.81, "learning_rate": 4.709534142873422e-05, "loss": 2.4982, "step": 1173000 }, { "epoch": 5.81, "learning_rate": 4.7094102842308136e-05, "loss": 2.5312, "step": 1173500 }, { "epoch": 5.82, "learning_rate": 4.7092866733054904e-05, "loss": 2.4911, "step": 1174000 }, { "epoch": 5.82, "learning_rate": 4.709162814662882e-05, "loss": 2.5099, "step": 1174500 }, { "epoch": 5.82, "learning_rate": 4.709038956020274e-05, "loss": 2.5238, "step": 1175000 }, { "epoch": 5.82, "learning_rate": 4.7089150973776655e-05, "loss": 2.5279, "step": 1175500 }, { "epoch": 5.83, "learning_rate": 4.708791486452342e-05, "loss": 2.5004, "step": 1176000 }, { "epoch": 5.83, "learning_rate": 4.7086676278097334e-05, "loss": 2.5314, "step": 1176500 }, { "epoch": 5.83, "learning_rate": 4.708543769167125e-05, "loss": 2.5071, "step": 1177000 }, { "epoch": 5.83, "learning_rate": 4.708419910524517e-05, "loss": 2.5197, "step": 1177500 }, { "epoch": 5.84, "learning_rate": 4.7082960518819085e-05, "loss": 2.5249, "step": 1178000 }, { "epoch": 5.84, "learning_rate": 4.7081721932393e-05, "loss": 2.5054, "step": 1178500 }, { "epoch": 5.84, "learning_rate": 4.708048334596692e-05, "loss": 2.5106, "step": 1179000 }, { "epoch": 5.84, "learning_rate": 4.7079244759540836e-05, "loss": 2.5198, "step": 1179500 }, { "epoch": 5.85, "learning_rate": 4.7078008650287604e-05, "loss": 2.5138, "step": 1180000 }, { "epoch": 5.85, "learning_rate": 4.707677006386152e-05, "loss": 2.5156, "step": 1180500 }, { "epoch": 5.85, "learning_rate": 4.707553147743544e-05, "loss": 2.5207, "step": 1181000 }, { "epoch": 5.85, "learning_rate": 4.7074292891009355e-05, "loss": 2.4657, "step": 1181500 }, { "epoch": 5.86, "learning_rate": 4.707305430458327e-05, "loss": 2.4906, "step": 1182000 }, { "epoch": 5.86, "learning_rate": 4.707181571815719e-05, "loss": 2.4839, "step": 1182500 }, { "epoch": 5.86, "learning_rate": 4.7070577131731106e-05, "loss": 2.514, "step": 1183000 }, { "epoch": 5.86, "learning_rate": 4.7069338545305016e-05, "loss": 2.4829, "step": 1183500 }, { "epoch": 5.87, "learning_rate": 4.706809995887893e-05, "loss": 2.5311, "step": 1184000 }, { "epoch": 5.87, "learning_rate": 4.706686137245285e-05, "loss": 2.5153, "step": 1184500 }, { "epoch": 5.87, "learning_rate": 4.706562774037247e-05, "loss": 2.5128, "step": 1185000 }, { "epoch": 5.87, "learning_rate": 4.706438915394639e-05, "loss": 2.5017, "step": 1185500 }, { "epoch": 5.88, "learning_rate": 4.7063153044693156e-05, "loss": 2.5261, "step": 1186000 }, { "epoch": 5.88, "learning_rate": 4.7061914458267073e-05, "loss": 2.5201, "step": 1186500 }, { "epoch": 5.88, "learning_rate": 4.7060675871840984e-05, "loss": 2.5188, "step": 1187000 }, { "epoch": 5.88, "learning_rate": 4.70594372854149e-05, "loss": 2.534, "step": 1187500 }, { "epoch": 5.89, "learning_rate": 4.705819869898882e-05, "loss": 2.5313, "step": 1188000 }, { "epoch": 5.89, "learning_rate": 4.7056960112562734e-05, "loss": 2.5164, "step": 1188500 }, { "epoch": 5.89, "learning_rate": 4.705572152613665e-05, "loss": 2.5065, "step": 1189000 }, { "epoch": 5.89, "learning_rate": 4.705448293971057e-05, "loss": 2.5141, "step": 1189500 }, { "epoch": 5.9, "learning_rate": 4.7053244353284485e-05, "loss": 2.4913, "step": 1190000 }, { "epoch": 5.9, "learning_rate": 4.70520057668584e-05, "loss": 2.5257, "step": 1190500 }, { "epoch": 5.9, "learning_rate": 4.705076965760517e-05, "loss": 2.5149, "step": 1191000 }, { "epoch": 5.9, "learning_rate": 4.704953107117909e-05, "loss": 2.524, "step": 1191500 }, { "epoch": 5.91, "learning_rate": 4.7048292484753005e-05, "loss": 2.5101, "step": 1192000 }, { "epoch": 5.91, "learning_rate": 4.704705389832692e-05, "loss": 2.5176, "step": 1192500 }, { "epoch": 5.91, "learning_rate": 4.704581531190084e-05, "loss": 2.5155, "step": 1193000 }, { "epoch": 5.91, "learning_rate": 4.7044576725474756e-05, "loss": 2.4957, "step": 1193500 }, { "epoch": 5.92, "learning_rate": 4.704333813904867e-05, "loss": 2.5396, "step": 1194000 }, { "epoch": 5.92, "learning_rate": 4.7042102029795434e-05, "loss": 2.5241, "step": 1194500 }, { "epoch": 5.92, "learning_rate": 4.704086344336935e-05, "loss": 2.5134, "step": 1195000 }, { "epoch": 5.92, "learning_rate": 4.703962485694327e-05, "loss": 2.5014, "step": 1195500 }, { "epoch": 5.93, "learning_rate": 4.7038386270517185e-05, "loss": 2.5075, "step": 1196000 }, { "epoch": 5.93, "learning_rate": 4.70371476840911e-05, "loss": 2.5256, "step": 1196500 }, { "epoch": 5.93, "learning_rate": 4.703590909766502e-05, "loss": 2.4918, "step": 1197000 }, { "epoch": 5.93, "learning_rate": 4.7034670511238936e-05, "loss": 2.4843, "step": 1197500 }, { "epoch": 5.94, "learning_rate": 4.703343192481285e-05, "loss": 2.5463, "step": 1198000 }, { "epoch": 5.94, "learning_rate": 4.703219333838677e-05, "loss": 2.4987, "step": 1198500 }, { "epoch": 5.94, "learning_rate": 4.703095475196068e-05, "loss": 2.5182, "step": 1199000 }, { "epoch": 5.94, "learning_rate": 4.702972111988031e-05, "loss": 2.5022, "step": 1199500 }, { "epoch": 5.95, "learning_rate": 4.7028482533454224e-05, "loss": 2.5201, "step": 1200000 }, { "epoch": 5.95, "learning_rate": 4.7027243947028135e-05, "loss": 2.5196, "step": 1200500 }, { "epoch": 5.95, "learning_rate": 4.702600536060205e-05, "loss": 2.5223, "step": 1201000 }, { "epoch": 5.95, "learning_rate": 4.702476677417597e-05, "loss": 2.5153, "step": 1201500 }, { "epoch": 5.96, "learning_rate": 4.7023528187749885e-05, "loss": 2.5415, "step": 1202000 }, { "epoch": 5.96, "learning_rate": 4.70222896013238e-05, "loss": 2.529, "step": 1202500 }, { "epoch": 5.96, "learning_rate": 4.702105349207057e-05, "loss": 2.5326, "step": 1203000 }, { "epoch": 5.96, "learning_rate": 4.701981490564449e-05, "loss": 2.5434, "step": 1203500 }, { "epoch": 5.97, "learning_rate": 4.7018576319218405e-05, "loss": 2.5482, "step": 1204000 }, { "epoch": 5.97, "learning_rate": 4.701733773279232e-05, "loss": 2.5158, "step": 1204500 }, { "epoch": 5.97, "learning_rate": 4.701609914636624e-05, "loss": 2.5229, "step": 1205000 }, { "epoch": 5.97, "learning_rate": 4.7014860559940156e-05, "loss": 2.5232, "step": 1205500 }, { "epoch": 5.97, "learning_rate": 4.7013624450686925e-05, "loss": 2.5345, "step": 1206000 }, { "epoch": 5.98, "learning_rate": 4.701238586426084e-05, "loss": 2.4912, "step": 1206500 }, { "epoch": 5.98, "learning_rate": 4.701114727783475e-05, "loss": 2.5206, "step": 1207000 }, { "epoch": 5.98, "learning_rate": 4.700990869140867e-05, "loss": 2.4784, "step": 1207500 }, { "epoch": 5.98, "learning_rate": 4.7008670104982586e-05, "loss": 2.4862, "step": 1208000 }, { "epoch": 5.99, "learning_rate": 4.70074315185565e-05, "loss": 2.5107, "step": 1208500 }, { "epoch": 5.99, "learning_rate": 4.700619293213042e-05, "loss": 2.5165, "step": 1209000 }, { "epoch": 5.99, "learning_rate": 4.700495682287719e-05, "loss": 2.5188, "step": 1209500 }, { "epoch": 5.99, "learning_rate": 4.7003718236451105e-05, "loss": 2.4935, "step": 1210000 }, { "epoch": 6.0, "learning_rate": 4.700247965002502e-05, "loss": 2.5275, "step": 1210500 }, { "epoch": 6.0, "learning_rate": 4.700124106359894e-05, "loss": 2.5212, "step": 1211000 }, { "epoch": 6.0, "eval_accuracy": 0.6358168518016798, "eval_accuracy_mlm": 0.5887641252682907, "eval_accuracy_nsp": 0.8578281213842226, "eval_loss": 2.467965841293335, "eval_runtime": 145.9334, "eval_samples_per_second": 1747.092, "eval_steps_per_second": 72.8, "step": 1211058 }, { "epoch": 6.0, "learning_rate": 4.7000002477172856e-05, "loss": 2.4902, "step": 1211500 }, { "epoch": 6.0, "learning_rate": 4.699876389074677e-05, "loss": 2.493, "step": 1212000 }, { "epoch": 6.01, "learning_rate": 4.699752530432069e-05, "loss": 2.5038, "step": 1212500 }, { "epoch": 6.01, "learning_rate": 4.699628671789461e-05, "loss": 2.4571, "step": 1213000 }, { "epoch": 6.01, "learning_rate": 4.6995048131468524e-05, "loss": 2.4688, "step": 1213500 }, { "epoch": 6.01, "learning_rate": 4.699380954504244e-05, "loss": 2.5051, "step": 1214000 }, { "epoch": 6.02, "learning_rate": 4.699257095861635e-05, "loss": 2.4776, "step": 1214500 }, { "epoch": 6.02, "learning_rate": 4.699133237219027e-05, "loss": 2.4888, "step": 1215000 }, { "epoch": 6.02, "learning_rate": 4.6990096262937036e-05, "loss": 2.4839, "step": 1215500 }, { "epoch": 6.02, "learning_rate": 4.698885767651095e-05, "loss": 2.4875, "step": 1216000 }, { "epoch": 6.03, "learning_rate": 4.698762156725772e-05, "loss": 2.5075, "step": 1216500 }, { "epoch": 6.03, "learning_rate": 4.698638545800449e-05, "loss": 2.4875, "step": 1217000 }, { "epoch": 6.03, "learning_rate": 4.698514687157841e-05, "loss": 2.4917, "step": 1217500 }, { "epoch": 6.03, "learning_rate": 4.6983908285152325e-05, "loss": 2.4866, "step": 1218000 }, { "epoch": 6.04, "learning_rate": 4.698266969872624e-05, "loss": 2.5019, "step": 1218500 }, { "epoch": 6.04, "learning_rate": 4.698143111230016e-05, "loss": 2.4532, "step": 1219000 }, { "epoch": 6.04, "learning_rate": 4.6980192525874076e-05, "loss": 2.4699, "step": 1219500 }, { "epoch": 6.04, "learning_rate": 4.697895393944799e-05, "loss": 2.5117, "step": 1220000 }, { "epoch": 6.05, "learning_rate": 4.69777153530219e-05, "loss": 2.4897, "step": 1220500 }, { "epoch": 6.05, "learning_rate": 4.697647676659582e-05, "loss": 2.4558, "step": 1221000 }, { "epoch": 6.05, "learning_rate": 4.6975238180169737e-05, "loss": 2.4775, "step": 1221500 }, { "epoch": 6.05, "learning_rate": 4.6973999593743653e-05, "loss": 2.4828, "step": 1222000 }, { "epoch": 6.06, "learning_rate": 4.697276100731757e-05, "loss": 2.487, "step": 1222500 }, { "epoch": 6.06, "learning_rate": 4.697152242089149e-05, "loss": 2.4936, "step": 1223000 }, { "epoch": 6.06, "learning_rate": 4.6970283834465404e-05, "loss": 2.489, "step": 1223500 }, { "epoch": 6.06, "learning_rate": 4.6969045248039314e-05, "loss": 2.4504, "step": 1224000 }, { "epoch": 6.07, "learning_rate": 4.696780666161323e-05, "loss": 2.4844, "step": 1224500 }, { "epoch": 6.07, "learning_rate": 4.696656807518715e-05, "loss": 2.4735, "step": 1225000 }, { "epoch": 6.07, "learning_rate": 4.6965329488761065e-05, "loss": 2.4685, "step": 1225500 }, { "epoch": 6.07, "learning_rate": 4.696409090233498e-05, "loss": 2.4795, "step": 1226000 }, { "epoch": 6.08, "learning_rate": 4.69628523159089e-05, "loss": 2.4918, "step": 1226500 }, { "epoch": 6.08, "learning_rate": 4.696161620665567e-05, "loss": 2.4753, "step": 1227000 }, { "epoch": 6.08, "learning_rate": 4.696038009740244e-05, "loss": 2.4695, "step": 1227500 }, { "epoch": 6.08, "learning_rate": 4.6959141510976354e-05, "loss": 2.4831, "step": 1228000 }, { "epoch": 6.09, "learning_rate": 4.695790292455027e-05, "loss": 2.4936, "step": 1228500 }, { "epoch": 6.09, "learning_rate": 4.695666433812419e-05, "loss": 2.4876, "step": 1229000 }, { "epoch": 6.09, "learning_rate": 4.6955425751698104e-05, "loss": 2.4676, "step": 1229500 }, { "epoch": 6.09, "learning_rate": 4.695418716527202e-05, "loss": 2.4873, "step": 1230000 }, { "epoch": 6.1, "learning_rate": 4.695295105601879e-05, "loss": 2.4812, "step": 1230500 }, { "epoch": 6.1, "learning_rate": 4.695171246959271e-05, "loss": 2.4859, "step": 1231000 }, { "epoch": 6.1, "learning_rate": 4.6950473883166624e-05, "loss": 2.4744, "step": 1231500 }, { "epoch": 6.1, "learning_rate": 4.694923529674054e-05, "loss": 2.4792, "step": 1232000 }, { "epoch": 6.11, "learning_rate": 4.694799918748731e-05, "loss": 2.4847, "step": 1232500 }, { "epoch": 6.11, "learning_rate": 4.694676060106123e-05, "loss": 2.5086, "step": 1233000 }, { "epoch": 6.11, "learning_rate": 4.6945522014635144e-05, "loss": 2.4921, "step": 1233500 }, { "epoch": 6.11, "learning_rate": 4.6944283428209054e-05, "loss": 2.471, "step": 1234000 }, { "epoch": 6.12, "learning_rate": 4.694304484178297e-05, "loss": 2.4858, "step": 1234500 }, { "epoch": 6.12, "learning_rate": 4.694180625535689e-05, "loss": 2.4519, "step": 1235000 }, { "epoch": 6.12, "learning_rate": 4.6940567668930805e-05, "loss": 2.4916, "step": 1235500 }, { "epoch": 6.12, "learning_rate": 4.693932908250472e-05, "loss": 2.4775, "step": 1236000 }, { "epoch": 6.13, "learning_rate": 4.693809049607863e-05, "loss": 2.483, "step": 1236500 }, { "epoch": 6.13, "learning_rate": 4.693685190965255e-05, "loss": 2.4979, "step": 1237000 }, { "epoch": 6.13, "learning_rate": 4.6935613323226465e-05, "loss": 2.4822, "step": 1237500 }, { "epoch": 6.13, "learning_rate": 4.693437473680038e-05, "loss": 2.4846, "step": 1238000 }, { "epoch": 6.14, "learning_rate": 4.69331361503743e-05, "loss": 2.4769, "step": 1238500 }, { "epoch": 6.14, "learning_rate": 4.6931900041121075e-05, "loss": 2.5055, "step": 1239000 }, { "epoch": 6.14, "learning_rate": 4.6930661454694985e-05, "loss": 2.4656, "step": 1239500 }, { "epoch": 6.14, "learning_rate": 4.69294228682689e-05, "loss": 2.5074, "step": 1240000 }, { "epoch": 6.15, "learning_rate": 4.692818428184282e-05, "loss": 2.4729, "step": 1240500 }, { "epoch": 6.15, "learning_rate": 4.692694817258959e-05, "loss": 2.4935, "step": 1241000 }, { "epoch": 6.15, "learning_rate": 4.6925709586163505e-05, "loss": 2.4919, "step": 1241500 }, { "epoch": 6.15, "learning_rate": 4.692447099973742e-05, "loss": 2.4865, "step": 1242000 }, { "epoch": 6.16, "learning_rate": 4.692323241331134e-05, "loss": 2.4812, "step": 1242500 }, { "epoch": 6.16, "learning_rate": 4.6921993826885255e-05, "loss": 2.483, "step": 1243000 }, { "epoch": 6.16, "learning_rate": 4.6920757717632024e-05, "loss": 2.4902, "step": 1243500 }, { "epoch": 6.16, "learning_rate": 4.691951913120594e-05, "loss": 2.47, "step": 1244000 }, { "epoch": 6.17, "learning_rate": 4.691828054477986e-05, "loss": 2.4547, "step": 1244500 }, { "epoch": 6.17, "learning_rate": 4.6917041958353775e-05, "loss": 2.4744, "step": 1245000 }, { "epoch": 6.17, "learning_rate": 4.691580337192769e-05, "loss": 2.4984, "step": 1245500 }, { "epoch": 6.17, "learning_rate": 4.69145647855016e-05, "loss": 2.4811, "step": 1246000 }, { "epoch": 6.18, "learning_rate": 4.691332619907552e-05, "loss": 2.4872, "step": 1246500 }, { "epoch": 6.18, "learning_rate": 4.6912090089822295e-05, "loss": 2.481, "step": 1247000 }, { "epoch": 6.18, "learning_rate": 4.691085150339621e-05, "loss": 2.4718, "step": 1247500 }, { "epoch": 6.18, "learning_rate": 4.690961291697012e-05, "loss": 2.4848, "step": 1248000 }, { "epoch": 6.19, "learning_rate": 4.690837433054404e-05, "loss": 2.483, "step": 1248500 }, { "epoch": 6.19, "learning_rate": 4.6907135744117956e-05, "loss": 2.4704, "step": 1249000 }, { "epoch": 6.19, "learning_rate": 4.690589715769187e-05, "loss": 2.4682, "step": 1249500 }, { "epoch": 6.19, "learning_rate": 4.690466104843864e-05, "loss": 2.4647, "step": 1250000 }, { "epoch": 6.2, "learning_rate": 4.690342493918541e-05, "loss": 2.4835, "step": 1250500 }, { "epoch": 6.2, "learning_rate": 4.690218635275933e-05, "loss": 2.4615, "step": 1251000 }, { "epoch": 6.2, "learning_rate": 4.6900947766333244e-05, "loss": 2.4774, "step": 1251500 }, { "epoch": 6.2, "learning_rate": 4.689970917990716e-05, "loss": 2.4772, "step": 1252000 }, { "epoch": 6.21, "learning_rate": 4.689847059348108e-05, "loss": 2.4977, "step": 1252500 }, { "epoch": 6.21, "learning_rate": 4.6897232007054995e-05, "loss": 2.5088, "step": 1253000 }, { "epoch": 6.21, "learning_rate": 4.689599342062891e-05, "loss": 2.4723, "step": 1253500 }, { "epoch": 6.21, "learning_rate": 4.689475483420283e-05, "loss": 2.4965, "step": 1254000 }, { "epoch": 6.22, "learning_rate": 4.689351624777674e-05, "loss": 2.4822, "step": 1254500 }, { "epoch": 6.22, "learning_rate": 4.689228013852351e-05, "loss": 2.5036, "step": 1255000 }, { "epoch": 6.22, "learning_rate": 4.6891041552097424e-05, "loss": 2.4874, "step": 1255500 }, { "epoch": 6.22, "learning_rate": 4.688980544284419e-05, "loss": 2.4798, "step": 1256000 }, { "epoch": 6.23, "learning_rate": 4.688856685641811e-05, "loss": 2.4956, "step": 1256500 }, { "epoch": 6.23, "learning_rate": 4.688733074716488e-05, "loss": 2.4933, "step": 1257000 }, { "epoch": 6.23, "learning_rate": 4.6886092160738796e-05, "loss": 2.5024, "step": 1257500 }, { "epoch": 6.23, "learning_rate": 4.6884853574312706e-05, "loss": 2.4811, "step": 1258000 }, { "epoch": 6.24, "learning_rate": 4.688361498788662e-05, "loss": 2.4707, "step": 1258500 }, { "epoch": 6.24, "learning_rate": 4.688237887863339e-05, "loss": 2.4857, "step": 1259000 }, { "epoch": 6.24, "learning_rate": 4.688114029220731e-05, "loss": 2.5269, "step": 1259500 }, { "epoch": 6.24, "learning_rate": 4.6879901705781226e-05, "loss": 2.4841, "step": 1260000 }, { "epoch": 6.24, "learning_rate": 4.687866311935514e-05, "loss": 2.5034, "step": 1260500 }, { "epoch": 6.25, "learning_rate": 4.687742453292906e-05, "loss": 2.4715, "step": 1261000 }, { "epoch": 6.25, "learning_rate": 4.6876185946502976e-05, "loss": 2.4924, "step": 1261500 }, { "epoch": 6.25, "learning_rate": 4.6874947360076893e-05, "loss": 2.5025, "step": 1262000 }, { "epoch": 6.25, "learning_rate": 4.687370877365081e-05, "loss": 2.4942, "step": 1262500 }, { "epoch": 6.26, "learning_rate": 4.687247018722473e-05, "loss": 2.4852, "step": 1263000 }, { "epoch": 6.26, "learning_rate": 4.6871231600798644e-05, "loss": 2.4815, "step": 1263500 }, { "epoch": 6.26, "learning_rate": 4.686999301437256e-05, "loss": 2.4833, "step": 1264000 }, { "epoch": 6.26, "learning_rate": 4.686875690511932e-05, "loss": 2.5097, "step": 1264500 }, { "epoch": 6.27, "learning_rate": 4.686751831869324e-05, "loss": 2.5067, "step": 1265000 }, { "epoch": 6.27, "learning_rate": 4.686627973226716e-05, "loss": 2.5029, "step": 1265500 }, { "epoch": 6.27, "learning_rate": 4.6865041145841074e-05, "loss": 2.4842, "step": 1266000 }, { "epoch": 6.27, "learning_rate": 4.686380255941499e-05, "loss": 2.4757, "step": 1266500 }, { "epoch": 6.28, "learning_rate": 4.686256397298891e-05, "loss": 2.4729, "step": 1267000 }, { "epoch": 6.28, "learning_rate": 4.6861325386562825e-05, "loss": 2.4995, "step": 1267500 }, { "epoch": 6.28, "learning_rate": 4.686008680013674e-05, "loss": 2.5044, "step": 1268000 }, { "epoch": 6.28, "learning_rate": 4.685884821371066e-05, "loss": 2.483, "step": 1268500 }, { "epoch": 6.29, "learning_rate": 4.685761458163028e-05, "loss": 2.5019, "step": 1269000 }, { "epoch": 6.29, "learning_rate": 4.6856375995204196e-05, "loss": 2.5024, "step": 1269500 }, { "epoch": 6.29, "learning_rate": 4.685513740877811e-05, "loss": 2.4707, "step": 1270000 }, { "epoch": 6.29, "learning_rate": 4.685389882235203e-05, "loss": 2.4647, "step": 1270500 }, { "epoch": 6.3, "learning_rate": 4.68526627130988e-05, "loss": 2.4922, "step": 1271000 }, { "epoch": 6.3, "learning_rate": 4.685142412667271e-05, "loss": 2.5003, "step": 1271500 }, { "epoch": 6.3, "learning_rate": 4.6850185540246626e-05, "loss": 2.4678, "step": 1272000 }, { "epoch": 6.3, "learning_rate": 4.684894695382054e-05, "loss": 2.4832, "step": 1272500 }, { "epoch": 6.31, "learning_rate": 4.684771084456732e-05, "loss": 2.4824, "step": 1273000 }, { "epoch": 6.31, "learning_rate": 4.6846472258141235e-05, "loss": 2.4748, "step": 1273500 }, { "epoch": 6.31, "learning_rate": 4.684523367171515e-05, "loss": 2.5172, "step": 1274000 }, { "epoch": 6.31, "learning_rate": 4.684399508528906e-05, "loss": 2.4525, "step": 1274500 }, { "epoch": 6.32, "learning_rate": 4.684275649886298e-05, "loss": 2.4783, "step": 1275000 }, { "epoch": 6.32, "learning_rate": 4.6841517912436896e-05, "loss": 2.5077, "step": 1275500 }, { "epoch": 6.32, "learning_rate": 4.6840281803183665e-05, "loss": 2.4907, "step": 1276000 }, { "epoch": 6.32, "learning_rate": 4.683904321675758e-05, "loss": 2.464, "step": 1276500 }, { "epoch": 6.33, "learning_rate": 4.68378046303315e-05, "loss": 2.4991, "step": 1277000 }, { "epoch": 6.33, "learning_rate": 4.6836566043905416e-05, "loss": 2.5031, "step": 1277500 }, { "epoch": 6.33, "learning_rate": 4.6835327457479326e-05, "loss": 2.4927, "step": 1278000 }, { "epoch": 6.33, "learning_rate": 4.683408887105324e-05, "loss": 2.4785, "step": 1278500 }, { "epoch": 6.34, "learning_rate": 4.683285028462716e-05, "loss": 2.465, "step": 1279000 }, { "epoch": 6.34, "learning_rate": 4.683161169820108e-05, "loss": 2.4818, "step": 1279500 }, { "epoch": 6.34, "learning_rate": 4.6830373111774994e-05, "loss": 2.5001, "step": 1280000 }, { "epoch": 6.34, "learning_rate": 4.682913452534891e-05, "loss": 2.4687, "step": 1280500 }, { "epoch": 6.35, "learning_rate": 4.682789841609568e-05, "loss": 2.4916, "step": 1281000 }, { "epoch": 6.35, "learning_rate": 4.6826659829669596e-05, "loss": 2.4898, "step": 1281500 }, { "epoch": 6.35, "learning_rate": 4.682542124324351e-05, "loss": 2.4705, "step": 1282000 }, { "epoch": 6.35, "learning_rate": 4.682418265681743e-05, "loss": 2.4925, "step": 1282500 }, { "epoch": 6.36, "learning_rate": 4.682294407039135e-05, "loss": 2.4848, "step": 1283000 }, { "epoch": 6.36, "learning_rate": 4.6821705483965264e-05, "loss": 2.4765, "step": 1283500 }, { "epoch": 6.36, "learning_rate": 4.6820469374712026e-05, "loss": 2.4905, "step": 1284000 }, { "epoch": 6.36, "learning_rate": 4.681923078828594e-05, "loss": 2.4852, "step": 1284500 }, { "epoch": 6.37, "learning_rate": 4.681799220185986e-05, "loss": 2.4962, "step": 1285000 }, { "epoch": 6.37, "learning_rate": 4.681675361543378e-05, "loss": 2.4493, "step": 1285500 }, { "epoch": 6.37, "learning_rate": 4.6815515029007694e-05, "loss": 2.4891, "step": 1286000 }, { "epoch": 6.37, "learning_rate": 4.681427644258161e-05, "loss": 2.4824, "step": 1286500 }, { "epoch": 6.38, "learning_rate": 4.681303785615553e-05, "loss": 2.4763, "step": 1287000 }, { "epoch": 6.38, "learning_rate": 4.6811799269729445e-05, "loss": 2.5012, "step": 1287500 }, { "epoch": 6.38, "learning_rate": 4.681056068330336e-05, "loss": 2.4999, "step": 1288000 }, { "epoch": 6.38, "learning_rate": 4.680932209687728e-05, "loss": 2.4742, "step": 1288500 }, { "epoch": 6.39, "learning_rate": 4.680808598762405e-05, "loss": 2.4774, "step": 1289000 }, { "epoch": 6.39, "learning_rate": 4.6806847401197964e-05, "loss": 2.4746, "step": 1289500 }, { "epoch": 6.39, "learning_rate": 4.680560881477188e-05, "loss": 2.4768, "step": 1290000 }, { "epoch": 6.39, "learning_rate": 4.68043702283458e-05, "loss": 2.4646, "step": 1290500 }, { "epoch": 6.4, "learning_rate": 4.6803131641919715e-05, "loss": 2.4499, "step": 1291000 }, { "epoch": 6.4, "learning_rate": 4.680189553266648e-05, "loss": 2.4654, "step": 1291500 }, { "epoch": 6.4, "learning_rate": 4.6800656946240394e-05, "loss": 2.4719, "step": 1292000 }, { "epoch": 6.4, "learning_rate": 4.679941835981431e-05, "loss": 2.4862, "step": 1292500 }, { "epoch": 6.41, "learning_rate": 4.6798182250561087e-05, "loss": 2.4977, "step": 1293000 }, { "epoch": 6.41, "learning_rate": 4.679694614130785e-05, "loss": 2.4832, "step": 1293500 }, { "epoch": 6.41, "learning_rate": 4.6795707554881766e-05, "loss": 2.5195, "step": 1294000 }, { "epoch": 6.41, "learning_rate": 4.679446896845568e-05, "loss": 2.4994, "step": 1294500 }, { "epoch": 6.42, "learning_rate": 4.67932303820296e-05, "loss": 2.4854, "step": 1295000 }, { "epoch": 6.42, "learning_rate": 4.6791991795603516e-05, "loss": 2.4757, "step": 1295500 }, { "epoch": 6.42, "learning_rate": 4.679075320917743e-05, "loss": 2.5, "step": 1296000 }, { "epoch": 6.42, "learning_rate": 4.678951462275134e-05, "loss": 2.4907, "step": 1296500 }, { "epoch": 6.43, "learning_rate": 4.678827603632526e-05, "loss": 2.5016, "step": 1297000 }, { "epoch": 6.43, "learning_rate": 4.678703744989918e-05, "loss": 2.5061, "step": 1297500 }, { "epoch": 6.43, "learning_rate": 4.6785798863473094e-05, "loss": 2.4797, "step": 1298000 }, { "epoch": 6.43, "learning_rate": 4.678456027704701e-05, "loss": 2.5274, "step": 1298500 }, { "epoch": 6.44, "learning_rate": 4.678332169062093e-05, "loss": 2.489, "step": 1299000 }, { "epoch": 6.44, "learning_rate": 4.6782083104194845e-05, "loss": 2.4759, "step": 1299500 }, { "epoch": 6.44, "learning_rate": 4.6780846994941614e-05, "loss": 2.4618, "step": 1300000 }, { "epoch": 6.44, "learning_rate": 4.677961088568838e-05, "loss": 2.4962, "step": 1300500 }, { "epoch": 6.45, "learning_rate": 4.67783722992623e-05, "loss": 2.485, "step": 1301000 }, { "epoch": 6.45, "learning_rate": 4.6777133712836216e-05, "loss": 2.4917, "step": 1301500 }, { "epoch": 6.45, "learning_rate": 4.677589512641013e-05, "loss": 2.4872, "step": 1302000 }, { "epoch": 6.45, "learning_rate": 4.677465653998405e-05, "loss": 2.4978, "step": 1302500 }, { "epoch": 6.46, "learning_rate": 4.677342043073082e-05, "loss": 2.486, "step": 1303000 }, { "epoch": 6.46, "learning_rate": 4.6772181844304736e-05, "loss": 2.5053, "step": 1303500 }, { "epoch": 6.46, "learning_rate": 4.6770945735051505e-05, "loss": 2.4912, "step": 1304000 }, { "epoch": 6.46, "learning_rate": 4.676970714862542e-05, "loss": 2.4926, "step": 1304500 }, { "epoch": 6.47, "learning_rate": 4.676846856219934e-05, "loss": 2.4839, "step": 1305000 }, { "epoch": 6.47, "learning_rate": 4.6767229975773256e-05, "loss": 2.4895, "step": 1305500 }, { "epoch": 6.47, "learning_rate": 4.6765991389347166e-05, "loss": 2.5066, "step": 1306000 }, { "epoch": 6.47, "learning_rate": 4.6764755280093935e-05, "loss": 2.4824, "step": 1306500 }, { "epoch": 6.48, "learning_rate": 4.676351669366785e-05, "loss": 2.482, "step": 1307000 }, { "epoch": 6.48, "learning_rate": 4.676227810724177e-05, "loss": 2.4795, "step": 1307500 }, { "epoch": 6.48, "learning_rate": 4.6761039520815685e-05, "loss": 2.5152, "step": 1308000 }, { "epoch": 6.48, "learning_rate": 4.67598009343896e-05, "loss": 2.4962, "step": 1308500 }, { "epoch": 6.49, "learning_rate": 4.675856234796352e-05, "loss": 2.499, "step": 1309000 }, { "epoch": 6.49, "learning_rate": 4.6757323761537436e-05, "loss": 2.5016, "step": 1309500 }, { "epoch": 6.49, "learning_rate": 4.675608517511135e-05, "loss": 2.5232, "step": 1310000 }, { "epoch": 6.49, "learning_rate": 4.675484658868527e-05, "loss": 2.4946, "step": 1310500 }, { "epoch": 6.5, "learning_rate": 4.675361047943204e-05, "loss": 2.4769, "step": 1311000 }, { "epoch": 6.5, "learning_rate": 4.6752371893005956e-05, "loss": 2.4588, "step": 1311500 }, { "epoch": 6.5, "learning_rate": 4.675113578375272e-05, "loss": 2.514, "step": 1312000 }, { "epoch": 6.5, "learning_rate": 4.6749897197326635e-05, "loss": 2.5118, "step": 1312500 }, { "epoch": 6.51, "learning_rate": 4.674865861090055e-05, "loss": 2.4955, "step": 1313000 }, { "epoch": 6.51, "learning_rate": 4.674742002447447e-05, "loss": 2.4849, "step": 1313500 }, { "epoch": 6.51, "learning_rate": 4.6746181438048385e-05, "loss": 2.4894, "step": 1314000 }, { "epoch": 6.51, "learning_rate": 4.67449428516223e-05, "loss": 2.4816, "step": 1314500 }, { "epoch": 6.51, "learning_rate": 4.674370426519622e-05, "loss": 2.4829, "step": 1315000 }, { "epoch": 6.52, "learning_rate": 4.6742465678770136e-05, "loss": 2.4969, "step": 1315500 }, { "epoch": 6.52, "learning_rate": 4.6741229569516905e-05, "loss": 2.4945, "step": 1316000 }, { "epoch": 6.52, "learning_rate": 4.673999346026367e-05, "loss": 2.4751, "step": 1316500 }, { "epoch": 6.52, "learning_rate": 4.6738754873837584e-05, "loss": 2.5224, "step": 1317000 }, { "epoch": 6.53, "learning_rate": 4.67375162874115e-05, "loss": 2.5149, "step": 1317500 }, { "epoch": 6.53, "learning_rate": 4.673627770098542e-05, "loss": 2.5144, "step": 1318000 }, { "epoch": 6.53, "learning_rate": 4.6735039114559335e-05, "loss": 2.4687, "step": 1318500 }, { "epoch": 6.53, "learning_rate": 4.673380052813325e-05, "loss": 2.4861, "step": 1319000 }, { "epoch": 6.54, "learning_rate": 4.673256441888002e-05, "loss": 2.5114, "step": 1319500 }, { "epoch": 6.54, "learning_rate": 4.673132583245394e-05, "loss": 2.482, "step": 1320000 }, { "epoch": 6.54, "learning_rate": 4.6730087246027854e-05, "loss": 2.5231, "step": 1320500 }, { "epoch": 6.54, "learning_rate": 4.672885113677462e-05, "loss": 2.511, "step": 1321000 }, { "epoch": 6.55, "learning_rate": 4.672761502752139e-05, "loss": 2.512, "step": 1321500 }, { "epoch": 6.55, "learning_rate": 4.672637644109531e-05, "loss": 2.5054, "step": 1322000 }, { "epoch": 6.55, "learning_rate": 4.6725137854669226e-05, "loss": 2.502, "step": 1322500 }, { "epoch": 6.55, "learning_rate": 4.6723901745415995e-05, "loss": 2.4838, "step": 1323000 }, { "epoch": 6.56, "learning_rate": 4.672266315898991e-05, "loss": 2.5259, "step": 1323500 }, { "epoch": 6.56, "learning_rate": 4.672142457256383e-05, "loss": 2.5036, "step": 1324000 }, { "epoch": 6.56, "learning_rate": 4.6720185986137745e-05, "loss": 2.5052, "step": 1324500 }, { "epoch": 6.56, "learning_rate": 4.671894739971166e-05, "loss": 2.4959, "step": 1325000 }, { "epoch": 6.57, "learning_rate": 4.671770881328558e-05, "loss": 2.5092, "step": 1325500 }, { "epoch": 6.57, "learning_rate": 4.6716470226859496e-05, "loss": 2.5141, "step": 1326000 }, { "epoch": 6.57, "learning_rate": 4.671523164043341e-05, "loss": 2.5202, "step": 1326500 }, { "epoch": 6.57, "learning_rate": 4.671399305400732e-05, "loss": 2.4919, "step": 1327000 }, { "epoch": 6.58, "learning_rate": 4.671275446758124e-05, "loss": 2.4807, "step": 1327500 }, { "epoch": 6.58, "learning_rate": 4.671151588115516e-05, "loss": 2.4948, "step": 1328000 }, { "epoch": 6.58, "learning_rate": 4.6710277294729074e-05, "loss": 2.51, "step": 1328500 }, { "epoch": 6.58, "learning_rate": 4.670903870830299e-05, "loss": 2.4887, "step": 1329000 }, { "epoch": 6.59, "learning_rate": 4.67078001218769e-05, "loss": 2.4943, "step": 1329500 }, { "epoch": 6.59, "learning_rate": 4.670656153545082e-05, "loss": 2.5102, "step": 1330000 }, { "epoch": 6.59, "learning_rate": 4.6705322949024735e-05, "loss": 2.4674, "step": 1330500 }, { "epoch": 6.59, "learning_rate": 4.670408436259865e-05, "loss": 2.4995, "step": 1331000 }, { "epoch": 6.6, "learning_rate": 4.670285073051828e-05, "loss": 2.5186, "step": 1331500 }, { "epoch": 6.6, "learning_rate": 4.6701612144092196e-05, "loss": 2.485, "step": 1332000 }, { "epoch": 6.6, "learning_rate": 4.670037355766611e-05, "loss": 2.487, "step": 1332500 }, { "epoch": 6.6, "learning_rate": 4.669913497124003e-05, "loss": 2.5017, "step": 1333000 }, { "epoch": 6.61, "learning_rate": 4.669789886198679e-05, "loss": 2.4821, "step": 1333500 }, { "epoch": 6.61, "learning_rate": 4.669666027556071e-05, "loss": 2.4815, "step": 1334000 }, { "epoch": 6.61, "learning_rate": 4.6695421689134626e-05, "loss": 2.4971, "step": 1334500 }, { "epoch": 6.61, "learning_rate": 4.669418310270854e-05, "loss": 2.5461, "step": 1335000 }, { "epoch": 6.62, "learning_rate": 4.669294451628246e-05, "loss": 2.514, "step": 1335500 }, { "epoch": 6.62, "learning_rate": 4.669170592985638e-05, "loss": 2.5117, "step": 1336000 }, { "epoch": 6.62, "learning_rate": 4.6690467343430294e-05, "loss": 2.5189, "step": 1336500 }, { "epoch": 6.62, "learning_rate": 4.668922875700421e-05, "loss": 2.4931, "step": 1337000 }, { "epoch": 6.63, "learning_rate": 4.668799017057813e-05, "loss": 2.5089, "step": 1337500 }, { "epoch": 6.63, "learning_rate": 4.668675158415204e-05, "loss": 2.5183, "step": 1338000 }, { "epoch": 6.63, "learning_rate": 4.6685512997725955e-05, "loss": 2.5247, "step": 1338500 }, { "epoch": 6.63, "learning_rate": 4.668427441129987e-05, "loss": 2.4885, "step": 1339000 }, { "epoch": 6.64, "learning_rate": 4.668303830204665e-05, "loss": 2.5007, "step": 1339500 }, { "epoch": 6.64, "learning_rate": 4.6681799715620564e-05, "loss": 2.483, "step": 1340000 }, { "epoch": 6.64, "learning_rate": 4.668056112919448e-05, "loss": 2.5025, "step": 1340500 }, { "epoch": 6.64, "learning_rate": 4.667932254276839e-05, "loss": 2.4848, "step": 1341000 }, { "epoch": 6.65, "learning_rate": 4.667808395634231e-05, "loss": 2.5055, "step": 1341500 }, { "epoch": 6.65, "learning_rate": 4.6676845369916225e-05, "loss": 2.4936, "step": 1342000 }, { "epoch": 6.65, "learning_rate": 4.6675609260662994e-05, "loss": 2.5113, "step": 1342500 }, { "epoch": 6.65, "learning_rate": 4.667437315140976e-05, "loss": 2.4663, "step": 1343000 }, { "epoch": 6.66, "learning_rate": 4.667313456498368e-05, "loss": 2.4992, "step": 1343500 }, { "epoch": 6.66, "learning_rate": 4.66718959785576e-05, "loss": 2.4944, "step": 1344000 }, { "epoch": 6.66, "learning_rate": 4.6670657392131514e-05, "loss": 2.4973, "step": 1344500 }, { "epoch": 6.66, "learning_rate": 4.666941880570543e-05, "loss": 2.469, "step": 1345000 }, { "epoch": 6.67, "learning_rate": 4.666818021927935e-05, "loss": 2.5108, "step": 1345500 }, { "epoch": 6.67, "learning_rate": 4.6666941632853264e-05, "loss": 2.4903, "step": 1346000 }, { "epoch": 6.67, "learning_rate": 4.666570304642718e-05, "loss": 2.4975, "step": 1346500 }, { "epoch": 6.67, "learning_rate": 4.66644644600011e-05, "loss": 2.5192, "step": 1347000 }, { "epoch": 6.68, "learning_rate": 4.666322587357501e-05, "loss": 2.5035, "step": 1347500 }, { "epoch": 6.68, "learning_rate": 4.666198976432178e-05, "loss": 2.4976, "step": 1348000 }, { "epoch": 6.68, "learning_rate": 4.6660751177895694e-05, "loss": 2.5002, "step": 1348500 }, { "epoch": 6.68, "learning_rate": 4.665951506864246e-05, "loss": 2.5158, "step": 1349000 }, { "epoch": 6.69, "learning_rate": 4.665827648221638e-05, "loss": 2.4809, "step": 1349500 }, { "epoch": 6.69, "learning_rate": 4.66570378957903e-05, "loss": 2.4874, "step": 1350000 }, { "epoch": 6.69, "learning_rate": 4.6655799309364214e-05, "loss": 2.5005, "step": 1350500 }, { "epoch": 6.69, "learning_rate": 4.665456072293813e-05, "loss": 2.4906, "step": 1351000 }, { "epoch": 6.7, "learning_rate": 4.665332213651205e-05, "loss": 2.5107, "step": 1351500 }, { "epoch": 6.7, "learning_rate": 4.6652083550085964e-05, "loss": 2.4649, "step": 1352000 }, { "epoch": 6.7, "learning_rate": 4.665084496365988e-05, "loss": 2.4886, "step": 1352500 }, { "epoch": 6.7, "learning_rate": 4.66496063772338e-05, "loss": 2.5207, "step": 1353000 }, { "epoch": 6.71, "learning_rate": 4.664837026798056e-05, "loss": 2.4898, "step": 1353500 }, { "epoch": 6.71, "learning_rate": 4.664713168155448e-05, "loss": 2.5024, "step": 1354000 }, { "epoch": 6.71, "learning_rate": 4.6645893095128394e-05, "loss": 2.517, "step": 1354500 }, { "epoch": 6.71, "learning_rate": 4.664465450870231e-05, "loss": 2.4937, "step": 1355000 }, { "epoch": 6.72, "learning_rate": 4.664341592227623e-05, "loss": 2.4872, "step": 1355500 }, { "epoch": 6.72, "learning_rate": 4.6642177335850145e-05, "loss": 2.5017, "step": 1356000 }, { "epoch": 6.72, "learning_rate": 4.6640938749424055e-05, "loss": 2.5051, "step": 1356500 }, { "epoch": 6.72, "learning_rate": 4.663970016299797e-05, "loss": 2.477, "step": 1357000 }, { "epoch": 6.73, "learning_rate": 4.663846157657189e-05, "loss": 2.5128, "step": 1357500 }, { "epoch": 6.73, "learning_rate": 4.6637222990145806e-05, "loss": 2.4908, "step": 1358000 }, { "epoch": 6.73, "learning_rate": 4.663598440371972e-05, "loss": 2.4832, "step": 1358500 }, { "epoch": 6.73, "learning_rate": 4.66347482944665e-05, "loss": 2.4844, "step": 1359000 }, { "epoch": 6.74, "learning_rate": 4.6633509708040415e-05, "loss": 2.5207, "step": 1359500 }, { "epoch": 6.74, "learning_rate": 4.663227359878718e-05, "loss": 2.496, "step": 1360000 }, { "epoch": 6.74, "learning_rate": 4.6631035012361094e-05, "loss": 2.5076, "step": 1360500 }, { "epoch": 6.74, "learning_rate": 4.662979642593501e-05, "loss": 2.4935, "step": 1361000 }, { "epoch": 6.75, "learning_rate": 4.662855783950893e-05, "loss": 2.4879, "step": 1361500 }, { "epoch": 6.75, "learning_rate": 4.6627319253082845e-05, "loss": 2.4986, "step": 1362000 }, { "epoch": 6.75, "learning_rate": 4.6626083143829614e-05, "loss": 2.515, "step": 1362500 }, { "epoch": 6.75, "learning_rate": 4.662484455740353e-05, "loss": 2.501, "step": 1363000 }, { "epoch": 6.76, "learning_rate": 4.662360597097745e-05, "loss": 2.4869, "step": 1363500 }, { "epoch": 6.76, "learning_rate": 4.6622367384551365e-05, "loss": 2.4833, "step": 1364000 }, { "epoch": 6.76, "learning_rate": 4.662113127529813e-05, "loss": 2.4934, "step": 1364500 }, { "epoch": 6.76, "learning_rate": 4.6619895166044896e-05, "loss": 2.48, "step": 1365000 }, { "epoch": 6.77, "learning_rate": 4.661865657961881e-05, "loss": 2.4729, "step": 1365500 }, { "epoch": 6.77, "learning_rate": 4.661741799319273e-05, "loss": 2.4902, "step": 1366000 }, { "epoch": 6.77, "learning_rate": 4.6616179406766646e-05, "loss": 2.5141, "step": 1366500 }, { "epoch": 6.77, "learning_rate": 4.661494082034056e-05, "loss": 2.4894, "step": 1367000 }, { "epoch": 6.78, "learning_rate": 4.661370223391448e-05, "loss": 2.4918, "step": 1367500 }, { "epoch": 6.78, "learning_rate": 4.66124636474884e-05, "loss": 2.5045, "step": 1368000 }, { "epoch": 6.78, "learning_rate": 4.6611225061062314e-05, "loss": 2.4838, "step": 1368500 }, { "epoch": 6.78, "learning_rate": 4.660998647463623e-05, "loss": 2.487, "step": 1369000 }, { "epoch": 6.78, "learning_rate": 4.660874788821015e-05, "loss": 2.5039, "step": 1369500 }, { "epoch": 6.79, "learning_rate": 4.6607509301784065e-05, "loss": 2.5063, "step": 1370000 }, { "epoch": 6.79, "learning_rate": 4.660627071535798e-05, "loss": 2.5151, "step": 1370500 }, { "epoch": 6.79, "learning_rate": 4.6605039560450454e-05, "loss": 2.4945, "step": 1371000 }, { "epoch": 6.79, "learning_rate": 4.660380097402437e-05, "loss": 2.4963, "step": 1371500 }, { "epoch": 6.8, "learning_rate": 4.660256238759829e-05, "loss": 2.5327, "step": 1372000 }, { "epoch": 6.8, "learning_rate": 4.660132627834505e-05, "loss": 2.4942, "step": 1372500 }, { "epoch": 6.8, "learning_rate": 4.660008769191897e-05, "loss": 2.512, "step": 1373000 }, { "epoch": 6.8, "learning_rate": 4.6598849105492884e-05, "loss": 2.4764, "step": 1373500 }, { "epoch": 6.81, "learning_rate": 4.65976105190668e-05, "loss": 2.4803, "step": 1374000 }, { "epoch": 6.81, "learning_rate": 4.659637193264072e-05, "loss": 2.4855, "step": 1374500 }, { "epoch": 6.81, "learning_rate": 4.6595133346214635e-05, "loss": 2.4853, "step": 1375000 }, { "epoch": 6.81, "learning_rate": 4.6593897236961404e-05, "loss": 2.4791, "step": 1375500 }, { "epoch": 6.82, "learning_rate": 4.659265865053532e-05, "loss": 2.4887, "step": 1376000 }, { "epoch": 6.82, "learning_rate": 4.659142006410924e-05, "loss": 2.4982, "step": 1376500 }, { "epoch": 6.82, "learning_rate": 4.6590181477683154e-05, "loss": 2.5017, "step": 1377000 }, { "epoch": 6.82, "learning_rate": 4.658894536842992e-05, "loss": 2.5206, "step": 1377500 }, { "epoch": 6.83, "learning_rate": 4.658770678200384e-05, "loss": 2.4919, "step": 1378000 }, { "epoch": 6.83, "learning_rate": 4.658646819557776e-05, "loss": 2.5036, "step": 1378500 }, { "epoch": 6.83, "learning_rate": 4.658522960915167e-05, "loss": 2.4691, "step": 1379000 }, { "epoch": 6.83, "learning_rate": 4.6583991022725584e-05, "loss": 2.4853, "step": 1379500 }, { "epoch": 6.84, "learning_rate": 4.65827524362995e-05, "loss": 2.5148, "step": 1380000 }, { "epoch": 6.84, "learning_rate": 4.658151384987342e-05, "loss": 2.5151, "step": 1380500 }, { "epoch": 6.84, "learning_rate": 4.6580275263447335e-05, "loss": 2.4836, "step": 1381000 }, { "epoch": 6.84, "learning_rate": 4.657903667702125e-05, "loss": 2.4898, "step": 1381500 }, { "epoch": 6.85, "learning_rate": 4.657779809059517e-05, "loss": 2.4917, "step": 1382000 }, { "epoch": 6.85, "learning_rate": 4.657655950416908e-05, "loss": 2.5036, "step": 1382500 }, { "epoch": 6.85, "learning_rate": 4.6575320917742996e-05, "loss": 2.4992, "step": 1383000 }, { "epoch": 6.85, "learning_rate": 4.657408233131691e-05, "loss": 2.502, "step": 1383500 }, { "epoch": 6.86, "learning_rate": 4.657284374489083e-05, "loss": 2.4944, "step": 1384000 }, { "epoch": 6.86, "learning_rate": 4.657160515846475e-05, "loss": 2.4917, "step": 1384500 }, { "epoch": 6.86, "learning_rate": 4.6570366572038664e-05, "loss": 2.4888, "step": 1385000 }, { "epoch": 6.86, "learning_rate": 4.656912798561258e-05, "loss": 2.485, "step": 1385500 }, { "epoch": 6.87, "learning_rate": 4.65678893991865e-05, "loss": 2.4949, "step": 1386000 }, { "epoch": 6.87, "learning_rate": 4.6566653289933266e-05, "loss": 2.4753, "step": 1386500 }, { "epoch": 6.87, "learning_rate": 4.656541470350718e-05, "loss": 2.5044, "step": 1387000 }, { "epoch": 6.87, "learning_rate": 4.65641761170811e-05, "loss": 2.5088, "step": 1387500 }, { "epoch": 6.88, "learning_rate": 4.656293753065502e-05, "loss": 2.4922, "step": 1388000 }, { "epoch": 6.88, "learning_rate": 4.6561698944228934e-05, "loss": 2.5054, "step": 1388500 }, { "epoch": 6.88, "learning_rate": 4.656046035780285e-05, "loss": 2.4948, "step": 1389000 }, { "epoch": 6.88, "learning_rate": 4.655922177137677e-05, "loss": 2.5002, "step": 1389500 }, { "epoch": 6.89, "learning_rate": 4.655798566212353e-05, "loss": 2.4978, "step": 1390000 }, { "epoch": 6.89, "learning_rate": 4.655674707569745e-05, "loss": 2.5017, "step": 1390500 }, { "epoch": 6.89, "learning_rate": 4.6555508489271364e-05, "loss": 2.4925, "step": 1391000 }, { "epoch": 6.89, "learning_rate": 4.655426990284528e-05, "loss": 2.4932, "step": 1391500 }, { "epoch": 6.9, "learning_rate": 4.655303379359205e-05, "loss": 2.5058, "step": 1392000 }, { "epoch": 6.9, "learning_rate": 4.6551795207165966e-05, "loss": 2.4799, "step": 1392500 }, { "epoch": 6.9, "learning_rate": 4.655055662073988e-05, "loss": 2.5225, "step": 1393000 }, { "epoch": 6.9, "learning_rate": 4.65493180343138e-05, "loss": 2.4859, "step": 1393500 }, { "epoch": 6.91, "learning_rate": 4.654807944788772e-05, "loss": 2.521, "step": 1394000 }, { "epoch": 6.91, "learning_rate": 4.6546840861461634e-05, "loss": 2.4728, "step": 1394500 }, { "epoch": 6.91, "learning_rate": 4.6545604752208396e-05, "loss": 2.4708, "step": 1395000 }, { "epoch": 6.91, "learning_rate": 4.654436616578231e-05, "loss": 2.4922, "step": 1395500 }, { "epoch": 6.92, "learning_rate": 4.654312757935623e-05, "loss": 2.4731, "step": 1396000 }, { "epoch": 6.92, "learning_rate": 4.654188899293015e-05, "loss": 2.4955, "step": 1396500 }, { "epoch": 6.92, "learning_rate": 4.6540650406504064e-05, "loss": 2.4996, "step": 1397000 }, { "epoch": 6.92, "learning_rate": 4.653941182007798e-05, "loss": 2.4988, "step": 1397500 }, { "epoch": 6.93, "learning_rate": 4.65381732336519e-05, "loss": 2.476, "step": 1398000 }, { "epoch": 6.93, "learning_rate": 4.6536934647225815e-05, "loss": 2.487, "step": 1398500 }, { "epoch": 6.93, "learning_rate": 4.6535698537972583e-05, "loss": 2.4695, "step": 1399000 }, { "epoch": 6.93, "learning_rate": 4.65344599515465e-05, "loss": 2.4825, "step": 1399500 }, { "epoch": 6.94, "learning_rate": 4.653322136512042e-05, "loss": 2.4803, "step": 1400000 }, { "epoch": 6.94, "learning_rate": 4.6531982778694334e-05, "loss": 2.4772, "step": 1400500 }, { "epoch": 6.94, "learning_rate": 4.653074419226825e-05, "loss": 2.4992, "step": 1401000 }, { "epoch": 6.94, "learning_rate": 4.652950560584217e-05, "loss": 2.4755, "step": 1401500 }, { "epoch": 6.95, "learning_rate": 4.6528267019416085e-05, "loss": 2.486, "step": 1402000 }, { "epoch": 6.95, "learning_rate": 4.652703091016285e-05, "loss": 2.5144, "step": 1402500 }, { "epoch": 6.95, "learning_rate": 4.652579480090962e-05, "loss": 2.4947, "step": 1403000 }, { "epoch": 6.95, "learning_rate": 4.652455621448354e-05, "loss": 2.4977, "step": 1403500 }, { "epoch": 6.96, "learning_rate": 4.652331762805745e-05, "loss": 2.508, "step": 1404000 }, { "epoch": 6.96, "learning_rate": 4.652207904163137e-05, "loss": 2.5197, "step": 1404500 }, { "epoch": 6.96, "learning_rate": 4.6520840455205284e-05, "loss": 2.4885, "step": 1405000 }, { "epoch": 6.96, "learning_rate": 4.65196018687792e-05, "loss": 2.4976, "step": 1405500 }, { "epoch": 6.97, "learning_rate": 4.651836328235312e-05, "loss": 2.4476, "step": 1406000 }, { "epoch": 6.97, "learning_rate": 4.6517124695927034e-05, "loss": 2.5003, "step": 1406500 }, { "epoch": 6.97, "learning_rate": 4.651588610950095e-05, "loss": 2.4807, "step": 1407000 }, { "epoch": 6.97, "learning_rate": 4.651464752307487e-05, "loss": 2.5, "step": 1407500 }, { "epoch": 6.98, "learning_rate": 4.6513408936648785e-05, "loss": 2.5051, "step": 1408000 }, { "epoch": 6.98, "learning_rate": 4.65121703502227e-05, "loss": 2.4952, "step": 1408500 }, { "epoch": 6.98, "learning_rate": 4.651093176379662e-05, "loss": 2.4864, "step": 1409000 }, { "epoch": 6.98, "learning_rate": 4.650969565454338e-05, "loss": 2.4935, "step": 1409500 }, { "epoch": 6.99, "learning_rate": 4.65084570681173e-05, "loss": 2.5068, "step": 1410000 }, { "epoch": 6.99, "learning_rate": 4.650722095886407e-05, "loss": 2.5097, "step": 1410500 }, { "epoch": 6.99, "learning_rate": 4.6505982372437984e-05, "loss": 2.4744, "step": 1411000 }, { "epoch": 6.99, "learning_rate": 4.65047437860119e-05, "loss": 2.4966, "step": 1411500 }, { "epoch": 7.0, "learning_rate": 4.650350519958582e-05, "loss": 2.4788, "step": 1412000 }, { "epoch": 7.0, "learning_rate": 4.6502266613159735e-05, "loss": 2.4876, "step": 1412500 }, { "epoch": 7.0, "eval_accuracy": 0.6390809050019882, "eval_accuracy_mlm": 0.5919321078827258, "eval_accuracy_nsp": 0.8611933683454986, "eval_loss": 2.4429469108581543, "eval_runtime": 145.8536, "eval_samples_per_second": 1748.048, "eval_steps_per_second": 72.84, "step": 1412901 }, { "epoch": 7.0, "learning_rate": 4.650102802673365e-05, "loss": 2.4845, "step": 1413000 }, { "epoch": 7.0, "learning_rate": 4.649978944030757e-05, "loss": 2.4572, "step": 1413500 }, { "epoch": 7.01, "learning_rate": 4.6498550853881485e-05, "loss": 2.4539, "step": 1414000 }, { "epoch": 7.01, "learning_rate": 4.6497314744628254e-05, "loss": 2.4568, "step": 1414500 }, { "epoch": 7.01, "learning_rate": 4.6496076158202164e-05, "loss": 2.4616, "step": 1415000 }, { "epoch": 7.01, "learning_rate": 4.649483757177608e-05, "loss": 2.4551, "step": 1415500 }, { "epoch": 7.02, "learning_rate": 4.649360146252286e-05, "loss": 2.4791, "step": 1416000 }, { "epoch": 7.02, "learning_rate": 4.649236287609677e-05, "loss": 2.481, "step": 1416500 }, { "epoch": 7.02, "learning_rate": 4.6491124289670684e-05, "loss": 2.4784, "step": 1417000 }, { "epoch": 7.02, "learning_rate": 4.64898857032446e-05, "loss": 2.467, "step": 1417500 }, { "epoch": 7.03, "learning_rate": 4.648864711681852e-05, "loss": 2.4805, "step": 1418000 }, { "epoch": 7.03, "learning_rate": 4.6487408530392435e-05, "loss": 2.4275, "step": 1418500 }, { "epoch": 7.03, "learning_rate": 4.648616994396635e-05, "loss": 2.4766, "step": 1419000 }, { "epoch": 7.03, "learning_rate": 4.648493135754027e-05, "loss": 2.4559, "step": 1419500 }, { "epoch": 7.04, "learning_rate": 4.6483692771114185e-05, "loss": 2.5089, "step": 1420000 }, { "epoch": 7.04, "learning_rate": 4.64824541846881e-05, "loss": 2.4865, "step": 1420500 }, { "epoch": 7.04, "learning_rate": 4.648121559826202e-05, "loss": 2.4508, "step": 1421000 }, { "epoch": 7.04, "learning_rate": 4.6479977011835936e-05, "loss": 2.4655, "step": 1421500 }, { "epoch": 7.05, "learning_rate": 4.647873842540985e-05, "loss": 2.4671, "step": 1422000 }, { "epoch": 7.05, "learning_rate": 4.647749983898377e-05, "loss": 2.4695, "step": 1422500 }, { "epoch": 7.05, "learning_rate": 4.647626372973053e-05, "loss": 2.4488, "step": 1423000 }, { "epoch": 7.05, "learning_rate": 4.647502514330445e-05, "loss": 2.4521, "step": 1423500 }, { "epoch": 7.05, "learning_rate": 4.647378903405122e-05, "loss": 2.4709, "step": 1424000 }, { "epoch": 7.06, "learning_rate": 4.647255540197084e-05, "loss": 2.4587, "step": 1424500 }, { "epoch": 7.06, "learning_rate": 4.6471316815544755e-05, "loss": 2.4739, "step": 1425000 }, { "epoch": 7.06, "learning_rate": 4.647007822911867e-05, "loss": 2.4977, "step": 1425500 }, { "epoch": 7.06, "learning_rate": 4.646883964269259e-05, "loss": 2.4725, "step": 1426000 }, { "epoch": 7.07, "learning_rate": 4.6467601056266506e-05, "loss": 2.4716, "step": 1426500 }, { "epoch": 7.07, "learning_rate": 4.646636246984042e-05, "loss": 2.4874, "step": 1427000 }, { "epoch": 7.07, "learning_rate": 4.646512388341434e-05, "loss": 2.4674, "step": 1427500 }, { "epoch": 7.07, "learning_rate": 4.646388529698826e-05, "loss": 2.4617, "step": 1428000 }, { "epoch": 7.08, "learning_rate": 4.6462646710562174e-05, "loss": 2.4703, "step": 1428500 }, { "epoch": 7.08, "learning_rate": 4.6461408124136084e-05, "loss": 2.4612, "step": 1429000 }, { "epoch": 7.08, "learning_rate": 4.646016953771e-05, "loss": 2.4505, "step": 1429500 }, { "epoch": 7.08, "learning_rate": 4.645893095128392e-05, "loss": 2.4645, "step": 1430000 }, { "epoch": 7.09, "learning_rate": 4.6457692364857835e-05, "loss": 2.4425, "step": 1430500 }, { "epoch": 7.09, "learning_rate": 4.645645377843175e-05, "loss": 2.4478, "step": 1431000 }, { "epoch": 7.09, "learning_rate": 4.645521519200567e-05, "loss": 2.4645, "step": 1431500 }, { "epoch": 7.09, "learning_rate": 4.6453979082752444e-05, "loss": 2.4471, "step": 1432000 }, { "epoch": 7.1, "learning_rate": 4.6452740496326354e-05, "loss": 2.4378, "step": 1432500 }, { "epoch": 7.1, "learning_rate": 4.645150190990027e-05, "loss": 2.4872, "step": 1433000 }, { "epoch": 7.1, "learning_rate": 4.645026332347419e-05, "loss": 2.4629, "step": 1433500 }, { "epoch": 7.1, "learning_rate": 4.6449024737048105e-05, "loss": 2.4646, "step": 1434000 }, { "epoch": 7.11, "learning_rate": 4.644778615062202e-05, "loss": 2.4917, "step": 1434500 }, { "epoch": 7.11, "learning_rate": 4.644654756419594e-05, "loss": 2.4704, "step": 1435000 }, { "epoch": 7.11, "learning_rate": 4.644530897776985e-05, "loss": 2.4439, "step": 1435500 }, { "epoch": 7.11, "learning_rate": 4.6444070391343766e-05, "loss": 2.458, "step": 1436000 }, { "epoch": 7.12, "learning_rate": 4.6442834282090535e-05, "loss": 2.4867, "step": 1436500 }, { "epoch": 7.12, "learning_rate": 4.644159569566445e-05, "loss": 2.4849, "step": 1437000 }, { "epoch": 7.12, "learning_rate": 4.644035710923837e-05, "loss": 2.4636, "step": 1437500 }, { "epoch": 7.12, "learning_rate": 4.6439118522812286e-05, "loss": 2.4742, "step": 1438000 }, { "epoch": 7.13, "learning_rate": 4.643788736790476e-05, "loss": 2.4822, "step": 1438500 }, { "epoch": 7.13, "learning_rate": 4.6436648781478675e-05, "loss": 2.4591, "step": 1439000 }, { "epoch": 7.13, "learning_rate": 4.643541019505259e-05, "loss": 2.4727, "step": 1439500 }, { "epoch": 7.13, "learning_rate": 4.643417408579936e-05, "loss": 2.4814, "step": 1440000 }, { "epoch": 7.14, "learning_rate": 4.643293549937328e-05, "loss": 2.4816, "step": 1440500 }, { "epoch": 7.14, "learning_rate": 4.6431696912947195e-05, "loss": 2.4745, "step": 1441000 }, { "epoch": 7.14, "learning_rate": 4.643045832652111e-05, "loss": 2.467, "step": 1441500 }, { "epoch": 7.14, "learning_rate": 4.642921974009503e-05, "loss": 2.4839, "step": 1442000 }, { "epoch": 7.15, "learning_rate": 4.6427981153668946e-05, "loss": 2.4538, "step": 1442500 }, { "epoch": 7.15, "learning_rate": 4.6426742567242856e-05, "loss": 2.4355, "step": 1443000 }, { "epoch": 7.15, "learning_rate": 4.642550398081677e-05, "loss": 2.479, "step": 1443500 }, { "epoch": 7.15, "learning_rate": 4.642426539439069e-05, "loss": 2.4619, "step": 1444000 }, { "epoch": 7.16, "learning_rate": 4.642302680796461e-05, "loss": 2.4853, "step": 1444500 }, { "epoch": 7.16, "learning_rate": 4.6421788221538524e-05, "loss": 2.4502, "step": 1445000 }, { "epoch": 7.16, "learning_rate": 4.642054963511244e-05, "loss": 2.4703, "step": 1445500 }, { "epoch": 7.16, "learning_rate": 4.641931104868636e-05, "loss": 2.4679, "step": 1446000 }, { "epoch": 7.17, "learning_rate": 4.6418072462260274e-05, "loss": 2.4749, "step": 1446500 }, { "epoch": 7.17, "learning_rate": 4.641683387583419e-05, "loss": 2.4873, "step": 1447000 }, { "epoch": 7.17, "learning_rate": 4.641559528940811e-05, "loss": 2.4585, "step": 1447500 }, { "epoch": 7.17, "learning_rate": 4.641435670298202e-05, "loss": 2.4642, "step": 1448000 }, { "epoch": 7.18, "learning_rate": 4.6413120593728794e-05, "loss": 2.4696, "step": 1448500 }, { "epoch": 7.18, "learning_rate": 4.641188200730271e-05, "loss": 2.4597, "step": 1449000 }, { "epoch": 7.18, "learning_rate": 4.641064342087663e-05, "loss": 2.4897, "step": 1449500 }, { "epoch": 7.18, "learning_rate": 4.640940731162339e-05, "loss": 2.4707, "step": 1450000 }, { "epoch": 7.19, "learning_rate": 4.640816872519731e-05, "loss": 2.4917, "step": 1450500 }, { "epoch": 7.19, "learning_rate": 4.6406930138771224e-05, "loss": 2.4877, "step": 1451000 }, { "epoch": 7.19, "learning_rate": 4.640569155234514e-05, "loss": 2.4417, "step": 1451500 }, { "epoch": 7.19, "learning_rate": 4.640445544309191e-05, "loss": 2.4632, "step": 1452000 }, { "epoch": 7.2, "learning_rate": 4.6403216856665826e-05, "loss": 2.5164, "step": 1452500 }, { "epoch": 7.2, "learning_rate": 4.640197827023974e-05, "loss": 2.476, "step": 1453000 }, { "epoch": 7.2, "learning_rate": 4.640073968381366e-05, "loss": 2.4726, "step": 1453500 }, { "epoch": 7.2, "learning_rate": 4.639950357456043e-05, "loss": 2.4642, "step": 1454000 }, { "epoch": 7.21, "learning_rate": 4.6398264988134346e-05, "loss": 2.4863, "step": 1454500 }, { "epoch": 7.21, "learning_rate": 4.639702640170826e-05, "loss": 2.4524, "step": 1455000 }, { "epoch": 7.21, "learning_rate": 4.639578781528218e-05, "loss": 2.4725, "step": 1455500 }, { "epoch": 7.21, "learning_rate": 4.639455170602894e-05, "loss": 2.4505, "step": 1456000 }, { "epoch": 7.22, "learning_rate": 4.639331311960286e-05, "loss": 2.4743, "step": 1456500 }, { "epoch": 7.22, "learning_rate": 4.6392074533176776e-05, "loss": 2.4581, "step": 1457000 }, { "epoch": 7.22, "learning_rate": 4.639083594675069e-05, "loss": 2.4725, "step": 1457500 }, { "epoch": 7.22, "learning_rate": 4.638959736032461e-05, "loss": 2.4704, "step": 1458000 }, { "epoch": 7.23, "learning_rate": 4.6388358773898526e-05, "loss": 2.4718, "step": 1458500 }, { "epoch": 7.23, "learning_rate": 4.638712018747244e-05, "loss": 2.4867, "step": 1459000 }, { "epoch": 7.23, "learning_rate": 4.638588407821921e-05, "loss": 2.4673, "step": 1459500 }, { "epoch": 7.23, "learning_rate": 4.638464549179313e-05, "loss": 2.4954, "step": 1460000 }, { "epoch": 7.24, "learning_rate": 4.6383406905367046e-05, "loss": 2.4766, "step": 1460500 }, { "epoch": 7.24, "learning_rate": 4.638216831894096e-05, "loss": 2.4681, "step": 1461000 }, { "epoch": 7.24, "learning_rate": 4.638092973251488e-05, "loss": 2.4931, "step": 1461500 }, { "epoch": 7.24, "learning_rate": 4.63796911460888e-05, "loss": 2.4671, "step": 1462000 }, { "epoch": 7.25, "learning_rate": 4.6378452559662714e-05, "loss": 2.4901, "step": 1462500 }, { "epoch": 7.25, "learning_rate": 4.637721397323663e-05, "loss": 2.4586, "step": 1463000 }, { "epoch": 7.25, "learning_rate": 4.637597538681054e-05, "loss": 2.4707, "step": 1463500 }, { "epoch": 7.25, "learning_rate": 4.637474175473016e-05, "loss": 2.4735, "step": 1464000 }, { "epoch": 7.26, "learning_rate": 4.637350316830408e-05, "loss": 2.4678, "step": 1464500 }, { "epoch": 7.26, "learning_rate": 4.6372264581877995e-05, "loss": 2.485, "step": 1465000 }, { "epoch": 7.26, "learning_rate": 4.637102599545191e-05, "loss": 2.4831, "step": 1465500 }, { "epoch": 7.26, "learning_rate": 4.636978740902583e-05, "loss": 2.4813, "step": 1466000 }, { "epoch": 7.27, "learning_rate": 4.6368548822599746e-05, "loss": 2.4825, "step": 1466500 }, { "epoch": 7.27, "learning_rate": 4.6367312713346515e-05, "loss": 2.4867, "step": 1467000 }, { "epoch": 7.27, "learning_rate": 4.6366074126920425e-05, "loss": 2.4784, "step": 1467500 }, { "epoch": 7.27, "learning_rate": 4.636483554049434e-05, "loss": 2.4478, "step": 1468000 }, { "epoch": 7.28, "learning_rate": 4.636359695406826e-05, "loss": 2.4608, "step": 1468500 }, { "epoch": 7.28, "learning_rate": 4.6362358367642176e-05, "loss": 2.4759, "step": 1469000 }, { "epoch": 7.28, "learning_rate": 4.636111978121609e-05, "loss": 2.4748, "step": 1469500 }, { "epoch": 7.28, "learning_rate": 4.635988119479001e-05, "loss": 2.4616, "step": 1470000 }, { "epoch": 7.29, "learning_rate": 4.635864260836393e-05, "loss": 2.4663, "step": 1470500 }, { "epoch": 7.29, "learning_rate": 4.6357404021937844e-05, "loss": 2.4643, "step": 1471000 }, { "epoch": 7.29, "learning_rate": 4.635616543551176e-05, "loss": 2.482, "step": 1471500 }, { "epoch": 7.29, "learning_rate": 4.635492932625853e-05, "loss": 2.4848, "step": 1472000 }, { "epoch": 7.3, "learning_rate": 4.6353690739832446e-05, "loss": 2.4771, "step": 1472500 }, { "epoch": 7.3, "learning_rate": 4.635245215340636e-05, "loss": 2.4642, "step": 1473000 }, { "epoch": 7.3, "learning_rate": 4.635121356698028e-05, "loss": 2.4742, "step": 1473500 }, { "epoch": 7.3, "learning_rate": 4.63499749805542e-05, "loss": 2.4884, "step": 1474000 }, { "epoch": 7.31, "learning_rate": 4.6348736394128114e-05, "loss": 2.4658, "step": 1474500 }, { "epoch": 7.31, "learning_rate": 4.634749780770203e-05, "loss": 2.49, "step": 1475000 }, { "epoch": 7.31, "learning_rate": 4.634625922127595e-05, "loss": 2.4885, "step": 1475500 }, { "epoch": 7.31, "learning_rate": 4.634502558919557e-05, "loss": 2.4919, "step": 1476000 }, { "epoch": 7.32, "learning_rate": 4.634378700276948e-05, "loss": 2.4588, "step": 1476500 }, { "epoch": 7.32, "learning_rate": 4.6342548416343396e-05, "loss": 2.4474, "step": 1477000 }, { "epoch": 7.32, "learning_rate": 4.634130982991731e-05, "loss": 2.4867, "step": 1477500 }, { "epoch": 7.32, "learning_rate": 4.634007124349123e-05, "loss": 2.4749, "step": 1478000 }, { "epoch": 7.33, "learning_rate": 4.6338832657065146e-05, "loss": 2.4603, "step": 1478500 }, { "epoch": 7.33, "learning_rate": 4.633759407063906e-05, "loss": 2.4551, "step": 1479000 }, { "epoch": 7.33, "learning_rate": 4.633635548421298e-05, "loss": 2.4716, "step": 1479500 }, { "epoch": 7.33, "learning_rate": 4.633511937495974e-05, "loss": 2.4879, "step": 1480000 }, { "epoch": 7.33, "learning_rate": 4.633388078853366e-05, "loss": 2.4664, "step": 1480500 }, { "epoch": 7.34, "learning_rate": 4.6332642202107576e-05, "loss": 2.4596, "step": 1481000 }, { "epoch": 7.34, "learning_rate": 4.633140361568149e-05, "loss": 2.5133, "step": 1481500 }, { "epoch": 7.34, "learning_rate": 4.633016502925541e-05, "loss": 2.4767, "step": 1482000 }, { "epoch": 7.34, "learning_rate": 4.632892644282933e-05, "loss": 2.4489, "step": 1482500 }, { "epoch": 7.35, "learning_rate": 4.6327687856403244e-05, "loss": 2.4446, "step": 1483000 }, { "epoch": 7.35, "learning_rate": 4.632644926997716e-05, "loss": 2.4838, "step": 1483500 }, { "epoch": 7.35, "learning_rate": 4.632521563789679e-05, "loss": 2.4423, "step": 1484000 }, { "epoch": 7.35, "learning_rate": 4.63239770514707e-05, "loss": 2.4774, "step": 1484500 }, { "epoch": 7.36, "learning_rate": 4.6322738465044615e-05, "loss": 2.4497, "step": 1485000 }, { "epoch": 7.36, "learning_rate": 4.632149987861853e-05, "loss": 2.4882, "step": 1485500 }, { "epoch": 7.36, "learning_rate": 4.63202637693653e-05, "loss": 2.4747, "step": 1486000 }, { "epoch": 7.36, "learning_rate": 4.631902518293922e-05, "loss": 2.4873, "step": 1486500 }, { "epoch": 7.37, "learning_rate": 4.6317786596513135e-05, "loss": 2.4719, "step": 1487000 }, { "epoch": 7.37, "learning_rate": 4.631654801008705e-05, "loss": 2.466, "step": 1487500 }, { "epoch": 7.37, "learning_rate": 4.631530942366097e-05, "loss": 2.5012, "step": 1488000 }, { "epoch": 7.37, "learning_rate": 4.6314070837234886e-05, "loss": 2.4666, "step": 1488500 }, { "epoch": 7.38, "learning_rate": 4.6312832250808796e-05, "loss": 2.476, "step": 1489000 }, { "epoch": 7.38, "learning_rate": 4.631159366438271e-05, "loss": 2.4908, "step": 1489500 }, { "epoch": 7.38, "learning_rate": 4.631035507795663e-05, "loss": 2.4713, "step": 1490000 }, { "epoch": 7.38, "learning_rate": 4.630911649153055e-05, "loss": 2.4714, "step": 1490500 }, { "epoch": 7.39, "learning_rate": 4.6307877905104464e-05, "loss": 2.4822, "step": 1491000 }, { "epoch": 7.39, "learning_rate": 4.630663931867838e-05, "loss": 2.468, "step": 1491500 }, { "epoch": 7.39, "learning_rate": 4.63054007322523e-05, "loss": 2.4597, "step": 1492000 }, { "epoch": 7.39, "learning_rate": 4.6304162145826214e-05, "loss": 2.4886, "step": 1492500 }, { "epoch": 7.4, "learning_rate": 4.630292355940013e-05, "loss": 2.4621, "step": 1493000 }, { "epoch": 7.4, "learning_rate": 4.630168745014689e-05, "loss": 2.4916, "step": 1493500 }, { "epoch": 7.4, "learning_rate": 4.630044886372081e-05, "loss": 2.5105, "step": 1494000 }, { "epoch": 7.4, "learning_rate": 4.629921027729473e-05, "loss": 2.4965, "step": 1494500 }, { "epoch": 7.41, "learning_rate": 4.62979741680415e-05, "loss": 2.4962, "step": 1495000 }, { "epoch": 7.41, "learning_rate": 4.629673558161541e-05, "loss": 2.4789, "step": 1495500 }, { "epoch": 7.41, "learning_rate": 4.629549947236219e-05, "loss": 2.4785, "step": 1496000 }, { "epoch": 7.41, "learning_rate": 4.6294260885936105e-05, "loss": 2.4689, "step": 1496500 }, { "epoch": 7.42, "learning_rate": 4.629302229951002e-05, "loss": 2.5094, "step": 1497000 }, { "epoch": 7.42, "learning_rate": 4.629178371308394e-05, "loss": 2.4664, "step": 1497500 }, { "epoch": 7.42, "learning_rate": 4.629054512665785e-05, "loss": 2.4711, "step": 1498000 }, { "epoch": 7.42, "learning_rate": 4.6289306540231766e-05, "loss": 2.4982, "step": 1498500 }, { "epoch": 7.43, "learning_rate": 4.628806795380568e-05, "loss": 2.4652, "step": 1499000 }, { "epoch": 7.43, "learning_rate": 4.62868293673796e-05, "loss": 2.464, "step": 1499500 }, { "epoch": 7.43, "learning_rate": 4.628559078095352e-05, "loss": 2.4816, "step": 1500000 }, { "epoch": 7.43, "learning_rate": 4.628435219452743e-05, "loss": 2.461, "step": 1500500 }, { "epoch": 7.44, "learning_rate": 4.6283113608101344e-05, "loss": 2.4604, "step": 1501000 }, { "epoch": 7.44, "learning_rate": 4.628187502167526e-05, "loss": 2.4845, "step": 1501500 }, { "epoch": 7.44, "learning_rate": 4.628063643524918e-05, "loss": 2.4844, "step": 1502000 }, { "epoch": 7.44, "learning_rate": 4.6279397848823095e-05, "loss": 2.4942, "step": 1502500 }, { "epoch": 7.45, "learning_rate": 4.627815926239701e-05, "loss": 2.4517, "step": 1503000 }, { "epoch": 7.45, "learning_rate": 4.627692067597093e-05, "loss": 2.4613, "step": 1503500 }, { "epoch": 7.45, "learning_rate": 4.62756845667177e-05, "loss": 2.4785, "step": 1504000 }, { "epoch": 7.45, "learning_rate": 4.6274445980291615e-05, "loss": 2.4746, "step": 1504500 }, { "epoch": 7.46, "learning_rate": 4.627320739386553e-05, "loss": 2.4834, "step": 1505000 }, { "epoch": 7.46, "learning_rate": 4.627196880743945e-05, "loss": 2.5137, "step": 1505500 }, { "epoch": 7.46, "learning_rate": 4.6270730221013365e-05, "loss": 2.4748, "step": 1506000 }, { "epoch": 7.46, "learning_rate": 4.626949163458728e-05, "loss": 2.459, "step": 1506500 }, { "epoch": 7.47, "learning_rate": 4.62682530481612e-05, "loss": 2.4794, "step": 1507000 }, { "epoch": 7.47, "learning_rate": 4.6267014461735116e-05, "loss": 2.4742, "step": 1507500 }, { "epoch": 7.47, "learning_rate": 4.626577587530903e-05, "loss": 2.4774, "step": 1508000 }, { "epoch": 7.47, "learning_rate": 4.626453728888295e-05, "loss": 2.4662, "step": 1508500 }, { "epoch": 7.48, "learning_rate": 4.626330117962971e-05, "loss": 2.4627, "step": 1509000 }, { "epoch": 7.48, "learning_rate": 4.626206259320363e-05, "loss": 2.4786, "step": 1509500 }, { "epoch": 7.48, "learning_rate": 4.62608264839504e-05, "loss": 2.4726, "step": 1510000 }, { "epoch": 7.48, "learning_rate": 4.6259590374697173e-05, "loss": 2.4954, "step": 1510500 }, { "epoch": 7.49, "learning_rate": 4.6258351788271084e-05, "loss": 2.4493, "step": 1511000 }, { "epoch": 7.49, "learning_rate": 4.6257113201845e-05, "loss": 2.4704, "step": 1511500 }, { "epoch": 7.49, "learning_rate": 4.625587461541892e-05, "loss": 2.4773, "step": 1512000 }, { "epoch": 7.49, "learning_rate": 4.6254636028992834e-05, "loss": 2.4571, "step": 1512500 }, { "epoch": 7.5, "learning_rate": 4.62533999197396e-05, "loss": 2.4826, "step": 1513000 }, { "epoch": 7.5, "learning_rate": 4.625216133331352e-05, "loss": 2.4711, "step": 1513500 }, { "epoch": 7.5, "learning_rate": 4.625092274688743e-05, "loss": 2.5154, "step": 1514000 }, { "epoch": 7.5, "learning_rate": 4.624968416046135e-05, "loss": 2.4689, "step": 1514500 }, { "epoch": 7.51, "learning_rate": 4.624844805120812e-05, "loss": 2.4558, "step": 1515000 }, { "epoch": 7.51, "learning_rate": 4.624720946478204e-05, "loss": 2.489, "step": 1515500 }, { "epoch": 7.51, "learning_rate": 4.624597087835596e-05, "loss": 2.4775, "step": 1516000 }, { "epoch": 7.51, "learning_rate": 4.6244732291929874e-05, "loss": 2.5, "step": 1516500 }, { "epoch": 7.52, "learning_rate": 4.6243496182676636e-05, "loss": 2.5144, "step": 1517000 }, { "epoch": 7.52, "learning_rate": 4.624225759625055e-05, "loss": 2.4776, "step": 1517500 }, { "epoch": 7.52, "learning_rate": 4.624101900982447e-05, "loss": 2.5141, "step": 1518000 }, { "epoch": 7.52, "learning_rate": 4.6239780423398386e-05, "loss": 2.4846, "step": 1518500 }, { "epoch": 7.53, "learning_rate": 4.62385418369723e-05, "loss": 2.4617, "step": 1519000 }, { "epoch": 7.53, "learning_rate": 4.623730325054622e-05, "loss": 2.4419, "step": 1519500 }, { "epoch": 7.53, "learning_rate": 4.623606466412014e-05, "loss": 2.4669, "step": 1520000 }, { "epoch": 7.53, "learning_rate": 4.623482607769405e-05, "loss": 2.4905, "step": 1520500 }, { "epoch": 7.54, "learning_rate": 4.6233587491267964e-05, "loss": 2.4972, "step": 1521000 }, { "epoch": 7.54, "learning_rate": 4.623234890484188e-05, "loss": 2.4975, "step": 1521500 }, { "epoch": 7.54, "learning_rate": 4.62311103184158e-05, "loss": 2.4753, "step": 1522000 }, { "epoch": 7.54, "learning_rate": 4.6229871731989715e-05, "loss": 2.4884, "step": 1522500 }, { "epoch": 7.55, "learning_rate": 4.622863314556363e-05, "loss": 2.4908, "step": 1523000 }, { "epoch": 7.55, "learning_rate": 4.62273970363104e-05, "loss": 2.4883, "step": 1523500 }, { "epoch": 7.55, "learning_rate": 4.622615844988432e-05, "loss": 2.48, "step": 1524000 }, { "epoch": 7.55, "learning_rate": 4.6224919863458235e-05, "loss": 2.4724, "step": 1524500 }, { "epoch": 7.56, "learning_rate": 4.622368127703215e-05, "loss": 2.4895, "step": 1525000 }, { "epoch": 7.56, "learning_rate": 4.622244516777892e-05, "loss": 2.4562, "step": 1525500 }, { "epoch": 7.56, "learning_rate": 4.622120658135284e-05, "loss": 2.4682, "step": 1526000 }, { "epoch": 7.56, "learning_rate": 4.621996799492675e-05, "loss": 2.4951, "step": 1526500 }, { "epoch": 7.57, "learning_rate": 4.6218729408500664e-05, "loss": 2.4807, "step": 1527000 }, { "epoch": 7.57, "learning_rate": 4.621749082207458e-05, "loss": 2.51, "step": 1527500 }, { "epoch": 7.57, "learning_rate": 4.621625471282136e-05, "loss": 2.465, "step": 1528000 }, { "epoch": 7.57, "learning_rate": 4.621501860356812e-05, "loss": 2.4833, "step": 1528500 }, { "epoch": 7.58, "learning_rate": 4.6213780017142036e-05, "loss": 2.4647, "step": 1529000 }, { "epoch": 7.58, "learning_rate": 4.621254143071595e-05, "loss": 2.461, "step": 1529500 }, { "epoch": 7.58, "learning_rate": 4.621130284428987e-05, "loss": 2.4943, "step": 1530000 }, { "epoch": 7.58, "learning_rate": 4.6210064257863787e-05, "loss": 2.4836, "step": 1530500 }, { "epoch": 7.59, "learning_rate": 4.6208825671437704e-05, "loss": 2.4815, "step": 1531000 }, { "epoch": 7.59, "learning_rate": 4.620758956218447e-05, "loss": 2.4911, "step": 1531500 }, { "epoch": 7.59, "learning_rate": 4.620635097575839e-05, "loss": 2.5123, "step": 1532000 }, { "epoch": 7.59, "learning_rate": 4.6205112389332306e-05, "loss": 2.4855, "step": 1532500 }, { "epoch": 7.6, "learning_rate": 4.620387380290622e-05, "loss": 2.4682, "step": 1533000 }, { "epoch": 7.6, "learning_rate": 4.620263521648014e-05, "loss": 2.4504, "step": 1533500 }, { "epoch": 7.6, "learning_rate": 4.620139663005406e-05, "loss": 2.4983, "step": 1534000 }, { "epoch": 7.6, "learning_rate": 4.6200158043627974e-05, "loss": 2.444, "step": 1534500 }, { "epoch": 7.6, "learning_rate": 4.619891945720189e-05, "loss": 2.4729, "step": 1535000 }, { "epoch": 7.61, "learning_rate": 4.619768087077581e-05, "loss": 2.4705, "step": 1535500 }, { "epoch": 7.61, "learning_rate": 4.619644476152257e-05, "loss": 2.4669, "step": 1536000 }, { "epoch": 7.61, "learning_rate": 4.619520617509649e-05, "loss": 2.4638, "step": 1536500 }, { "epoch": 7.61, "learning_rate": 4.6193967588670404e-05, "loss": 2.4943, "step": 1537000 }, { "epoch": 7.62, "learning_rate": 4.6192733956590024e-05, "loss": 2.4793, "step": 1537500 }, { "epoch": 7.62, "learning_rate": 4.619149537016394e-05, "loss": 2.4636, "step": 1538000 }, { "epoch": 7.62, "learning_rate": 4.619025678373786e-05, "loss": 2.4792, "step": 1538500 }, { "epoch": 7.62, "learning_rate": 4.6189018197311775e-05, "loss": 2.4809, "step": 1539000 }, { "epoch": 7.63, "learning_rate": 4.618777961088569e-05, "loss": 2.4845, "step": 1539500 }, { "epoch": 7.63, "learning_rate": 4.618654102445961e-05, "loss": 2.4399, "step": 1540000 }, { "epoch": 7.63, "learning_rate": 4.6185302438033526e-05, "loss": 2.4847, "step": 1540500 }, { "epoch": 7.63, "learning_rate": 4.618406385160744e-05, "loss": 2.4646, "step": 1541000 }, { "epoch": 7.64, "learning_rate": 4.618282526518136e-05, "loss": 2.4743, "step": 1541500 }, { "epoch": 7.64, "learning_rate": 4.618158667875527e-05, "loss": 2.4826, "step": 1542000 }, { "epoch": 7.64, "learning_rate": 4.618034809232919e-05, "loss": 2.5099, "step": 1542500 }, { "epoch": 7.64, "learning_rate": 4.6179109505903104e-05, "loss": 2.478, "step": 1543000 }, { "epoch": 7.65, "learning_rate": 4.617787091947702e-05, "loss": 2.4752, "step": 1543500 }, { "epoch": 7.65, "learning_rate": 4.617663481022379e-05, "loss": 2.4662, "step": 1544000 }, { "epoch": 7.65, "learning_rate": 4.6175396223797706e-05, "loss": 2.48, "step": 1544500 }, { "epoch": 7.65, "learning_rate": 4.617415763737162e-05, "loss": 2.498, "step": 1545000 }, { "epoch": 7.66, "learning_rate": 4.617291905094554e-05, "loss": 2.4554, "step": 1545500 }, { "epoch": 7.66, "learning_rate": 4.617168046451946e-05, "loss": 2.481, "step": 1546000 }, { "epoch": 7.66, "learning_rate": 4.6170444355266226e-05, "loss": 2.4825, "step": 1546500 }, { "epoch": 7.66, "learning_rate": 4.616920576884014e-05, "loss": 2.4594, "step": 1547000 }, { "epoch": 7.67, "learning_rate": 4.616796718241406e-05, "loss": 2.4847, "step": 1547500 }, { "epoch": 7.67, "learning_rate": 4.616672859598798e-05, "loss": 2.4623, "step": 1548000 }, { "epoch": 7.67, "learning_rate": 4.616549000956189e-05, "loss": 2.4656, "step": 1548500 }, { "epoch": 7.67, "learning_rate": 4.6164251423135804e-05, "loss": 2.4649, "step": 1549000 }, { "epoch": 7.68, "learning_rate": 4.616301531388257e-05, "loss": 2.474, "step": 1549500 }, { "epoch": 7.68, "learning_rate": 4.616177672745649e-05, "loss": 2.4778, "step": 1550000 }, { "epoch": 7.68, "learning_rate": 4.616054061820326e-05, "loss": 2.4788, "step": 1550500 }, { "epoch": 7.68, "learning_rate": 4.6159302031777175e-05, "loss": 2.4836, "step": 1551000 }, { "epoch": 7.69, "learning_rate": 4.615806344535109e-05, "loss": 2.4977, "step": 1551500 }, { "epoch": 7.69, "learning_rate": 4.615682733609786e-05, "loss": 2.4888, "step": 1552000 }, { "epoch": 7.69, "learning_rate": 4.615558874967177e-05, "loss": 2.4875, "step": 1552500 }, { "epoch": 7.69, "learning_rate": 4.61543551175914e-05, "loss": 2.503, "step": 1553000 }, { "epoch": 7.7, "learning_rate": 4.6153116531165316e-05, "loss": 2.5001, "step": 1553500 }, { "epoch": 7.7, "learning_rate": 4.615187794473923e-05, "loss": 2.4949, "step": 1554000 }, { "epoch": 7.7, "learning_rate": 4.615063935831315e-05, "loss": 2.4934, "step": 1554500 }, { "epoch": 7.7, "learning_rate": 4.6149400771887066e-05, "loss": 2.4785, "step": 1555000 }, { "epoch": 7.71, "learning_rate": 4.614816218546098e-05, "loss": 2.4767, "step": 1555500 }, { "epoch": 7.71, "learning_rate": 4.61469235990349e-05, "loss": 2.4847, "step": 1556000 }, { "epoch": 7.71, "learning_rate": 4.614568501260881e-05, "loss": 2.5089, "step": 1556500 }, { "epoch": 7.71, "learning_rate": 4.614444642618273e-05, "loss": 2.4748, "step": 1557000 }, { "epoch": 7.72, "learning_rate": 4.6143207839756644e-05, "loss": 2.4601, "step": 1557500 }, { "epoch": 7.72, "learning_rate": 4.614196925333056e-05, "loss": 2.5098, "step": 1558000 }, { "epoch": 7.72, "learning_rate": 4.614073066690447e-05, "loss": 2.4963, "step": 1558500 }, { "epoch": 7.72, "learning_rate": 4.613949208047839e-05, "loss": 2.4725, "step": 1559000 }, { "epoch": 7.73, "learning_rate": 4.6138253494052305e-05, "loss": 2.4686, "step": 1559500 }, { "epoch": 7.73, "learning_rate": 4.613701490762622e-05, "loss": 2.4655, "step": 1560000 }, { "epoch": 7.73, "learning_rate": 4.613577632120014e-05, "loss": 2.4731, "step": 1560500 }, { "epoch": 7.73, "learning_rate": 4.6134537734774056e-05, "loss": 2.4864, "step": 1561000 }, { "epoch": 7.74, "learning_rate": 4.613329914834797e-05, "loss": 2.5237, "step": 1561500 }, { "epoch": 7.74, "learning_rate": 4.613206056192189e-05, "loss": 2.4653, "step": 1562000 }, { "epoch": 7.74, "learning_rate": 4.613082197549581e-05, "loss": 2.4584, "step": 1562500 }, { "epoch": 7.74, "learning_rate": 4.6129583389069724e-05, "loss": 2.4773, "step": 1563000 }, { "epoch": 7.75, "learning_rate": 4.612834727981649e-05, "loss": 2.4909, "step": 1563500 }, { "epoch": 7.75, "learning_rate": 4.612711117056326e-05, "loss": 2.4713, "step": 1564000 }, { "epoch": 7.75, "learning_rate": 4.612587258413718e-05, "loss": 2.4771, "step": 1564500 }, { "epoch": 7.75, "learning_rate": 4.6124633997711095e-05, "loss": 2.4854, "step": 1565000 }, { "epoch": 7.76, "learning_rate": 4.6123397888457864e-05, "loss": 2.4965, "step": 1565500 }, { "epoch": 7.76, "learning_rate": 4.612215930203178e-05, "loss": 2.4689, "step": 1566000 }, { "epoch": 7.76, "learning_rate": 4.61209207156057e-05, "loss": 2.4708, "step": 1566500 }, { "epoch": 7.76, "learning_rate": 4.6119682129179615e-05, "loss": 2.4845, "step": 1567000 }, { "epoch": 7.77, "learning_rate": 4.611844354275353e-05, "loss": 2.473, "step": 1567500 }, { "epoch": 7.77, "learning_rate": 4.611720495632744e-05, "loss": 2.4954, "step": 1568000 }, { "epoch": 7.77, "learning_rate": 4.611596636990136e-05, "loss": 2.4764, "step": 1568500 }, { "epoch": 7.77, "learning_rate": 4.6114727783475276e-05, "loss": 2.4517, "step": 1569000 }, { "epoch": 7.78, "learning_rate": 4.611348919704919e-05, "loss": 2.4814, "step": 1569500 }, { "epoch": 7.78, "learning_rate": 4.611225061062311e-05, "loss": 2.4804, "step": 1570000 }, { "epoch": 7.78, "learning_rate": 4.6111012024197027e-05, "loss": 2.4678, "step": 1570500 }, { "epoch": 7.78, "learning_rate": 4.6109773437770943e-05, "loss": 2.4607, "step": 1571000 }, { "epoch": 7.79, "learning_rate": 4.610853485134486e-05, "loss": 2.4835, "step": 1571500 }, { "epoch": 7.79, "learning_rate": 4.610729874209162e-05, "loss": 2.4995, "step": 1572000 }, { "epoch": 7.79, "learning_rate": 4.610606015566554e-05, "loss": 2.4387, "step": 1572500 }, { "epoch": 7.79, "learning_rate": 4.6104824046412315e-05, "loss": 2.451, "step": 1573000 }, { "epoch": 7.8, "learning_rate": 4.610358545998623e-05, "loss": 2.4665, "step": 1573500 }, { "epoch": 7.8, "learning_rate": 4.610234687356015e-05, "loss": 2.4839, "step": 1574000 }, { "epoch": 7.8, "learning_rate": 4.610110828713406e-05, "loss": 2.4874, "step": 1574500 }, { "epoch": 7.8, "learning_rate": 4.6099869700707976e-05, "loss": 2.4841, "step": 1575000 }, { "epoch": 7.81, "learning_rate": 4.609863111428189e-05, "loss": 2.4755, "step": 1575500 }, { "epoch": 7.81, "learning_rate": 4.609739252785581e-05, "loss": 2.4554, "step": 1576000 }, { "epoch": 7.81, "learning_rate": 4.609615394142973e-05, "loss": 2.4956, "step": 1576500 }, { "epoch": 7.81, "learning_rate": 4.6094915355003644e-05, "loss": 2.4772, "step": 1577000 }, { "epoch": 7.82, "learning_rate": 4.609367676857756e-05, "loss": 2.4684, "step": 1577500 }, { "epoch": 7.82, "learning_rate": 4.609244065932433e-05, "loss": 2.4815, "step": 1578000 }, { "epoch": 7.82, "learning_rate": 4.6091202072898246e-05, "loss": 2.4774, "step": 1578500 }, { "epoch": 7.82, "learning_rate": 4.6089963486472156e-05, "loss": 2.4714, "step": 1579000 }, { "epoch": 7.83, "learning_rate": 4.608872490004607e-05, "loss": 2.4645, "step": 1579500 }, { "epoch": 7.83, "learning_rate": 4.608748631361999e-05, "loss": 2.485, "step": 1580000 }, { "epoch": 7.83, "learning_rate": 4.608624772719391e-05, "loss": 2.4805, "step": 1580500 }, { "epoch": 7.83, "learning_rate": 4.6085009140767824e-05, "loss": 2.4852, "step": 1581000 }, { "epoch": 7.84, "learning_rate": 4.608377055434174e-05, "loss": 2.4744, "step": 1581500 }, { "epoch": 7.84, "learning_rate": 4.608253196791566e-05, "loss": 2.4791, "step": 1582000 }, { "epoch": 7.84, "learning_rate": 4.608129585866243e-05, "loss": 2.4555, "step": 1582500 }, { "epoch": 7.84, "learning_rate": 4.60800597494092e-05, "loss": 2.4637, "step": 1583000 }, { "epoch": 7.85, "learning_rate": 4.607882116298311e-05, "loss": 2.4863, "step": 1583500 }, { "epoch": 7.85, "learning_rate": 4.607758257655703e-05, "loss": 2.4752, "step": 1584000 }, { "epoch": 7.85, "learning_rate": 4.6076343990130946e-05, "loss": 2.4594, "step": 1584500 }, { "epoch": 7.85, "learning_rate": 4.6075107880877715e-05, "loss": 2.4782, "step": 1585000 }, { "epoch": 7.86, "learning_rate": 4.607386929445163e-05, "loss": 2.4713, "step": 1585500 }, { "epoch": 7.86, "learning_rate": 4.607263070802555e-05, "loss": 2.4576, "step": 1586000 }, { "epoch": 7.86, "learning_rate": 4.607139459877232e-05, "loss": 2.4664, "step": 1586500 }, { "epoch": 7.86, "learning_rate": 4.6070156012346235e-05, "loss": 2.4834, "step": 1587000 }, { "epoch": 7.87, "learning_rate": 4.606891742592015e-05, "loss": 2.5103, "step": 1587500 }, { "epoch": 7.87, "learning_rate": 4.6067681316666914e-05, "loss": 2.4813, "step": 1588000 }, { "epoch": 7.87, "learning_rate": 4.606644273024083e-05, "loss": 2.4637, "step": 1588500 }, { "epoch": 7.87, "learning_rate": 4.606520414381475e-05, "loss": 2.4753, "step": 1589000 }, { "epoch": 7.87, "learning_rate": 4.6063965557388665e-05, "loss": 2.4819, "step": 1589500 }, { "epoch": 7.88, "learning_rate": 4.606272697096258e-05, "loss": 2.4511, "step": 1590000 }, { "epoch": 7.88, "learning_rate": 4.60614883845365e-05, "loss": 2.4717, "step": 1590500 }, { "epoch": 7.88, "learning_rate": 4.6060249798110415e-05, "loss": 2.489, "step": 1591000 }, { "epoch": 7.88, "learning_rate": 4.605901121168433e-05, "loss": 2.4643, "step": 1591500 }, { "epoch": 7.89, "learning_rate": 4.605777262525825e-05, "loss": 2.4807, "step": 1592000 }, { "epoch": 7.89, "learning_rate": 4.605653651600502e-05, "loss": 2.474, "step": 1592500 }, { "epoch": 7.89, "learning_rate": 4.6055297929578935e-05, "loss": 2.4841, "step": 1593000 }, { "epoch": 7.89, "learning_rate": 4.605405934315285e-05, "loss": 2.4805, "step": 1593500 }, { "epoch": 7.9, "learning_rate": 4.605282075672677e-05, "loss": 2.4747, "step": 1594000 }, { "epoch": 7.9, "learning_rate": 4.6051582170300686e-05, "loss": 2.4757, "step": 1594500 }, { "epoch": 7.9, "learning_rate": 4.60503435838746e-05, "loss": 2.4795, "step": 1595000 }, { "epoch": 7.9, "learning_rate": 4.604910499744852e-05, "loss": 2.4725, "step": 1595500 }, { "epoch": 7.91, "learning_rate": 4.604786641102243e-05, "loss": 2.4809, "step": 1596000 }, { "epoch": 7.91, "learning_rate": 4.604662782459635e-05, "loss": 2.4966, "step": 1596500 }, { "epoch": 7.91, "learning_rate": 4.6045389238170264e-05, "loss": 2.4485, "step": 1597000 }, { "epoch": 7.91, "learning_rate": 4.604415065174418e-05, "loss": 2.4603, "step": 1597500 }, { "epoch": 7.92, "learning_rate": 4.60429120653181e-05, "loss": 2.4999, "step": 1598000 }, { "epoch": 7.92, "learning_rate": 4.6041673478892014e-05, "loss": 2.4641, "step": 1598500 }, { "epoch": 7.92, "learning_rate": 4.604043736963878e-05, "loss": 2.473, "step": 1599000 }, { "epoch": 7.92, "learning_rate": 4.603919878321269e-05, "loss": 2.4771, "step": 1599500 }, { "epoch": 7.93, "learning_rate": 4.603796267395947e-05, "loss": 2.4965, "step": 1600000 }, { "epoch": 7.93, "learning_rate": 4.6036724087533386e-05, "loss": 2.4797, "step": 1600500 }, { "epoch": 7.93, "learning_rate": 4.60354855011073e-05, "loss": 2.4846, "step": 1601000 }, { "epoch": 7.93, "learning_rate": 4.603424691468122e-05, "loss": 2.4528, "step": 1601500 }, { "epoch": 7.94, "learning_rate": 4.6033008328255137e-05, "loss": 2.4811, "step": 1602000 }, { "epoch": 7.94, "learning_rate": 4.603176974182905e-05, "loss": 2.4997, "step": 1602500 }, { "epoch": 7.94, "learning_rate": 4.6030533632575816e-05, "loss": 2.4839, "step": 1603000 }, { "epoch": 7.94, "learning_rate": 4.602929504614973e-05, "loss": 2.4663, "step": 1603500 }, { "epoch": 7.95, "learning_rate": 4.60280589368965e-05, "loss": 2.4979, "step": 1604000 }, { "epoch": 7.95, "learning_rate": 4.602682035047042e-05, "loss": 2.4584, "step": 1604500 }, { "epoch": 7.95, "learning_rate": 4.6025581764044335e-05, "loss": 2.4736, "step": 1605000 }, { "epoch": 7.95, "learning_rate": 4.602434317761825e-05, "loss": 2.4942, "step": 1605500 }, { "epoch": 7.96, "learning_rate": 4.602310459119217e-05, "loss": 2.4717, "step": 1606000 }, { "epoch": 7.96, "learning_rate": 4.6021866004766086e-05, "loss": 2.4614, "step": 1606500 }, { "epoch": 7.96, "learning_rate": 4.602062741834e-05, "loss": 2.4844, "step": 1607000 }, { "epoch": 7.96, "learning_rate": 4.601938883191392e-05, "loss": 2.469, "step": 1607500 }, { "epoch": 7.97, "learning_rate": 4.601815272266068e-05, "loss": 2.48, "step": 1608000 }, { "epoch": 7.97, "learning_rate": 4.60169141362346e-05, "loss": 2.4687, "step": 1608500 }, { "epoch": 7.97, "learning_rate": 4.6015675549808516e-05, "loss": 2.4747, "step": 1609000 }, { "epoch": 7.97, "learning_rate": 4.601443696338243e-05, "loss": 2.4615, "step": 1609500 }, { "epoch": 7.98, "learning_rate": 4.601319837695635e-05, "loss": 2.5101, "step": 1610000 }, { "epoch": 7.98, "learning_rate": 4.6011959790530266e-05, "loss": 2.4775, "step": 1610500 }, { "epoch": 7.98, "learning_rate": 4.6010723681277035e-05, "loss": 2.4492, "step": 1611000 }, { "epoch": 7.98, "learning_rate": 4.600948509485095e-05, "loss": 2.49, "step": 1611500 }, { "epoch": 7.99, "learning_rate": 4.600824650842487e-05, "loss": 2.4681, "step": 1612000 }, { "epoch": 7.99, "learning_rate": 4.6007007921998786e-05, "loss": 2.4747, "step": 1612500 }, { "epoch": 7.99, "learning_rate": 4.60057693355727e-05, "loss": 2.4882, "step": 1613000 }, { "epoch": 7.99, "learning_rate": 4.600453074914662e-05, "loss": 2.4759, "step": 1613500 }, { "epoch": 8.0, "learning_rate": 4.600329216272054e-05, "loss": 2.4905, "step": 1614000 }, { "epoch": 8.0, "learning_rate": 4.6002053576294454e-05, "loss": 2.4694, "step": 1614500 }, { "epoch": 8.0, "eval_accuracy": 0.6397272831364085, "eval_accuracy_mlm": 0.59286278093972, "eval_accuracy_nsp": 0.8609541141909092, "eval_loss": 2.4369935989379883, "eval_runtime": 147.379, "eval_samples_per_second": 1729.955, "eval_steps_per_second": 72.086, "step": 1614744 }, { "epoch": 8.0, "learning_rate": 4.6000814989868364e-05, "loss": 2.4571, "step": 1615000 }, { "epoch": 8.0, "learning_rate": 4.599957640344228e-05, "loss": 2.4342, "step": 1615500 }, { "epoch": 8.01, "learning_rate": 4.599834029418905e-05, "loss": 2.4282, "step": 1616000 }, { "epoch": 8.01, "learning_rate": 4.599710418493582e-05, "loss": 2.4413, "step": 1616500 }, { "epoch": 8.01, "learning_rate": 4.5995865598509735e-05, "loss": 2.4362, "step": 1617000 }, { "epoch": 8.01, "learning_rate": 4.599462701208365e-05, "loss": 2.4393, "step": 1617500 }, { "epoch": 8.02, "learning_rate": 4.599338842565757e-05, "loss": 2.4598, "step": 1618000 }, { "epoch": 8.02, "learning_rate": 4.599215231640434e-05, "loss": 2.4666, "step": 1618500 }, { "epoch": 8.02, "learning_rate": 4.5990913729978255e-05, "loss": 2.4484, "step": 1619000 }, { "epoch": 8.02, "learning_rate": 4.598967514355217e-05, "loss": 2.4482, "step": 1619500 }, { "epoch": 8.03, "learning_rate": 4.598843655712609e-05, "loss": 2.4395, "step": 1620000 }, { "epoch": 8.03, "learning_rate": 4.598720044787285e-05, "loss": 2.4311, "step": 1620500 }, { "epoch": 8.03, "learning_rate": 4.598596186144677e-05, "loss": 2.438, "step": 1621000 }, { "epoch": 8.03, "learning_rate": 4.5984723275020685e-05, "loss": 2.4487, "step": 1621500 }, { "epoch": 8.04, "learning_rate": 4.59834846885946e-05, "loss": 2.4532, "step": 1622000 }, { "epoch": 8.04, "learning_rate": 4.598224610216852e-05, "loss": 2.4735, "step": 1622500 }, { "epoch": 8.04, "learning_rate": 4.5981007515742436e-05, "loss": 2.4447, "step": 1623000 }, { "epoch": 8.04, "learning_rate": 4.597976892931635e-05, "loss": 2.4488, "step": 1623500 }, { "epoch": 8.05, "learning_rate": 4.597853034289027e-05, "loss": 2.437, "step": 1624000 }, { "epoch": 8.05, "learning_rate": 4.597729423363704e-05, "loss": 2.4128, "step": 1624500 }, { "epoch": 8.05, "learning_rate": 4.5976055647210955e-05, "loss": 2.4574, "step": 1625000 }, { "epoch": 8.05, "learning_rate": 4.5974822015130576e-05, "loss": 2.4699, "step": 1625500 }, { "epoch": 8.06, "learning_rate": 4.597358342870449e-05, "loss": 2.4535, "step": 1626000 }, { "epoch": 8.06, "learning_rate": 4.597234484227841e-05, "loss": 2.463, "step": 1626500 }, { "epoch": 8.06, "learning_rate": 4.5971106255852327e-05, "loss": 2.477, "step": 1627000 }, { "epoch": 8.06, "learning_rate": 4.5969867669426244e-05, "loss": 2.4738, "step": 1627500 }, { "epoch": 8.07, "learning_rate": 4.5968629083000154e-05, "loss": 2.4388, "step": 1628000 }, { "epoch": 8.07, "learning_rate": 4.596739049657407e-05, "loss": 2.4269, "step": 1628500 }, { "epoch": 8.07, "learning_rate": 4.596615191014799e-05, "loss": 2.453, "step": 1629000 }, { "epoch": 8.07, "learning_rate": 4.5964913323721904e-05, "loss": 2.4559, "step": 1629500 }, { "epoch": 8.08, "learning_rate": 4.596367473729582e-05, "loss": 2.4375, "step": 1630000 }, { "epoch": 8.08, "learning_rate": 4.596243615086974e-05, "loss": 2.4509, "step": 1630500 }, { "epoch": 8.08, "learning_rate": 4.5961197564443655e-05, "loss": 2.4442, "step": 1631000 }, { "epoch": 8.08, "learning_rate": 4.595995897801757e-05, "loss": 2.4474, "step": 1631500 }, { "epoch": 8.09, "learning_rate": 4.595872039159149e-05, "loss": 2.4552, "step": 1632000 }, { "epoch": 8.09, "learning_rate": 4.5957481805165406e-05, "loss": 2.4695, "step": 1632500 }, { "epoch": 8.09, "learning_rate": 4.595624321873932e-05, "loss": 2.4425, "step": 1633000 }, { "epoch": 8.09, "learning_rate": 4.595500463231324e-05, "loss": 2.4627, "step": 1633500 }, { "epoch": 8.1, "learning_rate": 4.595376852306e-05, "loss": 2.4539, "step": 1634000 }, { "epoch": 8.1, "learning_rate": 4.595252993663392e-05, "loss": 2.4307, "step": 1634500 }, { "epoch": 8.1, "learning_rate": 4.5951291350207836e-05, "loss": 2.446, "step": 1635000 }, { "epoch": 8.1, "learning_rate": 4.595005276378175e-05, "loss": 2.4706, "step": 1635500 }, { "epoch": 8.11, "learning_rate": 4.594881417735567e-05, "loss": 2.4612, "step": 1636000 }, { "epoch": 8.11, "learning_rate": 4.594757806810244e-05, "loss": 2.4764, "step": 1636500 }, { "epoch": 8.11, "learning_rate": 4.5946339481676355e-05, "loss": 2.4155, "step": 1637000 }, { "epoch": 8.11, "learning_rate": 4.594510337242312e-05, "loss": 2.4596, "step": 1637500 }, { "epoch": 8.12, "learning_rate": 4.5943864785997034e-05, "loss": 2.4651, "step": 1638000 }, { "epoch": 8.12, "learning_rate": 4.594262619957095e-05, "loss": 2.4594, "step": 1638500 }, { "epoch": 8.12, "learning_rate": 4.594138761314487e-05, "loss": 2.4223, "step": 1639000 }, { "epoch": 8.12, "learning_rate": 4.5940149026718785e-05, "loss": 2.4587, "step": 1639500 }, { "epoch": 8.13, "learning_rate": 4.59389104402927e-05, "loss": 2.4586, "step": 1640000 }, { "epoch": 8.13, "learning_rate": 4.593767185386662e-05, "loss": 2.4567, "step": 1640500 }, { "epoch": 8.13, "learning_rate": 4.5936433267440536e-05, "loss": 2.4287, "step": 1641000 }, { "epoch": 8.13, "learning_rate": 4.593519468101445e-05, "loss": 2.4658, "step": 1641500 }, { "epoch": 8.14, "learning_rate": 4.593395609458837e-05, "loss": 2.4436, "step": 1642000 }, { "epoch": 8.14, "learning_rate": 4.593271750816229e-05, "loss": 2.4596, "step": 1642500 }, { "epoch": 8.14, "learning_rate": 4.5931478921736204e-05, "loss": 2.4541, "step": 1643000 }, { "epoch": 8.14, "learning_rate": 4.593024281248297e-05, "loss": 2.4404, "step": 1643500 }, { "epoch": 8.14, "learning_rate": 4.592900422605689e-05, "loss": 2.4754, "step": 1644000 }, { "epoch": 8.15, "learning_rate": 4.592776811680365e-05, "loss": 2.4406, "step": 1644500 }, { "epoch": 8.15, "learning_rate": 4.592652953037757e-05, "loss": 2.4571, "step": 1645000 }, { "epoch": 8.15, "learning_rate": 4.5925290943951485e-05, "loss": 2.4375, "step": 1645500 }, { "epoch": 8.15, "learning_rate": 4.59240523575254e-05, "loss": 2.4535, "step": 1646000 }, { "epoch": 8.16, "learning_rate": 4.592281377109932e-05, "loss": 2.4388, "step": 1646500 }, { "epoch": 8.16, "learning_rate": 4.5921575184673236e-05, "loss": 2.4679, "step": 1647000 }, { "epoch": 8.16, "learning_rate": 4.592033659824715e-05, "loss": 2.443, "step": 1647500 }, { "epoch": 8.16, "learning_rate": 4.591909801182107e-05, "loss": 2.449, "step": 1648000 }, { "epoch": 8.17, "learning_rate": 4.591786190256784e-05, "loss": 2.463, "step": 1648500 }, { "epoch": 8.17, "learning_rate": 4.5916623316141756e-05, "loss": 2.4809, "step": 1649000 }, { "epoch": 8.17, "learning_rate": 4.591538472971567e-05, "loss": 2.4569, "step": 1649500 }, { "epoch": 8.17, "learning_rate": 4.591414614328959e-05, "loss": 2.4376, "step": 1650000 }, { "epoch": 8.18, "learning_rate": 4.5912907556863506e-05, "loss": 2.4565, "step": 1650500 }, { "epoch": 8.18, "learning_rate": 4.591166897043742e-05, "loss": 2.448, "step": 1651000 }, { "epoch": 8.18, "learning_rate": 4.591043038401134e-05, "loss": 2.4753, "step": 1651500 }, { "epoch": 8.18, "learning_rate": 4.590919179758526e-05, "loss": 2.4302, "step": 1652000 }, { "epoch": 8.19, "learning_rate": 4.5907953211159174e-05, "loss": 2.4589, "step": 1652500 }, { "epoch": 8.19, "learning_rate": 4.590671462473309e-05, "loss": 2.454, "step": 1653000 }, { "epoch": 8.19, "learning_rate": 4.590547603830701e-05, "loss": 2.442, "step": 1653500 }, { "epoch": 8.19, "learning_rate": 4.590423992905377e-05, "loss": 2.4509, "step": 1654000 }, { "epoch": 8.2, "learning_rate": 4.590300134262769e-05, "loss": 2.4578, "step": 1654500 }, { "epoch": 8.2, "learning_rate": 4.5901765233374456e-05, "loss": 2.4688, "step": 1655000 }, { "epoch": 8.2, "learning_rate": 4.590052664694837e-05, "loss": 2.4823, "step": 1655500 }, { "epoch": 8.2, "learning_rate": 4.589928806052229e-05, "loss": 2.4619, "step": 1656000 }, { "epoch": 8.21, "learning_rate": 4.5898049474096207e-05, "loss": 2.443, "step": 1656500 }, { "epoch": 8.21, "learning_rate": 4.5896810887670123e-05, "loss": 2.4455, "step": 1657000 }, { "epoch": 8.21, "learning_rate": 4.5895574778416885e-05, "loss": 2.4619, "step": 1657500 }, { "epoch": 8.21, "learning_rate": 4.589433866916366e-05, "loss": 2.4513, "step": 1658000 }, { "epoch": 8.22, "learning_rate": 4.589310008273758e-05, "loss": 2.4317, "step": 1658500 }, { "epoch": 8.22, "learning_rate": 4.5891861496311495e-05, "loss": 2.4583, "step": 1659000 }, { "epoch": 8.22, "learning_rate": 4.5890625387058264e-05, "loss": 2.4789, "step": 1659500 }, { "epoch": 8.22, "learning_rate": 4.588938680063218e-05, "loss": 2.4496, "step": 1660000 }, { "epoch": 8.23, "learning_rate": 4.58881482142061e-05, "loss": 2.4006, "step": 1660500 }, { "epoch": 8.23, "learning_rate": 4.5886909627780015e-05, "loss": 2.4678, "step": 1661000 }, { "epoch": 8.23, "learning_rate": 4.588567104135393e-05, "loss": 2.4779, "step": 1661500 }, { "epoch": 8.23, "learning_rate": 4.588443245492784e-05, "loss": 2.4671, "step": 1662000 }, { "epoch": 8.24, "learning_rate": 4.588319386850176e-05, "loss": 2.457, "step": 1662500 }, { "epoch": 8.24, "learning_rate": 4.5881955282075675e-05, "loss": 2.466, "step": 1663000 }, { "epoch": 8.24, "learning_rate": 4.588071669564959e-05, "loss": 2.4845, "step": 1663500 }, { "epoch": 8.24, "learning_rate": 4.587947810922351e-05, "loss": 2.4621, "step": 1664000 }, { "epoch": 8.25, "learning_rate": 4.587823952279742e-05, "loss": 2.455, "step": 1664500 }, { "epoch": 8.25, "learning_rate": 4.5877000936371336e-05, "loss": 2.4329, "step": 1665000 }, { "epoch": 8.25, "learning_rate": 4.587576234994525e-05, "loss": 2.4424, "step": 1665500 }, { "epoch": 8.25, "learning_rate": 4.587452376351917e-05, "loss": 2.4667, "step": 1666000 }, { "epoch": 8.26, "learning_rate": 4.587328765426594e-05, "loss": 2.4531, "step": 1666500 }, { "epoch": 8.26, "learning_rate": 4.5872049067839856e-05, "loss": 2.4527, "step": 1667000 }, { "epoch": 8.26, "learning_rate": 4.587081048141377e-05, "loss": 2.4457, "step": 1667500 }, { "epoch": 8.26, "learning_rate": 4.586957189498769e-05, "loss": 2.4557, "step": 1668000 }, { "epoch": 8.27, "learning_rate": 4.586833330856161e-05, "loss": 2.4627, "step": 1668500 }, { "epoch": 8.27, "learning_rate": 4.5867094722135524e-05, "loss": 2.4349, "step": 1669000 }, { "epoch": 8.27, "learning_rate": 4.586585613570944e-05, "loss": 2.4301, "step": 1669500 }, { "epoch": 8.27, "learning_rate": 4.586461754928336e-05, "loss": 2.4527, "step": 1670000 }, { "epoch": 8.28, "learning_rate": 4.5863378962857274e-05, "loss": 2.4991, "step": 1670500 }, { "epoch": 8.28, "learning_rate": 4.5862142853604037e-05, "loss": 2.4676, "step": 1671000 }, { "epoch": 8.28, "learning_rate": 4.5860904267177953e-05, "loss": 2.4682, "step": 1671500 }, { "epoch": 8.28, "learning_rate": 4.585966568075187e-05, "loss": 2.4547, "step": 1672000 }, { "epoch": 8.29, "learning_rate": 4.585842709432579e-05, "loss": 2.454, "step": 1672500 }, { "epoch": 8.29, "learning_rate": 4.5857190985072556e-05, "loss": 2.4367, "step": 1673000 }, { "epoch": 8.29, "learning_rate": 4.585595239864647e-05, "loss": 2.4829, "step": 1673500 }, { "epoch": 8.29, "learning_rate": 4.585471381222039e-05, "loss": 2.4399, "step": 1674000 }, { "epoch": 8.3, "learning_rate": 4.585347522579431e-05, "loss": 2.4632, "step": 1674500 }, { "epoch": 8.3, "learning_rate": 4.5852236639368224e-05, "loss": 2.4526, "step": 1675000 }, { "epoch": 8.3, "learning_rate": 4.585099805294214e-05, "loss": 2.4805, "step": 1675500 }, { "epoch": 8.3, "learning_rate": 4.584976194368891e-05, "loss": 2.4633, "step": 1676000 }, { "epoch": 8.31, "learning_rate": 4.5848523357262826e-05, "loss": 2.4558, "step": 1676500 }, { "epoch": 8.31, "learning_rate": 4.5847284770836743e-05, "loss": 2.4494, "step": 1677000 }, { "epoch": 8.31, "learning_rate": 4.584604618441066e-05, "loss": 2.4412, "step": 1677500 }, { "epoch": 8.31, "learning_rate": 4.584481255233028e-05, "loss": 2.4841, "step": 1678000 }, { "epoch": 8.32, "learning_rate": 4.58435739659042e-05, "loss": 2.4545, "step": 1678500 }, { "epoch": 8.32, "learning_rate": 4.5842335379478115e-05, "loss": 2.4488, "step": 1679000 }, { "epoch": 8.32, "learning_rate": 4.584109679305203e-05, "loss": 2.454, "step": 1679500 }, { "epoch": 8.32, "learning_rate": 4.583985820662595e-05, "loss": 2.4556, "step": 1680000 }, { "epoch": 8.33, "learning_rate": 4.5838619620199866e-05, "loss": 2.4793, "step": 1680500 }, { "epoch": 8.33, "learning_rate": 4.5837381033773776e-05, "loss": 2.4807, "step": 1681000 }, { "epoch": 8.33, "learning_rate": 4.583614244734769e-05, "loss": 2.4468, "step": 1681500 }, { "epoch": 8.33, "learning_rate": 4.583490386092161e-05, "loss": 2.4587, "step": 1682000 }, { "epoch": 8.34, "learning_rate": 4.583366775166838e-05, "loss": 2.4789, "step": 1682500 }, { "epoch": 8.34, "learning_rate": 4.583243164241515e-05, "loss": 2.4712, "step": 1683000 }, { "epoch": 8.34, "learning_rate": 4.5831193055989064e-05, "loss": 2.4732, "step": 1683500 }, { "epoch": 8.34, "learning_rate": 4.582995446956298e-05, "loss": 2.4527, "step": 1684000 }, { "epoch": 8.35, "learning_rate": 4.582871836030975e-05, "loss": 2.4415, "step": 1684500 }, { "epoch": 8.35, "learning_rate": 4.582747977388367e-05, "loss": 2.4473, "step": 1685000 }, { "epoch": 8.35, "learning_rate": 4.582624118745758e-05, "loss": 2.4513, "step": 1685500 }, { "epoch": 8.35, "learning_rate": 4.5825002601031494e-05, "loss": 2.4484, "step": 1686000 }, { "epoch": 8.36, "learning_rate": 4.582376649177826e-05, "loss": 2.454, "step": 1686500 }, { "epoch": 8.36, "learning_rate": 4.582252790535218e-05, "loss": 2.4445, "step": 1687000 }, { "epoch": 8.36, "learning_rate": 4.58212893189261e-05, "loss": 2.4324, "step": 1687500 }, { "epoch": 8.36, "learning_rate": 4.5820050732500014e-05, "loss": 2.4387, "step": 1688000 }, { "epoch": 8.37, "learning_rate": 4.581881214607393e-05, "loss": 2.4302, "step": 1688500 }, { "epoch": 8.37, "learning_rate": 4.581757355964785e-05, "loss": 2.4632, "step": 1689000 }, { "epoch": 8.37, "learning_rate": 4.5816334973221764e-05, "loss": 2.4692, "step": 1689500 }, { "epoch": 8.37, "learning_rate": 4.581509638679568e-05, "loss": 2.4576, "step": 1690000 }, { "epoch": 8.38, "learning_rate": 4.58138578003696e-05, "loss": 2.454, "step": 1690500 }, { "epoch": 8.38, "learning_rate": 4.5812619213943515e-05, "loss": 2.4575, "step": 1691000 }, { "epoch": 8.38, "learning_rate": 4.581138062751743e-05, "loss": 2.4608, "step": 1691500 }, { "epoch": 8.38, "learning_rate": 4.581014204109135e-05, "loss": 2.4546, "step": 1692000 }, { "epoch": 8.39, "learning_rate": 4.580890593183811e-05, "loss": 2.4915, "step": 1692500 }, { "epoch": 8.39, "learning_rate": 4.580766734541203e-05, "loss": 2.4625, "step": 1693000 }, { "epoch": 8.39, "learning_rate": 4.5806428758985945e-05, "loss": 2.4287, "step": 1693500 }, { "epoch": 8.39, "learning_rate": 4.580519017255986e-05, "loss": 2.45, "step": 1694000 }, { "epoch": 8.4, "learning_rate": 4.580395406330663e-05, "loss": 2.4605, "step": 1694500 }, { "epoch": 8.4, "learning_rate": 4.580271547688055e-05, "loss": 2.4412, "step": 1695000 }, { "epoch": 8.4, "learning_rate": 4.5801476890454464e-05, "loss": 2.4722, "step": 1695500 }, { "epoch": 8.4, "learning_rate": 4.580023830402838e-05, "loss": 2.4585, "step": 1696000 }, { "epoch": 8.41, "learning_rate": 4.57989997176023e-05, "loss": 2.4503, "step": 1696500 }, { "epoch": 8.41, "learning_rate": 4.579776360834907e-05, "loss": 2.4558, "step": 1697000 }, { "epoch": 8.41, "learning_rate": 4.5796525021922984e-05, "loss": 2.4635, "step": 1697500 }, { "epoch": 8.41, "learning_rate": 4.5795288912669746e-05, "loss": 2.4488, "step": 1698000 }, { "epoch": 8.41, "learning_rate": 4.579405032624366e-05, "loss": 2.4235, "step": 1698500 }, { "epoch": 8.42, "learning_rate": 4.579281173981758e-05, "loss": 2.471, "step": 1699000 }, { "epoch": 8.42, "learning_rate": 4.5791575630564356e-05, "loss": 2.4608, "step": 1699500 }, { "epoch": 8.42, "learning_rate": 4.579033704413827e-05, "loss": 2.4558, "step": 1700000 }, { "epoch": 8.42, "learning_rate": 4.578909845771218e-05, "loss": 2.4506, "step": 1700500 }, { "epoch": 8.43, "learning_rate": 4.57878598712861e-05, "loss": 2.4858, "step": 1701000 }, { "epoch": 8.43, "learning_rate": 4.5786621284860016e-05, "loss": 2.4645, "step": 1701500 }, { "epoch": 8.43, "learning_rate": 4.5785385175606785e-05, "loss": 2.4548, "step": 1702000 }, { "epoch": 8.43, "learning_rate": 4.57841465891807e-05, "loss": 2.4721, "step": 1702500 }, { "epoch": 8.44, "learning_rate": 4.578290800275462e-05, "loss": 2.4628, "step": 1703000 }, { "epoch": 8.44, "learning_rate": 4.578166941632853e-05, "loss": 2.4656, "step": 1703500 }, { "epoch": 8.44, "learning_rate": 4.5780430829902446e-05, "loss": 2.4542, "step": 1704000 }, { "epoch": 8.44, "learning_rate": 4.577919224347636e-05, "loss": 2.4838, "step": 1704500 }, { "epoch": 8.45, "learning_rate": 4.577795365705028e-05, "loss": 2.4511, "step": 1705000 }, { "epoch": 8.45, "learning_rate": 4.57767150706242e-05, "loss": 2.4663, "step": 1705500 }, { "epoch": 8.45, "learning_rate": 4.5775476484198114e-05, "loss": 2.4657, "step": 1706000 }, { "epoch": 8.45, "learning_rate": 4.577423789777203e-05, "loss": 2.4396, "step": 1706500 }, { "epoch": 8.46, "learning_rate": 4.577299931134595e-05, "loss": 2.4544, "step": 1707000 }, { "epoch": 8.46, "learning_rate": 4.5771760724919865e-05, "loss": 2.4735, "step": 1707500 }, { "epoch": 8.46, "learning_rate": 4.577052213849378e-05, "loss": 2.4564, "step": 1708000 }, { "epoch": 8.46, "learning_rate": 4.57692835520677e-05, "loss": 2.4455, "step": 1708500 }, { "epoch": 8.47, "learning_rate": 4.576804744281447e-05, "loss": 2.4593, "step": 1709000 }, { "epoch": 8.47, "learning_rate": 4.5766808856388384e-05, "loss": 2.468, "step": 1709500 }, { "epoch": 8.47, "learning_rate": 4.57655702699623e-05, "loss": 2.4607, "step": 1710000 }, { "epoch": 8.47, "learning_rate": 4.576433168353622e-05, "loss": 2.4671, "step": 1710500 }, { "epoch": 8.48, "learning_rate": 4.5763093097110135e-05, "loss": 2.4854, "step": 1711000 }, { "epoch": 8.48, "learning_rate": 4.576185451068405e-05, "loss": 2.4792, "step": 1711500 }, { "epoch": 8.48, "learning_rate": 4.576061592425797e-05, "loss": 2.4591, "step": 1712000 }, { "epoch": 8.48, "learning_rate": 4.575937733783188e-05, "loss": 2.464, "step": 1712500 }, { "epoch": 8.49, "learning_rate": 4.5758138751405796e-05, "loss": 2.4626, "step": 1713000 }, { "epoch": 8.49, "learning_rate": 4.575690016497971e-05, "loss": 2.4861, "step": 1713500 }, { "epoch": 8.49, "learning_rate": 4.5755666532899334e-05, "loss": 2.4691, "step": 1714000 }, { "epoch": 8.49, "learning_rate": 4.575442794647325e-05, "loss": 2.4656, "step": 1714500 }, { "epoch": 8.5, "learning_rate": 4.575318936004717e-05, "loss": 2.4999, "step": 1715000 }, { "epoch": 8.5, "learning_rate": 4.5751950773621084e-05, "loss": 2.4376, "step": 1715500 }, { "epoch": 8.5, "learning_rate": 4.5750712187195e-05, "loss": 2.4519, "step": 1716000 }, { "epoch": 8.5, "learning_rate": 4.5749476077941763e-05, "loss": 2.4505, "step": 1716500 }, { "epoch": 8.51, "learning_rate": 4.574823749151568e-05, "loss": 2.4783, "step": 1717000 }, { "epoch": 8.51, "learning_rate": 4.57469989050896e-05, "loss": 2.437, "step": 1717500 }, { "epoch": 8.51, "learning_rate": 4.5745760318663514e-05, "loss": 2.4739, "step": 1718000 }, { "epoch": 8.51, "learning_rate": 4.574452420941029e-05, "loss": 2.4648, "step": 1718500 }, { "epoch": 8.52, "learning_rate": 4.574328562298421e-05, "loss": 2.4603, "step": 1719000 }, { "epoch": 8.52, "learning_rate": 4.574204703655812e-05, "loss": 2.4343, "step": 1719500 }, { "epoch": 8.52, "learning_rate": 4.5740808450132034e-05, "loss": 2.4705, "step": 1720000 }, { "epoch": 8.52, "learning_rate": 4.573956986370595e-05, "loss": 2.4513, "step": 1720500 }, { "epoch": 8.53, "learning_rate": 4.573833127727987e-05, "loss": 2.4702, "step": 1721000 }, { "epoch": 8.53, "learning_rate": 4.5737092690853785e-05, "loss": 2.4558, "step": 1721500 }, { "epoch": 8.53, "learning_rate": 4.57358541044277e-05, "loss": 2.4708, "step": 1722000 }, { "epoch": 8.53, "learning_rate": 4.573461551800162e-05, "loss": 2.4531, "step": 1722500 }, { "epoch": 8.54, "learning_rate": 4.5733376931575535e-05, "loss": 2.4685, "step": 1723000 }, { "epoch": 8.54, "learning_rate": 4.573213834514945e-05, "loss": 2.4622, "step": 1723500 }, { "epoch": 8.54, "learning_rate": 4.573089975872337e-05, "loss": 2.4663, "step": 1724000 }, { "epoch": 8.54, "learning_rate": 4.572966364947013e-05, "loss": 2.4865, "step": 1724500 }, { "epoch": 8.55, "learning_rate": 4.572842506304405e-05, "loss": 2.4635, "step": 1725000 }, { "epoch": 8.55, "learning_rate": 4.5727186476617965e-05, "loss": 2.4426, "step": 1725500 }, { "epoch": 8.55, "learning_rate": 4.572594789019188e-05, "loss": 2.4789, "step": 1726000 }, { "epoch": 8.55, "learning_rate": 4.57247093037658e-05, "loss": 2.4653, "step": 1726500 }, { "epoch": 8.56, "learning_rate": 4.5723470717339716e-05, "loss": 2.4574, "step": 1727000 }, { "epoch": 8.56, "learning_rate": 4.572223213091363e-05, "loss": 2.447, "step": 1727500 }, { "epoch": 8.56, "learning_rate": 4.572099354448755e-05, "loss": 2.4368, "step": 1728000 }, { "epoch": 8.56, "learning_rate": 4.571975495806147e-05, "loss": 2.4754, "step": 1728500 }, { "epoch": 8.57, "learning_rate": 4.5718516371635384e-05, "loss": 2.4884, "step": 1729000 }, { "epoch": 8.57, "learning_rate": 4.57172777852093e-05, "loss": 2.4793, "step": 1729500 }, { "epoch": 8.57, "learning_rate": 4.571603919878322e-05, "loss": 2.4638, "step": 1730000 }, { "epoch": 8.57, "learning_rate": 4.5714803089529986e-05, "loss": 2.4545, "step": 1730500 }, { "epoch": 8.58, "learning_rate": 4.57135645031039e-05, "loss": 2.4627, "step": 1731000 }, { "epoch": 8.58, "learning_rate": 4.571232591667782e-05, "loss": 2.46, "step": 1731500 }, { "epoch": 8.58, "learning_rate": 4.571108733025174e-05, "loss": 2.4664, "step": 1732000 }, { "epoch": 8.58, "learning_rate": 4.570985369817135e-05, "loss": 2.468, "step": 1732500 }, { "epoch": 8.59, "learning_rate": 4.570861511174527e-05, "loss": 2.4468, "step": 1733000 }, { "epoch": 8.59, "learning_rate": 4.5707376525319185e-05, "loss": 2.4756, "step": 1733500 }, { "epoch": 8.59, "learning_rate": 4.57061379388931e-05, "loss": 2.4659, "step": 1734000 }, { "epoch": 8.59, "learning_rate": 4.570489935246702e-05, "loss": 2.4663, "step": 1734500 }, { "epoch": 8.6, "learning_rate": 4.570366324321379e-05, "loss": 2.4561, "step": 1735000 }, { "epoch": 8.6, "learning_rate": 4.5702424656787704e-05, "loss": 2.4384, "step": 1735500 }, { "epoch": 8.6, "learning_rate": 4.5701186070361615e-05, "loss": 2.4622, "step": 1736000 }, { "epoch": 8.6, "learning_rate": 4.569994748393553e-05, "loss": 2.4654, "step": 1736500 }, { "epoch": 8.61, "learning_rate": 4.569870889750945e-05, "loss": 2.4673, "step": 1737000 }, { "epoch": 8.61, "learning_rate": 4.5697472788256224e-05, "loss": 2.4546, "step": 1737500 }, { "epoch": 8.61, "learning_rate": 4.569623667900299e-05, "loss": 2.4721, "step": 1738000 }, { "epoch": 8.61, "learning_rate": 4.569499809257691e-05, "loss": 2.4357, "step": 1738500 }, { "epoch": 8.62, "learning_rate": 4.569375950615083e-05, "loss": 2.4688, "step": 1739000 }, { "epoch": 8.62, "learning_rate": 4.5692520919724744e-05, "loss": 2.4617, "step": 1739500 }, { "epoch": 8.62, "learning_rate": 4.569128233329866e-05, "loss": 2.4783, "step": 1740000 }, { "epoch": 8.62, "learning_rate": 4.569004374687257e-05, "loss": 2.4474, "step": 1740500 }, { "epoch": 8.63, "learning_rate": 4.568880516044649e-05, "loss": 2.476, "step": 1741000 }, { "epoch": 8.63, "learning_rate": 4.5687566574020405e-05, "loss": 2.4684, "step": 1741500 }, { "epoch": 8.63, "learning_rate": 4.568632798759432e-05, "loss": 2.4567, "step": 1742000 }, { "epoch": 8.63, "learning_rate": 4.568508940116824e-05, "loss": 2.4522, "step": 1742500 }, { "epoch": 8.64, "learning_rate": 4.568385081474215e-05, "loss": 2.4613, "step": 1743000 }, { "epoch": 8.64, "learning_rate": 4.5682612228316065e-05, "loss": 2.481, "step": 1743500 }, { "epoch": 8.64, "learning_rate": 4.568137611906284e-05, "loss": 2.4624, "step": 1744000 }, { "epoch": 8.64, "learning_rate": 4.568013753263675e-05, "loss": 2.4642, "step": 1744500 }, { "epoch": 8.65, "learning_rate": 4.567889894621067e-05, "loss": 2.4412, "step": 1745000 }, { "epoch": 8.65, "learning_rate": 4.5677660359784585e-05, "loss": 2.4577, "step": 1745500 }, { "epoch": 8.65, "learning_rate": 4.56764217733585e-05, "loss": 2.4878, "step": 1746000 }, { "epoch": 8.65, "learning_rate": 4.567518566410528e-05, "loss": 2.4542, "step": 1746500 }, { "epoch": 8.66, "learning_rate": 4.567394707767919e-05, "loss": 2.466, "step": 1747000 }, { "epoch": 8.66, "learning_rate": 4.5672708491253105e-05, "loss": 2.4531, "step": 1747500 }, { "epoch": 8.66, "learning_rate": 4.567146990482702e-05, "loss": 2.4706, "step": 1748000 }, { "epoch": 8.66, "learning_rate": 4.567023131840094e-05, "loss": 2.4771, "step": 1748500 }, { "epoch": 8.67, "learning_rate": 4.5668992731974855e-05, "loss": 2.4667, "step": 1749000 }, { "epoch": 8.67, "learning_rate": 4.5667754145548766e-05, "loss": 2.4672, "step": 1749500 }, { "epoch": 8.67, "learning_rate": 4.566651803629554e-05, "loss": 2.4299, "step": 1750000 }, { "epoch": 8.67, "learning_rate": 4.566527944986945e-05, "loss": 2.4456, "step": 1750500 }, { "epoch": 8.68, "learning_rate": 4.566404334061623e-05, "loss": 2.4839, "step": 1751000 }, { "epoch": 8.68, "learning_rate": 4.5662804754190144e-05, "loss": 2.453, "step": 1751500 }, { "epoch": 8.68, "learning_rate": 4.5661568644936906e-05, "loss": 2.4527, "step": 1752000 }, { "epoch": 8.68, "learning_rate": 4.566033005851082e-05, "loss": 2.4506, "step": 1752500 }, { "epoch": 8.68, "learning_rate": 4.565909147208474e-05, "loss": 2.4345, "step": 1753000 }, { "epoch": 8.69, "learning_rate": 4.565785288565866e-05, "loss": 2.4588, "step": 1753500 }, { "epoch": 8.69, "learning_rate": 4.5656614299232574e-05, "loss": 2.4735, "step": 1754000 }, { "epoch": 8.69, "learning_rate": 4.565537571280649e-05, "loss": 2.4679, "step": 1754500 }, { "epoch": 8.69, "learning_rate": 4.565413712638041e-05, "loss": 2.4841, "step": 1755000 }, { "epoch": 8.7, "learning_rate": 4.5652898539954324e-05, "loss": 2.4871, "step": 1755500 }, { "epoch": 8.7, "learning_rate": 4.565165995352824e-05, "loss": 2.4499, "step": 1756000 }, { "epoch": 8.7, "learning_rate": 4.565042136710216e-05, "loss": 2.4511, "step": 1756500 }, { "epoch": 8.7, "learning_rate": 4.564918278067607e-05, "loss": 2.4915, "step": 1757000 }, { "epoch": 8.71, "learning_rate": 4.5647944194249985e-05, "loss": 2.4732, "step": 1757500 }, { "epoch": 8.71, "learning_rate": 4.56467056078239e-05, "loss": 2.4507, "step": 1758000 }, { "epoch": 8.71, "learning_rate": 4.564546702139782e-05, "loss": 2.4399, "step": 1758500 }, { "epoch": 8.71, "learning_rate": 4.5644228434971736e-05, "loss": 2.4765, "step": 1759000 }, { "epoch": 8.72, "learning_rate": 4.564299480289136e-05, "loss": 2.4684, "step": 1759500 }, { "epoch": 8.72, "learning_rate": 4.5641756216465274e-05, "loss": 2.4655, "step": 1760000 }, { "epoch": 8.72, "learning_rate": 4.564051763003919e-05, "loss": 2.4645, "step": 1760500 }, { "epoch": 8.72, "learning_rate": 4.563927904361311e-05, "loss": 2.4831, "step": 1761000 }, { "epoch": 8.73, "learning_rate": 4.5638040457187025e-05, "loss": 2.4761, "step": 1761500 }, { "epoch": 8.73, "learning_rate": 4.563680187076094e-05, "loss": 2.4824, "step": 1762000 }, { "epoch": 8.73, "learning_rate": 4.563556328433486e-05, "loss": 2.4461, "step": 1762500 }, { "epoch": 8.73, "learning_rate": 4.563432469790877e-05, "loss": 2.4469, "step": 1763000 }, { "epoch": 8.74, "learning_rate": 4.5633086111482685e-05, "loss": 2.4606, "step": 1763500 }, { "epoch": 8.74, "learning_rate": 4.56318475250566e-05, "loss": 2.4706, "step": 1764000 }, { "epoch": 8.74, "learning_rate": 4.563061141580338e-05, "loss": 2.4447, "step": 1764500 }, { "epoch": 8.74, "learning_rate": 4.562937530655014e-05, "loss": 2.4601, "step": 1765000 }, { "epoch": 8.75, "learning_rate": 4.562813672012406e-05, "loss": 2.4485, "step": 1765500 }, { "epoch": 8.75, "learning_rate": 4.5626898133697974e-05, "loss": 2.4624, "step": 1766000 }, { "epoch": 8.75, "learning_rate": 4.562565954727189e-05, "loss": 2.5026, "step": 1766500 }, { "epoch": 8.75, "learning_rate": 4.562442096084581e-05, "loss": 2.4555, "step": 1767000 }, { "epoch": 8.76, "learning_rate": 4.5623182374419725e-05, "loss": 2.4672, "step": 1767500 }, { "epoch": 8.76, "learning_rate": 4.562194378799364e-05, "loss": 2.4704, "step": 1768000 }, { "epoch": 8.76, "learning_rate": 4.5620712633086114e-05, "loss": 2.5025, "step": 1768500 }, { "epoch": 8.76, "learning_rate": 4.561947404666003e-05, "loss": 2.4781, "step": 1769000 }, { "epoch": 8.77, "learning_rate": 4.561823546023395e-05, "loss": 2.4675, "step": 1769500 }, { "epoch": 8.77, "learning_rate": 4.561699687380786e-05, "loss": 2.4574, "step": 1770000 }, { "epoch": 8.77, "learning_rate": 4.5615758287381775e-05, "loss": 2.4715, "step": 1770500 }, { "epoch": 8.77, "learning_rate": 4.561451970095569e-05, "loss": 2.4416, "step": 1771000 }, { "epoch": 8.78, "learning_rate": 4.561328111452961e-05, "loss": 2.4696, "step": 1771500 }, { "epoch": 8.78, "learning_rate": 4.5612042528103526e-05, "loss": 2.4609, "step": 1772000 }, { "epoch": 8.78, "learning_rate": 4.561080394167744e-05, "loss": 2.4613, "step": 1772500 }, { "epoch": 8.78, "learning_rate": 4.560956535525136e-05, "loss": 2.4924, "step": 1773000 }, { "epoch": 8.79, "learning_rate": 4.560832676882528e-05, "loss": 2.4574, "step": 1773500 }, { "epoch": 8.79, "learning_rate": 4.5607088182399194e-05, "loss": 2.479, "step": 1774000 }, { "epoch": 8.79, "learning_rate": 4.560584959597311e-05, "loss": 2.4495, "step": 1774500 }, { "epoch": 8.79, "learning_rate": 4.560461100954703e-05, "loss": 2.4675, "step": 1775000 }, { "epoch": 8.8, "learning_rate": 4.5603372423120944e-05, "loss": 2.472, "step": 1775500 }, { "epoch": 8.8, "learning_rate": 4.560213631386771e-05, "loss": 2.4506, "step": 1776000 }, { "epoch": 8.8, "learning_rate": 4.560089772744163e-05, "loss": 2.453, "step": 1776500 }, { "epoch": 8.8, "learning_rate": 4.559965914101555e-05, "loss": 2.4638, "step": 1777000 }, { "epoch": 8.81, "learning_rate": 4.559842055458946e-05, "loss": 2.4947, "step": 1777500 }, { "epoch": 8.81, "learning_rate": 4.5597181968163374e-05, "loss": 2.4435, "step": 1778000 }, { "epoch": 8.81, "learning_rate": 4.559594338173729e-05, "loss": 2.4739, "step": 1778500 }, { "epoch": 8.81, "learning_rate": 4.559470727248406e-05, "loss": 2.435, "step": 1779000 }, { "epoch": 8.82, "learning_rate": 4.559346868605798e-05, "loss": 2.4844, "step": 1779500 }, { "epoch": 8.82, "learning_rate": 4.5592230099631894e-05, "loss": 2.4432, "step": 1780000 }, { "epoch": 8.82, "learning_rate": 4.559099151320581e-05, "loss": 2.4639, "step": 1780500 }, { "epoch": 8.82, "learning_rate": 4.558975540395258e-05, "loss": 2.4533, "step": 1781000 }, { "epoch": 8.83, "learning_rate": 4.558851929469935e-05, "loss": 2.4545, "step": 1781500 }, { "epoch": 8.83, "learning_rate": 4.5587280708273265e-05, "loss": 2.4481, "step": 1782000 }, { "epoch": 8.83, "learning_rate": 4.5586042121847175e-05, "loss": 2.4613, "step": 1782500 }, { "epoch": 8.83, "learning_rate": 4.558480353542109e-05, "loss": 2.4775, "step": 1783000 }, { "epoch": 8.84, "learning_rate": 4.558356494899501e-05, "loss": 2.4433, "step": 1783500 }, { "epoch": 8.84, "learning_rate": 4.5582326362568926e-05, "loss": 2.4653, "step": 1784000 }, { "epoch": 8.84, "learning_rate": 4.558108777614284e-05, "loss": 2.4877, "step": 1784500 }, { "epoch": 8.84, "learning_rate": 4.557984918971676e-05, "loss": 2.4707, "step": 1785000 }, { "epoch": 8.85, "learning_rate": 4.557861060329068e-05, "loss": 2.4879, "step": 1785500 }, { "epoch": 8.85, "learning_rate": 4.5577372016864594e-05, "loss": 2.4454, "step": 1786000 }, { "epoch": 8.85, "learning_rate": 4.557613343043851e-05, "loss": 2.458, "step": 1786500 }, { "epoch": 8.85, "learning_rate": 4.557489732118528e-05, "loss": 2.4683, "step": 1787000 }, { "epoch": 8.86, "learning_rate": 4.5573658734759196e-05, "loss": 2.4623, "step": 1787500 }, { "epoch": 8.86, "learning_rate": 4.5572420148333113e-05, "loss": 2.4821, "step": 1788000 }, { "epoch": 8.86, "learning_rate": 4.557118156190703e-05, "loss": 2.456, "step": 1788500 }, { "epoch": 8.86, "learning_rate": 4.556994297548095e-05, "loss": 2.4476, "step": 1789000 }, { "epoch": 8.87, "learning_rate": 4.5568704389054864e-05, "loss": 2.4679, "step": 1789500 }, { "epoch": 8.87, "learning_rate": 4.556746580262878e-05, "loss": 2.4734, "step": 1790000 }, { "epoch": 8.87, "learning_rate": 4.55662272162027e-05, "loss": 2.4734, "step": 1790500 }, { "epoch": 8.87, "learning_rate": 4.556498862977661e-05, "loss": 2.4945, "step": 1791000 }, { "epoch": 8.88, "learning_rate": 4.5563750043350525e-05, "loss": 2.4307, "step": 1791500 }, { "epoch": 8.88, "learning_rate": 4.5562513934097294e-05, "loss": 2.4534, "step": 1792000 }, { "epoch": 8.88, "learning_rate": 4.556127534767121e-05, "loss": 2.4577, "step": 1792500 }, { "epoch": 8.88, "learning_rate": 4.556003676124513e-05, "loss": 2.4634, "step": 1793000 }, { "epoch": 8.89, "learning_rate": 4.5558798174819045e-05, "loss": 2.4808, "step": 1793500 }, { "epoch": 8.89, "learning_rate": 4.555755958839296e-05, "loss": 2.4439, "step": 1794000 }, { "epoch": 8.89, "learning_rate": 4.555632100196688e-05, "loss": 2.4453, "step": 1794500 }, { "epoch": 8.89, "learning_rate": 4.555508489271365e-05, "loss": 2.442, "step": 1795000 }, { "epoch": 8.9, "learning_rate": 4.5553846306287564e-05, "loss": 2.4453, "step": 1795500 }, { "epoch": 8.9, "learning_rate": 4.555260771986148e-05, "loss": 2.4428, "step": 1796000 }, { "epoch": 8.9, "learning_rate": 4.555137161060824e-05, "loss": 2.48, "step": 1796500 }, { "epoch": 8.9, "learning_rate": 4.555013302418216e-05, "loss": 2.4658, "step": 1797000 }, { "epoch": 8.91, "learning_rate": 4.554889443775608e-05, "loss": 2.4606, "step": 1797500 }, { "epoch": 8.91, "learning_rate": 4.5547655851329994e-05, "loss": 2.449, "step": 1798000 }, { "epoch": 8.91, "learning_rate": 4.554641726490391e-05, "loss": 2.4647, "step": 1798500 }, { "epoch": 8.91, "learning_rate": 4.554517867847783e-05, "loss": 2.4644, "step": 1799000 }, { "epoch": 8.92, "learning_rate": 4.5543940092051745e-05, "loss": 2.4737, "step": 1799500 }, { "epoch": 8.92, "learning_rate": 4.554270150562566e-05, "loss": 2.491, "step": 1800000 }, { "epoch": 8.92, "learning_rate": 4.554146291919958e-05, "loss": 2.4842, "step": 1800500 }, { "epoch": 8.92, "learning_rate": 4.55402292871192e-05, "loss": 2.4706, "step": 1801000 }, { "epoch": 8.93, "learning_rate": 4.553899070069311e-05, "loss": 2.4853, "step": 1801500 }, { "epoch": 8.93, "learning_rate": 4.5537752114267026e-05, "loss": 2.4699, "step": 1802000 }, { "epoch": 8.93, "learning_rate": 4.5536513527840943e-05, "loss": 2.4815, "step": 1802500 }, { "epoch": 8.93, "learning_rate": 4.553527494141486e-05, "loss": 2.458, "step": 1803000 }, { "epoch": 8.94, "learning_rate": 4.553403635498878e-05, "loss": 2.4418, "step": 1803500 }, { "epoch": 8.94, "learning_rate": 4.5532797768562694e-05, "loss": 2.4691, "step": 1804000 }, { "epoch": 8.94, "learning_rate": 4.553155918213661e-05, "loss": 2.4687, "step": 1804500 }, { "epoch": 8.94, "learning_rate": 4.553032059571053e-05, "loss": 2.4617, "step": 1805000 }, { "epoch": 8.95, "learning_rate": 4.55290844864573e-05, "loss": 2.491, "step": 1805500 }, { "epoch": 8.95, "learning_rate": 4.5527845900031214e-05, "loss": 2.4481, "step": 1806000 }, { "epoch": 8.95, "learning_rate": 4.552660731360513e-05, "loss": 2.447, "step": 1806500 }, { "epoch": 8.95, "learning_rate": 4.552536872717905e-05, "loss": 2.4697, "step": 1807000 }, { "epoch": 8.95, "learning_rate": 4.5524132617925816e-05, "loss": 2.4696, "step": 1807500 }, { "epoch": 8.96, "learning_rate": 4.5522894031499727e-05, "loss": 2.4883, "step": 1808000 }, { "epoch": 8.96, "learning_rate": 4.5521655445073644e-05, "loss": 2.429, "step": 1808500 }, { "epoch": 8.96, "learning_rate": 4.552041685864756e-05, "loss": 2.4636, "step": 1809000 }, { "epoch": 8.96, "learning_rate": 4.5519180749394336e-05, "loss": 2.4663, "step": 1809500 }, { "epoch": 8.97, "learning_rate": 4.5517944640141105e-05, "loss": 2.4526, "step": 1810000 }, { "epoch": 8.97, "learning_rate": 4.551670605371502e-05, "loss": 2.4824, "step": 1810500 }, { "epoch": 8.97, "learning_rate": 4.551546746728894e-05, "loss": 2.4671, "step": 1811000 }, { "epoch": 8.97, "learning_rate": 4.5514228880862856e-05, "loss": 2.4767, "step": 1811500 }, { "epoch": 8.98, "learning_rate": 4.551299029443677e-05, "loss": 2.4661, "step": 1812000 }, { "epoch": 8.98, "learning_rate": 4.551175170801068e-05, "loss": 2.4633, "step": 1812500 }, { "epoch": 8.98, "learning_rate": 4.55105131215846e-05, "loss": 2.4379, "step": 1813000 }, { "epoch": 8.98, "learning_rate": 4.5509274535158517e-05, "loss": 2.4667, "step": 1813500 }, { "epoch": 8.99, "learning_rate": 4.5508035948732434e-05, "loss": 2.4808, "step": 1814000 }, { "epoch": 8.99, "learning_rate": 4.55067998394792e-05, "loss": 2.4708, "step": 1814500 }, { "epoch": 8.99, "learning_rate": 4.550556125305312e-05, "loss": 2.4644, "step": 1815000 }, { "epoch": 8.99, "learning_rate": 4.5504322666627036e-05, "loss": 2.4861, "step": 1815500 }, { "epoch": 9.0, "learning_rate": 4.550308408020095e-05, "loss": 2.4598, "step": 1816000 }, { "epoch": 9.0, "learning_rate": 4.550184549377487e-05, "loss": 2.4818, "step": 1816500 }, { "epoch": 9.0, "eval_accuracy": 0.6403843342409302, "eval_accuracy_mlm": 0.5934790457919371, "eval_accuracy_nsp": 0.8615189108837107, "eval_loss": 2.434657096862793, "eval_runtime": 145.7954, "eval_samples_per_second": 1748.745, "eval_steps_per_second": 72.869, "step": 1816587 }, { "epoch": 9.0, "learning_rate": 4.550060938452164e-05, "loss": 2.424, "step": 1817000 }, { "epoch": 9.0, "learning_rate": 4.5499370798095556e-05, "loss": 2.426, "step": 1817500 }, { "epoch": 9.01, "learning_rate": 4.549813221166947e-05, "loss": 2.4208, "step": 1818000 }, { "epoch": 9.01, "learning_rate": 4.549689362524339e-05, "loss": 2.4265, "step": 1818500 }, { "epoch": 9.01, "learning_rate": 4.549565751599015e-05, "loss": 2.4424, "step": 1819000 }, { "epoch": 9.01, "learning_rate": 4.549441892956407e-05, "loss": 2.4181, "step": 1819500 }, { "epoch": 9.02, "learning_rate": 4.5493180343137986e-05, "loss": 2.4305, "step": 1820000 }, { "epoch": 9.02, "learning_rate": 4.54919417567119e-05, "loss": 2.4542, "step": 1820500 }, { "epoch": 9.02, "learning_rate": 4.549070317028582e-05, "loss": 2.4495, "step": 1821000 }, { "epoch": 9.02, "learning_rate": 4.5489464583859736e-05, "loss": 2.4497, "step": 1821500 }, { "epoch": 9.03, "learning_rate": 4.5488228474606505e-05, "loss": 2.442, "step": 1822000 }, { "epoch": 9.03, "learning_rate": 4.548698988818042e-05, "loss": 2.4593, "step": 1822500 }, { "epoch": 9.03, "learning_rate": 4.548575130175434e-05, "loss": 2.4229, "step": 1823000 }, { "epoch": 9.03, "learning_rate": 4.5484512715328256e-05, "loss": 2.4115, "step": 1823500 }, { "epoch": 9.04, "learning_rate": 4.548327412890217e-05, "loss": 2.4466, "step": 1824000 }, { "epoch": 9.04, "learning_rate": 4.548203554247609e-05, "loss": 2.4517, "step": 1824500 }, { "epoch": 9.04, "learning_rate": 4.548079695605001e-05, "loss": 2.4276, "step": 1825000 }, { "epoch": 9.04, "learning_rate": 4.5479558369623924e-05, "loss": 2.4327, "step": 1825500 }, { "epoch": 9.05, "learning_rate": 4.5478319783197834e-05, "loss": 2.4248, "step": 1826000 }, { "epoch": 9.05, "learning_rate": 4.547708119677175e-05, "loss": 2.4232, "step": 1826500 }, { "epoch": 9.05, "learning_rate": 4.547584508751852e-05, "loss": 2.4336, "step": 1827000 }, { "epoch": 9.05, "learning_rate": 4.547460897826529e-05, "loss": 2.4489, "step": 1827500 }, { "epoch": 9.06, "learning_rate": 4.5473370391839205e-05, "loss": 2.4391, "step": 1828000 }, { "epoch": 9.06, "learning_rate": 4.5472134282585974e-05, "loss": 2.4429, "step": 1828500 }, { "epoch": 9.06, "learning_rate": 4.5470895696159884e-05, "loss": 2.4328, "step": 1829000 }, { "epoch": 9.06, "learning_rate": 4.54696571097338e-05, "loss": 2.4022, "step": 1829500 }, { "epoch": 9.07, "learning_rate": 4.546841852330772e-05, "loss": 2.4167, "step": 1830000 }, { "epoch": 9.07, "learning_rate": 4.5467179936881635e-05, "loss": 2.4667, "step": 1830500 }, { "epoch": 9.07, "learning_rate": 4.546594135045555e-05, "loss": 2.4303, "step": 1831000 }, { "epoch": 9.07, "learning_rate": 4.546470276402947e-05, "loss": 2.4245, "step": 1831500 }, { "epoch": 9.08, "learning_rate": 4.5463464177603386e-05, "loss": 2.4439, "step": 1832000 }, { "epoch": 9.08, "learning_rate": 4.54622255911773e-05, "loss": 2.447, "step": 1832500 }, { "epoch": 9.08, "learning_rate": 4.546098700475122e-05, "loss": 2.4434, "step": 1833000 }, { "epoch": 9.08, "learning_rate": 4.5459748418325137e-05, "loss": 2.4311, "step": 1833500 }, { "epoch": 9.09, "learning_rate": 4.5458509831899053e-05, "loss": 2.4513, "step": 1834000 }, { "epoch": 9.09, "learning_rate": 4.545727372264582e-05, "loss": 2.4607, "step": 1834500 }, { "epoch": 9.09, "learning_rate": 4.545603513621974e-05, "loss": 2.4291, "step": 1835000 }, { "epoch": 9.09, "learning_rate": 4.5454796549793656e-05, "loss": 2.4439, "step": 1835500 }, { "epoch": 9.1, "learning_rate": 4.545355796336757e-05, "loss": 2.4741, "step": 1836000 }, { "epoch": 9.1, "learning_rate": 4.5452321854114335e-05, "loss": 2.4591, "step": 1836500 }, { "epoch": 9.1, "learning_rate": 4.545108326768825e-05, "loss": 2.4535, "step": 1837000 }, { "epoch": 9.1, "learning_rate": 4.544984468126217e-05, "loss": 2.4464, "step": 1837500 }, { "epoch": 9.11, "learning_rate": 4.5448606094836086e-05, "loss": 2.4405, "step": 1838000 }, { "epoch": 9.11, "learning_rate": 4.544736750841e-05, "loss": 2.4423, "step": 1838500 }, { "epoch": 9.11, "learning_rate": 4.544612892198392e-05, "loss": 2.4383, "step": 1839000 }, { "epoch": 9.11, "learning_rate": 4.544489033555784e-05, "loss": 2.4421, "step": 1839500 }, { "epoch": 9.12, "learning_rate": 4.5443651749131754e-05, "loss": 2.4476, "step": 1840000 }, { "epoch": 9.12, "learning_rate": 4.544241316270567e-05, "loss": 2.4121, "step": 1840500 }, { "epoch": 9.12, "learning_rate": 4.544117457627959e-05, "loss": 2.4078, "step": 1841000 }, { "epoch": 9.12, "learning_rate": 4.5439935989853504e-05, "loss": 2.4349, "step": 1841500 }, { "epoch": 9.13, "learning_rate": 4.543869988060027e-05, "loss": 2.4562, "step": 1842000 }, { "epoch": 9.13, "learning_rate": 4.543746129417419e-05, "loss": 2.404, "step": 1842500 }, { "epoch": 9.13, "learning_rate": 4.543622270774811e-05, "loss": 2.4584, "step": 1843000 }, { "epoch": 9.13, "learning_rate": 4.543498659849487e-05, "loss": 2.4404, "step": 1843500 }, { "epoch": 9.14, "learning_rate": 4.5433748012068786e-05, "loss": 2.4358, "step": 1844000 }, { "epoch": 9.14, "learning_rate": 4.54325094256427e-05, "loss": 2.4061, "step": 1844500 }, { "epoch": 9.14, "learning_rate": 4.543127083921662e-05, "loss": 2.4233, "step": 1845000 }, { "epoch": 9.14, "learning_rate": 4.543003472996339e-05, "loss": 2.4524, "step": 1845500 }, { "epoch": 9.15, "learning_rate": 4.542879862071016e-05, "loss": 2.4392, "step": 1846000 }, { "epoch": 9.15, "learning_rate": 4.5427562511456926e-05, "loss": 2.4372, "step": 1846500 }, { "epoch": 9.15, "learning_rate": 4.542632392503084e-05, "loss": 2.4474, "step": 1847000 }, { "epoch": 9.15, "learning_rate": 4.542508533860476e-05, "loss": 2.4468, "step": 1847500 }, { "epoch": 9.16, "learning_rate": 4.542384675217868e-05, "loss": 2.4432, "step": 1848000 }, { "epoch": 9.16, "learning_rate": 4.5422608165752594e-05, "loss": 2.4501, "step": 1848500 }, { "epoch": 9.16, "learning_rate": 4.5421369579326504e-05, "loss": 2.4495, "step": 1849000 }, { "epoch": 9.16, "learning_rate": 4.542013099290042e-05, "loss": 2.4418, "step": 1849500 }, { "epoch": 9.17, "learning_rate": 4.541889240647434e-05, "loss": 2.4484, "step": 1850000 }, { "epoch": 9.17, "learning_rate": 4.5417653820048255e-05, "loss": 2.4385, "step": 1850500 }, { "epoch": 9.17, "learning_rate": 4.541641523362217e-05, "loss": 2.4258, "step": 1851000 }, { "epoch": 9.17, "learning_rate": 4.541517664719609e-05, "loss": 2.4504, "step": 1851500 }, { "epoch": 9.18, "learning_rate": 4.5413938060770006e-05, "loss": 2.458, "step": 1852000 }, { "epoch": 9.18, "learning_rate": 4.541269947434392e-05, "loss": 2.4314, "step": 1852500 }, { "epoch": 9.18, "learning_rate": 4.541146088791784e-05, "loss": 2.436, "step": 1853000 }, { "epoch": 9.18, "learning_rate": 4.5410222301491757e-05, "loss": 2.426, "step": 1853500 }, { "epoch": 9.19, "learning_rate": 4.5408983715065673e-05, "loss": 2.435, "step": 1854000 }, { "epoch": 9.19, "learning_rate": 4.540774512863959e-05, "loss": 2.4389, "step": 1854500 }, { "epoch": 9.19, "learning_rate": 4.540650901938636e-05, "loss": 2.461, "step": 1855000 }, { "epoch": 9.19, "learning_rate": 4.5405270432960276e-05, "loss": 2.4472, "step": 1855500 }, { "epoch": 9.2, "learning_rate": 4.540403432370704e-05, "loss": 2.4388, "step": 1856000 }, { "epoch": 9.2, "learning_rate": 4.5402795737280955e-05, "loss": 2.4427, "step": 1856500 }, { "epoch": 9.2, "learning_rate": 4.540155715085487e-05, "loss": 2.4563, "step": 1857000 }, { "epoch": 9.2, "learning_rate": 4.540032104160165e-05, "loss": 2.4632, "step": 1857500 }, { "epoch": 9.21, "learning_rate": 4.539908245517556e-05, "loss": 2.4482, "step": 1858000 }, { "epoch": 9.21, "learning_rate": 4.5397843868749475e-05, "loss": 2.4332, "step": 1858500 }, { "epoch": 9.21, "learning_rate": 4.539660528232339e-05, "loss": 2.4423, "step": 1859000 }, { "epoch": 9.21, "learning_rate": 4.539536669589731e-05, "loss": 2.4584, "step": 1859500 }, { "epoch": 9.22, "learning_rate": 4.5394128109471225e-05, "loss": 2.4123, "step": 1860000 }, { "epoch": 9.22, "learning_rate": 4.539288952304514e-05, "loss": 2.4395, "step": 1860500 }, { "epoch": 9.22, "learning_rate": 4.539165093661906e-05, "loss": 2.4833, "step": 1861000 }, { "epoch": 9.22, "learning_rate": 4.5390412350192976e-05, "loss": 2.4595, "step": 1861500 }, { "epoch": 9.22, "learning_rate": 4.538917376376689e-05, "loss": 2.4364, "step": 1862000 }, { "epoch": 9.23, "learning_rate": 4.538793517734081e-05, "loss": 2.4205, "step": 1862500 }, { "epoch": 9.23, "learning_rate": 4.538669659091472e-05, "loss": 2.4233, "step": 1863000 }, { "epoch": 9.23, "learning_rate": 4.538545800448864e-05, "loss": 2.4394, "step": 1863500 }, { "epoch": 9.23, "learning_rate": 4.5384219418062554e-05, "loss": 2.4386, "step": 1864000 }, { "epoch": 9.24, "learning_rate": 4.538298330880932e-05, "loss": 2.4466, "step": 1864500 }, { "epoch": 9.24, "learning_rate": 4.538174472238324e-05, "loss": 2.4483, "step": 1865000 }, { "epoch": 9.24, "learning_rate": 4.538050613595716e-05, "loss": 2.454, "step": 1865500 }, { "epoch": 9.24, "learning_rate": 4.5379267549531074e-05, "loss": 2.4675, "step": 1866000 }, { "epoch": 9.25, "learning_rate": 4.537803144027784e-05, "loss": 2.4575, "step": 1866500 }, { "epoch": 9.25, "learning_rate": 4.537679285385176e-05, "loss": 2.4484, "step": 1867000 }, { "epoch": 9.25, "learning_rate": 4.5375554267425676e-05, "loss": 2.4436, "step": 1867500 }, { "epoch": 9.25, "learning_rate": 4.537431568099959e-05, "loss": 2.4271, "step": 1868000 }, { "epoch": 9.26, "learning_rate": 4.537307709457351e-05, "loss": 2.4645, "step": 1868500 }, { "epoch": 9.26, "learning_rate": 4.537183850814743e-05, "loss": 2.4648, "step": 1869000 }, { "epoch": 9.26, "learning_rate": 4.537059992172134e-05, "loss": 2.4415, "step": 1869500 }, { "epoch": 9.26, "learning_rate": 4.5369363812468106e-05, "loss": 2.4357, "step": 1870000 }, { "epoch": 9.27, "learning_rate": 4.536812522604202e-05, "loss": 2.4403, "step": 1870500 }, { "epoch": 9.27, "learning_rate": 4.536688663961594e-05, "loss": 2.4475, "step": 1871000 }, { "epoch": 9.27, "learning_rate": 4.536564805318986e-05, "loss": 2.4376, "step": 1871500 }, { "epoch": 9.27, "learning_rate": 4.5364411943936626e-05, "loss": 2.4186, "step": 1872000 }, { "epoch": 9.28, "learning_rate": 4.536317335751054e-05, "loss": 2.4547, "step": 1872500 }, { "epoch": 9.28, "learning_rate": 4.536193477108446e-05, "loss": 2.4273, "step": 1873000 }, { "epoch": 9.28, "learning_rate": 4.5360696184658376e-05, "loss": 2.453, "step": 1873500 }, { "epoch": 9.28, "learning_rate": 4.5359457598232293e-05, "loss": 2.4315, "step": 1874000 }, { "epoch": 9.29, "learning_rate": 4.535821901180621e-05, "loss": 2.4556, "step": 1874500 }, { "epoch": 9.29, "learning_rate": 4.535698042538013e-05, "loss": 2.4386, "step": 1875000 }, { "epoch": 9.29, "learning_rate": 4.5355741838954044e-05, "loss": 2.4383, "step": 1875500 }, { "epoch": 9.29, "learning_rate": 4.535450325252796e-05, "loss": 2.4127, "step": 1876000 }, { "epoch": 9.3, "learning_rate": 4.535326466610187e-05, "loss": 2.4339, "step": 1876500 }, { "epoch": 9.3, "learning_rate": 4.535202607967579e-05, "loss": 2.4542, "step": 1877000 }, { "epoch": 9.3, "learning_rate": 4.5350787493249705e-05, "loss": 2.4275, "step": 1877500 }, { "epoch": 9.3, "learning_rate": 4.5349551383996474e-05, "loss": 2.4308, "step": 1878000 }, { "epoch": 9.31, "learning_rate": 4.534831279757039e-05, "loss": 2.4147, "step": 1878500 }, { "epoch": 9.31, "learning_rate": 4.534707421114431e-05, "loss": 2.4667, "step": 1879000 }, { "epoch": 9.31, "learning_rate": 4.5345835624718225e-05, "loss": 2.4236, "step": 1879500 }, { "epoch": 9.31, "learning_rate": 4.534459703829214e-05, "loss": 2.4343, "step": 1880000 }, { "epoch": 9.32, "learning_rate": 4.534335845186606e-05, "loss": 2.4451, "step": 1880500 }, { "epoch": 9.32, "learning_rate": 4.5342119865439975e-05, "loss": 2.4541, "step": 1881000 }, { "epoch": 9.32, "learning_rate": 4.534088127901389e-05, "loss": 2.4212, "step": 1881500 }, { "epoch": 9.32, "learning_rate": 4.533964516976066e-05, "loss": 2.4321, "step": 1882000 }, { "epoch": 9.33, "learning_rate": 4.533840658333458e-05, "loss": 2.4481, "step": 1882500 }, { "epoch": 9.33, "learning_rate": 4.533716799690849e-05, "loss": 2.4062, "step": 1883000 }, { "epoch": 9.33, "learning_rate": 4.5335929410482405e-05, "loss": 2.4301, "step": 1883500 }, { "epoch": 9.33, "learning_rate": 4.533469082405632e-05, "loss": 2.4163, "step": 1884000 }, { "epoch": 9.34, "learning_rate": 4.533345471480309e-05, "loss": 2.4449, "step": 1884500 }, { "epoch": 9.34, "learning_rate": 4.533221612837701e-05, "loss": 2.4703, "step": 1885000 }, { "epoch": 9.34, "learning_rate": 4.5330977541950925e-05, "loss": 2.4336, "step": 1885500 }, { "epoch": 9.34, "learning_rate": 4.5329741432697694e-05, "loss": 2.4546, "step": 1886000 }, { "epoch": 9.35, "learning_rate": 4.532850284627161e-05, "loss": 2.4623, "step": 1886500 }, { "epoch": 9.35, "learning_rate": 4.532726425984553e-05, "loss": 2.4571, "step": 1887000 }, { "epoch": 9.35, "learning_rate": 4.5326025673419444e-05, "loss": 2.4461, "step": 1887500 }, { "epoch": 9.35, "learning_rate": 4.532478708699336e-05, "loss": 2.4349, "step": 1888000 }, { "epoch": 9.36, "learning_rate": 4.532354850056728e-05, "loss": 2.4261, "step": 1888500 }, { "epoch": 9.36, "learning_rate": 4.5322309914141195e-05, "loss": 2.4164, "step": 1889000 }, { "epoch": 9.36, "learning_rate": 4.532107132771511e-05, "loss": 2.462, "step": 1889500 }, { "epoch": 9.36, "learning_rate": 4.5319835218461874e-05, "loss": 2.4433, "step": 1890000 }, { "epoch": 9.37, "learning_rate": 4.531859663203579e-05, "loss": 2.4574, "step": 1890500 }, { "epoch": 9.37, "learning_rate": 4.531735804560971e-05, "loss": 2.4477, "step": 1891000 }, { "epoch": 9.37, "learning_rate": 4.5316119459183625e-05, "loss": 2.4444, "step": 1891500 }, { "epoch": 9.37, "learning_rate": 4.531488087275754e-05, "loss": 2.4569, "step": 1892000 }, { "epoch": 9.38, "learning_rate": 4.531364476350431e-05, "loss": 2.4576, "step": 1892500 }, { "epoch": 9.38, "learning_rate": 4.531240617707823e-05, "loss": 2.4366, "step": 1893000 }, { "epoch": 9.38, "learning_rate": 4.5311167590652145e-05, "loss": 2.455, "step": 1893500 }, { "epoch": 9.38, "learning_rate": 4.530992900422606e-05, "loss": 2.4634, "step": 1894000 }, { "epoch": 9.39, "learning_rate": 4.530869041779998e-05, "loss": 2.4578, "step": 1894500 }, { "epoch": 9.39, "learning_rate": 4.530745430854674e-05, "loss": 2.4391, "step": 1895000 }, { "epoch": 9.39, "learning_rate": 4.530621572212066e-05, "loss": 2.4376, "step": 1895500 }, { "epoch": 9.39, "learning_rate": 4.5304977135694574e-05, "loss": 2.4466, "step": 1896000 }, { "epoch": 9.4, "learning_rate": 4.530373854926849e-05, "loss": 2.429, "step": 1896500 }, { "epoch": 9.4, "learning_rate": 4.530249996284241e-05, "loss": 2.4456, "step": 1897000 }, { "epoch": 9.4, "learning_rate": 4.5301261376416325e-05, "loss": 2.4541, "step": 1897500 }, { "epoch": 9.4, "learning_rate": 4.530002278999024e-05, "loss": 2.4651, "step": 1898000 }, { "epoch": 9.41, "learning_rate": 4.529878668073701e-05, "loss": 2.4453, "step": 1898500 }, { "epoch": 9.41, "learning_rate": 4.529754809431093e-05, "loss": 2.4451, "step": 1899000 }, { "epoch": 9.41, "learning_rate": 4.5296309507884845e-05, "loss": 2.4266, "step": 1899500 }, { "epoch": 9.41, "learning_rate": 4.529507092145876e-05, "loss": 2.455, "step": 1900000 }, { "epoch": 9.42, "learning_rate": 4.529383233503268e-05, "loss": 2.4657, "step": 1900500 }, { "epoch": 9.42, "learning_rate": 4.5292593748606595e-05, "loss": 2.4469, "step": 1901000 }, { "epoch": 9.42, "learning_rate": 4.529135516218051e-05, "loss": 2.4411, "step": 1901500 }, { "epoch": 9.42, "learning_rate": 4.5290121530100126e-05, "loss": 2.4473, "step": 1902000 }, { "epoch": 9.43, "learning_rate": 4.528888294367404e-05, "loss": 2.4457, "step": 1902500 }, { "epoch": 9.43, "learning_rate": 4.528764435724796e-05, "loss": 2.4583, "step": 1903000 }, { "epoch": 9.43, "learning_rate": 4.528640577082188e-05, "loss": 2.4524, "step": 1903500 }, { "epoch": 9.43, "learning_rate": 4.5285167184395794e-05, "loss": 2.4532, "step": 1904000 }, { "epoch": 9.44, "learning_rate": 4.528392859796971e-05, "loss": 2.4809, "step": 1904500 }, { "epoch": 9.44, "learning_rate": 4.528269001154363e-05, "loss": 2.4212, "step": 1905000 }, { "epoch": 9.44, "learning_rate": 4.5281451425117545e-05, "loss": 2.4673, "step": 1905500 }, { "epoch": 9.44, "learning_rate": 4.528021283869146e-05, "loss": 2.4051, "step": 1906000 }, { "epoch": 9.45, "learning_rate": 4.527897425226538e-05, "loss": 2.4195, "step": 1906500 }, { "epoch": 9.45, "learning_rate": 4.5277735665839296e-05, "loss": 2.4623, "step": 1907000 }, { "epoch": 9.45, "learning_rate": 4.527649955658606e-05, "loss": 2.4388, "step": 1907500 }, { "epoch": 9.45, "learning_rate": 4.5275260970159975e-05, "loss": 2.4585, "step": 1908000 }, { "epoch": 9.46, "learning_rate": 4.527402238373389e-05, "loss": 2.4361, "step": 1908500 }, { "epoch": 9.46, "learning_rate": 4.527278379730781e-05, "loss": 2.442, "step": 1909000 }, { "epoch": 9.46, "learning_rate": 4.5271545210881725e-05, "loss": 2.4635, "step": 1909500 }, { "epoch": 9.46, "learning_rate": 4.527030662445564e-05, "loss": 2.4437, "step": 1910000 }, { "epoch": 9.47, "learning_rate": 4.526906803802956e-05, "loss": 2.4507, "step": 1910500 }, { "epoch": 9.47, "learning_rate": 4.5267829451603476e-05, "loss": 2.4323, "step": 1911000 }, { "epoch": 9.47, "learning_rate": 4.526659086517739e-05, "loss": 2.45, "step": 1911500 }, { "epoch": 9.47, "learning_rate": 4.526535227875131e-05, "loss": 2.4588, "step": 1912000 }, { "epoch": 9.48, "learning_rate": 4.526411369232523e-05, "loss": 2.4592, "step": 1912500 }, { "epoch": 9.48, "learning_rate": 4.5262875105899144e-05, "loss": 2.4493, "step": 1913000 }, { "epoch": 9.48, "learning_rate": 4.526163899664591e-05, "loss": 2.4565, "step": 1913500 }, { "epoch": 9.48, "learning_rate": 4.5260402887392675e-05, "loss": 2.4562, "step": 1914000 }, { "epoch": 9.49, "learning_rate": 4.525916430096659e-05, "loss": 2.463, "step": 1914500 }, { "epoch": 9.49, "learning_rate": 4.525792819171336e-05, "loss": 2.4384, "step": 1915000 }, { "epoch": 9.49, "learning_rate": 4.525668960528728e-05, "loss": 2.429, "step": 1915500 }, { "epoch": 9.49, "learning_rate": 4.5255451018861194e-05, "loss": 2.4265, "step": 1916000 }, { "epoch": 9.5, "learning_rate": 4.525421243243511e-05, "loss": 2.457, "step": 1916500 }, { "epoch": 9.5, "learning_rate": 4.525297384600903e-05, "loss": 2.4622, "step": 1917000 }, { "epoch": 9.5, "learning_rate": 4.52517377367558e-05, "loss": 2.4689, "step": 1917500 }, { "epoch": 9.5, "learning_rate": 4.5250499150329714e-05, "loss": 2.4288, "step": 1918000 }, { "epoch": 9.5, "learning_rate": 4.524926304107648e-05, "loss": 2.454, "step": 1918500 }, { "epoch": 9.51, "learning_rate": 4.52480244546504e-05, "loss": 2.4701, "step": 1919000 }, { "epoch": 9.51, "learning_rate": 4.5246785868224317e-05, "loss": 2.4518, "step": 1919500 }, { "epoch": 9.51, "learning_rate": 4.5245547281798233e-05, "loss": 2.4624, "step": 1920000 }, { "epoch": 9.51, "learning_rate": 4.5244308695372144e-05, "loss": 2.4445, "step": 1920500 }, { "epoch": 9.52, "learning_rate": 4.524307010894606e-05, "loss": 2.4495, "step": 1921000 }, { "epoch": 9.52, "learning_rate": 4.524183152251998e-05, "loss": 2.4558, "step": 1921500 }, { "epoch": 9.52, "learning_rate": 4.5240592936093894e-05, "loss": 2.4547, "step": 1922000 }, { "epoch": 9.52, "learning_rate": 4.523935682684067e-05, "loss": 2.441, "step": 1922500 }, { "epoch": 9.53, "learning_rate": 4.523811824041459e-05, "loss": 2.4597, "step": 1923000 }, { "epoch": 9.53, "learning_rate": 4.5236879653988504e-05, "loss": 2.444, "step": 1923500 }, { "epoch": 9.53, "learning_rate": 4.523564602190812e-05, "loss": 2.4567, "step": 1924000 }, { "epoch": 9.53, "learning_rate": 4.5234407435482035e-05, "loss": 2.4353, "step": 1924500 }, { "epoch": 9.54, "learning_rate": 4.523316884905595e-05, "loss": 2.4716, "step": 1925000 }, { "epoch": 9.54, "learning_rate": 4.523193026262987e-05, "loss": 2.4343, "step": 1925500 }, { "epoch": 9.54, "learning_rate": 4.5230691676203785e-05, "loss": 2.4463, "step": 1926000 }, { "epoch": 9.54, "learning_rate": 4.52294530897777e-05, "loss": 2.438, "step": 1926500 }, { "epoch": 9.55, "learning_rate": 4.522821450335162e-05, "loss": 2.4469, "step": 1927000 }, { "epoch": 9.55, "learning_rate": 4.5226975916925536e-05, "loss": 2.4329, "step": 1927500 }, { "epoch": 9.55, "learning_rate": 4.522573733049945e-05, "loss": 2.4451, "step": 1928000 }, { "epoch": 9.55, "learning_rate": 4.522449874407337e-05, "loss": 2.4711, "step": 1928500 }, { "epoch": 9.56, "learning_rate": 4.522326015764729e-05, "loss": 2.4351, "step": 1929000 }, { "epoch": 9.56, "learning_rate": 4.5222021571221204e-05, "loss": 2.4218, "step": 1929500 }, { "epoch": 9.56, "learning_rate": 4.5220782984795114e-05, "loss": 2.437, "step": 1930000 }, { "epoch": 9.56, "learning_rate": 4.521954439836903e-05, "loss": 2.4446, "step": 1930500 }, { "epoch": 9.57, "learning_rate": 4.52183082891158e-05, "loss": 2.4568, "step": 1931000 }, { "epoch": 9.57, "learning_rate": 4.521706970268972e-05, "loss": 2.4456, "step": 1931500 }, { "epoch": 9.57, "learning_rate": 4.5215831116263634e-05, "loss": 2.435, "step": 1932000 }, { "epoch": 9.57, "learning_rate": 4.521459252983755e-05, "loss": 2.4542, "step": 1932500 }, { "epoch": 9.58, "learning_rate": 4.521335394341146e-05, "loss": 2.4418, "step": 1933000 }, { "epoch": 9.58, "learning_rate": 4.521211535698538e-05, "loss": 2.4507, "step": 1933500 }, { "epoch": 9.58, "learning_rate": 4.5210876770559295e-05, "loss": 2.4544, "step": 1934000 }, { "epoch": 9.58, "learning_rate": 4.520963818413321e-05, "loss": 2.4652, "step": 1934500 }, { "epoch": 9.59, "learning_rate": 4.520840207487999e-05, "loss": 2.4531, "step": 1935000 }, { "epoch": 9.59, "learning_rate": 4.5207163488453904e-05, "loss": 2.4594, "step": 1935500 }, { "epoch": 9.59, "learning_rate": 4.520592490202782e-05, "loss": 2.453, "step": 1936000 }, { "epoch": 9.59, "learning_rate": 4.5204693747120294e-05, "loss": 2.4411, "step": 1936500 }, { "epoch": 9.6, "learning_rate": 4.5203455160694204e-05, "loss": 2.4588, "step": 1937000 }, { "epoch": 9.6, "learning_rate": 4.520221657426812e-05, "loss": 2.4649, "step": 1937500 }, { "epoch": 9.6, "learning_rate": 4.520097798784204e-05, "loss": 2.4384, "step": 1938000 }, { "epoch": 9.6, "learning_rate": 4.5199739401415955e-05, "loss": 2.4294, "step": 1938500 }, { "epoch": 9.61, "learning_rate": 4.519850081498987e-05, "loss": 2.4468, "step": 1939000 }, { "epoch": 9.61, "learning_rate": 4.519726222856379e-05, "loss": 2.451, "step": 1939500 }, { "epoch": 9.61, "learning_rate": 4.5196023642137705e-05, "loss": 2.4349, "step": 1940000 }, { "epoch": 9.61, "learning_rate": 4.519478505571162e-05, "loss": 2.439, "step": 1940500 }, { "epoch": 9.62, "learning_rate": 4.519354646928554e-05, "loss": 2.4513, "step": 1941000 }, { "epoch": 9.62, "learning_rate": 4.519230788285945e-05, "loss": 2.4451, "step": 1941500 }, { "epoch": 9.62, "learning_rate": 4.5191069296433366e-05, "loss": 2.4526, "step": 1942000 }, { "epoch": 9.62, "learning_rate": 4.518983071000728e-05, "loss": 2.4393, "step": 1942500 }, { "epoch": 9.63, "learning_rate": 4.51885921235812e-05, "loss": 2.4453, "step": 1943000 }, { "epoch": 9.63, "learning_rate": 4.518735353715512e-05, "loss": 2.4257, "step": 1943500 }, { "epoch": 9.63, "learning_rate": 4.5186114950729034e-05, "loss": 2.4565, "step": 1944000 }, { "epoch": 9.63, "learning_rate": 4.51848788414758e-05, "loss": 2.4816, "step": 1944500 }, { "epoch": 9.64, "learning_rate": 4.518364273222257e-05, "loss": 2.4714, "step": 1945000 }, { "epoch": 9.64, "learning_rate": 4.518240662296934e-05, "loss": 2.4386, "step": 1945500 }, { "epoch": 9.64, "learning_rate": 4.518116803654326e-05, "loss": 2.4524, "step": 1946000 }, { "epoch": 9.64, "learning_rate": 4.517992945011717e-05, "loss": 2.4481, "step": 1946500 }, { "epoch": 9.65, "learning_rate": 4.5178690863691084e-05, "loss": 2.4431, "step": 1947000 }, { "epoch": 9.65, "learning_rate": 4.5177452277265e-05, "loss": 2.4608, "step": 1947500 }, { "epoch": 9.65, "learning_rate": 4.517621369083892e-05, "loss": 2.4459, "step": 1948000 }, { "epoch": 9.65, "learning_rate": 4.5174977581585694e-05, "loss": 2.4347, "step": 1948500 }, { "epoch": 9.66, "learning_rate": 4.517373899515961e-05, "loss": 2.4307, "step": 1949000 }, { "epoch": 9.66, "learning_rate": 4.517250040873352e-05, "loss": 2.4474, "step": 1949500 }, { "epoch": 9.66, "learning_rate": 4.517126182230744e-05, "loss": 2.4829, "step": 1950000 }, { "epoch": 9.66, "learning_rate": 4.5170023235881355e-05, "loss": 2.4657, "step": 1950500 }, { "epoch": 9.67, "learning_rate": 4.516878464945527e-05, "loss": 2.4238, "step": 1951000 }, { "epoch": 9.67, "learning_rate": 4.516754854020204e-05, "loss": 2.4664, "step": 1951500 }, { "epoch": 9.67, "learning_rate": 4.516630995377596e-05, "loss": 2.4432, "step": 1952000 }, { "epoch": 9.67, "learning_rate": 4.516507136734987e-05, "loss": 2.4409, "step": 1952500 }, { "epoch": 9.68, "learning_rate": 4.5163832780923785e-05, "loss": 2.4568, "step": 1953000 }, { "epoch": 9.68, "learning_rate": 4.51625941944977e-05, "loss": 2.4436, "step": 1953500 }, { "epoch": 9.68, "learning_rate": 4.516135560807162e-05, "loss": 2.4369, "step": 1954000 }, { "epoch": 9.68, "learning_rate": 4.5160117021645535e-05, "loss": 2.4344, "step": 1954500 }, { "epoch": 9.69, "learning_rate": 4.515887843521945e-05, "loss": 2.4531, "step": 1955000 }, { "epoch": 9.69, "learning_rate": 4.515764232596622e-05, "loss": 2.4491, "step": 1955500 }, { "epoch": 9.69, "learning_rate": 4.515640373954014e-05, "loss": 2.4332, "step": 1956000 }, { "epoch": 9.69, "learning_rate": 4.5155165153114055e-05, "loss": 2.4424, "step": 1956500 }, { "epoch": 9.7, "learning_rate": 4.515392656668797e-05, "loss": 2.4608, "step": 1957000 }, { "epoch": 9.7, "learning_rate": 4.515268798026189e-05, "loss": 2.444, "step": 1957500 }, { "epoch": 9.7, "learning_rate": 4.5151449393835806e-05, "loss": 2.4751, "step": 1958000 }, { "epoch": 9.7, "learning_rate": 4.5150213284582574e-05, "loss": 2.4599, "step": 1958500 }, { "epoch": 9.71, "learning_rate": 4.5148974698156485e-05, "loss": 2.4676, "step": 1959000 }, { "epoch": 9.71, "learning_rate": 4.51477361117304e-05, "loss": 2.4294, "step": 1959500 }, { "epoch": 9.71, "learning_rate": 4.514649752530432e-05, "loss": 2.4494, "step": 1960000 }, { "epoch": 9.71, "learning_rate": 4.5145263893223946e-05, "loss": 2.4253, "step": 1960500 }, { "epoch": 9.72, "learning_rate": 4.514402530679786e-05, "loss": 2.4498, "step": 1961000 }, { "epoch": 9.72, "learning_rate": 4.514278672037178e-05, "loss": 2.4453, "step": 1961500 }, { "epoch": 9.72, "learning_rate": 4.51415481339457e-05, "loss": 2.4619, "step": 1962000 }, { "epoch": 9.72, "learning_rate": 4.514030954751961e-05, "loss": 2.4357, "step": 1962500 }, { "epoch": 9.73, "learning_rate": 4.5139070961093524e-05, "loss": 2.4439, "step": 1963000 }, { "epoch": 9.73, "learning_rate": 4.513783485184029e-05, "loss": 2.4647, "step": 1963500 }, { "epoch": 9.73, "learning_rate": 4.513659626541421e-05, "loss": 2.4701, "step": 1964000 }, { "epoch": 9.73, "learning_rate": 4.5135357678988126e-05, "loss": 2.4486, "step": 1964500 }, { "epoch": 9.74, "learning_rate": 4.5134119092562043e-05, "loss": 2.4749, "step": 1965000 }, { "epoch": 9.74, "learning_rate": 4.513288050613596e-05, "loss": 2.4462, "step": 1965500 }, { "epoch": 9.74, "learning_rate": 4.513164191970988e-05, "loss": 2.452, "step": 1966000 }, { "epoch": 9.74, "learning_rate": 4.5130403333283794e-05, "loss": 2.4414, "step": 1966500 }, { "epoch": 9.75, "learning_rate": 4.512916474685771e-05, "loss": 2.4396, "step": 1967000 }, { "epoch": 9.75, "learning_rate": 4.512792616043163e-05, "loss": 2.4279, "step": 1967500 }, { "epoch": 9.75, "learning_rate": 4.512668757400554e-05, "loss": 2.4615, "step": 1968000 }, { "epoch": 9.75, "learning_rate": 4.5125451464752314e-05, "loss": 2.4715, "step": 1968500 }, { "epoch": 9.76, "learning_rate": 4.512421287832623e-05, "loss": 2.4397, "step": 1969000 }, { "epoch": 9.76, "learning_rate": 4.512297429190014e-05, "loss": 2.4726, "step": 1969500 }, { "epoch": 9.76, "learning_rate": 4.512173570547406e-05, "loss": 2.4296, "step": 1970000 }, { "epoch": 9.76, "learning_rate": 4.5120497119047975e-05, "loss": 2.4543, "step": 1970500 }, { "epoch": 9.77, "learning_rate": 4.511925853262189e-05, "loss": 2.4283, "step": 1971000 }, { "epoch": 9.77, "learning_rate": 4.511801994619581e-05, "loss": 2.4433, "step": 1971500 }, { "epoch": 9.77, "learning_rate": 4.511678135976972e-05, "loss": 2.431, "step": 1972000 }, { "epoch": 9.77, "learning_rate": 4.5115542773343636e-05, "loss": 2.4314, "step": 1972500 }, { "epoch": 9.77, "learning_rate": 4.511430666409041e-05, "loss": 2.4454, "step": 1973000 }, { "epoch": 9.78, "learning_rate": 4.511306807766433e-05, "loss": 2.4631, "step": 1973500 }, { "epoch": 9.78, "learning_rate": 4.5111829491238245e-05, "loss": 2.4612, "step": 1974000 }, { "epoch": 9.78, "learning_rate": 4.5110590904812155e-05, "loss": 2.4322, "step": 1974500 }, { "epoch": 9.78, "learning_rate": 4.510935231838607e-05, "loss": 2.4259, "step": 1975000 }, { "epoch": 9.79, "learning_rate": 4.510811620913285e-05, "loss": 2.4497, "step": 1975500 }, { "epoch": 9.79, "learning_rate": 4.510687762270676e-05, "loss": 2.4183, "step": 1976000 }, { "epoch": 9.79, "learning_rate": 4.5105639036280675e-05, "loss": 2.4897, "step": 1976500 }, { "epoch": 9.79, "learning_rate": 4.510440044985459e-05, "loss": 2.4587, "step": 1977000 }, { "epoch": 9.8, "learning_rate": 4.510316434060136e-05, "loss": 2.4401, "step": 1977500 }, { "epoch": 9.8, "learning_rate": 4.510192575417528e-05, "loss": 2.4357, "step": 1978000 }, { "epoch": 9.8, "learning_rate": 4.5100687167749194e-05, "loss": 2.4482, "step": 1978500 }, { "epoch": 9.8, "learning_rate": 4.509944858132311e-05, "loss": 2.4574, "step": 1979000 }, { "epoch": 9.81, "learning_rate": 4.509821247206988e-05, "loss": 2.4316, "step": 1979500 }, { "epoch": 9.81, "learning_rate": 4.50969738856438e-05, "loss": 2.4638, "step": 1980000 }, { "epoch": 9.81, "learning_rate": 4.5095735299217714e-05, "loss": 2.4208, "step": 1980500 }, { "epoch": 9.81, "learning_rate": 4.509449671279163e-05, "loss": 2.4633, "step": 1981000 }, { "epoch": 9.82, "learning_rate": 4.5093263080711245e-05, "loss": 2.4471, "step": 1981500 }, { "epoch": 9.82, "learning_rate": 4.509202697145802e-05, "loss": 2.4612, "step": 1982000 }, { "epoch": 9.82, "learning_rate": 4.509078838503194e-05, "loss": 2.4536, "step": 1982500 }, { "epoch": 9.82, "learning_rate": 4.50895522757787e-05, "loss": 2.4203, "step": 1983000 }, { "epoch": 9.83, "learning_rate": 4.5088313689352616e-05, "loss": 2.4689, "step": 1983500 }, { "epoch": 9.83, "learning_rate": 4.508707510292653e-05, "loss": 2.4635, "step": 1984000 }, { "epoch": 9.83, "learning_rate": 4.508583651650045e-05, "loss": 2.4427, "step": 1984500 }, { "epoch": 9.83, "learning_rate": 4.508459793007437e-05, "loss": 2.4488, "step": 1985000 }, { "epoch": 9.84, "learning_rate": 4.5083359343648284e-05, "loss": 2.4687, "step": 1985500 }, { "epoch": 9.84, "learning_rate": 4.50821207572222e-05, "loss": 2.4482, "step": 1986000 }, { "epoch": 9.84, "learning_rate": 4.508088217079612e-05, "loss": 2.4457, "step": 1986500 }, { "epoch": 9.84, "learning_rate": 4.5079643584370035e-05, "loss": 2.4492, "step": 1987000 }, { "epoch": 9.85, "learning_rate": 4.5078404997943945e-05, "loss": 2.468, "step": 1987500 }, { "epoch": 9.85, "learning_rate": 4.507716888869072e-05, "loss": 2.4591, "step": 1988000 }, { "epoch": 9.85, "learning_rate": 4.507593030226464e-05, "loss": 2.4664, "step": 1988500 }, { "epoch": 9.85, "learning_rate": 4.5074691715838554e-05, "loss": 2.4511, "step": 1989000 }, { "epoch": 9.86, "learning_rate": 4.507345312941247e-05, "loss": 2.4497, "step": 1989500 }, { "epoch": 9.86, "learning_rate": 4.507221454298639e-05, "loss": 2.4432, "step": 1990000 }, { "epoch": 9.86, "learning_rate": 4.50709759565603e-05, "loss": 2.4519, "step": 1990500 }, { "epoch": 9.86, "learning_rate": 4.5069737370134215e-05, "loss": 2.4516, "step": 1991000 }, { "epoch": 9.87, "learning_rate": 4.506849878370813e-05, "loss": 2.4417, "step": 1991500 }, { "epoch": 9.87, "learning_rate": 4.506726019728205e-05, "loss": 2.464, "step": 1992000 }, { "epoch": 9.87, "learning_rate": 4.5066021610855966e-05, "loss": 2.4332, "step": 1992500 }, { "epoch": 9.87, "learning_rate": 4.5064783024429876e-05, "loss": 2.4693, "step": 1993000 }, { "epoch": 9.88, "learning_rate": 4.506354443800379e-05, "loss": 2.4477, "step": 1993500 }, { "epoch": 9.88, "learning_rate": 4.506230585157771e-05, "loss": 2.4455, "step": 1994000 }, { "epoch": 9.88, "learning_rate": 4.506106974232448e-05, "loss": 2.4597, "step": 1994500 }, { "epoch": 9.88, "learning_rate": 4.5059833633071255e-05, "loss": 2.446, "step": 1995000 }, { "epoch": 9.89, "learning_rate": 4.505859504664517e-05, "loss": 2.4577, "step": 1995500 }, { "epoch": 9.89, "learning_rate": 4.505735646021909e-05, "loss": 2.4678, "step": 1996000 }, { "epoch": 9.89, "learning_rate": 4.5056117873793005e-05, "loss": 2.4562, "step": 1996500 }, { "epoch": 9.89, "learning_rate": 4.5054879287366916e-05, "loss": 2.4657, "step": 1997000 }, { "epoch": 9.9, "learning_rate": 4.505364070094083e-05, "loss": 2.433, "step": 1997500 }, { "epoch": 9.9, "learning_rate": 4.505240211451475e-05, "loss": 2.4243, "step": 1998000 }, { "epoch": 9.9, "learning_rate": 4.5051163528088666e-05, "loss": 2.4324, "step": 1998500 }, { "epoch": 9.9, "learning_rate": 4.504992494166258e-05, "loss": 2.4494, "step": 1999000 }, { "epoch": 9.91, "learning_rate": 4.50486863552365e-05, "loss": 2.4347, "step": 1999500 }, { "epoch": 9.91, "learning_rate": 4.504744776881041e-05, "loss": 2.4345, "step": 2000000 }, { "epoch": 9.91, "learning_rate": 4.504620918238433e-05, "loss": 2.4564, "step": 2000500 }, { "epoch": 9.91, "learning_rate": 4.5044973073131096e-05, "loss": 2.4482, "step": 2001000 }, { "epoch": 9.92, "learning_rate": 4.504373448670501e-05, "loss": 2.4592, "step": 2001500 }, { "epoch": 9.92, "learning_rate": 4.504249590027893e-05, "loss": 2.4584, "step": 2002000 }, { "epoch": 9.92, "learning_rate": 4.504125731385285e-05, "loss": 2.4937, "step": 2002500 }, { "epoch": 9.92, "learning_rate": 4.5040018727426764e-05, "loss": 2.4234, "step": 2003000 }, { "epoch": 9.93, "learning_rate": 4.503878014100068e-05, "loss": 2.4364, "step": 2003500 }, { "epoch": 9.93, "learning_rate": 4.50375415545746e-05, "loss": 2.4408, "step": 2004000 }, { "epoch": 9.93, "learning_rate": 4.5036302968148515e-05, "loss": 2.4403, "step": 2004500 }, { "epoch": 9.93, "learning_rate": 4.503506685889528e-05, "loss": 2.4583, "step": 2005000 }, { "epoch": 9.94, "learning_rate": 4.503383074964205e-05, "loss": 2.4657, "step": 2005500 }, { "epoch": 9.94, "learning_rate": 4.503259216321597e-05, "loss": 2.4634, "step": 2006000 }, { "epoch": 9.94, "learning_rate": 4.503135357678988e-05, "loss": 2.4743, "step": 2006500 }, { "epoch": 9.94, "learning_rate": 4.5030114990363796e-05, "loss": 2.4383, "step": 2007000 }, { "epoch": 9.95, "learning_rate": 4.502887640393771e-05, "loss": 2.4592, "step": 2007500 }, { "epoch": 9.95, "learning_rate": 4.502764029468449e-05, "loss": 2.458, "step": 2008000 }, { "epoch": 9.95, "learning_rate": 4.5026401708258406e-05, "loss": 2.4377, "step": 2008500 }, { "epoch": 9.95, "learning_rate": 4.502516312183232e-05, "loss": 2.4445, "step": 2009000 }, { "epoch": 9.96, "learning_rate": 4.502392453540623e-05, "loss": 2.4318, "step": 2009500 }, { "epoch": 9.96, "learning_rate": 4.502268594898015e-05, "loss": 2.4583, "step": 2010000 }, { "epoch": 9.96, "learning_rate": 4.502144983972692e-05, "loss": 2.4609, "step": 2010500 }, { "epoch": 9.96, "learning_rate": 4.5020211253300835e-05, "loss": 2.4503, "step": 2011000 }, { "epoch": 9.97, "learning_rate": 4.501897266687475e-05, "loss": 2.4607, "step": 2011500 }, { "epoch": 9.97, "learning_rate": 4.501773408044867e-05, "loss": 2.4503, "step": 2012000 }, { "epoch": 9.97, "learning_rate": 4.501649549402258e-05, "loss": 2.4397, "step": 2012500 }, { "epoch": 9.97, "learning_rate": 4.5015256907596496e-05, "loss": 2.4582, "step": 2013000 }, { "epoch": 9.98, "learning_rate": 4.501401832117041e-05, "loss": 2.438, "step": 2013500 }, { "epoch": 9.98, "learning_rate": 4.501277973474433e-05, "loss": 2.462, "step": 2014000 }, { "epoch": 9.98, "learning_rate": 4.5011543625491106e-05, "loss": 2.4676, "step": 2014500 }, { "epoch": 9.98, "learning_rate": 4.501030751623787e-05, "loss": 2.4411, "step": 2015000 }, { "epoch": 9.99, "learning_rate": 4.5009068929811785e-05, "loss": 2.4521, "step": 2015500 }, { "epoch": 9.99, "learning_rate": 4.50078303433857e-05, "loss": 2.4713, "step": 2016000 }, { "epoch": 9.99, "learning_rate": 4.500659175695962e-05, "loss": 2.432, "step": 2016500 }, { "epoch": 9.99, "learning_rate": 4.5005353170533535e-05, "loss": 2.4529, "step": 2017000 }, { "epoch": 10.0, "learning_rate": 4.500411458410745e-05, "loss": 2.4451, "step": 2017500 }, { "epoch": 10.0, "learning_rate": 4.500287847485422e-05, "loss": 2.4712, "step": 2018000 }, { "epoch": 10.0, "eval_accuracy": 0.6423499793751883, "eval_accuracy_mlm": 0.5954648160103292, "eval_accuracy_nsp": 0.8633937221278716, "eval_loss": 2.427269697189331, "eval_runtime": 146.0474, "eval_samples_per_second": 1745.728, "eval_steps_per_second": 72.743, "step": 2018430 } ], "max_steps": 20184300, "num_train_epochs": 100, "total_flos": 2.6129529363964503e+18, "trial_name": null, "trial_params": null }