diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100755--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,30548 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.28736, + "global_step": 449000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 16.4528, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 11.3853, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-05, + "loss": 8.4021, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 7.2919, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-05, + "loss": 6.1762, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 5.2568, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 3.5e-05, + "loss": 4.796, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 4.6566, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-05, + "loss": 4.482, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 4.2754, + "step": 1000 + }, + { + "epoch": 0.0, + "eval_loss": 3.176161766052246, + "eval_runtime": 174.8066, + "eval_samples_per_second": 57.206, + "eval_steps_per_second": 3.575, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999679795068844e-05, + "loss": 4.1532, + "step": 1100 + }, + { + "epoch": 0.0, + "learning_rate": 4.999359590137688e-05, + "loss": 4.045, + "step": 1200 + }, + { + "epoch": 0.0, + "learning_rate": 4.999039385206533e-05, + "loss": 3.9962, + "step": 1300 + }, + { + "epoch": 0.0, + "learning_rate": 4.998719180275376e-05, + "loss": 3.9307, + "step": 1400 + }, + { + "epoch": 0.0, + "learning_rate": 4.998398975344221e-05, + "loss": 3.901, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 4.998078770413064e-05, + "loss": 3.8329, + "step": 1600 + }, + { + "epoch": 0.0, + "learning_rate": 4.997758565481909e-05, + "loss": 3.8621, + "step": 1700 + }, + { + "epoch": 0.0, + "learning_rate": 4.997438360550753e-05, + "loss": 3.782, + "step": 1800 + }, + { + "epoch": 0.0, + "learning_rate": 4.997118155619597e-05, + "loss": 3.7489, + "step": 1900 + }, + { + "epoch": 0.0, + "learning_rate": 4.996797950688441e-05, + "loss": 3.7324, + "step": 2000 + }, + { + "epoch": 0.0, + "eval_loss": 3.0246834754943848, + "eval_runtime": 174.5444, + "eval_samples_per_second": 57.292, + "eval_steps_per_second": 3.581, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9964777457572846e-05, + "loss": 3.6862, + "step": 2100 + }, + { + "epoch": 0.0, + "learning_rate": 4.996157540826129e-05, + "loss": 3.6695, + "step": 2200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9958373358949726e-05, + "loss": 3.644, + "step": 2300 + }, + { + "epoch": 0.0, + "learning_rate": 4.995517130963817e-05, + "loss": 3.6352, + "step": 2400 + }, + { + "epoch": 0.0, + "learning_rate": 4.995196926032661e-05, + "loss": 3.6435, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994876721101505e-05, + "loss": 3.5992, + "step": 2600 + }, + { + "epoch": 0.0, + "learning_rate": 4.994556516170349e-05, + "loss": 3.6032, + "step": 2700 + }, + { + "epoch": 0.0, + "learning_rate": 4.994236311239193e-05, + "loss": 3.5885, + "step": 2800 + }, + { + "epoch": 0.0, + "learning_rate": 4.993916106308038e-05, + "loss": 3.5502, + "step": 2900 + }, + { + "epoch": 0.0, + "learning_rate": 4.993595901376881e-05, + "loss": 3.5497, + "step": 3000 + }, + { + "epoch": 0.0, + "eval_loss": 2.9612457752227783, + "eval_runtime": 176.0693, + "eval_samples_per_second": 56.796, + "eval_steps_per_second": 3.55, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993275696445726e-05, + "loss": 3.5265, + "step": 3100 + }, + { + "epoch": 0.0, + "learning_rate": 4.992955491514569e-05, + "loss": 3.5073, + "step": 3200 + }, + { + "epoch": 0.0, + "learning_rate": 4.992635286583414e-05, + "loss": 3.5148, + "step": 3300 + }, + { + "epoch": 0.0, + "learning_rate": 4.9923150816522576e-05, + "loss": 3.4933, + "step": 3400 + }, + { + "epoch": 0.0, + "learning_rate": 4.9919948767211016e-05, + "loss": 3.5003, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 4.991674671789946e-05, + "loss": 3.4647, + "step": 3600 + }, + { + "epoch": 0.0, + "learning_rate": 4.9913544668587896e-05, + "loss": 3.4672, + "step": 3700 + }, + { + "epoch": 0.0, + "learning_rate": 4.991034261927634e-05, + "loss": 3.4315, + "step": 3800 + }, + { + "epoch": 0.0, + "learning_rate": 4.9907140569964775e-05, + "loss": 3.4564, + "step": 3900 + }, + { + "epoch": 0.0, + "learning_rate": 4.990393852065322e-05, + "loss": 3.4795, + "step": 4000 + }, + { + "epoch": 0.0, + "eval_loss": 2.9161300659179688, + "eval_runtime": 176.0774, + "eval_samples_per_second": 56.793, + "eval_steps_per_second": 3.55, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 4.990073647134166e-05, + "loss": 3.4406, + "step": 4100 + }, + { + "epoch": 0.0, + "learning_rate": 4.98975344220301e-05, + "loss": 3.4532, + "step": 4200 + }, + { + "epoch": 0.0, + "learning_rate": 4.989433237271854e-05, + "loss": 3.4261, + "step": 4300 + }, + { + "epoch": 0.0, + "learning_rate": 4.989113032340698e-05, + "loss": 3.4327, + "step": 4400 + }, + { + "epoch": 0.0, + "learning_rate": 4.988792827409543e-05, + "loss": 3.4158, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 4.988472622478386e-05, + "loss": 3.3788, + "step": 4600 + }, + { + "epoch": 0.0, + "learning_rate": 4.9881524175472307e-05, + "loss": 3.3911, + "step": 4700 + }, + { + "epoch": 0.0, + "learning_rate": 4.987832212616074e-05, + "loss": 3.4317, + "step": 4800 + }, + { + "epoch": 0.0, + "learning_rate": 4.9875120076849186e-05, + "loss": 3.3859, + "step": 4900 + }, + { + "epoch": 0.0, + "learning_rate": 4.9871918027537626e-05, + "loss": 3.3852, + "step": 5000 + }, + { + "epoch": 0.0, + "eval_loss": 2.8852083683013916, + "eval_runtime": 175.4843, + "eval_samples_per_second": 56.985, + "eval_steps_per_second": 3.562, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9868715978226066e-05, + "loss": 3.3619, + "step": 5100 + }, + { + "epoch": 0.0, + "learning_rate": 4.986551392891451e-05, + "loss": 3.3791, + "step": 5200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9862311879602945e-05, + "loss": 3.3694, + "step": 5300 + }, + { + "epoch": 0.0, + "learning_rate": 4.985910983029139e-05, + "loss": 3.3651, + "step": 5400 + }, + { + "epoch": 0.0, + "learning_rate": 4.9855907780979825e-05, + "loss": 3.3703, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 4.985270573166827e-05, + "loss": 3.3411, + "step": 5600 + }, + { + "epoch": 0.0, + "learning_rate": 4.984950368235671e-05, + "loss": 3.3514, + "step": 5700 + }, + { + "epoch": 0.0, + "learning_rate": 4.984630163304515e-05, + "loss": 3.3375, + "step": 5800 + }, + { + "epoch": 0.0, + "learning_rate": 4.984309958373359e-05, + "loss": 3.3522, + "step": 5900 + }, + { + "epoch": 0.0, + "learning_rate": 4.983989753442203e-05, + "loss": 3.3496, + "step": 6000 + }, + { + "epoch": 0.0, + "eval_loss": 2.8661420345306396, + "eval_runtime": 175.3115, + "eval_samples_per_second": 57.041, + "eval_steps_per_second": 3.565, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9836695485110477e-05, + "loss": 3.3567, + "step": 6100 + }, + { + "epoch": 0.0, + "learning_rate": 4.983349343579891e-05, + "loss": 3.3431, + "step": 6200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9830291386487356e-05, + "loss": 3.3389, + "step": 6300 + }, + { + "epoch": 0.0, + "learning_rate": 4.9827089337175796e-05, + "loss": 3.3292, + "step": 6400 + }, + { + "epoch": 0.0, + "learning_rate": 4.9823887287864235e-05, + "loss": 3.3314, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9820685238552675e-05, + "loss": 3.319, + "step": 6600 + }, + { + "epoch": 0.0, + "learning_rate": 4.9817483189241115e-05, + "loss": 3.3457, + "step": 6700 + }, + { + "epoch": 0.0, + "learning_rate": 4.981428113992956e-05, + "loss": 3.3041, + "step": 6800 + }, + { + "epoch": 0.0, + "learning_rate": 4.9811079090617994e-05, + "loss": 3.3096, + "step": 6900 + }, + { + "epoch": 0.0, + "learning_rate": 4.980787704130644e-05, + "loss": 3.2979, + "step": 7000 + }, + { + "epoch": 0.0, + "eval_loss": 2.8510663509368896, + "eval_runtime": 176.6117, + "eval_samples_per_second": 56.621, + "eval_steps_per_second": 3.539, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 4.980467499199488e-05, + "loss": 3.3189, + "step": 7100 + }, + { + "epoch": 0.0, + "learning_rate": 4.980147294268332e-05, + "loss": 3.3005, + "step": 7200 + }, + { + "epoch": 0.0, + "learning_rate": 4.979827089337176e-05, + "loss": 3.2669, + "step": 7300 + }, + { + "epoch": 0.0, + "learning_rate": 4.97950688440602e-05, + "loss": 3.2911, + "step": 7400 + }, + { + "epoch": 0.0, + "learning_rate": 4.979186679474864e-05, + "loss": 3.3187, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 4.978866474543708e-05, + "loss": 3.3035, + "step": 7600 + }, + { + "epoch": 0.0, + "learning_rate": 4.9785462696125526e-05, + "loss": 3.2913, + "step": 7700 + }, + { + "epoch": 0.0, + "learning_rate": 4.978226064681396e-05, + "loss": 3.261, + "step": 7800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9779058597502405e-05, + "loss": 3.28, + "step": 7900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9775856548190845e-05, + "loss": 3.3035, + "step": 8000 + }, + { + "epoch": 0.01, + "eval_loss": 2.8327794075012207, + "eval_runtime": 175.8544, + "eval_samples_per_second": 56.865, + "eval_steps_per_second": 3.554, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9772654498879285e-05, + "loss": 3.2916, + "step": 8100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9769452449567725e-05, + "loss": 3.2971, + "step": 8200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9766250400256164e-05, + "loss": 3.2899, + "step": 8300 + }, + { + "epoch": 0.01, + "learning_rate": 4.976304835094461e-05, + "loss": 3.2649, + "step": 8400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9759846301633044e-05, + "loss": 3.2695, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975664425232149e-05, + "loss": 3.2501, + "step": 8600 + }, + { + "epoch": 0.01, + "learning_rate": 4.975344220300993e-05, + "loss": 3.2668, + "step": 8700 + }, + { + "epoch": 0.01, + "learning_rate": 4.975024015369837e-05, + "loss": 3.2317, + "step": 8800 + }, + { + "epoch": 0.01, + "learning_rate": 4.974703810438681e-05, + "loss": 3.2606, + "step": 8900 + }, + { + "epoch": 0.01, + "learning_rate": 4.974383605507525e-05, + "loss": 3.264, + "step": 9000 + }, + { + "epoch": 0.01, + "eval_loss": 2.8224401473999023, + "eval_runtime": 177.2958, + "eval_samples_per_second": 56.403, + "eval_steps_per_second": 3.525, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 4.974063400576369e-05, + "loss": 3.2375, + "step": 9100 + }, + { + "epoch": 0.01, + "learning_rate": 4.973743195645213e-05, + "loss": 3.2443, + "step": 9200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9734229907140575e-05, + "loss": 3.2454, + "step": 9300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9731027857829015e-05, + "loss": 3.2352, + "step": 9400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9727825808517455e-05, + "loss": 3.2763, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9724623759205894e-05, + "loss": 3.2415, + "step": 9600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9721421709894334e-05, + "loss": 3.2385, + "step": 9700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9718219660582774e-05, + "loss": 3.2458, + "step": 9800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9715017611271214e-05, + "loss": 3.267, + "step": 9900 + }, + { + "epoch": 0.01, + "learning_rate": 4.971181556195966e-05, + "loss": 3.2282, + "step": 10000 + }, + { + "epoch": 0.01, + "eval_loss": 2.811612367630005, + "eval_runtime": 175.957, + "eval_samples_per_second": 56.832, + "eval_steps_per_second": 3.552, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 4.970861351264809e-05, + "loss": 3.2373, + "step": 10100 + }, + { + "epoch": 0.01, + "learning_rate": 4.970541146333654e-05, + "loss": 3.2365, + "step": 10200 + }, + { + "epoch": 0.01, + "learning_rate": 4.970220941402498e-05, + "loss": 3.2106, + "step": 10300 + }, + { + "epoch": 0.01, + "learning_rate": 4.969900736471342e-05, + "loss": 3.2247, + "step": 10400 + }, + { + "epoch": 0.01, + "learning_rate": 4.969580531540186e-05, + "loss": 3.2031, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 4.96926032660903e-05, + "loss": 3.2485, + "step": 10600 + }, + { + "epoch": 0.01, + "learning_rate": 4.968940121677874e-05, + "loss": 3.2195, + "step": 10700 + }, + { + "epoch": 0.01, + "learning_rate": 4.968619916746718e-05, + "loss": 3.2253, + "step": 10800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9682997118155625e-05, + "loss": 3.209, + "step": 10900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9679795068844064e-05, + "loss": 3.1864, + "step": 11000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7983367443084717, + "eval_runtime": 176.0989, + "eval_samples_per_second": 56.786, + "eval_steps_per_second": 3.549, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9676593019532504e-05, + "loss": 3.2072, + "step": 11100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9673390970220944e-05, + "loss": 3.1995, + "step": 11200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9670188920909384e-05, + "loss": 3.1948, + "step": 11300 + }, + { + "epoch": 0.01, + "learning_rate": 4.966698687159782e-05, + "loss": 3.1858, + "step": 11400 + }, + { + "epoch": 0.01, + "learning_rate": 4.966378482228626e-05, + "loss": 3.1929, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 4.966058277297471e-05, + "loss": 3.17, + "step": 11600 + }, + { + "epoch": 0.01, + "learning_rate": 4.965738072366315e-05, + "loss": 3.1996, + "step": 11700 + }, + { + "epoch": 0.01, + "learning_rate": 4.965417867435159e-05, + "loss": 3.1755, + "step": 11800 + }, + { + "epoch": 0.01, + "learning_rate": 4.965097662504003e-05, + "loss": 3.1663, + "step": 11900 + }, + { + "epoch": 0.01, + "learning_rate": 4.964777457572847e-05, + "loss": 3.1999, + "step": 12000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7982311248779297, + "eval_runtime": 177.464, + "eval_samples_per_second": 56.349, + "eval_steps_per_second": 3.522, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 4.964457252641691e-05, + "loss": 3.1752, + "step": 12100 + }, + { + "epoch": 0.01, + "learning_rate": 4.964137047710535e-05, + "loss": 3.1834, + "step": 12200 + }, + { + "epoch": 0.01, + "learning_rate": 4.963816842779379e-05, + "loss": 3.1905, + "step": 12300 + }, + { + "epoch": 0.01, + "learning_rate": 4.963496637848223e-05, + "loss": 3.1793, + "step": 12400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9631764329170674e-05, + "loss": 3.1939, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9628562279859114e-05, + "loss": 3.1722, + "step": 12600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9625360230547553e-05, + "loss": 3.1581, + "step": 12700 + }, + { + "epoch": 0.01, + "learning_rate": 4.962215818123599e-05, + "loss": 3.1796, + "step": 12800 + }, + { + "epoch": 0.01, + "learning_rate": 4.961895613192443e-05, + "loss": 3.2063, + "step": 12900 + }, + { + "epoch": 0.01, + "learning_rate": 4.961575408261287e-05, + "loss": 3.1999, + "step": 13000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7836506366729736, + "eval_runtime": 175.8615, + "eval_samples_per_second": 56.863, + "eval_steps_per_second": 3.554, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 4.961255203330131e-05, + "loss": 3.179, + "step": 13100 + }, + { + "epoch": 0.01, + "learning_rate": 4.960934998398976e-05, + "loss": 3.1719, + "step": 13200 + }, + { + "epoch": 0.01, + "learning_rate": 4.96061479346782e-05, + "loss": 3.181, + "step": 13300 + }, + { + "epoch": 0.01, + "learning_rate": 4.960294588536664e-05, + "loss": 3.1698, + "step": 13400 + }, + { + "epoch": 0.01, + "learning_rate": 4.959974383605508e-05, + "loss": 3.1541, + "step": 13500 + }, + { + "epoch": 0.01, + "learning_rate": 4.959654178674352e-05, + "loss": 3.1493, + "step": 13600 + }, + { + "epoch": 0.01, + "learning_rate": 4.959333973743196e-05, + "loss": 3.1774, + "step": 13700 + }, + { + "epoch": 0.01, + "learning_rate": 4.95901376881204e-05, + "loss": 3.1627, + "step": 13800 + }, + { + "epoch": 0.01, + "learning_rate": 4.958693563880884e-05, + "loss": 3.1446, + "step": 13900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9583733589497284e-05, + "loss": 3.1574, + "step": 14000 + }, + { + "epoch": 0.01, + "eval_loss": 2.776357412338257, + "eval_runtime": 176.1722, + "eval_samples_per_second": 56.763, + "eval_steps_per_second": 3.548, + "step": 14000 + }, + { + "epoch": 0.01, + "learning_rate": 4.958053154018572e-05, + "loss": 3.1542, + "step": 14100 + }, + { + "epoch": 0.01, + "learning_rate": 4.957732949087416e-05, + "loss": 3.1338, + "step": 14200 + }, + { + "epoch": 0.01, + "learning_rate": 4.95741274415626e-05, + "loss": 3.1304, + "step": 14300 + }, + { + "epoch": 0.01, + "learning_rate": 4.957092539225104e-05, + "loss": 3.1243, + "step": 14400 + }, + { + "epoch": 0.01, + "learning_rate": 4.956772334293948e-05, + "loss": 3.1412, + "step": 14500 + }, + { + "epoch": 0.01, + "learning_rate": 4.956452129362792e-05, + "loss": 3.174, + "step": 14600 + }, + { + "epoch": 0.01, + "learning_rate": 4.956131924431636e-05, + "loss": 3.1614, + "step": 14700 + }, + { + "epoch": 0.01, + "learning_rate": 4.955811719500481e-05, + "loss": 3.1496, + "step": 14800 + }, + { + "epoch": 0.01, + "learning_rate": 4.955491514569325e-05, + "loss": 3.1417, + "step": 14900 + }, + { + "epoch": 0.01, + "learning_rate": 4.955171309638169e-05, + "loss": 3.1302, + "step": 15000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7699687480926514, + "eval_runtime": 175.7202, + "eval_samples_per_second": 56.909, + "eval_steps_per_second": 3.557, + "step": 15000 + }, + { + "epoch": 0.01, + "learning_rate": 4.954851104707013e-05, + "loss": 3.1279, + "step": 15100 + }, + { + "epoch": 0.01, + "learning_rate": 4.954530899775857e-05, + "loss": 3.1472, + "step": 15200 + }, + { + "epoch": 0.01, + "learning_rate": 4.954210694844701e-05, + "loss": 3.1461, + "step": 15300 + }, + { + "epoch": 0.01, + "learning_rate": 4.953890489913545e-05, + "loss": 3.1369, + "step": 15400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9535702849823886e-05, + "loss": 3.1469, + "step": 15500 + }, + { + "epoch": 0.01, + "learning_rate": 4.953250080051233e-05, + "loss": 3.1522, + "step": 15600 + }, + { + "epoch": 0.01, + "learning_rate": 4.952929875120077e-05, + "loss": 3.1273, + "step": 15700 + }, + { + "epoch": 0.01, + "learning_rate": 4.952609670188921e-05, + "loss": 3.1445, + "step": 15800 + }, + { + "epoch": 0.01, + "learning_rate": 4.952289465257765e-05, + "loss": 3.1391, + "step": 15900 + }, + { + "epoch": 0.01, + "learning_rate": 4.951969260326609e-05, + "loss": 3.1212, + "step": 16000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7630386352539062, + "eval_runtime": 176.7557, + "eval_samples_per_second": 56.575, + "eval_steps_per_second": 3.536, + "step": 16000 + }, + { + "epoch": 0.01, + "learning_rate": 4.951649055395453e-05, + "loss": 3.1535, + "step": 16100 + }, + { + "epoch": 0.01, + "learning_rate": 4.951328850464297e-05, + "loss": 3.127, + "step": 16200 + }, + { + "epoch": 0.01, + "learning_rate": 4.951008645533142e-05, + "loss": 3.1363, + "step": 16300 + }, + { + "epoch": 0.01, + "learning_rate": 4.950688440601986e-05, + "loss": 3.1224, + "step": 16400 + }, + { + "epoch": 0.01, + "learning_rate": 4.95036823567083e-05, + "loss": 3.1081, + "step": 16500 + }, + { + "epoch": 0.01, + "learning_rate": 4.950048030739674e-05, + "loss": 3.1115, + "step": 16600 + }, + { + "epoch": 0.01, + "learning_rate": 4.949727825808518e-05, + "loss": 3.1432, + "step": 16700 + }, + { + "epoch": 0.01, + "learning_rate": 4.949407620877362e-05, + "loss": 3.1042, + "step": 16800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9490874159462056e-05, + "loss": 3.1345, + "step": 16900 + }, + { + "epoch": 0.01, + "learning_rate": 4.94876721101505e-05, + "loss": 3.1021, + "step": 17000 + }, + { + "epoch": 0.01, + "eval_loss": 2.75553560256958, + "eval_runtime": 176.7294, + "eval_samples_per_second": 56.584, + "eval_steps_per_second": 3.536, + "step": 17000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9484470060838936e-05, + "loss": 3.0994, + "step": 17100 + }, + { + "epoch": 0.01, + "learning_rate": 4.948126801152738e-05, + "loss": 3.1197, + "step": 17200 + }, + { + "epoch": 0.01, + "learning_rate": 4.947806596221582e-05, + "loss": 3.1163, + "step": 17300 + }, + { + "epoch": 0.01, + "learning_rate": 4.947486391290426e-05, + "loss": 3.1142, + "step": 17400 + }, + { + "epoch": 0.01, + "learning_rate": 4.94716618635927e-05, + "loss": 3.115, + "step": 17500 + }, + { + "epoch": 0.01, + "learning_rate": 4.946845981428114e-05, + "loss": 3.118, + "step": 17600 + }, + { + "epoch": 0.01, + "learning_rate": 4.946525776496958e-05, + "loss": 3.1086, + "step": 17700 + }, + { + "epoch": 0.01, + "learning_rate": 4.946205571565802e-05, + "loss": 3.0966, + "step": 17800 + }, + { + "epoch": 0.01, + "learning_rate": 4.945885366634647e-05, + "loss": 3.1024, + "step": 17900 + }, + { + "epoch": 0.01, + "learning_rate": 4.945565161703491e-05, + "loss": 3.1026, + "step": 18000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7484090328216553, + "eval_runtime": 175.5292, + "eval_samples_per_second": 56.971, + "eval_steps_per_second": 3.561, + "step": 18000 + }, + { + "epoch": 0.01, + "learning_rate": 4.945244956772335e-05, + "loss": 3.1031, + "step": 18100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9449247518411787e-05, + "loss": 3.0998, + "step": 18200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9446045469100226e-05, + "loss": 3.1153, + "step": 18300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9442843419788666e-05, + "loss": 3.1114, + "step": 18400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9439641370477106e-05, + "loss": 3.1124, + "step": 18500 + }, + { + "epoch": 0.01, + "learning_rate": 4.943643932116555e-05, + "loss": 3.1088, + "step": 18600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9433237271853985e-05, + "loss": 3.0875, + "step": 18700 + }, + { + "epoch": 0.01, + "learning_rate": 4.943003522254243e-05, + "loss": 3.1066, + "step": 18800 + }, + { + "epoch": 0.01, + "learning_rate": 4.942683317323087e-05, + "loss": 3.0911, + "step": 18900 + }, + { + "epoch": 0.01, + "learning_rate": 4.942363112391931e-05, + "loss": 3.1073, + "step": 19000 + }, + { + "epoch": 0.01, + "eval_loss": 2.745687484741211, + "eval_runtime": 178.9968, + "eval_samples_per_second": 55.867, + "eval_steps_per_second": 3.492, + "step": 19000 + }, + { + "epoch": 0.01, + "learning_rate": 4.942042907460775e-05, + "loss": 3.0936, + "step": 19100 + }, + { + "epoch": 0.01, + "learning_rate": 4.941722702529619e-05, + "loss": 3.0894, + "step": 19200 + }, + { + "epoch": 0.01, + "learning_rate": 4.941402497598464e-05, + "loss": 3.0944, + "step": 19300 + }, + { + "epoch": 0.01, + "learning_rate": 4.941082292667307e-05, + "loss": 3.065, + "step": 19400 + }, + { + "epoch": 0.01, + "learning_rate": 4.940762087736152e-05, + "loss": 3.0982, + "step": 19500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9404418828049956e-05, + "loss": 3.0787, + "step": 19600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9401216778738396e-05, + "loss": 3.0753, + "step": 19700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9398014729426836e-05, + "loss": 3.0793, + "step": 19800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9394812680115276e-05, + "loss": 3.0907, + "step": 19900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9391610630803715e-05, + "loss": 3.099, + "step": 20000 + }, + { + "epoch": 0.01, + "eval_loss": 2.741987466812134, + "eval_runtime": 177.0933, + "eval_samples_per_second": 56.467, + "eval_steps_per_second": 3.529, + "step": 20000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9388408581492155e-05, + "loss": 3.0733, + "step": 20100 + }, + { + "epoch": 0.01, + "learning_rate": 4.93852065321806e-05, + "loss": 3.0774, + "step": 20200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9382004482869035e-05, + "loss": 3.1003, + "step": 20300 + }, + { + "epoch": 0.01, + "learning_rate": 4.937880243355748e-05, + "loss": 3.08, + "step": 20400 + }, + { + "epoch": 0.01, + "learning_rate": 4.937560038424592e-05, + "loss": 3.0872, + "step": 20500 + }, + { + "epoch": 0.01, + "learning_rate": 4.937239833493436e-05, + "loss": 3.0771, + "step": 20600 + }, + { + "epoch": 0.01, + "learning_rate": 4.93691962856228e-05, + "loss": 3.0922, + "step": 20700 + }, + { + "epoch": 0.01, + "learning_rate": 4.936599423631124e-05, + "loss": 3.0874, + "step": 20800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9362792186999687e-05, + "loss": 3.0597, + "step": 20900 + }, + { + "epoch": 0.01, + "learning_rate": 4.935959013768812e-05, + "loss": 3.065, + "step": 21000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7343788146972656, + "eval_runtime": 177.3457, + "eval_samples_per_second": 56.387, + "eval_steps_per_second": 3.524, + "step": 21000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9356388088376566e-05, + "loss": 3.0777, + "step": 21100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9353186039065006e-05, + "loss": 3.075, + "step": 21200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9349983989753446e-05, + "loss": 3.052, + "step": 21300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9346781940441885e-05, + "loss": 3.0808, + "step": 21400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9343579891130325e-05, + "loss": 3.0748, + "step": 21500 + }, + { + "epoch": 0.01, + "learning_rate": 4.934037784181877e-05, + "loss": 3.0511, + "step": 21600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9337175792507204e-05, + "loss": 3.0571, + "step": 21700 + }, + { + "epoch": 0.01, + "learning_rate": 4.933397374319565e-05, + "loss": 3.0703, + "step": 21800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9330771693884084e-05, + "loss": 3.0712, + "step": 21900 + }, + { + "epoch": 0.01, + "learning_rate": 4.932756964457253e-05, + "loss": 3.069, + "step": 22000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7288901805877686, + "eval_runtime": 177.5959, + "eval_samples_per_second": 56.308, + "eval_steps_per_second": 3.519, + "step": 22000 + }, + { + "epoch": 0.01, + "learning_rate": 4.932436759526097e-05, + "loss": 3.0585, + "step": 22100 + }, + { + "epoch": 0.01, + "learning_rate": 4.932116554594941e-05, + "loss": 3.0851, + "step": 22200 + }, + { + "epoch": 0.01, + "learning_rate": 4.931796349663785e-05, + "loss": 3.0438, + "step": 22300 + }, + { + "epoch": 0.01, + "learning_rate": 4.931476144732629e-05, + "loss": 3.0897, + "step": 22400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9311559398014736e-05, + "loss": 3.0659, + "step": 22500 + }, + { + "epoch": 0.01, + "learning_rate": 4.930835734870317e-05, + "loss": 3.0311, + "step": 22600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9305155299391615e-05, + "loss": 3.0685, + "step": 22700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9301953250080055e-05, + "loss": 3.0618, + "step": 22800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9298751200768495e-05, + "loss": 3.0552, + "step": 22900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9295549151456935e-05, + "loss": 3.0592, + "step": 23000 + }, + { + "epoch": 0.01, + "eval_loss": 2.7260777950286865, + "eval_runtime": 179.666, + "eval_samples_per_second": 55.659, + "eval_steps_per_second": 3.479, + "step": 23000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9292347102145374e-05, + "loss": 3.0724, + "step": 23100 + }, + { + "epoch": 0.01, + "learning_rate": 4.928914505283382e-05, + "loss": 3.0681, + "step": 23200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9285943003522254e-05, + "loss": 3.0484, + "step": 23300 + }, + { + "epoch": 0.01, + "learning_rate": 4.92827409542107e-05, + "loss": 3.0626, + "step": 23400 + }, + { + "epoch": 0.02, + "learning_rate": 4.927953890489913e-05, + "loss": 3.0594, + "step": 23500 + }, + { + "epoch": 0.02, + "learning_rate": 4.927633685558758e-05, + "loss": 3.0619, + "step": 23600 + }, + { + "epoch": 0.02, + "learning_rate": 4.927313480627602e-05, + "loss": 3.0485, + "step": 23700 + }, + { + "epoch": 0.02, + "learning_rate": 4.926993275696446e-05, + "loss": 3.0581, + "step": 23800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9266730707652906e-05, + "loss": 3.0889, + "step": 23900 + }, + { + "epoch": 0.02, + "learning_rate": 4.926352865834134e-05, + "loss": 3.0676, + "step": 24000 + }, + { + "epoch": 0.02, + "eval_loss": 2.72514271736145, + "eval_runtime": 176.9986, + "eval_samples_per_second": 56.498, + "eval_steps_per_second": 3.531, + "step": 24000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9260326609029785e-05, + "loss": 3.045, + "step": 24100 + }, + { + "epoch": 0.02, + "learning_rate": 4.925712455971822e-05, + "loss": 3.0581, + "step": 24200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9253922510406665e-05, + "loss": 3.0509, + "step": 24300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9250720461095105e-05, + "loss": 3.0461, + "step": 24400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9247518411783544e-05, + "loss": 3.0504, + "step": 24500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9244316362471984e-05, + "loss": 3.0376, + "step": 24600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9241114313160424e-05, + "loss": 3.0529, + "step": 24700 + }, + { + "epoch": 0.02, + "learning_rate": 4.923791226384887e-05, + "loss": 3.0611, + "step": 24800 + }, + { + "epoch": 0.02, + "learning_rate": 4.92347102145373e-05, + "loss": 3.0552, + "step": 24900 + }, + { + "epoch": 0.02, + "learning_rate": 4.923150816522575e-05, + "loss": 3.0424, + "step": 25000 + }, + { + "epoch": 0.02, + "eval_loss": 2.7178871631622314, + "eval_runtime": 177.5357, + "eval_samples_per_second": 56.327, + "eval_steps_per_second": 3.52, + "step": 25000 + }, + { + "epoch": 0.02, + "learning_rate": 4.922830611591418e-05, + "loss": 3.0364, + "step": 25100 + }, + { + "epoch": 0.02, + "learning_rate": 4.922510406660263e-05, + "loss": 3.05, + "step": 25200 + }, + { + "epoch": 0.02, + "learning_rate": 4.922190201729107e-05, + "loss": 3.0679, + "step": 25300 + }, + { + "epoch": 0.02, + "learning_rate": 4.921869996797951e-05, + "loss": 3.0362, + "step": 25400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9215497918667955e-05, + "loss": 3.028, + "step": 25500 + }, + { + "epoch": 0.02, + "learning_rate": 4.921229586935639e-05, + "loss": 3.0405, + "step": 25600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9209093820044835e-05, + "loss": 3.046, + "step": 25700 + }, + { + "epoch": 0.02, + "learning_rate": 4.920589177073327e-05, + "loss": 3.0299, + "step": 25800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9202689721421714e-05, + "loss": 3.0186, + "step": 25900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9199487672110154e-05, + "loss": 3.0578, + "step": 26000 + }, + { + "epoch": 0.02, + "eval_loss": 2.713019609451294, + "eval_runtime": 178.6143, + "eval_samples_per_second": 55.987, + "eval_steps_per_second": 3.499, + "step": 26000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9196285622798594e-05, + "loss": 3.0344, + "step": 26100 + }, + { + "epoch": 0.02, + "learning_rate": 4.919308357348703e-05, + "loss": 3.0358, + "step": 26200 + }, + { + "epoch": 0.02, + "learning_rate": 4.918988152417547e-05, + "loss": 3.0481, + "step": 26300 + }, + { + "epoch": 0.02, + "learning_rate": 4.918667947486392e-05, + "loss": 3.0317, + "step": 26400 + }, + { + "epoch": 0.02, + "learning_rate": 4.918347742555235e-05, + "loss": 3.0464, + "step": 26500 + }, + { + "epoch": 0.02, + "learning_rate": 4.91802753762408e-05, + "loss": 3.0183, + "step": 26600 + }, + { + "epoch": 0.02, + "learning_rate": 4.917707332692923e-05, + "loss": 3.0408, + "step": 26700 + }, + { + "epoch": 0.02, + "learning_rate": 4.917387127761768e-05, + "loss": 3.0367, + "step": 26800 + }, + { + "epoch": 0.02, + "learning_rate": 4.917066922830612e-05, + "loss": 3.015, + "step": 26900 + }, + { + "epoch": 0.02, + "learning_rate": 4.916746717899456e-05, + "loss": 3.0268, + "step": 27000 + }, + { + "epoch": 0.02, + "eval_loss": 2.709913730621338, + "eval_runtime": 177.2666, + "eval_samples_per_second": 56.412, + "eval_steps_per_second": 3.526, + "step": 27000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9164265129683005e-05, + "loss": 3.035, + "step": 27100 + }, + { + "epoch": 0.02, + "learning_rate": 4.916106308037144e-05, + "loss": 3.0458, + "step": 27200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9157861031059884e-05, + "loss": 3.034, + "step": 27300 + }, + { + "epoch": 0.02, + "learning_rate": 4.915465898174832e-05, + "loss": 3.0237, + "step": 27400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9151456932436764e-05, + "loss": 3.0161, + "step": 27500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9148254883125197e-05, + "loss": 3.017, + "step": 27600 + }, + { + "epoch": 0.02, + "learning_rate": 4.914505283381364e-05, + "loss": 3.0347, + "step": 27700 + }, + { + "epoch": 0.02, + "learning_rate": 4.914185078450208e-05, + "loss": 3.0212, + "step": 27800 + }, + { + "epoch": 0.02, + "learning_rate": 4.913864873519052e-05, + "loss": 3.0185, + "step": 27900 + }, + { + "epoch": 0.02, + "learning_rate": 4.913544668587897e-05, + "loss": 3.0408, + "step": 28000 + }, + { + "epoch": 0.02, + "eval_loss": 2.7085154056549072, + "eval_runtime": 175.5048, + "eval_samples_per_second": 56.978, + "eval_steps_per_second": 3.561, + "step": 28000 + }, + { + "epoch": 0.02, + "learning_rate": 4.91322446365674e-05, + "loss": 3.0364, + "step": 28100 + }, + { + "epoch": 0.02, + "learning_rate": 4.912904258725585e-05, + "loss": 3.0225, + "step": 28200 + }, + { + "epoch": 0.02, + "learning_rate": 4.912584053794428e-05, + "loss": 3.0096, + "step": 28300 + }, + { + "epoch": 0.02, + "learning_rate": 4.912263848863273e-05, + "loss": 3.0086, + "step": 28400 + }, + { + "epoch": 0.02, + "learning_rate": 4.911943643932117e-05, + "loss": 3.0182, + "step": 28500 + }, + { + "epoch": 0.02, + "learning_rate": 4.911623439000961e-05, + "loss": 3.0092, + "step": 28600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9113032340698054e-05, + "loss": 3.0079, + "step": 28700 + }, + { + "epoch": 0.02, + "learning_rate": 4.910983029138649e-05, + "loss": 2.9888, + "step": 28800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9106628242074933e-05, + "loss": 3.0265, + "step": 28900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9103426192763366e-05, + "loss": 3.0221, + "step": 29000 + }, + { + "epoch": 0.02, + "eval_loss": 2.7025697231292725, + "eval_runtime": 177.1206, + "eval_samples_per_second": 56.459, + "eval_steps_per_second": 3.529, + "step": 29000 + }, + { + "epoch": 0.02, + "learning_rate": 4.910022414345181e-05, + "loss": 3.0081, + "step": 29100 + }, + { + "epoch": 0.02, + "learning_rate": 4.909702209414025e-05, + "loss": 3.029, + "step": 29200 + }, + { + "epoch": 0.02, + "learning_rate": 4.909382004482869e-05, + "loss": 3.0081, + "step": 29300 + }, + { + "epoch": 0.02, + "learning_rate": 4.909061799551713e-05, + "loss": 3.0051, + "step": 29400 + }, + { + "epoch": 0.02, + "learning_rate": 4.908741594620557e-05, + "loss": 3.0204, + "step": 29500 + }, + { + "epoch": 0.02, + "learning_rate": 4.908421389689402e-05, + "loss": 3.0034, + "step": 29600 + }, + { + "epoch": 0.02, + "learning_rate": 4.908101184758245e-05, + "loss": 3.0299, + "step": 29700 + }, + { + "epoch": 0.02, + "learning_rate": 4.90778097982709e-05, + "loss": 3.0136, + "step": 29800 + }, + { + "epoch": 0.02, + "learning_rate": 4.907460774895933e-05, + "loss": 3.0318, + "step": 29900 + }, + { + "epoch": 0.02, + "learning_rate": 4.907140569964778e-05, + "loss": 3.0013, + "step": 30000 + }, + { + "epoch": 0.02, + "eval_loss": 2.7001781463623047, + "eval_runtime": 178.1965, + "eval_samples_per_second": 56.118, + "eval_steps_per_second": 3.507, + "step": 30000 + }, + { + "epoch": 0.02, + "learning_rate": 4.906820365033622e-05, + "loss": 3.0124, + "step": 30100 + }, + { + "epoch": 0.02, + "learning_rate": 4.906500160102466e-05, + "loss": 3.0026, + "step": 30200 + }, + { + "epoch": 0.02, + "learning_rate": 4.90617995517131e-05, + "loss": 3.0211, + "step": 30300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9058597502401536e-05, + "loss": 3.0049, + "step": 30400 + }, + { + "epoch": 0.02, + "learning_rate": 4.905539545308998e-05, + "loss": 3.0135, + "step": 30500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9052193403778416e-05, + "loss": 2.9869, + "step": 30600 + }, + { + "epoch": 0.02, + "learning_rate": 4.904899135446686e-05, + "loss": 2.9998, + "step": 30700 + }, + { + "epoch": 0.02, + "learning_rate": 4.90457893051553e-05, + "loss": 3.0038, + "step": 30800 + }, + { + "epoch": 0.02, + "learning_rate": 4.904258725584374e-05, + "loss": 3.0082, + "step": 30900 + }, + { + "epoch": 0.02, + "learning_rate": 4.903938520653218e-05, + "loss": 3.0048, + "step": 31000 + }, + { + "epoch": 0.02, + "eval_loss": 2.6981754302978516, + "eval_runtime": 176.8202, + "eval_samples_per_second": 56.555, + "eval_steps_per_second": 3.535, + "step": 31000 + }, + { + "epoch": 0.02, + "learning_rate": 4.903618315722062e-05, + "loss": 3.0059, + "step": 31100 + }, + { + "epoch": 0.02, + "learning_rate": 4.903298110790907e-05, + "loss": 3.0255, + "step": 31200 + }, + { + "epoch": 0.02, + "learning_rate": 4.90297790585975e-05, + "loss": 2.9953, + "step": 31300 + }, + { + "epoch": 0.02, + "learning_rate": 4.902657700928595e-05, + "loss": 2.9831, + "step": 31400 + }, + { + "epoch": 0.02, + "learning_rate": 4.902337495997439e-05, + "loss": 2.9981, + "step": 31500 + }, + { + "epoch": 0.02, + "learning_rate": 4.902017291066283e-05, + "loss": 2.9991, + "step": 31600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9016970861351266e-05, + "loss": 2.9946, + "step": 31700 + }, + { + "epoch": 0.02, + "learning_rate": 4.9013768812039706e-05, + "loss": 3.0027, + "step": 31800 + }, + { + "epoch": 0.02, + "learning_rate": 4.901056676272815e-05, + "loss": 2.971, + "step": 31900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9007364713416586e-05, + "loss": 3.0191, + "step": 32000 + }, + { + "epoch": 0.02, + "eval_loss": 2.6920597553253174, + "eval_runtime": 176.7041, + "eval_samples_per_second": 56.592, + "eval_steps_per_second": 3.537, + "step": 32000 + }, + { + "epoch": 0.02, + "learning_rate": 4.900416266410503e-05, + "loss": 2.9977, + "step": 32100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9000960614793465e-05, + "loss": 3.0034, + "step": 32200 + }, + { + "epoch": 0.02, + "learning_rate": 4.899775856548191e-05, + "loss": 2.9882, + "step": 32300 + }, + { + "epoch": 0.02, + "learning_rate": 4.899455651617035e-05, + "loss": 2.9881, + "step": 32400 + }, + { + "epoch": 0.02, + "learning_rate": 4.899135446685879e-05, + "loss": 2.9803, + "step": 32500 + }, + { + "epoch": 0.02, + "learning_rate": 4.898815241754723e-05, + "loss": 2.9922, + "step": 32600 + }, + { + "epoch": 0.02, + "learning_rate": 4.898495036823567e-05, + "loss": 2.9709, + "step": 32700 + }, + { + "epoch": 0.02, + "learning_rate": 4.898174831892412e-05, + "loss": 2.9976, + "step": 32800 + }, + { + "epoch": 0.02, + "learning_rate": 4.897854626961255e-05, + "loss": 2.9985, + "step": 32900 + }, + { + "epoch": 0.02, + "learning_rate": 4.8975344220300997e-05, + "loss": 3.0019, + "step": 33000 + }, + { + "epoch": 0.02, + "eval_loss": 2.692898988723755, + "eval_runtime": 177.3804, + "eval_samples_per_second": 56.376, + "eval_steps_per_second": 3.524, + "step": 33000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8972142170989436e-05, + "loss": 2.9773, + "step": 33100 + }, + { + "epoch": 0.02, + "learning_rate": 4.8968940121677876e-05, + "loss": 2.9913, + "step": 33200 + }, + { + "epoch": 0.02, + "learning_rate": 4.8965738072366316e-05, + "loss": 2.9822, + "step": 33300 + }, + { + "epoch": 0.02, + "learning_rate": 4.8962536023054756e-05, + "loss": 3.0064, + "step": 33400 + }, + { + "epoch": 0.02, + "learning_rate": 4.89593339737432e-05, + "loss": 2.989, + "step": 33500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8956131924431635e-05, + "loss": 2.9827, + "step": 33600 + }, + { + "epoch": 0.02, + "learning_rate": 4.895292987512008e-05, + "loss": 3.0022, + "step": 33700 + }, + { + "epoch": 0.02, + "learning_rate": 4.894972782580852e-05, + "loss": 2.9779, + "step": 33800 + }, + { + "epoch": 0.02, + "learning_rate": 4.894652577649696e-05, + "loss": 2.9612, + "step": 33900 + }, + { + "epoch": 0.02, + "learning_rate": 4.89433237271854e-05, + "loss": 2.9764, + "step": 34000 + }, + { + "epoch": 0.02, + "eval_loss": 2.688096761703491, + "eval_runtime": 177.6839, + "eval_samples_per_second": 56.28, + "eval_steps_per_second": 3.517, + "step": 34000 + }, + { + "epoch": 0.02, + "learning_rate": 4.894012167787384e-05, + "loss": 2.9807, + "step": 34100 + }, + { + "epoch": 0.02, + "learning_rate": 4.893691962856228e-05, + "loss": 3.0115, + "step": 34200 + }, + { + "epoch": 0.02, + "learning_rate": 4.893371757925072e-05, + "loss": 2.9909, + "step": 34300 + }, + { + "epoch": 0.02, + "learning_rate": 4.8930515529939166e-05, + "loss": 3.007, + "step": 34400 + }, + { + "epoch": 0.02, + "learning_rate": 4.8927313480627606e-05, + "loss": 2.9952, + "step": 34500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8924111431316046e-05, + "loss": 2.9875, + "step": 34600 + }, + { + "epoch": 0.02, + "learning_rate": 4.8920909382004486e-05, + "loss": 2.9829, + "step": 34700 + }, + { + "epoch": 0.02, + "learning_rate": 4.8917707332692925e-05, + "loss": 2.9682, + "step": 34800 + }, + { + "epoch": 0.02, + "learning_rate": 4.8914505283381365e-05, + "loss": 2.9665, + "step": 34900 + }, + { + "epoch": 0.02, + "learning_rate": 4.8911303234069805e-05, + "loss": 2.9666, + "step": 35000 + }, + { + "epoch": 0.02, + "eval_loss": 2.6841659545898438, + "eval_runtime": 177.4963, + "eval_samples_per_second": 56.339, + "eval_steps_per_second": 3.521, + "step": 35000 + }, + { + "epoch": 0.02, + "learning_rate": 4.890810118475825e-05, + "loss": 2.9709, + "step": 35100 + }, + { + "epoch": 0.02, + "learning_rate": 4.8904899135446684e-05, + "loss": 2.9748, + "step": 35200 + }, + { + "epoch": 0.02, + "learning_rate": 4.890169708613513e-05, + "loss": 2.9516, + "step": 35300 + }, + { + "epoch": 0.02, + "learning_rate": 4.889849503682357e-05, + "loss": 2.9764, + "step": 35400 + }, + { + "epoch": 0.02, + "learning_rate": 4.889529298751201e-05, + "loss": 2.9867, + "step": 35500 + }, + { + "epoch": 0.02, + "learning_rate": 4.889209093820045e-05, + "loss": 3.0054, + "step": 35600 + }, + { + "epoch": 0.02, + "learning_rate": 4.888888888888889e-05, + "loss": 2.9506, + "step": 35700 + }, + { + "epoch": 0.02, + "learning_rate": 4.888568683957733e-05, + "loss": 2.976, + "step": 35800 + }, + { + "epoch": 0.02, + "learning_rate": 4.888248479026577e-05, + "loss": 2.9867, + "step": 35900 + }, + { + "epoch": 0.02, + "learning_rate": 4.8879282740954216e-05, + "loss": 2.9753, + "step": 36000 + }, + { + "epoch": 0.02, + "eval_loss": 2.68265438079834, + "eval_runtime": 175.295, + "eval_samples_per_second": 57.047, + "eval_steps_per_second": 3.565, + "step": 36000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8876080691642656e-05, + "loss": 2.9837, + "step": 36100 + }, + { + "epoch": 0.02, + "learning_rate": 4.8872878642331095e-05, + "loss": 2.9745, + "step": 36200 + }, + { + "epoch": 0.02, + "learning_rate": 4.8869676593019535e-05, + "loss": 2.9655, + "step": 36300 + }, + { + "epoch": 0.02, + "learning_rate": 4.8866474543707975e-05, + "loss": 2.979, + "step": 36400 + }, + { + "epoch": 0.02, + "learning_rate": 4.8863272494396415e-05, + "loss": 2.982, + "step": 36500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8860070445084854e-05, + "loss": 2.9916, + "step": 36600 + }, + { + "epoch": 0.02, + "learning_rate": 4.88568683957733e-05, + "loss": 2.9592, + "step": 36700 + }, + { + "epoch": 0.02, + "learning_rate": 4.885366634646174e-05, + "loss": 2.961, + "step": 36800 + }, + { + "epoch": 0.02, + "learning_rate": 4.885046429715018e-05, + "loss": 2.961, + "step": 36900 + }, + { + "epoch": 0.02, + "learning_rate": 4.884726224783862e-05, + "loss": 2.9635, + "step": 37000 + }, + { + "epoch": 0.02, + "eval_loss": 2.6792824268341064, + "eval_runtime": 178.649, + "eval_samples_per_second": 55.976, + "eval_steps_per_second": 3.498, + "step": 37000 + }, + { + "epoch": 0.02, + "learning_rate": 4.884406019852706e-05, + "loss": 2.9596, + "step": 37100 + }, + { + "epoch": 0.02, + "learning_rate": 4.88408581492155e-05, + "loss": 2.9909, + "step": 37200 + }, + { + "epoch": 0.02, + "learning_rate": 4.883765609990394e-05, + "loss": 2.9932, + "step": 37300 + }, + { + "epoch": 0.02, + "learning_rate": 4.883445405059238e-05, + "loss": 2.9736, + "step": 37400 + }, + { + "epoch": 0.02, + "learning_rate": 4.883125200128082e-05, + "loss": 2.974, + "step": 37500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8828049951969265e-05, + "loss": 2.9549, + "step": 37600 + }, + { + "epoch": 0.02, + "learning_rate": 4.8824847902657705e-05, + "loss": 2.9616, + "step": 37700 + }, + { + "epoch": 0.02, + "learning_rate": 4.8821645853346145e-05, + "loss": 2.9713, + "step": 37800 + }, + { + "epoch": 0.02, + "learning_rate": 4.8818443804034584e-05, + "loss": 2.9662, + "step": 37900 + }, + { + "epoch": 0.02, + "learning_rate": 4.8815241754723024e-05, + "loss": 2.978, + "step": 38000 + }, + { + "epoch": 0.02, + "eval_loss": 2.6746363639831543, + "eval_runtime": 177.2123, + "eval_samples_per_second": 56.429, + "eval_steps_per_second": 3.527, + "step": 38000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8812039705411464e-05, + "loss": 2.9669, + "step": 38100 + }, + { + "epoch": 0.02, + "learning_rate": 4.8808837656099904e-05, + "loss": 2.9603, + "step": 38200 + }, + { + "epoch": 0.02, + "learning_rate": 4.880563560678835e-05, + "loss": 2.9543, + "step": 38300 + }, + { + "epoch": 0.02, + "learning_rate": 4.880243355747679e-05, + "loss": 2.9636, + "step": 38400 + }, + { + "epoch": 0.02, + "learning_rate": 4.879923150816523e-05, + "loss": 2.9742, + "step": 38500 + }, + { + "epoch": 0.02, + "learning_rate": 4.879602945885367e-05, + "loss": 2.9462, + "step": 38600 + }, + { + "epoch": 0.02, + "learning_rate": 4.879282740954211e-05, + "loss": 2.9654, + "step": 38700 + }, + { + "epoch": 0.02, + "learning_rate": 4.878962536023055e-05, + "loss": 2.9638, + "step": 38800 + }, + { + "epoch": 0.02, + "learning_rate": 4.878642331091899e-05, + "loss": 2.9718, + "step": 38900 + }, + { + "epoch": 0.02, + "learning_rate": 4.878322126160743e-05, + "loss": 2.9599, + "step": 39000 + }, + { + "epoch": 0.02, + "eval_loss": 2.67553448677063, + "eval_runtime": 178.0811, + "eval_samples_per_second": 56.154, + "eval_steps_per_second": 3.51, + "step": 39000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8780019212295875e-05, + "loss": 2.9634, + "step": 39100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8776817162984315e-05, + "loss": 2.9718, + "step": 39200 + }, + { + "epoch": 0.03, + "learning_rate": 4.8773615113672754e-05, + "loss": 2.9626, + "step": 39300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8770413064361194e-05, + "loss": 2.9759, + "step": 39400 + }, + { + "epoch": 0.03, + "learning_rate": 4.8767211015049634e-05, + "loss": 2.9666, + "step": 39500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8764008965738074e-05, + "loss": 2.9583, + "step": 39600 + }, + { + "epoch": 0.03, + "learning_rate": 4.876080691642651e-05, + "loss": 2.9415, + "step": 39700 + }, + { + "epoch": 0.03, + "learning_rate": 4.875760486711495e-05, + "loss": 2.9487, + "step": 39800 + }, + { + "epoch": 0.03, + "learning_rate": 4.87544028178034e-05, + "loss": 2.9589, + "step": 39900 + }, + { + "epoch": 0.03, + "learning_rate": 4.875120076849184e-05, + "loss": 2.9576, + "step": 40000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6733338832855225, + "eval_runtime": 175.3918, + "eval_samples_per_second": 57.015, + "eval_steps_per_second": 3.563, + "step": 40000 + }, + { + "epoch": 0.03, + "learning_rate": 4.874799871918028e-05, + "loss": 2.9754, + "step": 40100 + }, + { + "epoch": 0.03, + "learning_rate": 4.874479666986872e-05, + "loss": 2.97, + "step": 40200 + }, + { + "epoch": 0.03, + "learning_rate": 4.874159462055716e-05, + "loss": 2.9666, + "step": 40300 + }, + { + "epoch": 0.03, + "learning_rate": 4.87383925712456e-05, + "loss": 2.9546, + "step": 40400 + }, + { + "epoch": 0.03, + "learning_rate": 4.873519052193404e-05, + "loss": 2.9265, + "step": 40500 + }, + { + "epoch": 0.03, + "learning_rate": 4.873198847262248e-05, + "loss": 2.9588, + "step": 40600 + }, + { + "epoch": 0.03, + "learning_rate": 4.8728786423310924e-05, + "loss": 2.9534, + "step": 40700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8725584373999364e-05, + "loss": 2.9445, + "step": 40800 + }, + { + "epoch": 0.03, + "learning_rate": 4.8722382324687804e-05, + "loss": 2.9338, + "step": 40900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8719180275376243e-05, + "loss": 2.98, + "step": 41000 + }, + { + "epoch": 0.03, + "eval_loss": 2.671525001525879, + "eval_runtime": 175.1529, + "eval_samples_per_second": 57.093, + "eval_steps_per_second": 3.568, + "step": 41000 + }, + { + "epoch": 0.03, + "learning_rate": 4.871597822606468e-05, + "loss": 2.952, + "step": 41100 + }, + { + "epoch": 0.03, + "learning_rate": 4.871277617675312e-05, + "loss": 2.9436, + "step": 41200 + }, + { + "epoch": 0.03, + "learning_rate": 4.870957412744156e-05, + "loss": 2.9371, + "step": 41300 + }, + { + "epoch": 0.03, + "learning_rate": 4.870637207813001e-05, + "loss": 2.9551, + "step": 41400 + }, + { + "epoch": 0.03, + "learning_rate": 4.870317002881845e-05, + "loss": 2.938, + "step": 41500 + }, + { + "epoch": 0.03, + "learning_rate": 4.869996797950689e-05, + "loss": 2.9563, + "step": 41600 + }, + { + "epoch": 0.03, + "learning_rate": 4.869676593019533e-05, + "loss": 2.9677, + "step": 41700 + }, + { + "epoch": 0.03, + "learning_rate": 4.869356388088377e-05, + "loss": 2.9425, + "step": 41800 + }, + { + "epoch": 0.03, + "learning_rate": 4.869036183157221e-05, + "loss": 2.9733, + "step": 41900 + }, + { + "epoch": 0.03, + "learning_rate": 4.868715978226065e-05, + "loss": 2.9384, + "step": 42000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6695194244384766, + "eval_runtime": 177.5003, + "eval_samples_per_second": 56.338, + "eval_steps_per_second": 3.521, + "step": 42000 + }, + { + "epoch": 0.03, + "learning_rate": 4.868395773294909e-05, + "loss": 2.9512, + "step": 42100 + }, + { + "epoch": 0.03, + "learning_rate": 4.868075568363753e-05, + "loss": 2.9533, + "step": 42200 + }, + { + "epoch": 0.03, + "learning_rate": 4.8677553634325974e-05, + "loss": 2.9451, + "step": 42300 + }, + { + "epoch": 0.03, + "learning_rate": 4.867435158501441e-05, + "loss": 2.9512, + "step": 42400 + }, + { + "epoch": 0.03, + "learning_rate": 4.867114953570285e-05, + "loss": 2.953, + "step": 42500 + }, + { + "epoch": 0.03, + "learning_rate": 4.866794748639129e-05, + "loss": 2.9541, + "step": 42600 + }, + { + "epoch": 0.03, + "learning_rate": 4.866474543707973e-05, + "loss": 2.9516, + "step": 42700 + }, + { + "epoch": 0.03, + "learning_rate": 4.866154338776817e-05, + "loss": 2.9462, + "step": 42800 + }, + { + "epoch": 0.03, + "learning_rate": 4.865834133845661e-05, + "loss": 2.9565, + "step": 42900 + }, + { + "epoch": 0.03, + "learning_rate": 4.865513928914506e-05, + "loss": 2.9434, + "step": 43000 + }, + { + "epoch": 0.03, + "eval_loss": 2.663595676422119, + "eval_runtime": 177.606, + "eval_samples_per_second": 56.304, + "eval_steps_per_second": 3.519, + "step": 43000 + }, + { + "epoch": 0.03, + "learning_rate": 4.86519372398335e-05, + "loss": 2.9467, + "step": 43100 + }, + { + "epoch": 0.03, + "learning_rate": 4.864873519052194e-05, + "loss": 2.9429, + "step": 43200 + }, + { + "epoch": 0.03, + "learning_rate": 4.864553314121038e-05, + "loss": 2.9387, + "step": 43300 + }, + { + "epoch": 0.03, + "learning_rate": 4.864233109189882e-05, + "loss": 2.9609, + "step": 43400 + }, + { + "epoch": 0.03, + "learning_rate": 4.863912904258726e-05, + "loss": 2.9611, + "step": 43500 + }, + { + "epoch": 0.03, + "learning_rate": 4.86359269932757e-05, + "loss": 2.9362, + "step": 43600 + }, + { + "epoch": 0.03, + "learning_rate": 4.8632724943964144e-05, + "loss": 2.9425, + "step": 43700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8629522894652576e-05, + "loss": 2.9494, + "step": 43800 + }, + { + "epoch": 0.03, + "learning_rate": 4.862632084534102e-05, + "loss": 2.9261, + "step": 43900 + }, + { + "epoch": 0.03, + "learning_rate": 4.862311879602946e-05, + "loss": 2.9369, + "step": 44000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6637465953826904, + "eval_runtime": 176.6283, + "eval_samples_per_second": 56.616, + "eval_steps_per_second": 3.539, + "step": 44000 + }, + { + "epoch": 0.03, + "learning_rate": 4.86199167467179e-05, + "loss": 2.9292, + "step": 44100 + }, + { + "epoch": 0.03, + "learning_rate": 4.861671469740634e-05, + "loss": 2.9375, + "step": 44200 + }, + { + "epoch": 0.03, + "learning_rate": 4.861351264809478e-05, + "loss": 2.9606, + "step": 44300 + }, + { + "epoch": 0.03, + "learning_rate": 4.861031059878323e-05, + "loss": 2.9337, + "step": 44400 + }, + { + "epoch": 0.03, + "learning_rate": 4.860710854947166e-05, + "loss": 2.9415, + "step": 44500 + }, + { + "epoch": 0.03, + "learning_rate": 4.860390650016011e-05, + "loss": 2.9612, + "step": 44600 + }, + { + "epoch": 0.03, + "learning_rate": 4.860070445084855e-05, + "loss": 2.9257, + "step": 44700 + }, + { + "epoch": 0.03, + "learning_rate": 4.859750240153699e-05, + "loss": 2.9384, + "step": 44800 + }, + { + "epoch": 0.03, + "learning_rate": 4.859430035222543e-05, + "loss": 2.9426, + "step": 44900 + }, + { + "epoch": 0.03, + "learning_rate": 4.859109830291387e-05, + "loss": 2.9349, + "step": 45000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6627140045166016, + "eval_runtime": 176.793, + "eval_samples_per_second": 56.563, + "eval_steps_per_second": 3.535, + "step": 45000 + }, + { + "epoch": 0.03, + "learning_rate": 4.858789625360231e-05, + "loss": 2.9307, + "step": 45100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8584694204290746e-05, + "loss": 2.9294, + "step": 45200 + }, + { + "epoch": 0.03, + "learning_rate": 4.858149215497919e-05, + "loss": 2.921, + "step": 45300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8578290105667626e-05, + "loss": 2.9285, + "step": 45400 + }, + { + "epoch": 0.03, + "learning_rate": 4.857508805635607e-05, + "loss": 2.9419, + "step": 45500 + }, + { + "epoch": 0.03, + "learning_rate": 4.857188600704451e-05, + "loss": 2.9245, + "step": 45600 + }, + { + "epoch": 0.03, + "learning_rate": 4.856868395773295e-05, + "loss": 2.9332, + "step": 45700 + }, + { + "epoch": 0.03, + "learning_rate": 4.856548190842139e-05, + "loss": 2.9201, + "step": 45800 + }, + { + "epoch": 0.03, + "learning_rate": 4.856227985910983e-05, + "loss": 2.9444, + "step": 45900 + }, + { + "epoch": 0.03, + "learning_rate": 4.855907780979828e-05, + "loss": 2.9321, + "step": 46000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6629557609558105, + "eval_runtime": 177.2252, + "eval_samples_per_second": 56.425, + "eval_steps_per_second": 3.527, + "step": 46000 + }, + { + "epoch": 0.03, + "learning_rate": 4.855587576048671e-05, + "loss": 2.9485, + "step": 46100 + }, + { + "epoch": 0.03, + "learning_rate": 4.855267371117516e-05, + "loss": 2.9094, + "step": 46200 + }, + { + "epoch": 0.03, + "learning_rate": 4.854947166186359e-05, + "loss": 2.9352, + "step": 46300 + }, + { + "epoch": 0.03, + "learning_rate": 4.854626961255204e-05, + "loss": 2.9278, + "step": 46400 + }, + { + "epoch": 0.03, + "learning_rate": 4.8543067563240477e-05, + "loss": 2.9452, + "step": 46500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8539865513928916e-05, + "loss": 2.9558, + "step": 46600 + }, + { + "epoch": 0.03, + "learning_rate": 4.853666346461736e-05, + "loss": 2.9374, + "step": 46700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8533461415305796e-05, + "loss": 2.9263, + "step": 46800 + }, + { + "epoch": 0.03, + "learning_rate": 4.853025936599424e-05, + "loss": 2.9426, + "step": 46900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8527057316682675e-05, + "loss": 2.931, + "step": 47000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6591949462890625, + "eval_runtime": 175.6051, + "eval_samples_per_second": 56.946, + "eval_steps_per_second": 3.559, + "step": 47000 + }, + { + "epoch": 0.03, + "learning_rate": 4.852385526737112e-05, + "loss": 2.9286, + "step": 47100 + }, + { + "epoch": 0.03, + "learning_rate": 4.852065321805956e-05, + "loss": 2.9236, + "step": 47200 + }, + { + "epoch": 0.03, + "learning_rate": 4.8517451168748e-05, + "loss": 2.9303, + "step": 47300 + }, + { + "epoch": 0.03, + "learning_rate": 4.851424911943644e-05, + "loss": 2.9203, + "step": 47400 + }, + { + "epoch": 0.03, + "learning_rate": 4.851104707012488e-05, + "loss": 2.9563, + "step": 47500 + }, + { + "epoch": 0.03, + "learning_rate": 4.850784502081333e-05, + "loss": 2.9176, + "step": 47600 + }, + { + "epoch": 0.03, + "learning_rate": 4.850464297150176e-05, + "loss": 2.9453, + "step": 47700 + }, + { + "epoch": 0.03, + "learning_rate": 4.850144092219021e-05, + "loss": 2.9437, + "step": 47800 + }, + { + "epoch": 0.03, + "learning_rate": 4.849823887287864e-05, + "loss": 2.9204, + "step": 47900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8495036823567086e-05, + "loss": 2.9205, + "step": 48000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6586101055145264, + "eval_runtime": 178.3108, + "eval_samples_per_second": 56.082, + "eval_steps_per_second": 3.505, + "step": 48000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8491834774255526e-05, + "loss": 2.9378, + "step": 48100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8488632724943966e-05, + "loss": 2.9229, + "step": 48200 + }, + { + "epoch": 0.03, + "learning_rate": 4.848543067563241e-05, + "loss": 2.9282, + "step": 48300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8482228626320845e-05, + "loss": 2.9312, + "step": 48400 + }, + { + "epoch": 0.03, + "learning_rate": 4.847902657700929e-05, + "loss": 2.9039, + "step": 48500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8475824527697725e-05, + "loss": 2.9361, + "step": 48600 + }, + { + "epoch": 0.03, + "learning_rate": 4.847262247838617e-05, + "loss": 2.9241, + "step": 48700 + }, + { + "epoch": 0.03, + "learning_rate": 4.846942042907461e-05, + "loss": 2.9296, + "step": 48800 + }, + { + "epoch": 0.03, + "learning_rate": 4.846621837976305e-05, + "loss": 2.9315, + "step": 48900 + }, + { + "epoch": 0.03, + "learning_rate": 4.84630163304515e-05, + "loss": 2.9147, + "step": 49000 + }, + { + "epoch": 0.03, + "eval_loss": 2.653660535812378, + "eval_runtime": 175.1839, + "eval_samples_per_second": 57.083, + "eval_steps_per_second": 3.568, + "step": 49000 + }, + { + "epoch": 0.03, + "learning_rate": 4.845981428113993e-05, + "loss": 2.9236, + "step": 49100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8456612231828377e-05, + "loss": 2.9235, + "step": 49200 + }, + { + "epoch": 0.03, + "learning_rate": 4.845341018251681e-05, + "loss": 2.926, + "step": 49300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8450208133205256e-05, + "loss": 2.8989, + "step": 49400 + }, + { + "epoch": 0.03, + "learning_rate": 4.844700608389369e-05, + "loss": 2.9362, + "step": 49500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8443804034582136e-05, + "loss": 2.9101, + "step": 49600 + }, + { + "epoch": 0.03, + "learning_rate": 4.8440601985270575e-05, + "loss": 2.944, + "step": 49700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8437399935959015e-05, + "loss": 2.9158, + "step": 49800 + }, + { + "epoch": 0.03, + "learning_rate": 4.843419788664746e-05, + "loss": 2.9221, + "step": 49900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8430995837335894e-05, + "loss": 2.9005, + "step": 50000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6547129154205322, + "eval_runtime": 178.6492, + "eval_samples_per_second": 55.976, + "eval_steps_per_second": 3.498, + "step": 50000 + }, + { + "epoch": 0.03, + "learning_rate": 4.842779378802434e-05, + "loss": 2.9158, + "step": 50100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8424591738712774e-05, + "loss": 2.9161, + "step": 50200 + }, + { + "epoch": 0.03, + "learning_rate": 4.842138968940122e-05, + "loss": 2.9282, + "step": 50300 + }, + { + "epoch": 0.03, + "learning_rate": 4.841818764008966e-05, + "loss": 2.9058, + "step": 50400 + }, + { + "epoch": 0.03, + "learning_rate": 4.84149855907781e-05, + "loss": 2.9191, + "step": 50500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8411783541466546e-05, + "loss": 2.9172, + "step": 50600 + }, + { + "epoch": 0.03, + "learning_rate": 4.840858149215498e-05, + "loss": 2.8963, + "step": 50700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8405379442843426e-05, + "loss": 2.9345, + "step": 50800 + }, + { + "epoch": 0.03, + "learning_rate": 4.840217739353186e-05, + "loss": 2.9197, + "step": 50900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8398975344220305e-05, + "loss": 2.9018, + "step": 51000 + }, + { + "epoch": 0.03, + "eval_loss": 2.6506595611572266, + "eval_runtime": 178.1968, + "eval_samples_per_second": 56.118, + "eval_steps_per_second": 3.507, + "step": 51000 + }, + { + "epoch": 0.03, + "learning_rate": 4.839577329490874e-05, + "loss": 2.9245, + "step": 51100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8392571245597185e-05, + "loss": 2.92, + "step": 51200 + }, + { + "epoch": 0.03, + "learning_rate": 4.8389369196285625e-05, + "loss": 2.9326, + "step": 51300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8386167146974064e-05, + "loss": 2.9369, + "step": 51400 + }, + { + "epoch": 0.03, + "learning_rate": 4.838296509766251e-05, + "loss": 2.8936, + "step": 51500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8379763048350944e-05, + "loss": 2.9116, + "step": 51600 + }, + { + "epoch": 0.03, + "learning_rate": 4.837656099903939e-05, + "loss": 2.9032, + "step": 51700 + }, + { + "epoch": 0.03, + "learning_rate": 4.837335894972782e-05, + "loss": 2.926, + "step": 51800 + }, + { + "epoch": 0.03, + "learning_rate": 4.837015690041627e-05, + "loss": 2.9204, + "step": 51900 + }, + { + "epoch": 0.03, + "learning_rate": 4.836695485110471e-05, + "loss": 2.9144, + "step": 52000 + }, + { + "epoch": 0.03, + "eval_loss": 2.64819073677063, + "eval_runtime": 175.6308, + "eval_samples_per_second": 56.938, + "eval_steps_per_second": 3.559, + "step": 52000 + }, + { + "epoch": 0.03, + "learning_rate": 4.836375280179315e-05, + "loss": 2.9234, + "step": 52100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8360550752481596e-05, + "loss": 2.9201, + "step": 52200 + }, + { + "epoch": 0.03, + "learning_rate": 4.835734870317003e-05, + "loss": 2.8795, + "step": 52300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8354146653858475e-05, + "loss": 2.9024, + "step": 52400 + }, + { + "epoch": 0.03, + "learning_rate": 4.835094460454691e-05, + "loss": 2.923, + "step": 52500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8347742555235355e-05, + "loss": 2.9094, + "step": 52600 + }, + { + "epoch": 0.03, + "learning_rate": 4.834454050592379e-05, + "loss": 2.9084, + "step": 52700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8341338456612234e-05, + "loss": 2.9046, + "step": 52800 + }, + { + "epoch": 0.03, + "learning_rate": 4.8338136407300674e-05, + "loss": 2.897, + "step": 52900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8334934357989114e-05, + "loss": 2.8898, + "step": 53000 + }, + { + "epoch": 0.03, + "eval_loss": 2.645299196243286, + "eval_runtime": 177.085, + "eval_samples_per_second": 56.47, + "eval_steps_per_second": 3.529, + "step": 53000 + }, + { + "epoch": 0.03, + "learning_rate": 4.833173230867756e-05, + "loss": 2.9105, + "step": 53100 + }, + { + "epoch": 0.03, + "learning_rate": 4.832853025936599e-05, + "loss": 2.8876, + "step": 53200 + }, + { + "epoch": 0.03, + "learning_rate": 4.832532821005444e-05, + "loss": 2.917, + "step": 53300 + }, + { + "epoch": 0.03, + "learning_rate": 4.832212616074287e-05, + "loss": 2.8781, + "step": 53400 + }, + { + "epoch": 0.03, + "learning_rate": 4.831892411143132e-05, + "loss": 2.8958, + "step": 53500 + }, + { + "epoch": 0.03, + "learning_rate": 4.831572206211976e-05, + "loss": 2.9015, + "step": 53600 + }, + { + "epoch": 0.03, + "learning_rate": 4.83125200128082e-05, + "loss": 2.8878, + "step": 53700 + }, + { + "epoch": 0.03, + "learning_rate": 4.8309317963496645e-05, + "loss": 2.9211, + "step": 53800 + }, + { + "epoch": 0.03, + "learning_rate": 4.830611591418508e-05, + "loss": 2.9124, + "step": 53900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8302913864873525e-05, + "loss": 2.9082, + "step": 54000 + }, + { + "epoch": 0.03, + "eval_loss": 2.64392352104187, + "eval_runtime": 177.3707, + "eval_samples_per_second": 56.379, + "eval_steps_per_second": 3.524, + "step": 54000 + }, + { + "epoch": 0.03, + "learning_rate": 4.829971181556196e-05, + "loss": 2.926, + "step": 54100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8296509766250404e-05, + "loss": 2.8951, + "step": 54200 + }, + { + "epoch": 0.03, + "learning_rate": 4.8293307716938844e-05, + "loss": 2.901, + "step": 54300 + }, + { + "epoch": 0.03, + "learning_rate": 4.8290105667627284e-05, + "loss": 2.8796, + "step": 54400 + }, + { + "epoch": 0.03, + "learning_rate": 4.828690361831572e-05, + "loss": 2.8966, + "step": 54500 + }, + { + "epoch": 0.03, + "learning_rate": 4.828370156900416e-05, + "loss": 2.9169, + "step": 54600 + }, + { + "epoch": 0.04, + "learning_rate": 4.828049951969261e-05, + "loss": 2.8987, + "step": 54700 + }, + { + "epoch": 0.04, + "learning_rate": 4.827729747038104e-05, + "loss": 2.8955, + "step": 54800 + }, + { + "epoch": 0.04, + "learning_rate": 4.827409542106949e-05, + "loss": 2.9023, + "step": 54900 + }, + { + "epoch": 0.04, + "learning_rate": 4.827089337175792e-05, + "loss": 2.9066, + "step": 55000 + }, + { + "epoch": 0.04, + "eval_loss": 2.642181634902954, + "eval_runtime": 176.3672, + "eval_samples_per_second": 56.7, + "eval_steps_per_second": 3.544, + "step": 55000 + }, + { + "epoch": 0.04, + "learning_rate": 4.826769132244637e-05, + "loss": 2.9017, + "step": 55100 + }, + { + "epoch": 0.04, + "learning_rate": 4.826448927313481e-05, + "loss": 2.9021, + "step": 55200 + }, + { + "epoch": 0.04, + "learning_rate": 4.826128722382325e-05, + "loss": 2.8813, + "step": 55300 + }, + { + "epoch": 0.04, + "learning_rate": 4.8258085174511695e-05, + "loss": 2.9016, + "step": 55400 + }, + { + "epoch": 0.04, + "learning_rate": 4.825488312520013e-05, + "loss": 2.902, + "step": 55500 + }, + { + "epoch": 0.04, + "learning_rate": 4.8251681075888574e-05, + "loss": 2.8913, + "step": 55600 + }, + { + "epoch": 0.04, + "learning_rate": 4.824847902657701e-05, + "loss": 2.8846, + "step": 55700 + }, + { + "epoch": 0.04, + "learning_rate": 4.8245276977265454e-05, + "loss": 2.9166, + "step": 55800 + }, + { + "epoch": 0.04, + "learning_rate": 4.824207492795389e-05, + "loss": 2.897, + "step": 55900 + }, + { + "epoch": 0.04, + "learning_rate": 4.823887287864233e-05, + "loss": 2.8851, + "step": 56000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6408231258392334, + "eval_runtime": 178.371, + "eval_samples_per_second": 56.063, + "eval_steps_per_second": 3.504, + "step": 56000 + }, + { + "epoch": 0.04, + "learning_rate": 4.823567082933077e-05, + "loss": 2.8904, + "step": 56100 + }, + { + "epoch": 0.04, + "learning_rate": 4.823246878001921e-05, + "loss": 2.9088, + "step": 56200 + }, + { + "epoch": 0.04, + "learning_rate": 4.822926673070766e-05, + "loss": 2.9073, + "step": 56300 + }, + { + "epoch": 0.04, + "learning_rate": 4.822606468139609e-05, + "loss": 2.8893, + "step": 56400 + }, + { + "epoch": 0.04, + "learning_rate": 4.822286263208454e-05, + "loss": 2.8898, + "step": 56500 + }, + { + "epoch": 0.04, + "learning_rate": 4.821966058277298e-05, + "loss": 2.9206, + "step": 56600 + }, + { + "epoch": 0.04, + "learning_rate": 4.821645853346142e-05, + "loss": 2.9178, + "step": 56700 + }, + { + "epoch": 0.04, + "learning_rate": 4.821325648414986e-05, + "loss": 2.8975, + "step": 56800 + }, + { + "epoch": 0.04, + "learning_rate": 4.82100544348383e-05, + "loss": 2.9013, + "step": 56900 + }, + { + "epoch": 0.04, + "learning_rate": 4.8206852385526744e-05, + "loss": 2.8921, + "step": 57000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6402812004089355, + "eval_runtime": 176.0762, + "eval_samples_per_second": 56.794, + "eval_steps_per_second": 3.55, + "step": 57000 + }, + { + "epoch": 0.04, + "learning_rate": 4.820365033621518e-05, + "loss": 2.8888, + "step": 57100 + }, + { + "epoch": 0.04, + "learning_rate": 4.8200448286903623e-05, + "loss": 2.8943, + "step": 57200 + }, + { + "epoch": 0.04, + "learning_rate": 4.8197246237592056e-05, + "loss": 2.9095, + "step": 57300 + }, + { + "epoch": 0.04, + "learning_rate": 4.81940441882805e-05, + "loss": 2.8993, + "step": 57400 + }, + { + "epoch": 0.04, + "learning_rate": 4.819084213896894e-05, + "loss": 2.886, + "step": 57500 + }, + { + "epoch": 0.04, + "learning_rate": 4.818764008965738e-05, + "loss": 2.8974, + "step": 57600 + }, + { + "epoch": 0.04, + "learning_rate": 4.818443804034582e-05, + "loss": 2.8738, + "step": 57700 + }, + { + "epoch": 0.04, + "learning_rate": 4.818123599103426e-05, + "loss": 2.9148, + "step": 57800 + }, + { + "epoch": 0.04, + "learning_rate": 4.817803394172271e-05, + "loss": 2.8871, + "step": 57900 + }, + { + "epoch": 0.04, + "learning_rate": 4.817483189241114e-05, + "loss": 2.8949, + "step": 58000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6379764080047607, + "eval_runtime": 178.1605, + "eval_samples_per_second": 56.129, + "eval_steps_per_second": 3.508, + "step": 58000 + }, + { + "epoch": 0.04, + "learning_rate": 4.817162984309959e-05, + "loss": 2.8779, + "step": 58100 + }, + { + "epoch": 0.04, + "learning_rate": 4.816842779378803e-05, + "loss": 2.877, + "step": 58200 + }, + { + "epoch": 0.04, + "learning_rate": 4.816522574447647e-05, + "loss": 2.8852, + "step": 58300 + }, + { + "epoch": 0.04, + "learning_rate": 4.816202369516491e-05, + "loss": 2.8913, + "step": 58400 + }, + { + "epoch": 0.04, + "learning_rate": 4.815882164585335e-05, + "loss": 2.8927, + "step": 58500 + }, + { + "epoch": 0.04, + "learning_rate": 4.815561959654179e-05, + "loss": 2.9023, + "step": 58600 + }, + { + "epoch": 0.04, + "learning_rate": 4.8152417547230226e-05, + "loss": 2.8747, + "step": 58700 + }, + { + "epoch": 0.04, + "learning_rate": 4.814921549791867e-05, + "loss": 2.925, + "step": 58800 + }, + { + "epoch": 0.04, + "learning_rate": 4.814601344860711e-05, + "loss": 2.8965, + "step": 58900 + }, + { + "epoch": 0.04, + "learning_rate": 4.814281139929555e-05, + "loss": 2.8893, + "step": 59000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6361653804779053, + "eval_runtime": 176.4963, + "eval_samples_per_second": 56.658, + "eval_steps_per_second": 3.541, + "step": 59000 + }, + { + "epoch": 0.04, + "learning_rate": 4.813960934998399e-05, + "loss": 2.8837, + "step": 59100 + }, + { + "epoch": 0.04, + "learning_rate": 4.813640730067243e-05, + "loss": 2.8872, + "step": 59200 + }, + { + "epoch": 0.04, + "learning_rate": 4.813320525136087e-05, + "loss": 2.8987, + "step": 59300 + }, + { + "epoch": 0.04, + "learning_rate": 4.813000320204931e-05, + "loss": 2.8938, + "step": 59400 + }, + { + "epoch": 0.04, + "learning_rate": 4.812680115273776e-05, + "loss": 2.887, + "step": 59500 + }, + { + "epoch": 0.04, + "learning_rate": 4.812359910342619e-05, + "loss": 2.8972, + "step": 59600 + }, + { + "epoch": 0.04, + "learning_rate": 4.812039705411464e-05, + "loss": 2.8788, + "step": 59700 + }, + { + "epoch": 0.04, + "learning_rate": 4.811719500480308e-05, + "loss": 2.8931, + "step": 59800 + }, + { + "epoch": 0.04, + "learning_rate": 4.811399295549152e-05, + "loss": 2.8809, + "step": 59900 + }, + { + "epoch": 0.04, + "learning_rate": 4.8110790906179956e-05, + "loss": 2.895, + "step": 60000 + }, + { + "epoch": 0.04, + "eval_loss": 2.636247158050537, + "eval_runtime": 176.1455, + "eval_samples_per_second": 56.771, + "eval_steps_per_second": 3.548, + "step": 60000 + }, + { + "epoch": 0.04, + "learning_rate": 4.8107588856868396e-05, + "loss": 2.8971, + "step": 60100 + }, + { + "epoch": 0.04, + "learning_rate": 4.810438680755684e-05, + "loss": 2.8754, + "step": 60200 + }, + { + "epoch": 0.04, + "learning_rate": 4.8101184758245276e-05, + "loss": 2.9116, + "step": 60300 + }, + { + "epoch": 0.04, + "learning_rate": 4.809798270893372e-05, + "loss": 2.8929, + "step": 60400 + }, + { + "epoch": 0.04, + "learning_rate": 4.809478065962216e-05, + "loss": 2.8687, + "step": 60500 + }, + { + "epoch": 0.04, + "learning_rate": 4.80915786103106e-05, + "loss": 2.8883, + "step": 60600 + }, + { + "epoch": 0.04, + "learning_rate": 4.808837656099904e-05, + "loss": 2.8928, + "step": 60700 + }, + { + "epoch": 0.04, + "learning_rate": 4.808517451168748e-05, + "loss": 2.8977, + "step": 60800 + }, + { + "epoch": 0.04, + "learning_rate": 4.808197246237592e-05, + "loss": 2.8938, + "step": 60900 + }, + { + "epoch": 0.04, + "learning_rate": 4.807877041306436e-05, + "loss": 2.8765, + "step": 61000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6320600509643555, + "eval_runtime": 177.2981, + "eval_samples_per_second": 56.402, + "eval_steps_per_second": 3.525, + "step": 61000 + }, + { + "epoch": 0.04, + "learning_rate": 4.807556836375281e-05, + "loss": 2.8716, + "step": 61100 + }, + { + "epoch": 0.04, + "learning_rate": 4.807236631444125e-05, + "loss": 2.8953, + "step": 61200 + }, + { + "epoch": 0.04, + "learning_rate": 4.8069164265129687e-05, + "loss": 2.8848, + "step": 61300 + }, + { + "epoch": 0.04, + "learning_rate": 4.8065962215818126e-05, + "loss": 2.892, + "step": 61400 + }, + { + "epoch": 0.04, + "learning_rate": 4.8062760166506566e-05, + "loss": 2.8878, + "step": 61500 + }, + { + "epoch": 0.04, + "learning_rate": 4.8059558117195006e-05, + "loss": 2.8692, + "step": 61600 + }, + { + "epoch": 0.04, + "learning_rate": 4.8056356067883446e-05, + "loss": 2.8683, + "step": 61700 + }, + { + "epoch": 0.04, + "learning_rate": 4.805315401857189e-05, + "loss": 2.8776, + "step": 61800 + }, + { + "epoch": 0.04, + "learning_rate": 4.8049951969260325e-05, + "loss": 2.8778, + "step": 61900 + }, + { + "epoch": 0.04, + "learning_rate": 4.804674991994877e-05, + "loss": 2.8915, + "step": 62000 + }, + { + "epoch": 0.04, + "eval_loss": 2.630981206893921, + "eval_runtime": 176.4905, + "eval_samples_per_second": 56.66, + "eval_steps_per_second": 3.541, + "step": 62000 + }, + { + "epoch": 0.04, + "learning_rate": 4.804354787063721e-05, + "loss": 2.8944, + "step": 62100 + }, + { + "epoch": 0.04, + "learning_rate": 4.804034582132565e-05, + "loss": 2.8721, + "step": 62200 + }, + { + "epoch": 0.04, + "learning_rate": 4.803714377201409e-05, + "loss": 2.8661, + "step": 62300 + }, + { + "epoch": 0.04, + "learning_rate": 4.803394172270253e-05, + "loss": 2.8959, + "step": 62400 + }, + { + "epoch": 0.04, + "learning_rate": 4.803073967339097e-05, + "loss": 2.8752, + "step": 62500 + }, + { + "epoch": 0.04, + "learning_rate": 4.802753762407941e-05, + "loss": 2.8858, + "step": 62600 + }, + { + "epoch": 0.04, + "learning_rate": 4.8024335574767856e-05, + "loss": 2.8771, + "step": 62700 + }, + { + "epoch": 0.04, + "learning_rate": 4.8021133525456296e-05, + "loss": 2.8746, + "step": 62800 + }, + { + "epoch": 0.04, + "learning_rate": 4.8017931476144736e-05, + "loss": 2.8726, + "step": 62900 + }, + { + "epoch": 0.04, + "learning_rate": 4.8014729426833176e-05, + "loss": 2.8841, + "step": 63000 + }, + { + "epoch": 0.04, + "eval_loss": 2.631925106048584, + "eval_runtime": 178.91, + "eval_samples_per_second": 55.894, + "eval_steps_per_second": 3.493, + "step": 63000 + }, + { + "epoch": 0.04, + "learning_rate": 4.8011527377521615e-05, + "loss": 2.8828, + "step": 63100 + }, + { + "epoch": 0.04, + "learning_rate": 4.8008325328210055e-05, + "loss": 2.8462, + "step": 63200 + }, + { + "epoch": 0.04, + "learning_rate": 4.8005123278898495e-05, + "loss": 2.8835, + "step": 63300 + }, + { + "epoch": 0.04, + "learning_rate": 4.8001921229586935e-05, + "loss": 2.8805, + "step": 63400 + }, + { + "epoch": 0.04, + "learning_rate": 4.799871918027538e-05, + "loss": 2.8842, + "step": 63500 + }, + { + "epoch": 0.04, + "learning_rate": 4.799551713096382e-05, + "loss": 2.8914, + "step": 63600 + }, + { + "epoch": 0.04, + "learning_rate": 4.799231508165226e-05, + "loss": 2.8578, + "step": 63700 + }, + { + "epoch": 0.04, + "learning_rate": 4.79891130323407e-05, + "loss": 2.8729, + "step": 63800 + }, + { + "epoch": 0.04, + "learning_rate": 4.798591098302914e-05, + "loss": 2.8858, + "step": 63900 + }, + { + "epoch": 0.04, + "learning_rate": 4.798270893371758e-05, + "loss": 2.8701, + "step": 64000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6272077560424805, + "eval_runtime": 176.6554, + "eval_samples_per_second": 56.607, + "eval_steps_per_second": 3.538, + "step": 64000 + }, + { + "epoch": 0.04, + "learning_rate": 4.797950688440602e-05, + "loss": 2.8801, + "step": 64100 + }, + { + "epoch": 0.04, + "learning_rate": 4.7976304835094466e-05, + "loss": 2.9025, + "step": 64200 + }, + { + "epoch": 0.04, + "learning_rate": 4.7973102785782906e-05, + "loss": 2.8743, + "step": 64300 + }, + { + "epoch": 0.04, + "learning_rate": 4.7969900736471346e-05, + "loss": 2.8923, + "step": 64400 + }, + { + "epoch": 0.04, + "learning_rate": 4.7966698687159785e-05, + "loss": 2.8944, + "step": 64500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7963496637848225e-05, + "loss": 2.887, + "step": 64600 + }, + { + "epoch": 0.04, + "learning_rate": 4.7960294588536665e-05, + "loss": 2.8828, + "step": 64700 + }, + { + "epoch": 0.04, + "learning_rate": 4.7957092539225105e-05, + "loss": 2.8754, + "step": 64800 + }, + { + "epoch": 0.04, + "learning_rate": 4.7953890489913544e-05, + "loss": 2.8791, + "step": 64900 + }, + { + "epoch": 0.04, + "learning_rate": 4.7950688440601984e-05, + "loss": 2.8407, + "step": 65000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6278929710388184, + "eval_runtime": 177.1352, + "eval_samples_per_second": 56.454, + "eval_steps_per_second": 3.528, + "step": 65000 + }, + { + "epoch": 0.04, + "learning_rate": 4.794748639129043e-05, + "loss": 2.8656, + "step": 65100 + }, + { + "epoch": 0.04, + "learning_rate": 4.794428434197887e-05, + "loss": 2.8675, + "step": 65200 + }, + { + "epoch": 0.04, + "learning_rate": 4.794108229266731e-05, + "loss": 2.8924, + "step": 65300 + }, + { + "epoch": 0.04, + "learning_rate": 4.793788024335575e-05, + "loss": 2.8682, + "step": 65400 + }, + { + "epoch": 0.04, + "learning_rate": 4.793467819404419e-05, + "loss": 2.883, + "step": 65500 + }, + { + "epoch": 0.04, + "learning_rate": 4.793147614473263e-05, + "loss": 2.8548, + "step": 65600 + }, + { + "epoch": 0.04, + "learning_rate": 4.792827409542107e-05, + "loss": 2.8845, + "step": 65700 + }, + { + "epoch": 0.04, + "learning_rate": 4.7925072046109515e-05, + "loss": 2.8869, + "step": 65800 + }, + { + "epoch": 0.04, + "learning_rate": 4.7921869996797955e-05, + "loss": 2.857, + "step": 65900 + }, + { + "epoch": 0.04, + "learning_rate": 4.7918667947486395e-05, + "loss": 2.8659, + "step": 66000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6252381801605225, + "eval_runtime": 178.5365, + "eval_samples_per_second": 56.011, + "eval_steps_per_second": 3.501, + "step": 66000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7915465898174835e-05, + "loss": 2.8654, + "step": 66100 + }, + { + "epoch": 0.04, + "learning_rate": 4.7912263848863274e-05, + "loss": 2.87, + "step": 66200 + }, + { + "epoch": 0.04, + "learning_rate": 4.7909061799551714e-05, + "loss": 2.8696, + "step": 66300 + }, + { + "epoch": 0.04, + "learning_rate": 4.7905859750240154e-05, + "loss": 2.87, + "step": 66400 + }, + { + "epoch": 0.04, + "learning_rate": 4.79026577009286e-05, + "loss": 2.8805, + "step": 66500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7899455651617033e-05, + "loss": 2.8696, + "step": 66600 + }, + { + "epoch": 0.04, + "learning_rate": 4.789625360230548e-05, + "loss": 2.8675, + "step": 66700 + }, + { + "epoch": 0.04, + "learning_rate": 4.789305155299392e-05, + "loss": 2.8673, + "step": 66800 + }, + { + "epoch": 0.04, + "learning_rate": 4.788984950368236e-05, + "loss": 2.8822, + "step": 66900 + }, + { + "epoch": 0.04, + "learning_rate": 4.78866474543708e-05, + "loss": 2.8722, + "step": 67000 + }, + { + "epoch": 0.04, + "eval_loss": 2.626574993133545, + "eval_runtime": 175.2826, + "eval_samples_per_second": 57.051, + "eval_steps_per_second": 3.566, + "step": 67000 + }, + { + "epoch": 0.04, + "learning_rate": 4.788344540505924e-05, + "loss": 2.8481, + "step": 67100 + }, + { + "epoch": 0.04, + "learning_rate": 4.788024335574768e-05, + "loss": 2.8707, + "step": 67200 + }, + { + "epoch": 0.04, + "learning_rate": 4.787704130643612e-05, + "loss": 2.8761, + "step": 67300 + }, + { + "epoch": 0.04, + "learning_rate": 4.7873839257124565e-05, + "loss": 2.8592, + "step": 67400 + }, + { + "epoch": 0.04, + "learning_rate": 4.7870637207813005e-05, + "loss": 2.8697, + "step": 67500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7867435158501444e-05, + "loss": 2.8961, + "step": 67600 + }, + { + "epoch": 0.04, + "learning_rate": 4.7864233109189884e-05, + "loss": 2.8615, + "step": 67700 + }, + { + "epoch": 0.04, + "learning_rate": 4.7861031059878324e-05, + "loss": 2.8696, + "step": 67800 + }, + { + "epoch": 0.04, + "learning_rate": 4.7857829010566764e-05, + "loss": 2.8787, + "step": 67900 + }, + { + "epoch": 0.04, + "learning_rate": 4.78546269612552e-05, + "loss": 2.8539, + "step": 68000 + }, + { + "epoch": 0.04, + "eval_loss": 2.624630928039551, + "eval_runtime": 179.2846, + "eval_samples_per_second": 55.777, + "eval_steps_per_second": 3.486, + "step": 68000 + }, + { + "epoch": 0.04, + "learning_rate": 4.785142491194365e-05, + "loss": 2.8506, + "step": 68100 + }, + { + "epoch": 0.04, + "learning_rate": 4.784822286263208e-05, + "loss": 2.8606, + "step": 68200 + }, + { + "epoch": 0.04, + "learning_rate": 4.784502081332053e-05, + "loss": 2.8456, + "step": 68300 + }, + { + "epoch": 0.04, + "learning_rate": 4.784181876400897e-05, + "loss": 2.8524, + "step": 68400 + }, + { + "epoch": 0.04, + "learning_rate": 4.783861671469741e-05, + "loss": 2.8596, + "step": 68500 + }, + { + "epoch": 0.04, + "learning_rate": 4.783541466538585e-05, + "loss": 2.8607, + "step": 68600 + }, + { + "epoch": 0.04, + "learning_rate": 4.783221261607429e-05, + "loss": 2.8647, + "step": 68700 + }, + { + "epoch": 0.04, + "learning_rate": 4.7829010566762735e-05, + "loss": 2.8447, + "step": 68800 + }, + { + "epoch": 0.04, + "learning_rate": 4.782580851745117e-05, + "loss": 2.8862, + "step": 68900 + }, + { + "epoch": 0.04, + "learning_rate": 4.7822606468139614e-05, + "loss": 2.8641, + "step": 69000 + }, + { + "epoch": 0.04, + "eval_loss": 2.623765468597412, + "eval_runtime": 177.1886, + "eval_samples_per_second": 56.437, + "eval_steps_per_second": 3.527, + "step": 69000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7819404418828054e-05, + "loss": 2.8487, + "step": 69100 + }, + { + "epoch": 0.04, + "learning_rate": 4.7816202369516494e-05, + "loss": 2.8696, + "step": 69200 + }, + { + "epoch": 0.04, + "learning_rate": 4.7813000320204933e-05, + "loss": 2.8499, + "step": 69300 + }, + { + "epoch": 0.04, + "learning_rate": 4.780979827089337e-05, + "loss": 2.8757, + "step": 69400 + }, + { + "epoch": 0.04, + "learning_rate": 4.780659622158181e-05, + "loss": 2.8534, + "step": 69500 + }, + { + "epoch": 0.04, + "learning_rate": 4.780339417227025e-05, + "loss": 2.8931, + "step": 69600 + }, + { + "epoch": 0.04, + "learning_rate": 4.78001921229587e-05, + "loss": 2.8608, + "step": 69700 + }, + { + "epoch": 0.04, + "learning_rate": 4.779699007364713e-05, + "loss": 2.8753, + "step": 69800 + }, + { + "epoch": 0.04, + "learning_rate": 4.779378802433558e-05, + "loss": 2.8602, + "step": 69900 + }, + { + "epoch": 0.04, + "learning_rate": 4.779058597502402e-05, + "loss": 2.8386, + "step": 70000 + }, + { + "epoch": 0.04, + "eval_loss": 2.623189687728882, + "eval_runtime": 178.3982, + "eval_samples_per_second": 56.054, + "eval_steps_per_second": 3.503, + "step": 70000 + }, + { + "epoch": 0.04, + "learning_rate": 4.778738392571246e-05, + "loss": 2.8434, + "step": 70100 + }, + { + "epoch": 0.04, + "learning_rate": 4.77841818764009e-05, + "loss": 2.8547, + "step": 70200 + }, + { + "epoch": 0.04, + "learning_rate": 4.778097982708934e-05, + "loss": 2.8507, + "step": 70300 + }, + { + "epoch": 0.05, + "learning_rate": 4.7777777777777784e-05, + "loss": 2.83, + "step": 70400 + }, + { + "epoch": 0.05, + "learning_rate": 4.777457572846622e-05, + "loss": 2.8491, + "step": 70500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7771373679154664e-05, + "loss": 2.8496, + "step": 70600 + }, + { + "epoch": 0.05, + "learning_rate": 4.77681716298431e-05, + "loss": 2.8441, + "step": 70700 + }, + { + "epoch": 0.05, + "learning_rate": 4.776496958053154e-05, + "loss": 2.8496, + "step": 70800 + }, + { + "epoch": 0.05, + "learning_rate": 4.776176753121998e-05, + "loss": 2.8633, + "step": 70900 + }, + { + "epoch": 0.05, + "learning_rate": 4.775856548190842e-05, + "loss": 2.8588, + "step": 71000 + }, + { + "epoch": 0.05, + "eval_loss": 2.62199068069458, + "eval_runtime": 177.949, + "eval_samples_per_second": 56.196, + "eval_steps_per_second": 3.512, + "step": 71000 + }, + { + "epoch": 0.05, + "learning_rate": 4.775536343259687e-05, + "loss": 2.8538, + "step": 71100 + }, + { + "epoch": 0.05, + "learning_rate": 4.77521613832853e-05, + "loss": 2.8511, + "step": 71200 + }, + { + "epoch": 0.05, + "learning_rate": 4.774895933397375e-05, + "loss": 2.8625, + "step": 71300 + }, + { + "epoch": 0.05, + "learning_rate": 4.774575728466218e-05, + "loss": 2.861, + "step": 71400 + }, + { + "epoch": 0.05, + "learning_rate": 4.774255523535063e-05, + "loss": 2.8455, + "step": 71500 + }, + { + "epoch": 0.05, + "learning_rate": 4.773935318603907e-05, + "loss": 2.8341, + "step": 71600 + }, + { + "epoch": 0.05, + "learning_rate": 4.773615113672751e-05, + "loss": 2.8646, + "step": 71700 + }, + { + "epoch": 0.05, + "learning_rate": 4.773294908741595e-05, + "loss": 2.8435, + "step": 71800 + }, + { + "epoch": 0.05, + "learning_rate": 4.772974703810439e-05, + "loss": 2.832, + "step": 71900 + }, + { + "epoch": 0.05, + "learning_rate": 4.7726544988792834e-05, + "loss": 2.8626, + "step": 72000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6206774711608887, + "eval_runtime": 179.2437, + "eval_samples_per_second": 55.79, + "eval_steps_per_second": 3.487, + "step": 72000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7723342939481266e-05, + "loss": 2.8369, + "step": 72100 + }, + { + "epoch": 0.05, + "learning_rate": 4.772014089016971e-05, + "loss": 2.8413, + "step": 72200 + }, + { + "epoch": 0.05, + "learning_rate": 4.771693884085815e-05, + "loss": 2.8481, + "step": 72300 + }, + { + "epoch": 0.05, + "learning_rate": 4.771373679154659e-05, + "loss": 2.8515, + "step": 72400 + }, + { + "epoch": 0.05, + "learning_rate": 4.771053474223503e-05, + "loss": 2.836, + "step": 72500 + }, + { + "epoch": 0.05, + "learning_rate": 4.770733269292347e-05, + "loss": 2.8408, + "step": 72600 + }, + { + "epoch": 0.05, + "learning_rate": 4.770413064361192e-05, + "loss": 2.8476, + "step": 72700 + }, + { + "epoch": 0.05, + "learning_rate": 4.770092859430035e-05, + "loss": 2.8521, + "step": 72800 + }, + { + "epoch": 0.05, + "learning_rate": 4.76977265449888e-05, + "loss": 2.8486, + "step": 72900 + }, + { + "epoch": 0.05, + "learning_rate": 4.769452449567723e-05, + "loss": 2.8588, + "step": 73000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6182003021240234, + "eval_runtime": 179.3657, + "eval_samples_per_second": 55.752, + "eval_steps_per_second": 3.485, + "step": 73000 + }, + { + "epoch": 0.05, + "learning_rate": 4.769132244636568e-05, + "loss": 2.8372, + "step": 73100 + }, + { + "epoch": 0.05, + "learning_rate": 4.768812039705412e-05, + "loss": 2.8277, + "step": 73200 + }, + { + "epoch": 0.05, + "learning_rate": 4.768491834774256e-05, + "loss": 2.8451, + "step": 73300 + }, + { + "epoch": 0.05, + "learning_rate": 4.7681716298431003e-05, + "loss": 2.8498, + "step": 73400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7678514249119436e-05, + "loss": 2.8222, + "step": 73500 + }, + { + "epoch": 0.05, + "learning_rate": 4.767531219980788e-05, + "loss": 2.8307, + "step": 73600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7672110150496316e-05, + "loss": 2.8404, + "step": 73700 + }, + { + "epoch": 0.05, + "learning_rate": 4.766890810118476e-05, + "loss": 2.8521, + "step": 73800 + }, + { + "epoch": 0.05, + "learning_rate": 4.76657060518732e-05, + "loss": 2.8389, + "step": 73900 + }, + { + "epoch": 0.05, + "learning_rate": 4.766250400256164e-05, + "loss": 2.8513, + "step": 74000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6168673038482666, + "eval_runtime": 179.3293, + "eval_samples_per_second": 55.763, + "eval_steps_per_second": 3.485, + "step": 74000 + }, + { + "epoch": 0.05, + "learning_rate": 4.765930195325009e-05, + "loss": 2.8545, + "step": 74100 + }, + { + "epoch": 0.05, + "learning_rate": 4.765609990393852e-05, + "loss": 2.8304, + "step": 74200 + }, + { + "epoch": 0.05, + "learning_rate": 4.765289785462697e-05, + "loss": 2.8522, + "step": 74300 + }, + { + "epoch": 0.05, + "learning_rate": 4.76496958053154e-05, + "loss": 2.8184, + "step": 74400 + }, + { + "epoch": 0.05, + "learning_rate": 4.764649375600385e-05, + "loss": 2.8588, + "step": 74500 + }, + { + "epoch": 0.05, + "learning_rate": 4.764329170669228e-05, + "loss": 2.8538, + "step": 74600 + }, + { + "epoch": 0.05, + "learning_rate": 4.764008965738073e-05, + "loss": 2.8357, + "step": 74700 + }, + { + "epoch": 0.05, + "learning_rate": 4.7636887608069167e-05, + "loss": 2.824, + "step": 74800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7633685558757606e-05, + "loss": 2.8445, + "step": 74900 + }, + { + "epoch": 0.05, + "learning_rate": 4.763048350944605e-05, + "loss": 2.8508, + "step": 75000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6163156032562256, + "eval_runtime": 174.9884, + "eval_samples_per_second": 57.147, + "eval_steps_per_second": 3.572, + "step": 75000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7627281460134486e-05, + "loss": 2.828, + "step": 75100 + }, + { + "epoch": 0.05, + "learning_rate": 4.762407941082293e-05, + "loss": 2.8371, + "step": 75200 + }, + { + "epoch": 0.05, + "learning_rate": 4.7620877361511365e-05, + "loss": 2.8336, + "step": 75300 + }, + { + "epoch": 0.05, + "learning_rate": 4.761767531219981e-05, + "loss": 2.8408, + "step": 75400 + }, + { + "epoch": 0.05, + "learning_rate": 4.761447326288825e-05, + "loss": 2.8385, + "step": 75500 + }, + { + "epoch": 0.05, + "learning_rate": 4.761127121357669e-05, + "loss": 2.8697, + "step": 75600 + }, + { + "epoch": 0.05, + "learning_rate": 4.760806916426514e-05, + "loss": 2.8561, + "step": 75700 + }, + { + "epoch": 0.05, + "learning_rate": 4.760486711495357e-05, + "loss": 2.8452, + "step": 75800 + }, + { + "epoch": 0.05, + "learning_rate": 4.760166506564202e-05, + "loss": 2.8632, + "step": 75900 + }, + { + "epoch": 0.05, + "learning_rate": 4.759846301633045e-05, + "loss": 2.8484, + "step": 76000 + }, + { + "epoch": 0.05, + "eval_loss": 2.615889072418213, + "eval_runtime": 175.9824, + "eval_samples_per_second": 56.824, + "eval_steps_per_second": 3.551, + "step": 76000 + }, + { + "epoch": 0.05, + "learning_rate": 4.75952609670189e-05, + "loss": 2.847, + "step": 76100 + }, + { + "epoch": 0.05, + "learning_rate": 4.759205891770733e-05, + "loss": 2.8438, + "step": 76200 + }, + { + "epoch": 0.05, + "learning_rate": 4.7588856868395776e-05, + "loss": 2.8175, + "step": 76300 + }, + { + "epoch": 0.05, + "learning_rate": 4.7585654819084216e-05, + "loss": 2.829, + "step": 76400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7582452769772656e-05, + "loss": 2.8611, + "step": 76500 + }, + { + "epoch": 0.05, + "learning_rate": 4.75792507204611e-05, + "loss": 2.851, + "step": 76600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7576048671149535e-05, + "loss": 2.8397, + "step": 76700 + }, + { + "epoch": 0.05, + "learning_rate": 4.757284662183798e-05, + "loss": 2.8409, + "step": 76800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7569644572526415e-05, + "loss": 2.833, + "step": 76900 + }, + { + "epoch": 0.05, + "learning_rate": 4.756644252321486e-05, + "loss": 2.8406, + "step": 77000 + }, + { + "epoch": 0.05, + "eval_loss": 2.613297700881958, + "eval_runtime": 177.3489, + "eval_samples_per_second": 56.386, + "eval_steps_per_second": 3.524, + "step": 77000 + }, + { + "epoch": 0.05, + "learning_rate": 4.75632404739033e-05, + "loss": 2.836, + "step": 77100 + }, + { + "epoch": 0.05, + "learning_rate": 4.756003842459174e-05, + "loss": 2.8227, + "step": 77200 + }, + { + "epoch": 0.05, + "learning_rate": 4.755683637528019e-05, + "loss": 2.8241, + "step": 77300 + }, + { + "epoch": 0.05, + "learning_rate": 4.755363432596862e-05, + "loss": 2.8277, + "step": 77400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7550432276657067e-05, + "loss": 2.8311, + "step": 77500 + }, + { + "epoch": 0.05, + "learning_rate": 4.75472302273455e-05, + "loss": 2.8333, + "step": 77600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7544028178033946e-05, + "loss": 2.8526, + "step": 77700 + }, + { + "epoch": 0.05, + "learning_rate": 4.754082612872238e-05, + "loss": 2.8425, + "step": 77800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7537624079410826e-05, + "loss": 2.8275, + "step": 77900 + }, + { + "epoch": 0.05, + "learning_rate": 4.7534422030099265e-05, + "loss": 2.8275, + "step": 78000 + }, + { + "epoch": 0.05, + "eval_loss": 2.611859083175659, + "eval_runtime": 176.8529, + "eval_samples_per_second": 56.544, + "eval_steps_per_second": 3.534, + "step": 78000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7531219980787705e-05, + "loss": 2.8123, + "step": 78100 + }, + { + "epoch": 0.05, + "learning_rate": 4.752801793147615e-05, + "loss": 2.842, + "step": 78200 + }, + { + "epoch": 0.05, + "learning_rate": 4.7524815882164584e-05, + "loss": 2.8515, + "step": 78300 + }, + { + "epoch": 0.05, + "learning_rate": 4.752161383285303e-05, + "loss": 2.8293, + "step": 78400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7518411783541464e-05, + "loss": 2.8288, + "step": 78500 + }, + { + "epoch": 0.05, + "learning_rate": 4.751520973422991e-05, + "loss": 2.8517, + "step": 78600 + }, + { + "epoch": 0.05, + "learning_rate": 4.751200768491835e-05, + "loss": 2.8363, + "step": 78700 + }, + { + "epoch": 0.05, + "learning_rate": 4.750880563560679e-05, + "loss": 2.8586, + "step": 78800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7505603586295236e-05, + "loss": 2.8319, + "step": 78900 + }, + { + "epoch": 0.05, + "learning_rate": 4.750240153698367e-05, + "loss": 2.8237, + "step": 79000 + }, + { + "epoch": 0.05, + "eval_loss": 2.610048532485962, + "eval_runtime": 177.9401, + "eval_samples_per_second": 56.199, + "eval_steps_per_second": 3.512, + "step": 79000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7499199487672116e-05, + "loss": 2.8294, + "step": 79100 + }, + { + "epoch": 0.05, + "learning_rate": 4.749599743836055e-05, + "loss": 2.8175, + "step": 79200 + }, + { + "epoch": 0.05, + "learning_rate": 4.7492795389048995e-05, + "loss": 2.8375, + "step": 79300 + }, + { + "epoch": 0.05, + "learning_rate": 4.748959333973743e-05, + "loss": 2.8302, + "step": 79400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7486391290425875e-05, + "loss": 2.8193, + "step": 79500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7483189241114315e-05, + "loss": 2.8389, + "step": 79600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7479987191802754e-05, + "loss": 2.8373, + "step": 79700 + }, + { + "epoch": 0.05, + "learning_rate": 4.74767851424912e-05, + "loss": 2.8473, + "step": 79800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7473583093179634e-05, + "loss": 2.8022, + "step": 79900 + }, + { + "epoch": 0.05, + "learning_rate": 4.747038104386808e-05, + "loss": 2.8151, + "step": 80000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6109461784362793, + "eval_runtime": 179.9943, + "eval_samples_per_second": 55.557, + "eval_steps_per_second": 3.472, + "step": 80000 + }, + { + "epoch": 0.05, + "learning_rate": 4.746717899455651e-05, + "loss": 2.8428, + "step": 80100 + }, + { + "epoch": 0.05, + "learning_rate": 4.746397694524496e-05, + "loss": 2.8367, + "step": 80200 + }, + { + "epoch": 0.05, + "learning_rate": 4.74607748959334e-05, + "loss": 2.8256, + "step": 80300 + }, + { + "epoch": 0.05, + "learning_rate": 4.745757284662184e-05, + "loss": 2.8322, + "step": 80400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7454370797310286e-05, + "loss": 2.8296, + "step": 80500 + }, + { + "epoch": 0.05, + "learning_rate": 4.745116874799872e-05, + "loss": 2.8231, + "step": 80600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7447966698687165e-05, + "loss": 2.8306, + "step": 80700 + }, + { + "epoch": 0.05, + "learning_rate": 4.74447646493756e-05, + "loss": 2.8411, + "step": 80800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7441562600064045e-05, + "loss": 2.8214, + "step": 80900 + }, + { + "epoch": 0.05, + "learning_rate": 4.7438360550752485e-05, + "loss": 2.8316, + "step": 81000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6093032360076904, + "eval_runtime": 180.9743, + "eval_samples_per_second": 55.256, + "eval_steps_per_second": 3.454, + "step": 81000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7435158501440924e-05, + "loss": 2.8423, + "step": 81100 + }, + { + "epoch": 0.05, + "learning_rate": 4.7431956452129364e-05, + "loss": 2.8475, + "step": 81200 + }, + { + "epoch": 0.05, + "learning_rate": 4.7428754402817804e-05, + "loss": 2.8288, + "step": 81300 + }, + { + "epoch": 0.05, + "learning_rate": 4.742555235350625e-05, + "loss": 2.8268, + "step": 81400 + }, + { + "epoch": 0.05, + "learning_rate": 4.742235030419468e-05, + "loss": 2.8296, + "step": 81500 + }, + { + "epoch": 0.05, + "learning_rate": 4.741914825488313e-05, + "loss": 2.8115, + "step": 81600 + }, + { + "epoch": 0.05, + "learning_rate": 4.741594620557156e-05, + "loss": 2.8189, + "step": 81700 + }, + { + "epoch": 0.05, + "learning_rate": 4.741274415626001e-05, + "loss": 2.826, + "step": 81800 + }, + { + "epoch": 0.05, + "learning_rate": 4.740954210694845e-05, + "loss": 2.8234, + "step": 81900 + }, + { + "epoch": 0.05, + "learning_rate": 4.740634005763689e-05, + "loss": 2.8323, + "step": 82000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6061840057373047, + "eval_runtime": 176.2652, + "eval_samples_per_second": 56.733, + "eval_steps_per_second": 3.546, + "step": 82000 + }, + { + "epoch": 0.05, + "learning_rate": 4.740313800832533e-05, + "loss": 2.8221, + "step": 82100 + }, + { + "epoch": 0.05, + "learning_rate": 4.739993595901377e-05, + "loss": 2.8164, + "step": 82200 + }, + { + "epoch": 0.05, + "learning_rate": 4.7396733909702215e-05, + "loss": 2.8348, + "step": 82300 + }, + { + "epoch": 0.05, + "learning_rate": 4.739353186039065e-05, + "loss": 2.8126, + "step": 82400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7390329811079094e-05, + "loss": 2.8422, + "step": 82500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7387127761767534e-05, + "loss": 2.8054, + "step": 82600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7383925712455974e-05, + "loss": 2.8152, + "step": 82700 + }, + { + "epoch": 0.05, + "learning_rate": 4.738072366314441e-05, + "loss": 2.8113, + "step": 82800 + }, + { + "epoch": 0.05, + "learning_rate": 4.737752161383285e-05, + "loss": 2.8288, + "step": 82900 + }, + { + "epoch": 0.05, + "learning_rate": 4.73743195645213e-05, + "loss": 2.8339, + "step": 83000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6048617362976074, + "eval_runtime": 180.0118, + "eval_samples_per_second": 55.552, + "eval_steps_per_second": 3.472, + "step": 83000 + }, + { + "epoch": 0.05, + "learning_rate": 4.737111751520973e-05, + "loss": 2.8217, + "step": 83100 + }, + { + "epoch": 0.05, + "learning_rate": 4.736791546589818e-05, + "loss": 2.805, + "step": 83200 + }, + { + "epoch": 0.05, + "learning_rate": 4.736471341658662e-05, + "loss": 2.8304, + "step": 83300 + }, + { + "epoch": 0.05, + "learning_rate": 4.736151136727506e-05, + "loss": 2.8284, + "step": 83400 + }, + { + "epoch": 0.05, + "learning_rate": 4.73583093179635e-05, + "loss": 2.8139, + "step": 83500 + }, + { + "epoch": 0.05, + "learning_rate": 4.735510726865194e-05, + "loss": 2.8407, + "step": 83600 + }, + { + "epoch": 0.05, + "learning_rate": 4.735190521934038e-05, + "loss": 2.8052, + "step": 83700 + }, + { + "epoch": 0.05, + "learning_rate": 4.734870317002882e-05, + "loss": 2.8452, + "step": 83800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7345501120717264e-05, + "loss": 2.8436, + "step": 83900 + }, + { + "epoch": 0.05, + "learning_rate": 4.7342299071405704e-05, + "loss": 2.8197, + "step": 84000 + }, + { + "epoch": 0.05, + "eval_loss": 2.6033012866973877, + "eval_runtime": 177.5805, + "eval_samples_per_second": 56.312, + "eval_steps_per_second": 3.52, + "step": 84000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7339097022094144e-05, + "loss": 2.8055, + "step": 84100 + }, + { + "epoch": 0.05, + "learning_rate": 4.733589497278258e-05, + "loss": 2.8088, + "step": 84200 + }, + { + "epoch": 0.05, + "learning_rate": 4.733269292347102e-05, + "loss": 2.8306, + "step": 84300 + }, + { + "epoch": 0.05, + "learning_rate": 4.732949087415946e-05, + "loss": 2.8089, + "step": 84400 + }, + { + "epoch": 0.05, + "learning_rate": 4.73262888248479e-05, + "loss": 2.7994, + "step": 84500 + }, + { + "epoch": 0.05, + "learning_rate": 4.732308677553635e-05, + "loss": 2.8545, + "step": 84600 + }, + { + "epoch": 0.05, + "learning_rate": 4.731988472622478e-05, + "loss": 2.8154, + "step": 84700 + }, + { + "epoch": 0.05, + "learning_rate": 4.731668267691323e-05, + "loss": 2.8359, + "step": 84800 + }, + { + "epoch": 0.05, + "learning_rate": 4.731348062760167e-05, + "loss": 2.8292, + "step": 84900 + }, + { + "epoch": 0.05, + "learning_rate": 4.731027857829011e-05, + "loss": 2.7941, + "step": 85000 + }, + { + "epoch": 0.05, + "eval_loss": 2.604931116104126, + "eval_runtime": 180.7023, + "eval_samples_per_second": 55.34, + "eval_steps_per_second": 3.459, + "step": 85000 + }, + { + "epoch": 0.05, + "learning_rate": 4.730707652897855e-05, + "loss": 2.8105, + "step": 85100 + }, + { + "epoch": 0.05, + "learning_rate": 4.730387447966699e-05, + "loss": 2.8295, + "step": 85200 + }, + { + "epoch": 0.05, + "learning_rate": 4.730067243035543e-05, + "loss": 2.7998, + "step": 85300 + }, + { + "epoch": 0.05, + "learning_rate": 4.729747038104387e-05, + "loss": 2.821, + "step": 85400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7294268331732313e-05, + "loss": 2.8151, + "step": 85500 + }, + { + "epoch": 0.05, + "learning_rate": 4.729106628242075e-05, + "loss": 2.8, + "step": 85600 + }, + { + "epoch": 0.05, + "learning_rate": 4.728786423310919e-05, + "loss": 2.8241, + "step": 85700 + }, + { + "epoch": 0.05, + "learning_rate": 4.728466218379763e-05, + "loss": 2.813, + "step": 85800 + }, + { + "epoch": 0.05, + "learning_rate": 4.728146013448607e-05, + "loss": 2.8031, + "step": 85900 + }, + { + "epoch": 0.06, + "learning_rate": 4.727825808517451e-05, + "loss": 2.791, + "step": 86000 + }, + { + "epoch": 0.06, + "eval_loss": 2.6037590503692627, + "eval_runtime": 176.2434, + "eval_samples_per_second": 56.74, + "eval_steps_per_second": 3.546, + "step": 86000 + }, + { + "epoch": 0.06, + "learning_rate": 4.727505603586295e-05, + "loss": 2.7966, + "step": 86100 + }, + { + "epoch": 0.06, + "learning_rate": 4.72718539865514e-05, + "loss": 2.8253, + "step": 86200 + }, + { + "epoch": 0.06, + "learning_rate": 4.726865193723984e-05, + "loss": 2.7886, + "step": 86300 + }, + { + "epoch": 0.06, + "learning_rate": 4.726544988792828e-05, + "loss": 2.7999, + "step": 86400 + }, + { + "epoch": 0.06, + "learning_rate": 4.726224783861672e-05, + "loss": 2.8172, + "step": 86500 + }, + { + "epoch": 0.06, + "learning_rate": 4.725904578930516e-05, + "loss": 2.7939, + "step": 86600 + }, + { + "epoch": 0.06, + "learning_rate": 4.72558437399936e-05, + "loss": 2.8033, + "step": 86700 + }, + { + "epoch": 0.06, + "learning_rate": 4.725264169068204e-05, + "loss": 2.8201, + "step": 86800 + }, + { + "epoch": 0.06, + "learning_rate": 4.7249439641370477e-05, + "loss": 2.8243, + "step": 86900 + }, + { + "epoch": 0.06, + "learning_rate": 4.7246237592058916e-05, + "loss": 2.8016, + "step": 87000 + }, + { + "epoch": 0.06, + "eval_loss": 2.6006062030792236, + "eval_runtime": 179.333, + "eval_samples_per_second": 55.762, + "eval_steps_per_second": 3.485, + "step": 87000 + }, + { + "epoch": 0.06, + "learning_rate": 4.724303554274736e-05, + "loss": 2.8173, + "step": 87100 + }, + { + "epoch": 0.06, + "learning_rate": 4.72398334934358e-05, + "loss": 2.817, + "step": 87200 + }, + { + "epoch": 0.06, + "learning_rate": 4.723663144412424e-05, + "loss": 2.8145, + "step": 87300 + }, + { + "epoch": 0.06, + "learning_rate": 4.723342939481268e-05, + "loss": 2.7841, + "step": 87400 + }, + { + "epoch": 0.06, + "learning_rate": 4.723022734550112e-05, + "loss": 2.8264, + "step": 87500 + }, + { + "epoch": 0.06, + "learning_rate": 4.722702529618956e-05, + "loss": 2.8281, + "step": 87600 + }, + { + "epoch": 0.06, + "learning_rate": 4.7223823246878e-05, + "loss": 2.8169, + "step": 87700 + }, + { + "epoch": 0.06, + "learning_rate": 4.722062119756645e-05, + "loss": 2.8085, + "step": 87800 + }, + { + "epoch": 0.06, + "learning_rate": 4.721741914825489e-05, + "loss": 2.8151, + "step": 87900 + }, + { + "epoch": 0.06, + "learning_rate": 4.721421709894333e-05, + "loss": 2.7913, + "step": 88000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5992484092712402, + "eval_runtime": 178.437, + "eval_samples_per_second": 56.042, + "eval_steps_per_second": 3.503, + "step": 88000 + }, + { + "epoch": 0.06, + "learning_rate": 4.721101504963177e-05, + "loss": 2.8022, + "step": 88100 + }, + { + "epoch": 0.06, + "learning_rate": 4.720781300032021e-05, + "loss": 2.7993, + "step": 88200 + }, + { + "epoch": 0.06, + "learning_rate": 4.7204610951008646e-05, + "loss": 2.8054, + "step": 88300 + }, + { + "epoch": 0.06, + "learning_rate": 4.7201408901697086e-05, + "loss": 2.8266, + "step": 88400 + }, + { + "epoch": 0.06, + "learning_rate": 4.7198206852385526e-05, + "loss": 2.7973, + "step": 88500 + }, + { + "epoch": 0.06, + "learning_rate": 4.719500480307397e-05, + "loss": 2.7932, + "step": 88600 + }, + { + "epoch": 0.06, + "learning_rate": 4.719180275376241e-05, + "loss": 2.8047, + "step": 88700 + }, + { + "epoch": 0.06, + "learning_rate": 4.718860070445085e-05, + "loss": 2.8009, + "step": 88800 + }, + { + "epoch": 0.06, + "learning_rate": 4.718539865513929e-05, + "loss": 2.794, + "step": 88900 + }, + { + "epoch": 0.06, + "learning_rate": 4.718219660582773e-05, + "loss": 2.8174, + "step": 89000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5968127250671387, + "eval_runtime": 177.4518, + "eval_samples_per_second": 56.353, + "eval_steps_per_second": 3.522, + "step": 89000 + }, + { + "epoch": 0.06, + "learning_rate": 4.717899455651617e-05, + "loss": 2.8056, + "step": 89100 + }, + { + "epoch": 0.06, + "learning_rate": 4.717579250720461e-05, + "loss": 2.792, + "step": 89200 + }, + { + "epoch": 0.06, + "learning_rate": 4.717259045789305e-05, + "loss": 2.7907, + "step": 89300 + }, + { + "epoch": 0.06, + "learning_rate": 4.71693884085815e-05, + "loss": 2.8287, + "step": 89400 + }, + { + "epoch": 0.06, + "learning_rate": 4.716618635926994e-05, + "loss": 2.803, + "step": 89500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7162984309958377e-05, + "loss": 2.8202, + "step": 89600 + }, + { + "epoch": 0.06, + "learning_rate": 4.7159782260646816e-05, + "loss": 2.8129, + "step": 89700 + }, + { + "epoch": 0.06, + "learning_rate": 4.7156580211335256e-05, + "loss": 2.8159, + "step": 89800 + }, + { + "epoch": 0.06, + "learning_rate": 4.7153378162023696e-05, + "loss": 2.8032, + "step": 89900 + }, + { + "epoch": 0.06, + "learning_rate": 4.7150176112712136e-05, + "loss": 2.8074, + "step": 90000 + }, + { + "epoch": 0.06, + "eval_loss": 2.595768451690674, + "eval_runtime": 180.4275, + "eval_samples_per_second": 55.424, + "eval_steps_per_second": 3.464, + "step": 90000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7146974063400575e-05, + "loss": 2.8138, + "step": 90100 + }, + { + "epoch": 0.06, + "learning_rate": 4.714377201408902e-05, + "loss": 2.808, + "step": 90200 + }, + { + "epoch": 0.06, + "learning_rate": 4.714056996477746e-05, + "loss": 2.7893, + "step": 90300 + }, + { + "epoch": 0.06, + "learning_rate": 4.71373679154659e-05, + "loss": 2.7843, + "step": 90400 + }, + { + "epoch": 0.06, + "learning_rate": 4.713416586615434e-05, + "loss": 2.8119, + "step": 90500 + }, + { + "epoch": 0.06, + "learning_rate": 4.713096381684278e-05, + "loss": 2.794, + "step": 90600 + }, + { + "epoch": 0.06, + "learning_rate": 4.712776176753122e-05, + "loss": 2.8023, + "step": 90700 + }, + { + "epoch": 0.06, + "learning_rate": 4.712455971821966e-05, + "loss": 2.7732, + "step": 90800 + }, + { + "epoch": 0.06, + "learning_rate": 4.712135766890811e-05, + "loss": 2.7963, + "step": 90900 + }, + { + "epoch": 0.06, + "learning_rate": 4.7118155619596546e-05, + "loss": 2.8083, + "step": 91000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5970373153686523, + "eval_runtime": 178.3132, + "eval_samples_per_second": 56.081, + "eval_steps_per_second": 3.505, + "step": 91000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7114953570284986e-05, + "loss": 2.7963, + "step": 91100 + }, + { + "epoch": 0.06, + "learning_rate": 4.7111751520973426e-05, + "loss": 2.8041, + "step": 91200 + }, + { + "epoch": 0.06, + "learning_rate": 4.7108549471661866e-05, + "loss": 2.7839, + "step": 91300 + }, + { + "epoch": 0.06, + "learning_rate": 4.7105347422350305e-05, + "loss": 2.8125, + "step": 91400 + }, + { + "epoch": 0.06, + "learning_rate": 4.7102145373038745e-05, + "loss": 2.8114, + "step": 91500 + }, + { + "epoch": 0.06, + "learning_rate": 4.709894332372719e-05, + "loss": 2.8269, + "step": 91600 + }, + { + "epoch": 0.06, + "learning_rate": 4.7095741274415625e-05, + "loss": 2.8165, + "step": 91700 + }, + { + "epoch": 0.06, + "learning_rate": 4.709253922510407e-05, + "loss": 2.804, + "step": 91800 + }, + { + "epoch": 0.06, + "learning_rate": 4.708933717579251e-05, + "loss": 2.777, + "step": 91900 + }, + { + "epoch": 0.06, + "learning_rate": 4.708613512648095e-05, + "loss": 2.8046, + "step": 92000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5963518619537354, + "eval_runtime": 176.577, + "eval_samples_per_second": 56.633, + "eval_steps_per_second": 3.54, + "step": 92000 + }, + { + "epoch": 0.06, + "learning_rate": 4.708293307716939e-05, + "loss": 2.8146, + "step": 92100 + }, + { + "epoch": 0.06, + "learning_rate": 4.707973102785783e-05, + "loss": 2.8026, + "step": 92200 + }, + { + "epoch": 0.06, + "learning_rate": 4.707652897854627e-05, + "loss": 2.8044, + "step": 92300 + }, + { + "epoch": 0.06, + "learning_rate": 4.707332692923471e-05, + "loss": 2.8016, + "step": 92400 + }, + { + "epoch": 0.06, + "learning_rate": 4.7070124879923156e-05, + "loss": 2.7925, + "step": 92500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7066922830611596e-05, + "loss": 2.8174, + "step": 92600 + }, + { + "epoch": 0.06, + "learning_rate": 4.7063720781300036e-05, + "loss": 2.7998, + "step": 92700 + }, + { + "epoch": 0.06, + "learning_rate": 4.7060518731988475e-05, + "loss": 2.8043, + "step": 92800 + }, + { + "epoch": 0.06, + "learning_rate": 4.7057316682676915e-05, + "loss": 2.8038, + "step": 92900 + }, + { + "epoch": 0.06, + "learning_rate": 4.7054114633365355e-05, + "loss": 2.7837, + "step": 93000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5965936183929443, + "eval_runtime": 177.0282, + "eval_samples_per_second": 56.488, + "eval_steps_per_second": 3.531, + "step": 93000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7050912584053795e-05, + "loss": 2.8096, + "step": 93100 + }, + { + "epoch": 0.06, + "learning_rate": 4.704771053474224e-05, + "loss": 2.8016, + "step": 93200 + }, + { + "epoch": 0.06, + "learning_rate": 4.7044508485430674e-05, + "loss": 2.8249, + "step": 93300 + }, + { + "epoch": 0.06, + "learning_rate": 4.704130643611912e-05, + "loss": 2.8016, + "step": 93400 + }, + { + "epoch": 0.06, + "learning_rate": 4.703810438680756e-05, + "loss": 2.7854, + "step": 93500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7034902337496e-05, + "loss": 2.8012, + "step": 93600 + }, + { + "epoch": 0.06, + "learning_rate": 4.703170028818444e-05, + "loss": 2.7958, + "step": 93700 + }, + { + "epoch": 0.06, + "learning_rate": 4.702849823887288e-05, + "loss": 2.7949, + "step": 93800 + }, + { + "epoch": 0.06, + "learning_rate": 4.7025296189561326e-05, + "loss": 2.7713, + "step": 93900 + }, + { + "epoch": 0.06, + "learning_rate": 4.702209414024976e-05, + "loss": 2.8173, + "step": 94000 + }, + { + "epoch": 0.06, + "eval_loss": 2.594791889190674, + "eval_runtime": 177.325, + "eval_samples_per_second": 56.394, + "eval_steps_per_second": 3.525, + "step": 94000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7018892090938205e-05, + "loss": 2.7776, + "step": 94100 + }, + { + "epoch": 0.06, + "learning_rate": 4.7015690041626645e-05, + "loss": 2.8078, + "step": 94200 + }, + { + "epoch": 0.06, + "learning_rate": 4.7012487992315085e-05, + "loss": 2.7899, + "step": 94300 + }, + { + "epoch": 0.06, + "learning_rate": 4.7009285943003525e-05, + "loss": 2.7685, + "step": 94400 + }, + { + "epoch": 0.06, + "learning_rate": 4.7006083893691964e-05, + "loss": 2.7962, + "step": 94500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7002881844380404e-05, + "loss": 2.7968, + "step": 94600 + }, + { + "epoch": 0.06, + "learning_rate": 4.6999679795068844e-05, + "loss": 2.7893, + "step": 94700 + }, + { + "epoch": 0.06, + "learning_rate": 4.699647774575729e-05, + "loss": 2.7776, + "step": 94800 + }, + { + "epoch": 0.06, + "learning_rate": 4.6993275696445723e-05, + "loss": 2.8141, + "step": 94900 + }, + { + "epoch": 0.06, + "learning_rate": 4.699007364713417e-05, + "loss": 2.7897, + "step": 95000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5926544666290283, + "eval_runtime": 178.4679, + "eval_samples_per_second": 56.033, + "eval_steps_per_second": 3.502, + "step": 95000 + }, + { + "epoch": 0.06, + "learning_rate": 4.698687159782261e-05, + "loss": 2.8053, + "step": 95100 + }, + { + "epoch": 0.06, + "learning_rate": 4.698366954851105e-05, + "loss": 2.7968, + "step": 95200 + }, + { + "epoch": 0.06, + "learning_rate": 4.698046749919949e-05, + "loss": 2.7947, + "step": 95300 + }, + { + "epoch": 0.06, + "learning_rate": 4.697726544988793e-05, + "loss": 2.7685, + "step": 95400 + }, + { + "epoch": 0.06, + "learning_rate": 4.6974063400576375e-05, + "loss": 2.8079, + "step": 95500 + }, + { + "epoch": 0.06, + "learning_rate": 4.697086135126481e-05, + "loss": 2.7833, + "step": 95600 + }, + { + "epoch": 0.06, + "learning_rate": 4.6967659301953255e-05, + "loss": 2.7639, + "step": 95700 + }, + { + "epoch": 0.06, + "learning_rate": 4.6964457252641695e-05, + "loss": 2.7842, + "step": 95800 + }, + { + "epoch": 0.06, + "learning_rate": 4.6961255203330134e-05, + "loss": 2.7973, + "step": 95900 + }, + { + "epoch": 0.06, + "learning_rate": 4.6958053154018574e-05, + "loss": 2.79, + "step": 96000 + }, + { + "epoch": 0.06, + "eval_loss": 2.592930555343628, + "eval_runtime": 178.0121, + "eval_samples_per_second": 56.176, + "eval_steps_per_second": 3.511, + "step": 96000 + }, + { + "epoch": 0.06, + "learning_rate": 4.6954851104707014e-05, + "loss": 2.7893, + "step": 96100 + }, + { + "epoch": 0.06, + "learning_rate": 4.695164905539546e-05, + "loss": 2.7867, + "step": 96200 + }, + { + "epoch": 0.06, + "learning_rate": 4.694844700608389e-05, + "loss": 2.7888, + "step": 96300 + }, + { + "epoch": 0.06, + "learning_rate": 4.694524495677234e-05, + "loss": 2.7878, + "step": 96400 + }, + { + "epoch": 0.06, + "learning_rate": 4.694204290746077e-05, + "loss": 2.7961, + "step": 96500 + }, + { + "epoch": 0.06, + "learning_rate": 4.693884085814922e-05, + "loss": 2.7752, + "step": 96600 + }, + { + "epoch": 0.06, + "learning_rate": 4.693563880883766e-05, + "loss": 2.7838, + "step": 96700 + }, + { + "epoch": 0.06, + "learning_rate": 4.69324367595261e-05, + "loss": 2.7856, + "step": 96800 + }, + { + "epoch": 0.06, + "learning_rate": 4.692923471021454e-05, + "loss": 2.7888, + "step": 96900 + }, + { + "epoch": 0.06, + "learning_rate": 4.692603266090298e-05, + "loss": 2.7849, + "step": 97000 + }, + { + "epoch": 0.06, + "eval_loss": 2.591611623764038, + "eval_runtime": 178.4835, + "eval_samples_per_second": 56.028, + "eval_steps_per_second": 3.502, + "step": 97000 + }, + { + "epoch": 0.06, + "learning_rate": 4.6922830611591425e-05, + "loss": 2.7922, + "step": 97100 + }, + { + "epoch": 0.06, + "learning_rate": 4.691962856227986e-05, + "loss": 2.782, + "step": 97200 + }, + { + "epoch": 0.06, + "learning_rate": 4.6916426512968304e-05, + "loss": 2.792, + "step": 97300 + }, + { + "epoch": 0.06, + "learning_rate": 4.6913224463656744e-05, + "loss": 2.7722, + "step": 97400 + }, + { + "epoch": 0.06, + "learning_rate": 4.6910022414345184e-05, + "loss": 2.7712, + "step": 97500 + }, + { + "epoch": 0.06, + "learning_rate": 4.6906820365033623e-05, + "loss": 2.7903, + "step": 97600 + }, + { + "epoch": 0.06, + "learning_rate": 4.690361831572206e-05, + "loss": 2.7668, + "step": 97700 + }, + { + "epoch": 0.06, + "learning_rate": 4.690041626641051e-05, + "loss": 2.8183, + "step": 97800 + }, + { + "epoch": 0.06, + "learning_rate": 4.689721421709894e-05, + "loss": 2.7972, + "step": 97900 + }, + { + "epoch": 0.06, + "learning_rate": 4.689401216778739e-05, + "loss": 2.7841, + "step": 98000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5918164253234863, + "eval_runtime": 177.2044, + "eval_samples_per_second": 56.432, + "eval_steps_per_second": 3.527, + "step": 98000 + }, + { + "epoch": 0.06, + "learning_rate": 4.689081011847582e-05, + "loss": 2.7741, + "step": 98100 + }, + { + "epoch": 0.06, + "learning_rate": 4.688760806916427e-05, + "loss": 2.7918, + "step": 98200 + }, + { + "epoch": 0.06, + "learning_rate": 4.688440601985271e-05, + "loss": 2.7682, + "step": 98300 + }, + { + "epoch": 0.06, + "learning_rate": 4.688120397054115e-05, + "loss": 2.7929, + "step": 98400 + }, + { + "epoch": 0.06, + "learning_rate": 4.6878001921229595e-05, + "loss": 2.7776, + "step": 98500 + }, + { + "epoch": 0.06, + "learning_rate": 4.687479987191803e-05, + "loss": 2.799, + "step": 98600 + }, + { + "epoch": 0.06, + "learning_rate": 4.6871597822606474e-05, + "loss": 2.7845, + "step": 98700 + }, + { + "epoch": 0.06, + "learning_rate": 4.686839577329491e-05, + "loss": 2.7769, + "step": 98800 + }, + { + "epoch": 0.06, + "learning_rate": 4.6865193723983354e-05, + "loss": 2.7855, + "step": 98900 + }, + { + "epoch": 0.06, + "learning_rate": 4.686199167467179e-05, + "loss": 2.7917, + "step": 99000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5912928581237793, + "eval_runtime": 175.7874, + "eval_samples_per_second": 56.887, + "eval_steps_per_second": 3.555, + "step": 99000 + }, + { + "epoch": 0.06, + "learning_rate": 4.685878962536023e-05, + "loss": 2.7712, + "step": 99100 + }, + { + "epoch": 0.06, + "learning_rate": 4.685558757604867e-05, + "loss": 2.7852, + "step": 99200 + }, + { + "epoch": 0.06, + "learning_rate": 4.685238552673711e-05, + "loss": 2.7987, + "step": 99300 + }, + { + "epoch": 0.06, + "learning_rate": 4.684918347742556e-05, + "loss": 2.7848, + "step": 99400 + }, + { + "epoch": 0.06, + "learning_rate": 4.684598142811399e-05, + "loss": 2.8, + "step": 99500 + }, + { + "epoch": 0.06, + "learning_rate": 4.684277937880244e-05, + "loss": 2.7905, + "step": 99600 + }, + { + "epoch": 0.06, + "learning_rate": 4.683957732949087e-05, + "loss": 2.7905, + "step": 99700 + }, + { + "epoch": 0.06, + "learning_rate": 4.683637528017932e-05, + "loss": 2.7818, + "step": 99800 + }, + { + "epoch": 0.06, + "learning_rate": 4.683317323086776e-05, + "loss": 2.7893, + "step": 99900 + }, + { + "epoch": 0.06, + "learning_rate": 4.68299711815562e-05, + "loss": 2.7955, + "step": 100000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5897228717803955, + "eval_runtime": 175.0685, + "eval_samples_per_second": 57.12, + "eval_steps_per_second": 3.57, + "step": 100000 + }, + { + "epoch": 0.06, + "learning_rate": 4.6826769132244644e-05, + "loss": 2.7748, + "step": 100100 + }, + { + "epoch": 0.06, + "learning_rate": 4.682356708293308e-05, + "loss": 2.7957, + "step": 100200 + }, + { + "epoch": 0.06, + "learning_rate": 4.6820365033621523e-05, + "loss": 2.7879, + "step": 100300 + }, + { + "epoch": 0.06, + "learning_rate": 4.6817162984309956e-05, + "loss": 2.7955, + "step": 100400 + }, + { + "epoch": 0.06, + "learning_rate": 4.68139609349984e-05, + "loss": 2.7963, + "step": 100500 + }, + { + "epoch": 0.06, + "learning_rate": 4.681075888568684e-05, + "loss": 2.7765, + "step": 100600 + }, + { + "epoch": 0.06, + "learning_rate": 4.680755683637528e-05, + "loss": 2.7767, + "step": 100700 + }, + { + "epoch": 0.06, + "learning_rate": 4.680435478706372e-05, + "loss": 2.7808, + "step": 100800 + }, + { + "epoch": 0.06, + "learning_rate": 4.680115273775216e-05, + "loss": 2.7726, + "step": 100900 + }, + { + "epoch": 0.06, + "learning_rate": 4.679795068844061e-05, + "loss": 2.7805, + "step": 101000 + }, + { + "epoch": 0.06, + "eval_loss": 2.5878331661224365, + "eval_runtime": 175.8575, + "eval_samples_per_second": 56.864, + "eval_steps_per_second": 3.554, + "step": 101000 + }, + { + "epoch": 0.06, + "learning_rate": 4.679474863912904e-05, + "loss": 2.7823, + "step": 101100 + }, + { + "epoch": 0.06, + "learning_rate": 4.679154658981749e-05, + "loss": 2.7645, + "step": 101200 + }, + { + "epoch": 0.06, + "learning_rate": 4.678834454050592e-05, + "loss": 2.7889, + "step": 101300 + }, + { + "epoch": 0.06, + "learning_rate": 4.678514249119437e-05, + "loss": 2.7796, + "step": 101400 + }, + { + "epoch": 0.06, + "learning_rate": 4.678194044188281e-05, + "loss": 2.7939, + "step": 101500 + }, + { + "epoch": 0.07, + "learning_rate": 4.677873839257125e-05, + "loss": 2.7863, + "step": 101600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6775536343259693e-05, + "loss": 2.7841, + "step": 101700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6772334293948126e-05, + "loss": 2.7648, + "step": 101800 + }, + { + "epoch": 0.07, + "learning_rate": 4.676913224463657e-05, + "loss": 2.7965, + "step": 101900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6765930195325006e-05, + "loss": 2.7859, + "step": 102000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5862410068511963, + "eval_runtime": 175.807, + "eval_samples_per_second": 56.881, + "eval_steps_per_second": 3.555, + "step": 102000 + }, + { + "epoch": 0.07, + "learning_rate": 4.676272814601345e-05, + "loss": 2.807, + "step": 102100 + }, + { + "epoch": 0.07, + "learning_rate": 4.675952609670189e-05, + "loss": 2.7853, + "step": 102200 + }, + { + "epoch": 0.07, + "learning_rate": 4.675632404739033e-05, + "loss": 2.778, + "step": 102300 + }, + { + "epoch": 0.07, + "learning_rate": 4.675312199807877e-05, + "loss": 2.7789, + "step": 102400 + }, + { + "epoch": 0.07, + "learning_rate": 4.674991994876721e-05, + "loss": 2.7656, + "step": 102500 + }, + { + "epoch": 0.07, + "learning_rate": 4.674671789945566e-05, + "loss": 2.7784, + "step": 102600 + }, + { + "epoch": 0.07, + "learning_rate": 4.674351585014409e-05, + "loss": 2.7832, + "step": 102700 + }, + { + "epoch": 0.07, + "learning_rate": 4.674031380083254e-05, + "loss": 2.779, + "step": 102800 + }, + { + "epoch": 0.07, + "learning_rate": 4.673711175152097e-05, + "loss": 2.7935, + "step": 102900 + }, + { + "epoch": 0.07, + "learning_rate": 4.673390970220942e-05, + "loss": 2.7702, + "step": 103000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5846774578094482, + "eval_runtime": 175.2999, + "eval_samples_per_second": 57.045, + "eval_steps_per_second": 3.565, + "step": 103000 + }, + { + "epoch": 0.07, + "learning_rate": 4.6730707652897857e-05, + "loss": 2.7767, + "step": 103100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6727505603586296e-05, + "loss": 2.7939, + "step": 103200 + }, + { + "epoch": 0.07, + "learning_rate": 4.672430355427474e-05, + "loss": 2.7715, + "step": 103300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6721101504963176e-05, + "loss": 2.7721, + "step": 103400 + }, + { + "epoch": 0.07, + "learning_rate": 4.671789945565162e-05, + "loss": 2.753, + "step": 103500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6714697406340055e-05, + "loss": 2.7613, + "step": 103600 + }, + { + "epoch": 0.07, + "learning_rate": 4.67114953570285e-05, + "loss": 2.7891, + "step": 103700 + }, + { + "epoch": 0.07, + "learning_rate": 4.670829330771694e-05, + "loss": 2.761, + "step": 103800 + }, + { + "epoch": 0.07, + "learning_rate": 4.670509125840538e-05, + "loss": 2.7755, + "step": 103900 + }, + { + "epoch": 0.07, + "learning_rate": 4.670188920909382e-05, + "loss": 2.7838, + "step": 104000 + }, + { + "epoch": 0.07, + "eval_loss": 2.584876537322998, + "eval_runtime": 175.7481, + "eval_samples_per_second": 56.9, + "eval_steps_per_second": 3.556, + "step": 104000 + }, + { + "epoch": 0.07, + "learning_rate": 4.669868715978226e-05, + "loss": 2.7767, + "step": 104100 + }, + { + "epoch": 0.07, + "learning_rate": 4.669548511047071e-05, + "loss": 2.7673, + "step": 104200 + }, + { + "epoch": 0.07, + "learning_rate": 4.669228306115914e-05, + "loss": 2.7829, + "step": 104300 + }, + { + "epoch": 0.07, + "learning_rate": 4.668908101184759e-05, + "loss": 2.7594, + "step": 104400 + }, + { + "epoch": 0.07, + "learning_rate": 4.668587896253602e-05, + "loss": 2.7741, + "step": 104500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6682676913224466e-05, + "loss": 2.7697, + "step": 104600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6679474863912906e-05, + "loss": 2.782, + "step": 104700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6676272814601346e-05, + "loss": 2.7872, + "step": 104800 + }, + { + "epoch": 0.07, + "learning_rate": 4.667307076528979e-05, + "loss": 2.7659, + "step": 104900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6669868715978225e-05, + "loss": 2.7707, + "step": 105000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5838239192962646, + "eval_runtime": 179.8684, + "eval_samples_per_second": 55.596, + "eval_steps_per_second": 3.475, + "step": 105000 + }, + { + "epoch": 0.07, + "learning_rate": 4.666666666666667e-05, + "loss": 2.7765, + "step": 105100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6663464617355105e-05, + "loss": 2.7746, + "step": 105200 + }, + { + "epoch": 0.07, + "learning_rate": 4.666026256804355e-05, + "loss": 2.7765, + "step": 105300 + }, + { + "epoch": 0.07, + "learning_rate": 4.665706051873199e-05, + "loss": 2.7768, + "step": 105400 + }, + { + "epoch": 0.07, + "learning_rate": 4.665385846942043e-05, + "loss": 2.7716, + "step": 105500 + }, + { + "epoch": 0.07, + "learning_rate": 4.665065642010887e-05, + "loss": 2.7677, + "step": 105600 + }, + { + "epoch": 0.07, + "learning_rate": 4.664745437079731e-05, + "loss": 2.7885, + "step": 105700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6644252321485757e-05, + "loss": 2.7738, + "step": 105800 + }, + { + "epoch": 0.07, + "learning_rate": 4.664105027217419e-05, + "loss": 2.7773, + "step": 105900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6637848222862636e-05, + "loss": 2.7775, + "step": 106000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5846199989318848, + "eval_runtime": 175.7932, + "eval_samples_per_second": 56.885, + "eval_steps_per_second": 3.555, + "step": 106000 + }, + { + "epoch": 0.07, + "learning_rate": 4.6634646173551076e-05, + "loss": 2.7822, + "step": 106100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6631444124239516e-05, + "loss": 2.7706, + "step": 106200 + }, + { + "epoch": 0.07, + "learning_rate": 4.6628242074927955e-05, + "loss": 2.7801, + "step": 106300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6625040025616395e-05, + "loss": 2.7994, + "step": 106400 + }, + { + "epoch": 0.07, + "learning_rate": 4.662183797630484e-05, + "loss": 2.7621, + "step": 106500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6618635926993274e-05, + "loss": 2.7701, + "step": 106600 + }, + { + "epoch": 0.07, + "learning_rate": 4.661543387768172e-05, + "loss": 2.7529, + "step": 106700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6612231828370154e-05, + "loss": 2.7872, + "step": 106800 + }, + { + "epoch": 0.07, + "learning_rate": 4.66090297790586e-05, + "loss": 2.7569, + "step": 106900 + }, + { + "epoch": 0.07, + "learning_rate": 4.660582772974704e-05, + "loss": 2.7841, + "step": 107000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5815844535827637, + "eval_runtime": 174.957, + "eval_samples_per_second": 57.157, + "eval_steps_per_second": 3.572, + "step": 107000 + }, + { + "epoch": 0.07, + "learning_rate": 4.660262568043548e-05, + "loss": 2.7862, + "step": 107100 + }, + { + "epoch": 0.07, + "learning_rate": 4.659942363112392e-05, + "loss": 2.7593, + "step": 107200 + }, + { + "epoch": 0.07, + "learning_rate": 4.659622158181236e-05, + "loss": 2.7682, + "step": 107300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6593019532500806e-05, + "loss": 2.7627, + "step": 107400 + }, + { + "epoch": 0.07, + "learning_rate": 4.658981748318924e-05, + "loss": 2.7803, + "step": 107500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6586615433877685e-05, + "loss": 2.7773, + "step": 107600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6583413384566125e-05, + "loss": 2.7752, + "step": 107700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6580211335254565e-05, + "loss": 2.7701, + "step": 107800 + }, + { + "epoch": 0.07, + "learning_rate": 4.6577009285943005e-05, + "loss": 2.7795, + "step": 107900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6573807236631444e-05, + "loss": 2.7827, + "step": 108000 + }, + { + "epoch": 0.07, + "eval_loss": 2.580613374710083, + "eval_runtime": 175.685, + "eval_samples_per_second": 56.92, + "eval_steps_per_second": 3.558, + "step": 108000 + }, + { + "epoch": 0.07, + "learning_rate": 4.657060518731989e-05, + "loss": 2.766, + "step": 108100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6567403138008324e-05, + "loss": 2.7593, + "step": 108200 + }, + { + "epoch": 0.07, + "learning_rate": 4.656420108869677e-05, + "loss": 2.7864, + "step": 108300 + }, + { + "epoch": 0.07, + "learning_rate": 4.656099903938521e-05, + "loss": 2.8006, + "step": 108400 + }, + { + "epoch": 0.07, + "learning_rate": 4.655779699007365e-05, + "loss": 2.7565, + "step": 108500 + }, + { + "epoch": 0.07, + "learning_rate": 4.655459494076209e-05, + "loss": 2.787, + "step": 108600 + }, + { + "epoch": 0.07, + "learning_rate": 4.655139289145053e-05, + "loss": 2.7661, + "step": 108700 + }, + { + "epoch": 0.07, + "learning_rate": 4.654819084213897e-05, + "loss": 2.7825, + "step": 108800 + }, + { + "epoch": 0.07, + "learning_rate": 4.654498879282741e-05, + "loss": 2.7784, + "step": 108900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6541786743515855e-05, + "loss": 2.7816, + "step": 109000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5822932720184326, + "eval_runtime": 176.0692, + "eval_samples_per_second": 56.796, + "eval_steps_per_second": 3.55, + "step": 109000 + }, + { + "epoch": 0.07, + "learning_rate": 4.653858469420429e-05, + "loss": 2.7873, + "step": 109100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6535382644892735e-05, + "loss": 2.763, + "step": 109200 + }, + { + "epoch": 0.07, + "learning_rate": 4.6532180595581175e-05, + "loss": 2.776, + "step": 109300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6528978546269614e-05, + "loss": 2.7801, + "step": 109400 + }, + { + "epoch": 0.07, + "learning_rate": 4.6525776496958054e-05, + "loss": 2.7916, + "step": 109500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6522574447646494e-05, + "loss": 2.7791, + "step": 109600 + }, + { + "epoch": 0.07, + "learning_rate": 4.651937239833494e-05, + "loss": 2.7781, + "step": 109700 + }, + { + "epoch": 0.07, + "learning_rate": 4.651617034902337e-05, + "loss": 2.7731, + "step": 109800 + }, + { + "epoch": 0.07, + "learning_rate": 4.651296829971182e-05, + "loss": 2.7709, + "step": 109900 + }, + { + "epoch": 0.07, + "learning_rate": 4.650976625040026e-05, + "loss": 2.7724, + "step": 110000 + }, + { + "epoch": 0.07, + "eval_loss": 2.581916332244873, + "eval_runtime": 175.7459, + "eval_samples_per_second": 56.9, + "eval_steps_per_second": 3.556, + "step": 110000 + }, + { + "epoch": 0.07, + "learning_rate": 4.65065642010887e-05, + "loss": 2.7789, + "step": 110100 + }, + { + "epoch": 0.07, + "learning_rate": 4.650336215177714e-05, + "loss": 2.7736, + "step": 110200 + }, + { + "epoch": 0.07, + "learning_rate": 4.650016010246558e-05, + "loss": 2.7725, + "step": 110300 + }, + { + "epoch": 0.07, + "learning_rate": 4.649695805315402e-05, + "loss": 2.7735, + "step": 110400 + }, + { + "epoch": 0.07, + "learning_rate": 4.649375600384246e-05, + "loss": 2.7679, + "step": 110500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6490553954530905e-05, + "loss": 2.7728, + "step": 110600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6487351905219344e-05, + "loss": 2.7534, + "step": 110700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6484149855907784e-05, + "loss": 2.773, + "step": 110800 + }, + { + "epoch": 0.07, + "learning_rate": 4.6480947806596224e-05, + "loss": 2.7793, + "step": 110900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6477745757284664e-05, + "loss": 2.7658, + "step": 111000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5806679725646973, + "eval_runtime": 176.035, + "eval_samples_per_second": 56.807, + "eval_steps_per_second": 3.55, + "step": 111000 + }, + { + "epoch": 0.07, + "learning_rate": 4.64745437079731e-05, + "loss": 2.779, + "step": 111100 + }, + { + "epoch": 0.07, + "learning_rate": 4.647134165866154e-05, + "loss": 2.7775, + "step": 111200 + }, + { + "epoch": 0.07, + "learning_rate": 4.646813960934999e-05, + "loss": 2.7601, + "step": 111300 + }, + { + "epoch": 0.07, + "learning_rate": 4.646493756003843e-05, + "loss": 2.7548, + "step": 111400 + }, + { + "epoch": 0.07, + "learning_rate": 4.646173551072687e-05, + "loss": 2.7505, + "step": 111500 + }, + { + "epoch": 0.07, + "learning_rate": 4.645853346141531e-05, + "loss": 2.7647, + "step": 111600 + }, + { + "epoch": 0.07, + "learning_rate": 4.645533141210375e-05, + "loss": 2.7719, + "step": 111700 + }, + { + "epoch": 0.07, + "learning_rate": 4.645212936279219e-05, + "loss": 2.7607, + "step": 111800 + }, + { + "epoch": 0.07, + "learning_rate": 4.644892731348063e-05, + "loss": 2.7816, + "step": 111900 + }, + { + "epoch": 0.07, + "learning_rate": 4.644572526416907e-05, + "loss": 2.7741, + "step": 112000 + }, + { + "epoch": 0.07, + "eval_loss": 2.578455924987793, + "eval_runtime": 176.0528, + "eval_samples_per_second": 56.801, + "eval_steps_per_second": 3.55, + "step": 112000 + }, + { + "epoch": 0.07, + "learning_rate": 4.644252321485751e-05, + "loss": 2.7481, + "step": 112100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6439321165545954e-05, + "loss": 2.751, + "step": 112200 + }, + { + "epoch": 0.07, + "learning_rate": 4.6436119116234394e-05, + "loss": 2.7685, + "step": 112300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6432917066922834e-05, + "loss": 2.7622, + "step": 112400 + }, + { + "epoch": 0.07, + "learning_rate": 4.642971501761127e-05, + "loss": 2.7674, + "step": 112500 + }, + { + "epoch": 0.07, + "learning_rate": 4.642651296829971e-05, + "loss": 2.7388, + "step": 112600 + }, + { + "epoch": 0.07, + "learning_rate": 4.642331091898815e-05, + "loss": 2.7537, + "step": 112700 + }, + { + "epoch": 0.07, + "learning_rate": 4.642010886967659e-05, + "loss": 2.757, + "step": 112800 + }, + { + "epoch": 0.07, + "learning_rate": 4.641690682036504e-05, + "loss": 2.7497, + "step": 112900 + }, + { + "epoch": 0.07, + "learning_rate": 4.641370477105348e-05, + "loss": 2.8012, + "step": 113000 + }, + { + "epoch": 0.07, + "eval_loss": 2.575178384780884, + "eval_runtime": 176.1143, + "eval_samples_per_second": 56.781, + "eval_steps_per_second": 3.549, + "step": 113000 + }, + { + "epoch": 0.07, + "learning_rate": 4.641050272174192e-05, + "loss": 2.772, + "step": 113100 + }, + { + "epoch": 0.07, + "learning_rate": 4.640730067243036e-05, + "loss": 2.7572, + "step": 113200 + }, + { + "epoch": 0.07, + "learning_rate": 4.64040986231188e-05, + "loss": 2.763, + "step": 113300 + }, + { + "epoch": 0.07, + "learning_rate": 4.640089657380724e-05, + "loss": 2.74, + "step": 113400 + }, + { + "epoch": 0.07, + "learning_rate": 4.639769452449568e-05, + "loss": 2.7686, + "step": 113500 + }, + { + "epoch": 0.07, + "learning_rate": 4.639449247518412e-05, + "loss": 2.7636, + "step": 113600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6391290425872564e-05, + "loss": 2.7625, + "step": 113700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6388088376561003e-05, + "loss": 2.7554, + "step": 113800 + }, + { + "epoch": 0.07, + "learning_rate": 4.638488632724944e-05, + "loss": 2.7588, + "step": 113900 + }, + { + "epoch": 0.07, + "learning_rate": 4.638168427793788e-05, + "loss": 2.7621, + "step": 114000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5783112049102783, + "eval_runtime": 178.5692, + "eval_samples_per_second": 56.001, + "eval_steps_per_second": 3.5, + "step": 114000 + }, + { + "epoch": 0.07, + "learning_rate": 4.637848222862632e-05, + "loss": 2.7509, + "step": 114100 + }, + { + "epoch": 0.07, + "learning_rate": 4.637528017931476e-05, + "loss": 2.7537, + "step": 114200 + }, + { + "epoch": 0.07, + "learning_rate": 4.63720781300032e-05, + "loss": 2.7738, + "step": 114300 + }, + { + "epoch": 0.07, + "learning_rate": 4.636887608069164e-05, + "loss": 2.7833, + "step": 114400 + }, + { + "epoch": 0.07, + "learning_rate": 4.636567403138009e-05, + "loss": 2.7972, + "step": 114500 + }, + { + "epoch": 0.07, + "learning_rate": 4.636247198206853e-05, + "loss": 2.7572, + "step": 114600 + }, + { + "epoch": 0.07, + "learning_rate": 4.635926993275697e-05, + "loss": 2.7588, + "step": 114700 + }, + { + "epoch": 0.07, + "learning_rate": 4.635606788344541e-05, + "loss": 2.7773, + "step": 114800 + }, + { + "epoch": 0.07, + "learning_rate": 4.635286583413385e-05, + "loss": 2.7797, + "step": 114900 + }, + { + "epoch": 0.07, + "learning_rate": 4.634966378482229e-05, + "loss": 2.7656, + "step": 115000 + }, + { + "epoch": 0.07, + "eval_loss": 2.577411413192749, + "eval_runtime": 175.7584, + "eval_samples_per_second": 56.896, + "eval_steps_per_second": 3.556, + "step": 115000 + }, + { + "epoch": 0.07, + "learning_rate": 4.634646173551073e-05, + "loss": 2.7533, + "step": 115100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6343259686199167e-05, + "loss": 2.7707, + "step": 115200 + }, + { + "epoch": 0.07, + "learning_rate": 4.634005763688761e-05, + "loss": 2.7447, + "step": 115300 + }, + { + "epoch": 0.07, + "learning_rate": 4.633685558757605e-05, + "loss": 2.7532, + "step": 115400 + }, + { + "epoch": 0.07, + "learning_rate": 4.633365353826449e-05, + "loss": 2.7768, + "step": 115500 + }, + { + "epoch": 0.07, + "learning_rate": 4.633045148895293e-05, + "loss": 2.7717, + "step": 115600 + }, + { + "epoch": 0.07, + "learning_rate": 4.632724943964137e-05, + "loss": 2.7603, + "step": 115700 + }, + { + "epoch": 0.07, + "learning_rate": 4.632404739032981e-05, + "loss": 2.7537, + "step": 115800 + }, + { + "epoch": 0.07, + "learning_rate": 4.632084534101825e-05, + "loss": 2.7497, + "step": 115900 + }, + { + "epoch": 0.07, + "learning_rate": 4.63176432917067e-05, + "loss": 2.7698, + "step": 116000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5763938426971436, + "eval_runtime": 176.1475, + "eval_samples_per_second": 56.771, + "eval_steps_per_second": 3.548, + "step": 116000 + }, + { + "epoch": 0.07, + "learning_rate": 4.631444124239514e-05, + "loss": 2.7675, + "step": 116100 + }, + { + "epoch": 0.07, + "learning_rate": 4.631123919308358e-05, + "loss": 2.7518, + "step": 116200 + }, + { + "epoch": 0.07, + "learning_rate": 4.630803714377202e-05, + "loss": 2.7729, + "step": 116300 + }, + { + "epoch": 0.07, + "learning_rate": 4.630483509446046e-05, + "loss": 2.7543, + "step": 116400 + }, + { + "epoch": 0.07, + "learning_rate": 4.63016330451489e-05, + "loss": 2.752, + "step": 116500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6298430995837336e-05, + "loss": 2.7687, + "step": 116600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6295228946525776e-05, + "loss": 2.7495, + "step": 116700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6292026897214216e-05, + "loss": 2.7704, + "step": 116800 + }, + { + "epoch": 0.07, + "learning_rate": 4.628882484790266e-05, + "loss": 2.7339, + "step": 116900 + }, + { + "epoch": 0.07, + "learning_rate": 4.62856227985911e-05, + "loss": 2.7739, + "step": 117000 + }, + { + "epoch": 0.07, + "eval_loss": 2.5727386474609375, + "eval_runtime": 177.1346, + "eval_samples_per_second": 56.454, + "eval_steps_per_second": 3.528, + "step": 117000 + }, + { + "epoch": 0.07, + "learning_rate": 4.628242074927954e-05, + "loss": 2.7556, + "step": 117100 + }, + { + "epoch": 0.08, + "learning_rate": 4.627921869996798e-05, + "loss": 2.7758, + "step": 117200 + }, + { + "epoch": 0.08, + "learning_rate": 4.627601665065642e-05, + "loss": 2.7593, + "step": 117300 + }, + { + "epoch": 0.08, + "learning_rate": 4.627281460134486e-05, + "loss": 2.7444, + "step": 117400 + }, + { + "epoch": 0.08, + "learning_rate": 4.62696125520333e-05, + "loss": 2.7513, + "step": 117500 + }, + { + "epoch": 0.08, + "learning_rate": 4.626641050272175e-05, + "loss": 2.7554, + "step": 117600 + }, + { + "epoch": 0.08, + "learning_rate": 4.626320845341019e-05, + "loss": 2.7477, + "step": 117700 + }, + { + "epoch": 0.08, + "learning_rate": 4.626000640409863e-05, + "loss": 2.7488, + "step": 117800 + }, + { + "epoch": 0.08, + "learning_rate": 4.6256804354787067e-05, + "loss": 2.7491, + "step": 117900 + }, + { + "epoch": 0.08, + "learning_rate": 4.6253602305475506e-05, + "loss": 2.7655, + "step": 118000 + }, + { + "epoch": 0.08, + "eval_loss": 2.572309732437134, + "eval_runtime": 174.965, + "eval_samples_per_second": 57.154, + "eval_steps_per_second": 3.572, + "step": 118000 + }, + { + "epoch": 0.08, + "learning_rate": 4.6250400256163946e-05, + "loss": 2.7691, + "step": 118100 + }, + { + "epoch": 0.08, + "learning_rate": 4.6247198206852386e-05, + "loss": 2.7688, + "step": 118200 + }, + { + "epoch": 0.08, + "learning_rate": 4.624399615754083e-05, + "loss": 2.7428, + "step": 118300 + }, + { + "epoch": 0.08, + "learning_rate": 4.6240794108229265e-05, + "loss": 2.7508, + "step": 118400 + }, + { + "epoch": 0.08, + "learning_rate": 4.623759205891771e-05, + "loss": 2.7109, + "step": 118500 + }, + { + "epoch": 0.08, + "learning_rate": 4.623439000960615e-05, + "loss": 2.7504, + "step": 118600 + }, + { + "epoch": 0.08, + "learning_rate": 4.623118796029459e-05, + "loss": 2.759, + "step": 118700 + }, + { + "epoch": 0.08, + "learning_rate": 4.622798591098303e-05, + "loss": 2.7441, + "step": 118800 + }, + { + "epoch": 0.08, + "learning_rate": 4.622478386167147e-05, + "loss": 2.7396, + "step": 118900 + }, + { + "epoch": 0.08, + "learning_rate": 4.622158181235991e-05, + "loss": 2.747, + "step": 119000 + }, + { + "epoch": 0.08, + "eval_loss": 2.574122190475464, + "eval_runtime": 175.7122, + "eval_samples_per_second": 56.911, + "eval_steps_per_second": 3.557, + "step": 119000 + }, + { + "epoch": 0.08, + "learning_rate": 4.621837976304835e-05, + "loss": 2.757, + "step": 119100 + }, + { + "epoch": 0.08, + "learning_rate": 4.62151777137368e-05, + "loss": 2.7434, + "step": 119200 + }, + { + "epoch": 0.08, + "learning_rate": 4.6211975664425236e-05, + "loss": 2.7499, + "step": 119300 + }, + { + "epoch": 0.08, + "learning_rate": 4.6208773615113676e-05, + "loss": 2.7546, + "step": 119400 + }, + { + "epoch": 0.08, + "learning_rate": 4.6205571565802116e-05, + "loss": 2.7485, + "step": 119500 + }, + { + "epoch": 0.08, + "learning_rate": 4.6202369516490556e-05, + "loss": 2.7285, + "step": 119600 + }, + { + "epoch": 0.08, + "learning_rate": 4.6199167467178995e-05, + "loss": 2.7476, + "step": 119700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6195965417867435e-05, + "loss": 2.7547, + "step": 119800 + }, + { + "epoch": 0.08, + "learning_rate": 4.619276336855588e-05, + "loss": 2.7497, + "step": 119900 + }, + { + "epoch": 0.08, + "learning_rate": 4.6189561319244315e-05, + "loss": 2.7221, + "step": 120000 + }, + { + "epoch": 0.08, + "eval_loss": 2.573463201522827, + "eval_runtime": 177.4563, + "eval_samples_per_second": 56.352, + "eval_steps_per_second": 3.522, + "step": 120000 + }, + { + "epoch": 0.08, + "learning_rate": 4.618635926993276e-05, + "loss": 2.7397, + "step": 120100 + }, + { + "epoch": 0.08, + "learning_rate": 4.61831572206212e-05, + "loss": 2.7323, + "step": 120200 + }, + { + "epoch": 0.08, + "learning_rate": 4.617995517130964e-05, + "loss": 2.7262, + "step": 120300 + }, + { + "epoch": 0.08, + "learning_rate": 4.617675312199808e-05, + "loss": 2.7333, + "step": 120400 + }, + { + "epoch": 0.08, + "learning_rate": 4.617355107268652e-05, + "loss": 2.7472, + "step": 120500 + }, + { + "epoch": 0.08, + "learning_rate": 4.617034902337497e-05, + "loss": 2.7451, + "step": 120600 + }, + { + "epoch": 0.08, + "learning_rate": 4.61671469740634e-05, + "loss": 2.7302, + "step": 120700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6163944924751846e-05, + "loss": 2.7399, + "step": 120800 + }, + { + "epoch": 0.08, + "learning_rate": 4.6160742875440286e-05, + "loss": 2.7264, + "step": 120900 + }, + { + "epoch": 0.08, + "learning_rate": 4.6157540826128726e-05, + "loss": 2.7283, + "step": 121000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5729925632476807, + "eval_runtime": 177.6122, + "eval_samples_per_second": 56.302, + "eval_steps_per_second": 3.519, + "step": 121000 + }, + { + "epoch": 0.08, + "learning_rate": 4.6154338776817165e-05, + "loss": 2.7362, + "step": 121100 + }, + { + "epoch": 0.08, + "learning_rate": 4.6151136727505605e-05, + "loss": 2.7247, + "step": 121200 + }, + { + "epoch": 0.08, + "learning_rate": 4.614793467819405e-05, + "loss": 2.7468, + "step": 121300 + }, + { + "epoch": 0.08, + "learning_rate": 4.6144732628882485e-05, + "loss": 2.7575, + "step": 121400 + }, + { + "epoch": 0.08, + "learning_rate": 4.614153057957093e-05, + "loss": 2.7403, + "step": 121500 + }, + { + "epoch": 0.08, + "learning_rate": 4.6138328530259364e-05, + "loss": 2.7355, + "step": 121600 + }, + { + "epoch": 0.08, + "learning_rate": 4.613512648094781e-05, + "loss": 2.746, + "step": 121700 + }, + { + "epoch": 0.08, + "learning_rate": 4.613192443163625e-05, + "loss": 2.7353, + "step": 121800 + }, + { + "epoch": 0.08, + "learning_rate": 4.612872238232469e-05, + "loss": 2.7497, + "step": 121900 + }, + { + "epoch": 0.08, + "learning_rate": 4.612552033301313e-05, + "loss": 2.7617, + "step": 122000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5720784664154053, + "eval_runtime": 176.4862, + "eval_samples_per_second": 56.662, + "eval_steps_per_second": 3.541, + "step": 122000 + }, + { + "epoch": 0.08, + "learning_rate": 4.612231828370157e-05, + "loss": 2.7553, + "step": 122100 + }, + { + "epoch": 0.08, + "learning_rate": 4.6119116234390016e-05, + "loss": 2.7597, + "step": 122200 + }, + { + "epoch": 0.08, + "learning_rate": 4.611591418507845e-05, + "loss": 2.7637, + "step": 122300 + }, + { + "epoch": 0.08, + "learning_rate": 4.6112712135766895e-05, + "loss": 2.7447, + "step": 122400 + }, + { + "epoch": 0.08, + "learning_rate": 4.6109510086455335e-05, + "loss": 2.7318, + "step": 122500 + }, + { + "epoch": 0.08, + "learning_rate": 4.6106308037143775e-05, + "loss": 2.7382, + "step": 122600 + }, + { + "epoch": 0.08, + "learning_rate": 4.6103105987832215e-05, + "loss": 2.7487, + "step": 122700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6099903938520654e-05, + "loss": 2.7538, + "step": 122800 + }, + { + "epoch": 0.08, + "learning_rate": 4.60967018892091e-05, + "loss": 2.7384, + "step": 122900 + }, + { + "epoch": 0.08, + "learning_rate": 4.6093499839897534e-05, + "loss": 2.7676, + "step": 123000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5681958198547363, + "eval_runtime": 175.8725, + "eval_samples_per_second": 56.859, + "eval_steps_per_second": 3.554, + "step": 123000 + }, + { + "epoch": 0.08, + "learning_rate": 4.609029779058598e-05, + "loss": 2.7297, + "step": 123100 + }, + { + "epoch": 0.08, + "learning_rate": 4.6087095741274413e-05, + "loss": 2.7523, + "step": 123200 + }, + { + "epoch": 0.08, + "learning_rate": 4.608389369196286e-05, + "loss": 2.7259, + "step": 123300 + }, + { + "epoch": 0.08, + "learning_rate": 4.60806916426513e-05, + "loss": 2.7461, + "step": 123400 + }, + { + "epoch": 0.08, + "learning_rate": 4.607748959333974e-05, + "loss": 2.7475, + "step": 123500 + }, + { + "epoch": 0.08, + "learning_rate": 4.6074287544028186e-05, + "loss": 2.7419, + "step": 123600 + }, + { + "epoch": 0.08, + "learning_rate": 4.607108549471662e-05, + "loss": 2.7554, + "step": 123700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6067883445405065e-05, + "loss": 2.7616, + "step": 123800 + }, + { + "epoch": 0.08, + "learning_rate": 4.60646813960935e-05, + "loss": 2.7714, + "step": 123900 + }, + { + "epoch": 0.08, + "learning_rate": 4.6061479346781945e-05, + "loss": 2.7425, + "step": 124000 + }, + { + "epoch": 0.08, + "eval_loss": 2.56851863861084, + "eval_runtime": 175.5851, + "eval_samples_per_second": 56.952, + "eval_steps_per_second": 3.56, + "step": 124000 + }, + { + "epoch": 0.08, + "learning_rate": 4.6058277297470385e-05, + "loss": 2.7394, + "step": 124100 + }, + { + "epoch": 0.08, + "learning_rate": 4.6055075248158824e-05, + "loss": 2.7452, + "step": 124200 + }, + { + "epoch": 0.08, + "learning_rate": 4.6051873198847264e-05, + "loss": 2.7259, + "step": 124300 + }, + { + "epoch": 0.08, + "learning_rate": 4.6048671149535704e-05, + "loss": 2.7442, + "step": 124400 + }, + { + "epoch": 0.08, + "learning_rate": 4.604546910022415e-05, + "loss": 2.7356, + "step": 124500 + }, + { + "epoch": 0.08, + "learning_rate": 4.604226705091258e-05, + "loss": 2.7473, + "step": 124600 + }, + { + "epoch": 0.08, + "learning_rate": 4.603906500160103e-05, + "loss": 2.7423, + "step": 124700 + }, + { + "epoch": 0.08, + "learning_rate": 4.603586295228946e-05, + "loss": 2.761, + "step": 124800 + }, + { + "epoch": 0.08, + "learning_rate": 4.603266090297791e-05, + "loss": 2.7476, + "step": 124900 + }, + { + "epoch": 0.08, + "learning_rate": 4.602945885366635e-05, + "loss": 2.7267, + "step": 125000 + }, + { + "epoch": 0.08, + "eval_loss": 2.567716360092163, + "eval_runtime": 176.6756, + "eval_samples_per_second": 56.601, + "eval_steps_per_second": 3.538, + "step": 125000 + }, + { + "epoch": 0.08, + "learning_rate": 4.602625680435479e-05, + "loss": 2.7399, + "step": 125100 + }, + { + "epoch": 0.08, + "learning_rate": 4.6023054755043235e-05, + "loss": 2.7479, + "step": 125200 + }, + { + "epoch": 0.08, + "learning_rate": 4.601985270573167e-05, + "loss": 2.7493, + "step": 125300 + }, + { + "epoch": 0.08, + "learning_rate": 4.6016650656420115e-05, + "loss": 2.7457, + "step": 125400 + }, + { + "epoch": 0.08, + "learning_rate": 4.601344860710855e-05, + "loss": 2.7359, + "step": 125500 + }, + { + "epoch": 0.08, + "learning_rate": 4.6010246557796994e-05, + "loss": 2.7371, + "step": 125600 + }, + { + "epoch": 0.08, + "learning_rate": 4.6007044508485434e-05, + "loss": 2.7412, + "step": 125700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6003842459173874e-05, + "loss": 2.7709, + "step": 125800 + }, + { + "epoch": 0.08, + "learning_rate": 4.6000640409862313e-05, + "loss": 2.7478, + "step": 125900 + }, + { + "epoch": 0.08, + "learning_rate": 4.599743836055075e-05, + "loss": 2.7378, + "step": 126000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5691282749176025, + "eval_runtime": 178.5443, + "eval_samples_per_second": 56.009, + "eval_steps_per_second": 3.501, + "step": 126000 + }, + { + "epoch": 0.08, + "learning_rate": 4.59942363112392e-05, + "loss": 2.7571, + "step": 126100 + }, + { + "epoch": 0.08, + "learning_rate": 4.599103426192763e-05, + "loss": 2.7205, + "step": 126200 + }, + { + "epoch": 0.08, + "learning_rate": 4.598783221261608e-05, + "loss": 2.7195, + "step": 126300 + }, + { + "epoch": 0.08, + "learning_rate": 4.598463016330451e-05, + "loss": 2.7403, + "step": 126400 + }, + { + "epoch": 0.08, + "learning_rate": 4.598142811399296e-05, + "loss": 2.7524, + "step": 126500 + }, + { + "epoch": 0.08, + "learning_rate": 4.59782260646814e-05, + "loss": 2.7472, + "step": 126600 + }, + { + "epoch": 0.08, + "learning_rate": 4.597502401536984e-05, + "loss": 2.7372, + "step": 126700 + }, + { + "epoch": 0.08, + "learning_rate": 4.5971821966058285e-05, + "loss": 2.736, + "step": 126800 + }, + { + "epoch": 0.08, + "learning_rate": 4.596861991674672e-05, + "loss": 2.7493, + "step": 126900 + }, + { + "epoch": 0.08, + "learning_rate": 4.5965417867435164e-05, + "loss": 2.7429, + "step": 127000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5668647289276123, + "eval_runtime": 177.1021, + "eval_samples_per_second": 56.465, + "eval_steps_per_second": 3.529, + "step": 127000 + }, + { + "epoch": 0.08, + "learning_rate": 4.59622158181236e-05, + "loss": 2.7402, + "step": 127100 + }, + { + "epoch": 0.08, + "learning_rate": 4.5959013768812044e-05, + "loss": 2.747, + "step": 127200 + }, + { + "epoch": 0.08, + "learning_rate": 4.595581171950048e-05, + "loss": 2.7192, + "step": 127300 + }, + { + "epoch": 0.08, + "learning_rate": 4.595260967018892e-05, + "loss": 2.7243, + "step": 127400 + }, + { + "epoch": 0.08, + "learning_rate": 4.594940762087736e-05, + "loss": 2.7299, + "step": 127500 + }, + { + "epoch": 0.08, + "learning_rate": 4.59462055715658e-05, + "loss": 2.7256, + "step": 127600 + }, + { + "epoch": 0.08, + "learning_rate": 4.594300352225425e-05, + "loss": 2.7402, + "step": 127700 + }, + { + "epoch": 0.08, + "learning_rate": 4.593980147294268e-05, + "loss": 2.7284, + "step": 127800 + }, + { + "epoch": 0.08, + "learning_rate": 4.593659942363113e-05, + "loss": 2.7088, + "step": 127900 + }, + { + "epoch": 0.08, + "learning_rate": 4.593339737431956e-05, + "loss": 2.7382, + "step": 128000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5666918754577637, + "eval_runtime": 177.4538, + "eval_samples_per_second": 56.353, + "eval_steps_per_second": 3.522, + "step": 128000 + }, + { + "epoch": 0.08, + "learning_rate": 4.593019532500801e-05, + "loss": 2.7279, + "step": 128100 + }, + { + "epoch": 0.08, + "learning_rate": 4.592699327569645e-05, + "loss": 2.7276, + "step": 128200 + }, + { + "epoch": 0.08, + "learning_rate": 4.592379122638489e-05, + "loss": 2.7334, + "step": 128300 + }, + { + "epoch": 0.08, + "learning_rate": 4.5920589177073334e-05, + "loss": 2.7367, + "step": 128400 + }, + { + "epoch": 0.08, + "learning_rate": 4.591738712776177e-05, + "loss": 2.7565, + "step": 128500 + }, + { + "epoch": 0.08, + "learning_rate": 4.5914185078450213e-05, + "loss": 2.7269, + "step": 128600 + }, + { + "epoch": 0.08, + "learning_rate": 4.5910983029138646e-05, + "loss": 2.7653, + "step": 128700 + }, + { + "epoch": 0.08, + "learning_rate": 4.590778097982709e-05, + "loss": 2.7261, + "step": 128800 + }, + { + "epoch": 0.08, + "learning_rate": 4.590457893051553e-05, + "loss": 2.7526, + "step": 128900 + }, + { + "epoch": 0.08, + "learning_rate": 4.590137688120397e-05, + "loss": 2.7317, + "step": 129000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5652542114257812, + "eval_runtime": 177.1478, + "eval_samples_per_second": 56.45, + "eval_steps_per_second": 3.528, + "step": 129000 + }, + { + "epoch": 0.08, + "learning_rate": 4.589817483189241e-05, + "loss": 2.7361, + "step": 129100 + }, + { + "epoch": 0.08, + "learning_rate": 4.589497278258085e-05, + "loss": 2.7237, + "step": 129200 + }, + { + "epoch": 0.08, + "learning_rate": 4.58917707332693e-05, + "loss": 2.7527, + "step": 129300 + }, + { + "epoch": 0.08, + "learning_rate": 4.588856868395773e-05, + "loss": 2.7399, + "step": 129400 + }, + { + "epoch": 0.08, + "learning_rate": 4.588536663464618e-05, + "loss": 2.7329, + "step": 129500 + }, + { + "epoch": 0.08, + "learning_rate": 4.588216458533461e-05, + "loss": 2.7414, + "step": 129600 + }, + { + "epoch": 0.08, + "learning_rate": 4.587896253602306e-05, + "loss": 2.7156, + "step": 129700 + }, + { + "epoch": 0.08, + "learning_rate": 4.58757604867115e-05, + "loss": 2.7384, + "step": 129800 + }, + { + "epoch": 0.08, + "learning_rate": 4.587255843739994e-05, + "loss": 2.7398, + "step": 129900 + }, + { + "epoch": 0.08, + "learning_rate": 4.586935638808838e-05, + "loss": 2.7147, + "step": 130000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5645546913146973, + "eval_runtime": 178.8022, + "eval_samples_per_second": 55.928, + "eval_steps_per_second": 3.495, + "step": 130000 + }, + { + "epoch": 0.08, + "learning_rate": 4.5866154338776816e-05, + "loss": 2.7349, + "step": 130100 + }, + { + "epoch": 0.08, + "learning_rate": 4.586295228946526e-05, + "loss": 2.7412, + "step": 130200 + }, + { + "epoch": 0.08, + "learning_rate": 4.5859750240153696e-05, + "loss": 2.7231, + "step": 130300 + }, + { + "epoch": 0.08, + "learning_rate": 4.585654819084214e-05, + "loss": 2.7387, + "step": 130400 + }, + { + "epoch": 0.08, + "learning_rate": 4.585334614153058e-05, + "loss": 2.717, + "step": 130500 + }, + { + "epoch": 0.08, + "learning_rate": 4.585014409221902e-05, + "loss": 2.7248, + "step": 130600 + }, + { + "epoch": 0.08, + "learning_rate": 4.584694204290746e-05, + "loss": 2.7428, + "step": 130700 + }, + { + "epoch": 0.08, + "learning_rate": 4.58437399935959e-05, + "loss": 2.7421, + "step": 130800 + }, + { + "epoch": 0.08, + "learning_rate": 4.584053794428435e-05, + "loss": 2.7298, + "step": 130900 + }, + { + "epoch": 0.08, + "learning_rate": 4.583733589497278e-05, + "loss": 2.7347, + "step": 131000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5643277168273926, + "eval_runtime": 177.5491, + "eval_samples_per_second": 56.322, + "eval_steps_per_second": 3.52, + "step": 131000 + }, + { + "epoch": 0.08, + "learning_rate": 4.583413384566123e-05, + "loss": 2.7193, + "step": 131100 + }, + { + "epoch": 0.08, + "learning_rate": 4.583093179634967e-05, + "loss": 2.7488, + "step": 131200 + }, + { + "epoch": 0.08, + "learning_rate": 4.582772974703811e-05, + "loss": 2.7346, + "step": 131300 + }, + { + "epoch": 0.08, + "learning_rate": 4.5824527697726547e-05, + "loss": 2.742, + "step": 131400 + }, + { + "epoch": 0.08, + "learning_rate": 4.5821325648414986e-05, + "loss": 2.7283, + "step": 131500 + }, + { + "epoch": 0.08, + "learning_rate": 4.581812359910343e-05, + "loss": 2.7401, + "step": 131600 + }, + { + "epoch": 0.08, + "learning_rate": 4.5814921549791866e-05, + "loss": 2.7387, + "step": 131700 + }, + { + "epoch": 0.08, + "learning_rate": 4.581171950048031e-05, + "loss": 2.7359, + "step": 131800 + }, + { + "epoch": 0.08, + "learning_rate": 4.5808517451168745e-05, + "loss": 2.7505, + "step": 131900 + }, + { + "epoch": 0.08, + "learning_rate": 4.580531540185719e-05, + "loss": 2.7317, + "step": 132000 + }, + { + "epoch": 0.08, + "eval_loss": 2.5639023780822754, + "eval_runtime": 175.6025, + "eval_samples_per_second": 56.947, + "eval_steps_per_second": 3.559, + "step": 132000 + }, + { + "epoch": 0.08, + "learning_rate": 4.580211335254563e-05, + "loss": 2.7439, + "step": 132100 + }, + { + "epoch": 0.08, + "learning_rate": 4.579891130323407e-05, + "loss": 2.7339, + "step": 132200 + }, + { + "epoch": 0.08, + "learning_rate": 4.579570925392251e-05, + "loss": 2.7229, + "step": 132300 + }, + { + "epoch": 0.08, + "learning_rate": 4.579250720461095e-05, + "loss": 2.7235, + "step": 132400 + }, + { + "epoch": 0.08, + "learning_rate": 4.57893051552994e-05, + "loss": 2.7449, + "step": 132500 + }, + { + "epoch": 0.08, + "learning_rate": 4.578610310598783e-05, + "loss": 2.7339, + "step": 132600 + }, + { + "epoch": 0.08, + "learning_rate": 4.578290105667628e-05, + "loss": 2.7365, + "step": 132700 + }, + { + "epoch": 0.08, + "learning_rate": 4.5779699007364716e-05, + "loss": 2.7345, + "step": 132800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5776496958053156e-05, + "loss": 2.7119, + "step": 132900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5773294908741596e-05, + "loss": 2.7415, + "step": 133000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5623250007629395, + "eval_runtime": 176.2335, + "eval_samples_per_second": 56.743, + "eval_steps_per_second": 3.546, + "step": 133000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5770092859430036e-05, + "loss": 2.7264, + "step": 133100 + }, + { + "epoch": 0.09, + "learning_rate": 4.576689081011848e-05, + "loss": 2.7485, + "step": 133200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5763688760806915e-05, + "loss": 2.7293, + "step": 133300 + }, + { + "epoch": 0.09, + "learning_rate": 4.576048671149536e-05, + "loss": 2.7132, + "step": 133400 + }, + { + "epoch": 0.09, + "learning_rate": 4.57572846621838e-05, + "loss": 2.7241, + "step": 133500 + }, + { + "epoch": 0.09, + "learning_rate": 4.575408261287224e-05, + "loss": 2.7353, + "step": 133600 + }, + { + "epoch": 0.09, + "learning_rate": 4.575088056356068e-05, + "loss": 2.7326, + "step": 133700 + }, + { + "epoch": 0.09, + "learning_rate": 4.574767851424912e-05, + "loss": 2.7297, + "step": 133800 + }, + { + "epoch": 0.09, + "learning_rate": 4.574447646493756e-05, + "loss": 2.7046, + "step": 133900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5741274415626e-05, + "loss": 2.74, + "step": 134000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5623319149017334, + "eval_runtime": 177.8784, + "eval_samples_per_second": 56.218, + "eval_steps_per_second": 3.514, + "step": 134000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5738072366314447e-05, + "loss": 2.7165, + "step": 134100 + }, + { + "epoch": 0.09, + "learning_rate": 4.573487031700288e-05, + "loss": 2.733, + "step": 134200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5731668267691326e-05, + "loss": 2.7403, + "step": 134300 + }, + { + "epoch": 0.09, + "learning_rate": 4.5728466218379766e-05, + "loss": 2.7304, + "step": 134400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5725264169068206e-05, + "loss": 2.7204, + "step": 134500 + }, + { + "epoch": 0.09, + "learning_rate": 4.5722062119756645e-05, + "loss": 2.7082, + "step": 134600 + }, + { + "epoch": 0.09, + "learning_rate": 4.5718860070445085e-05, + "loss": 2.7246, + "step": 134700 + }, + { + "epoch": 0.09, + "learning_rate": 4.571565802113353e-05, + "loss": 2.7432, + "step": 134800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5712455971821964e-05, + "loss": 2.727, + "step": 134900 + }, + { + "epoch": 0.09, + "learning_rate": 4.570925392251041e-05, + "loss": 2.7105, + "step": 135000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5619659423828125, + "eval_runtime": 175.7613, + "eval_samples_per_second": 56.895, + "eval_steps_per_second": 3.556, + "step": 135000 + }, + { + "epoch": 0.09, + "learning_rate": 4.570605187319885e-05, + "loss": 2.7047, + "step": 135100 + }, + { + "epoch": 0.09, + "learning_rate": 4.570284982388729e-05, + "loss": 2.7222, + "step": 135200 + }, + { + "epoch": 0.09, + "learning_rate": 4.569964777457573e-05, + "loss": 2.7088, + "step": 135300 + }, + { + "epoch": 0.09, + "learning_rate": 4.569644572526417e-05, + "loss": 2.7243, + "step": 135400 + }, + { + "epoch": 0.09, + "learning_rate": 4.569324367595261e-05, + "loss": 2.7281, + "step": 135500 + }, + { + "epoch": 0.09, + "learning_rate": 4.569004162664105e-05, + "loss": 2.7179, + "step": 135600 + }, + { + "epoch": 0.09, + "learning_rate": 4.5686839577329496e-05, + "loss": 2.7315, + "step": 135700 + }, + { + "epoch": 0.09, + "learning_rate": 4.5683637528017936e-05, + "loss": 2.7263, + "step": 135800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5680435478706375e-05, + "loss": 2.7381, + "step": 135900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5677233429394815e-05, + "loss": 2.7224, + "step": 136000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5620172023773193, + "eval_runtime": 177.9081, + "eval_samples_per_second": 56.209, + "eval_steps_per_second": 3.513, + "step": 136000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5674031380083255e-05, + "loss": 2.72, + "step": 136100 + }, + { + "epoch": 0.09, + "learning_rate": 4.5670829330771695e-05, + "loss": 2.7203, + "step": 136200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5667627281460134e-05, + "loss": 2.7109, + "step": 136300 + }, + { + "epoch": 0.09, + "learning_rate": 4.566442523214858e-05, + "loss": 2.7284, + "step": 136400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5661223182837014e-05, + "loss": 2.7323, + "step": 136500 + }, + { + "epoch": 0.09, + "learning_rate": 4.565802113352546e-05, + "loss": 2.7328, + "step": 136600 + }, + { + "epoch": 0.09, + "learning_rate": 4.56548190842139e-05, + "loss": 2.7508, + "step": 136700 + }, + { + "epoch": 0.09, + "learning_rate": 4.565161703490234e-05, + "loss": 2.7357, + "step": 136800 + }, + { + "epoch": 0.09, + "learning_rate": 4.564841498559078e-05, + "loss": 2.7068, + "step": 136900 + }, + { + "epoch": 0.09, + "learning_rate": 4.564521293627922e-05, + "loss": 2.6924, + "step": 137000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5612192153930664, + "eval_runtime": 176.2584, + "eval_samples_per_second": 56.735, + "eval_steps_per_second": 3.546, + "step": 137000 + }, + { + "epoch": 0.09, + "learning_rate": 4.564201088696766e-05, + "loss": 2.7036, + "step": 137100 + }, + { + "epoch": 0.09, + "learning_rate": 4.56388088376561e-05, + "loss": 2.7022, + "step": 137200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5635606788344545e-05, + "loss": 2.7178, + "step": 137300 + }, + { + "epoch": 0.09, + "learning_rate": 4.5632404739032985e-05, + "loss": 2.7343, + "step": 137400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5629202689721425e-05, + "loss": 2.7193, + "step": 137500 + }, + { + "epoch": 0.09, + "learning_rate": 4.5626000640409865e-05, + "loss": 2.7133, + "step": 137600 + }, + { + "epoch": 0.09, + "learning_rate": 4.5622798591098304e-05, + "loss": 2.7309, + "step": 137700 + }, + { + "epoch": 0.09, + "learning_rate": 4.5619596541786744e-05, + "loss": 2.7199, + "step": 137800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5616394492475184e-05, + "loss": 2.7136, + "step": 137900 + }, + { + "epoch": 0.09, + "learning_rate": 4.561319244316363e-05, + "loss": 2.7224, + "step": 138000 + }, + { + "epoch": 0.09, + "eval_loss": 2.562304973602295, + "eval_runtime": 177.3201, + "eval_samples_per_second": 56.395, + "eval_steps_per_second": 3.525, + "step": 138000 + }, + { + "epoch": 0.09, + "learning_rate": 4.560999039385207e-05, + "loss": 2.7246, + "step": 138100 + }, + { + "epoch": 0.09, + "learning_rate": 4.560678834454051e-05, + "loss": 2.7087, + "step": 138200 + }, + { + "epoch": 0.09, + "learning_rate": 4.560358629522895e-05, + "loss": 2.7094, + "step": 138300 + }, + { + "epoch": 0.09, + "learning_rate": 4.560038424591739e-05, + "loss": 2.7054, + "step": 138400 + }, + { + "epoch": 0.09, + "learning_rate": 4.559718219660583e-05, + "loss": 2.7108, + "step": 138500 + }, + { + "epoch": 0.09, + "learning_rate": 4.559398014729427e-05, + "loss": 2.7208, + "step": 138600 + }, + { + "epoch": 0.09, + "learning_rate": 4.559077809798271e-05, + "loss": 2.7004, + "step": 138700 + }, + { + "epoch": 0.09, + "learning_rate": 4.558757604867115e-05, + "loss": 2.7181, + "step": 138800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5584373999359595e-05, + "loss": 2.7302, + "step": 138900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5581171950048034e-05, + "loss": 2.7118, + "step": 139000 + }, + { + "epoch": 0.09, + "eval_loss": 2.559722661972046, + "eval_runtime": 175.7826, + "eval_samples_per_second": 56.888, + "eval_steps_per_second": 3.556, + "step": 139000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5577969900736474e-05, + "loss": 2.7417, + "step": 139100 + }, + { + "epoch": 0.09, + "learning_rate": 4.5574767851424914e-05, + "loss": 2.7227, + "step": 139200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5571565802113354e-05, + "loss": 2.7247, + "step": 139300 + }, + { + "epoch": 0.09, + "learning_rate": 4.556836375280179e-05, + "loss": 2.7179, + "step": 139400 + }, + { + "epoch": 0.09, + "learning_rate": 4.556516170349023e-05, + "loss": 2.7423, + "step": 139500 + }, + { + "epoch": 0.09, + "learning_rate": 4.556195965417868e-05, + "loss": 2.7376, + "step": 139600 + }, + { + "epoch": 0.09, + "learning_rate": 4.555875760486712e-05, + "loss": 2.7304, + "step": 139700 + }, + { + "epoch": 0.09, + "learning_rate": 4.555555555555556e-05, + "loss": 2.6881, + "step": 139800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5552353506244e-05, + "loss": 2.7232, + "step": 139900 + }, + { + "epoch": 0.09, + "learning_rate": 4.554915145693244e-05, + "loss": 2.7231, + "step": 140000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5592551231384277, + "eval_runtime": 176.3317, + "eval_samples_per_second": 56.711, + "eval_steps_per_second": 3.544, + "step": 140000 + }, + { + "epoch": 0.09, + "learning_rate": 4.554594940762088e-05, + "loss": 2.7048, + "step": 140100 + }, + { + "epoch": 0.09, + "learning_rate": 4.554274735830932e-05, + "loss": 2.7048, + "step": 140200 + }, + { + "epoch": 0.09, + "learning_rate": 4.553954530899776e-05, + "loss": 2.7241, + "step": 140300 + }, + { + "epoch": 0.09, + "learning_rate": 4.5536343259686204e-05, + "loss": 2.7153, + "step": 140400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5533141210374644e-05, + "loss": 2.7301, + "step": 140500 + }, + { + "epoch": 0.09, + "learning_rate": 4.5529939161063084e-05, + "loss": 2.7249, + "step": 140600 + }, + { + "epoch": 0.09, + "learning_rate": 4.5526737111751524e-05, + "loss": 2.7163, + "step": 140700 + }, + { + "epoch": 0.09, + "learning_rate": 4.552353506243996e-05, + "loss": 2.722, + "step": 140800 + }, + { + "epoch": 0.09, + "learning_rate": 4.55203330131284e-05, + "loss": 2.7067, + "step": 140900 + }, + { + "epoch": 0.09, + "learning_rate": 4.551713096381684e-05, + "loss": 2.7196, + "step": 141000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5608913898468018, + "eval_runtime": 191.9454, + "eval_samples_per_second": 52.098, + "eval_steps_per_second": 3.256, + "step": 141000 + }, + { + "epoch": 0.09, + "learning_rate": 4.551392891450529e-05, + "loss": 2.7199, + "step": 141100 + }, + { + "epoch": 0.09, + "learning_rate": 4.551072686519373e-05, + "loss": 2.7258, + "step": 141200 + }, + { + "epoch": 0.09, + "learning_rate": 4.550752481588217e-05, + "loss": 2.718, + "step": 141300 + }, + { + "epoch": 0.09, + "learning_rate": 4.550432276657061e-05, + "loss": 2.7175, + "step": 141400 + }, + { + "epoch": 0.09, + "learning_rate": 4.550112071725905e-05, + "loss": 2.7386, + "step": 141500 + }, + { + "epoch": 0.09, + "learning_rate": 4.549791866794749e-05, + "loss": 2.724, + "step": 141600 + }, + { + "epoch": 0.09, + "learning_rate": 4.549471661863593e-05, + "loss": 2.6896, + "step": 141700 + }, + { + "epoch": 0.09, + "learning_rate": 4.549151456932437e-05, + "loss": 2.715, + "step": 141800 + }, + { + "epoch": 0.09, + "learning_rate": 4.548831252001281e-05, + "loss": 2.6942, + "step": 141900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5485110470701254e-05, + "loss": 2.7205, + "step": 142000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5587964057922363, + "eval_runtime": 195.6699, + "eval_samples_per_second": 51.106, + "eval_steps_per_second": 3.194, + "step": 142000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5481908421389693e-05, + "loss": 2.7107, + "step": 142100 + }, + { + "epoch": 0.09, + "learning_rate": 4.547870637207813e-05, + "loss": 2.7236, + "step": 142200 + }, + { + "epoch": 0.09, + "learning_rate": 4.547550432276657e-05, + "loss": 2.7218, + "step": 142300 + }, + { + "epoch": 0.09, + "learning_rate": 4.547230227345501e-05, + "loss": 2.7444, + "step": 142400 + }, + { + "epoch": 0.09, + "learning_rate": 4.546910022414345e-05, + "loss": 2.7069, + "step": 142500 + }, + { + "epoch": 0.09, + "learning_rate": 4.546589817483189e-05, + "loss": 2.7037, + "step": 142600 + }, + { + "epoch": 0.09, + "learning_rate": 4.546269612552034e-05, + "loss": 2.7266, + "step": 142700 + }, + { + "epoch": 0.09, + "learning_rate": 4.545949407620878e-05, + "loss": 2.7243, + "step": 142800 + }, + { + "epoch": 0.09, + "learning_rate": 4.545629202689722e-05, + "loss": 2.726, + "step": 142900 + }, + { + "epoch": 0.09, + "learning_rate": 4.545308997758566e-05, + "loss": 2.722, + "step": 143000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5569887161254883, + "eval_runtime": 195.9697, + "eval_samples_per_second": 51.028, + "eval_steps_per_second": 3.189, + "step": 143000 + }, + { + "epoch": 0.09, + "learning_rate": 4.54498879282741e-05, + "loss": 2.7214, + "step": 143100 + }, + { + "epoch": 0.09, + "learning_rate": 4.544668587896254e-05, + "loss": 2.7294, + "step": 143200 + }, + { + "epoch": 0.09, + "learning_rate": 4.544348382965098e-05, + "loss": 2.7132, + "step": 143300 + }, + { + "epoch": 0.09, + "learning_rate": 4.5440281780339424e-05, + "loss": 2.7248, + "step": 143400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5437079731027857e-05, + "loss": 2.6972, + "step": 143500 + }, + { + "epoch": 0.09, + "learning_rate": 4.54338776817163e-05, + "loss": 2.7372, + "step": 143600 + }, + { + "epoch": 0.09, + "learning_rate": 4.543067563240474e-05, + "loss": 2.7202, + "step": 143700 + }, + { + "epoch": 0.09, + "learning_rate": 4.542747358309318e-05, + "loss": 2.7169, + "step": 143800 + }, + { + "epoch": 0.09, + "learning_rate": 4.542427153378162e-05, + "loss": 2.7168, + "step": 143900 + }, + { + "epoch": 0.09, + "learning_rate": 4.542106948447006e-05, + "loss": 2.7124, + "step": 144000 + }, + { + "epoch": 0.09, + "eval_loss": 2.557919979095459, + "eval_runtime": 196.9517, + "eval_samples_per_second": 50.774, + "eval_steps_per_second": 3.173, + "step": 144000 + }, + { + "epoch": 0.09, + "learning_rate": 4.54178674351585e-05, + "loss": 2.6882, + "step": 144100 + }, + { + "epoch": 0.09, + "learning_rate": 4.541466538584694e-05, + "loss": 2.7108, + "step": 144200 + }, + { + "epoch": 0.09, + "learning_rate": 4.541146333653539e-05, + "loss": 2.7141, + "step": 144300 + }, + { + "epoch": 0.09, + "learning_rate": 4.540826128722383e-05, + "loss": 2.6942, + "step": 144400 + }, + { + "epoch": 0.09, + "learning_rate": 4.540505923791227e-05, + "loss": 2.7091, + "step": 144500 + }, + { + "epoch": 0.09, + "learning_rate": 4.540185718860071e-05, + "loss": 2.7041, + "step": 144600 + }, + { + "epoch": 0.09, + "learning_rate": 4.539865513928915e-05, + "loss": 2.7005, + "step": 144700 + }, + { + "epoch": 0.09, + "learning_rate": 4.539545308997759e-05, + "loss": 2.6966, + "step": 144800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5392251040666026e-05, + "loss": 2.7228, + "step": 144900 + }, + { + "epoch": 0.09, + "learning_rate": 4.538904899135447e-05, + "loss": 2.7238, + "step": 145000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5575151443481445, + "eval_runtime": 192.2068, + "eval_samples_per_second": 52.027, + "eval_steps_per_second": 3.252, + "step": 145000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5385846942042906e-05, + "loss": 2.7109, + "step": 145100 + }, + { + "epoch": 0.09, + "learning_rate": 4.538264489273135e-05, + "loss": 2.7055, + "step": 145200 + }, + { + "epoch": 0.09, + "learning_rate": 4.537944284341979e-05, + "loss": 2.7007, + "step": 145300 + }, + { + "epoch": 0.09, + "learning_rate": 4.537624079410823e-05, + "loss": 2.7377, + "step": 145400 + }, + { + "epoch": 0.09, + "learning_rate": 4.537303874479667e-05, + "loss": 2.6945, + "step": 145500 + }, + { + "epoch": 0.09, + "learning_rate": 4.536983669548511e-05, + "loss": 2.7092, + "step": 145600 + }, + { + "epoch": 0.09, + "learning_rate": 4.536663464617356e-05, + "loss": 2.7244, + "step": 145700 + }, + { + "epoch": 0.09, + "learning_rate": 4.536343259686199e-05, + "loss": 2.7308, + "step": 145800 + }, + { + "epoch": 0.09, + "learning_rate": 4.536023054755044e-05, + "loss": 2.6933, + "step": 145900 + }, + { + "epoch": 0.09, + "learning_rate": 4.535702849823888e-05, + "loss": 2.7148, + "step": 146000 + }, + { + "epoch": 0.09, + "eval_loss": 2.555891752243042, + "eval_runtime": 176.8889, + "eval_samples_per_second": 56.533, + "eval_steps_per_second": 3.533, + "step": 146000 + }, + { + "epoch": 0.09, + "learning_rate": 4.535382644892732e-05, + "loss": 2.7073, + "step": 146100 + }, + { + "epoch": 0.09, + "learning_rate": 4.5350624399615757e-05, + "loss": 2.7099, + "step": 146200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5347422350304196e-05, + "loss": 2.7376, + "step": 146300 + }, + { + "epoch": 0.09, + "learning_rate": 4.5344220300992636e-05, + "loss": 2.722, + "step": 146400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5341018251681076e-05, + "loss": 2.7329, + "step": 146500 + }, + { + "epoch": 0.09, + "learning_rate": 4.533781620236952e-05, + "loss": 2.7116, + "step": 146600 + }, + { + "epoch": 0.09, + "learning_rate": 4.5334614153057955e-05, + "loss": 2.7152, + "step": 146700 + }, + { + "epoch": 0.09, + "learning_rate": 4.53314121037464e-05, + "loss": 2.7132, + "step": 146800 + }, + { + "epoch": 0.09, + "learning_rate": 4.532821005443484e-05, + "loss": 2.7152, + "step": 146900 + }, + { + "epoch": 0.09, + "learning_rate": 4.532500800512328e-05, + "loss": 2.7224, + "step": 147000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5564117431640625, + "eval_runtime": 176.4467, + "eval_samples_per_second": 56.674, + "eval_steps_per_second": 3.542, + "step": 147000 + }, + { + "epoch": 0.09, + "learning_rate": 4.532180595581172e-05, + "loss": 2.7205, + "step": 147100 + }, + { + "epoch": 0.09, + "learning_rate": 4.531860390650016e-05, + "loss": 2.6828, + "step": 147200 + }, + { + "epoch": 0.09, + "learning_rate": 4.531540185718861e-05, + "loss": 2.7089, + "step": 147300 + }, + { + "epoch": 0.09, + "learning_rate": 4.531219980787704e-05, + "loss": 2.7076, + "step": 147400 + }, + { + "epoch": 0.09, + "learning_rate": 4.530899775856549e-05, + "loss": 2.7319, + "step": 147500 + }, + { + "epoch": 0.09, + "learning_rate": 4.5305795709253926e-05, + "loss": 2.7201, + "step": 147600 + }, + { + "epoch": 0.09, + "learning_rate": 4.5302593659942366e-05, + "loss": 2.7186, + "step": 147700 + }, + { + "epoch": 0.09, + "learning_rate": 4.5299391610630806e-05, + "loss": 2.7015, + "step": 147800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5296189561319246e-05, + "loss": 2.693, + "step": 147900 + }, + { + "epoch": 0.09, + "learning_rate": 4.529298751200769e-05, + "loss": 2.7169, + "step": 148000 + }, + { + "epoch": 0.09, + "eval_loss": 2.5550851821899414, + "eval_runtime": 177.3833, + "eval_samples_per_second": 56.375, + "eval_steps_per_second": 3.523, + "step": 148000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5289785462696125e-05, + "loss": 2.7141, + "step": 148100 + }, + { + "epoch": 0.09, + "learning_rate": 4.528658341338457e-05, + "loss": 2.7082, + "step": 148200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5283381364073005e-05, + "loss": 2.6801, + "step": 148300 + }, + { + "epoch": 0.09, + "learning_rate": 4.528017931476145e-05, + "loss": 2.7095, + "step": 148400 + }, + { + "epoch": 0.1, + "learning_rate": 4.527697726544989e-05, + "loss": 2.7016, + "step": 148500 + }, + { + "epoch": 0.1, + "learning_rate": 4.527377521613833e-05, + "loss": 2.7217, + "step": 148600 + }, + { + "epoch": 0.1, + "learning_rate": 4.527057316682678e-05, + "loss": 2.6865, + "step": 148700 + }, + { + "epoch": 0.1, + "learning_rate": 4.526737111751521e-05, + "loss": 2.7088, + "step": 148800 + }, + { + "epoch": 0.1, + "learning_rate": 4.526416906820366e-05, + "loss": 2.6978, + "step": 148900 + }, + { + "epoch": 0.1, + "learning_rate": 4.526096701889209e-05, + "loss": 2.7199, + "step": 149000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5551364421844482, + "eval_runtime": 176.4025, + "eval_samples_per_second": 56.689, + "eval_steps_per_second": 3.543, + "step": 149000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5257764969580536e-05, + "loss": 2.7284, + "step": 149100 + }, + { + "epoch": 0.1, + "learning_rate": 4.5254562920268976e-05, + "loss": 2.7104, + "step": 149200 + }, + { + "epoch": 0.1, + "learning_rate": 4.5251360870957416e-05, + "loss": 2.6976, + "step": 149300 + }, + { + "epoch": 0.1, + "learning_rate": 4.5248158821645855e-05, + "loss": 2.6883, + "step": 149400 + }, + { + "epoch": 0.1, + "learning_rate": 4.5244956772334295e-05, + "loss": 2.7103, + "step": 149500 + }, + { + "epoch": 0.1, + "learning_rate": 4.524175472302274e-05, + "loss": 2.6983, + "step": 149600 + }, + { + "epoch": 0.1, + "learning_rate": 4.5238552673711175e-05, + "loss": 2.7162, + "step": 149700 + }, + { + "epoch": 0.1, + "learning_rate": 4.523535062439962e-05, + "loss": 2.699, + "step": 149800 + }, + { + "epoch": 0.1, + "learning_rate": 4.5232148575088054e-05, + "loss": 2.7355, + "step": 149900 + }, + { + "epoch": 0.1, + "learning_rate": 4.52289465257765e-05, + "loss": 2.696, + "step": 150000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5558013916015625, + "eval_runtime": 176.7584, + "eval_samples_per_second": 56.574, + "eval_steps_per_second": 3.536, + "step": 150000 + }, + { + "epoch": 0.1, + "learning_rate": 4.522574447646494e-05, + "loss": 2.7101, + "step": 150100 + }, + { + "epoch": 0.1, + "learning_rate": 4.522254242715338e-05, + "loss": 2.6998, + "step": 150200 + }, + { + "epoch": 0.1, + "learning_rate": 4.5219340377841827e-05, + "loss": 2.7, + "step": 150300 + }, + { + "epoch": 0.1, + "learning_rate": 4.521613832853026e-05, + "loss": 2.6892, + "step": 150400 + }, + { + "epoch": 0.1, + "learning_rate": 4.5212936279218706e-05, + "loss": 2.7206, + "step": 150500 + }, + { + "epoch": 0.1, + "learning_rate": 4.520973422990714e-05, + "loss": 2.7221, + "step": 150600 + }, + { + "epoch": 0.1, + "learning_rate": 4.5206532180595585e-05, + "loss": 2.6952, + "step": 150700 + }, + { + "epoch": 0.1, + "learning_rate": 4.5203330131284025e-05, + "loss": 2.7009, + "step": 150800 + }, + { + "epoch": 0.1, + "learning_rate": 4.5200128081972465e-05, + "loss": 2.7042, + "step": 150900 + }, + { + "epoch": 0.1, + "learning_rate": 4.5196926032660905e-05, + "loss": 2.7094, + "step": 151000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5532150268554688, + "eval_runtime": 176.648, + "eval_samples_per_second": 56.61, + "eval_steps_per_second": 3.538, + "step": 151000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5193723983349344e-05, + "loss": 2.6896, + "step": 151100 + }, + { + "epoch": 0.1, + "learning_rate": 4.519052193403779e-05, + "loss": 2.7063, + "step": 151200 + }, + { + "epoch": 0.1, + "learning_rate": 4.5187319884726224e-05, + "loss": 2.728, + "step": 151300 + }, + { + "epoch": 0.1, + "learning_rate": 4.518411783541467e-05, + "loss": 2.6979, + "step": 151400 + }, + { + "epoch": 0.1, + "learning_rate": 4.51809157861031e-05, + "loss": 2.7126, + "step": 151500 + }, + { + "epoch": 0.1, + "learning_rate": 4.517771373679155e-05, + "loss": 2.7166, + "step": 151600 + }, + { + "epoch": 0.1, + "learning_rate": 4.517451168747999e-05, + "loss": 2.6942, + "step": 151700 + }, + { + "epoch": 0.1, + "learning_rate": 4.517130963816843e-05, + "loss": 2.6964, + "step": 151800 + }, + { + "epoch": 0.1, + "learning_rate": 4.5168107588856876e-05, + "loss": 2.709, + "step": 151900 + }, + { + "epoch": 0.1, + "learning_rate": 4.516490553954531e-05, + "loss": 2.7005, + "step": 152000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5514307022094727, + "eval_runtime": 176.7201, + "eval_samples_per_second": 56.587, + "eval_steps_per_second": 3.537, + "step": 152000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5161703490233755e-05, + "loss": 2.7171, + "step": 152100 + }, + { + "epoch": 0.1, + "learning_rate": 4.515850144092219e-05, + "loss": 2.6838, + "step": 152200 + }, + { + "epoch": 0.1, + "learning_rate": 4.5155299391610635e-05, + "loss": 2.7012, + "step": 152300 + }, + { + "epoch": 0.1, + "learning_rate": 4.515209734229907e-05, + "loss": 2.6938, + "step": 152400 + }, + { + "epoch": 0.1, + "learning_rate": 4.5148895292987514e-05, + "loss": 2.7044, + "step": 152500 + }, + { + "epoch": 0.1, + "learning_rate": 4.5145693243675954e-05, + "loss": 2.6927, + "step": 152600 + }, + { + "epoch": 0.1, + "learning_rate": 4.5142491194364394e-05, + "loss": 2.6975, + "step": 152700 + }, + { + "epoch": 0.1, + "learning_rate": 4.513928914505284e-05, + "loss": 2.7127, + "step": 152800 + }, + { + "epoch": 0.1, + "learning_rate": 4.513608709574127e-05, + "loss": 2.7062, + "step": 152900 + }, + { + "epoch": 0.1, + "learning_rate": 4.513288504642972e-05, + "loss": 2.6974, + "step": 153000 + }, + { + "epoch": 0.1, + "eval_loss": 2.552427053451538, + "eval_runtime": 176.0172, + "eval_samples_per_second": 56.813, + "eval_steps_per_second": 3.551, + "step": 153000 + }, + { + "epoch": 0.1, + "learning_rate": 4.512968299711815e-05, + "loss": 2.7045, + "step": 153100 + }, + { + "epoch": 0.1, + "learning_rate": 4.51264809478066e-05, + "loss": 2.7105, + "step": 153200 + }, + { + "epoch": 0.1, + "learning_rate": 4.512327889849504e-05, + "loss": 2.7038, + "step": 153300 + }, + { + "epoch": 0.1, + "learning_rate": 4.512007684918348e-05, + "loss": 2.7134, + "step": 153400 + }, + { + "epoch": 0.1, + "learning_rate": 4.5116874799871925e-05, + "loss": 2.7209, + "step": 153500 + }, + { + "epoch": 0.1, + "learning_rate": 4.511367275056036e-05, + "loss": 2.69, + "step": 153600 + }, + { + "epoch": 0.1, + "learning_rate": 4.5110470701248805e-05, + "loss": 2.7025, + "step": 153700 + }, + { + "epoch": 0.1, + "learning_rate": 4.510726865193724e-05, + "loss": 2.6883, + "step": 153800 + }, + { + "epoch": 0.1, + "learning_rate": 4.5104066602625684e-05, + "loss": 2.7054, + "step": 153900 + }, + { + "epoch": 0.1, + "learning_rate": 4.510086455331412e-05, + "loss": 2.7031, + "step": 154000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5516436100006104, + "eval_runtime": 175.5362, + "eval_samples_per_second": 56.968, + "eval_steps_per_second": 3.561, + "step": 154000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5097662504002564e-05, + "loss": 2.7158, + "step": 154100 + }, + { + "epoch": 0.1, + "learning_rate": 4.5094460454691003e-05, + "loss": 2.7083, + "step": 154200 + }, + { + "epoch": 0.1, + "learning_rate": 4.509125840537944e-05, + "loss": 2.6873, + "step": 154300 + }, + { + "epoch": 0.1, + "learning_rate": 4.508805635606789e-05, + "loss": 2.7017, + "step": 154400 + }, + { + "epoch": 0.1, + "learning_rate": 4.508485430675632e-05, + "loss": 2.7127, + "step": 154500 + }, + { + "epoch": 0.1, + "learning_rate": 4.508165225744477e-05, + "loss": 2.6783, + "step": 154600 + }, + { + "epoch": 0.1, + "learning_rate": 4.50784502081332e-05, + "loss": 2.698, + "step": 154700 + }, + { + "epoch": 0.1, + "learning_rate": 4.507524815882165e-05, + "loss": 2.6893, + "step": 154800 + }, + { + "epoch": 0.1, + "learning_rate": 4.507204610951009e-05, + "loss": 2.7094, + "step": 154900 + }, + { + "epoch": 0.1, + "learning_rate": 4.506884406019853e-05, + "loss": 2.6675, + "step": 155000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5507030487060547, + "eval_runtime": 176.7491, + "eval_samples_per_second": 56.577, + "eval_steps_per_second": 3.536, + "step": 155000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5065642010886975e-05, + "loss": 2.7306, + "step": 155100 + }, + { + "epoch": 0.1, + "learning_rate": 4.506243996157541e-05, + "loss": 2.7059, + "step": 155200 + }, + { + "epoch": 0.1, + "learning_rate": 4.5059237912263854e-05, + "loss": 2.7102, + "step": 155300 + }, + { + "epoch": 0.1, + "learning_rate": 4.505603586295229e-05, + "loss": 2.6915, + "step": 155400 + }, + { + "epoch": 0.1, + "learning_rate": 4.5052833813640734e-05, + "loss": 2.7161, + "step": 155500 + }, + { + "epoch": 0.1, + "learning_rate": 4.504963176432917e-05, + "loss": 2.6955, + "step": 155600 + }, + { + "epoch": 0.1, + "learning_rate": 4.504642971501761e-05, + "loss": 2.7184, + "step": 155700 + }, + { + "epoch": 0.1, + "learning_rate": 4.504322766570605e-05, + "loss": 2.6794, + "step": 155800 + }, + { + "epoch": 0.1, + "learning_rate": 4.504002561639449e-05, + "loss": 2.6808, + "step": 155900 + }, + { + "epoch": 0.1, + "learning_rate": 4.503682356708294e-05, + "loss": 2.691, + "step": 156000 + }, + { + "epoch": 0.1, + "eval_loss": 2.550930976867676, + "eval_runtime": 176.1204, + "eval_samples_per_second": 56.779, + "eval_steps_per_second": 3.549, + "step": 156000 + }, + { + "epoch": 0.1, + "learning_rate": 4.503362151777137e-05, + "loss": 2.7063, + "step": 156100 + }, + { + "epoch": 0.1, + "learning_rate": 4.503041946845982e-05, + "loss": 2.7128, + "step": 156200 + }, + { + "epoch": 0.1, + "learning_rate": 4.502721741914825e-05, + "loss": 2.7084, + "step": 156300 + }, + { + "epoch": 0.1, + "learning_rate": 4.50240153698367e-05, + "loss": 2.6989, + "step": 156400 + }, + { + "epoch": 0.1, + "learning_rate": 4.502081332052514e-05, + "loss": 2.6856, + "step": 156500 + }, + { + "epoch": 0.1, + "learning_rate": 4.501761127121358e-05, + "loss": 2.7051, + "step": 156600 + }, + { + "epoch": 0.1, + "learning_rate": 4.5014409221902024e-05, + "loss": 2.6999, + "step": 156700 + }, + { + "epoch": 0.1, + "learning_rate": 4.501120717259046e-05, + "loss": 2.6964, + "step": 156800 + }, + { + "epoch": 0.1, + "learning_rate": 4.5008005123278903e-05, + "loss": 2.6841, + "step": 156900 + }, + { + "epoch": 0.1, + "learning_rate": 4.5004803073967336e-05, + "loss": 2.7058, + "step": 157000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5504448413848877, + "eval_runtime": 176.3728, + "eval_samples_per_second": 56.698, + "eval_steps_per_second": 3.544, + "step": 157000 + }, + { + "epoch": 0.1, + "learning_rate": 4.500160102465578e-05, + "loss": 2.6789, + "step": 157100 + }, + { + "epoch": 0.1, + "learning_rate": 4.499839897534422e-05, + "loss": 2.6787, + "step": 157200 + }, + { + "epoch": 0.1, + "learning_rate": 4.499519692603266e-05, + "loss": 2.6961, + "step": 157300 + }, + { + "epoch": 0.1, + "learning_rate": 4.49919948767211e-05, + "loss": 2.6976, + "step": 157400 + }, + { + "epoch": 0.1, + "learning_rate": 4.498879282740954e-05, + "loss": 2.7085, + "step": 157500 + }, + { + "epoch": 0.1, + "learning_rate": 4.498559077809799e-05, + "loss": 2.6969, + "step": 157600 + }, + { + "epoch": 0.1, + "learning_rate": 4.498238872878642e-05, + "loss": 2.6797, + "step": 157700 + }, + { + "epoch": 0.1, + "learning_rate": 4.497918667947487e-05, + "loss": 2.694, + "step": 157800 + }, + { + "epoch": 0.1, + "learning_rate": 4.497598463016331e-05, + "loss": 2.6965, + "step": 157900 + }, + { + "epoch": 0.1, + "learning_rate": 4.497278258085175e-05, + "loss": 2.6927, + "step": 158000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5523300170898438, + "eval_runtime": 176.4117, + "eval_samples_per_second": 56.686, + "eval_steps_per_second": 3.543, + "step": 158000 + }, + { + "epoch": 0.1, + "learning_rate": 4.496958053154019e-05, + "loss": 2.6993, + "step": 158100 + }, + { + "epoch": 0.1, + "learning_rate": 4.496637848222863e-05, + "loss": 2.6749, + "step": 158200 + }, + { + "epoch": 0.1, + "learning_rate": 4.496317643291707e-05, + "loss": 2.6937, + "step": 158300 + }, + { + "epoch": 0.1, + "learning_rate": 4.4959974383605506e-05, + "loss": 2.6949, + "step": 158400 + }, + { + "epoch": 0.1, + "learning_rate": 4.495677233429395e-05, + "loss": 2.689, + "step": 158500 + }, + { + "epoch": 0.1, + "learning_rate": 4.495357028498239e-05, + "loss": 2.6996, + "step": 158600 + }, + { + "epoch": 0.1, + "learning_rate": 4.495036823567083e-05, + "loss": 2.6969, + "step": 158700 + }, + { + "epoch": 0.1, + "learning_rate": 4.494716618635927e-05, + "loss": 2.7083, + "step": 158800 + }, + { + "epoch": 0.1, + "learning_rate": 4.494396413704771e-05, + "loss": 2.7004, + "step": 158900 + }, + { + "epoch": 0.1, + "learning_rate": 4.494076208773615e-05, + "loss": 2.705, + "step": 159000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5506510734558105, + "eval_runtime": 176.2949, + "eval_samples_per_second": 56.723, + "eval_steps_per_second": 3.545, + "step": 159000 + }, + { + "epoch": 0.1, + "learning_rate": 4.493756003842459e-05, + "loss": 2.6878, + "step": 159100 + }, + { + "epoch": 0.1, + "learning_rate": 4.493435798911304e-05, + "loss": 2.7245, + "step": 159200 + }, + { + "epoch": 0.1, + "learning_rate": 4.493115593980147e-05, + "loss": 2.7018, + "step": 159300 + }, + { + "epoch": 0.1, + "learning_rate": 4.492795389048992e-05, + "loss": 2.6853, + "step": 159400 + }, + { + "epoch": 0.1, + "learning_rate": 4.492475184117836e-05, + "loss": 2.6791, + "step": 159500 + }, + { + "epoch": 0.1, + "learning_rate": 4.49215497918668e-05, + "loss": 2.6969, + "step": 159600 + }, + { + "epoch": 0.1, + "learning_rate": 4.4918347742555236e-05, + "loss": 2.6739, + "step": 159700 + }, + { + "epoch": 0.1, + "learning_rate": 4.4915145693243676e-05, + "loss": 2.6825, + "step": 159800 + }, + { + "epoch": 0.1, + "learning_rate": 4.491194364393212e-05, + "loss": 2.7052, + "step": 159900 + }, + { + "epoch": 0.1, + "learning_rate": 4.4908741594620556e-05, + "loss": 2.6776, + "step": 160000 + }, + { + "epoch": 0.1, + "eval_loss": 2.548813581466675, + "eval_runtime": 176.2094, + "eval_samples_per_second": 56.751, + "eval_steps_per_second": 3.547, + "step": 160000 + }, + { + "epoch": 0.1, + "learning_rate": 4.4905539545309e-05, + "loss": 2.6847, + "step": 160100 + }, + { + "epoch": 0.1, + "learning_rate": 4.490233749599744e-05, + "loss": 2.6888, + "step": 160200 + }, + { + "epoch": 0.1, + "learning_rate": 4.489913544668588e-05, + "loss": 2.6876, + "step": 160300 + }, + { + "epoch": 0.1, + "learning_rate": 4.489593339737432e-05, + "loss": 2.6889, + "step": 160400 + }, + { + "epoch": 0.1, + "learning_rate": 4.489273134806276e-05, + "loss": 2.6921, + "step": 160500 + }, + { + "epoch": 0.1, + "learning_rate": 4.48895292987512e-05, + "loss": 2.706, + "step": 160600 + }, + { + "epoch": 0.1, + "learning_rate": 4.488632724943964e-05, + "loss": 2.7059, + "step": 160700 + }, + { + "epoch": 0.1, + "learning_rate": 4.488312520012809e-05, + "loss": 2.7196, + "step": 160800 + }, + { + "epoch": 0.1, + "learning_rate": 4.487992315081653e-05, + "loss": 2.723, + "step": 160900 + }, + { + "epoch": 0.1, + "learning_rate": 4.487672110150497e-05, + "loss": 2.6698, + "step": 161000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5499532222747803, + "eval_runtime": 177.0923, + "eval_samples_per_second": 56.468, + "eval_steps_per_second": 3.529, + "step": 161000 + }, + { + "epoch": 0.1, + "learning_rate": 4.4873519052193406e-05, + "loss": 2.6885, + "step": 161100 + }, + { + "epoch": 0.1, + "learning_rate": 4.4870317002881846e-05, + "loss": 2.6578, + "step": 161200 + }, + { + "epoch": 0.1, + "learning_rate": 4.4867114953570286e-05, + "loss": 2.6916, + "step": 161300 + }, + { + "epoch": 0.1, + "learning_rate": 4.4863912904258726e-05, + "loss": 2.7043, + "step": 161400 + }, + { + "epoch": 0.1, + "learning_rate": 4.486071085494717e-05, + "loss": 2.688, + "step": 161500 + }, + { + "epoch": 0.1, + "learning_rate": 4.4857508805635605e-05, + "loss": 2.6949, + "step": 161600 + }, + { + "epoch": 0.1, + "learning_rate": 4.485430675632405e-05, + "loss": 2.6942, + "step": 161700 + }, + { + "epoch": 0.1, + "learning_rate": 4.485110470701249e-05, + "loss": 2.6822, + "step": 161800 + }, + { + "epoch": 0.1, + "learning_rate": 4.484790265770093e-05, + "loss": 2.6737, + "step": 161900 + }, + { + "epoch": 0.1, + "learning_rate": 4.484470060838937e-05, + "loss": 2.7036, + "step": 162000 + }, + { + "epoch": 0.1, + "eval_loss": 2.547950506210327, + "eval_runtime": 175.2664, + "eval_samples_per_second": 57.056, + "eval_steps_per_second": 3.566, + "step": 162000 + }, + { + "epoch": 0.1, + "learning_rate": 4.484149855907781e-05, + "loss": 2.7313, + "step": 162100 + }, + { + "epoch": 0.1, + "learning_rate": 4.483829650976625e-05, + "loss": 2.6729, + "step": 162200 + }, + { + "epoch": 0.1, + "learning_rate": 4.483509446045469e-05, + "loss": 2.6746, + "step": 162300 + }, + { + "epoch": 0.1, + "learning_rate": 4.4831892411143137e-05, + "loss": 2.7017, + "step": 162400 + }, + { + "epoch": 0.1, + "learning_rate": 4.4828690361831576e-05, + "loss": 2.6856, + "step": 162500 + }, + { + "epoch": 0.1, + "learning_rate": 4.4825488312520016e-05, + "loss": 2.6974, + "step": 162600 + }, + { + "epoch": 0.1, + "learning_rate": 4.4822286263208456e-05, + "loss": 2.697, + "step": 162700 + }, + { + "epoch": 0.1, + "learning_rate": 4.4819084213896896e-05, + "loss": 2.6867, + "step": 162800 + }, + { + "epoch": 0.1, + "learning_rate": 4.4815882164585335e-05, + "loss": 2.6919, + "step": 162900 + }, + { + "epoch": 0.1, + "learning_rate": 4.4812680115273775e-05, + "loss": 2.6869, + "step": 163000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5488507747650146, + "eval_runtime": 178.2486, + "eval_samples_per_second": 56.101, + "eval_steps_per_second": 3.506, + "step": 163000 + }, + { + "epoch": 0.1, + "learning_rate": 4.480947806596222e-05, + "loss": 2.6961, + "step": 163100 + }, + { + "epoch": 0.1, + "learning_rate": 4.480627601665066e-05, + "loss": 2.6871, + "step": 163200 + }, + { + "epoch": 0.1, + "learning_rate": 4.48030739673391e-05, + "loss": 2.6938, + "step": 163300 + }, + { + "epoch": 0.1, + "learning_rate": 4.479987191802754e-05, + "loss": 2.6779, + "step": 163400 + }, + { + "epoch": 0.1, + "learning_rate": 4.479666986871598e-05, + "loss": 2.6705, + "step": 163500 + }, + { + "epoch": 0.1, + "learning_rate": 4.479346781940442e-05, + "loss": 2.6818, + "step": 163600 + }, + { + "epoch": 0.1, + "learning_rate": 4.479026577009286e-05, + "loss": 2.679, + "step": 163700 + }, + { + "epoch": 0.1, + "learning_rate": 4.47870637207813e-05, + "loss": 2.67, + "step": 163800 + }, + { + "epoch": 0.1, + "learning_rate": 4.478386167146974e-05, + "loss": 2.6943, + "step": 163900 + }, + { + "epoch": 0.1, + "learning_rate": 4.4780659622158186e-05, + "loss": 2.6805, + "step": 164000 + }, + { + "epoch": 0.1, + "eval_loss": 2.5475895404815674, + "eval_runtime": 178.95, + "eval_samples_per_second": 55.882, + "eval_steps_per_second": 3.493, + "step": 164000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4777457572846626e-05, + "loss": 2.6768, + "step": 164100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4774255523535065e-05, + "loss": 2.6859, + "step": 164200 + }, + { + "epoch": 0.11, + "learning_rate": 4.4771053474223505e-05, + "loss": 2.6726, + "step": 164300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4767851424911945e-05, + "loss": 2.6899, + "step": 164400 + }, + { + "epoch": 0.11, + "learning_rate": 4.4764649375600385e-05, + "loss": 2.6961, + "step": 164500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4761447326288824e-05, + "loss": 2.6584, + "step": 164600 + }, + { + "epoch": 0.11, + "learning_rate": 4.475824527697727e-05, + "loss": 2.6667, + "step": 164700 + }, + { + "epoch": 0.11, + "learning_rate": 4.475504322766571e-05, + "loss": 2.6932, + "step": 164800 + }, + { + "epoch": 0.11, + "learning_rate": 4.475184117835415e-05, + "loss": 2.6838, + "step": 164900 + }, + { + "epoch": 0.11, + "learning_rate": 4.474863912904259e-05, + "loss": 2.6931, + "step": 165000 + }, + { + "epoch": 0.11, + "eval_loss": 2.545959711074829, + "eval_runtime": 183.1837, + "eval_samples_per_second": 54.59, + "eval_steps_per_second": 3.412, + "step": 165000 + }, + { + "epoch": 0.11, + "learning_rate": 4.474543707973103e-05, + "loss": 2.6931, + "step": 165100 + }, + { + "epoch": 0.11, + "learning_rate": 4.474223503041947e-05, + "loss": 2.6918, + "step": 165200 + }, + { + "epoch": 0.11, + "learning_rate": 4.473903298110791e-05, + "loss": 2.6938, + "step": 165300 + }, + { + "epoch": 0.11, + "learning_rate": 4.473583093179635e-05, + "loss": 2.7014, + "step": 165400 + }, + { + "epoch": 0.11, + "learning_rate": 4.4732628882484796e-05, + "loss": 2.7183, + "step": 165500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4729426833173235e-05, + "loss": 2.6951, + "step": 165600 + }, + { + "epoch": 0.11, + "learning_rate": 4.4726224783861675e-05, + "loss": 2.6888, + "step": 165700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4723022734550115e-05, + "loss": 2.6841, + "step": 165800 + }, + { + "epoch": 0.11, + "learning_rate": 4.4719820685238555e-05, + "loss": 2.6971, + "step": 165900 + }, + { + "epoch": 0.11, + "learning_rate": 4.4716618635926994e-05, + "loss": 2.6807, + "step": 166000 + }, + { + "epoch": 0.11, + "eval_loss": 2.546175241470337, + "eval_runtime": 182.6607, + "eval_samples_per_second": 54.746, + "eval_steps_per_second": 3.422, + "step": 166000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4713416586615434e-05, + "loss": 2.69, + "step": 166100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4710214537303874e-05, + "loss": 2.6925, + "step": 166200 + }, + { + "epoch": 0.11, + "learning_rate": 4.470701248799232e-05, + "loss": 2.6786, + "step": 166300 + }, + { + "epoch": 0.11, + "learning_rate": 4.470381043868076e-05, + "loss": 2.6916, + "step": 166400 + }, + { + "epoch": 0.11, + "learning_rate": 4.47006083893692e-05, + "loss": 2.6914, + "step": 166500 + }, + { + "epoch": 0.11, + "learning_rate": 4.469740634005764e-05, + "loss": 2.6732, + "step": 166600 + }, + { + "epoch": 0.11, + "learning_rate": 4.469420429074608e-05, + "loss": 2.7, + "step": 166700 + }, + { + "epoch": 0.11, + "learning_rate": 4.469100224143452e-05, + "loss": 2.6885, + "step": 166800 + }, + { + "epoch": 0.11, + "learning_rate": 4.468780019212296e-05, + "loss": 2.6889, + "step": 166900 + }, + { + "epoch": 0.11, + "learning_rate": 4.46845981428114e-05, + "loss": 2.6913, + "step": 167000 + }, + { + "epoch": 0.11, + "eval_loss": 2.544877052307129, + "eval_runtime": 181.3638, + "eval_samples_per_second": 55.138, + "eval_steps_per_second": 3.446, + "step": 167000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4681396093499845e-05, + "loss": 2.6865, + "step": 167100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4678194044188285e-05, + "loss": 2.6664, + "step": 167200 + }, + { + "epoch": 0.11, + "learning_rate": 4.4674991994876724e-05, + "loss": 2.6831, + "step": 167300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4671789945565164e-05, + "loss": 2.6744, + "step": 167400 + }, + { + "epoch": 0.11, + "learning_rate": 4.4668587896253604e-05, + "loss": 2.6984, + "step": 167500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4665385846942044e-05, + "loss": 2.7074, + "step": 167600 + }, + { + "epoch": 0.11, + "learning_rate": 4.466218379763048e-05, + "loss": 2.6687, + "step": 167700 + }, + { + "epoch": 0.11, + "learning_rate": 4.465898174831893e-05, + "loss": 2.686, + "step": 167800 + }, + { + "epoch": 0.11, + "learning_rate": 4.465577969900737e-05, + "loss": 2.6705, + "step": 167900 + }, + { + "epoch": 0.11, + "learning_rate": 4.465257764969581e-05, + "loss": 2.6895, + "step": 168000 + }, + { + "epoch": 0.11, + "eval_loss": 2.544931173324585, + "eval_runtime": 181.9047, + "eval_samples_per_second": 54.974, + "eval_steps_per_second": 3.436, + "step": 168000 + }, + { + "epoch": 0.11, + "learning_rate": 4.464937560038425e-05, + "loss": 2.6817, + "step": 168100 + }, + { + "epoch": 0.11, + "learning_rate": 4.464617355107269e-05, + "loss": 2.6892, + "step": 168200 + }, + { + "epoch": 0.11, + "learning_rate": 4.464297150176113e-05, + "loss": 2.6709, + "step": 168300 + }, + { + "epoch": 0.11, + "learning_rate": 4.463976945244957e-05, + "loss": 2.662, + "step": 168400 + }, + { + "epoch": 0.11, + "learning_rate": 4.4636567403138015e-05, + "loss": 2.6897, + "step": 168500 + }, + { + "epoch": 0.11, + "learning_rate": 4.463336535382645e-05, + "loss": 2.6811, + "step": 168600 + }, + { + "epoch": 0.11, + "learning_rate": 4.4630163304514894e-05, + "loss": 2.6794, + "step": 168700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4626961255203334e-05, + "loss": 2.688, + "step": 168800 + }, + { + "epoch": 0.11, + "learning_rate": 4.4623759205891774e-05, + "loss": 2.668, + "step": 168900 + }, + { + "epoch": 0.11, + "learning_rate": 4.4620557156580214e-05, + "loss": 2.6813, + "step": 169000 + }, + { + "epoch": 0.11, + "eval_loss": 2.545745611190796, + "eval_runtime": 185.2342, + "eval_samples_per_second": 53.986, + "eval_steps_per_second": 3.374, + "step": 169000 + }, + { + "epoch": 0.11, + "learning_rate": 4.461735510726865e-05, + "loss": 2.6721, + "step": 169100 + }, + { + "epoch": 0.11, + "learning_rate": 4.461415305795709e-05, + "loss": 2.6777, + "step": 169200 + }, + { + "epoch": 0.11, + "learning_rate": 4.461095100864553e-05, + "loss": 2.6988, + "step": 169300 + }, + { + "epoch": 0.11, + "learning_rate": 4.460774895933398e-05, + "loss": 2.6658, + "step": 169400 + }, + { + "epoch": 0.11, + "learning_rate": 4.460454691002241e-05, + "loss": 2.6527, + "step": 169500 + }, + { + "epoch": 0.11, + "learning_rate": 4.460134486071086e-05, + "loss": 2.6751, + "step": 169600 + }, + { + "epoch": 0.11, + "learning_rate": 4.45981428113993e-05, + "loss": 2.6917, + "step": 169700 + }, + { + "epoch": 0.11, + "learning_rate": 4.459494076208774e-05, + "loss": 2.6868, + "step": 169800 + }, + { + "epoch": 0.11, + "learning_rate": 4.459173871277618e-05, + "loss": 2.683, + "step": 169900 + }, + { + "epoch": 0.11, + "learning_rate": 4.458853666346462e-05, + "loss": 2.6483, + "step": 170000 + }, + { + "epoch": 0.11, + "eval_loss": 2.546018600463867, + "eval_runtime": 178.1131, + "eval_samples_per_second": 56.144, + "eval_steps_per_second": 3.509, + "step": 170000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4585334614153064e-05, + "loss": 2.6927, + "step": 170100 + }, + { + "epoch": 0.11, + "learning_rate": 4.45821325648415e-05, + "loss": 2.6673, + "step": 170200 + }, + { + "epoch": 0.11, + "learning_rate": 4.4578930515529944e-05, + "loss": 2.6984, + "step": 170300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4575728466218383e-05, + "loss": 2.6714, + "step": 170400 + }, + { + "epoch": 0.11, + "learning_rate": 4.457252641690682e-05, + "loss": 2.6531, + "step": 170500 + }, + { + "epoch": 0.11, + "learning_rate": 4.456932436759526e-05, + "loss": 2.6632, + "step": 170600 + }, + { + "epoch": 0.11, + "learning_rate": 4.45661223182837e-05, + "loss": 2.6671, + "step": 170700 + }, + { + "epoch": 0.11, + "learning_rate": 4.456292026897215e-05, + "loss": 2.6924, + "step": 170800 + }, + { + "epoch": 0.11, + "learning_rate": 4.455971821966058e-05, + "loss": 2.6658, + "step": 170900 + }, + { + "epoch": 0.11, + "learning_rate": 4.455651617034903e-05, + "loss": 2.7026, + "step": 171000 + }, + { + "epoch": 0.11, + "eval_loss": 2.5437819957733154, + "eval_runtime": 179.7594, + "eval_samples_per_second": 55.63, + "eval_steps_per_second": 3.477, + "step": 171000 + }, + { + "epoch": 0.11, + "learning_rate": 4.455331412103746e-05, + "loss": 2.6581, + "step": 171100 + }, + { + "epoch": 0.11, + "learning_rate": 4.455011207172591e-05, + "loss": 2.6755, + "step": 171200 + }, + { + "epoch": 0.11, + "learning_rate": 4.454691002241435e-05, + "loss": 2.6797, + "step": 171300 + }, + { + "epoch": 0.11, + "learning_rate": 4.454370797310279e-05, + "loss": 2.6632, + "step": 171400 + }, + { + "epoch": 0.11, + "learning_rate": 4.454050592379123e-05, + "loss": 2.6746, + "step": 171500 + }, + { + "epoch": 0.11, + "learning_rate": 4.453730387447967e-05, + "loss": 2.6487, + "step": 171600 + }, + { + "epoch": 0.11, + "learning_rate": 4.4534101825168114e-05, + "loss": 2.6976, + "step": 171700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4530899775856547e-05, + "loss": 2.6726, + "step": 171800 + }, + { + "epoch": 0.11, + "learning_rate": 4.452769772654499e-05, + "loss": 2.6924, + "step": 171900 + }, + { + "epoch": 0.11, + "learning_rate": 4.452449567723343e-05, + "loss": 2.6741, + "step": 172000 + }, + { + "epoch": 0.11, + "eval_loss": 2.542654514312744, + "eval_runtime": 178.2761, + "eval_samples_per_second": 56.093, + "eval_steps_per_second": 3.506, + "step": 172000 + }, + { + "epoch": 0.11, + "learning_rate": 4.452129362792187e-05, + "loss": 2.6608, + "step": 172100 + }, + { + "epoch": 0.11, + "learning_rate": 4.451809157861031e-05, + "loss": 2.6683, + "step": 172200 + }, + { + "epoch": 0.11, + "learning_rate": 4.451488952929875e-05, + "loss": 2.6632, + "step": 172300 + }, + { + "epoch": 0.11, + "learning_rate": 4.45116874799872e-05, + "loss": 2.6721, + "step": 172400 + }, + { + "epoch": 0.11, + "learning_rate": 4.450848543067563e-05, + "loss": 2.6688, + "step": 172500 + }, + { + "epoch": 0.11, + "learning_rate": 4.450528338136408e-05, + "loss": 2.6955, + "step": 172600 + }, + { + "epoch": 0.11, + "learning_rate": 4.450208133205251e-05, + "loss": 2.6606, + "step": 172700 + }, + { + "epoch": 0.11, + "learning_rate": 4.449887928274096e-05, + "loss": 2.6826, + "step": 172800 + }, + { + "epoch": 0.11, + "learning_rate": 4.44956772334294e-05, + "loss": 2.681, + "step": 172900 + }, + { + "epoch": 0.11, + "learning_rate": 4.449247518411784e-05, + "loss": 2.6699, + "step": 173000 + }, + { + "epoch": 0.11, + "eval_loss": 2.5460827350616455, + "eval_runtime": 181.5889, + "eval_samples_per_second": 55.069, + "eval_steps_per_second": 3.442, + "step": 173000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4489273134806283e-05, + "loss": 2.6707, + "step": 173100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4486071085494716e-05, + "loss": 2.6739, + "step": 173200 + }, + { + "epoch": 0.11, + "learning_rate": 4.448286903618316e-05, + "loss": 2.671, + "step": 173300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4479666986871596e-05, + "loss": 2.678, + "step": 173400 + }, + { + "epoch": 0.11, + "learning_rate": 4.447646493756004e-05, + "loss": 2.6606, + "step": 173500 + }, + { + "epoch": 0.11, + "learning_rate": 4.447326288824848e-05, + "loss": 2.6661, + "step": 173600 + }, + { + "epoch": 0.11, + "learning_rate": 4.447006083893692e-05, + "loss": 2.6721, + "step": 173700 + }, + { + "epoch": 0.11, + "learning_rate": 4.446685878962536e-05, + "loss": 2.681, + "step": 173800 + }, + { + "epoch": 0.11, + "learning_rate": 4.44636567403138e-05, + "loss": 2.6674, + "step": 173900 + }, + { + "epoch": 0.11, + "learning_rate": 4.446045469100225e-05, + "loss": 2.6802, + "step": 174000 + }, + { + "epoch": 0.11, + "eval_loss": 2.543893814086914, + "eval_runtime": 179.174, + "eval_samples_per_second": 55.812, + "eval_steps_per_second": 3.488, + "step": 174000 + }, + { + "epoch": 0.11, + "learning_rate": 4.445725264169068e-05, + "loss": 2.6419, + "step": 174100 + }, + { + "epoch": 0.11, + "learning_rate": 4.445405059237913e-05, + "loss": 2.6505, + "step": 174200 + }, + { + "epoch": 0.11, + "learning_rate": 4.445084854306756e-05, + "loss": 2.6707, + "step": 174300 + }, + { + "epoch": 0.11, + "learning_rate": 4.444764649375601e-05, + "loss": 2.6756, + "step": 174400 + }, + { + "epoch": 0.11, + "learning_rate": 4.4444444444444447e-05, + "loss": 2.6702, + "step": 174500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4441242395132886e-05, + "loss": 2.6939, + "step": 174600 + }, + { + "epoch": 0.11, + "learning_rate": 4.443804034582133e-05, + "loss": 2.6484, + "step": 174700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4434838296509766e-05, + "loss": 2.6682, + "step": 174800 + }, + { + "epoch": 0.11, + "learning_rate": 4.443163624719821e-05, + "loss": 2.6815, + "step": 174900 + }, + { + "epoch": 0.11, + "learning_rate": 4.4428434197886645e-05, + "loss": 2.6598, + "step": 175000 + }, + { + "epoch": 0.11, + "eval_loss": 2.5423295497894287, + "eval_runtime": 177.1753, + "eval_samples_per_second": 56.441, + "eval_steps_per_second": 3.528, + "step": 175000 + }, + { + "epoch": 0.11, + "learning_rate": 4.442523214857509e-05, + "loss": 2.6544, + "step": 175100 + }, + { + "epoch": 0.11, + "learning_rate": 4.442203009926353e-05, + "loss": 2.6505, + "step": 175200 + }, + { + "epoch": 0.11, + "learning_rate": 4.441882804995197e-05, + "loss": 2.6636, + "step": 175300 + }, + { + "epoch": 0.11, + "learning_rate": 4.441562600064042e-05, + "loss": 2.6802, + "step": 175400 + }, + { + "epoch": 0.11, + "learning_rate": 4.441242395132885e-05, + "loss": 2.6661, + "step": 175500 + }, + { + "epoch": 0.11, + "learning_rate": 4.44092219020173e-05, + "loss": 2.6685, + "step": 175600 + }, + { + "epoch": 0.11, + "learning_rate": 4.440601985270573e-05, + "loss": 2.6765, + "step": 175700 + }, + { + "epoch": 0.11, + "learning_rate": 4.440281780339418e-05, + "loss": 2.6613, + "step": 175800 + }, + { + "epoch": 0.11, + "learning_rate": 4.439961575408261e-05, + "loss": 2.6562, + "step": 175900 + }, + { + "epoch": 0.11, + "learning_rate": 4.4396413704771056e-05, + "loss": 2.6664, + "step": 176000 + }, + { + "epoch": 0.11, + "eval_loss": 2.5409162044525146, + "eval_runtime": 177.764, + "eval_samples_per_second": 56.254, + "eval_steps_per_second": 3.516, + "step": 176000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4393211655459496e-05, + "loss": 2.6664, + "step": 176100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4390009606147936e-05, + "loss": 2.6751, + "step": 176200 + }, + { + "epoch": 0.11, + "learning_rate": 4.438680755683638e-05, + "loss": 2.6666, + "step": 176300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4383605507524815e-05, + "loss": 2.6851, + "step": 176400 + }, + { + "epoch": 0.11, + "learning_rate": 4.438040345821326e-05, + "loss": 2.6791, + "step": 176500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4377201408901695e-05, + "loss": 2.6624, + "step": 176600 + }, + { + "epoch": 0.11, + "learning_rate": 4.437399935959014e-05, + "loss": 2.6496, + "step": 176700 + }, + { + "epoch": 0.11, + "learning_rate": 4.437079731027858e-05, + "loss": 2.6762, + "step": 176800 + }, + { + "epoch": 0.11, + "learning_rate": 4.436759526096702e-05, + "loss": 2.6605, + "step": 176900 + }, + { + "epoch": 0.11, + "learning_rate": 4.436439321165547e-05, + "loss": 2.6654, + "step": 177000 + }, + { + "epoch": 0.11, + "eval_loss": 2.543023109436035, + "eval_runtime": 176.7668, + "eval_samples_per_second": 56.572, + "eval_steps_per_second": 3.536, + "step": 177000 + }, + { + "epoch": 0.11, + "learning_rate": 4.43611911623439e-05, + "loss": 2.6607, + "step": 177100 + }, + { + "epoch": 0.11, + "learning_rate": 4.435798911303235e-05, + "loss": 2.6708, + "step": 177200 + }, + { + "epoch": 0.11, + "learning_rate": 4.435478706372078e-05, + "loss": 2.6716, + "step": 177300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4351585014409226e-05, + "loss": 2.6454, + "step": 177400 + }, + { + "epoch": 0.11, + "learning_rate": 4.434838296509766e-05, + "loss": 2.6601, + "step": 177500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4345180915786106e-05, + "loss": 2.663, + "step": 177600 + }, + { + "epoch": 0.11, + "learning_rate": 4.4341978866474545e-05, + "loss": 2.6717, + "step": 177700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4338776817162985e-05, + "loss": 2.6805, + "step": 177800 + }, + { + "epoch": 0.11, + "learning_rate": 4.433557476785143e-05, + "loss": 2.6775, + "step": 177900 + }, + { + "epoch": 0.11, + "learning_rate": 4.4332372718539865e-05, + "loss": 2.6619, + "step": 178000 + }, + { + "epoch": 0.11, + "eval_loss": 2.539804458618164, + "eval_runtime": 176.7894, + "eval_samples_per_second": 56.564, + "eval_steps_per_second": 3.535, + "step": 178000 + }, + { + "epoch": 0.11, + "learning_rate": 4.432917066922831e-05, + "loss": 2.6875, + "step": 178100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4325968619916744e-05, + "loss": 2.663, + "step": 178200 + }, + { + "epoch": 0.11, + "learning_rate": 4.432276657060519e-05, + "loss": 2.6482, + "step": 178300 + }, + { + "epoch": 0.11, + "learning_rate": 4.431956452129363e-05, + "loss": 2.6495, + "step": 178400 + }, + { + "epoch": 0.11, + "learning_rate": 4.431636247198207e-05, + "loss": 2.6604, + "step": 178500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4313160422670517e-05, + "loss": 2.6784, + "step": 178600 + }, + { + "epoch": 0.11, + "learning_rate": 4.430995837335895e-05, + "loss": 2.6695, + "step": 178700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4306756324047396e-05, + "loss": 2.6449, + "step": 178800 + }, + { + "epoch": 0.11, + "learning_rate": 4.430355427473583e-05, + "loss": 2.6526, + "step": 178900 + }, + { + "epoch": 0.11, + "learning_rate": 4.4300352225424275e-05, + "loss": 2.6478, + "step": 179000 + }, + { + "epoch": 0.11, + "eval_loss": 2.5400795936584473, + "eval_runtime": 179.3211, + "eval_samples_per_second": 55.766, + "eval_steps_per_second": 3.485, + "step": 179000 + }, + { + "epoch": 0.11, + "learning_rate": 4.429715017611271e-05, + "loss": 2.6662, + "step": 179100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4293948126801155e-05, + "loss": 2.6685, + "step": 179200 + }, + { + "epoch": 0.11, + "learning_rate": 4.4290746077489595e-05, + "loss": 2.6561, + "step": 179300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4287544028178034e-05, + "loss": 2.6846, + "step": 179400 + }, + { + "epoch": 0.11, + "learning_rate": 4.428434197886648e-05, + "loss": 2.6364, + "step": 179500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4281139929554914e-05, + "loss": 2.6593, + "step": 179600 + }, + { + "epoch": 0.12, + "learning_rate": 4.427793788024336e-05, + "loss": 2.6733, + "step": 179700 + }, + { + "epoch": 0.12, + "learning_rate": 4.427473583093179e-05, + "loss": 2.6576, + "step": 179800 + }, + { + "epoch": 0.12, + "learning_rate": 4.427153378162024e-05, + "loss": 2.6718, + "step": 179900 + }, + { + "epoch": 0.12, + "learning_rate": 4.426833173230868e-05, + "loss": 2.6503, + "step": 180000 + }, + { + "epoch": 0.12, + "eval_loss": 2.541757345199585, + "eval_runtime": 178.2308, + "eval_samples_per_second": 56.107, + "eval_steps_per_second": 3.507, + "step": 180000 + }, + { + "epoch": 0.12, + "learning_rate": 4.426512968299712e-05, + "loss": 2.6436, + "step": 180100 + }, + { + "epoch": 0.12, + "learning_rate": 4.4261927633685566e-05, + "loss": 2.6609, + "step": 180200 + }, + { + "epoch": 0.12, + "learning_rate": 4.4258725584374e-05, + "loss": 2.6725, + "step": 180300 + }, + { + "epoch": 0.12, + "learning_rate": 4.4255523535062445e-05, + "loss": 2.6301, + "step": 180400 + }, + { + "epoch": 0.12, + "learning_rate": 4.425232148575088e-05, + "loss": 2.6516, + "step": 180500 + }, + { + "epoch": 0.12, + "learning_rate": 4.4249119436439325e-05, + "loss": 2.6533, + "step": 180600 + }, + { + "epoch": 0.12, + "learning_rate": 4.4245917387127765e-05, + "loss": 2.6684, + "step": 180700 + }, + { + "epoch": 0.12, + "learning_rate": 4.4242715337816204e-05, + "loss": 2.6714, + "step": 180800 + }, + { + "epoch": 0.12, + "learning_rate": 4.4239513288504644e-05, + "loss": 2.6481, + "step": 180900 + }, + { + "epoch": 0.12, + "learning_rate": 4.4236311239193084e-05, + "loss": 2.6519, + "step": 181000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5420303344726562, + "eval_runtime": 179.5147, + "eval_samples_per_second": 55.706, + "eval_steps_per_second": 3.482, + "step": 181000 + }, + { + "epoch": 0.12, + "learning_rate": 4.423310918988153e-05, + "loss": 2.6791, + "step": 181100 + }, + { + "epoch": 0.12, + "learning_rate": 4.422990714056996e-05, + "loss": 2.6628, + "step": 181200 + }, + { + "epoch": 0.12, + "learning_rate": 4.422670509125841e-05, + "loss": 2.6631, + "step": 181300 + }, + { + "epoch": 0.12, + "learning_rate": 4.422350304194684e-05, + "loss": 2.6562, + "step": 181400 + }, + { + "epoch": 0.12, + "learning_rate": 4.422030099263529e-05, + "loss": 2.6672, + "step": 181500 + }, + { + "epoch": 0.12, + "learning_rate": 4.421709894332373e-05, + "loss": 2.6597, + "step": 181600 + }, + { + "epoch": 0.12, + "learning_rate": 4.421389689401217e-05, + "loss": 2.6623, + "step": 181700 + }, + { + "epoch": 0.12, + "learning_rate": 4.4210694844700615e-05, + "loss": 2.6592, + "step": 181800 + }, + { + "epoch": 0.12, + "learning_rate": 4.420749279538905e-05, + "loss": 2.6635, + "step": 181900 + }, + { + "epoch": 0.12, + "learning_rate": 4.4204290746077495e-05, + "loss": 2.6597, + "step": 182000 + }, + { + "epoch": 0.12, + "eval_loss": 2.539924144744873, + "eval_runtime": 179.778, + "eval_samples_per_second": 55.624, + "eval_steps_per_second": 3.477, + "step": 182000 + }, + { + "epoch": 0.12, + "learning_rate": 4.420108869676593e-05, + "loss": 2.6532, + "step": 182100 + }, + { + "epoch": 0.12, + "learning_rate": 4.4197886647454374e-05, + "loss": 2.6642, + "step": 182200 + }, + { + "epoch": 0.12, + "learning_rate": 4.4194684598142814e-05, + "loss": 2.6746, + "step": 182300 + }, + { + "epoch": 0.12, + "learning_rate": 4.4191482548831254e-05, + "loss": 2.6647, + "step": 182400 + }, + { + "epoch": 0.12, + "learning_rate": 4.4188280499519693e-05, + "loss": 2.6517, + "step": 182500 + }, + { + "epoch": 0.12, + "learning_rate": 4.418507845020813e-05, + "loss": 2.6666, + "step": 182600 + }, + { + "epoch": 0.12, + "learning_rate": 4.418187640089658e-05, + "loss": 2.6635, + "step": 182700 + }, + { + "epoch": 0.12, + "learning_rate": 4.417867435158501e-05, + "loss": 2.7006, + "step": 182800 + }, + { + "epoch": 0.12, + "learning_rate": 4.417547230227346e-05, + "loss": 2.6571, + "step": 182900 + }, + { + "epoch": 0.12, + "learning_rate": 4.41722702529619e-05, + "loss": 2.6557, + "step": 183000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5385019779205322, + "eval_runtime": 177.971, + "eval_samples_per_second": 56.189, + "eval_steps_per_second": 3.512, + "step": 183000 + }, + { + "epoch": 0.12, + "learning_rate": 4.416906820365034e-05, + "loss": 2.6473, + "step": 183100 + }, + { + "epoch": 0.12, + "learning_rate": 4.416586615433878e-05, + "loss": 2.671, + "step": 183200 + }, + { + "epoch": 0.12, + "learning_rate": 4.416266410502722e-05, + "loss": 2.6507, + "step": 183300 + }, + { + "epoch": 0.12, + "learning_rate": 4.4159462055715665e-05, + "loss": 2.6854, + "step": 183400 + }, + { + "epoch": 0.12, + "learning_rate": 4.41562600064041e-05, + "loss": 2.6632, + "step": 183500 + }, + { + "epoch": 0.12, + "learning_rate": 4.4153057957092544e-05, + "loss": 2.6713, + "step": 183600 + }, + { + "epoch": 0.12, + "learning_rate": 4.414985590778098e-05, + "loss": 2.6622, + "step": 183700 + }, + { + "epoch": 0.12, + "learning_rate": 4.4146653858469424e-05, + "loss": 2.6379, + "step": 183800 + }, + { + "epoch": 0.12, + "learning_rate": 4.414345180915786e-05, + "loss": 2.6656, + "step": 183900 + }, + { + "epoch": 0.12, + "learning_rate": 4.41402497598463e-05, + "loss": 2.6544, + "step": 184000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5391104221343994, + "eval_runtime": 182.9166, + "eval_samples_per_second": 54.67, + "eval_steps_per_second": 3.417, + "step": 184000 + }, + { + "epoch": 0.12, + "learning_rate": 4.413704771053474e-05, + "loss": 2.649, + "step": 184100 + }, + { + "epoch": 0.12, + "learning_rate": 4.413384566122318e-05, + "loss": 2.6583, + "step": 184200 + }, + { + "epoch": 0.12, + "learning_rate": 4.413064361191163e-05, + "loss": 2.6847, + "step": 184300 + }, + { + "epoch": 0.12, + "learning_rate": 4.412744156260006e-05, + "loss": 2.6579, + "step": 184400 + }, + { + "epoch": 0.12, + "learning_rate": 4.412423951328851e-05, + "loss": 2.6357, + "step": 184500 + }, + { + "epoch": 0.12, + "learning_rate": 4.412103746397695e-05, + "loss": 2.6437, + "step": 184600 + }, + { + "epoch": 0.12, + "learning_rate": 4.411783541466539e-05, + "loss": 2.6591, + "step": 184700 + }, + { + "epoch": 0.12, + "learning_rate": 4.411463336535383e-05, + "loss": 2.6783, + "step": 184800 + }, + { + "epoch": 0.12, + "learning_rate": 4.411143131604227e-05, + "loss": 2.6516, + "step": 184900 + }, + { + "epoch": 0.12, + "learning_rate": 4.4108229266730714e-05, + "loss": 2.6584, + "step": 185000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5388665199279785, + "eval_runtime": 181.9145, + "eval_samples_per_second": 54.971, + "eval_steps_per_second": 3.436, + "step": 185000 + }, + { + "epoch": 0.12, + "learning_rate": 4.410502721741915e-05, + "loss": 2.6592, + "step": 185100 + }, + { + "epoch": 0.12, + "learning_rate": 4.4101825168107593e-05, + "loss": 2.6676, + "step": 185200 + }, + { + "epoch": 0.12, + "learning_rate": 4.409862311879603e-05, + "loss": 2.6503, + "step": 185300 + }, + { + "epoch": 0.12, + "learning_rate": 4.409542106948447e-05, + "loss": 2.6789, + "step": 185400 + }, + { + "epoch": 0.12, + "learning_rate": 4.409221902017291e-05, + "loss": 2.6606, + "step": 185500 + }, + { + "epoch": 0.12, + "learning_rate": 4.408901697086135e-05, + "loss": 2.6591, + "step": 185600 + }, + { + "epoch": 0.12, + "learning_rate": 4.408581492154979e-05, + "loss": 2.6723, + "step": 185700 + }, + { + "epoch": 0.12, + "learning_rate": 4.408261287223823e-05, + "loss": 2.6434, + "step": 185800 + }, + { + "epoch": 0.12, + "learning_rate": 4.407941082292668e-05, + "loss": 2.6468, + "step": 185900 + }, + { + "epoch": 0.12, + "learning_rate": 4.407620877361511e-05, + "loss": 2.6492, + "step": 186000 + }, + { + "epoch": 0.12, + "eval_loss": 2.53727650642395, + "eval_runtime": 177.1235, + "eval_samples_per_second": 56.458, + "eval_steps_per_second": 3.529, + "step": 186000 + }, + { + "epoch": 0.12, + "learning_rate": 4.407300672430356e-05, + "loss": 2.6555, + "step": 186100 + }, + { + "epoch": 0.12, + "learning_rate": 4.4069804674992e-05, + "loss": 2.6451, + "step": 186200 + }, + { + "epoch": 0.12, + "learning_rate": 4.406660262568044e-05, + "loss": 2.6524, + "step": 186300 + }, + { + "epoch": 0.12, + "learning_rate": 4.406340057636888e-05, + "loss": 2.6685, + "step": 186400 + }, + { + "epoch": 0.12, + "learning_rate": 4.406019852705732e-05, + "loss": 2.67, + "step": 186500 + }, + { + "epoch": 0.12, + "learning_rate": 4.405699647774576e-05, + "loss": 2.6588, + "step": 186600 + }, + { + "epoch": 0.12, + "learning_rate": 4.4053794428434196e-05, + "loss": 2.6536, + "step": 186700 + }, + { + "epoch": 0.12, + "learning_rate": 4.405059237912264e-05, + "loss": 2.6571, + "step": 186800 + }, + { + "epoch": 0.12, + "learning_rate": 4.404739032981108e-05, + "loss": 2.6535, + "step": 186900 + }, + { + "epoch": 0.12, + "learning_rate": 4.404418828049952e-05, + "loss": 2.6514, + "step": 187000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5385489463806152, + "eval_runtime": 176.1879, + "eval_samples_per_second": 56.758, + "eval_steps_per_second": 3.547, + "step": 187000 + }, + { + "epoch": 0.12, + "learning_rate": 4.404098623118796e-05, + "loss": 2.6707, + "step": 187100 + }, + { + "epoch": 0.12, + "learning_rate": 4.40377841818764e-05, + "loss": 2.6565, + "step": 187200 + }, + { + "epoch": 0.12, + "learning_rate": 4.403458213256484e-05, + "loss": 2.65, + "step": 187300 + }, + { + "epoch": 0.12, + "learning_rate": 4.403138008325328e-05, + "loss": 2.6506, + "step": 187400 + }, + { + "epoch": 0.12, + "learning_rate": 4.402817803394173e-05, + "loss": 2.6517, + "step": 187500 + }, + { + "epoch": 0.12, + "learning_rate": 4.402497598463017e-05, + "loss": 2.6659, + "step": 187600 + }, + { + "epoch": 0.12, + "learning_rate": 4.402177393531861e-05, + "loss": 2.6645, + "step": 187700 + }, + { + "epoch": 0.12, + "learning_rate": 4.401857188600705e-05, + "loss": 2.6607, + "step": 187800 + }, + { + "epoch": 0.12, + "learning_rate": 4.401536983669549e-05, + "loss": 2.6688, + "step": 187900 + }, + { + "epoch": 0.12, + "learning_rate": 4.4012167787383926e-05, + "loss": 2.6649, + "step": 188000 + }, + { + "epoch": 0.12, + "eval_loss": 2.537940740585327, + "eval_runtime": 179.0706, + "eval_samples_per_second": 55.844, + "eval_steps_per_second": 3.49, + "step": 188000 + }, + { + "epoch": 0.12, + "learning_rate": 4.4008965738072366e-05, + "loss": 2.6627, + "step": 188100 + }, + { + "epoch": 0.12, + "learning_rate": 4.4005763688760806e-05, + "loss": 2.6571, + "step": 188200 + }, + { + "epoch": 0.12, + "learning_rate": 4.400256163944925e-05, + "loss": 2.661, + "step": 188300 + }, + { + "epoch": 0.12, + "learning_rate": 4.399935959013769e-05, + "loss": 2.627, + "step": 188400 + }, + { + "epoch": 0.12, + "learning_rate": 4.399615754082613e-05, + "loss": 2.6708, + "step": 188500 + }, + { + "epoch": 0.12, + "learning_rate": 4.399295549151457e-05, + "loss": 2.6759, + "step": 188600 + }, + { + "epoch": 0.12, + "learning_rate": 4.398975344220301e-05, + "loss": 2.6571, + "step": 188700 + }, + { + "epoch": 0.12, + "learning_rate": 4.398655139289145e-05, + "loss": 2.673, + "step": 188800 + }, + { + "epoch": 0.12, + "learning_rate": 4.398334934357989e-05, + "loss": 2.667, + "step": 188900 + }, + { + "epoch": 0.12, + "learning_rate": 4.398014729426833e-05, + "loss": 2.6433, + "step": 189000 + }, + { + "epoch": 0.12, + "eval_loss": 2.539026975631714, + "eval_runtime": 176.2255, + "eval_samples_per_second": 56.745, + "eval_steps_per_second": 3.547, + "step": 189000 + }, + { + "epoch": 0.12, + "learning_rate": 4.397694524495678e-05, + "loss": 2.6545, + "step": 189100 + }, + { + "epoch": 0.12, + "learning_rate": 4.397374319564522e-05, + "loss": 2.6561, + "step": 189200 + }, + { + "epoch": 0.12, + "learning_rate": 4.397054114633366e-05, + "loss": 2.6411, + "step": 189300 + }, + { + "epoch": 0.12, + "learning_rate": 4.3967339097022096e-05, + "loss": 2.6798, + "step": 189400 + }, + { + "epoch": 0.12, + "learning_rate": 4.3964137047710536e-05, + "loss": 2.6658, + "step": 189500 + }, + { + "epoch": 0.12, + "learning_rate": 4.3960934998398976e-05, + "loss": 2.6477, + "step": 189600 + }, + { + "epoch": 0.12, + "learning_rate": 4.3957732949087416e-05, + "loss": 2.6476, + "step": 189700 + }, + { + "epoch": 0.12, + "learning_rate": 4.3954530899775855e-05, + "loss": 2.6484, + "step": 189800 + }, + { + "epoch": 0.12, + "learning_rate": 4.39513288504643e-05, + "loss": 2.666, + "step": 189900 + }, + { + "epoch": 0.12, + "learning_rate": 4.394812680115274e-05, + "loss": 2.6422, + "step": 190000 + }, + { + "epoch": 0.12, + "eval_loss": 2.538339138031006, + "eval_runtime": 177.1468, + "eval_samples_per_second": 56.45, + "eval_steps_per_second": 3.528, + "step": 190000 + }, + { + "epoch": 0.12, + "learning_rate": 4.394492475184118e-05, + "loss": 2.6582, + "step": 190100 + }, + { + "epoch": 0.12, + "learning_rate": 4.394172270252962e-05, + "loss": 2.6722, + "step": 190200 + }, + { + "epoch": 0.12, + "learning_rate": 4.393852065321806e-05, + "loss": 2.6741, + "step": 190300 + }, + { + "epoch": 0.12, + "learning_rate": 4.39353186039065e-05, + "loss": 2.6614, + "step": 190400 + }, + { + "epoch": 0.12, + "learning_rate": 4.393211655459494e-05, + "loss": 2.6514, + "step": 190500 + }, + { + "epoch": 0.12, + "learning_rate": 4.392891450528339e-05, + "loss": 2.6624, + "step": 190600 + }, + { + "epoch": 0.12, + "learning_rate": 4.3925712455971827e-05, + "loss": 2.6354, + "step": 190700 + }, + { + "epoch": 0.12, + "learning_rate": 4.3922510406660266e-05, + "loss": 2.6575, + "step": 190800 + }, + { + "epoch": 0.12, + "learning_rate": 4.3919308357348706e-05, + "loss": 2.6565, + "step": 190900 + }, + { + "epoch": 0.12, + "learning_rate": 4.3916106308037146e-05, + "loss": 2.6553, + "step": 191000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5357167720794678, + "eval_runtime": 179.655, + "eval_samples_per_second": 55.662, + "eval_steps_per_second": 3.479, + "step": 191000 + }, + { + "epoch": 0.12, + "learning_rate": 4.3912904258725585e-05, + "loss": 2.6707, + "step": 191100 + }, + { + "epoch": 0.12, + "learning_rate": 4.3909702209414025e-05, + "loss": 2.6394, + "step": 191200 + }, + { + "epoch": 0.12, + "learning_rate": 4.3906500160102465e-05, + "loss": 2.6648, + "step": 191300 + }, + { + "epoch": 0.12, + "learning_rate": 4.3903298110790905e-05, + "loss": 2.6524, + "step": 191400 + }, + { + "epoch": 0.12, + "learning_rate": 4.390009606147935e-05, + "loss": 2.6517, + "step": 191500 + }, + { + "epoch": 0.12, + "learning_rate": 4.389689401216779e-05, + "loss": 2.6365, + "step": 191600 + }, + { + "epoch": 0.12, + "learning_rate": 4.389369196285623e-05, + "loss": 2.6565, + "step": 191700 + }, + { + "epoch": 0.12, + "learning_rate": 4.389048991354467e-05, + "loss": 2.6446, + "step": 191800 + }, + { + "epoch": 0.12, + "learning_rate": 4.388728786423311e-05, + "loss": 2.6494, + "step": 191900 + }, + { + "epoch": 0.12, + "learning_rate": 4.388408581492155e-05, + "loss": 2.6612, + "step": 192000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5360355377197266, + "eval_runtime": 177.2658, + "eval_samples_per_second": 56.412, + "eval_steps_per_second": 3.526, + "step": 192000 + }, + { + "epoch": 0.12, + "learning_rate": 4.388088376560999e-05, + "loss": 2.6504, + "step": 192100 + }, + { + "epoch": 0.12, + "learning_rate": 4.3877681716298436e-05, + "loss": 2.634, + "step": 192200 + }, + { + "epoch": 0.12, + "learning_rate": 4.3874479666986876e-05, + "loss": 2.6451, + "step": 192300 + }, + { + "epoch": 0.12, + "learning_rate": 4.3871277617675316e-05, + "loss": 2.6446, + "step": 192400 + }, + { + "epoch": 0.12, + "learning_rate": 4.3868075568363755e-05, + "loss": 2.6578, + "step": 192500 + }, + { + "epoch": 0.12, + "learning_rate": 4.3864873519052195e-05, + "loss": 2.6589, + "step": 192600 + }, + { + "epoch": 0.12, + "learning_rate": 4.3861671469740635e-05, + "loss": 2.659, + "step": 192700 + }, + { + "epoch": 0.12, + "learning_rate": 4.3858469420429075e-05, + "loss": 2.6478, + "step": 192800 + }, + { + "epoch": 0.12, + "learning_rate": 4.385526737111752e-05, + "loss": 2.6644, + "step": 192900 + }, + { + "epoch": 0.12, + "learning_rate": 4.3852065321805954e-05, + "loss": 2.6528, + "step": 193000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5370066165924072, + "eval_runtime": 176.8095, + "eval_samples_per_second": 56.558, + "eval_steps_per_second": 3.535, + "step": 193000 + }, + { + "epoch": 0.12, + "learning_rate": 4.38488632724944e-05, + "loss": 2.641, + "step": 193100 + }, + { + "epoch": 0.12, + "learning_rate": 4.384566122318284e-05, + "loss": 2.6679, + "step": 193200 + }, + { + "epoch": 0.12, + "learning_rate": 4.384245917387128e-05, + "loss": 2.6377, + "step": 193300 + }, + { + "epoch": 0.12, + "learning_rate": 4.383925712455972e-05, + "loss": 2.6468, + "step": 193400 + }, + { + "epoch": 0.12, + "learning_rate": 4.383605507524816e-05, + "loss": 2.6512, + "step": 193500 + }, + { + "epoch": 0.12, + "learning_rate": 4.38328530259366e-05, + "loss": 2.6409, + "step": 193600 + }, + { + "epoch": 0.12, + "learning_rate": 4.382965097662504e-05, + "loss": 2.6594, + "step": 193700 + }, + { + "epoch": 0.12, + "learning_rate": 4.3826448927313486e-05, + "loss": 2.6408, + "step": 193800 + }, + { + "epoch": 0.12, + "learning_rate": 4.3823246878001925e-05, + "loss": 2.6558, + "step": 193900 + }, + { + "epoch": 0.12, + "learning_rate": 4.3820044828690365e-05, + "loss": 2.6412, + "step": 194000 + }, + { + "epoch": 0.12, + "eval_loss": 2.5341765880584717, + "eval_runtime": 176.7404, + "eval_samples_per_second": 56.58, + "eval_steps_per_second": 3.536, + "step": 194000 + }, + { + "epoch": 0.12, + "learning_rate": 4.3816842779378805e-05, + "loss": 2.659, + "step": 194100 + }, + { + "epoch": 0.12, + "learning_rate": 4.3813640730067245e-05, + "loss": 2.6463, + "step": 194200 + }, + { + "epoch": 0.12, + "learning_rate": 4.3810438680755684e-05, + "loss": 2.6407, + "step": 194300 + }, + { + "epoch": 0.12, + "learning_rate": 4.3807236631444124e-05, + "loss": 2.631, + "step": 194400 + }, + { + "epoch": 0.12, + "learning_rate": 4.380403458213257e-05, + "loss": 2.6573, + "step": 194500 + }, + { + "epoch": 0.12, + "learning_rate": 4.3800832532821003e-05, + "loss": 2.6519, + "step": 194600 + }, + { + "epoch": 0.12, + "learning_rate": 4.379763048350945e-05, + "loss": 2.6476, + "step": 194700 + }, + { + "epoch": 0.12, + "learning_rate": 4.379442843419789e-05, + "loss": 2.6345, + "step": 194800 + }, + { + "epoch": 0.12, + "learning_rate": 4.379122638488633e-05, + "loss": 2.6486, + "step": 194900 + }, + { + "epoch": 0.12, + "learning_rate": 4.378802433557477e-05, + "loss": 2.6419, + "step": 195000 + }, + { + "epoch": 0.12, + "eval_loss": 2.535737991333008, + "eval_runtime": 177.7521, + "eval_samples_per_second": 56.258, + "eval_steps_per_second": 3.516, + "step": 195000 + }, + { + "epoch": 0.12, + "learning_rate": 4.378482228626321e-05, + "loss": 2.6427, + "step": 195100 + }, + { + "epoch": 0.12, + "learning_rate": 4.3781620236951655e-05, + "loss": 2.651, + "step": 195200 + }, + { + "epoch": 0.12, + "learning_rate": 4.377841818764009e-05, + "loss": 2.6583, + "step": 195300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3775216138328535e-05, + "loss": 2.6303, + "step": 195400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3772014089016975e-05, + "loss": 2.6712, + "step": 195500 + }, + { + "epoch": 0.13, + "learning_rate": 4.3768812039705414e-05, + "loss": 2.6521, + "step": 195600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3765609990393854e-05, + "loss": 2.6526, + "step": 195700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3762407941082294e-05, + "loss": 2.6319, + "step": 195800 + }, + { + "epoch": 0.13, + "learning_rate": 4.3759205891770734e-05, + "loss": 2.6545, + "step": 195900 + }, + { + "epoch": 0.13, + "learning_rate": 4.375600384245917e-05, + "loss": 2.6323, + "step": 196000 + }, + { + "epoch": 0.13, + "eval_loss": 2.5361852645874023, + "eval_runtime": 178.5034, + "eval_samples_per_second": 56.021, + "eval_steps_per_second": 3.501, + "step": 196000 + }, + { + "epoch": 0.13, + "learning_rate": 4.375280179314762e-05, + "loss": 2.6563, + "step": 196100 + }, + { + "epoch": 0.13, + "learning_rate": 4.374959974383605e-05, + "loss": 2.6301, + "step": 196200 + }, + { + "epoch": 0.13, + "learning_rate": 4.37463976945245e-05, + "loss": 2.6575, + "step": 196300 + }, + { + "epoch": 0.13, + "learning_rate": 4.374319564521294e-05, + "loss": 2.6323, + "step": 196400 + }, + { + "epoch": 0.13, + "learning_rate": 4.373999359590138e-05, + "loss": 2.6314, + "step": 196500 + }, + { + "epoch": 0.13, + "learning_rate": 4.373679154658982e-05, + "loss": 2.6519, + "step": 196600 + }, + { + "epoch": 0.13, + "learning_rate": 4.373358949727826e-05, + "loss": 2.6487, + "step": 196700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3730387447966705e-05, + "loss": 2.6536, + "step": 196800 + }, + { + "epoch": 0.13, + "learning_rate": 4.372718539865514e-05, + "loss": 2.6511, + "step": 196900 + }, + { + "epoch": 0.13, + "learning_rate": 4.3723983349343584e-05, + "loss": 2.6536, + "step": 197000 + }, + { + "epoch": 0.13, + "eval_loss": 2.533262252807617, + "eval_runtime": 177.806, + "eval_samples_per_second": 56.241, + "eval_steps_per_second": 3.515, + "step": 197000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3720781300032024e-05, + "loss": 2.672, + "step": 197100 + }, + { + "epoch": 0.13, + "learning_rate": 4.3717579250720464e-05, + "loss": 2.655, + "step": 197200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3714377201408904e-05, + "loss": 2.6523, + "step": 197300 + }, + { + "epoch": 0.13, + "learning_rate": 4.371117515209734e-05, + "loss": 2.6679, + "step": 197400 + }, + { + "epoch": 0.13, + "learning_rate": 4.370797310278579e-05, + "loss": 2.6668, + "step": 197500 + }, + { + "epoch": 0.13, + "learning_rate": 4.370477105347422e-05, + "loss": 2.6433, + "step": 197600 + }, + { + "epoch": 0.13, + "learning_rate": 4.370156900416267e-05, + "loss": 2.66, + "step": 197700 + }, + { + "epoch": 0.13, + "learning_rate": 4.36983669548511e-05, + "loss": 2.6406, + "step": 197800 + }, + { + "epoch": 0.13, + "learning_rate": 4.369516490553955e-05, + "loss": 2.6566, + "step": 197900 + }, + { + "epoch": 0.13, + "learning_rate": 4.369196285622799e-05, + "loss": 2.6576, + "step": 198000 + }, + { + "epoch": 0.13, + "eval_loss": 2.53364634513855, + "eval_runtime": 175.0111, + "eval_samples_per_second": 57.139, + "eval_steps_per_second": 3.571, + "step": 198000 + }, + { + "epoch": 0.13, + "learning_rate": 4.368876080691643e-05, + "loss": 2.6356, + "step": 198100 + }, + { + "epoch": 0.13, + "learning_rate": 4.3685558757604875e-05, + "loss": 2.663, + "step": 198200 + }, + { + "epoch": 0.13, + "learning_rate": 4.368235670829331e-05, + "loss": 2.6496, + "step": 198300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3679154658981754e-05, + "loss": 2.6501, + "step": 198400 + }, + { + "epoch": 0.13, + "learning_rate": 4.367595260967019e-05, + "loss": 2.6726, + "step": 198500 + }, + { + "epoch": 0.13, + "learning_rate": 4.3672750560358634e-05, + "loss": 2.6322, + "step": 198600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3669548511047073e-05, + "loss": 2.6449, + "step": 198700 + }, + { + "epoch": 0.13, + "learning_rate": 4.366634646173551e-05, + "loss": 2.6556, + "step": 198800 + }, + { + "epoch": 0.13, + "learning_rate": 4.366314441242395e-05, + "loss": 2.6442, + "step": 198900 + }, + { + "epoch": 0.13, + "learning_rate": 4.365994236311239e-05, + "loss": 2.6787, + "step": 199000 + }, + { + "epoch": 0.13, + "eval_loss": 2.5323338508605957, + "eval_runtime": 177.6549, + "eval_samples_per_second": 56.289, + "eval_steps_per_second": 3.518, + "step": 199000 + }, + { + "epoch": 0.13, + "learning_rate": 4.365674031380084e-05, + "loss": 2.6481, + "step": 199100 + }, + { + "epoch": 0.13, + "learning_rate": 4.365353826448927e-05, + "loss": 2.6517, + "step": 199200 + }, + { + "epoch": 0.13, + "learning_rate": 4.365033621517772e-05, + "loss": 2.6406, + "step": 199300 + }, + { + "epoch": 0.13, + "learning_rate": 4.364713416586615e-05, + "loss": 2.6472, + "step": 199400 + }, + { + "epoch": 0.13, + "learning_rate": 4.36439321165546e-05, + "loss": 2.6541, + "step": 199500 + }, + { + "epoch": 0.13, + "learning_rate": 4.364073006724304e-05, + "loss": 2.6353, + "step": 199600 + }, + { + "epoch": 0.13, + "learning_rate": 4.363752801793148e-05, + "loss": 2.6538, + "step": 199700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3634325968619924e-05, + "loss": 2.6368, + "step": 199800 + }, + { + "epoch": 0.13, + "learning_rate": 4.363112391930836e-05, + "loss": 2.6507, + "step": 199900 + }, + { + "epoch": 0.13, + "learning_rate": 4.3627921869996804e-05, + "loss": 2.6438, + "step": 200000 + }, + { + "epoch": 0.13, + "eval_loss": 2.533414125442505, + "eval_runtime": 176.5588, + "eval_samples_per_second": 56.638, + "eval_steps_per_second": 3.54, + "step": 200000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3624719820685237e-05, + "loss": 2.6773, + "step": 200100 + }, + { + "epoch": 0.13, + "learning_rate": 4.362151777137368e-05, + "loss": 2.6444, + "step": 200200 + }, + { + "epoch": 0.13, + "learning_rate": 4.361831572206212e-05, + "loss": 2.6491, + "step": 200300 + }, + { + "epoch": 0.13, + "learning_rate": 4.361511367275056e-05, + "loss": 2.6575, + "step": 200400 + }, + { + "epoch": 0.13, + "learning_rate": 4.361191162343901e-05, + "loss": 2.6546, + "step": 200500 + }, + { + "epoch": 0.13, + "learning_rate": 4.360870957412744e-05, + "loss": 2.6145, + "step": 200600 + }, + { + "epoch": 0.13, + "learning_rate": 4.360550752481589e-05, + "loss": 2.6463, + "step": 200700 + }, + { + "epoch": 0.13, + "learning_rate": 4.360230547550432e-05, + "loss": 2.6528, + "step": 200800 + }, + { + "epoch": 0.13, + "learning_rate": 4.359910342619277e-05, + "loss": 2.658, + "step": 200900 + }, + { + "epoch": 0.13, + "learning_rate": 4.35959013768812e-05, + "loss": 2.6667, + "step": 201000 + }, + { + "epoch": 0.13, + "eval_loss": 2.531358242034912, + "eval_runtime": 177.6252, + "eval_samples_per_second": 56.298, + "eval_steps_per_second": 3.519, + "step": 201000 + }, + { + "epoch": 0.13, + "learning_rate": 4.359269932756965e-05, + "loss": 2.6399, + "step": 201100 + }, + { + "epoch": 0.13, + "learning_rate": 4.358949727825809e-05, + "loss": 2.6425, + "step": 201200 + }, + { + "epoch": 0.13, + "learning_rate": 4.358629522894653e-05, + "loss": 2.6418, + "step": 201300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3583093179634973e-05, + "loss": 2.6688, + "step": 201400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3579891130323406e-05, + "loss": 2.6664, + "step": 201500 + }, + { + "epoch": 0.13, + "learning_rate": 4.357668908101185e-05, + "loss": 2.6546, + "step": 201600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3573487031700286e-05, + "loss": 2.6256, + "step": 201700 + }, + { + "epoch": 0.13, + "learning_rate": 4.357028498238873e-05, + "loss": 2.6314, + "step": 201800 + }, + { + "epoch": 0.13, + "learning_rate": 4.356708293307717e-05, + "loss": 2.6514, + "step": 201900 + }, + { + "epoch": 0.13, + "learning_rate": 4.356388088376561e-05, + "loss": 2.645, + "step": 202000 + }, + { + "epoch": 0.13, + "eval_loss": 2.5308117866516113, + "eval_runtime": 177.6308, + "eval_samples_per_second": 56.297, + "eval_steps_per_second": 3.519, + "step": 202000 + }, + { + "epoch": 0.13, + "learning_rate": 4.356067883445406e-05, + "loss": 2.6456, + "step": 202100 + }, + { + "epoch": 0.13, + "learning_rate": 4.355747678514249e-05, + "loss": 2.6435, + "step": 202200 + }, + { + "epoch": 0.13, + "learning_rate": 4.355427473583094e-05, + "loss": 2.6495, + "step": 202300 + }, + { + "epoch": 0.13, + "learning_rate": 4.355107268651937e-05, + "loss": 2.6596, + "step": 202400 + }, + { + "epoch": 0.13, + "learning_rate": 4.354787063720782e-05, + "loss": 2.6367, + "step": 202500 + }, + { + "epoch": 0.13, + "learning_rate": 4.354466858789625e-05, + "loss": 2.6302, + "step": 202600 + }, + { + "epoch": 0.13, + "learning_rate": 4.35414665385847e-05, + "loss": 2.6582, + "step": 202700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3538264489273137e-05, + "loss": 2.6292, + "step": 202800 + }, + { + "epoch": 0.13, + "learning_rate": 4.3535062439961576e-05, + "loss": 2.651, + "step": 202900 + }, + { + "epoch": 0.13, + "learning_rate": 4.353186039065002e-05, + "loss": 2.6644, + "step": 203000 + }, + { + "epoch": 0.13, + "eval_loss": 2.53143572807312, + "eval_runtime": 175.7967, + "eval_samples_per_second": 56.884, + "eval_steps_per_second": 3.555, + "step": 203000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3528658341338456e-05, + "loss": 2.6447, + "step": 203100 + }, + { + "epoch": 0.13, + "learning_rate": 4.35254562920269e-05, + "loss": 2.6724, + "step": 203200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3522254242715335e-05, + "loss": 2.6471, + "step": 203300 + }, + { + "epoch": 0.13, + "learning_rate": 4.351905219340378e-05, + "loss": 2.6656, + "step": 203400 + }, + { + "epoch": 0.13, + "learning_rate": 4.351585014409222e-05, + "loss": 2.6418, + "step": 203500 + }, + { + "epoch": 0.13, + "learning_rate": 4.351264809478066e-05, + "loss": 2.6343, + "step": 203600 + }, + { + "epoch": 0.13, + "learning_rate": 4.350944604546911e-05, + "loss": 2.6404, + "step": 203700 + }, + { + "epoch": 0.13, + "learning_rate": 4.350624399615754e-05, + "loss": 2.666, + "step": 203800 + }, + { + "epoch": 0.13, + "learning_rate": 4.350304194684599e-05, + "loss": 2.6475, + "step": 203900 + }, + { + "epoch": 0.13, + "learning_rate": 4.349983989753442e-05, + "loss": 2.6607, + "step": 204000 + }, + { + "epoch": 0.13, + "eval_loss": 2.5313851833343506, + "eval_runtime": 176.4774, + "eval_samples_per_second": 56.664, + "eval_steps_per_second": 3.542, + "step": 204000 + }, + { + "epoch": 0.13, + "learning_rate": 4.349663784822287e-05, + "loss": 2.6304, + "step": 204100 + }, + { + "epoch": 0.13, + "learning_rate": 4.34934357989113e-05, + "loss": 2.655, + "step": 204200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3490233749599746e-05, + "loss": 2.6224, + "step": 204300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3487031700288186e-05, + "loss": 2.6145, + "step": 204400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3483829650976626e-05, + "loss": 2.6473, + "step": 204500 + }, + { + "epoch": 0.13, + "learning_rate": 4.348062760166507e-05, + "loss": 2.634, + "step": 204600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3477425552353505e-05, + "loss": 2.6438, + "step": 204700 + }, + { + "epoch": 0.13, + "learning_rate": 4.347422350304195e-05, + "loss": 2.6381, + "step": 204800 + }, + { + "epoch": 0.13, + "learning_rate": 4.3471021453730385e-05, + "loss": 2.6346, + "step": 204900 + }, + { + "epoch": 0.13, + "learning_rate": 4.346781940441883e-05, + "loss": 2.633, + "step": 205000 + }, + { + "epoch": 0.13, + "eval_loss": 2.529742479324341, + "eval_runtime": 176.8338, + "eval_samples_per_second": 56.55, + "eval_steps_per_second": 3.534, + "step": 205000 + }, + { + "epoch": 0.13, + "learning_rate": 4.346461735510727e-05, + "loss": 2.6434, + "step": 205100 + }, + { + "epoch": 0.13, + "learning_rate": 4.346141530579571e-05, + "loss": 2.646, + "step": 205200 + }, + { + "epoch": 0.13, + "learning_rate": 4.345821325648416e-05, + "loss": 2.6348, + "step": 205300 + }, + { + "epoch": 0.13, + "learning_rate": 4.345501120717259e-05, + "loss": 2.632, + "step": 205400 + }, + { + "epoch": 0.13, + "learning_rate": 4.345180915786104e-05, + "loss": 2.6379, + "step": 205500 + }, + { + "epoch": 0.13, + "learning_rate": 4.344860710854947e-05, + "loss": 2.6433, + "step": 205600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3445405059237916e-05, + "loss": 2.632, + "step": 205700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3442203009926356e-05, + "loss": 2.6641, + "step": 205800 + }, + { + "epoch": 0.13, + "learning_rate": 4.3439000960614796e-05, + "loss": 2.6363, + "step": 205900 + }, + { + "epoch": 0.13, + "learning_rate": 4.3435798911303235e-05, + "loss": 2.6425, + "step": 206000 + }, + { + "epoch": 0.13, + "eval_loss": 2.528657913208008, + "eval_runtime": 175.9343, + "eval_samples_per_second": 56.839, + "eval_steps_per_second": 3.552, + "step": 206000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3432596861991675e-05, + "loss": 2.6231, + "step": 206100 + }, + { + "epoch": 0.13, + "learning_rate": 4.342939481268012e-05, + "loss": 2.6284, + "step": 206200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3426192763368555e-05, + "loss": 2.6581, + "step": 206300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3422990714057e-05, + "loss": 2.6255, + "step": 206400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3419788664745434e-05, + "loss": 2.652, + "step": 206500 + }, + { + "epoch": 0.13, + "learning_rate": 4.341658661543388e-05, + "loss": 2.632, + "step": 206600 + }, + { + "epoch": 0.13, + "learning_rate": 4.341338456612232e-05, + "loss": 2.6546, + "step": 206700 + }, + { + "epoch": 0.13, + "learning_rate": 4.341018251681076e-05, + "loss": 2.6312, + "step": 206800 + }, + { + "epoch": 0.13, + "learning_rate": 4.34069804674992e-05, + "loss": 2.6419, + "step": 206900 + }, + { + "epoch": 0.13, + "learning_rate": 4.340377841818764e-05, + "loss": 2.6417, + "step": 207000 + }, + { + "epoch": 0.13, + "eval_loss": 2.529862403869629, + "eval_runtime": 177.3132, + "eval_samples_per_second": 56.397, + "eval_steps_per_second": 3.525, + "step": 207000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3400576368876086e-05, + "loss": 2.645, + "step": 207100 + }, + { + "epoch": 0.13, + "learning_rate": 4.339737431956452e-05, + "loss": 2.6484, + "step": 207200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3394172270252965e-05, + "loss": 2.6235, + "step": 207300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3390970220941405e-05, + "loss": 2.633, + "step": 207400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3387768171629845e-05, + "loss": 2.6329, + "step": 207500 + }, + { + "epoch": 0.13, + "learning_rate": 4.3384566122318285e-05, + "loss": 2.6663, + "step": 207600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3381364073006724e-05, + "loss": 2.6429, + "step": 207700 + }, + { + "epoch": 0.13, + "learning_rate": 4.337816202369517e-05, + "loss": 2.6539, + "step": 207800 + }, + { + "epoch": 0.13, + "learning_rate": 4.3374959974383604e-05, + "loss": 2.6292, + "step": 207900 + }, + { + "epoch": 0.13, + "learning_rate": 4.337175792507205e-05, + "loss": 2.6195, + "step": 208000 + }, + { + "epoch": 0.13, + "eval_loss": 2.530228614807129, + "eval_runtime": 176.3625, + "eval_samples_per_second": 56.701, + "eval_steps_per_second": 3.544, + "step": 208000 + }, + { + "epoch": 0.13, + "learning_rate": 4.336855587576049e-05, + "loss": 2.6254, + "step": 208100 + }, + { + "epoch": 0.13, + "learning_rate": 4.336535382644893e-05, + "loss": 2.6354, + "step": 208200 + }, + { + "epoch": 0.13, + "learning_rate": 4.336215177713737e-05, + "loss": 2.6171, + "step": 208300 + }, + { + "epoch": 0.13, + "learning_rate": 4.335894972782581e-05, + "loss": 2.6298, + "step": 208400 + }, + { + "epoch": 0.13, + "learning_rate": 4.335574767851425e-05, + "loss": 2.6503, + "step": 208500 + }, + { + "epoch": 0.13, + "learning_rate": 4.335254562920269e-05, + "loss": 2.621, + "step": 208600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3349343579891135e-05, + "loss": 2.631, + "step": 208700 + }, + { + "epoch": 0.13, + "learning_rate": 4.334614153057957e-05, + "loss": 2.6321, + "step": 208800 + }, + { + "epoch": 0.13, + "learning_rate": 4.3342939481268015e-05, + "loss": 2.6312, + "step": 208900 + }, + { + "epoch": 0.13, + "learning_rate": 4.3339737431956455e-05, + "loss": 2.6512, + "step": 209000 + }, + { + "epoch": 0.13, + "eval_loss": 2.5286104679107666, + "eval_runtime": 195.3431, + "eval_samples_per_second": 51.192, + "eval_steps_per_second": 3.199, + "step": 209000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3336535382644894e-05, + "loss": 2.627, + "step": 209100 + }, + { + "epoch": 0.13, + "learning_rate": 4.3333333333333334e-05, + "loss": 2.6264, + "step": 209200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3330131284021774e-05, + "loss": 2.6263, + "step": 209300 + }, + { + "epoch": 0.13, + "learning_rate": 4.332692923471022e-05, + "loss": 2.6325, + "step": 209400 + }, + { + "epoch": 0.13, + "learning_rate": 4.332372718539865e-05, + "loss": 2.611, + "step": 209500 + }, + { + "epoch": 0.13, + "learning_rate": 4.33205251360871e-05, + "loss": 2.6166, + "step": 209600 + }, + { + "epoch": 0.13, + "learning_rate": 4.331732308677554e-05, + "loss": 2.6387, + "step": 209700 + }, + { + "epoch": 0.13, + "learning_rate": 4.331412103746398e-05, + "loss": 2.6505, + "step": 209800 + }, + { + "epoch": 0.13, + "learning_rate": 4.331091898815242e-05, + "loss": 2.6525, + "step": 209900 + }, + { + "epoch": 0.13, + "learning_rate": 4.330771693884086e-05, + "loss": 2.6292, + "step": 210000 + }, + { + "epoch": 0.13, + "eval_loss": 2.5292112827301025, + "eval_runtime": 193.9899, + "eval_samples_per_second": 51.549, + "eval_steps_per_second": 3.222, + "step": 210000 + }, + { + "epoch": 0.13, + "learning_rate": 4.33045148895293e-05, + "loss": 2.646, + "step": 210100 + }, + { + "epoch": 0.13, + "learning_rate": 4.330131284021774e-05, + "loss": 2.617, + "step": 210200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3298110790906185e-05, + "loss": 2.6466, + "step": 210300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3294908741594624e-05, + "loss": 2.638, + "step": 210400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3291706692283064e-05, + "loss": 2.656, + "step": 210500 + }, + { + "epoch": 0.13, + "learning_rate": 4.3288504642971504e-05, + "loss": 2.6414, + "step": 210600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3285302593659944e-05, + "loss": 2.629, + "step": 210700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3282100544348383e-05, + "loss": 2.6291, + "step": 210800 + }, + { + "epoch": 0.13, + "learning_rate": 4.327889849503682e-05, + "loss": 2.6474, + "step": 210900 + }, + { + "epoch": 0.14, + "learning_rate": 4.327569644572527e-05, + "loss": 2.6521, + "step": 211000 + }, + { + "epoch": 0.14, + "eval_loss": 2.529031991958618, + "eval_runtime": 177.8606, + "eval_samples_per_second": 56.224, + "eval_steps_per_second": 3.514, + "step": 211000 + }, + { + "epoch": 0.14, + "learning_rate": 4.32724943964137e-05, + "loss": 2.6118, + "step": 211100 + }, + { + "epoch": 0.14, + "learning_rate": 4.326929234710215e-05, + "loss": 2.6543, + "step": 211200 + }, + { + "epoch": 0.14, + "learning_rate": 4.326609029779059e-05, + "loss": 2.6448, + "step": 211300 + }, + { + "epoch": 0.14, + "learning_rate": 4.326288824847903e-05, + "loss": 2.6423, + "step": 211400 + }, + { + "epoch": 0.14, + "learning_rate": 4.325968619916747e-05, + "loss": 2.6156, + "step": 211500 + }, + { + "epoch": 0.14, + "learning_rate": 4.325648414985591e-05, + "loss": 2.6427, + "step": 211600 + }, + { + "epoch": 0.14, + "learning_rate": 4.325328210054435e-05, + "loss": 2.6329, + "step": 211700 + }, + { + "epoch": 0.14, + "learning_rate": 4.325008005123279e-05, + "loss": 2.6331, + "step": 211800 + }, + { + "epoch": 0.14, + "learning_rate": 4.3246878001921234e-05, + "loss": 2.6276, + "step": 211900 + }, + { + "epoch": 0.14, + "learning_rate": 4.3243675952609674e-05, + "loss": 2.6377, + "step": 212000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5291922092437744, + "eval_runtime": 176.4283, + "eval_samples_per_second": 56.68, + "eval_steps_per_second": 3.543, + "step": 212000 + }, + { + "epoch": 0.14, + "learning_rate": 4.3240473903298114e-05, + "loss": 2.6265, + "step": 212100 + }, + { + "epoch": 0.14, + "learning_rate": 4.323727185398655e-05, + "loss": 2.6319, + "step": 212200 + }, + { + "epoch": 0.14, + "learning_rate": 4.323406980467499e-05, + "loss": 2.6235, + "step": 212300 + }, + { + "epoch": 0.14, + "learning_rate": 4.323086775536343e-05, + "loss": 2.6233, + "step": 212400 + }, + { + "epoch": 0.14, + "learning_rate": 4.322766570605187e-05, + "loss": 2.6506, + "step": 212500 + }, + { + "epoch": 0.14, + "learning_rate": 4.322446365674032e-05, + "loss": 2.6294, + "step": 212600 + }, + { + "epoch": 0.14, + "learning_rate": 4.322126160742876e-05, + "loss": 2.6288, + "step": 212700 + }, + { + "epoch": 0.14, + "learning_rate": 4.32180595581172e-05, + "loss": 2.6241, + "step": 212800 + }, + { + "epoch": 0.14, + "learning_rate": 4.321485750880564e-05, + "loss": 2.6348, + "step": 212900 + }, + { + "epoch": 0.14, + "learning_rate": 4.321165545949408e-05, + "loss": 2.6268, + "step": 213000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5289108753204346, + "eval_runtime": 173.9573, + "eval_samples_per_second": 57.485, + "eval_steps_per_second": 3.593, + "step": 213000 + }, + { + "epoch": 0.14, + "learning_rate": 4.320845341018252e-05, + "loss": 2.5975, + "step": 213100 + }, + { + "epoch": 0.14, + "learning_rate": 4.320525136087096e-05, + "loss": 2.6618, + "step": 213200 + }, + { + "epoch": 0.14, + "learning_rate": 4.32020493115594e-05, + "loss": 2.6427, + "step": 213300 + }, + { + "epoch": 0.14, + "learning_rate": 4.319884726224784e-05, + "loss": 2.6347, + "step": 213400 + }, + { + "epoch": 0.14, + "learning_rate": 4.3195645212936283e-05, + "loss": 2.6308, + "step": 213500 + }, + { + "epoch": 0.14, + "learning_rate": 4.319244316362472e-05, + "loss": 2.6144, + "step": 213600 + }, + { + "epoch": 0.14, + "learning_rate": 4.318924111431316e-05, + "loss": 2.6242, + "step": 213700 + }, + { + "epoch": 0.14, + "learning_rate": 4.31860390650016e-05, + "loss": 2.6297, + "step": 213800 + }, + { + "epoch": 0.14, + "learning_rate": 4.318283701569004e-05, + "loss": 2.6198, + "step": 213900 + }, + { + "epoch": 0.14, + "learning_rate": 4.317963496637848e-05, + "loss": 2.6436, + "step": 214000 + }, + { + "epoch": 0.14, + "eval_loss": 2.528165340423584, + "eval_runtime": 176.0011, + "eval_samples_per_second": 56.818, + "eval_steps_per_second": 3.551, + "step": 214000 + }, + { + "epoch": 0.14, + "learning_rate": 4.317643291706692e-05, + "loss": 2.6456, + "step": 214100 + }, + { + "epoch": 0.14, + "learning_rate": 4.317323086775537e-05, + "loss": 2.6205, + "step": 214200 + }, + { + "epoch": 0.14, + "learning_rate": 4.317002881844381e-05, + "loss": 2.6102, + "step": 214300 + }, + { + "epoch": 0.14, + "learning_rate": 4.316682676913225e-05, + "loss": 2.6279, + "step": 214400 + }, + { + "epoch": 0.14, + "learning_rate": 4.316362471982069e-05, + "loss": 2.6337, + "step": 214500 + }, + { + "epoch": 0.14, + "learning_rate": 4.316042267050913e-05, + "loss": 2.6296, + "step": 214600 + }, + { + "epoch": 0.14, + "learning_rate": 4.315722062119757e-05, + "loss": 2.6301, + "step": 214700 + }, + { + "epoch": 0.14, + "learning_rate": 4.315401857188601e-05, + "loss": 2.6218, + "step": 214800 + }, + { + "epoch": 0.14, + "learning_rate": 4.3150816522574447e-05, + "loss": 2.6434, + "step": 214900 + }, + { + "epoch": 0.14, + "learning_rate": 4.314761447326289e-05, + "loss": 2.61, + "step": 215000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5294289588928223, + "eval_runtime": 177.7074, + "eval_samples_per_second": 56.272, + "eval_steps_per_second": 3.517, + "step": 215000 + }, + { + "epoch": 0.14, + "learning_rate": 4.314441242395133e-05, + "loss": 2.6298, + "step": 215100 + }, + { + "epoch": 0.14, + "learning_rate": 4.314121037463977e-05, + "loss": 2.6189, + "step": 215200 + }, + { + "epoch": 0.14, + "learning_rate": 4.313800832532821e-05, + "loss": 2.596, + "step": 215300 + }, + { + "epoch": 0.14, + "learning_rate": 4.313480627601665e-05, + "loss": 2.6313, + "step": 215400 + }, + { + "epoch": 0.14, + "learning_rate": 4.313160422670509e-05, + "loss": 2.6104, + "step": 215500 + }, + { + "epoch": 0.14, + "learning_rate": 4.312840217739353e-05, + "loss": 2.6391, + "step": 215600 + }, + { + "epoch": 0.14, + "learning_rate": 4.312520012808198e-05, + "loss": 2.6356, + "step": 215700 + }, + { + "epoch": 0.14, + "learning_rate": 4.312199807877042e-05, + "loss": 2.6151, + "step": 215800 + }, + { + "epoch": 0.14, + "learning_rate": 4.311879602945886e-05, + "loss": 2.6279, + "step": 215900 + }, + { + "epoch": 0.14, + "learning_rate": 4.31155939801473e-05, + "loss": 2.6185, + "step": 216000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5280823707580566, + "eval_runtime": 176.0196, + "eval_samples_per_second": 56.812, + "eval_steps_per_second": 3.551, + "step": 216000 + }, + { + "epoch": 0.14, + "learning_rate": 4.311239193083574e-05, + "loss": 2.6091, + "step": 216100 + }, + { + "epoch": 0.14, + "learning_rate": 4.310918988152418e-05, + "loss": 2.6187, + "step": 216200 + }, + { + "epoch": 0.14, + "learning_rate": 4.3105987832212616e-05, + "loss": 2.6362, + "step": 216300 + }, + { + "epoch": 0.14, + "learning_rate": 4.3102785782901056e-05, + "loss": 2.617, + "step": 216400 + }, + { + "epoch": 0.14, + "learning_rate": 4.3099583733589496e-05, + "loss": 2.6403, + "step": 216500 + }, + { + "epoch": 0.14, + "learning_rate": 4.309638168427794e-05, + "loss": 2.6077, + "step": 216600 + }, + { + "epoch": 0.14, + "learning_rate": 4.309317963496638e-05, + "loss": 2.6381, + "step": 216700 + }, + { + "epoch": 0.14, + "learning_rate": 4.308997758565482e-05, + "loss": 2.6548, + "step": 216800 + }, + { + "epoch": 0.14, + "learning_rate": 4.308677553634326e-05, + "loss": 2.6257, + "step": 216900 + }, + { + "epoch": 0.14, + "learning_rate": 4.30835734870317e-05, + "loss": 2.6427, + "step": 217000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5252022743225098, + "eval_runtime": 173.6916, + "eval_samples_per_second": 57.573, + "eval_steps_per_second": 3.598, + "step": 217000 + }, + { + "epoch": 0.14, + "learning_rate": 4.308037143772014e-05, + "loss": 2.6324, + "step": 217100 + }, + { + "epoch": 0.14, + "learning_rate": 4.307716938840858e-05, + "loss": 2.62, + "step": 217200 + }, + { + "epoch": 0.14, + "learning_rate": 4.307396733909703e-05, + "loss": 2.6349, + "step": 217300 + }, + { + "epoch": 0.14, + "learning_rate": 4.307076528978547e-05, + "loss": 2.6162, + "step": 217400 + }, + { + "epoch": 0.14, + "learning_rate": 4.306756324047391e-05, + "loss": 2.6245, + "step": 217500 + }, + { + "epoch": 0.14, + "learning_rate": 4.306436119116235e-05, + "loss": 2.6265, + "step": 217600 + }, + { + "epoch": 0.14, + "learning_rate": 4.3061159141850786e-05, + "loss": 2.6246, + "step": 217700 + }, + { + "epoch": 0.14, + "learning_rate": 4.3057957092539226e-05, + "loss": 2.6485, + "step": 217800 + }, + { + "epoch": 0.14, + "learning_rate": 4.3054755043227666e-05, + "loss": 2.6113, + "step": 217900 + }, + { + "epoch": 0.14, + "learning_rate": 4.305155299391611e-05, + "loss": 2.6212, + "step": 218000 + }, + { + "epoch": 0.14, + "eval_loss": 2.527156352996826, + "eval_runtime": 176.014, + "eval_samples_per_second": 56.814, + "eval_steps_per_second": 3.551, + "step": 218000 + }, + { + "epoch": 0.14, + "learning_rate": 4.3048350944604545e-05, + "loss": 2.6248, + "step": 218100 + }, + { + "epoch": 0.14, + "learning_rate": 4.304514889529299e-05, + "loss": 2.6509, + "step": 218200 + }, + { + "epoch": 0.14, + "learning_rate": 4.304194684598143e-05, + "loss": 2.6011, + "step": 218300 + }, + { + "epoch": 0.14, + "learning_rate": 4.303874479666987e-05, + "loss": 2.6169, + "step": 218400 + }, + { + "epoch": 0.14, + "learning_rate": 4.303554274735831e-05, + "loss": 2.6246, + "step": 218500 + }, + { + "epoch": 0.14, + "learning_rate": 4.303234069804675e-05, + "loss": 2.6467, + "step": 218600 + }, + { + "epoch": 0.14, + "learning_rate": 4.302913864873519e-05, + "loss": 2.6275, + "step": 218700 + }, + { + "epoch": 0.14, + "learning_rate": 4.302593659942363e-05, + "loss": 2.6104, + "step": 218800 + }, + { + "epoch": 0.14, + "learning_rate": 4.302273455011208e-05, + "loss": 2.613, + "step": 218900 + }, + { + "epoch": 0.14, + "learning_rate": 4.3019532500800517e-05, + "loss": 2.64, + "step": 219000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5278069972991943, + "eval_runtime": 176.722, + "eval_samples_per_second": 56.586, + "eval_steps_per_second": 3.537, + "step": 219000 + }, + { + "epoch": 0.14, + "learning_rate": 4.3016330451488956e-05, + "loss": 2.6189, + "step": 219100 + }, + { + "epoch": 0.14, + "learning_rate": 4.3013128402177396e-05, + "loss": 2.6284, + "step": 219200 + }, + { + "epoch": 0.14, + "learning_rate": 4.3009926352865836e-05, + "loss": 2.6216, + "step": 219300 + }, + { + "epoch": 0.14, + "learning_rate": 4.3006724303554275e-05, + "loss": 2.6361, + "step": 219400 + }, + { + "epoch": 0.14, + "learning_rate": 4.3003522254242715e-05, + "loss": 2.6333, + "step": 219500 + }, + { + "epoch": 0.14, + "learning_rate": 4.300032020493116e-05, + "loss": 2.6239, + "step": 219600 + }, + { + "epoch": 0.14, + "learning_rate": 4.2997118155619595e-05, + "loss": 2.6211, + "step": 219700 + }, + { + "epoch": 0.14, + "learning_rate": 4.299391610630804e-05, + "loss": 2.635, + "step": 219800 + }, + { + "epoch": 0.14, + "learning_rate": 4.299071405699648e-05, + "loss": 2.6111, + "step": 219900 + }, + { + "epoch": 0.14, + "learning_rate": 4.298751200768492e-05, + "loss": 2.6169, + "step": 220000 + }, + { + "epoch": 0.14, + "eval_loss": 2.527263641357422, + "eval_runtime": 173.9107, + "eval_samples_per_second": 57.501, + "eval_steps_per_second": 3.594, + "step": 220000 + }, + { + "epoch": 0.14, + "learning_rate": 4.298430995837336e-05, + "loss": 2.595, + "step": 220100 + }, + { + "epoch": 0.14, + "learning_rate": 4.29811079090618e-05, + "loss": 2.6117, + "step": 220200 + }, + { + "epoch": 0.14, + "learning_rate": 4.297790585975025e-05, + "loss": 2.635, + "step": 220300 + }, + { + "epoch": 0.14, + "learning_rate": 4.297470381043868e-05, + "loss": 2.6172, + "step": 220400 + }, + { + "epoch": 0.14, + "learning_rate": 4.2971501761127126e-05, + "loss": 2.6418, + "step": 220500 + }, + { + "epoch": 0.14, + "learning_rate": 4.2968299711815566e-05, + "loss": 2.6069, + "step": 220600 + }, + { + "epoch": 0.14, + "learning_rate": 4.2965097662504006e-05, + "loss": 2.6341, + "step": 220700 + }, + { + "epoch": 0.14, + "learning_rate": 4.2961895613192445e-05, + "loss": 2.6143, + "step": 220800 + }, + { + "epoch": 0.14, + "learning_rate": 4.2958693563880885e-05, + "loss": 2.6295, + "step": 220900 + }, + { + "epoch": 0.14, + "learning_rate": 4.2955491514569325e-05, + "loss": 2.6249, + "step": 221000 + }, + { + "epoch": 0.14, + "eval_loss": 2.526632308959961, + "eval_runtime": 178.088, + "eval_samples_per_second": 56.152, + "eval_steps_per_second": 3.51, + "step": 221000 + }, + { + "epoch": 0.14, + "learning_rate": 4.2952289465257765e-05, + "loss": 2.614, + "step": 221100 + }, + { + "epoch": 0.14, + "learning_rate": 4.294908741594621e-05, + "loss": 2.6068, + "step": 221200 + }, + { + "epoch": 0.14, + "learning_rate": 4.2945885366634644e-05, + "loss": 2.6442, + "step": 221300 + }, + { + "epoch": 0.14, + "learning_rate": 4.294268331732309e-05, + "loss": 2.6008, + "step": 221400 + }, + { + "epoch": 0.14, + "learning_rate": 4.293948126801153e-05, + "loss": 2.6161, + "step": 221500 + }, + { + "epoch": 0.14, + "learning_rate": 4.293627921869997e-05, + "loss": 2.6089, + "step": 221600 + }, + { + "epoch": 0.14, + "learning_rate": 4.293307716938841e-05, + "loss": 2.6385, + "step": 221700 + }, + { + "epoch": 0.14, + "learning_rate": 4.292987512007685e-05, + "loss": 2.6166, + "step": 221800 + }, + { + "epoch": 0.14, + "learning_rate": 4.2926673070765296e-05, + "loss": 2.6013, + "step": 221900 + }, + { + "epoch": 0.14, + "learning_rate": 4.292347102145373e-05, + "loss": 2.6103, + "step": 222000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5263125896453857, + "eval_runtime": 178.1295, + "eval_samples_per_second": 56.139, + "eval_steps_per_second": 3.509, + "step": 222000 + }, + { + "epoch": 0.14, + "learning_rate": 4.2920268972142176e-05, + "loss": 2.62, + "step": 222100 + }, + { + "epoch": 0.14, + "learning_rate": 4.2917066922830615e-05, + "loss": 2.6053, + "step": 222200 + }, + { + "epoch": 0.14, + "learning_rate": 4.2913864873519055e-05, + "loss": 2.6139, + "step": 222300 + }, + { + "epoch": 0.14, + "learning_rate": 4.2910662824207495e-05, + "loss": 2.617, + "step": 222400 + }, + { + "epoch": 0.14, + "learning_rate": 4.2907460774895934e-05, + "loss": 2.6101, + "step": 222500 + }, + { + "epoch": 0.14, + "learning_rate": 4.290425872558438e-05, + "loss": 2.6292, + "step": 222600 + }, + { + "epoch": 0.14, + "learning_rate": 4.2901056676272814e-05, + "loss": 2.608, + "step": 222700 + }, + { + "epoch": 0.14, + "learning_rate": 4.289785462696126e-05, + "loss": 2.6042, + "step": 222800 + }, + { + "epoch": 0.14, + "learning_rate": 4.2894652577649693e-05, + "loss": 2.5969, + "step": 222900 + }, + { + "epoch": 0.14, + "learning_rate": 4.289145052833814e-05, + "loss": 2.6138, + "step": 223000 + }, + { + "epoch": 0.14, + "eval_loss": 2.525189161300659, + "eval_runtime": 174.7154, + "eval_samples_per_second": 57.236, + "eval_steps_per_second": 3.577, + "step": 223000 + }, + { + "epoch": 0.14, + "learning_rate": 4.288824847902658e-05, + "loss": 2.6086, + "step": 223100 + }, + { + "epoch": 0.14, + "learning_rate": 4.288504642971502e-05, + "loss": 2.6301, + "step": 223200 + }, + { + "epoch": 0.14, + "learning_rate": 4.288184438040346e-05, + "loss": 2.6091, + "step": 223300 + }, + { + "epoch": 0.14, + "learning_rate": 4.28786423310919e-05, + "loss": 2.6136, + "step": 223400 + }, + { + "epoch": 0.14, + "learning_rate": 4.2875440281780345e-05, + "loss": 2.5998, + "step": 223500 + }, + { + "epoch": 0.14, + "learning_rate": 4.287223823246878e-05, + "loss": 2.6226, + "step": 223600 + }, + { + "epoch": 0.14, + "learning_rate": 4.2869036183157225e-05, + "loss": 2.6109, + "step": 223700 + }, + { + "epoch": 0.14, + "learning_rate": 4.2865834133845665e-05, + "loss": 2.633, + "step": 223800 + }, + { + "epoch": 0.14, + "learning_rate": 4.2862632084534104e-05, + "loss": 2.6114, + "step": 223900 + }, + { + "epoch": 0.14, + "learning_rate": 4.2859430035222544e-05, + "loss": 2.6099, + "step": 224000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5254721641540527, + "eval_runtime": 176.1995, + "eval_samples_per_second": 56.754, + "eval_steps_per_second": 3.547, + "step": 224000 + }, + { + "epoch": 0.14, + "learning_rate": 4.2856227985910984e-05, + "loss": 2.6315, + "step": 224100 + }, + { + "epoch": 0.14, + "learning_rate": 4.285302593659943e-05, + "loss": 2.6034, + "step": 224200 + }, + { + "epoch": 0.14, + "learning_rate": 4.284982388728786e-05, + "loss": 2.5927, + "step": 224300 + }, + { + "epoch": 0.14, + "learning_rate": 4.284662183797631e-05, + "loss": 2.6138, + "step": 224400 + }, + { + "epoch": 0.14, + "learning_rate": 4.284341978866474e-05, + "loss": 2.6295, + "step": 224500 + }, + { + "epoch": 0.14, + "learning_rate": 4.284021773935319e-05, + "loss": 2.6111, + "step": 224600 + }, + { + "epoch": 0.14, + "learning_rate": 4.283701569004163e-05, + "loss": 2.6108, + "step": 224700 + }, + { + "epoch": 0.14, + "learning_rate": 4.283381364073007e-05, + "loss": 2.6103, + "step": 224800 + }, + { + "epoch": 0.14, + "learning_rate": 4.2830611591418515e-05, + "loss": 2.6197, + "step": 224900 + }, + { + "epoch": 0.14, + "learning_rate": 4.282740954210695e-05, + "loss": 2.6191, + "step": 225000 + }, + { + "epoch": 0.14, + "eval_loss": 2.5249056816101074, + "eval_runtime": 176.5788, + "eval_samples_per_second": 56.632, + "eval_steps_per_second": 3.539, + "step": 225000 + }, + { + "epoch": 0.14, + "learning_rate": 4.2824207492795395e-05, + "loss": 2.6135, + "step": 225100 + }, + { + "epoch": 0.14, + "learning_rate": 4.282100544348383e-05, + "loss": 2.6273, + "step": 225200 + }, + { + "epoch": 0.14, + "learning_rate": 4.2817803394172274e-05, + "loss": 2.6085, + "step": 225300 + }, + { + "epoch": 0.14, + "learning_rate": 4.2814601344860714e-05, + "loss": 2.6303, + "step": 225400 + }, + { + "epoch": 0.14, + "learning_rate": 4.2811399295549154e-05, + "loss": 2.609, + "step": 225500 + }, + { + "epoch": 0.14, + "learning_rate": 4.2808197246237594e-05, + "loss": 2.6163, + "step": 225600 + }, + { + "epoch": 0.14, + "learning_rate": 4.280499519692603e-05, + "loss": 2.625, + "step": 225700 + }, + { + "epoch": 0.14, + "learning_rate": 4.280179314761448e-05, + "loss": 2.6277, + "step": 225800 + }, + { + "epoch": 0.14, + "learning_rate": 4.279859109830291e-05, + "loss": 2.628, + "step": 225900 + }, + { + "epoch": 0.14, + "learning_rate": 4.279538904899136e-05, + "loss": 2.6122, + "step": 226000 + }, + { + "epoch": 0.14, + "eval_loss": 2.527702808380127, + "eval_runtime": 179.746, + "eval_samples_per_second": 55.634, + "eval_steps_per_second": 3.477, + "step": 226000 + }, + { + "epoch": 0.14, + "learning_rate": 4.279218699967979e-05, + "loss": 2.6287, + "step": 226100 + }, + { + "epoch": 0.14, + "learning_rate": 4.278898495036824e-05, + "loss": 2.6267, + "step": 226200 + }, + { + "epoch": 0.14, + "learning_rate": 4.278578290105668e-05, + "loss": 2.6106, + "step": 226300 + }, + { + "epoch": 0.14, + "learning_rate": 4.278258085174512e-05, + "loss": 2.6114, + "step": 226400 + }, + { + "epoch": 0.14, + "learning_rate": 4.2779378802433565e-05, + "loss": 2.6216, + "step": 226500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2776176753122e-05, + "loss": 2.6045, + "step": 226600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2772974703810444e-05, + "loss": 2.6172, + "step": 226700 + }, + { + "epoch": 0.15, + "learning_rate": 4.276977265449888e-05, + "loss": 2.6315, + "step": 226800 + }, + { + "epoch": 0.15, + "learning_rate": 4.2766570605187324e-05, + "loss": 2.6167, + "step": 226900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2763368555875763e-05, + "loss": 2.6426, + "step": 227000 + }, + { + "epoch": 0.15, + "eval_loss": 2.525062084197998, + "eval_runtime": 177.7314, + "eval_samples_per_second": 56.265, + "eval_steps_per_second": 3.517, + "step": 227000 + }, + { + "epoch": 0.15, + "learning_rate": 4.27601665065642e-05, + "loss": 2.6213, + "step": 227100 + }, + { + "epoch": 0.15, + "learning_rate": 4.275696445725264e-05, + "loss": 2.6122, + "step": 227200 + }, + { + "epoch": 0.15, + "learning_rate": 4.275376240794108e-05, + "loss": 2.6104, + "step": 227300 + }, + { + "epoch": 0.15, + "learning_rate": 4.275056035862953e-05, + "loss": 2.6016, + "step": 227400 + }, + { + "epoch": 0.15, + "learning_rate": 4.274735830931796e-05, + "loss": 2.5993, + "step": 227500 + }, + { + "epoch": 0.15, + "learning_rate": 4.274415626000641e-05, + "loss": 2.6125, + "step": 227600 + }, + { + "epoch": 0.15, + "learning_rate": 4.274095421069484e-05, + "loss": 2.5991, + "step": 227700 + }, + { + "epoch": 0.15, + "learning_rate": 4.273775216138329e-05, + "loss": 2.619, + "step": 227800 + }, + { + "epoch": 0.15, + "learning_rate": 4.273455011207173e-05, + "loss": 2.6267, + "step": 227900 + }, + { + "epoch": 0.15, + "learning_rate": 4.273134806276017e-05, + "loss": 2.613, + "step": 228000 + }, + { + "epoch": 0.15, + "eval_loss": 2.52404522895813, + "eval_runtime": 176.0414, + "eval_samples_per_second": 56.805, + "eval_steps_per_second": 3.55, + "step": 228000 + }, + { + "epoch": 0.15, + "learning_rate": 4.2728146013448614e-05, + "loss": 2.626, + "step": 228100 + }, + { + "epoch": 0.15, + "learning_rate": 4.272494396413705e-05, + "loss": 2.6154, + "step": 228200 + }, + { + "epoch": 0.15, + "learning_rate": 4.2721741914825494e-05, + "loss": 2.6089, + "step": 228300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2718539865513927e-05, + "loss": 2.6217, + "step": 228400 + }, + { + "epoch": 0.15, + "learning_rate": 4.271533781620237e-05, + "loss": 2.6165, + "step": 228500 + }, + { + "epoch": 0.15, + "learning_rate": 4.271213576689081e-05, + "loss": 2.6223, + "step": 228600 + }, + { + "epoch": 0.15, + "learning_rate": 4.270893371757925e-05, + "loss": 2.6154, + "step": 228700 + }, + { + "epoch": 0.15, + "learning_rate": 4.270573166826769e-05, + "loss": 2.6102, + "step": 228800 + }, + { + "epoch": 0.15, + "learning_rate": 4.270252961895613e-05, + "loss": 2.631, + "step": 228900 + }, + { + "epoch": 0.15, + "learning_rate": 4.269932756964458e-05, + "loss": 2.6001, + "step": 229000 + }, + { + "epoch": 0.15, + "eval_loss": 2.5241382122039795, + "eval_runtime": 176.3416, + "eval_samples_per_second": 56.708, + "eval_steps_per_second": 3.544, + "step": 229000 + }, + { + "epoch": 0.15, + "learning_rate": 4.269612552033301e-05, + "loss": 2.5996, + "step": 229100 + }, + { + "epoch": 0.15, + "learning_rate": 4.269292347102146e-05, + "loss": 2.6287, + "step": 229200 + }, + { + "epoch": 0.15, + "learning_rate": 4.268972142170989e-05, + "loss": 2.6207, + "step": 229300 + }, + { + "epoch": 0.15, + "learning_rate": 4.268651937239834e-05, + "loss": 2.6177, + "step": 229400 + }, + { + "epoch": 0.15, + "learning_rate": 4.268331732308678e-05, + "loss": 2.6006, + "step": 229500 + }, + { + "epoch": 0.15, + "learning_rate": 4.268011527377522e-05, + "loss": 2.6149, + "step": 229600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2676913224463663e-05, + "loss": 2.6005, + "step": 229700 + }, + { + "epoch": 0.15, + "learning_rate": 4.2673711175152096e-05, + "loss": 2.6295, + "step": 229800 + }, + { + "epoch": 0.15, + "learning_rate": 4.267050912584054e-05, + "loss": 2.6113, + "step": 229900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2667307076528976e-05, + "loss": 2.6262, + "step": 230000 + }, + { + "epoch": 0.15, + "eval_loss": 2.5240397453308105, + "eval_runtime": 177.6269, + "eval_samples_per_second": 56.298, + "eval_steps_per_second": 3.519, + "step": 230000 + }, + { + "epoch": 0.15, + "learning_rate": 4.266410502721742e-05, + "loss": 2.6225, + "step": 230100 + }, + { + "epoch": 0.15, + "learning_rate": 4.266090297790586e-05, + "loss": 2.6236, + "step": 230200 + }, + { + "epoch": 0.15, + "learning_rate": 4.26577009285943e-05, + "loss": 2.6117, + "step": 230300 + }, + { + "epoch": 0.15, + "learning_rate": 4.265449887928274e-05, + "loss": 2.6258, + "step": 230400 + }, + { + "epoch": 0.15, + "learning_rate": 4.265129682997118e-05, + "loss": 2.6132, + "step": 230500 + }, + { + "epoch": 0.15, + "learning_rate": 4.264809478065963e-05, + "loss": 2.6055, + "step": 230600 + }, + { + "epoch": 0.15, + "learning_rate": 4.264489273134806e-05, + "loss": 2.6217, + "step": 230700 + }, + { + "epoch": 0.15, + "learning_rate": 4.264169068203651e-05, + "loss": 2.6134, + "step": 230800 + }, + { + "epoch": 0.15, + "learning_rate": 4.263848863272494e-05, + "loss": 2.6186, + "step": 230900 + }, + { + "epoch": 0.15, + "learning_rate": 4.263528658341339e-05, + "loss": 2.6124, + "step": 231000 + }, + { + "epoch": 0.15, + "eval_loss": 2.522747755050659, + "eval_runtime": 177.0914, + "eval_samples_per_second": 56.468, + "eval_steps_per_second": 3.529, + "step": 231000 + }, + { + "epoch": 0.15, + "learning_rate": 4.2632084534101827e-05, + "loss": 2.6188, + "step": 231100 + }, + { + "epoch": 0.15, + "learning_rate": 4.2628882484790266e-05, + "loss": 2.6348, + "step": 231200 + }, + { + "epoch": 0.15, + "learning_rate": 4.262568043547871e-05, + "loss": 2.628, + "step": 231300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2622478386167146e-05, + "loss": 2.6006, + "step": 231400 + }, + { + "epoch": 0.15, + "learning_rate": 4.261927633685559e-05, + "loss": 2.6075, + "step": 231500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2616074287544025e-05, + "loss": 2.6141, + "step": 231600 + }, + { + "epoch": 0.15, + "learning_rate": 4.261287223823247e-05, + "loss": 2.6051, + "step": 231700 + }, + { + "epoch": 0.15, + "learning_rate": 4.260967018892091e-05, + "loss": 2.6123, + "step": 231800 + }, + { + "epoch": 0.15, + "learning_rate": 4.260646813960935e-05, + "loss": 2.6122, + "step": 231900 + }, + { + "epoch": 0.15, + "learning_rate": 4.260326609029779e-05, + "loss": 2.6129, + "step": 232000 + }, + { + "epoch": 0.15, + "eval_loss": 2.5222604274749756, + "eval_runtime": 175.6813, + "eval_samples_per_second": 56.921, + "eval_steps_per_second": 3.558, + "step": 232000 + }, + { + "epoch": 0.15, + "learning_rate": 4.260006404098623e-05, + "loss": 2.6264, + "step": 232100 + }, + { + "epoch": 0.15, + "learning_rate": 4.259686199167468e-05, + "loss": 2.6076, + "step": 232200 + }, + { + "epoch": 0.15, + "learning_rate": 4.259365994236311e-05, + "loss": 2.6195, + "step": 232300 + }, + { + "epoch": 0.15, + "learning_rate": 4.259045789305156e-05, + "loss": 2.6187, + "step": 232400 + }, + { + "epoch": 0.15, + "learning_rate": 4.2587255843739996e-05, + "loss": 2.6214, + "step": 232500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2584053794428436e-05, + "loss": 2.6129, + "step": 232600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2580851745116876e-05, + "loss": 2.6233, + "step": 232700 + }, + { + "epoch": 0.15, + "learning_rate": 4.2577649695805316e-05, + "loss": 2.61, + "step": 232800 + }, + { + "epoch": 0.15, + "learning_rate": 4.257444764649376e-05, + "loss": 2.6174, + "step": 232900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2571245597182195e-05, + "loss": 2.6273, + "step": 233000 + }, + { + "epoch": 0.15, + "eval_loss": 2.52047061920166, + "eval_runtime": 175.8957, + "eval_samples_per_second": 56.852, + "eval_steps_per_second": 3.553, + "step": 233000 + }, + { + "epoch": 0.15, + "learning_rate": 4.256804354787064e-05, + "loss": 2.6267, + "step": 233100 + }, + { + "epoch": 0.15, + "learning_rate": 4.2564841498559075e-05, + "loss": 2.6121, + "step": 233200 + }, + { + "epoch": 0.15, + "learning_rate": 4.256163944924752e-05, + "loss": 2.6026, + "step": 233300 + }, + { + "epoch": 0.15, + "learning_rate": 4.255843739993596e-05, + "loss": 2.6381, + "step": 233400 + }, + { + "epoch": 0.15, + "learning_rate": 4.25552353506244e-05, + "loss": 2.6196, + "step": 233500 + }, + { + "epoch": 0.15, + "learning_rate": 4.255203330131284e-05, + "loss": 2.6094, + "step": 233600 + }, + { + "epoch": 0.15, + "learning_rate": 4.254883125200128e-05, + "loss": 2.6105, + "step": 233700 + }, + { + "epoch": 0.15, + "learning_rate": 4.254562920268973e-05, + "loss": 2.6103, + "step": 233800 + }, + { + "epoch": 0.15, + "learning_rate": 4.254242715337816e-05, + "loss": 2.6288, + "step": 233900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2539225104066606e-05, + "loss": 2.6153, + "step": 234000 + }, + { + "epoch": 0.15, + "eval_loss": 2.5217788219451904, + "eval_runtime": 178.8277, + "eval_samples_per_second": 55.92, + "eval_steps_per_second": 3.495, + "step": 234000 + }, + { + "epoch": 0.15, + "learning_rate": 4.2536023054755046e-05, + "loss": 2.6037, + "step": 234100 + }, + { + "epoch": 0.15, + "learning_rate": 4.2532821005443486e-05, + "loss": 2.6315, + "step": 234200 + }, + { + "epoch": 0.15, + "learning_rate": 4.2529618956131925e-05, + "loss": 2.6008, + "step": 234300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2526416906820365e-05, + "loss": 2.6132, + "step": 234400 + }, + { + "epoch": 0.15, + "learning_rate": 4.252321485750881e-05, + "loss": 2.5825, + "step": 234500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2520012808197245e-05, + "loss": 2.6072, + "step": 234600 + }, + { + "epoch": 0.15, + "learning_rate": 4.251681075888569e-05, + "loss": 2.6044, + "step": 234700 + }, + { + "epoch": 0.15, + "learning_rate": 4.251360870957413e-05, + "loss": 2.6369, + "step": 234800 + }, + { + "epoch": 0.15, + "learning_rate": 4.251040666026257e-05, + "loss": 2.6158, + "step": 234900 + }, + { + "epoch": 0.15, + "learning_rate": 4.250720461095101e-05, + "loss": 2.6374, + "step": 235000 + }, + { + "epoch": 0.15, + "eval_loss": 2.521732807159424, + "eval_runtime": 176.1112, + "eval_samples_per_second": 56.782, + "eval_steps_per_second": 3.549, + "step": 235000 + }, + { + "epoch": 0.15, + "learning_rate": 4.250400256163945e-05, + "loss": 2.6234, + "step": 235100 + }, + { + "epoch": 0.15, + "learning_rate": 4.250080051232789e-05, + "loss": 2.6115, + "step": 235200 + }, + { + "epoch": 0.15, + "learning_rate": 4.249759846301633e-05, + "loss": 2.6079, + "step": 235300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2494396413704776e-05, + "loss": 2.637, + "step": 235400 + }, + { + "epoch": 0.15, + "learning_rate": 4.2491194364393216e-05, + "loss": 2.6268, + "step": 235500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2487992315081655e-05, + "loss": 2.6055, + "step": 235600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2484790265770095e-05, + "loss": 2.5941, + "step": 235700 + }, + { + "epoch": 0.15, + "learning_rate": 4.2481588216458535e-05, + "loss": 2.6161, + "step": 235800 + }, + { + "epoch": 0.15, + "learning_rate": 4.2478386167146975e-05, + "loss": 2.5807, + "step": 235900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2475184117835414e-05, + "loss": 2.6111, + "step": 236000 + }, + { + "epoch": 0.15, + "eval_loss": 2.522373676300049, + "eval_runtime": 177.1075, + "eval_samples_per_second": 56.463, + "eval_steps_per_second": 3.529, + "step": 236000 + }, + { + "epoch": 0.15, + "learning_rate": 4.247198206852386e-05, + "loss": 2.6346, + "step": 236100 + }, + { + "epoch": 0.15, + "learning_rate": 4.2468780019212294e-05, + "loss": 2.6172, + "step": 236200 + }, + { + "epoch": 0.15, + "learning_rate": 4.246557796990074e-05, + "loss": 2.6092, + "step": 236300 + }, + { + "epoch": 0.15, + "learning_rate": 4.246237592058918e-05, + "loss": 2.619, + "step": 236400 + }, + { + "epoch": 0.15, + "learning_rate": 4.245917387127762e-05, + "loss": 2.6073, + "step": 236500 + }, + { + "epoch": 0.15, + "learning_rate": 4.245597182196606e-05, + "loss": 2.626, + "step": 236600 + }, + { + "epoch": 0.15, + "learning_rate": 4.24527697726545e-05, + "loss": 2.6131, + "step": 236700 + }, + { + "epoch": 0.15, + "learning_rate": 4.244956772334294e-05, + "loss": 2.5899, + "step": 236800 + }, + { + "epoch": 0.15, + "learning_rate": 4.244636567403138e-05, + "loss": 2.5973, + "step": 236900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2443163624719825e-05, + "loss": 2.6271, + "step": 237000 + }, + { + "epoch": 0.15, + "eval_loss": 2.5218992233276367, + "eval_runtime": 177.3058, + "eval_samples_per_second": 56.4, + "eval_steps_per_second": 3.525, + "step": 237000 + }, + { + "epoch": 0.15, + "learning_rate": 4.2439961575408265e-05, + "loss": 2.6092, + "step": 237100 + }, + { + "epoch": 0.15, + "learning_rate": 4.2436759526096705e-05, + "loss": 2.601, + "step": 237200 + }, + { + "epoch": 0.15, + "learning_rate": 4.2433557476785145e-05, + "loss": 2.618, + "step": 237300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2430355427473584e-05, + "loss": 2.6241, + "step": 237400 + }, + { + "epoch": 0.15, + "learning_rate": 4.2427153378162024e-05, + "loss": 2.6135, + "step": 237500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2423951328850464e-05, + "loss": 2.6208, + "step": 237600 + }, + { + "epoch": 0.15, + "learning_rate": 4.242074927953891e-05, + "loss": 2.6056, + "step": 237700 + }, + { + "epoch": 0.15, + "learning_rate": 4.241754723022735e-05, + "loss": 2.6001, + "step": 237800 + }, + { + "epoch": 0.15, + "learning_rate": 4.241434518091579e-05, + "loss": 2.6226, + "step": 237900 + }, + { + "epoch": 0.15, + "learning_rate": 4.241114313160423e-05, + "loss": 2.6156, + "step": 238000 + }, + { + "epoch": 0.15, + "eval_loss": 2.519456624984741, + "eval_runtime": 177.7406, + "eval_samples_per_second": 56.262, + "eval_steps_per_second": 3.516, + "step": 238000 + }, + { + "epoch": 0.15, + "learning_rate": 4.240794108229267e-05, + "loss": 2.6083, + "step": 238100 + }, + { + "epoch": 0.15, + "learning_rate": 4.240473903298111e-05, + "loss": 2.6022, + "step": 238200 + }, + { + "epoch": 0.15, + "learning_rate": 4.240153698366955e-05, + "loss": 2.6034, + "step": 238300 + }, + { + "epoch": 0.15, + "learning_rate": 4.239833493435799e-05, + "loss": 2.6088, + "step": 238400 + }, + { + "epoch": 0.15, + "learning_rate": 4.239513288504643e-05, + "loss": 2.6109, + "step": 238500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2391930835734875e-05, + "loss": 2.5811, + "step": 238600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2388728786423314e-05, + "loss": 2.61, + "step": 238700 + }, + { + "epoch": 0.15, + "learning_rate": 4.2385526737111754e-05, + "loss": 2.6104, + "step": 238800 + }, + { + "epoch": 0.15, + "learning_rate": 4.2382324687800194e-05, + "loss": 2.6341, + "step": 238900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2379122638488634e-05, + "loss": 2.6085, + "step": 239000 + }, + { + "epoch": 0.15, + "eval_loss": 2.521357536315918, + "eval_runtime": 180.2145, + "eval_samples_per_second": 55.489, + "eval_steps_per_second": 3.468, + "step": 239000 + }, + { + "epoch": 0.15, + "learning_rate": 4.2375920589177073e-05, + "loss": 2.6155, + "step": 239100 + }, + { + "epoch": 0.15, + "learning_rate": 4.237271853986551e-05, + "loss": 2.62, + "step": 239200 + }, + { + "epoch": 0.15, + "learning_rate": 4.236951649055396e-05, + "loss": 2.6162, + "step": 239300 + }, + { + "epoch": 0.15, + "learning_rate": 4.23663144412424e-05, + "loss": 2.6319, + "step": 239400 + }, + { + "epoch": 0.15, + "learning_rate": 4.236311239193084e-05, + "loss": 2.5933, + "step": 239500 + }, + { + "epoch": 0.15, + "learning_rate": 4.235991034261928e-05, + "loss": 2.6053, + "step": 239600 + }, + { + "epoch": 0.15, + "learning_rate": 4.235670829330772e-05, + "loss": 2.6177, + "step": 239700 + }, + { + "epoch": 0.15, + "learning_rate": 4.235350624399616e-05, + "loss": 2.6148, + "step": 239800 + }, + { + "epoch": 0.15, + "learning_rate": 4.23503041946846e-05, + "loss": 2.6012, + "step": 239900 + }, + { + "epoch": 0.15, + "learning_rate": 4.234710214537304e-05, + "loss": 2.6172, + "step": 240000 + }, + { + "epoch": 0.15, + "eval_loss": 2.522383451461792, + "eval_runtime": 176.722, + "eval_samples_per_second": 56.586, + "eval_steps_per_second": 3.537, + "step": 240000 + }, + { + "epoch": 0.15, + "learning_rate": 4.2343900096061484e-05, + "loss": 2.6025, + "step": 240100 + }, + { + "epoch": 0.15, + "learning_rate": 4.2340698046749924e-05, + "loss": 2.6058, + "step": 240200 + }, + { + "epoch": 0.15, + "learning_rate": 4.2337495997438364e-05, + "loss": 2.5907, + "step": 240300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2334293948126804e-05, + "loss": 2.6172, + "step": 240400 + }, + { + "epoch": 0.15, + "learning_rate": 4.233109189881524e-05, + "loss": 2.6035, + "step": 240500 + }, + { + "epoch": 0.15, + "learning_rate": 4.232788984950368e-05, + "loss": 2.6052, + "step": 240600 + }, + { + "epoch": 0.15, + "learning_rate": 4.232468780019212e-05, + "loss": 2.5891, + "step": 240700 + }, + { + "epoch": 0.15, + "learning_rate": 4.232148575088056e-05, + "loss": 2.6134, + "step": 240800 + }, + { + "epoch": 0.15, + "learning_rate": 4.231828370156901e-05, + "loss": 2.6096, + "step": 240900 + }, + { + "epoch": 0.15, + "learning_rate": 4.231508165225745e-05, + "loss": 2.6041, + "step": 241000 + }, + { + "epoch": 0.15, + "eval_loss": 2.519827127456665, + "eval_runtime": 173.9651, + "eval_samples_per_second": 57.483, + "eval_steps_per_second": 3.593, + "step": 241000 + }, + { + "epoch": 0.15, + "learning_rate": 4.231187960294589e-05, + "loss": 2.6267, + "step": 241100 + }, + { + "epoch": 0.15, + "learning_rate": 4.230867755363433e-05, + "loss": 2.6165, + "step": 241200 + }, + { + "epoch": 0.15, + "learning_rate": 4.230547550432277e-05, + "loss": 2.5852, + "step": 241300 + }, + { + "epoch": 0.15, + "learning_rate": 4.230227345501121e-05, + "loss": 2.5852, + "step": 241400 + }, + { + "epoch": 0.15, + "learning_rate": 4.229907140569965e-05, + "loss": 2.6022, + "step": 241500 + }, + { + "epoch": 0.15, + "learning_rate": 4.229586935638809e-05, + "loss": 2.6135, + "step": 241600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2292667307076534e-05, + "loss": 2.6242, + "step": 241700 + }, + { + "epoch": 0.15, + "learning_rate": 4.2289465257764973e-05, + "loss": 2.6085, + "step": 241800 + }, + { + "epoch": 0.15, + "learning_rate": 4.228626320845341e-05, + "loss": 2.6124, + "step": 241900 + }, + { + "epoch": 0.15, + "learning_rate": 4.228306115914185e-05, + "loss": 2.6433, + "step": 242000 + }, + { + "epoch": 0.15, + "eval_loss": 2.5190274715423584, + "eval_runtime": 176.3523, + "eval_samples_per_second": 56.705, + "eval_steps_per_second": 3.544, + "step": 242000 + }, + { + "epoch": 0.15, + "learning_rate": 4.227985910983029e-05, + "loss": 2.6167, + "step": 242100 + }, + { + "epoch": 0.16, + "learning_rate": 4.227665706051873e-05, + "loss": 2.5979, + "step": 242200 + }, + { + "epoch": 0.16, + "learning_rate": 4.227345501120717e-05, + "loss": 2.6175, + "step": 242300 + }, + { + "epoch": 0.16, + "learning_rate": 4.227025296189562e-05, + "loss": 2.6009, + "step": 242400 + }, + { + "epoch": 0.16, + "learning_rate": 4.226705091258406e-05, + "loss": 2.5928, + "step": 242500 + }, + { + "epoch": 0.16, + "learning_rate": 4.22638488632725e-05, + "loss": 2.6029, + "step": 242600 + }, + { + "epoch": 0.16, + "learning_rate": 4.226064681396094e-05, + "loss": 2.6182, + "step": 242700 + }, + { + "epoch": 0.16, + "learning_rate": 4.225744476464938e-05, + "loss": 2.6158, + "step": 242800 + }, + { + "epoch": 0.16, + "learning_rate": 4.225424271533782e-05, + "loss": 2.6035, + "step": 242900 + }, + { + "epoch": 0.16, + "learning_rate": 4.225104066602626e-05, + "loss": 2.6024, + "step": 243000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5197348594665527, + "eval_runtime": 178.619, + "eval_samples_per_second": 55.985, + "eval_steps_per_second": 3.499, + "step": 243000 + }, + { + "epoch": 0.16, + "learning_rate": 4.22478386167147e-05, + "loss": 2.5982, + "step": 243100 + }, + { + "epoch": 0.16, + "learning_rate": 4.2244636567403137e-05, + "loss": 2.6126, + "step": 243200 + }, + { + "epoch": 0.16, + "learning_rate": 4.224143451809158e-05, + "loss": 2.6022, + "step": 243300 + }, + { + "epoch": 0.16, + "learning_rate": 4.223823246878002e-05, + "loss": 2.6159, + "step": 243400 + }, + { + "epoch": 0.16, + "learning_rate": 4.223503041946846e-05, + "loss": 2.602, + "step": 243500 + }, + { + "epoch": 0.16, + "learning_rate": 4.22318283701569e-05, + "loss": 2.5958, + "step": 243600 + }, + { + "epoch": 0.16, + "learning_rate": 4.222862632084534e-05, + "loss": 2.5933, + "step": 243700 + }, + { + "epoch": 0.16, + "learning_rate": 4.222542427153378e-05, + "loss": 2.6133, + "step": 243800 + }, + { + "epoch": 0.16, + "learning_rate": 4.222222222222222e-05, + "loss": 2.5952, + "step": 243900 + }, + { + "epoch": 0.16, + "learning_rate": 4.221902017291067e-05, + "loss": 2.6234, + "step": 244000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5188870429992676, + "eval_runtime": 176.9188, + "eval_samples_per_second": 56.523, + "eval_steps_per_second": 3.533, + "step": 244000 + }, + { + "epoch": 0.16, + "learning_rate": 4.221581812359911e-05, + "loss": 2.5892, + "step": 244100 + }, + { + "epoch": 0.16, + "learning_rate": 4.221261607428755e-05, + "loss": 2.6068, + "step": 244200 + }, + { + "epoch": 0.16, + "learning_rate": 4.220941402497599e-05, + "loss": 2.599, + "step": 244300 + }, + { + "epoch": 0.16, + "learning_rate": 4.220621197566443e-05, + "loss": 2.6054, + "step": 244400 + }, + { + "epoch": 0.16, + "learning_rate": 4.220300992635287e-05, + "loss": 2.6022, + "step": 244500 + }, + { + "epoch": 0.16, + "learning_rate": 4.2199807877041306e-05, + "loss": 2.6111, + "step": 244600 + }, + { + "epoch": 0.16, + "learning_rate": 4.219660582772975e-05, + "loss": 2.5955, + "step": 244700 + }, + { + "epoch": 0.16, + "learning_rate": 4.2193403778418186e-05, + "loss": 2.6128, + "step": 244800 + }, + { + "epoch": 0.16, + "learning_rate": 4.219020172910663e-05, + "loss": 2.5816, + "step": 244900 + }, + { + "epoch": 0.16, + "learning_rate": 4.218699967979507e-05, + "loss": 2.6044, + "step": 245000 + }, + { + "epoch": 0.16, + "eval_loss": 2.519922971725464, + "eval_runtime": 176.5307, + "eval_samples_per_second": 56.647, + "eval_steps_per_second": 3.54, + "step": 245000 + }, + { + "epoch": 0.16, + "learning_rate": 4.218379763048351e-05, + "loss": 2.5932, + "step": 245100 + }, + { + "epoch": 0.16, + "learning_rate": 4.218059558117195e-05, + "loss": 2.5801, + "step": 245200 + }, + { + "epoch": 0.16, + "learning_rate": 4.217739353186039e-05, + "loss": 2.6113, + "step": 245300 + }, + { + "epoch": 0.16, + "learning_rate": 4.217419148254884e-05, + "loss": 2.5939, + "step": 245400 + }, + { + "epoch": 0.16, + "learning_rate": 4.217098943323727e-05, + "loss": 2.606, + "step": 245500 + }, + { + "epoch": 0.16, + "learning_rate": 4.216778738392572e-05, + "loss": 2.6075, + "step": 245600 + }, + { + "epoch": 0.16, + "learning_rate": 4.216458533461416e-05, + "loss": 2.5771, + "step": 245700 + }, + { + "epoch": 0.16, + "learning_rate": 4.21613832853026e-05, + "loss": 2.6056, + "step": 245800 + }, + { + "epoch": 0.16, + "learning_rate": 4.215818123599104e-05, + "loss": 2.606, + "step": 245900 + }, + { + "epoch": 0.16, + "learning_rate": 4.2154979186679476e-05, + "loss": 2.585, + "step": 246000 + }, + { + "epoch": 0.16, + "eval_loss": 2.521212577819824, + "eval_runtime": 174.8676, + "eval_samples_per_second": 57.186, + "eval_steps_per_second": 3.574, + "step": 246000 + }, + { + "epoch": 0.16, + "learning_rate": 4.2151777137367916e-05, + "loss": 2.6218, + "step": 246100 + }, + { + "epoch": 0.16, + "learning_rate": 4.2148575088056356e-05, + "loss": 2.6135, + "step": 246200 + }, + { + "epoch": 0.16, + "learning_rate": 4.21453730387448e-05, + "loss": 2.5745, + "step": 246300 + }, + { + "epoch": 0.16, + "learning_rate": 4.2142170989433235e-05, + "loss": 2.5689, + "step": 246400 + }, + { + "epoch": 0.16, + "learning_rate": 4.213896894012168e-05, + "loss": 2.611, + "step": 246500 + }, + { + "epoch": 0.16, + "learning_rate": 4.213576689081012e-05, + "loss": 2.6038, + "step": 246600 + }, + { + "epoch": 0.16, + "learning_rate": 4.213256484149856e-05, + "loss": 2.6005, + "step": 246700 + }, + { + "epoch": 0.16, + "learning_rate": 4.2129362792187e-05, + "loss": 2.5979, + "step": 246800 + }, + { + "epoch": 0.16, + "learning_rate": 4.212616074287544e-05, + "loss": 2.6079, + "step": 246900 + }, + { + "epoch": 0.16, + "learning_rate": 4.212295869356389e-05, + "loss": 2.6015, + "step": 247000 + }, + { + "epoch": 0.16, + "eval_loss": 2.519866466522217, + "eval_runtime": 177.7026, + "eval_samples_per_second": 56.274, + "eval_steps_per_second": 3.517, + "step": 247000 + }, + { + "epoch": 0.16, + "learning_rate": 4.211975664425232e-05, + "loss": 2.5989, + "step": 247100 + }, + { + "epoch": 0.16, + "learning_rate": 4.211655459494077e-05, + "loss": 2.6152, + "step": 247200 + }, + { + "epoch": 0.16, + "learning_rate": 4.2113352545629207e-05, + "loss": 2.5921, + "step": 247300 + }, + { + "epoch": 0.16, + "learning_rate": 4.2110150496317646e-05, + "loss": 2.586, + "step": 247400 + }, + { + "epoch": 0.16, + "learning_rate": 4.2106948447006086e-05, + "loss": 2.6009, + "step": 247500 + }, + { + "epoch": 0.16, + "learning_rate": 4.2103746397694526e-05, + "loss": 2.5924, + "step": 247600 + }, + { + "epoch": 0.16, + "learning_rate": 4.210054434838297e-05, + "loss": 2.5901, + "step": 247700 + }, + { + "epoch": 0.16, + "learning_rate": 4.2097342299071405e-05, + "loss": 2.5757, + "step": 247800 + }, + { + "epoch": 0.16, + "learning_rate": 4.209414024975985e-05, + "loss": 2.5997, + "step": 247900 + }, + { + "epoch": 0.16, + "learning_rate": 4.2090938200448285e-05, + "loss": 2.5898, + "step": 248000 + }, + { + "epoch": 0.16, + "eval_loss": 2.517677068710327, + "eval_runtime": 179.5331, + "eval_samples_per_second": 55.7, + "eval_steps_per_second": 3.481, + "step": 248000 + }, + { + "epoch": 0.16, + "learning_rate": 4.208773615113673e-05, + "loss": 2.6087, + "step": 248100 + }, + { + "epoch": 0.16, + "learning_rate": 4.208453410182517e-05, + "loss": 2.5959, + "step": 248200 + }, + { + "epoch": 0.16, + "learning_rate": 4.208133205251361e-05, + "loss": 2.5946, + "step": 248300 + }, + { + "epoch": 0.16, + "learning_rate": 4.207813000320205e-05, + "loss": 2.5904, + "step": 248400 + }, + { + "epoch": 0.16, + "learning_rate": 4.207492795389049e-05, + "loss": 2.6008, + "step": 248500 + }, + { + "epoch": 0.16, + "learning_rate": 4.207172590457894e-05, + "loss": 2.6257, + "step": 248600 + }, + { + "epoch": 0.16, + "learning_rate": 4.206852385526737e-05, + "loss": 2.5861, + "step": 248700 + }, + { + "epoch": 0.16, + "learning_rate": 4.2065321805955816e-05, + "loss": 2.6015, + "step": 248800 + }, + { + "epoch": 0.16, + "learning_rate": 4.2062119756644256e-05, + "loss": 2.6052, + "step": 248900 + }, + { + "epoch": 0.16, + "learning_rate": 4.2058917707332696e-05, + "loss": 2.6083, + "step": 249000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5183045864105225, + "eval_runtime": 174.4514, + "eval_samples_per_second": 57.323, + "eval_steps_per_second": 3.583, + "step": 249000 + }, + { + "epoch": 0.16, + "learning_rate": 4.2055715658021135e-05, + "loss": 2.5813, + "step": 249100 + }, + { + "epoch": 0.16, + "learning_rate": 4.2052513608709575e-05, + "loss": 2.6023, + "step": 249200 + }, + { + "epoch": 0.16, + "learning_rate": 4.204931155939802e-05, + "loss": 2.6073, + "step": 249300 + }, + { + "epoch": 0.16, + "learning_rate": 4.2046109510086455e-05, + "loss": 2.6034, + "step": 249400 + }, + { + "epoch": 0.16, + "learning_rate": 4.20429074607749e-05, + "loss": 2.5924, + "step": 249500 + }, + { + "epoch": 0.16, + "learning_rate": 4.2039705411463334e-05, + "loss": 2.6071, + "step": 249600 + }, + { + "epoch": 0.16, + "learning_rate": 4.203650336215178e-05, + "loss": 2.6096, + "step": 249700 + }, + { + "epoch": 0.16, + "learning_rate": 4.203330131284022e-05, + "loss": 2.6003, + "step": 249800 + }, + { + "epoch": 0.16, + "learning_rate": 4.203009926352866e-05, + "loss": 2.5857, + "step": 249900 + }, + { + "epoch": 0.16, + "learning_rate": 4.2026897214217107e-05, + "loss": 2.5914, + "step": 250000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5187132358551025, + "eval_runtime": 178.1664, + "eval_samples_per_second": 56.127, + "eval_steps_per_second": 3.508, + "step": 250000 + }, + { + "epoch": 0.16, + "learning_rate": 4.202369516490554e-05, + "loss": 2.5998, + "step": 250100 + }, + { + "epoch": 0.16, + "learning_rate": 4.2020493115593986e-05, + "loss": 2.5964, + "step": 250200 + }, + { + "epoch": 0.16, + "learning_rate": 4.201729106628242e-05, + "loss": 2.6038, + "step": 250300 + }, + { + "epoch": 0.16, + "learning_rate": 4.2014089016970866e-05, + "loss": 2.6056, + "step": 250400 + }, + { + "epoch": 0.16, + "learning_rate": 4.2010886967659305e-05, + "loss": 2.6148, + "step": 250500 + }, + { + "epoch": 0.16, + "learning_rate": 4.2007684918347745e-05, + "loss": 2.6028, + "step": 250600 + }, + { + "epoch": 0.16, + "learning_rate": 4.2004482869036185e-05, + "loss": 2.6202, + "step": 250700 + }, + { + "epoch": 0.16, + "learning_rate": 4.2001280819724624e-05, + "loss": 2.6139, + "step": 250800 + }, + { + "epoch": 0.16, + "learning_rate": 4.199807877041307e-05, + "loss": 2.5886, + "step": 250900 + }, + { + "epoch": 0.16, + "learning_rate": 4.1994876721101504e-05, + "loss": 2.5908, + "step": 251000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5188255310058594, + "eval_runtime": 175.9361, + "eval_samples_per_second": 56.839, + "eval_steps_per_second": 3.552, + "step": 251000 + }, + { + "epoch": 0.16, + "learning_rate": 4.199167467178995e-05, + "loss": 2.6054, + "step": 251100 + }, + { + "epoch": 0.16, + "learning_rate": 4.1988472622478383e-05, + "loss": 2.6235, + "step": 251200 + }, + { + "epoch": 0.16, + "learning_rate": 4.198527057316683e-05, + "loss": 2.6033, + "step": 251300 + }, + { + "epoch": 0.16, + "learning_rate": 4.198206852385527e-05, + "loss": 2.6026, + "step": 251400 + }, + { + "epoch": 0.16, + "learning_rate": 4.197886647454371e-05, + "loss": 2.5992, + "step": 251500 + }, + { + "epoch": 0.16, + "learning_rate": 4.1975664425232156e-05, + "loss": 2.5972, + "step": 251600 + }, + { + "epoch": 0.16, + "learning_rate": 4.197246237592059e-05, + "loss": 2.6024, + "step": 251700 + }, + { + "epoch": 0.16, + "learning_rate": 4.1969260326609035e-05, + "loss": 2.5899, + "step": 251800 + }, + { + "epoch": 0.16, + "learning_rate": 4.196605827729747e-05, + "loss": 2.5926, + "step": 251900 + }, + { + "epoch": 0.16, + "learning_rate": 4.1962856227985915e-05, + "loss": 2.5775, + "step": 252000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5177624225616455, + "eval_runtime": 178.4144, + "eval_samples_per_second": 56.049, + "eval_steps_per_second": 3.503, + "step": 252000 + }, + { + "epoch": 0.16, + "learning_rate": 4.1959654178674355e-05, + "loss": 2.6064, + "step": 252100 + }, + { + "epoch": 0.16, + "learning_rate": 4.1956452129362794e-05, + "loss": 2.5807, + "step": 252200 + }, + { + "epoch": 0.16, + "learning_rate": 4.1953250080051234e-05, + "loss": 2.576, + "step": 252300 + }, + { + "epoch": 0.16, + "learning_rate": 4.1950048030739674e-05, + "loss": 2.6154, + "step": 252400 + }, + { + "epoch": 0.16, + "learning_rate": 4.194684598142812e-05, + "loss": 2.5894, + "step": 252500 + }, + { + "epoch": 0.16, + "learning_rate": 4.194364393211655e-05, + "loss": 2.5964, + "step": 252600 + }, + { + "epoch": 0.16, + "learning_rate": 4.1940441882805e-05, + "loss": 2.5941, + "step": 252700 + }, + { + "epoch": 0.16, + "learning_rate": 4.193723983349343e-05, + "loss": 2.6035, + "step": 252800 + }, + { + "epoch": 0.16, + "learning_rate": 4.193403778418188e-05, + "loss": 2.5959, + "step": 252900 + }, + { + "epoch": 0.16, + "learning_rate": 4.193083573487032e-05, + "loss": 2.6098, + "step": 253000 + }, + { + "epoch": 0.16, + "eval_loss": 2.51733660697937, + "eval_runtime": 177.19, + "eval_samples_per_second": 56.437, + "eval_steps_per_second": 3.527, + "step": 253000 + }, + { + "epoch": 0.16, + "learning_rate": 4.192763368555876e-05, + "loss": 2.6199, + "step": 253100 + }, + { + "epoch": 0.16, + "learning_rate": 4.1924431636247205e-05, + "loss": 2.5878, + "step": 253200 + }, + { + "epoch": 0.16, + "learning_rate": 4.192122958693564e-05, + "loss": 2.6154, + "step": 253300 + }, + { + "epoch": 0.16, + "learning_rate": 4.1918027537624085e-05, + "loss": 2.6085, + "step": 253400 + }, + { + "epoch": 0.16, + "learning_rate": 4.191482548831252e-05, + "loss": 2.5945, + "step": 253500 + }, + { + "epoch": 0.16, + "learning_rate": 4.1911623439000964e-05, + "loss": 2.5918, + "step": 253600 + }, + { + "epoch": 0.16, + "learning_rate": 4.1908421389689404e-05, + "loss": 2.593, + "step": 253700 + }, + { + "epoch": 0.16, + "learning_rate": 4.1905219340377844e-05, + "loss": 2.6045, + "step": 253800 + }, + { + "epoch": 0.16, + "learning_rate": 4.1902017291066283e-05, + "loss": 2.582, + "step": 253900 + }, + { + "epoch": 0.16, + "learning_rate": 4.189881524175472e-05, + "loss": 2.5908, + "step": 254000 + }, + { + "epoch": 0.16, + "eval_loss": 2.517996072769165, + "eval_runtime": 179.2284, + "eval_samples_per_second": 55.795, + "eval_steps_per_second": 3.487, + "step": 254000 + }, + { + "epoch": 0.16, + "learning_rate": 4.189561319244317e-05, + "loss": 2.6161, + "step": 254100 + }, + { + "epoch": 0.16, + "learning_rate": 4.18924111431316e-05, + "loss": 2.5769, + "step": 254200 + }, + { + "epoch": 0.16, + "learning_rate": 4.188920909382005e-05, + "loss": 2.6037, + "step": 254300 + }, + { + "epoch": 0.16, + "learning_rate": 4.188600704450848e-05, + "loss": 2.5995, + "step": 254400 + }, + { + "epoch": 0.16, + "learning_rate": 4.188280499519693e-05, + "loss": 2.5857, + "step": 254500 + }, + { + "epoch": 0.16, + "learning_rate": 4.187960294588537e-05, + "loss": 2.5969, + "step": 254600 + }, + { + "epoch": 0.16, + "learning_rate": 4.187640089657381e-05, + "loss": 2.5733, + "step": 254700 + }, + { + "epoch": 0.16, + "learning_rate": 4.1873198847262255e-05, + "loss": 2.6007, + "step": 254800 + }, + { + "epoch": 0.16, + "learning_rate": 4.186999679795069e-05, + "loss": 2.6058, + "step": 254900 + }, + { + "epoch": 0.16, + "learning_rate": 4.1866794748639134e-05, + "loss": 2.6154, + "step": 255000 + }, + { + "epoch": 0.16, + "eval_loss": 2.517491102218628, + "eval_runtime": 176.7253, + "eval_samples_per_second": 56.585, + "eval_steps_per_second": 3.537, + "step": 255000 + }, + { + "epoch": 0.16, + "learning_rate": 4.186359269932757e-05, + "loss": 2.6082, + "step": 255100 + }, + { + "epoch": 0.16, + "learning_rate": 4.1860390650016014e-05, + "loss": 2.6015, + "step": 255200 + }, + { + "epoch": 0.16, + "learning_rate": 4.1857188600704453e-05, + "loss": 2.6023, + "step": 255300 + }, + { + "epoch": 0.16, + "learning_rate": 4.185398655139289e-05, + "loss": 2.5929, + "step": 255400 + }, + { + "epoch": 0.16, + "learning_rate": 4.185078450208133e-05, + "loss": 2.604, + "step": 255500 + }, + { + "epoch": 0.16, + "learning_rate": 4.184758245276977e-05, + "loss": 2.6106, + "step": 255600 + }, + { + "epoch": 0.16, + "learning_rate": 4.184438040345822e-05, + "loss": 2.5907, + "step": 255700 + }, + { + "epoch": 0.16, + "learning_rate": 4.184117835414665e-05, + "loss": 2.5968, + "step": 255800 + }, + { + "epoch": 0.16, + "learning_rate": 4.18379763048351e-05, + "loss": 2.59, + "step": 255900 + }, + { + "epoch": 0.16, + "learning_rate": 4.183477425552353e-05, + "loss": 2.6208, + "step": 256000 + }, + { + "epoch": 0.16, + "eval_loss": 2.516918182373047, + "eval_runtime": 173.799, + "eval_samples_per_second": 57.538, + "eval_steps_per_second": 3.596, + "step": 256000 + }, + { + "epoch": 0.16, + "learning_rate": 4.183157220621198e-05, + "loss": 2.5787, + "step": 256100 + }, + { + "epoch": 0.16, + "learning_rate": 4.182837015690042e-05, + "loss": 2.6033, + "step": 256200 + }, + { + "epoch": 0.16, + "learning_rate": 4.182516810758886e-05, + "loss": 2.5794, + "step": 256300 + }, + { + "epoch": 0.16, + "learning_rate": 4.1821966058277304e-05, + "loss": 2.6046, + "step": 256400 + }, + { + "epoch": 0.16, + "learning_rate": 4.181876400896574e-05, + "loss": 2.6119, + "step": 256500 + }, + { + "epoch": 0.16, + "learning_rate": 4.1815561959654184e-05, + "loss": 2.5908, + "step": 256600 + }, + { + "epoch": 0.16, + "learning_rate": 4.1812359910342617e-05, + "loss": 2.5921, + "step": 256700 + }, + { + "epoch": 0.16, + "learning_rate": 4.180915786103106e-05, + "loss": 2.6081, + "step": 256800 + }, + { + "epoch": 0.16, + "learning_rate": 4.18059558117195e-05, + "loss": 2.605, + "step": 256900 + }, + { + "epoch": 0.16, + "learning_rate": 4.180275376240794e-05, + "loss": 2.5885, + "step": 257000 + }, + { + "epoch": 0.16, + "eval_loss": 2.5189480781555176, + "eval_runtime": 176.498, + "eval_samples_per_second": 56.658, + "eval_steps_per_second": 3.541, + "step": 257000 + }, + { + "epoch": 0.16, + "learning_rate": 4.179955171309638e-05, + "loss": 2.6032, + "step": 257100 + }, + { + "epoch": 0.16, + "learning_rate": 4.179634966378482e-05, + "loss": 2.5909, + "step": 257200 + }, + { + "epoch": 0.16, + "learning_rate": 4.179314761447327e-05, + "loss": 2.5831, + "step": 257300 + }, + { + "epoch": 0.16, + "learning_rate": 4.17899455651617e-05, + "loss": 2.6053, + "step": 257400 + }, + { + "epoch": 0.16, + "learning_rate": 4.178674351585015e-05, + "loss": 2.5832, + "step": 257500 + }, + { + "epoch": 0.16, + "learning_rate": 4.178354146653859e-05, + "loss": 2.5843, + "step": 257600 + }, + { + "epoch": 0.16, + "learning_rate": 4.178033941722703e-05, + "loss": 2.6138, + "step": 257700 + }, + { + "epoch": 0.16, + "learning_rate": 4.177713736791547e-05, + "loss": 2.5878, + "step": 257800 + }, + { + "epoch": 0.17, + "learning_rate": 4.177393531860391e-05, + "loss": 2.5953, + "step": 257900 + }, + { + "epoch": 0.17, + "learning_rate": 4.1770733269292353e-05, + "loss": 2.5822, + "step": 258000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5180604457855225, + "eval_runtime": 174.2586, + "eval_samples_per_second": 57.386, + "eval_steps_per_second": 3.587, + "step": 258000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1767531219980786e-05, + "loss": 2.5609, + "step": 258100 + }, + { + "epoch": 0.17, + "learning_rate": 4.176432917066923e-05, + "loss": 2.5931, + "step": 258200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1761127121357666e-05, + "loss": 2.5921, + "step": 258300 + }, + { + "epoch": 0.17, + "learning_rate": 4.175792507204611e-05, + "loss": 2.5857, + "step": 258400 + }, + { + "epoch": 0.17, + "learning_rate": 4.175472302273455e-05, + "loss": 2.5527, + "step": 258500 + }, + { + "epoch": 0.17, + "learning_rate": 4.175152097342299e-05, + "loss": 2.606, + "step": 258600 + }, + { + "epoch": 0.17, + "learning_rate": 4.174831892411143e-05, + "loss": 2.6038, + "step": 258700 + }, + { + "epoch": 0.17, + "learning_rate": 4.174511687479987e-05, + "loss": 2.5795, + "step": 258800 + }, + { + "epoch": 0.17, + "learning_rate": 4.174191482548832e-05, + "loss": 2.59, + "step": 258900 + }, + { + "epoch": 0.17, + "learning_rate": 4.173871277617675e-05, + "loss": 2.6116, + "step": 259000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5164027214050293, + "eval_runtime": 177.8193, + "eval_samples_per_second": 56.237, + "eval_steps_per_second": 3.515, + "step": 259000 + }, + { + "epoch": 0.17, + "learning_rate": 4.17355107268652e-05, + "loss": 2.5925, + "step": 259100 + }, + { + "epoch": 0.17, + "learning_rate": 4.173230867755364e-05, + "loss": 2.5748, + "step": 259200 + }, + { + "epoch": 0.17, + "learning_rate": 4.172910662824208e-05, + "loss": 2.5917, + "step": 259300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1725904578930517e-05, + "loss": 2.5977, + "step": 259400 + }, + { + "epoch": 0.17, + "learning_rate": 4.1722702529618956e-05, + "loss": 2.5957, + "step": 259500 + }, + { + "epoch": 0.17, + "learning_rate": 4.17195004803074e-05, + "loss": 2.599, + "step": 259600 + }, + { + "epoch": 0.17, + "learning_rate": 4.1716298430995836e-05, + "loss": 2.571, + "step": 259700 + }, + { + "epoch": 0.17, + "learning_rate": 4.171309638168428e-05, + "loss": 2.5959, + "step": 259800 + }, + { + "epoch": 0.17, + "learning_rate": 4.170989433237272e-05, + "loss": 2.5897, + "step": 259900 + }, + { + "epoch": 0.17, + "learning_rate": 4.170669228306116e-05, + "loss": 2.5842, + "step": 260000 + }, + { + "epoch": 0.17, + "eval_loss": 2.515014410018921, + "eval_runtime": 177.7592, + "eval_samples_per_second": 56.256, + "eval_steps_per_second": 3.516, + "step": 260000 + }, + { + "epoch": 0.17, + "learning_rate": 4.17034902337496e-05, + "loss": 2.5836, + "step": 260100 + }, + { + "epoch": 0.17, + "learning_rate": 4.170028818443804e-05, + "loss": 2.5905, + "step": 260200 + }, + { + "epoch": 0.17, + "learning_rate": 4.169708613512648e-05, + "loss": 2.5985, + "step": 260300 + }, + { + "epoch": 0.17, + "learning_rate": 4.169388408581492e-05, + "loss": 2.5889, + "step": 260400 + }, + { + "epoch": 0.17, + "learning_rate": 4.169068203650337e-05, + "loss": 2.5939, + "step": 260500 + }, + { + "epoch": 0.17, + "learning_rate": 4.16874799871918e-05, + "loss": 2.5975, + "step": 260600 + }, + { + "epoch": 0.17, + "learning_rate": 4.168427793788025e-05, + "loss": 2.5703, + "step": 260700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1681075888568686e-05, + "loss": 2.61, + "step": 260800 + }, + { + "epoch": 0.17, + "learning_rate": 4.1677873839257126e-05, + "loss": 2.5877, + "step": 260900 + }, + { + "epoch": 0.17, + "learning_rate": 4.1674671789945566e-05, + "loss": 2.5998, + "step": 261000 + }, + { + "epoch": 0.17, + "eval_loss": 2.513521909713745, + "eval_runtime": 176.0418, + "eval_samples_per_second": 56.805, + "eval_steps_per_second": 3.55, + "step": 261000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1671469740634006e-05, + "loss": 2.5755, + "step": 261100 + }, + { + "epoch": 0.17, + "learning_rate": 4.166826769132245e-05, + "loss": 2.5838, + "step": 261200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1665065642010885e-05, + "loss": 2.5788, + "step": 261300 + }, + { + "epoch": 0.17, + "learning_rate": 4.166186359269933e-05, + "loss": 2.5935, + "step": 261400 + }, + { + "epoch": 0.17, + "learning_rate": 4.165866154338777e-05, + "loss": 2.5933, + "step": 261500 + }, + { + "epoch": 0.17, + "learning_rate": 4.165545949407621e-05, + "loss": 2.5783, + "step": 261600 + }, + { + "epoch": 0.17, + "learning_rate": 4.165225744476465e-05, + "loss": 2.5971, + "step": 261700 + }, + { + "epoch": 0.17, + "learning_rate": 4.164905539545309e-05, + "loss": 2.5905, + "step": 261800 + }, + { + "epoch": 0.17, + "learning_rate": 4.164585334614153e-05, + "loss": 2.5828, + "step": 261900 + }, + { + "epoch": 0.17, + "learning_rate": 4.164265129682997e-05, + "loss": 2.582, + "step": 262000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5153019428253174, + "eval_runtime": 176.8144, + "eval_samples_per_second": 56.556, + "eval_steps_per_second": 3.535, + "step": 262000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1639449247518417e-05, + "loss": 2.6019, + "step": 262100 + }, + { + "epoch": 0.17, + "learning_rate": 4.1636247198206856e-05, + "loss": 2.5596, + "step": 262200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1633045148895296e-05, + "loss": 2.6009, + "step": 262300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1629843099583736e-05, + "loss": 2.5817, + "step": 262400 + }, + { + "epoch": 0.17, + "learning_rate": 4.1626641050272176e-05, + "loss": 2.5961, + "step": 262500 + }, + { + "epoch": 0.17, + "learning_rate": 4.1623439000960615e-05, + "loss": 2.5779, + "step": 262600 + }, + { + "epoch": 0.17, + "learning_rate": 4.1620236951649055e-05, + "loss": 2.5732, + "step": 262700 + }, + { + "epoch": 0.17, + "learning_rate": 4.16170349023375e-05, + "loss": 2.5881, + "step": 262800 + }, + { + "epoch": 0.17, + "learning_rate": 4.161383285302594e-05, + "loss": 2.5831, + "step": 262900 + }, + { + "epoch": 0.17, + "learning_rate": 4.161063080371438e-05, + "loss": 2.5916, + "step": 263000 + }, + { + "epoch": 0.17, + "eval_loss": 2.515854597091675, + "eval_runtime": 177.8685, + "eval_samples_per_second": 56.221, + "eval_steps_per_second": 3.514, + "step": 263000 + }, + { + "epoch": 0.17, + "learning_rate": 4.160742875440282e-05, + "loss": 2.5857, + "step": 263100 + }, + { + "epoch": 0.17, + "learning_rate": 4.160422670509126e-05, + "loss": 2.5812, + "step": 263200 + }, + { + "epoch": 0.17, + "learning_rate": 4.16010246557797e-05, + "loss": 2.5848, + "step": 263300 + }, + { + "epoch": 0.17, + "learning_rate": 4.159782260646814e-05, + "loss": 2.5676, + "step": 263400 + }, + { + "epoch": 0.17, + "learning_rate": 4.159462055715658e-05, + "loss": 2.5976, + "step": 263500 + }, + { + "epoch": 0.17, + "learning_rate": 4.159141850784502e-05, + "loss": 2.5659, + "step": 263600 + }, + { + "epoch": 0.17, + "learning_rate": 4.1588216458533466e-05, + "loss": 2.5824, + "step": 263700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1585014409221906e-05, + "loss": 2.5917, + "step": 263800 + }, + { + "epoch": 0.17, + "learning_rate": 4.1581812359910345e-05, + "loss": 2.5712, + "step": 263900 + }, + { + "epoch": 0.17, + "learning_rate": 4.1578610310598785e-05, + "loss": 2.594, + "step": 264000 + }, + { + "epoch": 0.17, + "eval_loss": 2.515024185180664, + "eval_runtime": 177.996, + "eval_samples_per_second": 56.181, + "eval_steps_per_second": 3.511, + "step": 264000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1575408261287225e-05, + "loss": 2.5841, + "step": 264100 + }, + { + "epoch": 0.17, + "learning_rate": 4.1572206211975665e-05, + "loss": 2.592, + "step": 264200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1569004162664104e-05, + "loss": 2.5938, + "step": 264300 + }, + { + "epoch": 0.17, + "learning_rate": 4.156580211335255e-05, + "loss": 2.5704, + "step": 264400 + }, + { + "epoch": 0.17, + "learning_rate": 4.156260006404099e-05, + "loss": 2.5664, + "step": 264500 + }, + { + "epoch": 0.17, + "learning_rate": 4.155939801472943e-05, + "loss": 2.576, + "step": 264600 + }, + { + "epoch": 0.17, + "learning_rate": 4.155619596541787e-05, + "loss": 2.5821, + "step": 264700 + }, + { + "epoch": 0.17, + "learning_rate": 4.155299391610631e-05, + "loss": 2.588, + "step": 264800 + }, + { + "epoch": 0.17, + "learning_rate": 4.154979186679475e-05, + "loss": 2.571, + "step": 264900 + }, + { + "epoch": 0.17, + "learning_rate": 4.154658981748319e-05, + "loss": 2.56, + "step": 265000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5161280632019043, + "eval_runtime": 177.1926, + "eval_samples_per_second": 56.436, + "eval_steps_per_second": 3.527, + "step": 265000 + }, + { + "epoch": 0.17, + "learning_rate": 4.154338776817163e-05, + "loss": 2.5877, + "step": 265100 + }, + { + "epoch": 0.17, + "learning_rate": 4.1540185718860076e-05, + "loss": 2.5833, + "step": 265200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1536983669548515e-05, + "loss": 2.5962, + "step": 265300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1533781620236955e-05, + "loss": 2.5718, + "step": 265400 + }, + { + "epoch": 0.17, + "learning_rate": 4.1530579570925395e-05, + "loss": 2.5899, + "step": 265500 + }, + { + "epoch": 0.17, + "learning_rate": 4.1527377521613835e-05, + "loss": 2.595, + "step": 265600 + }, + { + "epoch": 0.17, + "learning_rate": 4.1524175472302274e-05, + "loss": 2.5843, + "step": 265700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1520973422990714e-05, + "loss": 2.5903, + "step": 265800 + }, + { + "epoch": 0.17, + "learning_rate": 4.1517771373679154e-05, + "loss": 2.5776, + "step": 265900 + }, + { + "epoch": 0.17, + "learning_rate": 4.15145693243676e-05, + "loss": 2.5732, + "step": 266000 + }, + { + "epoch": 0.17, + "eval_loss": 2.515674114227295, + "eval_runtime": 176.8798, + "eval_samples_per_second": 56.536, + "eval_steps_per_second": 3.533, + "step": 266000 + }, + { + "epoch": 0.17, + "learning_rate": 4.151136727505604e-05, + "loss": 2.5623, + "step": 266100 + }, + { + "epoch": 0.17, + "learning_rate": 4.150816522574448e-05, + "loss": 2.5807, + "step": 266200 + }, + { + "epoch": 0.17, + "learning_rate": 4.150496317643292e-05, + "loss": 2.5973, + "step": 266300 + }, + { + "epoch": 0.17, + "learning_rate": 4.150176112712136e-05, + "loss": 2.5657, + "step": 266400 + }, + { + "epoch": 0.17, + "learning_rate": 4.14985590778098e-05, + "loss": 2.5731, + "step": 266500 + }, + { + "epoch": 0.17, + "learning_rate": 4.149535702849824e-05, + "loss": 2.5915, + "step": 266600 + }, + { + "epoch": 0.17, + "learning_rate": 4.149215497918668e-05, + "loss": 2.5775, + "step": 266700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1488952929875125e-05, + "loss": 2.5722, + "step": 266800 + }, + { + "epoch": 0.17, + "learning_rate": 4.1485750880563565e-05, + "loss": 2.5907, + "step": 266900 + }, + { + "epoch": 0.17, + "learning_rate": 4.1482548831252004e-05, + "loss": 2.5999, + "step": 267000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5125646591186523, + "eval_runtime": 177.7706, + "eval_samples_per_second": 56.252, + "eval_steps_per_second": 3.516, + "step": 267000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1479346781940444e-05, + "loss": 2.5765, + "step": 267100 + }, + { + "epoch": 0.17, + "learning_rate": 4.1476144732628884e-05, + "loss": 2.5961, + "step": 267200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1472942683317324e-05, + "loss": 2.5974, + "step": 267300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1469740634005763e-05, + "loss": 2.6007, + "step": 267400 + }, + { + "epoch": 0.17, + "learning_rate": 4.146653858469421e-05, + "loss": 2.5798, + "step": 267500 + }, + { + "epoch": 0.17, + "learning_rate": 4.146333653538265e-05, + "loss": 2.5943, + "step": 267600 + }, + { + "epoch": 0.17, + "learning_rate": 4.146013448607109e-05, + "loss": 2.5976, + "step": 267700 + }, + { + "epoch": 0.17, + "learning_rate": 4.145693243675953e-05, + "loss": 2.599, + "step": 267800 + }, + { + "epoch": 0.17, + "learning_rate": 4.145373038744797e-05, + "loss": 2.5967, + "step": 267900 + }, + { + "epoch": 0.17, + "learning_rate": 4.145052833813641e-05, + "loss": 2.5996, + "step": 268000 + }, + { + "epoch": 0.17, + "eval_loss": 2.512153148651123, + "eval_runtime": 175.9425, + "eval_samples_per_second": 56.837, + "eval_steps_per_second": 3.552, + "step": 268000 + }, + { + "epoch": 0.17, + "learning_rate": 4.144732628882485e-05, + "loss": 2.5637, + "step": 268100 + }, + { + "epoch": 0.17, + "learning_rate": 4.144412423951329e-05, + "loss": 2.6014, + "step": 268200 + }, + { + "epoch": 0.17, + "learning_rate": 4.144092219020173e-05, + "loss": 2.5843, + "step": 268300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1437720140890174e-05, + "loss": 2.5961, + "step": 268400 + }, + { + "epoch": 0.17, + "learning_rate": 4.1434518091578614e-05, + "loss": 2.5788, + "step": 268500 + }, + { + "epoch": 0.17, + "learning_rate": 4.1431316042267054e-05, + "loss": 2.5749, + "step": 268600 + }, + { + "epoch": 0.17, + "learning_rate": 4.1428113992955494e-05, + "loss": 2.5783, + "step": 268700 + }, + { + "epoch": 0.17, + "learning_rate": 4.142491194364393e-05, + "loss": 2.6043, + "step": 268800 + }, + { + "epoch": 0.17, + "learning_rate": 4.142170989433237e-05, + "loss": 2.5832, + "step": 268900 + }, + { + "epoch": 0.17, + "learning_rate": 4.141850784502081e-05, + "loss": 2.5844, + "step": 269000 + }, + { + "epoch": 0.17, + "eval_loss": 2.512434482574463, + "eval_runtime": 177.2599, + "eval_samples_per_second": 56.414, + "eval_steps_per_second": 3.526, + "step": 269000 + }, + { + "epoch": 0.17, + "learning_rate": 4.141530579570926e-05, + "loss": 2.6106, + "step": 269100 + }, + { + "epoch": 0.17, + "learning_rate": 4.14121037463977e-05, + "loss": 2.6047, + "step": 269200 + }, + { + "epoch": 0.17, + "learning_rate": 4.140890169708614e-05, + "loss": 2.5827, + "step": 269300 + }, + { + "epoch": 0.17, + "learning_rate": 4.140569964777458e-05, + "loss": 2.5831, + "step": 269400 + }, + { + "epoch": 0.17, + "learning_rate": 4.140249759846302e-05, + "loss": 2.5736, + "step": 269500 + }, + { + "epoch": 0.17, + "learning_rate": 4.139929554915146e-05, + "loss": 2.5827, + "step": 269600 + }, + { + "epoch": 0.17, + "learning_rate": 4.13960934998399e-05, + "loss": 2.5672, + "step": 269700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1392891450528344e-05, + "loss": 2.5707, + "step": 269800 + }, + { + "epoch": 0.17, + "learning_rate": 4.138968940121678e-05, + "loss": 2.5966, + "step": 269900 + }, + { + "epoch": 0.17, + "learning_rate": 4.1386487351905224e-05, + "loss": 2.5745, + "step": 270000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5134899616241455, + "eval_runtime": 180.3244, + "eval_samples_per_second": 55.456, + "eval_steps_per_second": 3.466, + "step": 270000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1383285302593663e-05, + "loss": 2.5802, + "step": 270100 + }, + { + "epoch": 0.17, + "learning_rate": 4.13800832532821e-05, + "loss": 2.5698, + "step": 270200 + }, + { + "epoch": 0.17, + "learning_rate": 4.137688120397054e-05, + "loss": 2.5655, + "step": 270300 + }, + { + "epoch": 0.17, + "learning_rate": 4.137367915465898e-05, + "loss": 2.5857, + "step": 270400 + }, + { + "epoch": 0.17, + "learning_rate": 4.137047710534742e-05, + "loss": 2.5985, + "step": 270500 + }, + { + "epoch": 0.17, + "learning_rate": 4.136727505603586e-05, + "loss": 2.5936, + "step": 270600 + }, + { + "epoch": 0.17, + "learning_rate": 4.136407300672431e-05, + "loss": 2.5917, + "step": 270700 + }, + { + "epoch": 0.17, + "learning_rate": 4.136087095741275e-05, + "loss": 2.5781, + "step": 270800 + }, + { + "epoch": 0.17, + "learning_rate": 4.135766890810119e-05, + "loss": 2.5733, + "step": 270900 + }, + { + "epoch": 0.17, + "learning_rate": 4.135446685878963e-05, + "loss": 2.5809, + "step": 271000 + }, + { + "epoch": 0.17, + "eval_loss": 2.5146307945251465, + "eval_runtime": 177.0724, + "eval_samples_per_second": 56.474, + "eval_steps_per_second": 3.53, + "step": 271000 + }, + { + "epoch": 0.17, + "learning_rate": 4.135126480947807e-05, + "loss": 2.5765, + "step": 271100 + }, + { + "epoch": 0.17, + "learning_rate": 4.134806276016651e-05, + "loss": 2.585, + "step": 271200 + }, + { + "epoch": 0.17, + "learning_rate": 4.134486071085495e-05, + "loss": 2.5658, + "step": 271300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1341658661543394e-05, + "loss": 2.574, + "step": 271400 + }, + { + "epoch": 0.17, + "learning_rate": 4.1338456612231827e-05, + "loss": 2.5854, + "step": 271500 + }, + { + "epoch": 0.17, + "learning_rate": 4.133525456292027e-05, + "loss": 2.5767, + "step": 271600 + }, + { + "epoch": 0.17, + "learning_rate": 4.133205251360871e-05, + "loss": 2.5914, + "step": 271700 + }, + { + "epoch": 0.17, + "learning_rate": 4.132885046429715e-05, + "loss": 2.5716, + "step": 271800 + }, + { + "epoch": 0.17, + "learning_rate": 4.132564841498559e-05, + "loss": 2.579, + "step": 271900 + }, + { + "epoch": 0.17, + "learning_rate": 4.132244636567403e-05, + "loss": 2.5993, + "step": 272000 + }, + { + "epoch": 0.17, + "eval_loss": 2.513415575027466, + "eval_runtime": 176.3837, + "eval_samples_per_second": 56.695, + "eval_steps_per_second": 3.543, + "step": 272000 + }, + { + "epoch": 0.17, + "learning_rate": 4.131924431636248e-05, + "loss": 2.5811, + "step": 272100 + }, + { + "epoch": 0.17, + "learning_rate": 4.131604226705091e-05, + "loss": 2.5801, + "step": 272200 + }, + { + "epoch": 0.17, + "learning_rate": 4.131284021773936e-05, + "loss": 2.5834, + "step": 272300 + }, + { + "epoch": 0.17, + "learning_rate": 4.13096381684278e-05, + "loss": 2.6039, + "step": 272400 + }, + { + "epoch": 0.17, + "learning_rate": 4.130643611911624e-05, + "loss": 2.5798, + "step": 272500 + }, + { + "epoch": 0.17, + "learning_rate": 4.130323406980468e-05, + "loss": 2.5923, + "step": 272600 + }, + { + "epoch": 0.17, + "learning_rate": 4.130003202049312e-05, + "loss": 2.585, + "step": 272700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1296829971181564e-05, + "loss": 2.5654, + "step": 272800 + }, + { + "epoch": 0.17, + "learning_rate": 4.1293627921869996e-05, + "loss": 2.6044, + "step": 272900 + }, + { + "epoch": 0.17, + "learning_rate": 4.129042587255844e-05, + "loss": 2.5672, + "step": 273000 + }, + { + "epoch": 0.17, + "eval_loss": 2.514824151992798, + "eval_runtime": 176.7123, + "eval_samples_per_second": 56.589, + "eval_steps_per_second": 3.537, + "step": 273000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1287223823246876e-05, + "loss": 2.5791, + "step": 273100 + }, + { + "epoch": 0.17, + "learning_rate": 4.128402177393532e-05, + "loss": 2.5671, + "step": 273200 + }, + { + "epoch": 0.17, + "learning_rate": 4.128081972462376e-05, + "loss": 2.5685, + "step": 273300 + }, + { + "epoch": 0.17, + "learning_rate": 4.12776176753122e-05, + "loss": 2.5751, + "step": 273400 + }, + { + "epoch": 0.18, + "learning_rate": 4.127441562600064e-05, + "loss": 2.542, + "step": 273500 + }, + { + "epoch": 0.18, + "learning_rate": 4.127121357668908e-05, + "loss": 2.5675, + "step": 273600 + }, + { + "epoch": 0.18, + "learning_rate": 4.126801152737753e-05, + "loss": 2.5709, + "step": 273700 + }, + { + "epoch": 0.18, + "learning_rate": 4.126480947806596e-05, + "loss": 2.5776, + "step": 273800 + }, + { + "epoch": 0.18, + "learning_rate": 4.126160742875441e-05, + "loss": 2.5686, + "step": 273900 + }, + { + "epoch": 0.18, + "learning_rate": 4.125840537944285e-05, + "loss": 2.5814, + "step": 274000 + }, + { + "epoch": 0.18, + "eval_loss": 2.512730836868286, + "eval_runtime": 178.1525, + "eval_samples_per_second": 56.132, + "eval_steps_per_second": 3.508, + "step": 274000 + }, + { + "epoch": 0.18, + "learning_rate": 4.125520333013129e-05, + "loss": 2.5885, + "step": 274100 + }, + { + "epoch": 0.18, + "learning_rate": 4.125200128081973e-05, + "loss": 2.5774, + "step": 274200 + }, + { + "epoch": 0.18, + "learning_rate": 4.1248799231508166e-05, + "loss": 2.5668, + "step": 274300 + }, + { + "epoch": 0.18, + "learning_rate": 4.124559718219661e-05, + "loss": 2.5889, + "step": 274400 + }, + { + "epoch": 0.18, + "learning_rate": 4.1242395132885046e-05, + "loss": 2.5962, + "step": 274500 + }, + { + "epoch": 0.18, + "learning_rate": 4.123919308357349e-05, + "loss": 2.5924, + "step": 274600 + }, + { + "epoch": 0.18, + "learning_rate": 4.1235991034261925e-05, + "loss": 2.5818, + "step": 274700 + }, + { + "epoch": 0.18, + "learning_rate": 4.123278898495037e-05, + "loss": 2.5815, + "step": 274800 + }, + { + "epoch": 0.18, + "learning_rate": 4.122958693563881e-05, + "loss": 2.5683, + "step": 274900 + }, + { + "epoch": 0.18, + "learning_rate": 4.122638488632725e-05, + "loss": 2.5661, + "step": 275000 + }, + { + "epoch": 0.18, + "eval_loss": 2.510558843612671, + "eval_runtime": 178.7649, + "eval_samples_per_second": 55.939, + "eval_steps_per_second": 3.496, + "step": 275000 + }, + { + "epoch": 0.18, + "learning_rate": 4.12231828370157e-05, + "loss": 2.5731, + "step": 275100 + }, + { + "epoch": 0.18, + "learning_rate": 4.121998078770413e-05, + "loss": 2.56, + "step": 275200 + }, + { + "epoch": 0.18, + "learning_rate": 4.121677873839258e-05, + "loss": 2.5673, + "step": 275300 + }, + { + "epoch": 0.18, + "learning_rate": 4.121357668908101e-05, + "loss": 2.5788, + "step": 275400 + }, + { + "epoch": 0.18, + "learning_rate": 4.121037463976946e-05, + "loss": 2.5721, + "step": 275500 + }, + { + "epoch": 0.18, + "learning_rate": 4.120717259045789e-05, + "loss": 2.5839, + "step": 275600 + }, + { + "epoch": 0.18, + "learning_rate": 4.1203970541146336e-05, + "loss": 2.576, + "step": 275700 + }, + { + "epoch": 0.18, + "learning_rate": 4.1200768491834776e-05, + "loss": 2.5467, + "step": 275800 + }, + { + "epoch": 0.18, + "learning_rate": 4.1197566442523216e-05, + "loss": 2.5834, + "step": 275900 + }, + { + "epoch": 0.18, + "learning_rate": 4.119436439321166e-05, + "loss": 2.5884, + "step": 276000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5124151706695557, + "eval_runtime": 174.6576, + "eval_samples_per_second": 57.255, + "eval_steps_per_second": 3.578, + "step": 276000 + }, + { + "epoch": 0.18, + "learning_rate": 4.1191162343900095e-05, + "loss": 2.5852, + "step": 276100 + }, + { + "epoch": 0.18, + "learning_rate": 4.118796029458854e-05, + "loss": 2.5807, + "step": 276200 + }, + { + "epoch": 0.18, + "learning_rate": 4.1184758245276975e-05, + "loss": 2.5777, + "step": 276300 + }, + { + "epoch": 0.18, + "learning_rate": 4.118155619596542e-05, + "loss": 2.5618, + "step": 276400 + }, + { + "epoch": 0.18, + "learning_rate": 4.117835414665386e-05, + "loss": 2.5635, + "step": 276500 + }, + { + "epoch": 0.18, + "learning_rate": 4.11751520973423e-05, + "loss": 2.5857, + "step": 276600 + }, + { + "epoch": 0.18, + "learning_rate": 4.117195004803075e-05, + "loss": 2.5788, + "step": 276700 + }, + { + "epoch": 0.18, + "learning_rate": 4.116874799871918e-05, + "loss": 2.5738, + "step": 276800 + }, + { + "epoch": 0.18, + "learning_rate": 4.116554594940763e-05, + "loss": 2.5889, + "step": 276900 + }, + { + "epoch": 0.18, + "learning_rate": 4.116234390009606e-05, + "loss": 2.5654, + "step": 277000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5116689205169678, + "eval_runtime": 176.8925, + "eval_samples_per_second": 56.532, + "eval_steps_per_second": 3.533, + "step": 277000 + }, + { + "epoch": 0.18, + "learning_rate": 4.1159141850784506e-05, + "loss": 2.5913, + "step": 277100 + }, + { + "epoch": 0.18, + "learning_rate": 4.115593980147294e-05, + "loss": 2.5725, + "step": 277200 + }, + { + "epoch": 0.18, + "learning_rate": 4.1152737752161386e-05, + "loss": 2.5543, + "step": 277300 + }, + { + "epoch": 0.18, + "learning_rate": 4.1149535702849825e-05, + "loss": 2.5497, + "step": 277400 + }, + { + "epoch": 0.18, + "learning_rate": 4.1146333653538265e-05, + "loss": 2.5553, + "step": 277500 + }, + { + "epoch": 0.18, + "learning_rate": 4.114313160422671e-05, + "loss": 2.5875, + "step": 277600 + }, + { + "epoch": 0.18, + "learning_rate": 4.1139929554915145e-05, + "loss": 2.5723, + "step": 277700 + }, + { + "epoch": 0.18, + "learning_rate": 4.113672750560359e-05, + "loss": 2.5872, + "step": 277800 + }, + { + "epoch": 0.18, + "learning_rate": 4.1133525456292024e-05, + "loss": 2.5567, + "step": 277900 + }, + { + "epoch": 0.18, + "learning_rate": 4.113032340698047e-05, + "loss": 2.5727, + "step": 278000 + }, + { + "epoch": 0.18, + "eval_loss": 2.511739492416382, + "eval_runtime": 174.9948, + "eval_samples_per_second": 57.145, + "eval_steps_per_second": 3.572, + "step": 278000 + }, + { + "epoch": 0.18, + "learning_rate": 4.112712135766891e-05, + "loss": 2.5517, + "step": 278100 + }, + { + "epoch": 0.18, + "learning_rate": 4.112391930835735e-05, + "loss": 2.5698, + "step": 278200 + }, + { + "epoch": 0.18, + "learning_rate": 4.1120717259045797e-05, + "loss": 2.5745, + "step": 278300 + }, + { + "epoch": 0.18, + "learning_rate": 4.111751520973423e-05, + "loss": 2.5706, + "step": 278400 + }, + { + "epoch": 0.18, + "learning_rate": 4.1114313160422676e-05, + "loss": 2.5668, + "step": 278500 + }, + { + "epoch": 0.18, + "learning_rate": 4.111111111111111e-05, + "loss": 2.5859, + "step": 278600 + }, + { + "epoch": 0.18, + "learning_rate": 4.1107909061799556e-05, + "loss": 2.5705, + "step": 278700 + }, + { + "epoch": 0.18, + "learning_rate": 4.110470701248799e-05, + "loss": 2.5645, + "step": 278800 + }, + { + "epoch": 0.18, + "learning_rate": 4.1101504963176435e-05, + "loss": 2.5714, + "step": 278900 + }, + { + "epoch": 0.18, + "learning_rate": 4.1098302913864875e-05, + "loss": 2.5648, + "step": 279000 + }, + { + "epoch": 0.18, + "eval_loss": 2.511039972305298, + "eval_runtime": 177.9542, + "eval_samples_per_second": 56.194, + "eval_steps_per_second": 3.512, + "step": 279000 + }, + { + "epoch": 0.18, + "learning_rate": 4.1095100864553314e-05, + "loss": 2.5803, + "step": 279100 + }, + { + "epoch": 0.18, + "learning_rate": 4.109189881524176e-05, + "loss": 2.5774, + "step": 279200 + }, + { + "epoch": 0.18, + "learning_rate": 4.1088696765930194e-05, + "loss": 2.5557, + "step": 279300 + }, + { + "epoch": 0.18, + "learning_rate": 4.108549471661864e-05, + "loss": 2.5804, + "step": 279400 + }, + { + "epoch": 0.18, + "learning_rate": 4.1082292667307073e-05, + "loss": 2.5812, + "step": 279500 + }, + { + "epoch": 0.18, + "learning_rate": 4.107909061799552e-05, + "loss": 2.5659, + "step": 279600 + }, + { + "epoch": 0.18, + "learning_rate": 4.107588856868396e-05, + "loss": 2.5777, + "step": 279700 + }, + { + "epoch": 0.18, + "learning_rate": 4.10726865193724e-05, + "loss": 2.5677, + "step": 279800 + }, + { + "epoch": 0.18, + "learning_rate": 4.1069484470060846e-05, + "loss": 2.5654, + "step": 279900 + }, + { + "epoch": 0.18, + "learning_rate": 4.106628242074928e-05, + "loss": 2.5633, + "step": 280000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5125532150268555, + "eval_runtime": 173.5232, + "eval_samples_per_second": 57.629, + "eval_steps_per_second": 3.602, + "step": 280000 + }, + { + "epoch": 0.18, + "learning_rate": 4.1063080371437725e-05, + "loss": 2.5804, + "step": 280100 + }, + { + "epoch": 0.18, + "learning_rate": 4.105987832212616e-05, + "loss": 2.55, + "step": 280200 + }, + { + "epoch": 0.18, + "learning_rate": 4.1056676272814605e-05, + "loss": 2.5984, + "step": 280300 + }, + { + "epoch": 0.18, + "learning_rate": 4.105347422350304e-05, + "loss": 2.5503, + "step": 280400 + }, + { + "epoch": 0.18, + "learning_rate": 4.1050272174191484e-05, + "loss": 2.5609, + "step": 280500 + }, + { + "epoch": 0.18, + "learning_rate": 4.1047070124879924e-05, + "loss": 2.5903, + "step": 280600 + }, + { + "epoch": 0.18, + "learning_rate": 4.1043868075568364e-05, + "loss": 2.5634, + "step": 280700 + }, + { + "epoch": 0.18, + "learning_rate": 4.104066602625681e-05, + "loss": 2.5981, + "step": 280800 + }, + { + "epoch": 0.18, + "learning_rate": 4.103746397694524e-05, + "loss": 2.5627, + "step": 280900 + }, + { + "epoch": 0.18, + "learning_rate": 4.103426192763369e-05, + "loss": 2.5925, + "step": 281000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5105316638946533, + "eval_runtime": 175.7903, + "eval_samples_per_second": 56.886, + "eval_steps_per_second": 3.555, + "step": 281000 + }, + { + "epoch": 0.18, + "learning_rate": 4.103105987832212e-05, + "loss": 2.5634, + "step": 281100 + }, + { + "epoch": 0.18, + "learning_rate": 4.102785782901057e-05, + "loss": 2.5757, + "step": 281200 + }, + { + "epoch": 0.18, + "learning_rate": 4.102465577969901e-05, + "loss": 2.5716, + "step": 281300 + }, + { + "epoch": 0.18, + "learning_rate": 4.102145373038745e-05, + "loss": 2.5856, + "step": 281400 + }, + { + "epoch": 0.18, + "learning_rate": 4.1018251681075895e-05, + "loss": 2.5613, + "step": 281500 + }, + { + "epoch": 0.18, + "learning_rate": 4.101504963176433e-05, + "loss": 2.5924, + "step": 281600 + }, + { + "epoch": 0.18, + "learning_rate": 4.1011847582452775e-05, + "loss": 2.5446, + "step": 281700 + }, + { + "epoch": 0.18, + "learning_rate": 4.100864553314121e-05, + "loss": 2.5892, + "step": 281800 + }, + { + "epoch": 0.18, + "learning_rate": 4.1005443483829654e-05, + "loss": 2.5723, + "step": 281900 + }, + { + "epoch": 0.18, + "learning_rate": 4.1002241434518094e-05, + "loss": 2.581, + "step": 282000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5110504627227783, + "eval_runtime": 176.3019, + "eval_samples_per_second": 56.721, + "eval_steps_per_second": 3.545, + "step": 282000 + }, + { + "epoch": 0.18, + "learning_rate": 4.0999039385206534e-05, + "loss": 2.5647, + "step": 282100 + }, + { + "epoch": 0.18, + "learning_rate": 4.0995837335894973e-05, + "loss": 2.5775, + "step": 282200 + }, + { + "epoch": 0.18, + "learning_rate": 4.099263528658341e-05, + "loss": 2.5806, + "step": 282300 + }, + { + "epoch": 0.18, + "learning_rate": 4.098943323727186e-05, + "loss": 2.58, + "step": 282400 + }, + { + "epoch": 0.18, + "learning_rate": 4.098623118796029e-05, + "loss": 2.5629, + "step": 282500 + }, + { + "epoch": 0.18, + "learning_rate": 4.098302913864874e-05, + "loss": 2.5716, + "step": 282600 + }, + { + "epoch": 0.18, + "learning_rate": 4.097982708933718e-05, + "loss": 2.5695, + "step": 282700 + }, + { + "epoch": 0.18, + "learning_rate": 4.097662504002562e-05, + "loss": 2.5563, + "step": 282800 + }, + { + "epoch": 0.18, + "learning_rate": 4.097342299071406e-05, + "loss": 2.5978, + "step": 282900 + }, + { + "epoch": 0.18, + "learning_rate": 4.09702209414025e-05, + "loss": 2.5856, + "step": 283000 + }, + { + "epoch": 0.18, + "eval_loss": 2.511104106903076, + "eval_runtime": 179.631, + "eval_samples_per_second": 55.67, + "eval_steps_per_second": 3.479, + "step": 283000 + }, + { + "epoch": 0.18, + "learning_rate": 4.0967018892090945e-05, + "loss": 2.5935, + "step": 283100 + }, + { + "epoch": 0.18, + "learning_rate": 4.096381684277938e-05, + "loss": 2.5626, + "step": 283200 + }, + { + "epoch": 0.18, + "learning_rate": 4.0960614793467824e-05, + "loss": 2.5588, + "step": 283300 + }, + { + "epoch": 0.18, + "learning_rate": 4.095741274415626e-05, + "loss": 2.5614, + "step": 283400 + }, + { + "epoch": 0.18, + "learning_rate": 4.0954210694844704e-05, + "loss": 2.5808, + "step": 283500 + }, + { + "epoch": 0.18, + "learning_rate": 4.095100864553314e-05, + "loss": 2.566, + "step": 283600 + }, + { + "epoch": 0.18, + "learning_rate": 4.094780659622158e-05, + "loss": 2.5677, + "step": 283700 + }, + { + "epoch": 0.18, + "learning_rate": 4.094460454691002e-05, + "loss": 2.5766, + "step": 283800 + }, + { + "epoch": 0.18, + "learning_rate": 4.094140249759846e-05, + "loss": 2.5525, + "step": 283900 + }, + { + "epoch": 0.18, + "learning_rate": 4.093820044828691e-05, + "loss": 2.5671, + "step": 284000 + }, + { + "epoch": 0.18, + "eval_loss": 2.509161949157715, + "eval_runtime": 176.3943, + "eval_samples_per_second": 56.691, + "eval_steps_per_second": 3.543, + "step": 284000 + }, + { + "epoch": 0.18, + "learning_rate": 4.093499839897534e-05, + "loss": 2.5692, + "step": 284100 + }, + { + "epoch": 0.18, + "learning_rate": 4.093179634966379e-05, + "loss": 2.5572, + "step": 284200 + }, + { + "epoch": 0.18, + "learning_rate": 4.092859430035223e-05, + "loss": 2.5831, + "step": 284300 + }, + { + "epoch": 0.18, + "learning_rate": 4.092539225104067e-05, + "loss": 2.5719, + "step": 284400 + }, + { + "epoch": 0.18, + "learning_rate": 4.092219020172911e-05, + "loss": 2.5671, + "step": 284500 + }, + { + "epoch": 0.18, + "learning_rate": 4.091898815241755e-05, + "loss": 2.5802, + "step": 284600 + }, + { + "epoch": 0.18, + "learning_rate": 4.0915786103105994e-05, + "loss": 2.5717, + "step": 284700 + }, + { + "epoch": 0.18, + "learning_rate": 4.091258405379443e-05, + "loss": 2.5834, + "step": 284800 + }, + { + "epoch": 0.18, + "learning_rate": 4.0909382004482874e-05, + "loss": 2.5793, + "step": 284900 + }, + { + "epoch": 0.18, + "learning_rate": 4.090617995517131e-05, + "loss": 2.5728, + "step": 285000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5098185539245605, + "eval_runtime": 173.9083, + "eval_samples_per_second": 57.502, + "eval_steps_per_second": 3.594, + "step": 285000 + }, + { + "epoch": 0.18, + "learning_rate": 4.090297790585975e-05, + "loss": 2.5591, + "step": 285100 + }, + { + "epoch": 0.18, + "learning_rate": 4.089977585654819e-05, + "loss": 2.5807, + "step": 285200 + }, + { + "epoch": 0.18, + "learning_rate": 4.089657380723663e-05, + "loss": 2.5771, + "step": 285300 + }, + { + "epoch": 0.18, + "learning_rate": 4.089337175792507e-05, + "loss": 2.5801, + "step": 285400 + }, + { + "epoch": 0.18, + "learning_rate": 4.089016970861351e-05, + "loss": 2.5605, + "step": 285500 + }, + { + "epoch": 0.18, + "learning_rate": 4.088696765930196e-05, + "loss": 2.5708, + "step": 285600 + }, + { + "epoch": 0.18, + "learning_rate": 4.088376560999039e-05, + "loss": 2.5581, + "step": 285700 + }, + { + "epoch": 0.18, + "learning_rate": 4.088056356067884e-05, + "loss": 2.5597, + "step": 285800 + }, + { + "epoch": 0.18, + "learning_rate": 4.087736151136728e-05, + "loss": 2.553, + "step": 285900 + }, + { + "epoch": 0.18, + "learning_rate": 4.087415946205572e-05, + "loss": 2.5626, + "step": 286000 + }, + { + "epoch": 0.18, + "eval_loss": 2.51062273979187, + "eval_runtime": 176.5074, + "eval_samples_per_second": 56.655, + "eval_steps_per_second": 3.541, + "step": 286000 + }, + { + "epoch": 0.18, + "learning_rate": 4.087095741274416e-05, + "loss": 2.5574, + "step": 286100 + }, + { + "epoch": 0.18, + "learning_rate": 4.08677553634326e-05, + "loss": 2.562, + "step": 286200 + }, + { + "epoch": 0.18, + "learning_rate": 4.0864553314121043e-05, + "loss": 2.5496, + "step": 286300 + }, + { + "epoch": 0.18, + "learning_rate": 4.0861351264809476e-05, + "loss": 2.5846, + "step": 286400 + }, + { + "epoch": 0.18, + "learning_rate": 4.085814921549792e-05, + "loss": 2.5862, + "step": 286500 + }, + { + "epoch": 0.18, + "learning_rate": 4.085494716618636e-05, + "loss": 2.5518, + "step": 286600 + }, + { + "epoch": 0.18, + "learning_rate": 4.08517451168748e-05, + "loss": 2.5733, + "step": 286700 + }, + { + "epoch": 0.18, + "learning_rate": 4.084854306756324e-05, + "loss": 2.5678, + "step": 286800 + }, + { + "epoch": 0.18, + "learning_rate": 4.084534101825168e-05, + "loss": 2.5802, + "step": 286900 + }, + { + "epoch": 0.18, + "learning_rate": 4.084213896894012e-05, + "loss": 2.5513, + "step": 287000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5107758045196533, + "eval_runtime": 175.9187, + "eval_samples_per_second": 56.844, + "eval_steps_per_second": 3.553, + "step": 287000 + }, + { + "epoch": 0.18, + "learning_rate": 4.083893691962856e-05, + "loss": 2.5619, + "step": 287100 + }, + { + "epoch": 0.18, + "learning_rate": 4.083573487031701e-05, + "loss": 2.5645, + "step": 287200 + }, + { + "epoch": 0.18, + "learning_rate": 4.083253282100545e-05, + "loss": 2.5653, + "step": 287300 + }, + { + "epoch": 0.18, + "learning_rate": 4.082933077169389e-05, + "loss": 2.5906, + "step": 287400 + }, + { + "epoch": 0.18, + "learning_rate": 4.082612872238233e-05, + "loss": 2.5605, + "step": 287500 + }, + { + "epoch": 0.18, + "learning_rate": 4.082292667307077e-05, + "loss": 2.5506, + "step": 287600 + }, + { + "epoch": 0.18, + "learning_rate": 4.0819724623759207e-05, + "loss": 2.5645, + "step": 287700 + }, + { + "epoch": 0.18, + "learning_rate": 4.0816522574447646e-05, + "loss": 2.5771, + "step": 287800 + }, + { + "epoch": 0.18, + "learning_rate": 4.081332052513609e-05, + "loss": 2.5784, + "step": 287900 + }, + { + "epoch": 0.18, + "learning_rate": 4.0810118475824526e-05, + "loss": 2.5557, + "step": 288000 + }, + { + "epoch": 0.18, + "eval_loss": 2.5089871883392334, + "eval_runtime": 178.3368, + "eval_samples_per_second": 56.074, + "eval_steps_per_second": 3.505, + "step": 288000 + }, + { + "epoch": 0.18, + "learning_rate": 4.080691642651297e-05, + "loss": 2.5438, + "step": 288100 + }, + { + "epoch": 0.18, + "learning_rate": 4.080371437720141e-05, + "loss": 2.5454, + "step": 288200 + }, + { + "epoch": 0.18, + "learning_rate": 4.080051232788985e-05, + "loss": 2.579, + "step": 288300 + }, + { + "epoch": 0.18, + "learning_rate": 4.079731027857829e-05, + "loss": 2.5547, + "step": 288400 + }, + { + "epoch": 0.18, + "learning_rate": 4.079410822926673e-05, + "loss": 2.5641, + "step": 288500 + }, + { + "epoch": 0.18, + "learning_rate": 4.079090617995517e-05, + "loss": 2.5702, + "step": 288600 + }, + { + "epoch": 0.18, + "learning_rate": 4.078770413064361e-05, + "loss": 2.5474, + "step": 288700 + }, + { + "epoch": 0.18, + "learning_rate": 4.078450208133206e-05, + "loss": 2.5632, + "step": 288800 + }, + { + "epoch": 0.18, + "learning_rate": 4.07813000320205e-05, + "loss": 2.5475, + "step": 288900 + }, + { + "epoch": 0.18, + "learning_rate": 4.077809798270894e-05, + "loss": 2.571, + "step": 289000 + }, + { + "epoch": 0.18, + "eval_loss": 2.510617971420288, + "eval_runtime": 177.734, + "eval_samples_per_second": 56.264, + "eval_steps_per_second": 3.516, + "step": 289000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0774895933397376e-05, + "loss": 2.5685, + "step": 289100 + }, + { + "epoch": 0.19, + "learning_rate": 4.0771693884085816e-05, + "loss": 2.5794, + "step": 289200 + }, + { + "epoch": 0.19, + "learning_rate": 4.0768491834774256e-05, + "loss": 2.5354, + "step": 289300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0765289785462696e-05, + "loss": 2.5567, + "step": 289400 + }, + { + "epoch": 0.19, + "learning_rate": 4.076208773615114e-05, + "loss": 2.5689, + "step": 289500 + }, + { + "epoch": 0.19, + "learning_rate": 4.075888568683958e-05, + "loss": 2.5718, + "step": 289600 + }, + { + "epoch": 0.19, + "learning_rate": 4.075568363752802e-05, + "loss": 2.5576, + "step": 289700 + }, + { + "epoch": 0.19, + "learning_rate": 4.075248158821646e-05, + "loss": 2.5523, + "step": 289800 + }, + { + "epoch": 0.19, + "learning_rate": 4.07492795389049e-05, + "loss": 2.5517, + "step": 289900 + }, + { + "epoch": 0.19, + "learning_rate": 4.074607748959334e-05, + "loss": 2.5623, + "step": 290000 + }, + { + "epoch": 0.19, + "eval_loss": 2.508697748184204, + "eval_runtime": 176.5437, + "eval_samples_per_second": 56.643, + "eval_steps_per_second": 3.54, + "step": 290000 + }, + { + "epoch": 0.19, + "learning_rate": 4.074287544028178e-05, + "loss": 2.5643, + "step": 290100 + }, + { + "epoch": 0.19, + "learning_rate": 4.073967339097022e-05, + "loss": 2.5692, + "step": 290200 + }, + { + "epoch": 0.19, + "learning_rate": 4.073647134165866e-05, + "loss": 2.5541, + "step": 290300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0733269292347107e-05, + "loss": 2.5701, + "step": 290400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0730067243035546e-05, + "loss": 2.5417, + "step": 290500 + }, + { + "epoch": 0.19, + "learning_rate": 4.0726865193723986e-05, + "loss": 2.5643, + "step": 290600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0723663144412426e-05, + "loss": 2.5483, + "step": 290700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0720461095100866e-05, + "loss": 2.5724, + "step": 290800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0717259045789305e-05, + "loss": 2.5341, + "step": 290900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0714056996477745e-05, + "loss": 2.5673, + "step": 291000 + }, + { + "epoch": 0.19, + "eval_loss": 2.508972644805908, + "eval_runtime": 177.4854, + "eval_samples_per_second": 56.343, + "eval_steps_per_second": 3.521, + "step": 291000 + }, + { + "epoch": 0.19, + "learning_rate": 4.071085494716619e-05, + "loss": 2.5623, + "step": 291100 + }, + { + "epoch": 0.19, + "learning_rate": 4.070765289785463e-05, + "loss": 2.578, + "step": 291200 + }, + { + "epoch": 0.19, + "learning_rate": 4.070445084854307e-05, + "loss": 2.5711, + "step": 291300 + }, + { + "epoch": 0.19, + "learning_rate": 4.070124879923151e-05, + "loss": 2.5647, + "step": 291400 + }, + { + "epoch": 0.19, + "learning_rate": 4.069804674991995e-05, + "loss": 2.5425, + "step": 291500 + }, + { + "epoch": 0.19, + "learning_rate": 4.069484470060839e-05, + "loss": 2.5546, + "step": 291600 + }, + { + "epoch": 0.19, + "learning_rate": 4.069164265129683e-05, + "loss": 2.5606, + "step": 291700 + }, + { + "epoch": 0.19, + "learning_rate": 4.068844060198527e-05, + "loss": 2.5569, + "step": 291800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0685238552673716e-05, + "loss": 2.5551, + "step": 291900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0682036503362156e-05, + "loss": 2.5544, + "step": 292000 + }, + { + "epoch": 0.19, + "eval_loss": 2.508294105529785, + "eval_runtime": 174.7106, + "eval_samples_per_second": 57.238, + "eval_steps_per_second": 3.577, + "step": 292000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0678834454050596e-05, + "loss": 2.5827, + "step": 292100 + }, + { + "epoch": 0.19, + "learning_rate": 4.0675632404739035e-05, + "loss": 2.5554, + "step": 292200 + }, + { + "epoch": 0.19, + "learning_rate": 4.0672430355427475e-05, + "loss": 2.5515, + "step": 292300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0669228306115915e-05, + "loss": 2.5602, + "step": 292400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0666026256804355e-05, + "loss": 2.562, + "step": 292500 + }, + { + "epoch": 0.19, + "learning_rate": 4.06628242074928e-05, + "loss": 2.5807, + "step": 292600 + }, + { + "epoch": 0.19, + "learning_rate": 4.065962215818124e-05, + "loss": 2.5839, + "step": 292700 + }, + { + "epoch": 0.19, + "learning_rate": 4.065642010886968e-05, + "loss": 2.548, + "step": 292800 + }, + { + "epoch": 0.19, + "learning_rate": 4.065321805955812e-05, + "loss": 2.5535, + "step": 292900 + }, + { + "epoch": 0.19, + "learning_rate": 4.065001601024656e-05, + "loss": 2.5625, + "step": 293000 + }, + { + "epoch": 0.19, + "eval_loss": 2.509491205215454, + "eval_runtime": 178.2991, + "eval_samples_per_second": 56.086, + "eval_steps_per_second": 3.505, + "step": 293000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0646813960935e-05, + "loss": 2.5633, + "step": 293100 + }, + { + "epoch": 0.19, + "learning_rate": 4.064361191162344e-05, + "loss": 2.5533, + "step": 293200 + }, + { + "epoch": 0.19, + "learning_rate": 4.064040986231188e-05, + "loss": 2.541, + "step": 293300 + }, + { + "epoch": 0.19, + "learning_rate": 4.063720781300032e-05, + "loss": 2.5639, + "step": 293400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0634005763688766e-05, + "loss": 2.547, + "step": 293500 + }, + { + "epoch": 0.19, + "learning_rate": 4.0630803714377205e-05, + "loss": 2.5464, + "step": 293600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0627601665065645e-05, + "loss": 2.5627, + "step": 293700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0624399615754085e-05, + "loss": 2.5713, + "step": 293800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0621197566442525e-05, + "loss": 2.5626, + "step": 293900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0617995517130964e-05, + "loss": 2.5665, + "step": 294000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5079572200775146, + "eval_runtime": 174.8545, + "eval_samples_per_second": 57.19, + "eval_steps_per_second": 3.574, + "step": 294000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0614793467819404e-05, + "loss": 2.5639, + "step": 294100 + }, + { + "epoch": 0.19, + "learning_rate": 4.061159141850785e-05, + "loss": 2.5535, + "step": 294200 + }, + { + "epoch": 0.19, + "learning_rate": 4.0608389369196284e-05, + "loss": 2.5441, + "step": 294300 + }, + { + "epoch": 0.19, + "learning_rate": 4.060518731988473e-05, + "loss": 2.5535, + "step": 294400 + }, + { + "epoch": 0.19, + "learning_rate": 4.060198527057317e-05, + "loss": 2.5606, + "step": 294500 + }, + { + "epoch": 0.19, + "learning_rate": 4.059878322126161e-05, + "loss": 2.5639, + "step": 294600 + }, + { + "epoch": 0.19, + "learning_rate": 4.059558117195005e-05, + "loss": 2.5637, + "step": 294700 + }, + { + "epoch": 0.19, + "learning_rate": 4.059237912263849e-05, + "loss": 2.5466, + "step": 294800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0589177073326935e-05, + "loss": 2.5829, + "step": 294900 + }, + { + "epoch": 0.19, + "learning_rate": 4.058597502401537e-05, + "loss": 2.5556, + "step": 295000 + }, + { + "epoch": 0.19, + "eval_loss": 2.509488344192505, + "eval_runtime": 178.3018, + "eval_samples_per_second": 56.085, + "eval_steps_per_second": 3.505, + "step": 295000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0582772974703815e-05, + "loss": 2.5414, + "step": 295100 + }, + { + "epoch": 0.19, + "learning_rate": 4.0579570925392255e-05, + "loss": 2.5758, + "step": 295200 + }, + { + "epoch": 0.19, + "learning_rate": 4.0576368876080694e-05, + "loss": 2.5638, + "step": 295300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0573166826769134e-05, + "loss": 2.5582, + "step": 295400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0569964777457574e-05, + "loss": 2.5373, + "step": 295500 + }, + { + "epoch": 0.19, + "learning_rate": 4.0566762728146014e-05, + "loss": 2.5559, + "step": 295600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0563560678834453e-05, + "loss": 2.575, + "step": 295700 + }, + { + "epoch": 0.19, + "learning_rate": 4.05603586295229e-05, + "loss": 2.5523, + "step": 295800 + }, + { + "epoch": 0.19, + "learning_rate": 4.055715658021133e-05, + "loss": 2.5523, + "step": 295900 + }, + { + "epoch": 0.19, + "learning_rate": 4.055395453089978e-05, + "loss": 2.5525, + "step": 296000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5082788467407227, + "eval_runtime": 176.6723, + "eval_samples_per_second": 56.602, + "eval_steps_per_second": 3.538, + "step": 296000 + }, + { + "epoch": 0.19, + "learning_rate": 4.055075248158822e-05, + "loss": 2.5737, + "step": 296100 + }, + { + "epoch": 0.19, + "learning_rate": 4.054755043227666e-05, + "loss": 2.5611, + "step": 296200 + }, + { + "epoch": 0.19, + "learning_rate": 4.05443483829651e-05, + "loss": 2.5676, + "step": 296300 + }, + { + "epoch": 0.19, + "learning_rate": 4.054114633365354e-05, + "loss": 2.5752, + "step": 296400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0537944284341985e-05, + "loss": 2.5637, + "step": 296500 + }, + { + "epoch": 0.19, + "learning_rate": 4.053474223503042e-05, + "loss": 2.5388, + "step": 296600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0531540185718864e-05, + "loss": 2.5566, + "step": 296700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0528338136407304e-05, + "loss": 2.534, + "step": 296800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0525136087095744e-05, + "loss": 2.5518, + "step": 296900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0521934037784184e-05, + "loss": 2.5801, + "step": 297000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5086190700531006, + "eval_runtime": 174.1216, + "eval_samples_per_second": 57.431, + "eval_steps_per_second": 3.589, + "step": 297000 + }, + { + "epoch": 0.19, + "learning_rate": 4.051873198847262e-05, + "loss": 2.546, + "step": 297100 + }, + { + "epoch": 0.19, + "learning_rate": 4.051552993916107e-05, + "loss": 2.552, + "step": 297200 + }, + { + "epoch": 0.19, + "learning_rate": 4.05123278898495e-05, + "loss": 2.5856, + "step": 297300 + }, + { + "epoch": 0.19, + "learning_rate": 4.050912584053795e-05, + "loss": 2.5604, + "step": 297400 + }, + { + "epoch": 0.19, + "learning_rate": 4.050592379122638e-05, + "loss": 2.57, + "step": 297500 + }, + { + "epoch": 0.19, + "learning_rate": 4.050272174191483e-05, + "loss": 2.5799, + "step": 297600 + }, + { + "epoch": 0.19, + "learning_rate": 4.049951969260327e-05, + "loss": 2.5632, + "step": 297700 + }, + { + "epoch": 0.19, + "learning_rate": 4.049631764329171e-05, + "loss": 2.5524, + "step": 297800 + }, + { + "epoch": 0.19, + "learning_rate": 4.049311559398015e-05, + "loss": 2.5868, + "step": 297900 + }, + { + "epoch": 0.19, + "learning_rate": 4.048991354466859e-05, + "loss": 2.5558, + "step": 298000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5086541175842285, + "eval_runtime": 176.9276, + "eval_samples_per_second": 56.52, + "eval_steps_per_second": 3.533, + "step": 298000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0486711495357034e-05, + "loss": 2.5616, + "step": 298100 + }, + { + "epoch": 0.19, + "learning_rate": 4.048350944604547e-05, + "loss": 2.5705, + "step": 298200 + }, + { + "epoch": 0.19, + "learning_rate": 4.0480307396733914e-05, + "loss": 2.5497, + "step": 298300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0477105347422353e-05, + "loss": 2.5514, + "step": 298400 + }, + { + "epoch": 0.19, + "learning_rate": 4.047390329811079e-05, + "loss": 2.5565, + "step": 298500 + }, + { + "epoch": 0.19, + "learning_rate": 4.047070124879923e-05, + "loss": 2.5641, + "step": 298600 + }, + { + "epoch": 0.19, + "learning_rate": 4.046749919948767e-05, + "loss": 2.569, + "step": 298700 + }, + { + "epoch": 0.19, + "learning_rate": 4.046429715017612e-05, + "loss": 2.5735, + "step": 298800 + }, + { + "epoch": 0.19, + "learning_rate": 4.046109510086455e-05, + "loss": 2.5601, + "step": 298900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0457893051553e-05, + "loss": 2.5677, + "step": 299000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5073604583740234, + "eval_runtime": 173.7708, + "eval_samples_per_second": 57.547, + "eval_steps_per_second": 3.597, + "step": 299000 + }, + { + "epoch": 0.19, + "learning_rate": 4.045469100224143e-05, + "loss": 2.5402, + "step": 299100 + }, + { + "epoch": 0.19, + "learning_rate": 4.045148895292988e-05, + "loss": 2.551, + "step": 299200 + }, + { + "epoch": 0.19, + "learning_rate": 4.044828690361832e-05, + "loss": 2.5308, + "step": 299300 + }, + { + "epoch": 0.19, + "learning_rate": 4.044508485430676e-05, + "loss": 2.535, + "step": 299400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0441882804995204e-05, + "loss": 2.5503, + "step": 299500 + }, + { + "epoch": 0.19, + "learning_rate": 4.043868075568364e-05, + "loss": 2.5448, + "step": 299600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0435478706372084e-05, + "loss": 2.5586, + "step": 299700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0432276657060517e-05, + "loss": 2.5536, + "step": 299800 + }, + { + "epoch": 0.19, + "learning_rate": 4.042907460774896e-05, + "loss": 2.5617, + "step": 299900 + }, + { + "epoch": 0.19, + "learning_rate": 4.04258725584374e-05, + "loss": 2.5559, + "step": 300000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5065934658050537, + "eval_runtime": 177.8009, + "eval_samples_per_second": 56.243, + "eval_steps_per_second": 3.515, + "step": 300000 + }, + { + "epoch": 0.19, + "learning_rate": 4.042267050912584e-05, + "loss": 2.5646, + "step": 300100 + }, + { + "epoch": 0.19, + "learning_rate": 4.041946845981428e-05, + "loss": 2.573, + "step": 300200 + }, + { + "epoch": 0.19, + "learning_rate": 4.041626641050272e-05, + "loss": 2.5649, + "step": 300300 + }, + { + "epoch": 0.19, + "learning_rate": 4.041306436119117e-05, + "loss": 2.5522, + "step": 300400 + }, + { + "epoch": 0.19, + "learning_rate": 4.04098623118796e-05, + "loss": 2.5513, + "step": 300500 + }, + { + "epoch": 0.19, + "learning_rate": 4.040666026256805e-05, + "loss": 2.5763, + "step": 300600 + }, + { + "epoch": 0.19, + "learning_rate": 4.040345821325648e-05, + "loss": 2.5712, + "step": 300700 + }, + { + "epoch": 0.19, + "learning_rate": 4.040025616394493e-05, + "loss": 2.5657, + "step": 300800 + }, + { + "epoch": 0.19, + "learning_rate": 4.039705411463337e-05, + "loss": 2.5598, + "step": 300900 + }, + { + "epoch": 0.19, + "learning_rate": 4.039385206532181e-05, + "loss": 2.5637, + "step": 301000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5084407329559326, + "eval_runtime": 177.1384, + "eval_samples_per_second": 56.453, + "eval_steps_per_second": 3.528, + "step": 301000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0390650016010254e-05, + "loss": 2.5361, + "step": 301100 + }, + { + "epoch": 0.19, + "learning_rate": 4.0387447966698686e-05, + "loss": 2.556, + "step": 301200 + }, + { + "epoch": 0.19, + "learning_rate": 4.038424591738713e-05, + "loss": 2.5632, + "step": 301300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0381043868075566e-05, + "loss": 2.5759, + "step": 301400 + }, + { + "epoch": 0.19, + "learning_rate": 4.037784181876401e-05, + "loss": 2.5651, + "step": 301500 + }, + { + "epoch": 0.19, + "learning_rate": 4.037463976945245e-05, + "loss": 2.5534, + "step": 301600 + }, + { + "epoch": 0.19, + "learning_rate": 4.037143772014089e-05, + "loss": 2.5505, + "step": 301700 + }, + { + "epoch": 0.19, + "learning_rate": 4.036823567082934e-05, + "loss": 2.5649, + "step": 301800 + }, + { + "epoch": 0.19, + "learning_rate": 4.036503362151777e-05, + "loss": 2.5579, + "step": 301900 + }, + { + "epoch": 0.19, + "learning_rate": 4.036183157220622e-05, + "loss": 2.5369, + "step": 302000 + }, + { + "epoch": 0.19, + "eval_loss": 2.508272171020508, + "eval_runtime": 173.6825, + "eval_samples_per_second": 57.576, + "eval_steps_per_second": 3.599, + "step": 302000 + }, + { + "epoch": 0.19, + "learning_rate": 4.035862952289465e-05, + "loss": 2.5736, + "step": 302100 + }, + { + "epoch": 0.19, + "learning_rate": 4.03554274735831e-05, + "loss": 2.5686, + "step": 302200 + }, + { + "epoch": 0.19, + "learning_rate": 4.035222542427153e-05, + "loss": 2.5647, + "step": 302300 + }, + { + "epoch": 0.19, + "learning_rate": 4.034902337495998e-05, + "loss": 2.5552, + "step": 302400 + }, + { + "epoch": 0.19, + "learning_rate": 4.034582132564842e-05, + "loss": 2.5441, + "step": 302500 + }, + { + "epoch": 0.19, + "learning_rate": 4.0342619276336856e-05, + "loss": 2.5753, + "step": 302600 + }, + { + "epoch": 0.19, + "learning_rate": 4.03394172270253e-05, + "loss": 2.5821, + "step": 302700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0336215177713736e-05, + "loss": 2.5517, + "step": 302800 + }, + { + "epoch": 0.19, + "learning_rate": 4.033301312840218e-05, + "loss": 2.5735, + "step": 302900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0329811079090615e-05, + "loss": 2.5704, + "step": 303000 + }, + { + "epoch": 0.19, + "eval_loss": 2.50827693939209, + "eval_runtime": 177.9887, + "eval_samples_per_second": 56.183, + "eval_steps_per_second": 3.511, + "step": 303000 + }, + { + "epoch": 0.19, + "learning_rate": 4.032660902977906e-05, + "loss": 2.5739, + "step": 303100 + }, + { + "epoch": 0.19, + "learning_rate": 4.03234069804675e-05, + "loss": 2.5599, + "step": 303200 + }, + { + "epoch": 0.19, + "learning_rate": 4.032020493115594e-05, + "loss": 2.5451, + "step": 303300 + }, + { + "epoch": 0.19, + "learning_rate": 4.031700288184439e-05, + "loss": 2.5776, + "step": 303400 + }, + { + "epoch": 0.19, + "learning_rate": 4.031380083253282e-05, + "loss": 2.5636, + "step": 303500 + }, + { + "epoch": 0.19, + "learning_rate": 4.031059878322127e-05, + "loss": 2.5535, + "step": 303600 + }, + { + "epoch": 0.19, + "learning_rate": 4.03073967339097e-05, + "loss": 2.5527, + "step": 303700 + }, + { + "epoch": 0.19, + "learning_rate": 4.030419468459815e-05, + "loss": 2.5781, + "step": 303800 + }, + { + "epoch": 0.19, + "learning_rate": 4.030099263528658e-05, + "loss": 2.5719, + "step": 303900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0297790585975026e-05, + "loss": 2.5473, + "step": 304000 + }, + { + "epoch": 0.19, + "eval_loss": 2.5073838233947754, + "eval_runtime": 179.4958, + "eval_samples_per_second": 55.712, + "eval_steps_per_second": 3.482, + "step": 304000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0294588536663466e-05, + "loss": 2.548, + "step": 304100 + }, + { + "epoch": 0.19, + "learning_rate": 4.0291386487351906e-05, + "loss": 2.5382, + "step": 304200 + }, + { + "epoch": 0.19, + "learning_rate": 4.028818443804035e-05, + "loss": 2.5654, + "step": 304300 + }, + { + "epoch": 0.19, + "learning_rate": 4.0284982388728785e-05, + "loss": 2.5579, + "step": 304400 + }, + { + "epoch": 0.19, + "learning_rate": 4.028178033941723e-05, + "loss": 2.5408, + "step": 304500 + }, + { + "epoch": 0.19, + "learning_rate": 4.0278578290105665e-05, + "loss": 2.564, + "step": 304600 + }, + { + "epoch": 0.2, + "learning_rate": 4.027537624079411e-05, + "loss": 2.5394, + "step": 304700 + }, + { + "epoch": 0.2, + "learning_rate": 4.027217419148255e-05, + "loss": 2.564, + "step": 304800 + }, + { + "epoch": 0.2, + "learning_rate": 4.026897214217099e-05, + "loss": 2.5578, + "step": 304900 + }, + { + "epoch": 0.2, + "learning_rate": 4.026577009285944e-05, + "loss": 2.5446, + "step": 305000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5069239139556885, + "eval_runtime": 176.0746, + "eval_samples_per_second": 56.794, + "eval_steps_per_second": 3.55, + "step": 305000 + }, + { + "epoch": 0.2, + "learning_rate": 4.026256804354787e-05, + "loss": 2.5529, + "step": 305100 + }, + { + "epoch": 0.2, + "learning_rate": 4.025936599423632e-05, + "loss": 2.5475, + "step": 305200 + }, + { + "epoch": 0.2, + "learning_rate": 4.025616394492475e-05, + "loss": 2.5522, + "step": 305300 + }, + { + "epoch": 0.2, + "learning_rate": 4.0252961895613196e-05, + "loss": 2.5702, + "step": 305400 + }, + { + "epoch": 0.2, + "learning_rate": 4.024975984630163e-05, + "loss": 2.5735, + "step": 305500 + }, + { + "epoch": 0.2, + "learning_rate": 4.0246557796990076e-05, + "loss": 2.5599, + "step": 305600 + }, + { + "epoch": 0.2, + "learning_rate": 4.0243355747678515e-05, + "loss": 2.5524, + "step": 305700 + }, + { + "epoch": 0.2, + "learning_rate": 4.0240153698366955e-05, + "loss": 2.5459, + "step": 305800 + }, + { + "epoch": 0.2, + "learning_rate": 4.02369516490554e-05, + "loss": 2.5709, + "step": 305900 + }, + { + "epoch": 0.2, + "learning_rate": 4.0233749599743835e-05, + "loss": 2.5669, + "step": 306000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5048508644104004, + "eval_runtime": 177.2145, + "eval_samples_per_second": 56.429, + "eval_steps_per_second": 3.527, + "step": 306000 + }, + { + "epoch": 0.2, + "learning_rate": 4.023054755043228e-05, + "loss": 2.572, + "step": 306100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0227345501120714e-05, + "loss": 2.551, + "step": 306200 + }, + { + "epoch": 0.2, + "learning_rate": 4.022414345180916e-05, + "loss": 2.5737, + "step": 306300 + }, + { + "epoch": 0.2, + "learning_rate": 4.02209414024976e-05, + "loss": 2.5602, + "step": 306400 + }, + { + "epoch": 0.2, + "learning_rate": 4.021773935318604e-05, + "loss": 2.5354, + "step": 306500 + }, + { + "epoch": 0.2, + "learning_rate": 4.0214537303874487e-05, + "loss": 2.5582, + "step": 306600 + }, + { + "epoch": 0.2, + "learning_rate": 4.021133525456292e-05, + "loss": 2.5414, + "step": 306700 + }, + { + "epoch": 0.2, + "learning_rate": 4.0208133205251366e-05, + "loss": 2.5451, + "step": 306800 + }, + { + "epoch": 0.2, + "learning_rate": 4.02049311559398e-05, + "loss": 2.5318, + "step": 306900 + }, + { + "epoch": 0.2, + "learning_rate": 4.0201729106628246e-05, + "loss": 2.5518, + "step": 307000 + }, + { + "epoch": 0.2, + "eval_loss": 2.507680892944336, + "eval_runtime": 177.91, + "eval_samples_per_second": 56.208, + "eval_steps_per_second": 3.513, + "step": 307000 + }, + { + "epoch": 0.2, + "learning_rate": 4.0198527057316685e-05, + "loss": 2.5772, + "step": 307100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0195325008005125e-05, + "loss": 2.5669, + "step": 307200 + }, + { + "epoch": 0.2, + "learning_rate": 4.0192122958693565e-05, + "loss": 2.5582, + "step": 307300 + }, + { + "epoch": 0.2, + "learning_rate": 4.0188920909382004e-05, + "loss": 2.5566, + "step": 307400 + }, + { + "epoch": 0.2, + "learning_rate": 4.018571886007045e-05, + "loss": 2.5654, + "step": 307500 + }, + { + "epoch": 0.2, + "learning_rate": 4.0182516810758884e-05, + "loss": 2.5824, + "step": 307600 + }, + { + "epoch": 0.2, + "learning_rate": 4.017931476144733e-05, + "loss": 2.5633, + "step": 307700 + }, + { + "epoch": 0.2, + "learning_rate": 4.0176112712135763e-05, + "loss": 2.5698, + "step": 307800 + }, + { + "epoch": 0.2, + "learning_rate": 4.017291066282421e-05, + "loss": 2.542, + "step": 307900 + }, + { + "epoch": 0.2, + "learning_rate": 4.016970861351265e-05, + "loss": 2.5414, + "step": 308000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5082297325134277, + "eval_runtime": 174.5318, + "eval_samples_per_second": 57.296, + "eval_steps_per_second": 3.581, + "step": 308000 + }, + { + "epoch": 0.2, + "learning_rate": 4.016650656420109e-05, + "loss": 2.55, + "step": 308100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0163304514889536e-05, + "loss": 2.5611, + "step": 308200 + }, + { + "epoch": 0.2, + "learning_rate": 4.016010246557797e-05, + "loss": 2.5464, + "step": 308300 + }, + { + "epoch": 0.2, + "learning_rate": 4.0156900416266415e-05, + "loss": 2.5544, + "step": 308400 + }, + { + "epoch": 0.2, + "learning_rate": 4.015369836695485e-05, + "loss": 2.5612, + "step": 308500 + }, + { + "epoch": 0.2, + "learning_rate": 4.0150496317643295e-05, + "loss": 2.5487, + "step": 308600 + }, + { + "epoch": 0.2, + "learning_rate": 4.0147294268331735e-05, + "loss": 2.5452, + "step": 308700 + }, + { + "epoch": 0.2, + "learning_rate": 4.0144092219020174e-05, + "loss": 2.5652, + "step": 308800 + }, + { + "epoch": 0.2, + "learning_rate": 4.0140890169708614e-05, + "loss": 2.5409, + "step": 308900 + }, + { + "epoch": 0.2, + "learning_rate": 4.0137688120397054e-05, + "loss": 2.5588, + "step": 309000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5070741176605225, + "eval_runtime": 176.2748, + "eval_samples_per_second": 56.73, + "eval_steps_per_second": 3.546, + "step": 309000 + }, + { + "epoch": 0.2, + "learning_rate": 4.01344860710855e-05, + "loss": 2.553, + "step": 309100 + }, + { + "epoch": 0.2, + "learning_rate": 4.013128402177393e-05, + "loss": 2.5702, + "step": 309200 + }, + { + "epoch": 0.2, + "learning_rate": 4.012808197246238e-05, + "loss": 2.5501, + "step": 309300 + }, + { + "epoch": 0.2, + "learning_rate": 4.012487992315082e-05, + "loss": 2.5654, + "step": 309400 + }, + { + "epoch": 0.2, + "learning_rate": 4.012167787383926e-05, + "loss": 2.5569, + "step": 309500 + }, + { + "epoch": 0.2, + "learning_rate": 4.01184758245277e-05, + "loss": 2.5349, + "step": 309600 + }, + { + "epoch": 0.2, + "learning_rate": 4.011527377521614e-05, + "loss": 2.5506, + "step": 309700 + }, + { + "epoch": 0.2, + "learning_rate": 4.0112071725904585e-05, + "loss": 2.5564, + "step": 309800 + }, + { + "epoch": 0.2, + "learning_rate": 4.010886967659302e-05, + "loss": 2.5588, + "step": 309900 + }, + { + "epoch": 0.2, + "learning_rate": 4.0105667627281465e-05, + "loss": 2.5626, + "step": 310000 + }, + { + "epoch": 0.2, + "eval_loss": 2.506380796432495, + "eval_runtime": 181.6871, + "eval_samples_per_second": 55.04, + "eval_steps_per_second": 3.44, + "step": 310000 + }, + { + "epoch": 0.2, + "learning_rate": 4.01024655779699e-05, + "loss": 2.5418, + "step": 310100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0099263528658344e-05, + "loss": 2.5562, + "step": 310200 + }, + { + "epoch": 0.2, + "learning_rate": 4.0096061479346784e-05, + "loss": 2.5452, + "step": 310300 + }, + { + "epoch": 0.2, + "learning_rate": 4.0092859430035224e-05, + "loss": 2.5572, + "step": 310400 + }, + { + "epoch": 0.2, + "learning_rate": 4.0089657380723663e-05, + "loss": 2.5605, + "step": 310500 + }, + { + "epoch": 0.2, + "learning_rate": 4.00864553314121e-05, + "loss": 2.5521, + "step": 310600 + }, + { + "epoch": 0.2, + "learning_rate": 4.008325328210055e-05, + "loss": 2.5529, + "step": 310700 + }, + { + "epoch": 0.2, + "learning_rate": 4.008005123278898e-05, + "loss": 2.5305, + "step": 310800 + }, + { + "epoch": 0.2, + "learning_rate": 4.007684918347743e-05, + "loss": 2.5354, + "step": 310900 + }, + { + "epoch": 0.2, + "learning_rate": 4.007364713416587e-05, + "loss": 2.5514, + "step": 311000 + }, + { + "epoch": 0.2, + "eval_loss": 2.505598545074463, + "eval_runtime": 174.1208, + "eval_samples_per_second": 57.431, + "eval_steps_per_second": 3.589, + "step": 311000 + }, + { + "epoch": 0.2, + "learning_rate": 4.007044508485431e-05, + "loss": 2.5501, + "step": 311100 + }, + { + "epoch": 0.2, + "learning_rate": 4.006724303554275e-05, + "loss": 2.5382, + "step": 311200 + }, + { + "epoch": 0.2, + "learning_rate": 4.006404098623119e-05, + "loss": 2.5473, + "step": 311300 + }, + { + "epoch": 0.2, + "learning_rate": 4.0060838936919635e-05, + "loss": 2.5561, + "step": 311400 + }, + { + "epoch": 0.2, + "learning_rate": 4.005763688760807e-05, + "loss": 2.5504, + "step": 311500 + }, + { + "epoch": 0.2, + "learning_rate": 4.0054434838296514e-05, + "loss": 2.5553, + "step": 311600 + }, + { + "epoch": 0.2, + "learning_rate": 4.0051232788984954e-05, + "loss": 2.5465, + "step": 311700 + }, + { + "epoch": 0.2, + "learning_rate": 4.0048030739673394e-05, + "loss": 2.5427, + "step": 311800 + }, + { + "epoch": 0.2, + "learning_rate": 4.004482869036183e-05, + "loss": 2.558, + "step": 311900 + }, + { + "epoch": 0.2, + "learning_rate": 4.004162664105027e-05, + "loss": 2.5363, + "step": 312000 + }, + { + "epoch": 0.2, + "eval_loss": 2.505476236343384, + "eval_runtime": 179.3041, + "eval_samples_per_second": 55.771, + "eval_steps_per_second": 3.486, + "step": 312000 + }, + { + "epoch": 0.2, + "learning_rate": 4.003842459173871e-05, + "loss": 2.5484, + "step": 312100 + }, + { + "epoch": 0.2, + "learning_rate": 4.003522254242715e-05, + "loss": 2.5404, + "step": 312200 + }, + { + "epoch": 0.2, + "learning_rate": 4.00320204931156e-05, + "loss": 2.5442, + "step": 312300 + }, + { + "epoch": 0.2, + "learning_rate": 4.002881844380404e-05, + "loss": 2.5239, + "step": 312400 + }, + { + "epoch": 0.2, + "learning_rate": 4.002561639449248e-05, + "loss": 2.5535, + "step": 312500 + }, + { + "epoch": 0.2, + "learning_rate": 4.002241434518092e-05, + "loss": 2.5565, + "step": 312600 + }, + { + "epoch": 0.2, + "learning_rate": 4.001921229586936e-05, + "loss": 2.5325, + "step": 312700 + }, + { + "epoch": 0.2, + "learning_rate": 4.00160102465578e-05, + "loss": 2.5471, + "step": 312800 + }, + { + "epoch": 0.2, + "learning_rate": 4.001280819724624e-05, + "loss": 2.5279, + "step": 312900 + }, + { + "epoch": 0.2, + "learning_rate": 4.000960614793468e-05, + "loss": 2.5293, + "step": 313000 + }, + { + "epoch": 0.2, + "eval_loss": 2.50457763671875, + "eval_runtime": 182.9787, + "eval_samples_per_second": 54.651, + "eval_steps_per_second": 3.416, + "step": 313000 + }, + { + "epoch": 0.2, + "learning_rate": 4.000640409862312e-05, + "loss": 2.5403, + "step": 313100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0003202049311564e-05, + "loss": 2.5394, + "step": 313200 + }, + { + "epoch": 0.2, + "learning_rate": 4e-05, + "loss": 2.5552, + "step": 313300 + }, + { + "epoch": 0.2, + "learning_rate": 3.999679795068844e-05, + "loss": 2.5321, + "step": 313400 + }, + { + "epoch": 0.2, + "learning_rate": 3.999359590137688e-05, + "loss": 2.5459, + "step": 313500 + }, + { + "epoch": 0.2, + "learning_rate": 3.999039385206532e-05, + "loss": 2.5312, + "step": 313600 + }, + { + "epoch": 0.2, + "learning_rate": 3.998719180275376e-05, + "loss": 2.5396, + "step": 313700 + }, + { + "epoch": 0.2, + "learning_rate": 3.99839897534422e-05, + "loss": 2.5456, + "step": 313800 + }, + { + "epoch": 0.2, + "learning_rate": 3.998078770413065e-05, + "loss": 2.546, + "step": 313900 + }, + { + "epoch": 0.2, + "learning_rate": 3.997758565481909e-05, + "loss": 2.5397, + "step": 314000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5051333904266357, + "eval_runtime": 176.2002, + "eval_samples_per_second": 56.754, + "eval_steps_per_second": 3.547, + "step": 314000 + }, + { + "epoch": 0.2, + "learning_rate": 3.997438360550753e-05, + "loss": 2.5528, + "step": 314100 + }, + { + "epoch": 0.2, + "learning_rate": 3.997118155619597e-05, + "loss": 2.5509, + "step": 314200 + }, + { + "epoch": 0.2, + "learning_rate": 3.996797950688441e-05, + "loss": 2.5298, + "step": 314300 + }, + { + "epoch": 0.2, + "learning_rate": 3.996477745757285e-05, + "loss": 2.5405, + "step": 314400 + }, + { + "epoch": 0.2, + "learning_rate": 3.996157540826129e-05, + "loss": 2.5263, + "step": 314500 + }, + { + "epoch": 0.2, + "learning_rate": 3.995837335894973e-05, + "loss": 2.5482, + "step": 314600 + }, + { + "epoch": 0.2, + "learning_rate": 3.995517130963817e-05, + "loss": 2.5449, + "step": 314700 + }, + { + "epoch": 0.2, + "learning_rate": 3.995196926032661e-05, + "loss": 2.5438, + "step": 314800 + }, + { + "epoch": 0.2, + "learning_rate": 3.994876721101505e-05, + "loss": 2.5317, + "step": 314900 + }, + { + "epoch": 0.2, + "learning_rate": 3.994556516170349e-05, + "loss": 2.5511, + "step": 315000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5045018196105957, + "eval_runtime": 179.4239, + "eval_samples_per_second": 55.734, + "eval_steps_per_second": 3.483, + "step": 315000 + }, + { + "epoch": 0.2, + "learning_rate": 3.994236311239193e-05, + "loss": 2.5536, + "step": 315100 + }, + { + "epoch": 0.2, + "learning_rate": 3.993916106308037e-05, + "loss": 2.5616, + "step": 315200 + }, + { + "epoch": 0.2, + "learning_rate": 3.993595901376881e-05, + "loss": 2.5476, + "step": 315300 + }, + { + "epoch": 0.2, + "learning_rate": 3.993275696445725e-05, + "loss": 2.562, + "step": 315400 + }, + { + "epoch": 0.2, + "learning_rate": 3.99295549151457e-05, + "loss": 2.5341, + "step": 315500 + }, + { + "epoch": 0.2, + "learning_rate": 3.992635286583414e-05, + "loss": 2.5354, + "step": 315600 + }, + { + "epoch": 0.2, + "learning_rate": 3.992315081652258e-05, + "loss": 2.5496, + "step": 315700 + }, + { + "epoch": 0.2, + "learning_rate": 3.991994876721102e-05, + "loss": 2.5394, + "step": 315800 + }, + { + "epoch": 0.2, + "learning_rate": 3.991674671789946e-05, + "loss": 2.5321, + "step": 315900 + }, + { + "epoch": 0.2, + "learning_rate": 3.9913544668587897e-05, + "loss": 2.5372, + "step": 316000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5045223236083984, + "eval_runtime": 176.428, + "eval_samples_per_second": 56.68, + "eval_steps_per_second": 3.543, + "step": 316000 + }, + { + "epoch": 0.2, + "learning_rate": 3.9910342619276336e-05, + "loss": 2.5451, + "step": 316100 + }, + { + "epoch": 0.2, + "learning_rate": 3.9907140569964776e-05, + "loss": 2.5537, + "step": 316200 + }, + { + "epoch": 0.2, + "learning_rate": 3.990393852065322e-05, + "loss": 2.558, + "step": 316300 + }, + { + "epoch": 0.2, + "learning_rate": 3.990073647134166e-05, + "loss": 2.5495, + "step": 316400 + }, + { + "epoch": 0.2, + "learning_rate": 3.98975344220301e-05, + "loss": 2.5321, + "step": 316500 + }, + { + "epoch": 0.2, + "learning_rate": 3.989433237271854e-05, + "loss": 2.535, + "step": 316600 + }, + { + "epoch": 0.2, + "learning_rate": 3.989113032340698e-05, + "loss": 2.5428, + "step": 316700 + }, + { + "epoch": 0.2, + "learning_rate": 3.988792827409542e-05, + "loss": 2.54, + "step": 316800 + }, + { + "epoch": 0.2, + "learning_rate": 3.988472622478386e-05, + "loss": 2.5508, + "step": 316900 + }, + { + "epoch": 0.2, + "learning_rate": 3.988152417547231e-05, + "loss": 2.5319, + "step": 317000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5040602684020996, + "eval_runtime": 174.4318, + "eval_samples_per_second": 57.329, + "eval_steps_per_second": 3.583, + "step": 317000 + }, + { + "epoch": 0.2, + "learning_rate": 3.987832212616075e-05, + "loss": 2.5404, + "step": 317100 + }, + { + "epoch": 0.2, + "learning_rate": 3.987512007684919e-05, + "loss": 2.5349, + "step": 317200 + }, + { + "epoch": 0.2, + "learning_rate": 3.987191802753763e-05, + "loss": 2.5611, + "step": 317300 + }, + { + "epoch": 0.2, + "learning_rate": 3.9868715978226066e-05, + "loss": 2.5419, + "step": 317400 + }, + { + "epoch": 0.2, + "learning_rate": 3.9865513928914506e-05, + "loss": 2.538, + "step": 317500 + }, + { + "epoch": 0.2, + "learning_rate": 3.9862311879602946e-05, + "loss": 2.5602, + "step": 317600 + }, + { + "epoch": 0.2, + "learning_rate": 3.9859109830291386e-05, + "loss": 2.5787, + "step": 317700 + }, + { + "epoch": 0.2, + "learning_rate": 3.9855907780979825e-05, + "loss": 2.5444, + "step": 317800 + }, + { + "epoch": 0.2, + "learning_rate": 3.985270573166827e-05, + "loss": 2.559, + "step": 317900 + }, + { + "epoch": 0.2, + "learning_rate": 3.984950368235671e-05, + "loss": 2.5492, + "step": 318000 + }, + { + "epoch": 0.2, + "eval_loss": 2.504504919052124, + "eval_runtime": 177.5462, + "eval_samples_per_second": 56.323, + "eval_steps_per_second": 3.52, + "step": 318000 + }, + { + "epoch": 0.2, + "learning_rate": 3.984630163304515e-05, + "loss": 2.5417, + "step": 318100 + }, + { + "epoch": 0.2, + "learning_rate": 3.984309958373359e-05, + "loss": 2.5378, + "step": 318200 + }, + { + "epoch": 0.2, + "learning_rate": 3.983989753442203e-05, + "loss": 2.5486, + "step": 318300 + }, + { + "epoch": 0.2, + "learning_rate": 3.983669548511047e-05, + "loss": 2.5299, + "step": 318400 + }, + { + "epoch": 0.2, + "learning_rate": 3.983349343579891e-05, + "loss": 2.5635, + "step": 318500 + }, + { + "epoch": 0.2, + "learning_rate": 3.983029138648736e-05, + "loss": 2.559, + "step": 318600 + }, + { + "epoch": 0.2, + "learning_rate": 3.9827089337175797e-05, + "loss": 2.5321, + "step": 318700 + }, + { + "epoch": 0.2, + "learning_rate": 3.9823887287864236e-05, + "loss": 2.5433, + "step": 318800 + }, + { + "epoch": 0.2, + "learning_rate": 3.9820685238552676e-05, + "loss": 2.5332, + "step": 318900 + }, + { + "epoch": 0.2, + "learning_rate": 3.9817483189241116e-05, + "loss": 2.5496, + "step": 319000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5038700103759766, + "eval_runtime": 180.8354, + "eval_samples_per_second": 55.299, + "eval_steps_per_second": 3.456, + "step": 319000 + }, + { + "epoch": 0.2, + "learning_rate": 3.9814281139929556e-05, + "loss": 2.5304, + "step": 319100 + }, + { + "epoch": 0.2, + "learning_rate": 3.9811079090617995e-05, + "loss": 2.5197, + "step": 319200 + }, + { + "epoch": 0.2, + "learning_rate": 3.980787704130644e-05, + "loss": 2.5367, + "step": 319300 + }, + { + "epoch": 0.2, + "learning_rate": 3.9804674991994875e-05, + "loss": 2.5455, + "step": 319400 + }, + { + "epoch": 0.2, + "learning_rate": 3.980147294268332e-05, + "loss": 2.5495, + "step": 319500 + }, + { + "epoch": 0.2, + "learning_rate": 3.979827089337176e-05, + "loss": 2.544, + "step": 319600 + }, + { + "epoch": 0.2, + "learning_rate": 3.97950688440602e-05, + "loss": 2.5515, + "step": 319700 + }, + { + "epoch": 0.2, + "learning_rate": 3.979186679474864e-05, + "loss": 2.5458, + "step": 319800 + }, + { + "epoch": 0.2, + "learning_rate": 3.978866474543708e-05, + "loss": 2.5365, + "step": 319900 + }, + { + "epoch": 0.2, + "learning_rate": 3.978546269612553e-05, + "loss": 2.5437, + "step": 320000 + }, + { + "epoch": 0.2, + "eval_loss": 2.5052034854888916, + "eval_runtime": 177.9273, + "eval_samples_per_second": 56.203, + "eval_steps_per_second": 3.513, + "step": 320000 + }, + { + "epoch": 0.2, + "learning_rate": 3.978226064681396e-05, + "loss": 2.5319, + "step": 320100 + }, + { + "epoch": 0.2, + "learning_rate": 3.9779058597502406e-05, + "loss": 2.5411, + "step": 320200 + }, + { + "epoch": 0.2, + "learning_rate": 3.9775856548190846e-05, + "loss": 2.5497, + "step": 320300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9772654498879286e-05, + "loss": 2.5301, + "step": 320400 + }, + { + "epoch": 0.21, + "learning_rate": 3.9769452449567725e-05, + "loss": 2.5616, + "step": 320500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9766250400256165e-05, + "loss": 2.5533, + "step": 320600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9763048350944605e-05, + "loss": 2.5563, + "step": 320700 + }, + { + "epoch": 0.21, + "learning_rate": 3.9759846301633045e-05, + "loss": 2.535, + "step": 320800 + }, + { + "epoch": 0.21, + "learning_rate": 3.975664425232149e-05, + "loss": 2.5515, + "step": 320900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9753442203009924e-05, + "loss": 2.5478, + "step": 321000 + }, + { + "epoch": 0.21, + "eval_loss": 2.502642869949341, + "eval_runtime": 176.3063, + "eval_samples_per_second": 56.719, + "eval_steps_per_second": 3.545, + "step": 321000 + }, + { + "epoch": 0.21, + "learning_rate": 3.975024015369837e-05, + "loss": 2.5499, + "step": 321100 + }, + { + "epoch": 0.21, + "learning_rate": 3.974703810438681e-05, + "loss": 2.5389, + "step": 321200 + }, + { + "epoch": 0.21, + "learning_rate": 3.974383605507525e-05, + "loss": 2.5296, + "step": 321300 + }, + { + "epoch": 0.21, + "learning_rate": 3.974063400576369e-05, + "loss": 2.5517, + "step": 321400 + }, + { + "epoch": 0.21, + "learning_rate": 3.973743195645213e-05, + "loss": 2.5395, + "step": 321500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9734229907140576e-05, + "loss": 2.5425, + "step": 321600 + }, + { + "epoch": 0.21, + "learning_rate": 3.973102785782901e-05, + "loss": 2.5416, + "step": 321700 + }, + { + "epoch": 0.21, + "learning_rate": 3.9727825808517456e-05, + "loss": 2.5388, + "step": 321800 + }, + { + "epoch": 0.21, + "learning_rate": 3.9724623759205895e-05, + "loss": 2.5441, + "step": 321900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9721421709894335e-05, + "loss": 2.5368, + "step": 322000 + }, + { + "epoch": 0.21, + "eval_loss": 2.505239725112915, + "eval_runtime": 176.1017, + "eval_samples_per_second": 56.785, + "eval_steps_per_second": 3.549, + "step": 322000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9718219660582775e-05, + "loss": 2.5253, + "step": 322100 + }, + { + "epoch": 0.21, + "learning_rate": 3.9715017611271215e-05, + "loss": 2.558, + "step": 322200 + }, + { + "epoch": 0.21, + "learning_rate": 3.971181556195966e-05, + "loss": 2.5313, + "step": 322300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9708613512648094e-05, + "loss": 2.5337, + "step": 322400 + }, + { + "epoch": 0.21, + "learning_rate": 3.970541146333654e-05, + "loss": 2.5383, + "step": 322500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9702209414024974e-05, + "loss": 2.5247, + "step": 322600 + }, + { + "epoch": 0.21, + "learning_rate": 3.969900736471342e-05, + "loss": 2.5411, + "step": 322700 + }, + { + "epoch": 0.21, + "learning_rate": 3.969580531540186e-05, + "loss": 2.558, + "step": 322800 + }, + { + "epoch": 0.21, + "learning_rate": 3.96926032660903e-05, + "loss": 2.5671, + "step": 322900 + }, + { + "epoch": 0.21, + "learning_rate": 3.968940121677874e-05, + "loss": 2.5209, + "step": 323000 + }, + { + "epoch": 0.21, + "eval_loss": 2.504387617111206, + "eval_runtime": 176.9359, + "eval_samples_per_second": 56.518, + "eval_steps_per_second": 3.532, + "step": 323000 + }, + { + "epoch": 0.21, + "learning_rate": 3.968619916746718e-05, + "loss": 2.5459, + "step": 323100 + }, + { + "epoch": 0.21, + "learning_rate": 3.9682997118155625e-05, + "loss": 2.5554, + "step": 323200 + }, + { + "epoch": 0.21, + "learning_rate": 3.967979506884406e-05, + "loss": 2.5532, + "step": 323300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9676593019532505e-05, + "loss": 2.5303, + "step": 323400 + }, + { + "epoch": 0.21, + "learning_rate": 3.9673390970220945e-05, + "loss": 2.5224, + "step": 323500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9670188920909384e-05, + "loss": 2.5402, + "step": 323600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9666986871597824e-05, + "loss": 2.5362, + "step": 323700 + }, + { + "epoch": 0.21, + "learning_rate": 3.9663784822286264e-05, + "loss": 2.5348, + "step": 323800 + }, + { + "epoch": 0.21, + "learning_rate": 3.966058277297471e-05, + "loss": 2.5452, + "step": 323900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9657380723663143e-05, + "loss": 2.5453, + "step": 324000 + }, + { + "epoch": 0.21, + "eval_loss": 2.503420829772949, + "eval_runtime": 175.8923, + "eval_samples_per_second": 56.853, + "eval_steps_per_second": 3.553, + "step": 324000 + }, + { + "epoch": 0.21, + "learning_rate": 3.965417867435159e-05, + "loss": 2.5733, + "step": 324100 + }, + { + "epoch": 0.21, + "learning_rate": 3.965097662504002e-05, + "loss": 2.5448, + "step": 324200 + }, + { + "epoch": 0.21, + "learning_rate": 3.964777457572847e-05, + "loss": 2.5223, + "step": 324300 + }, + { + "epoch": 0.21, + "learning_rate": 3.964457252641691e-05, + "loss": 2.5542, + "step": 324400 + }, + { + "epoch": 0.21, + "learning_rate": 3.964137047710535e-05, + "loss": 2.5298, + "step": 324500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9638168427793795e-05, + "loss": 2.5369, + "step": 324600 + }, + { + "epoch": 0.21, + "learning_rate": 3.963496637848223e-05, + "loss": 2.5425, + "step": 324700 + }, + { + "epoch": 0.21, + "learning_rate": 3.9631764329170675e-05, + "loss": 2.547, + "step": 324800 + }, + { + "epoch": 0.21, + "learning_rate": 3.962856227985911e-05, + "loss": 2.5343, + "step": 324900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9625360230547554e-05, + "loss": 2.5714, + "step": 325000 + }, + { + "epoch": 0.21, + "eval_loss": 2.503641366958618, + "eval_runtime": 174.1884, + "eval_samples_per_second": 57.409, + "eval_steps_per_second": 3.588, + "step": 325000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9622158181235994e-05, + "loss": 2.5426, + "step": 325100 + }, + { + "epoch": 0.21, + "learning_rate": 3.9618956131924434e-05, + "loss": 2.537, + "step": 325200 + }, + { + "epoch": 0.21, + "learning_rate": 3.9615754082612874e-05, + "loss": 2.5445, + "step": 325300 + }, + { + "epoch": 0.21, + "learning_rate": 3.961255203330131e-05, + "loss": 2.5471, + "step": 325400 + }, + { + "epoch": 0.21, + "learning_rate": 3.960934998398976e-05, + "loss": 2.546, + "step": 325500 + }, + { + "epoch": 0.21, + "learning_rate": 3.960614793467819e-05, + "loss": 2.537, + "step": 325600 + }, + { + "epoch": 0.21, + "learning_rate": 3.960294588536664e-05, + "loss": 2.5471, + "step": 325700 + }, + { + "epoch": 0.21, + "learning_rate": 3.959974383605507e-05, + "loss": 2.5305, + "step": 325800 + }, + { + "epoch": 0.21, + "learning_rate": 3.959654178674352e-05, + "loss": 2.5455, + "step": 325900 + }, + { + "epoch": 0.21, + "learning_rate": 3.959333973743196e-05, + "loss": 2.5335, + "step": 326000 + }, + { + "epoch": 0.21, + "eval_loss": 2.5042974948883057, + "eval_runtime": 176.1528, + "eval_samples_per_second": 56.769, + "eval_steps_per_second": 3.548, + "step": 326000 + }, + { + "epoch": 0.21, + "learning_rate": 3.95901376881204e-05, + "loss": 2.5461, + "step": 326100 + }, + { + "epoch": 0.21, + "learning_rate": 3.9586935638808845e-05, + "loss": 2.5605, + "step": 326200 + }, + { + "epoch": 0.21, + "learning_rate": 3.958373358949728e-05, + "loss": 2.5226, + "step": 326300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9580531540185724e-05, + "loss": 2.5242, + "step": 326400 + }, + { + "epoch": 0.21, + "learning_rate": 3.957732949087416e-05, + "loss": 2.5546, + "step": 326500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9574127441562604e-05, + "loss": 2.544, + "step": 326600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9570925392251043e-05, + "loss": 2.5166, + "step": 326700 + }, + { + "epoch": 0.21, + "learning_rate": 3.956772334293948e-05, + "loss": 2.5471, + "step": 326800 + }, + { + "epoch": 0.21, + "learning_rate": 3.956452129362793e-05, + "loss": 2.5462, + "step": 326900 + }, + { + "epoch": 0.21, + "learning_rate": 3.956131924431636e-05, + "loss": 2.5419, + "step": 327000 + }, + { + "epoch": 0.21, + "eval_loss": 2.502007246017456, + "eval_runtime": 181.6416, + "eval_samples_per_second": 55.053, + "eval_steps_per_second": 3.441, + "step": 327000 + }, + { + "epoch": 0.21, + "learning_rate": 3.955811719500481e-05, + "loss": 2.5315, + "step": 327100 + }, + { + "epoch": 0.21, + "learning_rate": 3.955491514569324e-05, + "loss": 2.5304, + "step": 327200 + }, + { + "epoch": 0.21, + "learning_rate": 3.955171309638169e-05, + "loss": 2.5687, + "step": 327300 + }, + { + "epoch": 0.21, + "learning_rate": 3.954851104707012e-05, + "loss": 2.5477, + "step": 327400 + }, + { + "epoch": 0.21, + "learning_rate": 3.954530899775857e-05, + "loss": 2.5408, + "step": 327500 + }, + { + "epoch": 0.21, + "learning_rate": 3.954210694844701e-05, + "loss": 2.5411, + "step": 327600 + }, + { + "epoch": 0.21, + "learning_rate": 3.953890489913545e-05, + "loss": 2.5467, + "step": 327700 + }, + { + "epoch": 0.21, + "learning_rate": 3.9535702849823894e-05, + "loss": 2.548, + "step": 327800 + }, + { + "epoch": 0.21, + "learning_rate": 3.953250080051233e-05, + "loss": 2.5375, + "step": 327900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9529298751200774e-05, + "loss": 2.554, + "step": 328000 + }, + { + "epoch": 0.21, + "eval_loss": 2.504699468612671, + "eval_runtime": 173.722, + "eval_samples_per_second": 57.563, + "eval_steps_per_second": 3.598, + "step": 328000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9526096701889207e-05, + "loss": 2.5319, + "step": 328100 + }, + { + "epoch": 0.21, + "learning_rate": 3.952289465257765e-05, + "loss": 2.5416, + "step": 328200 + }, + { + "epoch": 0.21, + "learning_rate": 3.951969260326609e-05, + "loss": 2.5422, + "step": 328300 + }, + { + "epoch": 0.21, + "learning_rate": 3.951649055395453e-05, + "loss": 2.5598, + "step": 328400 + }, + { + "epoch": 0.21, + "learning_rate": 3.951328850464298e-05, + "loss": 2.5542, + "step": 328500 + }, + { + "epoch": 0.21, + "learning_rate": 3.951008645533141e-05, + "loss": 2.5342, + "step": 328600 + }, + { + "epoch": 0.21, + "learning_rate": 3.950688440601986e-05, + "loss": 2.5362, + "step": 328700 + }, + { + "epoch": 0.21, + "learning_rate": 3.950368235670829e-05, + "loss": 2.5397, + "step": 328800 + }, + { + "epoch": 0.21, + "learning_rate": 3.950048030739674e-05, + "loss": 2.5425, + "step": 328900 + }, + { + "epoch": 0.21, + "learning_rate": 3.949727825808517e-05, + "loss": 2.5315, + "step": 329000 + }, + { + "epoch": 0.21, + "eval_loss": 2.5056402683258057, + "eval_runtime": 177.4283, + "eval_samples_per_second": 56.361, + "eval_steps_per_second": 3.523, + "step": 329000 + }, + { + "epoch": 0.21, + "learning_rate": 3.949407620877362e-05, + "loss": 2.5413, + "step": 329100 + }, + { + "epoch": 0.21, + "learning_rate": 3.949087415946206e-05, + "loss": 2.5423, + "step": 329200 + }, + { + "epoch": 0.21, + "learning_rate": 3.94876721101505e-05, + "loss": 2.5424, + "step": 329300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9484470060838944e-05, + "loss": 2.5236, + "step": 329400 + }, + { + "epoch": 0.21, + "learning_rate": 3.9481268011527376e-05, + "loss": 2.5192, + "step": 329500 + }, + { + "epoch": 0.21, + "learning_rate": 3.947806596221582e-05, + "loss": 2.5476, + "step": 329600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9474863912904256e-05, + "loss": 2.5511, + "step": 329700 + }, + { + "epoch": 0.21, + "learning_rate": 3.94716618635927e-05, + "loss": 2.5232, + "step": 329800 + }, + { + "epoch": 0.21, + "learning_rate": 3.946845981428114e-05, + "loss": 2.5542, + "step": 329900 + }, + { + "epoch": 0.21, + "learning_rate": 3.946525776496958e-05, + "loss": 2.535, + "step": 330000 + }, + { + "epoch": 0.21, + "eval_loss": 2.508066177368164, + "eval_runtime": 182.1861, + "eval_samples_per_second": 54.889, + "eval_steps_per_second": 3.431, + "step": 330000 + }, + { + "epoch": 0.21, + "learning_rate": 3.946205571565802e-05, + "loss": 2.5529, + "step": 330100 + }, + { + "epoch": 0.21, + "learning_rate": 3.945885366634646e-05, + "loss": 2.5257, + "step": 330200 + }, + { + "epoch": 0.21, + "learning_rate": 3.945565161703491e-05, + "loss": 2.5365, + "step": 330300 + }, + { + "epoch": 0.21, + "learning_rate": 3.945244956772334e-05, + "loss": 2.5432, + "step": 330400 + }, + { + "epoch": 0.21, + "learning_rate": 3.944924751841179e-05, + "loss": 2.5478, + "step": 330500 + }, + { + "epoch": 0.21, + "learning_rate": 3.944604546910022e-05, + "loss": 2.5398, + "step": 330600 + }, + { + "epoch": 0.21, + "learning_rate": 3.944284341978867e-05, + "loss": 2.5203, + "step": 330700 + }, + { + "epoch": 0.21, + "learning_rate": 3.943964137047711e-05, + "loss": 2.5287, + "step": 330800 + }, + { + "epoch": 0.21, + "learning_rate": 3.9436439321165546e-05, + "loss": 2.5378, + "step": 330900 + }, + { + "epoch": 0.21, + "learning_rate": 3.943323727185399e-05, + "loss": 2.5431, + "step": 331000 + }, + { + "epoch": 0.21, + "eval_loss": 2.5063817501068115, + "eval_runtime": 177.3593, + "eval_samples_per_second": 56.383, + "eval_steps_per_second": 3.524, + "step": 331000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9430035222542426e-05, + "loss": 2.5348, + "step": 331100 + }, + { + "epoch": 0.21, + "learning_rate": 3.942683317323087e-05, + "loss": 2.5277, + "step": 331200 + }, + { + "epoch": 0.21, + "learning_rate": 3.9423631123919305e-05, + "loss": 2.5345, + "step": 331300 + }, + { + "epoch": 0.21, + "learning_rate": 3.942042907460775e-05, + "loss": 2.5292, + "step": 331400 + }, + { + "epoch": 0.21, + "learning_rate": 3.941722702529619e-05, + "loss": 2.5705, + "step": 331500 + }, + { + "epoch": 0.21, + "learning_rate": 3.941402497598463e-05, + "loss": 2.5488, + "step": 331600 + }, + { + "epoch": 0.21, + "learning_rate": 3.941082292667307e-05, + "loss": 2.5694, + "step": 331700 + }, + { + "epoch": 0.21, + "learning_rate": 3.940762087736151e-05, + "loss": 2.5491, + "step": 331800 + }, + { + "epoch": 0.21, + "learning_rate": 3.940441882804996e-05, + "loss": 2.5307, + "step": 331900 + }, + { + "epoch": 0.21, + "learning_rate": 3.940121677873839e-05, + "loss": 2.5362, + "step": 332000 + }, + { + "epoch": 0.21, + "eval_loss": 2.5041511058807373, + "eval_runtime": 173.3732, + "eval_samples_per_second": 57.679, + "eval_steps_per_second": 3.605, + "step": 332000 + }, + { + "epoch": 0.21, + "learning_rate": 3.939801472942684e-05, + "loss": 2.5468, + "step": 332100 + }, + { + "epoch": 0.21, + "learning_rate": 3.9394812680115277e-05, + "loss": 2.5429, + "step": 332200 + }, + { + "epoch": 0.21, + "learning_rate": 3.9391610630803716e-05, + "loss": 2.5514, + "step": 332300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9388408581492156e-05, + "loss": 2.5171, + "step": 332400 + }, + { + "epoch": 0.21, + "learning_rate": 3.9385206532180596e-05, + "loss": 2.5341, + "step": 332500 + }, + { + "epoch": 0.21, + "learning_rate": 3.938200448286904e-05, + "loss": 2.5426, + "step": 332600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9378802433557475e-05, + "loss": 2.5406, + "step": 332700 + }, + { + "epoch": 0.21, + "learning_rate": 3.937560038424592e-05, + "loss": 2.5297, + "step": 332800 + }, + { + "epoch": 0.21, + "learning_rate": 3.9372398334934355e-05, + "loss": 2.5386, + "step": 332900 + }, + { + "epoch": 0.21, + "learning_rate": 3.93691962856228e-05, + "loss": 2.5313, + "step": 333000 + }, + { + "epoch": 0.21, + "eval_loss": 2.5052037239074707, + "eval_runtime": 178.1571, + "eval_samples_per_second": 56.13, + "eval_steps_per_second": 3.508, + "step": 333000 + }, + { + "epoch": 0.21, + "learning_rate": 3.936599423631124e-05, + "loss": 2.5314, + "step": 333100 + }, + { + "epoch": 0.21, + "learning_rate": 3.936279218699968e-05, + "loss": 2.5428, + "step": 333200 + }, + { + "epoch": 0.21, + "learning_rate": 3.935959013768812e-05, + "loss": 2.539, + "step": 333300 + }, + { + "epoch": 0.21, + "learning_rate": 3.935638808837656e-05, + "loss": 2.5373, + "step": 333400 + }, + { + "epoch": 0.21, + "learning_rate": 3.935318603906501e-05, + "loss": 2.5497, + "step": 333500 + }, + { + "epoch": 0.21, + "learning_rate": 3.934998398975344e-05, + "loss": 2.5336, + "step": 333600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9346781940441886e-05, + "loss": 2.5407, + "step": 333700 + }, + { + "epoch": 0.21, + "learning_rate": 3.9343579891130326e-05, + "loss": 2.5352, + "step": 333800 + }, + { + "epoch": 0.21, + "learning_rate": 3.9340377841818766e-05, + "loss": 2.5255, + "step": 333900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9337175792507205e-05, + "loss": 2.5323, + "step": 334000 + }, + { + "epoch": 0.21, + "eval_loss": 2.5034334659576416, + "eval_runtime": 177.3628, + "eval_samples_per_second": 56.382, + "eval_steps_per_second": 3.524, + "step": 334000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9333973743195645e-05, + "loss": 2.5344, + "step": 334100 + }, + { + "epoch": 0.21, + "learning_rate": 3.933077169388409e-05, + "loss": 2.5478, + "step": 334200 + }, + { + "epoch": 0.21, + "learning_rate": 3.9327569644572525e-05, + "loss": 2.5514, + "step": 334300 + }, + { + "epoch": 0.21, + "learning_rate": 3.932436759526097e-05, + "loss": 2.5434, + "step": 334400 + }, + { + "epoch": 0.21, + "learning_rate": 3.932116554594941e-05, + "loss": 2.5329, + "step": 334500 + }, + { + "epoch": 0.21, + "learning_rate": 3.931796349663785e-05, + "loss": 2.5589, + "step": 334600 + }, + { + "epoch": 0.21, + "learning_rate": 3.931476144732629e-05, + "loss": 2.5424, + "step": 334700 + }, + { + "epoch": 0.21, + "learning_rate": 3.931155939801473e-05, + "loss": 2.5403, + "step": 334800 + }, + { + "epoch": 0.21, + "learning_rate": 3.930835734870317e-05, + "loss": 2.5382, + "step": 334900 + }, + { + "epoch": 0.21, + "learning_rate": 3.930515529939161e-05, + "loss": 2.5531, + "step": 335000 + }, + { + "epoch": 0.21, + "eval_loss": 2.502439260482788, + "eval_runtime": 177.2654, + "eval_samples_per_second": 56.413, + "eval_steps_per_second": 3.526, + "step": 335000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9301953250080056e-05, + "loss": 2.5458, + "step": 335100 + }, + { + "epoch": 0.21, + "learning_rate": 3.929875120076849e-05, + "loss": 2.5491, + "step": 335200 + }, + { + "epoch": 0.21, + "learning_rate": 3.9295549151456936e-05, + "loss": 2.5313, + "step": 335300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9292347102145375e-05, + "loss": 2.5275, + "step": 335400 + }, + { + "epoch": 0.21, + "learning_rate": 3.9289145052833815e-05, + "loss": 2.5285, + "step": 335500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9285943003522255e-05, + "loss": 2.527, + "step": 335600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9282740954210694e-05, + "loss": 2.519, + "step": 335700 + }, + { + "epoch": 0.21, + "learning_rate": 3.927953890489914e-05, + "loss": 2.5079, + "step": 335800 + }, + { + "epoch": 0.21, + "learning_rate": 3.9276336855587574e-05, + "loss": 2.5293, + "step": 335900 + }, + { + "epoch": 0.22, + "learning_rate": 3.927313480627602e-05, + "loss": 2.534, + "step": 336000 + }, + { + "epoch": 0.22, + "eval_loss": 2.505464553833008, + "eval_runtime": 179.7562, + "eval_samples_per_second": 55.631, + "eval_steps_per_second": 3.477, + "step": 336000 + }, + { + "epoch": 0.22, + "learning_rate": 3.926993275696446e-05, + "loss": 2.5273, + "step": 336100 + }, + { + "epoch": 0.22, + "learning_rate": 3.92667307076529e-05, + "loss": 2.5203, + "step": 336200 + }, + { + "epoch": 0.22, + "learning_rate": 3.926352865834134e-05, + "loss": 2.5347, + "step": 336300 + }, + { + "epoch": 0.22, + "learning_rate": 3.926032660902978e-05, + "loss": 2.5287, + "step": 336400 + }, + { + "epoch": 0.22, + "learning_rate": 3.925712455971822e-05, + "loss": 2.5166, + "step": 336500 + }, + { + "epoch": 0.22, + "learning_rate": 3.925392251040666e-05, + "loss": 2.5404, + "step": 336600 + }, + { + "epoch": 0.22, + "learning_rate": 3.9250720461095105e-05, + "loss": 2.5171, + "step": 336700 + }, + { + "epoch": 0.22, + "learning_rate": 3.9247518411783545e-05, + "loss": 2.5414, + "step": 336800 + }, + { + "epoch": 0.22, + "learning_rate": 3.9244316362471985e-05, + "loss": 2.5476, + "step": 336900 + }, + { + "epoch": 0.22, + "learning_rate": 3.9241114313160425e-05, + "loss": 2.5311, + "step": 337000 + }, + { + "epoch": 0.22, + "eval_loss": 2.503237009048462, + "eval_runtime": 177.8754, + "eval_samples_per_second": 56.219, + "eval_steps_per_second": 3.514, + "step": 337000 + }, + { + "epoch": 0.22, + "learning_rate": 3.9237912263848864e-05, + "loss": 2.5438, + "step": 337100 + }, + { + "epoch": 0.22, + "learning_rate": 3.9234710214537304e-05, + "loss": 2.5231, + "step": 337200 + }, + { + "epoch": 0.22, + "learning_rate": 3.9231508165225744e-05, + "loss": 2.5314, + "step": 337300 + }, + { + "epoch": 0.22, + "learning_rate": 3.922830611591419e-05, + "loss": 2.5164, + "step": 337400 + }, + { + "epoch": 0.22, + "learning_rate": 3.922510406660262e-05, + "loss": 2.5187, + "step": 337500 + }, + { + "epoch": 0.22, + "learning_rate": 3.922190201729107e-05, + "loss": 2.5424, + "step": 337600 + }, + { + "epoch": 0.22, + "learning_rate": 3.921869996797951e-05, + "loss": 2.5342, + "step": 337700 + }, + { + "epoch": 0.22, + "learning_rate": 3.921549791866795e-05, + "loss": 2.5107, + "step": 337800 + }, + { + "epoch": 0.22, + "learning_rate": 3.921229586935639e-05, + "loss": 2.5053, + "step": 337900 + }, + { + "epoch": 0.22, + "learning_rate": 3.920909382004483e-05, + "loss": 2.537, + "step": 338000 + }, + { + "epoch": 0.22, + "eval_loss": 2.502249002456665, + "eval_runtime": 177.7412, + "eval_samples_per_second": 56.262, + "eval_steps_per_second": 3.516, + "step": 338000 + }, + { + "epoch": 0.22, + "learning_rate": 3.920589177073327e-05, + "loss": 2.5133, + "step": 338100 + }, + { + "epoch": 0.22, + "learning_rate": 3.920268972142171e-05, + "loss": 2.5349, + "step": 338200 + }, + { + "epoch": 0.22, + "learning_rate": 3.9199487672110155e-05, + "loss": 2.5304, + "step": 338300 + }, + { + "epoch": 0.22, + "learning_rate": 3.9196285622798595e-05, + "loss": 2.5281, + "step": 338400 + }, + { + "epoch": 0.22, + "learning_rate": 3.9193083573487034e-05, + "loss": 2.5396, + "step": 338500 + }, + { + "epoch": 0.22, + "learning_rate": 3.9189881524175474e-05, + "loss": 2.5307, + "step": 338600 + }, + { + "epoch": 0.22, + "learning_rate": 3.9186679474863914e-05, + "loss": 2.5417, + "step": 338700 + }, + { + "epoch": 0.22, + "learning_rate": 3.9183477425552353e-05, + "loss": 2.5217, + "step": 338800 + }, + { + "epoch": 0.22, + "learning_rate": 3.918027537624079e-05, + "loss": 2.5489, + "step": 338900 + }, + { + "epoch": 0.22, + "learning_rate": 3.917707332692924e-05, + "loss": 2.5204, + "step": 339000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5009543895721436, + "eval_runtime": 177.5956, + "eval_samples_per_second": 56.308, + "eval_steps_per_second": 3.519, + "step": 339000 + }, + { + "epoch": 0.22, + "learning_rate": 3.917387127761768e-05, + "loss": 2.5157, + "step": 339100 + }, + { + "epoch": 0.22, + "learning_rate": 3.917066922830612e-05, + "loss": 2.5283, + "step": 339200 + }, + { + "epoch": 0.22, + "learning_rate": 3.916746717899456e-05, + "loss": 2.4974, + "step": 339300 + }, + { + "epoch": 0.22, + "learning_rate": 3.9164265129683e-05, + "loss": 2.5386, + "step": 339400 + }, + { + "epoch": 0.22, + "learning_rate": 3.916106308037144e-05, + "loss": 2.5457, + "step": 339500 + }, + { + "epoch": 0.22, + "learning_rate": 3.915786103105988e-05, + "loss": 2.5345, + "step": 339600 + }, + { + "epoch": 0.22, + "learning_rate": 3.915465898174832e-05, + "loss": 2.519, + "step": 339700 + }, + { + "epoch": 0.22, + "learning_rate": 3.9151456932436764e-05, + "loss": 2.5253, + "step": 339800 + }, + { + "epoch": 0.22, + "learning_rate": 3.9148254883125204e-05, + "loss": 2.5485, + "step": 339900 + }, + { + "epoch": 0.22, + "learning_rate": 3.9145052833813644e-05, + "loss": 2.543, + "step": 340000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5014827251434326, + "eval_runtime": 173.8552, + "eval_samples_per_second": 57.519, + "eval_steps_per_second": 3.595, + "step": 340000 + }, + { + "epoch": 0.22, + "learning_rate": 3.9141850784502084e-05, + "loss": 2.5324, + "step": 340100 + }, + { + "epoch": 0.22, + "learning_rate": 3.913864873519052e-05, + "loss": 2.5207, + "step": 340200 + }, + { + "epoch": 0.22, + "learning_rate": 3.913544668587896e-05, + "loss": 2.5121, + "step": 340300 + }, + { + "epoch": 0.22, + "learning_rate": 3.91322446365674e-05, + "loss": 2.5151, + "step": 340400 + }, + { + "epoch": 0.22, + "learning_rate": 3.912904258725584e-05, + "loss": 2.5572, + "step": 340500 + }, + { + "epoch": 0.22, + "learning_rate": 3.912584053794429e-05, + "loss": 2.5336, + "step": 340600 + }, + { + "epoch": 0.22, + "learning_rate": 3.912263848863273e-05, + "loss": 2.5318, + "step": 340700 + }, + { + "epoch": 0.22, + "learning_rate": 3.911943643932117e-05, + "loss": 2.5118, + "step": 340800 + }, + { + "epoch": 0.22, + "learning_rate": 3.911623439000961e-05, + "loss": 2.5269, + "step": 340900 + }, + { + "epoch": 0.22, + "learning_rate": 3.911303234069805e-05, + "loss": 2.5227, + "step": 341000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5016543865203857, + "eval_runtime": 180.0431, + "eval_samples_per_second": 55.542, + "eval_steps_per_second": 3.471, + "step": 341000 + }, + { + "epoch": 0.22, + "learning_rate": 3.910983029138649e-05, + "loss": 2.5317, + "step": 341100 + }, + { + "epoch": 0.22, + "learning_rate": 3.910662824207493e-05, + "loss": 2.5284, + "step": 341200 + }, + { + "epoch": 0.22, + "learning_rate": 3.910342619276337e-05, + "loss": 2.5298, + "step": 341300 + }, + { + "epoch": 0.22, + "learning_rate": 3.9100224143451814e-05, + "loss": 2.5419, + "step": 341400 + }, + { + "epoch": 0.22, + "learning_rate": 3.9097022094140254e-05, + "loss": 2.5179, + "step": 341500 + }, + { + "epoch": 0.22, + "learning_rate": 3.909382004482869e-05, + "loss": 2.509, + "step": 341600 + }, + { + "epoch": 0.22, + "learning_rate": 3.909061799551713e-05, + "loss": 2.5208, + "step": 341700 + }, + { + "epoch": 0.22, + "learning_rate": 3.908741594620557e-05, + "loss": 2.5141, + "step": 341800 + }, + { + "epoch": 0.22, + "learning_rate": 3.908421389689401e-05, + "loss": 2.5313, + "step": 341900 + }, + { + "epoch": 0.22, + "learning_rate": 3.908101184758245e-05, + "loss": 2.5236, + "step": 342000 + }, + { + "epoch": 0.22, + "eval_loss": 2.502887010574341, + "eval_runtime": 177.9793, + "eval_samples_per_second": 56.186, + "eval_steps_per_second": 3.512, + "step": 342000 + }, + { + "epoch": 0.22, + "learning_rate": 3.90778097982709e-05, + "loss": 2.5436, + "step": 342100 + }, + { + "epoch": 0.22, + "learning_rate": 3.907460774895934e-05, + "loss": 2.5374, + "step": 342200 + }, + { + "epoch": 0.22, + "learning_rate": 3.907140569964778e-05, + "loss": 2.5199, + "step": 342300 + }, + { + "epoch": 0.22, + "learning_rate": 3.906820365033622e-05, + "loss": 2.5138, + "step": 342400 + }, + { + "epoch": 0.22, + "learning_rate": 3.906500160102466e-05, + "loss": 2.5207, + "step": 342500 + }, + { + "epoch": 0.22, + "learning_rate": 3.90617995517131e-05, + "loss": 2.544, + "step": 342600 + }, + { + "epoch": 0.22, + "learning_rate": 3.905859750240154e-05, + "loss": 2.5205, + "step": 342700 + }, + { + "epoch": 0.22, + "learning_rate": 3.905539545308998e-05, + "loss": 2.5191, + "step": 342800 + }, + { + "epoch": 0.22, + "learning_rate": 3.905219340377842e-05, + "loss": 2.501, + "step": 342900 + }, + { + "epoch": 0.22, + "learning_rate": 3.904899135446686e-05, + "loss": 2.5272, + "step": 343000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5032293796539307, + "eval_runtime": 176.8306, + "eval_samples_per_second": 56.551, + "eval_steps_per_second": 3.534, + "step": 343000 + }, + { + "epoch": 0.22, + "learning_rate": 3.90457893051553e-05, + "loss": 2.5399, + "step": 343100 + }, + { + "epoch": 0.22, + "learning_rate": 3.904258725584374e-05, + "loss": 2.5317, + "step": 343200 + }, + { + "epoch": 0.22, + "learning_rate": 3.903938520653218e-05, + "loss": 2.5491, + "step": 343300 + }, + { + "epoch": 0.22, + "learning_rate": 3.903618315722062e-05, + "loss": 2.5377, + "step": 343400 + }, + { + "epoch": 0.22, + "learning_rate": 3.903298110790906e-05, + "loss": 2.5159, + "step": 343500 + }, + { + "epoch": 0.22, + "learning_rate": 3.90297790585975e-05, + "loss": 2.5141, + "step": 343600 + }, + { + "epoch": 0.22, + "learning_rate": 3.902657700928595e-05, + "loss": 2.5237, + "step": 343700 + }, + { + "epoch": 0.22, + "learning_rate": 3.902337495997439e-05, + "loss": 2.5061, + "step": 343800 + }, + { + "epoch": 0.22, + "learning_rate": 3.902017291066283e-05, + "loss": 2.5459, + "step": 343900 + }, + { + "epoch": 0.22, + "learning_rate": 3.901697086135127e-05, + "loss": 2.5172, + "step": 344000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5020711421966553, + "eval_runtime": 174.1296, + "eval_samples_per_second": 57.429, + "eval_steps_per_second": 3.589, + "step": 344000 + }, + { + "epoch": 0.22, + "learning_rate": 3.901376881203971e-05, + "loss": 2.5239, + "step": 344100 + }, + { + "epoch": 0.22, + "learning_rate": 3.901056676272815e-05, + "loss": 2.5312, + "step": 344200 + }, + { + "epoch": 0.22, + "learning_rate": 3.9007364713416587e-05, + "loss": 2.5279, + "step": 344300 + }, + { + "epoch": 0.22, + "learning_rate": 3.900416266410503e-05, + "loss": 2.5287, + "step": 344400 + }, + { + "epoch": 0.22, + "learning_rate": 3.9000960614793466e-05, + "loss": 2.5267, + "step": 344500 + }, + { + "epoch": 0.22, + "learning_rate": 3.899775856548191e-05, + "loss": 2.5315, + "step": 344600 + }, + { + "epoch": 0.22, + "learning_rate": 3.899455651617035e-05, + "loss": 2.5198, + "step": 344700 + }, + { + "epoch": 0.22, + "learning_rate": 3.899135446685879e-05, + "loss": 2.5358, + "step": 344800 + }, + { + "epoch": 0.22, + "learning_rate": 3.898815241754723e-05, + "loss": 2.5314, + "step": 344900 + }, + { + "epoch": 0.22, + "learning_rate": 3.898495036823567e-05, + "loss": 2.5102, + "step": 345000 + }, + { + "epoch": 0.22, + "eval_loss": 2.500669240951538, + "eval_runtime": 174.1005, + "eval_samples_per_second": 57.438, + "eval_steps_per_second": 3.59, + "step": 345000 + }, + { + "epoch": 0.22, + "learning_rate": 3.898174831892411e-05, + "loss": 2.5264, + "step": 345100 + }, + { + "epoch": 0.22, + "learning_rate": 3.897854626961255e-05, + "loss": 2.4822, + "step": 345200 + }, + { + "epoch": 0.22, + "learning_rate": 3.8975344220301e-05, + "loss": 2.5123, + "step": 345300 + }, + { + "epoch": 0.22, + "learning_rate": 3.897214217098944e-05, + "loss": 2.5433, + "step": 345400 + }, + { + "epoch": 0.22, + "learning_rate": 3.896894012167788e-05, + "loss": 2.5241, + "step": 345500 + }, + { + "epoch": 0.22, + "learning_rate": 3.896573807236632e-05, + "loss": 2.5093, + "step": 345600 + }, + { + "epoch": 0.22, + "learning_rate": 3.8962536023054756e-05, + "loss": 2.5274, + "step": 345700 + }, + { + "epoch": 0.22, + "learning_rate": 3.8959333973743196e-05, + "loss": 2.525, + "step": 345800 + }, + { + "epoch": 0.22, + "learning_rate": 3.8956131924431636e-05, + "loss": 2.5402, + "step": 345900 + }, + { + "epoch": 0.22, + "learning_rate": 3.895292987512008e-05, + "loss": 2.5156, + "step": 346000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5017688274383545, + "eval_runtime": 182.3704, + "eval_samples_per_second": 54.833, + "eval_steps_per_second": 3.427, + "step": 346000 + }, + { + "epoch": 0.22, + "learning_rate": 3.8949727825808515e-05, + "loss": 2.5155, + "step": 346100 + }, + { + "epoch": 0.22, + "learning_rate": 3.894652577649696e-05, + "loss": 2.5315, + "step": 346200 + }, + { + "epoch": 0.22, + "learning_rate": 3.89433237271854e-05, + "loss": 2.5151, + "step": 346300 + }, + { + "epoch": 0.22, + "learning_rate": 3.894012167787384e-05, + "loss": 2.5185, + "step": 346400 + }, + { + "epoch": 0.22, + "learning_rate": 3.893691962856228e-05, + "loss": 2.5272, + "step": 346500 + }, + { + "epoch": 0.22, + "learning_rate": 3.893371757925072e-05, + "loss": 2.5344, + "step": 346600 + }, + { + "epoch": 0.22, + "learning_rate": 3.893051552993917e-05, + "loss": 2.5144, + "step": 346700 + }, + { + "epoch": 0.22, + "learning_rate": 3.89273134806276e-05, + "loss": 2.5232, + "step": 346800 + }, + { + "epoch": 0.22, + "learning_rate": 3.892411143131605e-05, + "loss": 2.5326, + "step": 346900 + }, + { + "epoch": 0.22, + "learning_rate": 3.8920909382004487e-05, + "loss": 2.5229, + "step": 347000 + }, + { + "epoch": 0.22, + "eval_loss": 2.502462387084961, + "eval_runtime": 177.3184, + "eval_samples_per_second": 56.396, + "eval_steps_per_second": 3.525, + "step": 347000 + }, + { + "epoch": 0.22, + "learning_rate": 3.8917707332692926e-05, + "loss": 2.5204, + "step": 347100 + }, + { + "epoch": 0.22, + "learning_rate": 3.8914505283381366e-05, + "loss": 2.5095, + "step": 347200 + }, + { + "epoch": 0.22, + "learning_rate": 3.8911303234069806e-05, + "loss": 2.5313, + "step": 347300 + }, + { + "epoch": 0.22, + "learning_rate": 3.8908101184758246e-05, + "loss": 2.5229, + "step": 347400 + }, + { + "epoch": 0.22, + "learning_rate": 3.8904899135446685e-05, + "loss": 2.503, + "step": 347500 + }, + { + "epoch": 0.22, + "learning_rate": 3.890169708613513e-05, + "loss": 2.5152, + "step": 347600 + }, + { + "epoch": 0.22, + "learning_rate": 3.8898495036823565e-05, + "loss": 2.5295, + "step": 347700 + }, + { + "epoch": 0.22, + "learning_rate": 3.889529298751201e-05, + "loss": 2.5384, + "step": 347800 + }, + { + "epoch": 0.22, + "learning_rate": 3.889209093820045e-05, + "loss": 2.5224, + "step": 347900 + }, + { + "epoch": 0.22, + "learning_rate": 3.888888888888889e-05, + "loss": 2.5292, + "step": 348000 + }, + { + "epoch": 0.22, + "eval_loss": 2.500412940979004, + "eval_runtime": 176.5813, + "eval_samples_per_second": 56.631, + "eval_steps_per_second": 3.539, + "step": 348000 + }, + { + "epoch": 0.22, + "learning_rate": 3.888568683957733e-05, + "loss": 2.5116, + "step": 348100 + }, + { + "epoch": 0.22, + "learning_rate": 3.888248479026577e-05, + "loss": 2.4974, + "step": 348200 + }, + { + "epoch": 0.22, + "learning_rate": 3.887928274095422e-05, + "loss": 2.5153, + "step": 348300 + }, + { + "epoch": 0.22, + "learning_rate": 3.887608069164265e-05, + "loss": 2.5223, + "step": 348400 + }, + { + "epoch": 0.22, + "learning_rate": 3.8872878642331096e-05, + "loss": 2.5173, + "step": 348500 + }, + { + "epoch": 0.22, + "learning_rate": 3.8869676593019536e-05, + "loss": 2.5406, + "step": 348600 + }, + { + "epoch": 0.22, + "learning_rate": 3.8866474543707976e-05, + "loss": 2.5215, + "step": 348700 + }, + { + "epoch": 0.22, + "learning_rate": 3.8863272494396415e-05, + "loss": 2.5127, + "step": 348800 + }, + { + "epoch": 0.22, + "learning_rate": 3.8860070445084855e-05, + "loss": 2.5301, + "step": 348900 + }, + { + "epoch": 0.22, + "learning_rate": 3.88568683957733e-05, + "loss": 2.5334, + "step": 349000 + }, + { + "epoch": 0.22, + "eval_loss": 2.4996371269226074, + "eval_runtime": 173.9569, + "eval_samples_per_second": 57.485, + "eval_steps_per_second": 3.593, + "step": 349000 + }, + { + "epoch": 0.22, + "learning_rate": 3.8853666346461735e-05, + "loss": 2.5127, + "step": 349100 + }, + { + "epoch": 0.22, + "learning_rate": 3.885046429715018e-05, + "loss": 2.4993, + "step": 349200 + }, + { + "epoch": 0.22, + "learning_rate": 3.8847262247838614e-05, + "loss": 2.5131, + "step": 349300 + }, + { + "epoch": 0.22, + "learning_rate": 3.884406019852706e-05, + "loss": 2.5099, + "step": 349400 + }, + { + "epoch": 0.22, + "learning_rate": 3.88408581492155e-05, + "loss": 2.5165, + "step": 349500 + }, + { + "epoch": 0.22, + "learning_rate": 3.883765609990394e-05, + "loss": 2.5212, + "step": 349600 + }, + { + "epoch": 0.22, + "learning_rate": 3.883445405059239e-05, + "loss": 2.5353, + "step": 349700 + }, + { + "epoch": 0.22, + "learning_rate": 3.883125200128082e-05, + "loss": 2.5002, + "step": 349800 + }, + { + "epoch": 0.22, + "learning_rate": 3.8828049951969266e-05, + "loss": 2.5112, + "step": 349900 + }, + { + "epoch": 0.22, + "learning_rate": 3.88248479026577e-05, + "loss": 2.5168, + "step": 350000 + }, + { + "epoch": 0.22, + "eval_loss": 2.4998345375061035, + "eval_runtime": 175.5683, + "eval_samples_per_second": 56.958, + "eval_steps_per_second": 3.56, + "step": 350000 + }, + { + "epoch": 0.22, + "learning_rate": 3.8821645853346146e-05, + "loss": 2.5114, + "step": 350100 + }, + { + "epoch": 0.22, + "learning_rate": 3.8818443804034585e-05, + "loss": 2.512, + "step": 350200 + }, + { + "epoch": 0.22, + "learning_rate": 3.8815241754723025e-05, + "loss": 2.5388, + "step": 350300 + }, + { + "epoch": 0.22, + "learning_rate": 3.8812039705411465e-05, + "loss": 2.5288, + "step": 350400 + }, + { + "epoch": 0.22, + "learning_rate": 3.8808837656099905e-05, + "loss": 2.5107, + "step": 350500 + }, + { + "epoch": 0.22, + "learning_rate": 3.880563560678835e-05, + "loss": 2.5228, + "step": 350600 + }, + { + "epoch": 0.22, + "learning_rate": 3.8802433557476784e-05, + "loss": 2.5256, + "step": 350700 + }, + { + "epoch": 0.22, + "learning_rate": 3.879923150816523e-05, + "loss": 2.5209, + "step": 350800 + }, + { + "epoch": 0.22, + "learning_rate": 3.8796029458853664e-05, + "loss": 2.5366, + "step": 350900 + }, + { + "epoch": 0.22, + "learning_rate": 3.879282740954211e-05, + "loss": 2.5308, + "step": 351000 + }, + { + "epoch": 0.22, + "eval_loss": 2.5002245903015137, + "eval_runtime": 179.2279, + "eval_samples_per_second": 55.795, + "eval_steps_per_second": 3.487, + "step": 351000 + }, + { + "epoch": 0.22, + "learning_rate": 3.878962536023055e-05, + "loss": 2.5232, + "step": 351100 + }, + { + "epoch": 0.22, + "learning_rate": 3.878642331091899e-05, + "loss": 2.5057, + "step": 351200 + }, + { + "epoch": 0.22, + "learning_rate": 3.8783221261607436e-05, + "loss": 2.534, + "step": 351300 + }, + { + "epoch": 0.22, + "learning_rate": 3.878001921229587e-05, + "loss": 2.5405, + "step": 351400 + }, + { + "epoch": 0.22, + "learning_rate": 3.8776817162984315e-05, + "loss": 2.5292, + "step": 351500 + }, + { + "epoch": 0.23, + "learning_rate": 3.877361511367275e-05, + "loss": 2.5251, + "step": 351600 + }, + { + "epoch": 0.23, + "learning_rate": 3.8770413064361195e-05, + "loss": 2.5063, + "step": 351700 + }, + { + "epoch": 0.23, + "learning_rate": 3.8767211015049635e-05, + "loss": 2.5231, + "step": 351800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8764008965738074e-05, + "loss": 2.505, + "step": 351900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8760806916426514e-05, + "loss": 2.5168, + "step": 352000 + }, + { + "epoch": 0.23, + "eval_loss": 2.5032877922058105, + "eval_runtime": 176.5268, + "eval_samples_per_second": 56.649, + "eval_steps_per_second": 3.541, + "step": 352000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8757604867114954e-05, + "loss": 2.4991, + "step": 352100 + }, + { + "epoch": 0.23, + "learning_rate": 3.87544028178034e-05, + "loss": 2.5305, + "step": 352200 + }, + { + "epoch": 0.23, + "learning_rate": 3.8751200768491833e-05, + "loss": 2.5349, + "step": 352300 + }, + { + "epoch": 0.23, + "learning_rate": 3.874799871918028e-05, + "loss": 2.5165, + "step": 352400 + }, + { + "epoch": 0.23, + "learning_rate": 3.874479666986871e-05, + "loss": 2.498, + "step": 352500 + }, + { + "epoch": 0.23, + "learning_rate": 3.874159462055716e-05, + "loss": 2.5275, + "step": 352600 + }, + { + "epoch": 0.23, + "learning_rate": 3.87383925712456e-05, + "loss": 2.5178, + "step": 352700 + }, + { + "epoch": 0.23, + "learning_rate": 3.873519052193404e-05, + "loss": 2.5061, + "step": 352800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8731988472622485e-05, + "loss": 2.5156, + "step": 352900 + }, + { + "epoch": 0.23, + "learning_rate": 3.872878642331092e-05, + "loss": 2.5166, + "step": 353000 + }, + { + "epoch": 0.23, + "eval_loss": 2.500791072845459, + "eval_runtime": 179.6206, + "eval_samples_per_second": 55.673, + "eval_steps_per_second": 3.48, + "step": 353000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8725584373999365e-05, + "loss": 2.5127, + "step": 353100 + }, + { + "epoch": 0.23, + "learning_rate": 3.87223823246878e-05, + "loss": 2.5216, + "step": 353200 + }, + { + "epoch": 0.23, + "learning_rate": 3.8719180275376244e-05, + "loss": 2.539, + "step": 353300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8715978226064684e-05, + "loss": 2.5005, + "step": 353400 + }, + { + "epoch": 0.23, + "learning_rate": 3.8712776176753124e-05, + "loss": 2.5216, + "step": 353500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8709574127441564e-05, + "loss": 2.52, + "step": 353600 + }, + { + "epoch": 0.23, + "learning_rate": 3.870637207813e-05, + "loss": 2.5194, + "step": 353700 + }, + { + "epoch": 0.23, + "learning_rate": 3.870317002881845e-05, + "loss": 2.5032, + "step": 353800 + }, + { + "epoch": 0.23, + "learning_rate": 3.869996797950688e-05, + "loss": 2.5008, + "step": 353900 + }, + { + "epoch": 0.23, + "learning_rate": 3.869676593019533e-05, + "loss": 2.5085, + "step": 354000 + }, + { + "epoch": 0.23, + "eval_loss": 2.5006790161132812, + "eval_runtime": 175.044, + "eval_samples_per_second": 57.128, + "eval_steps_per_second": 3.571, + "step": 354000 + }, + { + "epoch": 0.23, + "learning_rate": 3.869356388088376e-05, + "loss": 2.5099, + "step": 354100 + }, + { + "epoch": 0.23, + "learning_rate": 3.869036183157221e-05, + "loss": 2.509, + "step": 354200 + }, + { + "epoch": 0.23, + "learning_rate": 3.868715978226065e-05, + "loss": 2.5359, + "step": 354300 + }, + { + "epoch": 0.23, + "learning_rate": 3.868395773294909e-05, + "loss": 2.5108, + "step": 354400 + }, + { + "epoch": 0.23, + "learning_rate": 3.8680755683637535e-05, + "loss": 2.5438, + "step": 354500 + }, + { + "epoch": 0.23, + "learning_rate": 3.867755363432597e-05, + "loss": 2.5437, + "step": 354600 + }, + { + "epoch": 0.23, + "learning_rate": 3.8674351585014414e-05, + "loss": 2.5349, + "step": 354700 + }, + { + "epoch": 0.23, + "learning_rate": 3.867114953570285e-05, + "loss": 2.5286, + "step": 354800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8667947486391294e-05, + "loss": 2.5114, + "step": 354900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8664745437079733e-05, + "loss": 2.5082, + "step": 355000 + }, + { + "epoch": 0.23, + "eval_loss": 2.5020599365234375, + "eval_runtime": 176.4383, + "eval_samples_per_second": 56.677, + "eval_steps_per_second": 3.542, + "step": 355000 + }, + { + "epoch": 0.23, + "learning_rate": 3.866154338776817e-05, + "loss": 2.5269, + "step": 355100 + }, + { + "epoch": 0.23, + "learning_rate": 3.865834133845661e-05, + "loss": 2.5355, + "step": 355200 + }, + { + "epoch": 0.23, + "learning_rate": 3.865513928914505e-05, + "loss": 2.5141, + "step": 355300 + }, + { + "epoch": 0.23, + "learning_rate": 3.86519372398335e-05, + "loss": 2.5175, + "step": 355400 + }, + { + "epoch": 0.23, + "learning_rate": 3.864873519052193e-05, + "loss": 2.5303, + "step": 355500 + }, + { + "epoch": 0.23, + "learning_rate": 3.864553314121038e-05, + "loss": 2.5186, + "step": 355600 + }, + { + "epoch": 0.23, + "learning_rate": 3.864233109189881e-05, + "loss": 2.5079, + "step": 355700 + }, + { + "epoch": 0.23, + "learning_rate": 3.863912904258726e-05, + "loss": 2.5176, + "step": 355800 + }, + { + "epoch": 0.23, + "learning_rate": 3.86359269932757e-05, + "loss": 2.5181, + "step": 355900 + }, + { + "epoch": 0.23, + "learning_rate": 3.863272494396414e-05, + "loss": 2.5136, + "step": 356000 + }, + { + "epoch": 0.23, + "eval_loss": 2.501915216445923, + "eval_runtime": 177.034, + "eval_samples_per_second": 56.486, + "eval_steps_per_second": 3.53, + "step": 356000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8629522894652584e-05, + "loss": 2.5119, + "step": 356100 + }, + { + "epoch": 0.23, + "learning_rate": 3.862632084534102e-05, + "loss": 2.5295, + "step": 356200 + }, + { + "epoch": 0.23, + "learning_rate": 3.8623118796029464e-05, + "loss": 2.5161, + "step": 356300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8619916746717897e-05, + "loss": 2.5175, + "step": 356400 + }, + { + "epoch": 0.23, + "learning_rate": 3.861671469740634e-05, + "loss": 2.5247, + "step": 356500 + }, + { + "epoch": 0.23, + "learning_rate": 3.861351264809478e-05, + "loss": 2.5361, + "step": 356600 + }, + { + "epoch": 0.23, + "learning_rate": 3.861031059878322e-05, + "loss": 2.5401, + "step": 356700 + }, + { + "epoch": 0.23, + "learning_rate": 3.860710854947166e-05, + "loss": 2.5127, + "step": 356800 + }, + { + "epoch": 0.23, + "learning_rate": 3.86039065001601e-05, + "loss": 2.5346, + "step": 356900 + }, + { + "epoch": 0.23, + "learning_rate": 3.860070445084855e-05, + "loss": 2.542, + "step": 357000 + }, + { + "epoch": 0.23, + "eval_loss": 2.4995999336242676, + "eval_runtime": 180.1686, + "eval_samples_per_second": 55.504, + "eval_steps_per_second": 3.469, + "step": 357000 + }, + { + "epoch": 0.23, + "learning_rate": 3.859750240153698e-05, + "loss": 2.5014, + "step": 357100 + }, + { + "epoch": 0.23, + "learning_rate": 3.859430035222543e-05, + "loss": 2.5226, + "step": 357200 + }, + { + "epoch": 0.23, + "learning_rate": 3.859109830291386e-05, + "loss": 2.5191, + "step": 357300 + }, + { + "epoch": 0.23, + "learning_rate": 3.858789625360231e-05, + "loss": 2.5267, + "step": 357400 + }, + { + "epoch": 0.23, + "learning_rate": 3.858469420429075e-05, + "loss": 2.5142, + "step": 357500 + }, + { + "epoch": 0.23, + "learning_rate": 3.858149215497919e-05, + "loss": 2.532, + "step": 357600 + }, + { + "epoch": 0.23, + "learning_rate": 3.8578290105667633e-05, + "loss": 2.5104, + "step": 357700 + }, + { + "epoch": 0.23, + "learning_rate": 3.8575088056356066e-05, + "loss": 2.5333, + "step": 357800 + }, + { + "epoch": 0.23, + "learning_rate": 3.857188600704451e-05, + "loss": 2.5213, + "step": 357900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8568683957732946e-05, + "loss": 2.5094, + "step": 358000 + }, + { + "epoch": 0.23, + "eval_loss": 2.500349521636963, + "eval_runtime": 179.5109, + "eval_samples_per_second": 55.707, + "eval_steps_per_second": 3.482, + "step": 358000 + }, + { + "epoch": 0.23, + "learning_rate": 3.856548190842139e-05, + "loss": 2.5176, + "step": 358100 + }, + { + "epoch": 0.23, + "learning_rate": 3.856227985910983e-05, + "loss": 2.5237, + "step": 358200 + }, + { + "epoch": 0.23, + "learning_rate": 3.855907780979827e-05, + "loss": 2.5199, + "step": 358300 + }, + { + "epoch": 0.23, + "learning_rate": 3.855587576048671e-05, + "loss": 2.5333, + "step": 358400 + }, + { + "epoch": 0.23, + "learning_rate": 3.855267371117515e-05, + "loss": 2.5012, + "step": 358500 + }, + { + "epoch": 0.23, + "learning_rate": 3.85494716618636e-05, + "loss": 2.5268, + "step": 358600 + }, + { + "epoch": 0.23, + "learning_rate": 3.854626961255203e-05, + "loss": 2.5097, + "step": 358700 + }, + { + "epoch": 0.23, + "learning_rate": 3.854306756324048e-05, + "loss": 2.5219, + "step": 358800 + }, + { + "epoch": 0.23, + "learning_rate": 3.853986551392892e-05, + "loss": 2.5008, + "step": 358900 + }, + { + "epoch": 0.23, + "learning_rate": 3.853666346461736e-05, + "loss": 2.5321, + "step": 359000 + }, + { + "epoch": 0.23, + "eval_loss": 2.4990463256835938, + "eval_runtime": 176.9791, + "eval_samples_per_second": 56.504, + "eval_steps_per_second": 3.531, + "step": 359000 + }, + { + "epoch": 0.23, + "learning_rate": 3.85334614153058e-05, + "loss": 2.5267, + "step": 359100 + }, + { + "epoch": 0.23, + "learning_rate": 3.8530259365994236e-05, + "loss": 2.5116, + "step": 359200 + }, + { + "epoch": 0.23, + "learning_rate": 3.852705731668268e-05, + "loss": 2.5269, + "step": 359300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8523855267371116e-05, + "loss": 2.5122, + "step": 359400 + }, + { + "epoch": 0.23, + "learning_rate": 3.852065321805956e-05, + "loss": 2.5173, + "step": 359500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8517451168748e-05, + "loss": 2.5284, + "step": 359600 + }, + { + "epoch": 0.23, + "learning_rate": 3.851424911943644e-05, + "loss": 2.5028, + "step": 359700 + }, + { + "epoch": 0.23, + "learning_rate": 3.851104707012488e-05, + "loss": 2.5211, + "step": 359800 + }, + { + "epoch": 0.23, + "learning_rate": 3.850784502081332e-05, + "loss": 2.515, + "step": 359900 + }, + { + "epoch": 0.23, + "learning_rate": 3.850464297150176e-05, + "loss": 2.5045, + "step": 360000 + }, + { + "epoch": 0.23, + "eval_loss": 2.5016562938690186, + "eval_runtime": 176.9991, + "eval_samples_per_second": 56.497, + "eval_steps_per_second": 3.531, + "step": 360000 + }, + { + "epoch": 0.23, + "learning_rate": 3.85014409221902e-05, + "loss": 2.5153, + "step": 360100 + }, + { + "epoch": 0.23, + "learning_rate": 3.849823887287865e-05, + "loss": 2.4965, + "step": 360200 + }, + { + "epoch": 0.23, + "learning_rate": 3.849503682356708e-05, + "loss": 2.516, + "step": 360300 + }, + { + "epoch": 0.23, + "learning_rate": 3.849183477425553e-05, + "loss": 2.5196, + "step": 360400 + }, + { + "epoch": 0.23, + "learning_rate": 3.8488632724943967e-05, + "loss": 2.5213, + "step": 360500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8485430675632406e-05, + "loss": 2.5319, + "step": 360600 + }, + { + "epoch": 0.23, + "learning_rate": 3.8482228626320846e-05, + "loss": 2.5034, + "step": 360700 + }, + { + "epoch": 0.23, + "learning_rate": 3.8479026577009286e-05, + "loss": 2.5259, + "step": 360800 + }, + { + "epoch": 0.23, + "learning_rate": 3.847582452769773e-05, + "loss": 2.5339, + "step": 360900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8472622478386165e-05, + "loss": 2.5079, + "step": 361000 + }, + { + "epoch": 0.23, + "eval_loss": 2.498728036880493, + "eval_runtime": 174.5838, + "eval_samples_per_second": 57.279, + "eval_steps_per_second": 3.58, + "step": 361000 + }, + { + "epoch": 0.23, + "learning_rate": 3.846942042907461e-05, + "loss": 2.5167, + "step": 361100 + }, + { + "epoch": 0.23, + "learning_rate": 3.846621837976305e-05, + "loss": 2.5128, + "step": 361200 + }, + { + "epoch": 0.23, + "learning_rate": 3.846301633045149e-05, + "loss": 2.5308, + "step": 361300 + }, + { + "epoch": 0.23, + "learning_rate": 3.845981428113993e-05, + "loss": 2.5099, + "step": 361400 + }, + { + "epoch": 0.23, + "learning_rate": 3.845661223182837e-05, + "loss": 2.5277, + "step": 361500 + }, + { + "epoch": 0.23, + "learning_rate": 3.845341018251681e-05, + "loss": 2.5191, + "step": 361600 + }, + { + "epoch": 0.23, + "learning_rate": 3.845020813320525e-05, + "loss": 2.5065, + "step": 361700 + }, + { + "epoch": 0.23, + "learning_rate": 3.84470060838937e-05, + "loss": 2.5314, + "step": 361800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8443804034582136e-05, + "loss": 2.5064, + "step": 361900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8440601985270576e-05, + "loss": 2.5229, + "step": 362000 + }, + { + "epoch": 0.23, + "eval_loss": 2.5006937980651855, + "eval_runtime": 178.2317, + "eval_samples_per_second": 56.107, + "eval_steps_per_second": 3.507, + "step": 362000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8437399935959016e-05, + "loss": 2.5102, + "step": 362100 + }, + { + "epoch": 0.23, + "learning_rate": 3.8434197886647456e-05, + "loss": 2.5236, + "step": 362200 + }, + { + "epoch": 0.23, + "learning_rate": 3.8430995837335895e-05, + "loss": 2.4997, + "step": 362300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8427793788024335e-05, + "loss": 2.5303, + "step": 362400 + }, + { + "epoch": 0.23, + "learning_rate": 3.842459173871278e-05, + "loss": 2.503, + "step": 362500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8421389689401215e-05, + "loss": 2.5286, + "step": 362600 + }, + { + "epoch": 0.23, + "learning_rate": 3.841818764008966e-05, + "loss": 2.5163, + "step": 362700 + }, + { + "epoch": 0.23, + "learning_rate": 3.84149855907781e-05, + "loss": 2.5096, + "step": 362800 + }, + { + "epoch": 0.23, + "learning_rate": 3.841178354146654e-05, + "loss": 2.5302, + "step": 362900 + }, + { + "epoch": 0.23, + "learning_rate": 3.840858149215498e-05, + "loss": 2.4782, + "step": 363000 + }, + { + "epoch": 0.23, + "eval_loss": 2.5006227493286133, + "eval_runtime": 179.5352, + "eval_samples_per_second": 55.699, + "eval_steps_per_second": 3.481, + "step": 363000 + }, + { + "epoch": 0.23, + "learning_rate": 3.840537944284342e-05, + "loss": 2.5197, + "step": 363100 + }, + { + "epoch": 0.23, + "learning_rate": 3.840217739353186e-05, + "loss": 2.4934, + "step": 363200 + }, + { + "epoch": 0.23, + "learning_rate": 3.83989753442203e-05, + "loss": 2.5161, + "step": 363300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8395773294908746e-05, + "loss": 2.4989, + "step": 363400 + }, + { + "epoch": 0.23, + "learning_rate": 3.8392571245597186e-05, + "loss": 2.5181, + "step": 363500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8389369196285626e-05, + "loss": 2.5186, + "step": 363600 + }, + { + "epoch": 0.23, + "learning_rate": 3.8386167146974065e-05, + "loss": 2.4932, + "step": 363700 + }, + { + "epoch": 0.23, + "learning_rate": 3.8382965097662505e-05, + "loss": 2.5351, + "step": 363800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8379763048350945e-05, + "loss": 2.5293, + "step": 363900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8376560999039384e-05, + "loss": 2.5084, + "step": 364000 + }, + { + "epoch": 0.23, + "eval_loss": 2.4995594024658203, + "eval_runtime": 178.5309, + "eval_samples_per_second": 56.013, + "eval_steps_per_second": 3.501, + "step": 364000 + }, + { + "epoch": 0.23, + "learning_rate": 3.837335894972783e-05, + "loss": 2.499, + "step": 364100 + }, + { + "epoch": 0.23, + "learning_rate": 3.837015690041627e-05, + "loss": 2.4895, + "step": 364200 + }, + { + "epoch": 0.23, + "learning_rate": 3.836695485110471e-05, + "loss": 2.5049, + "step": 364300 + }, + { + "epoch": 0.23, + "learning_rate": 3.836375280179315e-05, + "loss": 2.515, + "step": 364400 + }, + { + "epoch": 0.23, + "learning_rate": 3.836055075248159e-05, + "loss": 2.497, + "step": 364500 + }, + { + "epoch": 0.23, + "learning_rate": 3.835734870317003e-05, + "loss": 2.501, + "step": 364600 + }, + { + "epoch": 0.23, + "learning_rate": 3.835414665385847e-05, + "loss": 2.4983, + "step": 364700 + }, + { + "epoch": 0.23, + "learning_rate": 3.835094460454691e-05, + "loss": 2.513, + "step": 364800 + }, + { + "epoch": 0.23, + "learning_rate": 3.834774255523535e-05, + "loss": 2.5129, + "step": 364900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8344540505923795e-05, + "loss": 2.4935, + "step": 365000 + }, + { + "epoch": 0.23, + "eval_loss": 2.499540090560913, + "eval_runtime": 174.256, + "eval_samples_per_second": 57.387, + "eval_steps_per_second": 3.587, + "step": 365000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8341338456612235e-05, + "loss": 2.5025, + "step": 365100 + }, + { + "epoch": 0.23, + "learning_rate": 3.8338136407300675e-05, + "loss": 2.5153, + "step": 365200 + }, + { + "epoch": 0.23, + "learning_rate": 3.8334934357989115e-05, + "loss": 2.5086, + "step": 365300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8331732308677554e-05, + "loss": 2.4997, + "step": 365400 + }, + { + "epoch": 0.23, + "learning_rate": 3.8328530259365994e-05, + "loss": 2.5072, + "step": 365500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8325328210054434e-05, + "loss": 2.5187, + "step": 365600 + }, + { + "epoch": 0.23, + "learning_rate": 3.832212616074288e-05, + "loss": 2.5381, + "step": 365700 + }, + { + "epoch": 0.23, + "learning_rate": 3.831892411143132e-05, + "loss": 2.4854, + "step": 365800 + }, + { + "epoch": 0.23, + "learning_rate": 3.831572206211976e-05, + "loss": 2.5078, + "step": 365900 + }, + { + "epoch": 0.23, + "learning_rate": 3.83125200128082e-05, + "loss": 2.5088, + "step": 366000 + }, + { + "epoch": 0.23, + "eval_loss": 2.4976606369018555, + "eval_runtime": 174.5916, + "eval_samples_per_second": 57.277, + "eval_steps_per_second": 3.58, + "step": 366000 + }, + { + "epoch": 0.23, + "learning_rate": 3.830931796349664e-05, + "loss": 2.5265, + "step": 366100 + }, + { + "epoch": 0.23, + "learning_rate": 3.830611591418508e-05, + "loss": 2.5259, + "step": 366200 + }, + { + "epoch": 0.23, + "learning_rate": 3.830291386487352e-05, + "loss": 2.5045, + "step": 366300 + }, + { + "epoch": 0.23, + "learning_rate": 3.829971181556196e-05, + "loss": 2.5072, + "step": 366400 + }, + { + "epoch": 0.23, + "learning_rate": 3.8296509766250405e-05, + "loss": 2.5038, + "step": 366500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8293307716938845e-05, + "loss": 2.5233, + "step": 366600 + }, + { + "epoch": 0.23, + "learning_rate": 3.8290105667627285e-05, + "loss": 2.5389, + "step": 366700 + }, + { + "epoch": 0.23, + "learning_rate": 3.8286903618315724e-05, + "loss": 2.5118, + "step": 366800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8283701569004164e-05, + "loss": 2.4953, + "step": 366900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8280499519692604e-05, + "loss": 2.5081, + "step": 367000 + }, + { + "epoch": 0.23, + "eval_loss": 2.4992783069610596, + "eval_runtime": 174.4627, + "eval_samples_per_second": 57.319, + "eval_steps_per_second": 3.582, + "step": 367000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8277297470381043e-05, + "loss": 2.5201, + "step": 367100 + }, + { + "epoch": 0.24, + "learning_rate": 3.827409542106948e-05, + "loss": 2.5032, + "step": 367200 + }, + { + "epoch": 0.24, + "learning_rate": 3.827089337175793e-05, + "loss": 2.5098, + "step": 367300 + }, + { + "epoch": 0.24, + "learning_rate": 3.826769132244637e-05, + "loss": 2.4947, + "step": 367400 + }, + { + "epoch": 0.24, + "learning_rate": 3.826448927313481e-05, + "loss": 2.5109, + "step": 367500 + }, + { + "epoch": 0.24, + "learning_rate": 3.826128722382325e-05, + "loss": 2.5234, + "step": 367600 + }, + { + "epoch": 0.24, + "learning_rate": 3.825808517451169e-05, + "loss": 2.5035, + "step": 367700 + }, + { + "epoch": 0.24, + "learning_rate": 3.825488312520013e-05, + "loss": 2.5126, + "step": 367800 + }, + { + "epoch": 0.24, + "learning_rate": 3.825168107588857e-05, + "loss": 2.4972, + "step": 367900 + }, + { + "epoch": 0.24, + "learning_rate": 3.824847902657701e-05, + "loss": 2.5145, + "step": 368000 + }, + { + "epoch": 0.24, + "eval_loss": 2.498431444168091, + "eval_runtime": 177.0359, + "eval_samples_per_second": 56.486, + "eval_steps_per_second": 3.53, + "step": 368000 + }, + { + "epoch": 0.24, + "learning_rate": 3.8245276977265454e-05, + "loss": 2.484, + "step": 368100 + }, + { + "epoch": 0.24, + "learning_rate": 3.8242074927953894e-05, + "loss": 2.4975, + "step": 368200 + }, + { + "epoch": 0.24, + "learning_rate": 3.8238872878642334e-05, + "loss": 2.4994, + "step": 368300 + }, + { + "epoch": 0.24, + "learning_rate": 3.8235670829330774e-05, + "loss": 2.5052, + "step": 368400 + }, + { + "epoch": 0.24, + "learning_rate": 3.823246878001921e-05, + "loss": 2.4882, + "step": 368500 + }, + { + "epoch": 0.24, + "learning_rate": 3.822926673070765e-05, + "loss": 2.4856, + "step": 368600 + }, + { + "epoch": 0.24, + "learning_rate": 3.822606468139609e-05, + "loss": 2.4974, + "step": 368700 + }, + { + "epoch": 0.24, + "learning_rate": 3.822286263208454e-05, + "loss": 2.5006, + "step": 368800 + }, + { + "epoch": 0.24, + "learning_rate": 3.821966058277298e-05, + "loss": 2.4981, + "step": 368900 + }, + { + "epoch": 0.24, + "learning_rate": 3.821645853346142e-05, + "loss": 2.4965, + "step": 369000 + }, + { + "epoch": 0.24, + "eval_loss": 2.499391794204712, + "eval_runtime": 176.8888, + "eval_samples_per_second": 56.533, + "eval_steps_per_second": 3.533, + "step": 369000 + }, + { + "epoch": 0.24, + "learning_rate": 3.821325648414986e-05, + "loss": 2.5005, + "step": 369100 + }, + { + "epoch": 0.24, + "learning_rate": 3.82100544348383e-05, + "loss": 2.4901, + "step": 369200 + }, + { + "epoch": 0.24, + "learning_rate": 3.820685238552674e-05, + "loss": 2.5005, + "step": 369300 + }, + { + "epoch": 0.24, + "learning_rate": 3.820365033621518e-05, + "loss": 2.4681, + "step": 369400 + }, + { + "epoch": 0.24, + "learning_rate": 3.8200448286903624e-05, + "loss": 2.4975, + "step": 369500 + }, + { + "epoch": 0.24, + "learning_rate": 3.819724623759206e-05, + "loss": 2.496, + "step": 369600 + }, + { + "epoch": 0.24, + "learning_rate": 3.8194044188280504e-05, + "loss": 2.5186, + "step": 369700 + }, + { + "epoch": 0.24, + "learning_rate": 3.8190842138968944e-05, + "loss": 2.4931, + "step": 369800 + }, + { + "epoch": 0.24, + "learning_rate": 3.818764008965738e-05, + "loss": 2.4973, + "step": 369900 + }, + { + "epoch": 0.24, + "learning_rate": 3.818443804034582e-05, + "loss": 2.5053, + "step": 370000 + }, + { + "epoch": 0.24, + "eval_loss": 2.498286247253418, + "eval_runtime": 176.9249, + "eval_samples_per_second": 56.521, + "eval_steps_per_second": 3.533, + "step": 370000 + }, + { + "epoch": 0.24, + "learning_rate": 3.818123599103426e-05, + "loss": 2.4876, + "step": 370100 + }, + { + "epoch": 0.24, + "learning_rate": 3.81780339417227e-05, + "loss": 2.4781, + "step": 370200 + }, + { + "epoch": 0.24, + "learning_rate": 3.817483189241114e-05, + "loss": 2.512, + "step": 370300 + }, + { + "epoch": 0.24, + "learning_rate": 3.817162984309959e-05, + "loss": 2.4961, + "step": 370400 + }, + { + "epoch": 0.24, + "learning_rate": 3.816842779378803e-05, + "loss": 2.4942, + "step": 370500 + }, + { + "epoch": 0.24, + "learning_rate": 3.816522574447647e-05, + "loss": 2.4921, + "step": 370600 + }, + { + "epoch": 0.24, + "learning_rate": 3.816202369516491e-05, + "loss": 2.492, + "step": 370700 + }, + { + "epoch": 0.24, + "learning_rate": 3.815882164585335e-05, + "loss": 2.488, + "step": 370800 + }, + { + "epoch": 0.24, + "learning_rate": 3.815561959654179e-05, + "loss": 2.4957, + "step": 370900 + }, + { + "epoch": 0.24, + "learning_rate": 3.815241754723023e-05, + "loss": 2.4859, + "step": 371000 + }, + { + "epoch": 0.24, + "eval_loss": 2.499032974243164, + "eval_runtime": 177.6943, + "eval_samples_per_second": 56.276, + "eval_steps_per_second": 3.517, + "step": 371000 + }, + { + "epoch": 0.24, + "learning_rate": 3.8149215497918674e-05, + "loss": 2.5002, + "step": 371100 + }, + { + "epoch": 0.24, + "learning_rate": 3.814601344860711e-05, + "loss": 2.4838, + "step": 371200 + }, + { + "epoch": 0.24, + "learning_rate": 3.814281139929555e-05, + "loss": 2.4838, + "step": 371300 + }, + { + "epoch": 0.24, + "learning_rate": 3.813960934998399e-05, + "loss": 2.477, + "step": 371400 + }, + { + "epoch": 0.24, + "learning_rate": 3.813640730067243e-05, + "loss": 2.4773, + "step": 371500 + }, + { + "epoch": 0.24, + "learning_rate": 3.813320525136087e-05, + "loss": 2.4868, + "step": 371600 + }, + { + "epoch": 0.24, + "learning_rate": 3.813000320204931e-05, + "loss": 2.5113, + "step": 371700 + }, + { + "epoch": 0.24, + "learning_rate": 3.812680115273776e-05, + "loss": 2.4901, + "step": 371800 + }, + { + "epoch": 0.24, + "learning_rate": 3.812359910342619e-05, + "loss": 2.4744, + "step": 371900 + }, + { + "epoch": 0.24, + "learning_rate": 3.812039705411464e-05, + "loss": 2.5048, + "step": 372000 + }, + { + "epoch": 0.24, + "eval_loss": 2.497897148132324, + "eval_runtime": 173.8771, + "eval_samples_per_second": 57.512, + "eval_steps_per_second": 3.594, + "step": 372000 + }, + { + "epoch": 0.24, + "learning_rate": 3.811719500480308e-05, + "loss": 2.4887, + "step": 372100 + }, + { + "epoch": 0.24, + "learning_rate": 3.811399295549152e-05, + "loss": 2.5085, + "step": 372200 + }, + { + "epoch": 0.24, + "learning_rate": 3.811079090617996e-05, + "loss": 2.5021, + "step": 372300 + }, + { + "epoch": 0.24, + "learning_rate": 3.81075888568684e-05, + "loss": 2.5068, + "step": 372400 + }, + { + "epoch": 0.24, + "learning_rate": 3.810438680755684e-05, + "loss": 2.4976, + "step": 372500 + }, + { + "epoch": 0.24, + "learning_rate": 3.8101184758245277e-05, + "loss": 2.4909, + "step": 372600 + }, + { + "epoch": 0.24, + "learning_rate": 3.809798270893372e-05, + "loss": 2.4874, + "step": 372700 + }, + { + "epoch": 0.24, + "learning_rate": 3.8094780659622156e-05, + "loss": 2.4913, + "step": 372800 + }, + { + "epoch": 0.24, + "learning_rate": 3.80915786103106e-05, + "loss": 2.4973, + "step": 372900 + }, + { + "epoch": 0.24, + "learning_rate": 3.808837656099904e-05, + "loss": 2.5019, + "step": 373000 + }, + { + "epoch": 0.24, + "eval_loss": 2.5008938312530518, + "eval_runtime": 174.588, + "eval_samples_per_second": 57.278, + "eval_steps_per_second": 3.58, + "step": 373000 + }, + { + "epoch": 0.24, + "learning_rate": 3.808517451168748e-05, + "loss": 2.499, + "step": 373100 + }, + { + "epoch": 0.24, + "learning_rate": 3.808197246237592e-05, + "loss": 2.4925, + "step": 373200 + }, + { + "epoch": 0.24, + "learning_rate": 3.807877041306436e-05, + "loss": 2.4797, + "step": 373300 + }, + { + "epoch": 0.24, + "learning_rate": 3.807556836375281e-05, + "loss": 2.5389, + "step": 373400 + }, + { + "epoch": 0.24, + "learning_rate": 3.807236631444124e-05, + "loss": 2.5107, + "step": 373500 + }, + { + "epoch": 0.24, + "learning_rate": 3.806916426512969e-05, + "loss": 2.51, + "step": 373600 + }, + { + "epoch": 0.24, + "learning_rate": 3.806596221581813e-05, + "loss": 2.5184, + "step": 373700 + }, + { + "epoch": 0.24, + "learning_rate": 3.806276016650657e-05, + "loss": 2.4962, + "step": 373800 + }, + { + "epoch": 0.24, + "learning_rate": 3.805955811719501e-05, + "loss": 2.522, + "step": 373900 + }, + { + "epoch": 0.24, + "learning_rate": 3.8056356067883446e-05, + "loss": 2.4978, + "step": 374000 + }, + { + "epoch": 0.24, + "eval_loss": 2.496070146560669, + "eval_runtime": 173.6201, + "eval_samples_per_second": 57.597, + "eval_steps_per_second": 3.6, + "step": 374000 + }, + { + "epoch": 0.24, + "learning_rate": 3.805315401857189e-05, + "loss": 2.5088, + "step": 374100 + }, + { + "epoch": 0.24, + "learning_rate": 3.8049951969260326e-05, + "loss": 2.5204, + "step": 374200 + }, + { + "epoch": 0.24, + "learning_rate": 3.804674991994877e-05, + "loss": 2.5002, + "step": 374300 + }, + { + "epoch": 0.24, + "learning_rate": 3.8043547870637205e-05, + "loss": 2.5399, + "step": 374400 + }, + { + "epoch": 0.24, + "learning_rate": 3.804034582132565e-05, + "loss": 2.4997, + "step": 374500 + }, + { + "epoch": 0.24, + "learning_rate": 3.803714377201409e-05, + "loss": 2.5279, + "step": 374600 + }, + { + "epoch": 0.24, + "learning_rate": 3.803394172270253e-05, + "loss": 2.4982, + "step": 374700 + }, + { + "epoch": 0.24, + "learning_rate": 3.803073967339097e-05, + "loss": 2.5392, + "step": 374800 + }, + { + "epoch": 0.24, + "learning_rate": 3.802753762407941e-05, + "loss": 2.5068, + "step": 374900 + }, + { + "epoch": 0.24, + "learning_rate": 3.802433557476786e-05, + "loss": 2.5149, + "step": 375000 + }, + { + "epoch": 0.24, + "eval_loss": 2.4967455863952637, + "eval_runtime": 176.9338, + "eval_samples_per_second": 56.518, + "eval_steps_per_second": 3.532, + "step": 375000 + }, + { + "epoch": 0.24, + "learning_rate": 3.802113352545629e-05, + "loss": 2.5, + "step": 375100 + }, + { + "epoch": 0.24, + "learning_rate": 3.801793147614474e-05, + "loss": 2.5314, + "step": 375200 + }, + { + "epoch": 0.24, + "learning_rate": 3.8014729426833177e-05, + "loss": 2.5022, + "step": 375300 + }, + { + "epoch": 0.24, + "learning_rate": 3.8011527377521616e-05, + "loss": 2.5078, + "step": 375400 + }, + { + "epoch": 0.24, + "learning_rate": 3.8008325328210056e-05, + "loss": 2.4933, + "step": 375500 + }, + { + "epoch": 0.24, + "learning_rate": 3.8005123278898496e-05, + "loss": 2.5119, + "step": 375600 + }, + { + "epoch": 0.24, + "learning_rate": 3.800192122958694e-05, + "loss": 2.4983, + "step": 375700 + }, + { + "epoch": 0.24, + "learning_rate": 3.7998719180275375e-05, + "loss": 2.4915, + "step": 375800 + }, + { + "epoch": 0.24, + "learning_rate": 3.799551713096382e-05, + "loss": 2.4841, + "step": 375900 + }, + { + "epoch": 0.24, + "learning_rate": 3.7992315081652255e-05, + "loss": 2.5027, + "step": 376000 + }, + { + "epoch": 0.24, + "eval_loss": 2.497227430343628, + "eval_runtime": 177.9629, + "eval_samples_per_second": 56.191, + "eval_steps_per_second": 3.512, + "step": 376000 + }, + { + "epoch": 0.24, + "learning_rate": 3.79891130323407e-05, + "loss": 2.5291, + "step": 376100 + }, + { + "epoch": 0.24, + "learning_rate": 3.798591098302914e-05, + "loss": 2.519, + "step": 376200 + }, + { + "epoch": 0.24, + "learning_rate": 3.798270893371758e-05, + "loss": 2.504, + "step": 376300 + }, + { + "epoch": 0.24, + "learning_rate": 3.797950688440603e-05, + "loss": 2.5066, + "step": 376400 + }, + { + "epoch": 0.24, + "learning_rate": 3.797630483509446e-05, + "loss": 2.5211, + "step": 376500 + }, + { + "epoch": 0.24, + "learning_rate": 3.797310278578291e-05, + "loss": 2.5055, + "step": 376600 + }, + { + "epoch": 0.24, + "learning_rate": 3.796990073647134e-05, + "loss": 2.512, + "step": 376700 + }, + { + "epoch": 0.24, + "learning_rate": 3.7966698687159786e-05, + "loss": 2.4845, + "step": 376800 + }, + { + "epoch": 0.24, + "learning_rate": 3.7963496637848226e-05, + "loss": 2.5076, + "step": 376900 + }, + { + "epoch": 0.24, + "learning_rate": 3.7960294588536666e-05, + "loss": 2.507, + "step": 377000 + }, + { + "epoch": 0.24, + "eval_loss": 2.4986350536346436, + "eval_runtime": 178.4737, + "eval_samples_per_second": 56.031, + "eval_steps_per_second": 3.502, + "step": 377000 + }, + { + "epoch": 0.24, + "learning_rate": 3.7957092539225105e-05, + "loss": 2.5007, + "step": 377100 + }, + { + "epoch": 0.24, + "learning_rate": 3.7953890489913545e-05, + "loss": 2.5171, + "step": 377200 + }, + { + "epoch": 0.24, + "learning_rate": 3.795068844060199e-05, + "loss": 2.5309, + "step": 377300 + }, + { + "epoch": 0.24, + "learning_rate": 3.7947486391290425e-05, + "loss": 2.5166, + "step": 377400 + }, + { + "epoch": 0.24, + "learning_rate": 3.794428434197887e-05, + "loss": 2.4914, + "step": 377500 + }, + { + "epoch": 0.24, + "learning_rate": 3.7941082292667304e-05, + "loss": 2.5082, + "step": 377600 + }, + { + "epoch": 0.24, + "learning_rate": 3.793788024335575e-05, + "loss": 2.5063, + "step": 377700 + }, + { + "epoch": 0.24, + "learning_rate": 3.793467819404419e-05, + "loss": 2.5043, + "step": 377800 + }, + { + "epoch": 0.24, + "learning_rate": 3.793147614473263e-05, + "loss": 2.508, + "step": 377900 + }, + { + "epoch": 0.24, + "learning_rate": 3.792827409542108e-05, + "loss": 2.5115, + "step": 378000 + }, + { + "epoch": 0.24, + "eval_loss": 2.497148036956787, + "eval_runtime": 177.947, + "eval_samples_per_second": 56.196, + "eval_steps_per_second": 3.512, + "step": 378000 + }, + { + "epoch": 0.24, + "learning_rate": 3.792507204610951e-05, + "loss": 2.5246, + "step": 378100 + }, + { + "epoch": 0.24, + "learning_rate": 3.7921869996797956e-05, + "loss": 2.5, + "step": 378200 + }, + { + "epoch": 0.24, + "learning_rate": 3.791866794748639e-05, + "loss": 2.4988, + "step": 378300 + }, + { + "epoch": 0.24, + "learning_rate": 3.7915465898174836e-05, + "loss": 2.486, + "step": 378400 + }, + { + "epoch": 0.24, + "learning_rate": 3.7912263848863275e-05, + "loss": 2.5065, + "step": 378500 + }, + { + "epoch": 0.24, + "learning_rate": 3.7909061799551715e-05, + "loss": 2.5117, + "step": 378600 + }, + { + "epoch": 0.24, + "learning_rate": 3.7905859750240155e-05, + "loss": 2.512, + "step": 378700 + }, + { + "epoch": 0.24, + "learning_rate": 3.7902657700928595e-05, + "loss": 2.5151, + "step": 378800 + }, + { + "epoch": 0.24, + "learning_rate": 3.789945565161704e-05, + "loss": 2.4957, + "step": 378900 + }, + { + "epoch": 0.24, + "learning_rate": 3.7896253602305474e-05, + "loss": 2.5088, + "step": 379000 + }, + { + "epoch": 0.24, + "eval_loss": 2.4963321685791016, + "eval_runtime": 174.046, + "eval_samples_per_second": 57.456, + "eval_steps_per_second": 3.591, + "step": 379000 + }, + { + "epoch": 0.24, + "learning_rate": 3.789305155299392e-05, + "loss": 2.5089, + "step": 379100 + }, + { + "epoch": 0.24, + "learning_rate": 3.7889849503682353e-05, + "loss": 2.5041, + "step": 379200 + }, + { + "epoch": 0.24, + "learning_rate": 3.78866474543708e-05, + "loss": 2.5202, + "step": 379300 + }, + { + "epoch": 0.24, + "learning_rate": 3.788344540505924e-05, + "loss": 2.4987, + "step": 379400 + }, + { + "epoch": 0.24, + "learning_rate": 3.788024335574768e-05, + "loss": 2.4961, + "step": 379500 + }, + { + "epoch": 0.24, + "learning_rate": 3.7877041306436126e-05, + "loss": 2.509, + "step": 379600 + }, + { + "epoch": 0.24, + "learning_rate": 3.787383925712456e-05, + "loss": 2.4977, + "step": 379700 + }, + { + "epoch": 0.24, + "learning_rate": 3.7870637207813005e-05, + "loss": 2.5113, + "step": 379800 + }, + { + "epoch": 0.24, + "learning_rate": 3.786743515850144e-05, + "loss": 2.5187, + "step": 379900 + }, + { + "epoch": 0.24, + "learning_rate": 3.7864233109189885e-05, + "loss": 2.491, + "step": 380000 + }, + { + "epoch": 0.24, + "eval_loss": 2.4965128898620605, + "eval_runtime": 173.6496, + "eval_samples_per_second": 57.587, + "eval_steps_per_second": 3.599, + "step": 380000 + }, + { + "epoch": 0.24, + "learning_rate": 3.7861031059878325e-05, + "loss": 2.5134, + "step": 380100 + }, + { + "epoch": 0.24, + "learning_rate": 3.7857829010566764e-05, + "loss": 2.5061, + "step": 380200 + }, + { + "epoch": 0.24, + "learning_rate": 3.7854626961255204e-05, + "loss": 2.5033, + "step": 380300 + }, + { + "epoch": 0.24, + "learning_rate": 3.7851424911943644e-05, + "loss": 2.5135, + "step": 380400 + }, + { + "epoch": 0.24, + "learning_rate": 3.784822286263209e-05, + "loss": 2.5063, + "step": 380500 + }, + { + "epoch": 0.24, + "learning_rate": 3.7845020813320523e-05, + "loss": 2.521, + "step": 380600 + }, + { + "epoch": 0.24, + "learning_rate": 3.784181876400897e-05, + "loss": 2.5078, + "step": 380700 + }, + { + "epoch": 0.24, + "learning_rate": 3.78386167146974e-05, + "loss": 2.5084, + "step": 380800 + }, + { + "epoch": 0.24, + "learning_rate": 3.783541466538585e-05, + "loss": 2.5144, + "step": 380900 + }, + { + "epoch": 0.24, + "learning_rate": 3.783221261607429e-05, + "loss": 2.4836, + "step": 381000 + }, + { + "epoch": 0.24, + "eval_loss": 2.4974985122680664, + "eval_runtime": 174.9398, + "eval_samples_per_second": 57.163, + "eval_steps_per_second": 3.573, + "step": 381000 + }, + { + "epoch": 0.24, + "learning_rate": 3.782901056676273e-05, + "loss": 2.5083, + "step": 381100 + }, + { + "epoch": 0.24, + "learning_rate": 3.7825808517451175e-05, + "loss": 2.5283, + "step": 381200 + }, + { + "epoch": 0.24, + "learning_rate": 3.782260646813961e-05, + "loss": 2.5143, + "step": 381300 + }, + { + "epoch": 0.24, + "learning_rate": 3.7819404418828055e-05, + "loss": 2.524, + "step": 381400 + }, + { + "epoch": 0.24, + "learning_rate": 3.781620236951649e-05, + "loss": 2.5112, + "step": 381500 + }, + { + "epoch": 0.24, + "learning_rate": 3.7813000320204934e-05, + "loss": 2.516, + "step": 381600 + }, + { + "epoch": 0.24, + "learning_rate": 3.7809798270893374e-05, + "loss": 2.4911, + "step": 381700 + }, + { + "epoch": 0.24, + "learning_rate": 3.7806596221581814e-05, + "loss": 2.5095, + "step": 381800 + }, + { + "epoch": 0.24, + "learning_rate": 3.7803394172270254e-05, + "loss": 2.5176, + "step": 381900 + }, + { + "epoch": 0.24, + "learning_rate": 3.780019212295869e-05, + "loss": 2.501, + "step": 382000 + }, + { + "epoch": 0.24, + "eval_loss": 2.49737548828125, + "eval_runtime": 176.7872, + "eval_samples_per_second": 56.565, + "eval_steps_per_second": 3.535, + "step": 382000 + }, + { + "epoch": 0.24, + "learning_rate": 3.779699007364714e-05, + "loss": 2.515, + "step": 382100 + }, + { + "epoch": 0.24, + "learning_rate": 3.779378802433557e-05, + "loss": 2.5137, + "step": 382200 + }, + { + "epoch": 0.24, + "learning_rate": 3.779058597502402e-05, + "loss": 2.4989, + "step": 382300 + }, + { + "epoch": 0.24, + "learning_rate": 3.778738392571245e-05, + "loss": 2.5079, + "step": 382400 + }, + { + "epoch": 0.24, + "learning_rate": 3.77841818764009e-05, + "loss": 2.5006, + "step": 382500 + }, + { + "epoch": 0.24, + "learning_rate": 3.778097982708934e-05, + "loss": 2.5061, + "step": 382600 + }, + { + "epoch": 0.24, + "learning_rate": 3.777777777777778e-05, + "loss": 2.5081, + "step": 382700 + }, + { + "epoch": 0.24, + "learning_rate": 3.7774575728466225e-05, + "loss": 2.5005, + "step": 382800 + }, + { + "epoch": 0.25, + "learning_rate": 3.777137367915466e-05, + "loss": 2.4997, + "step": 382900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7768171629843104e-05, + "loss": 2.5135, + "step": 383000 + }, + { + "epoch": 0.25, + "eval_loss": 2.496156692504883, + "eval_runtime": 177.1933, + "eval_samples_per_second": 56.436, + "eval_steps_per_second": 3.527, + "step": 383000 + }, + { + "epoch": 0.25, + "learning_rate": 3.776496958053154e-05, + "loss": 2.4954, + "step": 383100 + }, + { + "epoch": 0.25, + "learning_rate": 3.7761767531219984e-05, + "loss": 2.5033, + "step": 383200 + }, + { + "epoch": 0.25, + "learning_rate": 3.7758565481908423e-05, + "loss": 2.5158, + "step": 383300 + }, + { + "epoch": 0.25, + "learning_rate": 3.775536343259686e-05, + "loss": 2.4833, + "step": 383400 + }, + { + "epoch": 0.25, + "learning_rate": 3.77521613832853e-05, + "loss": 2.5069, + "step": 383500 + }, + { + "epoch": 0.25, + "learning_rate": 3.774895933397374e-05, + "loss": 2.5084, + "step": 383600 + }, + { + "epoch": 0.25, + "learning_rate": 3.774575728466219e-05, + "loss": 2.5297, + "step": 383700 + }, + { + "epoch": 0.25, + "learning_rate": 3.774255523535062e-05, + "loss": 2.5019, + "step": 383800 + }, + { + "epoch": 0.25, + "learning_rate": 3.773935318603907e-05, + "loss": 2.5263, + "step": 383900 + }, + { + "epoch": 0.25, + "learning_rate": 3.773615113672751e-05, + "loss": 2.5243, + "step": 384000 + }, + { + "epoch": 0.25, + "eval_loss": 2.49690318107605, + "eval_runtime": 176.5316, + "eval_samples_per_second": 56.647, + "eval_steps_per_second": 3.54, + "step": 384000 + }, + { + "epoch": 0.25, + "learning_rate": 3.773294908741595e-05, + "loss": 2.5084, + "step": 384100 + }, + { + "epoch": 0.25, + "learning_rate": 3.772974703810439e-05, + "loss": 2.5146, + "step": 384200 + }, + { + "epoch": 0.25, + "learning_rate": 3.772654498879283e-05, + "loss": 2.5167, + "step": 384300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7723342939481274e-05, + "loss": 2.5029, + "step": 384400 + }, + { + "epoch": 0.25, + "learning_rate": 3.772014089016971e-05, + "loss": 2.4988, + "step": 384500 + }, + { + "epoch": 0.25, + "learning_rate": 3.7716938840858154e-05, + "loss": 2.5149, + "step": 384600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7713736791546587e-05, + "loss": 2.4904, + "step": 384700 + }, + { + "epoch": 0.25, + "learning_rate": 3.771053474223503e-05, + "loss": 2.508, + "step": 384800 + }, + { + "epoch": 0.25, + "learning_rate": 3.770733269292347e-05, + "loss": 2.511, + "step": 384900 + }, + { + "epoch": 0.25, + "learning_rate": 3.770413064361191e-05, + "loss": 2.518, + "step": 385000 + }, + { + "epoch": 0.25, + "eval_loss": 2.496809482574463, + "eval_runtime": 177.0211, + "eval_samples_per_second": 56.49, + "eval_steps_per_second": 3.531, + "step": 385000 + }, + { + "epoch": 0.25, + "learning_rate": 3.770092859430035e-05, + "loss": 2.513, + "step": 385100 + }, + { + "epoch": 0.25, + "learning_rate": 3.769772654498879e-05, + "loss": 2.4858, + "step": 385200 + }, + { + "epoch": 0.25, + "learning_rate": 3.769452449567724e-05, + "loss": 2.515, + "step": 385300 + }, + { + "epoch": 0.25, + "learning_rate": 3.769132244636567e-05, + "loss": 2.5217, + "step": 385400 + }, + { + "epoch": 0.25, + "learning_rate": 3.768812039705412e-05, + "loss": 2.5021, + "step": 385500 + }, + { + "epoch": 0.25, + "learning_rate": 3.768491834774256e-05, + "loss": 2.4919, + "step": 385600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7681716298431e-05, + "loss": 2.4961, + "step": 385700 + }, + { + "epoch": 0.25, + "learning_rate": 3.767851424911944e-05, + "loss": 2.5035, + "step": 385800 + }, + { + "epoch": 0.25, + "learning_rate": 3.767531219980788e-05, + "loss": 2.5346, + "step": 385900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7672110150496323e-05, + "loss": 2.5015, + "step": 386000 + }, + { + "epoch": 0.25, + "eval_loss": 2.4960687160491943, + "eval_runtime": 177.1294, + "eval_samples_per_second": 56.456, + "eval_steps_per_second": 3.528, + "step": 386000 + }, + { + "epoch": 0.25, + "learning_rate": 3.7668908101184756e-05, + "loss": 2.4868, + "step": 386100 + }, + { + "epoch": 0.25, + "learning_rate": 3.76657060518732e-05, + "loss": 2.5166, + "step": 386200 + }, + { + "epoch": 0.25, + "learning_rate": 3.766250400256164e-05, + "loss": 2.4984, + "step": 386300 + }, + { + "epoch": 0.25, + "learning_rate": 3.765930195325008e-05, + "loss": 2.493, + "step": 386400 + }, + { + "epoch": 0.25, + "learning_rate": 3.765609990393852e-05, + "loss": 2.4937, + "step": 386500 + }, + { + "epoch": 0.25, + "learning_rate": 3.765289785462696e-05, + "loss": 2.5139, + "step": 386600 + }, + { + "epoch": 0.25, + "learning_rate": 3.76496958053154e-05, + "loss": 2.4961, + "step": 386700 + }, + { + "epoch": 0.25, + "learning_rate": 3.764649375600384e-05, + "loss": 2.5078, + "step": 386800 + }, + { + "epoch": 0.25, + "learning_rate": 3.764329170669229e-05, + "loss": 2.5074, + "step": 386900 + }, + { + "epoch": 0.25, + "learning_rate": 3.764008965738073e-05, + "loss": 2.5123, + "step": 387000 + }, + { + "epoch": 0.25, + "eval_loss": 2.496873378753662, + "eval_runtime": 173.3967, + "eval_samples_per_second": 57.671, + "eval_steps_per_second": 3.604, + "step": 387000 + }, + { + "epoch": 0.25, + "learning_rate": 3.763688760806917e-05, + "loss": 2.492, + "step": 387100 + }, + { + "epoch": 0.25, + "learning_rate": 3.763368555875761e-05, + "loss": 2.5076, + "step": 387200 + }, + { + "epoch": 0.25, + "learning_rate": 3.763048350944605e-05, + "loss": 2.5064, + "step": 387300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7627281460134487e-05, + "loss": 2.5212, + "step": 387400 + }, + { + "epoch": 0.25, + "learning_rate": 3.7624079410822926e-05, + "loss": 2.5205, + "step": 387500 + }, + { + "epoch": 0.25, + "learning_rate": 3.762087736151137e-05, + "loss": 2.4969, + "step": 387600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7617675312199806e-05, + "loss": 2.4872, + "step": 387700 + }, + { + "epoch": 0.25, + "learning_rate": 3.761447326288825e-05, + "loss": 2.4822, + "step": 387800 + }, + { + "epoch": 0.25, + "learning_rate": 3.761127121357669e-05, + "loss": 2.4989, + "step": 387900 + }, + { + "epoch": 0.25, + "learning_rate": 3.760806916426513e-05, + "loss": 2.5161, + "step": 388000 + }, + { + "epoch": 0.25, + "eval_loss": 2.4972026348114014, + "eval_runtime": 173.6832, + "eval_samples_per_second": 57.576, + "eval_steps_per_second": 3.599, + "step": 388000 + }, + { + "epoch": 0.25, + "learning_rate": 3.760486711495357e-05, + "loss": 2.5165, + "step": 388100 + }, + { + "epoch": 0.25, + "learning_rate": 3.760166506564201e-05, + "loss": 2.4943, + "step": 388200 + }, + { + "epoch": 0.25, + "learning_rate": 3.759846301633045e-05, + "loss": 2.4975, + "step": 388300 + }, + { + "epoch": 0.25, + "learning_rate": 3.759526096701889e-05, + "loss": 2.5096, + "step": 388400 + }, + { + "epoch": 0.25, + "learning_rate": 3.759205891770734e-05, + "loss": 2.4847, + "step": 388500 + }, + { + "epoch": 0.25, + "learning_rate": 3.758885686839578e-05, + "loss": 2.4915, + "step": 388600 + }, + { + "epoch": 0.25, + "learning_rate": 3.758565481908422e-05, + "loss": 2.5114, + "step": 388700 + }, + { + "epoch": 0.25, + "learning_rate": 3.7582452769772657e-05, + "loss": 2.495, + "step": 388800 + }, + { + "epoch": 0.25, + "learning_rate": 3.7579250720461096e-05, + "loss": 2.5039, + "step": 388900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7576048671149536e-05, + "loss": 2.4924, + "step": 389000 + }, + { + "epoch": 0.25, + "eval_loss": 2.4967758655548096, + "eval_runtime": 174.0389, + "eval_samples_per_second": 57.458, + "eval_steps_per_second": 3.591, + "step": 389000 + }, + { + "epoch": 0.25, + "learning_rate": 3.7572846621837976e-05, + "loss": 2.4837, + "step": 389100 + }, + { + "epoch": 0.25, + "learning_rate": 3.756964457252642e-05, + "loss": 2.4728, + "step": 389200 + }, + { + "epoch": 0.25, + "learning_rate": 3.756644252321486e-05, + "loss": 2.4844, + "step": 389300 + }, + { + "epoch": 0.25, + "learning_rate": 3.75632404739033e-05, + "loss": 2.5008, + "step": 389400 + }, + { + "epoch": 0.25, + "learning_rate": 3.756003842459174e-05, + "loss": 2.4834, + "step": 389500 + }, + { + "epoch": 0.25, + "learning_rate": 3.755683637528018e-05, + "loss": 2.4868, + "step": 389600 + }, + { + "epoch": 0.25, + "learning_rate": 3.755363432596862e-05, + "loss": 2.4877, + "step": 389700 + }, + { + "epoch": 0.25, + "learning_rate": 3.755043227665706e-05, + "loss": 2.4949, + "step": 389800 + }, + { + "epoch": 0.25, + "learning_rate": 3.75472302273455e-05, + "loss": 2.5034, + "step": 389900 + }, + { + "epoch": 0.25, + "learning_rate": 3.754402817803394e-05, + "loss": 2.4923, + "step": 390000 + }, + { + "epoch": 0.25, + "eval_loss": 2.496317148208618, + "eval_runtime": 173.3618, + "eval_samples_per_second": 57.683, + "eval_steps_per_second": 3.605, + "step": 390000 + }, + { + "epoch": 0.25, + "learning_rate": 3.754082612872239e-05, + "loss": 2.4867, + "step": 390100 + }, + { + "epoch": 0.25, + "learning_rate": 3.7537624079410826e-05, + "loss": 2.5088, + "step": 390200 + }, + { + "epoch": 0.25, + "learning_rate": 3.7534422030099266e-05, + "loss": 2.4966, + "step": 390300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7531219980787706e-05, + "loss": 2.488, + "step": 390400 + }, + { + "epoch": 0.25, + "learning_rate": 3.7528017931476146e-05, + "loss": 2.5066, + "step": 390500 + }, + { + "epoch": 0.25, + "learning_rate": 3.7524815882164585e-05, + "loss": 2.4949, + "step": 390600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7521613832853025e-05, + "loss": 2.5074, + "step": 390700 + }, + { + "epoch": 0.25, + "learning_rate": 3.751841178354147e-05, + "loss": 2.5273, + "step": 390800 + }, + { + "epoch": 0.25, + "learning_rate": 3.751520973422991e-05, + "loss": 2.5095, + "step": 390900 + }, + { + "epoch": 0.25, + "learning_rate": 3.751200768491835e-05, + "loss": 2.4876, + "step": 391000 + }, + { + "epoch": 0.25, + "eval_loss": 2.4965176582336426, + "eval_runtime": 173.8711, + "eval_samples_per_second": 57.514, + "eval_steps_per_second": 3.595, + "step": 391000 + }, + { + "epoch": 0.25, + "learning_rate": 3.750880563560679e-05, + "loss": 2.4904, + "step": 391100 + }, + { + "epoch": 0.25, + "learning_rate": 3.750560358629523e-05, + "loss": 2.5088, + "step": 391200 + }, + { + "epoch": 0.25, + "learning_rate": 3.750240153698367e-05, + "loss": 2.4968, + "step": 391300 + }, + { + "epoch": 0.25, + "learning_rate": 3.749919948767211e-05, + "loss": 2.4927, + "step": 391400 + }, + { + "epoch": 0.25, + "learning_rate": 3.749599743836055e-05, + "loss": 2.4834, + "step": 391500 + }, + { + "epoch": 0.25, + "learning_rate": 3.7492795389048996e-05, + "loss": 2.5241, + "step": 391600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7489593339737436e-05, + "loss": 2.4944, + "step": 391700 + }, + { + "epoch": 0.25, + "learning_rate": 3.7486391290425876e-05, + "loss": 2.4965, + "step": 391800 + }, + { + "epoch": 0.25, + "learning_rate": 3.7483189241114316e-05, + "loss": 2.4864, + "step": 391900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7479987191802755e-05, + "loss": 2.4961, + "step": 392000 + }, + { + "epoch": 0.25, + "eval_loss": 2.496955156326294, + "eval_runtime": 177.1413, + "eval_samples_per_second": 56.452, + "eval_steps_per_second": 3.528, + "step": 392000 + }, + { + "epoch": 0.25, + "learning_rate": 3.7476785142491195e-05, + "loss": 2.4987, + "step": 392100 + }, + { + "epoch": 0.25, + "learning_rate": 3.7473583093179635e-05, + "loss": 2.5002, + "step": 392200 + }, + { + "epoch": 0.25, + "learning_rate": 3.7470381043868074e-05, + "loss": 2.4853, + "step": 392300 + }, + { + "epoch": 0.25, + "learning_rate": 3.746717899455652e-05, + "loss": 2.5047, + "step": 392400 + }, + { + "epoch": 0.25, + "learning_rate": 3.746397694524496e-05, + "loss": 2.4739, + "step": 392500 + }, + { + "epoch": 0.25, + "learning_rate": 3.74607748959334e-05, + "loss": 2.4956, + "step": 392600 + }, + { + "epoch": 0.25, + "learning_rate": 3.745757284662184e-05, + "loss": 2.5093, + "step": 392700 + }, + { + "epoch": 0.25, + "learning_rate": 3.745437079731028e-05, + "loss": 2.487, + "step": 392800 + }, + { + "epoch": 0.25, + "learning_rate": 3.745116874799872e-05, + "loss": 2.5049, + "step": 392900 + }, + { + "epoch": 0.25, + "learning_rate": 3.744796669868716e-05, + "loss": 2.4893, + "step": 393000 + }, + { + "epoch": 0.25, + "eval_loss": 2.497549533843994, + "eval_runtime": 177.5778, + "eval_samples_per_second": 56.313, + "eval_steps_per_second": 3.52, + "step": 393000 + }, + { + "epoch": 0.25, + "learning_rate": 3.74447646493756e-05, + "loss": 2.501, + "step": 393100 + }, + { + "epoch": 0.25, + "learning_rate": 3.7441562600064046e-05, + "loss": 2.4826, + "step": 393200 + }, + { + "epoch": 0.25, + "learning_rate": 3.7438360550752485e-05, + "loss": 2.4819, + "step": 393300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7435158501440925e-05, + "loss": 2.4699, + "step": 393400 + }, + { + "epoch": 0.25, + "learning_rate": 3.7431956452129365e-05, + "loss": 2.5042, + "step": 393500 + }, + { + "epoch": 0.25, + "learning_rate": 3.7428754402817805e-05, + "loss": 2.5126, + "step": 393600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7425552353506244e-05, + "loss": 2.5198, + "step": 393700 + }, + { + "epoch": 0.25, + "learning_rate": 3.7422350304194684e-05, + "loss": 2.5092, + "step": 393800 + }, + { + "epoch": 0.25, + "learning_rate": 3.741914825488313e-05, + "loss": 2.5063, + "step": 393900 + }, + { + "epoch": 0.25, + "learning_rate": 3.741594620557157e-05, + "loss": 2.4989, + "step": 394000 + }, + { + "epoch": 0.25, + "eval_loss": 2.497384548187256, + "eval_runtime": 178.2295, + "eval_samples_per_second": 56.107, + "eval_steps_per_second": 3.507, + "step": 394000 + }, + { + "epoch": 0.25, + "learning_rate": 3.741274415626001e-05, + "loss": 2.4731, + "step": 394100 + }, + { + "epoch": 0.25, + "learning_rate": 3.740954210694845e-05, + "loss": 2.4909, + "step": 394200 + }, + { + "epoch": 0.25, + "learning_rate": 3.740634005763689e-05, + "loss": 2.4864, + "step": 394300 + }, + { + "epoch": 0.25, + "learning_rate": 3.740313800832533e-05, + "loss": 2.488, + "step": 394400 + }, + { + "epoch": 0.25, + "learning_rate": 3.739993595901377e-05, + "loss": 2.4725, + "step": 394500 + }, + { + "epoch": 0.25, + "learning_rate": 3.739673390970221e-05, + "loss": 2.482, + "step": 394600 + }, + { + "epoch": 0.25, + "learning_rate": 3.739353186039065e-05, + "loss": 2.4969, + "step": 394700 + }, + { + "epoch": 0.25, + "learning_rate": 3.7390329811079095e-05, + "loss": 2.4944, + "step": 394800 + }, + { + "epoch": 0.25, + "learning_rate": 3.7387127761767535e-05, + "loss": 2.5095, + "step": 394900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7383925712455975e-05, + "loss": 2.4977, + "step": 395000 + }, + { + "epoch": 0.25, + "eval_loss": 2.4954965114593506, + "eval_runtime": 176.1394, + "eval_samples_per_second": 56.773, + "eval_steps_per_second": 3.548, + "step": 395000 + }, + { + "epoch": 0.25, + "learning_rate": 3.7380723663144414e-05, + "loss": 2.4841, + "step": 395100 + }, + { + "epoch": 0.25, + "learning_rate": 3.7377521613832854e-05, + "loss": 2.4834, + "step": 395200 + }, + { + "epoch": 0.25, + "learning_rate": 3.7374319564521294e-05, + "loss": 2.5135, + "step": 395300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7371117515209733e-05, + "loss": 2.4807, + "step": 395400 + }, + { + "epoch": 0.25, + "learning_rate": 3.736791546589818e-05, + "loss": 2.4932, + "step": 395500 + }, + { + "epoch": 0.25, + "learning_rate": 3.736471341658662e-05, + "loss": 2.4871, + "step": 395600 + }, + { + "epoch": 0.25, + "learning_rate": 3.736151136727506e-05, + "loss": 2.4998, + "step": 395700 + }, + { + "epoch": 0.25, + "learning_rate": 3.73583093179635e-05, + "loss": 2.4979, + "step": 395800 + }, + { + "epoch": 0.25, + "learning_rate": 3.735510726865194e-05, + "loss": 2.4878, + "step": 395900 + }, + { + "epoch": 0.25, + "learning_rate": 3.735190521934038e-05, + "loss": 2.4836, + "step": 396000 + }, + { + "epoch": 0.25, + "eval_loss": 2.49569034576416, + "eval_runtime": 176.8797, + "eval_samples_per_second": 56.536, + "eval_steps_per_second": 3.533, + "step": 396000 + }, + { + "epoch": 0.25, + "learning_rate": 3.734870317002882e-05, + "loss": 2.4959, + "step": 396100 + }, + { + "epoch": 0.25, + "learning_rate": 3.7345501120717265e-05, + "loss": 2.4945, + "step": 396200 + }, + { + "epoch": 0.25, + "learning_rate": 3.73422990714057e-05, + "loss": 2.4926, + "step": 396300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7339097022094144e-05, + "loss": 2.4823, + "step": 396400 + }, + { + "epoch": 0.25, + "learning_rate": 3.7335894972782584e-05, + "loss": 2.498, + "step": 396500 + }, + { + "epoch": 0.25, + "learning_rate": 3.7332692923471024e-05, + "loss": 2.5038, + "step": 396600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7329490874159464e-05, + "loss": 2.4799, + "step": 396700 + }, + { + "epoch": 0.25, + "learning_rate": 3.73262888248479e-05, + "loss": 2.4956, + "step": 396800 + }, + { + "epoch": 0.25, + "learning_rate": 3.732308677553635e-05, + "loss": 2.4837, + "step": 396900 + }, + { + "epoch": 0.25, + "learning_rate": 3.731988472622478e-05, + "loss": 2.4958, + "step": 397000 + }, + { + "epoch": 0.25, + "eval_loss": 2.49448299407959, + "eval_runtime": 174.5573, + "eval_samples_per_second": 57.288, + "eval_steps_per_second": 3.58, + "step": 397000 + }, + { + "epoch": 0.25, + "learning_rate": 3.731668267691323e-05, + "loss": 2.493, + "step": 397100 + }, + { + "epoch": 0.25, + "learning_rate": 3.731348062760167e-05, + "loss": 2.4892, + "step": 397200 + }, + { + "epoch": 0.25, + "learning_rate": 3.731027857829011e-05, + "loss": 2.4825, + "step": 397300 + }, + { + "epoch": 0.25, + "learning_rate": 3.730707652897855e-05, + "loss": 2.4979, + "step": 397400 + }, + { + "epoch": 0.25, + "learning_rate": 3.730387447966699e-05, + "loss": 2.509, + "step": 397500 + }, + { + "epoch": 0.25, + "learning_rate": 3.730067243035543e-05, + "loss": 2.5102, + "step": 397600 + }, + { + "epoch": 0.25, + "learning_rate": 3.729747038104387e-05, + "loss": 2.5075, + "step": 397700 + }, + { + "epoch": 0.25, + "learning_rate": 3.7294268331732314e-05, + "loss": 2.4701, + "step": 397800 + }, + { + "epoch": 0.25, + "learning_rate": 3.729106628242075e-05, + "loss": 2.4921, + "step": 397900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7287864233109194e-05, + "loss": 2.4906, + "step": 398000 + }, + { + "epoch": 0.25, + "eval_loss": 2.4942800998687744, + "eval_runtime": 173.664, + "eval_samples_per_second": 57.582, + "eval_steps_per_second": 3.599, + "step": 398000 + }, + { + "epoch": 0.25, + "learning_rate": 3.7284662183797634e-05, + "loss": 2.4884, + "step": 398100 + }, + { + "epoch": 0.25, + "learning_rate": 3.728146013448607e-05, + "loss": 2.5008, + "step": 398200 + }, + { + "epoch": 0.25, + "learning_rate": 3.727825808517451e-05, + "loss": 2.4798, + "step": 398300 + }, + { + "epoch": 0.25, + "learning_rate": 3.727505603586295e-05, + "loss": 2.4974, + "step": 398400 + }, + { + "epoch": 0.26, + "learning_rate": 3.72718539865514e-05, + "loss": 2.4839, + "step": 398500 + }, + { + "epoch": 0.26, + "learning_rate": 3.726865193723983e-05, + "loss": 2.4923, + "step": 398600 + }, + { + "epoch": 0.26, + "learning_rate": 3.726544988792828e-05, + "loss": 2.5026, + "step": 398700 + }, + { + "epoch": 0.26, + "learning_rate": 3.726224783861672e-05, + "loss": 2.4937, + "step": 398800 + }, + { + "epoch": 0.26, + "learning_rate": 3.725904578930516e-05, + "loss": 2.4812, + "step": 398900 + }, + { + "epoch": 0.26, + "learning_rate": 3.72558437399936e-05, + "loss": 2.5094, + "step": 399000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4958760738372803, + "eval_runtime": 173.8851, + "eval_samples_per_second": 57.509, + "eval_steps_per_second": 3.594, + "step": 399000 + }, + { + "epoch": 0.26, + "learning_rate": 3.725264169068204e-05, + "loss": 2.4947, + "step": 399100 + }, + { + "epoch": 0.26, + "learning_rate": 3.7249439641370484e-05, + "loss": 2.4721, + "step": 399200 + }, + { + "epoch": 0.26, + "learning_rate": 3.724623759205892e-05, + "loss": 2.4907, + "step": 399300 + }, + { + "epoch": 0.26, + "learning_rate": 3.7243035542747364e-05, + "loss": 2.5079, + "step": 399400 + }, + { + "epoch": 0.26, + "learning_rate": 3.72398334934358e-05, + "loss": 2.5069, + "step": 399500 + }, + { + "epoch": 0.26, + "learning_rate": 3.723663144412424e-05, + "loss": 2.4893, + "step": 399600 + }, + { + "epoch": 0.26, + "learning_rate": 3.723342939481268e-05, + "loss": 2.5025, + "step": 399700 + }, + { + "epoch": 0.26, + "learning_rate": 3.723022734550112e-05, + "loss": 2.4968, + "step": 399800 + }, + { + "epoch": 0.26, + "learning_rate": 3.722702529618956e-05, + "loss": 2.4965, + "step": 399900 + }, + { + "epoch": 0.26, + "learning_rate": 3.7223823246878e-05, + "loss": 2.4971, + "step": 400000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4941930770874023, + "eval_runtime": 174.0267, + "eval_samples_per_second": 57.462, + "eval_steps_per_second": 3.591, + "step": 400000 + }, + { + "epoch": 0.26, + "learning_rate": 3.722062119756645e-05, + "loss": 2.4825, + "step": 400100 + }, + { + "epoch": 0.26, + "learning_rate": 3.721741914825488e-05, + "loss": 2.4765, + "step": 400200 + }, + { + "epoch": 0.26, + "learning_rate": 3.721421709894333e-05, + "loss": 2.4687, + "step": 400300 + }, + { + "epoch": 0.26, + "learning_rate": 3.721101504963176e-05, + "loss": 2.501, + "step": 400400 + }, + { + "epoch": 0.26, + "learning_rate": 3.720781300032021e-05, + "loss": 2.4899, + "step": 400500 + }, + { + "epoch": 0.26, + "learning_rate": 3.720461095100865e-05, + "loss": 2.4968, + "step": 400600 + }, + { + "epoch": 0.26, + "learning_rate": 3.720140890169709e-05, + "loss": 2.4797, + "step": 400700 + }, + { + "epoch": 0.26, + "learning_rate": 3.7198206852385534e-05, + "loss": 2.4911, + "step": 400800 + }, + { + "epoch": 0.26, + "learning_rate": 3.7195004803073967e-05, + "loss": 2.4909, + "step": 400900 + }, + { + "epoch": 0.26, + "learning_rate": 3.719180275376241e-05, + "loss": 2.5164, + "step": 401000 + }, + { + "epoch": 0.26, + "eval_loss": 2.494380474090576, + "eval_runtime": 174.0163, + "eval_samples_per_second": 57.466, + "eval_steps_per_second": 3.592, + "step": 401000 + }, + { + "epoch": 0.26, + "learning_rate": 3.7188600704450846e-05, + "loss": 2.4886, + "step": 401100 + }, + { + "epoch": 0.26, + "learning_rate": 3.718539865513929e-05, + "loss": 2.4859, + "step": 401200 + }, + { + "epoch": 0.26, + "learning_rate": 3.718219660582773e-05, + "loss": 2.5117, + "step": 401300 + }, + { + "epoch": 0.26, + "learning_rate": 3.717899455651617e-05, + "loss": 2.4978, + "step": 401400 + }, + { + "epoch": 0.26, + "learning_rate": 3.717579250720462e-05, + "loss": 2.4842, + "step": 401500 + }, + { + "epoch": 0.26, + "learning_rate": 3.717259045789305e-05, + "loss": 2.5005, + "step": 401600 + }, + { + "epoch": 0.26, + "learning_rate": 3.71693884085815e-05, + "loss": 2.5048, + "step": 401700 + }, + { + "epoch": 0.26, + "learning_rate": 3.716618635926993e-05, + "loss": 2.5005, + "step": 401800 + }, + { + "epoch": 0.26, + "learning_rate": 3.716298430995838e-05, + "loss": 2.5101, + "step": 401900 + }, + { + "epoch": 0.26, + "learning_rate": 3.715978226064681e-05, + "loss": 2.4978, + "step": 402000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4960920810699463, + "eval_runtime": 177.2252, + "eval_samples_per_second": 56.425, + "eval_steps_per_second": 3.527, + "step": 402000 + }, + { + "epoch": 0.26, + "learning_rate": 3.715658021133526e-05, + "loss": 2.506, + "step": 402100 + }, + { + "epoch": 0.26, + "learning_rate": 3.71533781620237e-05, + "loss": 2.5091, + "step": 402200 + }, + { + "epoch": 0.26, + "learning_rate": 3.7150176112712136e-05, + "loss": 2.4909, + "step": 402300 + }, + { + "epoch": 0.26, + "learning_rate": 3.714697406340058e-05, + "loss": 2.4901, + "step": 402400 + }, + { + "epoch": 0.26, + "learning_rate": 3.7143772014089016e-05, + "loss": 2.4949, + "step": 402500 + }, + { + "epoch": 0.26, + "learning_rate": 3.714056996477746e-05, + "loss": 2.4997, + "step": 402600 + }, + { + "epoch": 0.26, + "learning_rate": 3.7137367915465895e-05, + "loss": 2.5078, + "step": 402700 + }, + { + "epoch": 0.26, + "learning_rate": 3.713416586615434e-05, + "loss": 2.4992, + "step": 402800 + }, + { + "epoch": 0.26, + "learning_rate": 3.713096381684278e-05, + "loss": 2.5028, + "step": 402900 + }, + { + "epoch": 0.26, + "learning_rate": 3.712776176753122e-05, + "loss": 2.4893, + "step": 403000 + }, + { + "epoch": 0.26, + "eval_loss": 2.495441198348999, + "eval_runtime": 176.8442, + "eval_samples_per_second": 56.547, + "eval_steps_per_second": 3.534, + "step": 403000 + }, + { + "epoch": 0.26, + "learning_rate": 3.712455971821967e-05, + "loss": 2.4648, + "step": 403100 + }, + { + "epoch": 0.26, + "learning_rate": 3.71213576689081e-05, + "loss": 2.5059, + "step": 403200 + }, + { + "epoch": 0.26, + "learning_rate": 3.711815561959655e-05, + "loss": 2.4918, + "step": 403300 + }, + { + "epoch": 0.26, + "learning_rate": 3.711495357028498e-05, + "loss": 2.5006, + "step": 403400 + }, + { + "epoch": 0.26, + "learning_rate": 3.711175152097343e-05, + "loss": 2.48, + "step": 403500 + }, + { + "epoch": 0.26, + "learning_rate": 3.710854947166186e-05, + "loss": 2.4921, + "step": 403600 + }, + { + "epoch": 0.26, + "learning_rate": 3.7105347422350306e-05, + "loss": 2.49, + "step": 403700 + }, + { + "epoch": 0.26, + "learning_rate": 3.7102145373038746e-05, + "loss": 2.4854, + "step": 403800 + }, + { + "epoch": 0.26, + "learning_rate": 3.7098943323727186e-05, + "loss": 2.4964, + "step": 403900 + }, + { + "epoch": 0.26, + "learning_rate": 3.709574127441563e-05, + "loss": 2.4902, + "step": 404000 + }, + { + "epoch": 0.26, + "eval_loss": 2.49703311920166, + "eval_runtime": 176.7332, + "eval_samples_per_second": 56.582, + "eval_steps_per_second": 3.536, + "step": 404000 + }, + { + "epoch": 0.26, + "learning_rate": 3.7092539225104065e-05, + "loss": 2.4859, + "step": 404100 + }, + { + "epoch": 0.26, + "learning_rate": 3.708933717579251e-05, + "loss": 2.493, + "step": 404200 + }, + { + "epoch": 0.26, + "learning_rate": 3.7086135126480945e-05, + "loss": 2.5139, + "step": 404300 + }, + { + "epoch": 0.26, + "learning_rate": 3.708293307716939e-05, + "loss": 2.4837, + "step": 404400 + }, + { + "epoch": 0.26, + "learning_rate": 3.707973102785783e-05, + "loss": 2.5094, + "step": 404500 + }, + { + "epoch": 0.26, + "learning_rate": 3.707652897854627e-05, + "loss": 2.4963, + "step": 404600 + }, + { + "epoch": 0.26, + "learning_rate": 3.707332692923472e-05, + "loss": 2.4897, + "step": 404700 + }, + { + "epoch": 0.26, + "learning_rate": 3.707012487992315e-05, + "loss": 2.5022, + "step": 404800 + }, + { + "epoch": 0.26, + "learning_rate": 3.70669228306116e-05, + "loss": 2.4878, + "step": 404900 + }, + { + "epoch": 0.26, + "learning_rate": 3.706372078130003e-05, + "loss": 2.5035, + "step": 405000 + }, + { + "epoch": 0.26, + "eval_loss": 2.496225595474243, + "eval_runtime": 177.3895, + "eval_samples_per_second": 56.373, + "eval_steps_per_second": 3.523, + "step": 405000 + }, + { + "epoch": 0.26, + "learning_rate": 3.7060518731988476e-05, + "loss": 2.4765, + "step": 405100 + }, + { + "epoch": 0.26, + "learning_rate": 3.705731668267691e-05, + "loss": 2.5169, + "step": 405200 + }, + { + "epoch": 0.26, + "learning_rate": 3.7054114633365356e-05, + "loss": 2.4945, + "step": 405300 + }, + { + "epoch": 0.26, + "learning_rate": 3.7050912584053795e-05, + "loss": 2.4851, + "step": 405400 + }, + { + "epoch": 0.26, + "learning_rate": 3.7047710534742235e-05, + "loss": 2.4956, + "step": 405500 + }, + { + "epoch": 0.26, + "learning_rate": 3.704450848543068e-05, + "loss": 2.4983, + "step": 405600 + }, + { + "epoch": 0.26, + "learning_rate": 3.7041306436119115e-05, + "loss": 2.4676, + "step": 405700 + }, + { + "epoch": 0.26, + "learning_rate": 3.703810438680756e-05, + "loss": 2.4784, + "step": 405800 + }, + { + "epoch": 0.26, + "learning_rate": 3.7034902337495994e-05, + "loss": 2.4799, + "step": 405900 + }, + { + "epoch": 0.26, + "learning_rate": 3.703170028818444e-05, + "loss": 2.4875, + "step": 406000 + }, + { + "epoch": 0.26, + "eval_loss": 2.493391752243042, + "eval_runtime": 176.6539, + "eval_samples_per_second": 56.608, + "eval_steps_per_second": 3.538, + "step": 406000 + }, + { + "epoch": 0.26, + "learning_rate": 3.702849823887288e-05, + "loss": 2.484, + "step": 406100 + }, + { + "epoch": 0.26, + "learning_rate": 3.702529618956132e-05, + "loss": 2.4861, + "step": 406200 + }, + { + "epoch": 0.26, + "learning_rate": 3.702209414024977e-05, + "loss": 2.479, + "step": 406300 + }, + { + "epoch": 0.26, + "learning_rate": 3.70188920909382e-05, + "loss": 2.497, + "step": 406400 + }, + { + "epoch": 0.26, + "learning_rate": 3.7015690041626646e-05, + "loss": 2.4992, + "step": 406500 + }, + { + "epoch": 0.26, + "learning_rate": 3.701248799231508e-05, + "loss": 2.4795, + "step": 406600 + }, + { + "epoch": 0.26, + "learning_rate": 3.7009285943003526e-05, + "loss": 2.5039, + "step": 406700 + }, + { + "epoch": 0.26, + "learning_rate": 3.7006083893691965e-05, + "loss": 2.4807, + "step": 406800 + }, + { + "epoch": 0.26, + "learning_rate": 3.7002881844380405e-05, + "loss": 2.4756, + "step": 406900 + }, + { + "epoch": 0.26, + "learning_rate": 3.6999679795068845e-05, + "loss": 2.5064, + "step": 407000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4955644607543945, + "eval_runtime": 176.7613, + "eval_samples_per_second": 56.573, + "eval_steps_per_second": 3.536, + "step": 407000 + }, + { + "epoch": 0.26, + "learning_rate": 3.6996477745757285e-05, + "loss": 2.4904, + "step": 407100 + }, + { + "epoch": 0.26, + "learning_rate": 3.699327569644573e-05, + "loss": 2.483, + "step": 407200 + }, + { + "epoch": 0.26, + "learning_rate": 3.6990073647134164e-05, + "loss": 2.473, + "step": 407300 + }, + { + "epoch": 0.26, + "learning_rate": 3.698687159782261e-05, + "loss": 2.4728, + "step": 407400 + }, + { + "epoch": 0.26, + "learning_rate": 3.6983669548511043e-05, + "loss": 2.4798, + "step": 407500 + }, + { + "epoch": 0.26, + "learning_rate": 3.698046749919949e-05, + "loss": 2.495, + "step": 407600 + }, + { + "epoch": 0.26, + "learning_rate": 3.697726544988793e-05, + "loss": 2.4861, + "step": 407700 + }, + { + "epoch": 0.26, + "learning_rate": 3.697406340057637e-05, + "loss": 2.4784, + "step": 407800 + }, + { + "epoch": 0.26, + "learning_rate": 3.6970861351264816e-05, + "loss": 2.4936, + "step": 407900 + }, + { + "epoch": 0.26, + "learning_rate": 3.696765930195325e-05, + "loss": 2.4981, + "step": 408000 + }, + { + "epoch": 0.26, + "eval_loss": 2.494189739227295, + "eval_runtime": 177.0105, + "eval_samples_per_second": 56.494, + "eval_steps_per_second": 3.531, + "step": 408000 + }, + { + "epoch": 0.26, + "learning_rate": 3.6964457252641695e-05, + "loss": 2.4751, + "step": 408100 + }, + { + "epoch": 0.26, + "learning_rate": 3.696125520333013e-05, + "loss": 2.4802, + "step": 408200 + }, + { + "epoch": 0.26, + "learning_rate": 3.6958053154018575e-05, + "loss": 2.4841, + "step": 408300 + }, + { + "epoch": 0.26, + "learning_rate": 3.6954851104707015e-05, + "loss": 2.4799, + "step": 408400 + }, + { + "epoch": 0.26, + "learning_rate": 3.6951649055395454e-05, + "loss": 2.4673, + "step": 408500 + }, + { + "epoch": 0.26, + "learning_rate": 3.6948447006083894e-05, + "loss": 2.4727, + "step": 408600 + }, + { + "epoch": 0.26, + "learning_rate": 3.6945244956772334e-05, + "loss": 2.4801, + "step": 408700 + }, + { + "epoch": 0.26, + "learning_rate": 3.694204290746078e-05, + "loss": 2.4717, + "step": 408800 + }, + { + "epoch": 0.26, + "learning_rate": 3.693884085814921e-05, + "loss": 2.4838, + "step": 408900 + }, + { + "epoch": 0.26, + "learning_rate": 3.693563880883766e-05, + "loss": 2.4908, + "step": 409000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4922473430633545, + "eval_runtime": 177.4522, + "eval_samples_per_second": 56.353, + "eval_steps_per_second": 3.522, + "step": 409000 + }, + { + "epoch": 0.26, + "learning_rate": 3.69324367595261e-05, + "loss": 2.4793, + "step": 409100 + }, + { + "epoch": 0.26, + "learning_rate": 3.692923471021454e-05, + "loss": 2.4808, + "step": 409200 + }, + { + "epoch": 0.26, + "learning_rate": 3.692603266090298e-05, + "loss": 2.4761, + "step": 409300 + }, + { + "epoch": 0.26, + "learning_rate": 3.692283061159142e-05, + "loss": 2.469, + "step": 409400 + }, + { + "epoch": 0.26, + "learning_rate": 3.6919628562279865e-05, + "loss": 2.4551, + "step": 409500 + }, + { + "epoch": 0.26, + "learning_rate": 3.69164265129683e-05, + "loss": 2.5, + "step": 409600 + }, + { + "epoch": 0.26, + "learning_rate": 3.6913224463656745e-05, + "loss": 2.4481, + "step": 409700 + }, + { + "epoch": 0.26, + "learning_rate": 3.691002241434518e-05, + "loss": 2.4726, + "step": 409800 + }, + { + "epoch": 0.26, + "learning_rate": 3.6906820365033624e-05, + "loss": 2.474, + "step": 409900 + }, + { + "epoch": 0.26, + "learning_rate": 3.6903618315722064e-05, + "loss": 2.4847, + "step": 410000 + }, + { + "epoch": 0.26, + "eval_loss": 2.49210262298584, + "eval_runtime": 174.6081, + "eval_samples_per_second": 57.271, + "eval_steps_per_second": 3.579, + "step": 410000 + }, + { + "epoch": 0.26, + "learning_rate": 3.6900416266410504e-05, + "loss": 2.4922, + "step": 410100 + }, + { + "epoch": 0.26, + "learning_rate": 3.6897214217098944e-05, + "loss": 2.4802, + "step": 410200 + }, + { + "epoch": 0.26, + "learning_rate": 3.689401216778738e-05, + "loss": 2.4912, + "step": 410300 + }, + { + "epoch": 0.26, + "learning_rate": 3.689081011847583e-05, + "loss": 2.4649, + "step": 410400 + }, + { + "epoch": 0.26, + "learning_rate": 3.688760806916426e-05, + "loss": 2.4839, + "step": 410500 + }, + { + "epoch": 0.26, + "learning_rate": 3.688440601985271e-05, + "loss": 2.4636, + "step": 410600 + }, + { + "epoch": 0.26, + "learning_rate": 3.688120397054115e-05, + "loss": 2.4508, + "step": 410700 + }, + { + "epoch": 0.26, + "learning_rate": 3.687800192122959e-05, + "loss": 2.4876, + "step": 410800 + }, + { + "epoch": 0.26, + "learning_rate": 3.687479987191803e-05, + "loss": 2.4915, + "step": 410900 + }, + { + "epoch": 0.26, + "learning_rate": 3.687159782260647e-05, + "loss": 2.477, + "step": 411000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4932756423950195, + "eval_runtime": 174.1704, + "eval_samples_per_second": 57.415, + "eval_steps_per_second": 3.588, + "step": 411000 + }, + { + "epoch": 0.26, + "learning_rate": 3.6868395773294915e-05, + "loss": 2.4905, + "step": 411100 + }, + { + "epoch": 0.26, + "learning_rate": 3.686519372398335e-05, + "loss": 2.4914, + "step": 411200 + }, + { + "epoch": 0.26, + "learning_rate": 3.6861991674671794e-05, + "loss": 2.4911, + "step": 411300 + }, + { + "epoch": 0.26, + "learning_rate": 3.6858789625360234e-05, + "loss": 2.509, + "step": 411400 + }, + { + "epoch": 0.26, + "learning_rate": 3.6855587576048674e-05, + "loss": 2.4944, + "step": 411500 + }, + { + "epoch": 0.26, + "learning_rate": 3.6852385526737113e-05, + "loss": 2.4959, + "step": 411600 + }, + { + "epoch": 0.26, + "learning_rate": 3.684918347742555e-05, + "loss": 2.4756, + "step": 411700 + }, + { + "epoch": 0.26, + "learning_rate": 3.684598142811399e-05, + "loss": 2.4881, + "step": 411800 + }, + { + "epoch": 0.26, + "learning_rate": 3.684277937880243e-05, + "loss": 2.5099, + "step": 411900 + }, + { + "epoch": 0.26, + "learning_rate": 3.683957732949088e-05, + "loss": 2.5014, + "step": 412000 + }, + { + "epoch": 0.26, + "eval_loss": 2.495574712753296, + "eval_runtime": 173.897, + "eval_samples_per_second": 57.505, + "eval_steps_per_second": 3.594, + "step": 412000 + }, + { + "epoch": 0.26, + "learning_rate": 3.683637528017931e-05, + "loss": 2.4868, + "step": 412100 + }, + { + "epoch": 0.26, + "learning_rate": 3.683317323086776e-05, + "loss": 2.4963, + "step": 412200 + }, + { + "epoch": 0.26, + "learning_rate": 3.68299711815562e-05, + "loss": 2.4964, + "step": 412300 + }, + { + "epoch": 0.26, + "learning_rate": 3.682676913224464e-05, + "loss": 2.4912, + "step": 412400 + }, + { + "epoch": 0.26, + "learning_rate": 3.682356708293308e-05, + "loss": 2.5166, + "step": 412500 + }, + { + "epoch": 0.26, + "learning_rate": 3.682036503362152e-05, + "loss": 2.487, + "step": 412600 + }, + { + "epoch": 0.26, + "learning_rate": 3.6817162984309964e-05, + "loss": 2.4917, + "step": 412700 + }, + { + "epoch": 0.26, + "learning_rate": 3.68139609349984e-05, + "loss": 2.505, + "step": 412800 + }, + { + "epoch": 0.26, + "learning_rate": 3.6810758885686844e-05, + "loss": 2.476, + "step": 412900 + }, + { + "epoch": 0.26, + "learning_rate": 3.680755683637528e-05, + "loss": 2.4829, + "step": 413000 + }, + { + "epoch": 0.26, + "eval_loss": 2.4932498931884766, + "eval_runtime": 174.2872, + "eval_samples_per_second": 57.377, + "eval_steps_per_second": 3.586, + "step": 413000 + }, + { + "epoch": 0.26, + "learning_rate": 3.680435478706372e-05, + "loss": 2.4614, + "step": 413100 + }, + { + "epoch": 0.26, + "learning_rate": 3.680115273775216e-05, + "loss": 2.4694, + "step": 413200 + }, + { + "epoch": 0.26, + "learning_rate": 3.67979506884406e-05, + "loss": 2.4825, + "step": 413300 + }, + { + "epoch": 0.26, + "learning_rate": 3.679474863912904e-05, + "loss": 2.4695, + "step": 413400 + }, + { + "epoch": 0.26, + "learning_rate": 3.679154658981748e-05, + "loss": 2.499, + "step": 413500 + }, + { + "epoch": 0.26, + "learning_rate": 3.678834454050593e-05, + "loss": 2.5021, + "step": 413600 + }, + { + "epoch": 0.26, + "learning_rate": 3.678514249119437e-05, + "loss": 2.5068, + "step": 413700 + }, + { + "epoch": 0.26, + "learning_rate": 3.678194044188281e-05, + "loss": 2.4818, + "step": 413800 + }, + { + "epoch": 0.26, + "learning_rate": 3.677873839257125e-05, + "loss": 2.4985, + "step": 413900 + }, + { + "epoch": 0.26, + "learning_rate": 3.677553634325969e-05, + "loss": 2.4631, + "step": 414000 + }, + { + "epoch": 0.26, + "eval_loss": 2.494476318359375, + "eval_runtime": 174.3204, + "eval_samples_per_second": 57.366, + "eval_steps_per_second": 3.585, + "step": 414000 + }, + { + "epoch": 0.27, + "learning_rate": 3.677233429394813e-05, + "loss": 2.4697, + "step": 414100 + }, + { + "epoch": 0.27, + "learning_rate": 3.676913224463657e-05, + "loss": 2.4702, + "step": 414200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6765930195325013e-05, + "loss": 2.492, + "step": 414300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6762728146013446e-05, + "loss": 2.4991, + "step": 414400 + }, + { + "epoch": 0.27, + "learning_rate": 3.675952609670189e-05, + "loss": 2.4868, + "step": 414500 + }, + { + "epoch": 0.27, + "learning_rate": 3.675632404739033e-05, + "loss": 2.4767, + "step": 414600 + }, + { + "epoch": 0.27, + "learning_rate": 3.675312199807877e-05, + "loss": 2.4825, + "step": 414700 + }, + { + "epoch": 0.27, + "learning_rate": 3.674991994876721e-05, + "loss": 2.4673, + "step": 414800 + }, + { + "epoch": 0.27, + "learning_rate": 3.674671789945565e-05, + "loss": 2.4693, + "step": 414900 + }, + { + "epoch": 0.27, + "learning_rate": 3.674351585014409e-05, + "loss": 2.4845, + "step": 415000 + }, + { + "epoch": 0.27, + "eval_loss": 2.4948008060455322, + "eval_runtime": 174.7629, + "eval_samples_per_second": 57.22, + "eval_steps_per_second": 3.576, + "step": 415000 + }, + { + "epoch": 0.27, + "learning_rate": 3.674031380083253e-05, + "loss": 2.4759, + "step": 415100 + }, + { + "epoch": 0.27, + "learning_rate": 3.673711175152098e-05, + "loss": 2.5018, + "step": 415200 + }, + { + "epoch": 0.27, + "learning_rate": 3.673390970220942e-05, + "loss": 2.4912, + "step": 415300 + }, + { + "epoch": 0.27, + "learning_rate": 3.673070765289786e-05, + "loss": 2.4889, + "step": 415400 + }, + { + "epoch": 0.27, + "learning_rate": 3.67275056035863e-05, + "loss": 2.4829, + "step": 415500 + }, + { + "epoch": 0.27, + "learning_rate": 3.672430355427474e-05, + "loss": 2.4768, + "step": 415600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6721101504963177e-05, + "loss": 2.4834, + "step": 415700 + }, + { + "epoch": 0.27, + "learning_rate": 3.6717899455651616e-05, + "loss": 2.4717, + "step": 415800 + }, + { + "epoch": 0.27, + "learning_rate": 3.671469740634006e-05, + "loss": 2.4642, + "step": 415900 + }, + { + "epoch": 0.27, + "learning_rate": 3.67114953570285e-05, + "loss": 2.5015, + "step": 416000 + }, + { + "epoch": 0.27, + "eval_loss": 2.4942965507507324, + "eval_runtime": 173.9314, + "eval_samples_per_second": 57.494, + "eval_steps_per_second": 3.593, + "step": 416000 + }, + { + "epoch": 0.27, + "learning_rate": 3.670829330771694e-05, + "loss": 2.4761, + "step": 416100 + }, + { + "epoch": 0.27, + "learning_rate": 3.670509125840538e-05, + "loss": 2.4949, + "step": 416200 + }, + { + "epoch": 0.27, + "learning_rate": 3.670188920909382e-05, + "loss": 2.4876, + "step": 416300 + }, + { + "epoch": 0.27, + "learning_rate": 3.669868715978226e-05, + "loss": 2.4664, + "step": 416400 + }, + { + "epoch": 0.27, + "learning_rate": 3.66954851104707e-05, + "loss": 2.4787, + "step": 416500 + }, + { + "epoch": 0.27, + "learning_rate": 3.669228306115914e-05, + "loss": 2.4904, + "step": 416600 + }, + { + "epoch": 0.27, + "learning_rate": 3.668908101184759e-05, + "loss": 2.485, + "step": 416700 + }, + { + "epoch": 0.27, + "learning_rate": 3.668587896253603e-05, + "loss": 2.4717, + "step": 416800 + }, + { + "epoch": 0.27, + "learning_rate": 3.668267691322447e-05, + "loss": 2.4711, + "step": 416900 + }, + { + "epoch": 0.27, + "learning_rate": 3.667947486391291e-05, + "loss": 2.5017, + "step": 417000 + }, + { + "epoch": 0.27, + "eval_loss": 2.4940578937530518, + "eval_runtime": 177.4234, + "eval_samples_per_second": 56.362, + "eval_steps_per_second": 3.523, + "step": 417000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6676272814601346e-05, + "loss": 2.4778, + "step": 417100 + }, + { + "epoch": 0.27, + "learning_rate": 3.6673070765289786e-05, + "loss": 2.4829, + "step": 417200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6669868715978226e-05, + "loss": 2.4863, + "step": 417300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6666666666666666e-05, + "loss": 2.4672, + "step": 417400 + }, + { + "epoch": 0.27, + "learning_rate": 3.666346461735511e-05, + "loss": 2.4826, + "step": 417500 + }, + { + "epoch": 0.27, + "learning_rate": 3.666026256804355e-05, + "loss": 2.5087, + "step": 417600 + }, + { + "epoch": 0.27, + "learning_rate": 3.665706051873199e-05, + "loss": 2.5071, + "step": 417700 + }, + { + "epoch": 0.27, + "learning_rate": 3.665385846942043e-05, + "loss": 2.4953, + "step": 417800 + }, + { + "epoch": 0.27, + "learning_rate": 3.665065642010887e-05, + "loss": 2.5056, + "step": 417900 + }, + { + "epoch": 0.27, + "learning_rate": 3.664745437079731e-05, + "loss": 2.4985, + "step": 418000 + }, + { + "epoch": 0.27, + "eval_loss": 2.492438554763794, + "eval_runtime": 177.1033, + "eval_samples_per_second": 56.464, + "eval_steps_per_second": 3.529, + "step": 418000 + }, + { + "epoch": 0.27, + "learning_rate": 3.664425232148575e-05, + "loss": 2.5036, + "step": 418100 + }, + { + "epoch": 0.27, + "learning_rate": 3.664105027217419e-05, + "loss": 2.4859, + "step": 418200 + }, + { + "epoch": 0.27, + "learning_rate": 3.663784822286264e-05, + "loss": 2.497, + "step": 418300 + }, + { + "epoch": 0.27, + "learning_rate": 3.663464617355108e-05, + "loss": 2.495, + "step": 418400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6631444124239516e-05, + "loss": 2.4866, + "step": 418500 + }, + { + "epoch": 0.27, + "learning_rate": 3.6628242074927956e-05, + "loss": 2.4929, + "step": 418600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6625040025616396e-05, + "loss": 2.5067, + "step": 418700 + }, + { + "epoch": 0.27, + "learning_rate": 3.6621837976304836e-05, + "loss": 2.4892, + "step": 418800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6618635926993275e-05, + "loss": 2.4862, + "step": 418900 + }, + { + "epoch": 0.27, + "learning_rate": 3.661543387768172e-05, + "loss": 2.4802, + "step": 419000 + }, + { + "epoch": 0.27, + "eval_loss": 2.49405837059021, + "eval_runtime": 176.9572, + "eval_samples_per_second": 56.511, + "eval_steps_per_second": 3.532, + "step": 419000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6612231828370155e-05, + "loss": 2.4848, + "step": 419100 + }, + { + "epoch": 0.27, + "learning_rate": 3.66090297790586e-05, + "loss": 2.4793, + "step": 419200 + }, + { + "epoch": 0.27, + "learning_rate": 3.660582772974704e-05, + "loss": 2.4655, + "step": 419300 + }, + { + "epoch": 0.27, + "learning_rate": 3.660262568043548e-05, + "loss": 2.478, + "step": 419400 + }, + { + "epoch": 0.27, + "learning_rate": 3.659942363112392e-05, + "loss": 2.4943, + "step": 419500 + }, + { + "epoch": 0.27, + "learning_rate": 3.659622158181236e-05, + "loss": 2.4792, + "step": 419600 + }, + { + "epoch": 0.27, + "learning_rate": 3.65930195325008e-05, + "loss": 2.4713, + "step": 419700 + }, + { + "epoch": 0.27, + "learning_rate": 3.658981748318924e-05, + "loss": 2.487, + "step": 419800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6586615433877686e-05, + "loss": 2.4838, + "step": 419900 + }, + { + "epoch": 0.27, + "learning_rate": 3.6583413384566126e-05, + "loss": 2.4632, + "step": 420000 + }, + { + "epoch": 0.27, + "eval_loss": 2.495870590209961, + "eval_runtime": 177.4486, + "eval_samples_per_second": 56.354, + "eval_steps_per_second": 3.522, + "step": 420000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6580211335254566e-05, + "loss": 2.4954, + "step": 420100 + }, + { + "epoch": 0.27, + "learning_rate": 3.6577009285943006e-05, + "loss": 2.4779, + "step": 420200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6573807236631445e-05, + "loss": 2.4693, + "step": 420300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6570605187319885e-05, + "loss": 2.4719, + "step": 420400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6567403138008325e-05, + "loss": 2.4674, + "step": 420500 + }, + { + "epoch": 0.27, + "learning_rate": 3.656420108869677e-05, + "loss": 2.4754, + "step": 420600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6560999039385204e-05, + "loss": 2.4842, + "step": 420700 + }, + { + "epoch": 0.27, + "learning_rate": 3.655779699007365e-05, + "loss": 2.4858, + "step": 420800 + }, + { + "epoch": 0.27, + "learning_rate": 3.655459494076209e-05, + "loss": 2.4592, + "step": 420900 + }, + { + "epoch": 0.27, + "learning_rate": 3.655139289145053e-05, + "loss": 2.4765, + "step": 421000 + }, + { + "epoch": 0.27, + "eval_loss": 2.4958765506744385, + "eval_runtime": 177.6241, + "eval_samples_per_second": 56.299, + "eval_steps_per_second": 3.519, + "step": 421000 + }, + { + "epoch": 0.27, + "learning_rate": 3.654819084213897e-05, + "loss": 2.4862, + "step": 421100 + }, + { + "epoch": 0.27, + "learning_rate": 3.654498879282741e-05, + "loss": 2.4507, + "step": 421200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6541786743515856e-05, + "loss": 2.5029, + "step": 421300 + }, + { + "epoch": 0.27, + "learning_rate": 3.653858469420429e-05, + "loss": 2.4713, + "step": 421400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6535382644892736e-05, + "loss": 2.4763, + "step": 421500 + }, + { + "epoch": 0.27, + "learning_rate": 3.6532180595581175e-05, + "loss": 2.4569, + "step": 421600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6528978546269615e-05, + "loss": 2.4754, + "step": 421700 + }, + { + "epoch": 0.27, + "learning_rate": 3.6525776496958055e-05, + "loss": 2.4744, + "step": 421800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6522574447646495e-05, + "loss": 2.4856, + "step": 421900 + }, + { + "epoch": 0.27, + "learning_rate": 3.6519372398334934e-05, + "loss": 2.4698, + "step": 422000 + }, + { + "epoch": 0.27, + "eval_loss": 2.494734287261963, + "eval_runtime": 177.2819, + "eval_samples_per_second": 56.407, + "eval_steps_per_second": 3.525, + "step": 422000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6516170349023374e-05, + "loss": 2.4684, + "step": 422100 + }, + { + "epoch": 0.27, + "learning_rate": 3.651296829971182e-05, + "loss": 2.4747, + "step": 422200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6509766250400254e-05, + "loss": 2.4901, + "step": 422300 + }, + { + "epoch": 0.27, + "learning_rate": 3.65065642010887e-05, + "loss": 2.4759, + "step": 422400 + }, + { + "epoch": 0.27, + "learning_rate": 3.650336215177714e-05, + "loss": 2.4759, + "step": 422500 + }, + { + "epoch": 0.27, + "learning_rate": 3.650016010246558e-05, + "loss": 2.4831, + "step": 422600 + }, + { + "epoch": 0.27, + "learning_rate": 3.649695805315402e-05, + "loss": 2.4787, + "step": 422700 + }, + { + "epoch": 0.27, + "learning_rate": 3.649375600384246e-05, + "loss": 2.4861, + "step": 422800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6490553954530906e-05, + "loss": 2.4596, + "step": 422900 + }, + { + "epoch": 0.27, + "learning_rate": 3.648735190521934e-05, + "loss": 2.454, + "step": 423000 + }, + { + "epoch": 0.27, + "eval_loss": 2.49556565284729, + "eval_runtime": 177.5164, + "eval_samples_per_second": 56.333, + "eval_steps_per_second": 3.521, + "step": 423000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6484149855907785e-05, + "loss": 2.4739, + "step": 423100 + }, + { + "epoch": 0.27, + "learning_rate": 3.6480947806596225e-05, + "loss": 2.4826, + "step": 423200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6477745757284665e-05, + "loss": 2.4642, + "step": 423300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6474543707973104e-05, + "loss": 2.4935, + "step": 423400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6471341658661544e-05, + "loss": 2.4811, + "step": 423500 + }, + { + "epoch": 0.27, + "learning_rate": 3.646813960934999e-05, + "loss": 2.489, + "step": 423600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6464937560038423e-05, + "loss": 2.4683, + "step": 423700 + }, + { + "epoch": 0.27, + "learning_rate": 3.646173551072687e-05, + "loss": 2.4814, + "step": 423800 + }, + { + "epoch": 0.27, + "learning_rate": 3.64585334614153e-05, + "loss": 2.4841, + "step": 423900 + }, + { + "epoch": 0.27, + "learning_rate": 3.645533141210375e-05, + "loss": 2.4703, + "step": 424000 + }, + { + "epoch": 0.27, + "eval_loss": 2.496413230895996, + "eval_runtime": 177.072, + "eval_samples_per_second": 56.474, + "eval_steps_per_second": 3.53, + "step": 424000 + }, + { + "epoch": 0.27, + "learning_rate": 3.645212936279219e-05, + "loss": 2.4733, + "step": 424100 + }, + { + "epoch": 0.27, + "learning_rate": 3.644892731348063e-05, + "loss": 2.4829, + "step": 424200 + }, + { + "epoch": 0.27, + "learning_rate": 3.644572526416907e-05, + "loss": 2.4814, + "step": 424300 + }, + { + "epoch": 0.27, + "learning_rate": 3.644252321485751e-05, + "loss": 2.4794, + "step": 424400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6439321165545955e-05, + "loss": 2.4672, + "step": 424500 + }, + { + "epoch": 0.27, + "learning_rate": 3.643611911623439e-05, + "loss": 2.4783, + "step": 424600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6432917066922834e-05, + "loss": 2.4736, + "step": 424700 + }, + { + "epoch": 0.27, + "learning_rate": 3.6429715017611274e-05, + "loss": 2.4833, + "step": 424800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6426512968299714e-05, + "loss": 2.4814, + "step": 424900 + }, + { + "epoch": 0.27, + "learning_rate": 3.6423310918988154e-05, + "loss": 2.472, + "step": 425000 + }, + { + "epoch": 0.27, + "eval_loss": 2.4947168827056885, + "eval_runtime": 178.8163, + "eval_samples_per_second": 55.923, + "eval_steps_per_second": 3.495, + "step": 425000 + }, + { + "epoch": 0.27, + "learning_rate": 3.642010886967659e-05, + "loss": 2.4883, + "step": 425100 + }, + { + "epoch": 0.27, + "learning_rate": 3.641690682036504e-05, + "loss": 2.4865, + "step": 425200 + }, + { + "epoch": 0.27, + "learning_rate": 3.641370477105347e-05, + "loss": 2.465, + "step": 425300 + }, + { + "epoch": 0.27, + "learning_rate": 3.641050272174192e-05, + "loss": 2.4774, + "step": 425400 + }, + { + "epoch": 0.27, + "learning_rate": 3.640730067243035e-05, + "loss": 2.489, + "step": 425500 + }, + { + "epoch": 0.27, + "learning_rate": 3.64040986231188e-05, + "loss": 2.4767, + "step": 425600 + }, + { + "epoch": 0.27, + "learning_rate": 3.640089657380724e-05, + "loss": 2.4766, + "step": 425700 + }, + { + "epoch": 0.27, + "learning_rate": 3.639769452449568e-05, + "loss": 2.4751, + "step": 425800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6394492475184125e-05, + "loss": 2.4676, + "step": 425900 + }, + { + "epoch": 0.27, + "learning_rate": 3.639129042587256e-05, + "loss": 2.5024, + "step": 426000 + }, + { + "epoch": 0.27, + "eval_loss": 2.492905378341675, + "eval_runtime": 176.9952, + "eval_samples_per_second": 56.499, + "eval_steps_per_second": 3.531, + "step": 426000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6388088376561004e-05, + "loss": 2.4877, + "step": 426100 + }, + { + "epoch": 0.27, + "learning_rate": 3.638488632724944e-05, + "loss": 2.4866, + "step": 426200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6381684277937884e-05, + "loss": 2.462, + "step": 426300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6378482228626324e-05, + "loss": 2.4671, + "step": 426400 + }, + { + "epoch": 0.27, + "learning_rate": 3.637528017931476e-05, + "loss": 2.4859, + "step": 426500 + }, + { + "epoch": 0.27, + "learning_rate": 3.637207813000321e-05, + "loss": 2.4792, + "step": 426600 + }, + { + "epoch": 0.27, + "learning_rate": 3.636887608069164e-05, + "loss": 2.4829, + "step": 426700 + }, + { + "epoch": 0.27, + "learning_rate": 3.636567403138009e-05, + "loss": 2.4976, + "step": 426800 + }, + { + "epoch": 0.27, + "learning_rate": 3.636247198206852e-05, + "loss": 2.4798, + "step": 426900 + }, + { + "epoch": 0.27, + "learning_rate": 3.635926993275697e-05, + "loss": 2.4688, + "step": 427000 + }, + { + "epoch": 0.27, + "eval_loss": 2.495476722717285, + "eval_runtime": 177.9819, + "eval_samples_per_second": 56.185, + "eval_steps_per_second": 3.512, + "step": 427000 + }, + { + "epoch": 0.27, + "learning_rate": 3.63560678834454e-05, + "loss": 2.4749, + "step": 427100 + }, + { + "epoch": 0.27, + "learning_rate": 3.635286583413385e-05, + "loss": 2.4752, + "step": 427200 + }, + { + "epoch": 0.27, + "learning_rate": 3.634966378482229e-05, + "loss": 2.4922, + "step": 427300 + }, + { + "epoch": 0.27, + "learning_rate": 3.634646173551073e-05, + "loss": 2.4857, + "step": 427400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6343259686199174e-05, + "loss": 2.476, + "step": 427500 + }, + { + "epoch": 0.27, + "learning_rate": 3.634005763688761e-05, + "loss": 2.4561, + "step": 427600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6336855587576054e-05, + "loss": 2.4772, + "step": 427700 + }, + { + "epoch": 0.27, + "learning_rate": 3.633365353826449e-05, + "loss": 2.4719, + "step": 427800 + }, + { + "epoch": 0.27, + "learning_rate": 3.633045148895293e-05, + "loss": 2.4825, + "step": 427900 + }, + { + "epoch": 0.27, + "learning_rate": 3.632724943964137e-05, + "loss": 2.463, + "step": 428000 + }, + { + "epoch": 0.27, + "eval_loss": 2.494075298309326, + "eval_runtime": 178.0026, + "eval_samples_per_second": 56.179, + "eval_steps_per_second": 3.511, + "step": 428000 + }, + { + "epoch": 0.27, + "learning_rate": 3.632404739032981e-05, + "loss": 2.459, + "step": 428100 + }, + { + "epoch": 0.27, + "learning_rate": 3.632084534101826e-05, + "loss": 2.4766, + "step": 428200 + }, + { + "epoch": 0.27, + "learning_rate": 3.631764329170669e-05, + "loss": 2.4788, + "step": 428300 + }, + { + "epoch": 0.27, + "learning_rate": 3.631444124239514e-05, + "loss": 2.4721, + "step": 428400 + }, + { + "epoch": 0.27, + "learning_rate": 3.631123919308357e-05, + "loss": 2.4855, + "step": 428500 + }, + { + "epoch": 0.27, + "learning_rate": 3.630803714377202e-05, + "loss": 2.4738, + "step": 428600 + }, + { + "epoch": 0.27, + "learning_rate": 3.630483509446045e-05, + "loss": 2.4772, + "step": 428700 + }, + { + "epoch": 0.27, + "learning_rate": 3.63016330451489e-05, + "loss": 2.467, + "step": 428800 + }, + { + "epoch": 0.27, + "learning_rate": 3.629843099583734e-05, + "loss": 2.4587, + "step": 428900 + }, + { + "epoch": 0.27, + "learning_rate": 3.629522894652578e-05, + "loss": 2.4758, + "step": 429000 + }, + { + "epoch": 0.27, + "eval_loss": 2.494337320327759, + "eval_runtime": 178.7881, + "eval_samples_per_second": 55.932, + "eval_steps_per_second": 3.496, + "step": 429000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6292026897214224e-05, + "loss": 2.4677, + "step": 429100 + }, + { + "epoch": 0.27, + "learning_rate": 3.6288824847902657e-05, + "loss": 2.4757, + "step": 429200 + }, + { + "epoch": 0.27, + "learning_rate": 3.62856227985911e-05, + "loss": 2.4604, + "step": 429300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6282420749279536e-05, + "loss": 2.4735, + "step": 429400 + }, + { + "epoch": 0.27, + "learning_rate": 3.627921869996798e-05, + "loss": 2.4672, + "step": 429500 + }, + { + "epoch": 0.27, + "learning_rate": 3.627601665065642e-05, + "loss": 2.4663, + "step": 429600 + }, + { + "epoch": 0.28, + "learning_rate": 3.627281460134486e-05, + "loss": 2.4685, + "step": 429700 + }, + { + "epoch": 0.28, + "learning_rate": 3.626961255203331e-05, + "loss": 2.4663, + "step": 429800 + }, + { + "epoch": 0.28, + "learning_rate": 3.626641050272174e-05, + "loss": 2.4527, + "step": 429900 + }, + { + "epoch": 0.28, + "learning_rate": 3.626320845341019e-05, + "loss": 2.4643, + "step": 430000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4952518939971924, + "eval_runtime": 177.0684, + "eval_samples_per_second": 56.475, + "eval_steps_per_second": 3.53, + "step": 430000 + }, + { + "epoch": 0.28, + "learning_rate": 3.626000640409862e-05, + "loss": 2.4684, + "step": 430100 + }, + { + "epoch": 0.28, + "learning_rate": 3.625680435478707e-05, + "loss": 2.4798, + "step": 430200 + }, + { + "epoch": 0.28, + "learning_rate": 3.62536023054755e-05, + "loss": 2.4671, + "step": 430300 + }, + { + "epoch": 0.28, + "learning_rate": 3.625040025616395e-05, + "loss": 2.4579, + "step": 430400 + }, + { + "epoch": 0.28, + "learning_rate": 3.624719820685239e-05, + "loss": 2.4635, + "step": 430500 + }, + { + "epoch": 0.28, + "learning_rate": 3.6243996157540826e-05, + "loss": 2.4713, + "step": 430600 + }, + { + "epoch": 0.28, + "learning_rate": 3.624079410822927e-05, + "loss": 2.4734, + "step": 430700 + }, + { + "epoch": 0.28, + "learning_rate": 3.6237592058917706e-05, + "loss": 2.476, + "step": 430800 + }, + { + "epoch": 0.28, + "learning_rate": 3.623439000960615e-05, + "loss": 2.4674, + "step": 430900 + }, + { + "epoch": 0.28, + "learning_rate": 3.6231187960294585e-05, + "loss": 2.4588, + "step": 431000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4956154823303223, + "eval_runtime": 177.2162, + "eval_samples_per_second": 56.428, + "eval_steps_per_second": 3.527, + "step": 431000 + }, + { + "epoch": 0.28, + "learning_rate": 3.622798591098303e-05, + "loss": 2.4642, + "step": 431100 + }, + { + "epoch": 0.28, + "learning_rate": 3.622478386167147e-05, + "loss": 2.4684, + "step": 431200 + }, + { + "epoch": 0.28, + "learning_rate": 3.622158181235991e-05, + "loss": 2.4802, + "step": 431300 + }, + { + "epoch": 0.28, + "learning_rate": 3.621837976304836e-05, + "loss": 2.4736, + "step": 431400 + }, + { + "epoch": 0.28, + "learning_rate": 3.621517771373679e-05, + "loss": 2.464, + "step": 431500 + }, + { + "epoch": 0.28, + "learning_rate": 3.621197566442524e-05, + "loss": 2.466, + "step": 431600 + }, + { + "epoch": 0.28, + "learning_rate": 3.620877361511367e-05, + "loss": 2.4719, + "step": 431700 + }, + { + "epoch": 0.28, + "learning_rate": 3.620557156580212e-05, + "loss": 2.4805, + "step": 431800 + }, + { + "epoch": 0.28, + "learning_rate": 3.620236951649055e-05, + "loss": 2.4887, + "step": 431900 + }, + { + "epoch": 0.28, + "learning_rate": 3.6199167467178996e-05, + "loss": 2.4649, + "step": 432000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4944067001342773, + "eval_runtime": 177.969, + "eval_samples_per_second": 56.19, + "eval_steps_per_second": 3.512, + "step": 432000 + }, + { + "epoch": 0.28, + "learning_rate": 3.6195965417867436e-05, + "loss": 2.4812, + "step": 432100 + }, + { + "epoch": 0.28, + "learning_rate": 3.6192763368555876e-05, + "loss": 2.4749, + "step": 432200 + }, + { + "epoch": 0.28, + "learning_rate": 3.618956131924432e-05, + "loss": 2.4669, + "step": 432300 + }, + { + "epoch": 0.28, + "learning_rate": 3.6186359269932755e-05, + "loss": 2.4679, + "step": 432400 + }, + { + "epoch": 0.28, + "learning_rate": 3.61831572206212e-05, + "loss": 2.4874, + "step": 432500 + }, + { + "epoch": 0.28, + "learning_rate": 3.6179955171309635e-05, + "loss": 2.4755, + "step": 432600 + }, + { + "epoch": 0.28, + "learning_rate": 3.617675312199808e-05, + "loss": 2.4708, + "step": 432700 + }, + { + "epoch": 0.28, + "learning_rate": 3.617355107268652e-05, + "loss": 2.4744, + "step": 432800 + }, + { + "epoch": 0.28, + "learning_rate": 3.617034902337496e-05, + "loss": 2.4797, + "step": 432900 + }, + { + "epoch": 0.28, + "learning_rate": 3.616714697406341e-05, + "loss": 2.4551, + "step": 433000 + }, + { + "epoch": 0.28, + "eval_loss": 2.495239734649658, + "eval_runtime": 178.15, + "eval_samples_per_second": 56.132, + "eval_steps_per_second": 3.508, + "step": 433000 + }, + { + "epoch": 0.28, + "learning_rate": 3.616394492475184e-05, + "loss": 2.4559, + "step": 433100 + }, + { + "epoch": 0.28, + "learning_rate": 3.616074287544029e-05, + "loss": 2.4529, + "step": 433200 + }, + { + "epoch": 0.28, + "learning_rate": 3.615754082612872e-05, + "loss": 2.4838, + "step": 433300 + }, + { + "epoch": 0.28, + "learning_rate": 3.6154338776817166e-05, + "loss": 2.4913, + "step": 433400 + }, + { + "epoch": 0.28, + "learning_rate": 3.6151136727505606e-05, + "loss": 2.4775, + "step": 433500 + }, + { + "epoch": 0.28, + "learning_rate": 3.6147934678194046e-05, + "loss": 2.4805, + "step": 433600 + }, + { + "epoch": 0.28, + "learning_rate": 3.6144732628882485e-05, + "loss": 2.4598, + "step": 433700 + }, + { + "epoch": 0.28, + "learning_rate": 3.6141530579570925e-05, + "loss": 2.4719, + "step": 433800 + }, + { + "epoch": 0.28, + "learning_rate": 3.613832853025937e-05, + "loss": 2.4614, + "step": 433900 + }, + { + "epoch": 0.28, + "learning_rate": 3.6135126480947805e-05, + "loss": 2.4796, + "step": 434000 + }, + { + "epoch": 0.28, + "eval_loss": 2.493349313735962, + "eval_runtime": 178.1065, + "eval_samples_per_second": 56.146, + "eval_steps_per_second": 3.509, + "step": 434000 + }, + { + "epoch": 0.28, + "learning_rate": 3.613192443163625e-05, + "loss": 2.4692, + "step": 434100 + }, + { + "epoch": 0.28, + "learning_rate": 3.6128722382324684e-05, + "loss": 2.472, + "step": 434200 + }, + { + "epoch": 0.28, + "learning_rate": 3.612552033301313e-05, + "loss": 2.4806, + "step": 434300 + }, + { + "epoch": 0.28, + "learning_rate": 3.612231828370157e-05, + "loss": 2.5066, + "step": 434400 + }, + { + "epoch": 0.28, + "learning_rate": 3.611911623439001e-05, + "loss": 2.4824, + "step": 434500 + }, + { + "epoch": 0.28, + "learning_rate": 3.611591418507846e-05, + "loss": 2.4553, + "step": 434600 + }, + { + "epoch": 0.28, + "learning_rate": 3.611271213576689e-05, + "loss": 2.4641, + "step": 434700 + }, + { + "epoch": 0.28, + "learning_rate": 3.6109510086455336e-05, + "loss": 2.4703, + "step": 434800 + }, + { + "epoch": 0.28, + "learning_rate": 3.610630803714377e-05, + "loss": 2.4885, + "step": 434900 + }, + { + "epoch": 0.28, + "learning_rate": 3.6103105987832216e-05, + "loss": 2.4675, + "step": 435000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4942822456359863, + "eval_runtime": 177.9046, + "eval_samples_per_second": 56.21, + "eval_steps_per_second": 3.513, + "step": 435000 + }, + { + "epoch": 0.28, + "learning_rate": 3.6099903938520655e-05, + "loss": 2.4845, + "step": 435100 + }, + { + "epoch": 0.28, + "learning_rate": 3.6096701889209095e-05, + "loss": 2.4817, + "step": 435200 + }, + { + "epoch": 0.28, + "learning_rate": 3.6093499839897535e-05, + "loss": 2.4671, + "step": 435300 + }, + { + "epoch": 0.28, + "learning_rate": 3.6090297790585975e-05, + "loss": 2.4666, + "step": 435400 + }, + { + "epoch": 0.28, + "learning_rate": 3.608709574127442e-05, + "loss": 2.4653, + "step": 435500 + }, + { + "epoch": 0.28, + "learning_rate": 3.6083893691962854e-05, + "loss": 2.4874, + "step": 435600 + }, + { + "epoch": 0.28, + "learning_rate": 3.60806916426513e-05, + "loss": 2.4933, + "step": 435700 + }, + { + "epoch": 0.28, + "learning_rate": 3.607748959333974e-05, + "loss": 2.4735, + "step": 435800 + }, + { + "epoch": 0.28, + "learning_rate": 3.607428754402818e-05, + "loss": 2.4854, + "step": 435900 + }, + { + "epoch": 0.28, + "learning_rate": 3.607108549471662e-05, + "loss": 2.4806, + "step": 436000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4951186180114746, + "eval_runtime": 177.2138, + "eval_samples_per_second": 56.429, + "eval_steps_per_second": 3.527, + "step": 436000 + }, + { + "epoch": 0.28, + "learning_rate": 3.606788344540506e-05, + "loss": 2.4915, + "step": 436100 + }, + { + "epoch": 0.28, + "learning_rate": 3.60646813960935e-05, + "loss": 2.4785, + "step": 436200 + }, + { + "epoch": 0.28, + "learning_rate": 3.606147934678194e-05, + "loss": 2.4932, + "step": 436300 + }, + { + "epoch": 0.28, + "learning_rate": 3.6058277297470385e-05, + "loss": 2.4729, + "step": 436400 + }, + { + "epoch": 0.28, + "learning_rate": 3.6055075248158825e-05, + "loss": 2.5004, + "step": 436500 + }, + { + "epoch": 0.28, + "learning_rate": 3.6051873198847265e-05, + "loss": 2.4784, + "step": 436600 + }, + { + "epoch": 0.28, + "learning_rate": 3.6048671149535705e-05, + "loss": 2.4774, + "step": 436700 + }, + { + "epoch": 0.28, + "learning_rate": 3.6045469100224144e-05, + "loss": 2.4783, + "step": 436800 + }, + { + "epoch": 0.28, + "learning_rate": 3.6042267050912584e-05, + "loss": 2.4763, + "step": 436900 + }, + { + "epoch": 0.28, + "learning_rate": 3.6039065001601024e-05, + "loss": 2.4744, + "step": 437000 + }, + { + "epoch": 0.28, + "eval_loss": 2.494119644165039, + "eval_runtime": 177.0508, + "eval_samples_per_second": 56.481, + "eval_steps_per_second": 3.53, + "step": 437000 + }, + { + "epoch": 0.28, + "learning_rate": 3.603586295228947e-05, + "loss": 2.4783, + "step": 437100 + }, + { + "epoch": 0.28, + "learning_rate": 3.60326609029779e-05, + "loss": 2.4644, + "step": 437200 + }, + { + "epoch": 0.28, + "learning_rate": 3.602945885366635e-05, + "loss": 2.478, + "step": 437300 + }, + { + "epoch": 0.28, + "learning_rate": 3.602625680435479e-05, + "loss": 2.4572, + "step": 437400 + }, + { + "epoch": 0.28, + "learning_rate": 3.602305475504323e-05, + "loss": 2.4817, + "step": 437500 + }, + { + "epoch": 0.28, + "learning_rate": 3.601985270573167e-05, + "loss": 2.4597, + "step": 437600 + }, + { + "epoch": 0.28, + "learning_rate": 3.601665065642011e-05, + "loss": 2.4773, + "step": 437700 + }, + { + "epoch": 0.28, + "learning_rate": 3.601344860710855e-05, + "loss": 2.4608, + "step": 437800 + }, + { + "epoch": 0.28, + "learning_rate": 3.601024655779699e-05, + "loss": 2.4653, + "step": 437900 + }, + { + "epoch": 0.28, + "learning_rate": 3.6007044508485435e-05, + "loss": 2.4651, + "step": 438000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4944350719451904, + "eval_runtime": 176.4637, + "eval_samples_per_second": 56.669, + "eval_steps_per_second": 3.542, + "step": 438000 + }, + { + "epoch": 0.28, + "learning_rate": 3.6003842459173875e-05, + "loss": 2.4738, + "step": 438100 + }, + { + "epoch": 0.28, + "learning_rate": 3.6000640409862314e-05, + "loss": 2.4739, + "step": 438200 + }, + { + "epoch": 0.28, + "learning_rate": 3.5997438360550754e-05, + "loss": 2.473, + "step": 438300 + }, + { + "epoch": 0.28, + "learning_rate": 3.5994236311239194e-05, + "loss": 2.4737, + "step": 438400 + }, + { + "epoch": 0.28, + "learning_rate": 3.5991034261927634e-05, + "loss": 2.4668, + "step": 438500 + }, + { + "epoch": 0.28, + "learning_rate": 3.598783221261607e-05, + "loss": 2.4615, + "step": 438600 + }, + { + "epoch": 0.28, + "learning_rate": 3.598463016330452e-05, + "loss": 2.4614, + "step": 438700 + }, + { + "epoch": 0.28, + "learning_rate": 3.598142811399296e-05, + "loss": 2.4867, + "step": 438800 + }, + { + "epoch": 0.28, + "learning_rate": 3.59782260646814e-05, + "loss": 2.479, + "step": 438900 + }, + { + "epoch": 0.28, + "learning_rate": 3.597502401536984e-05, + "loss": 2.4717, + "step": 439000 + }, + { + "epoch": 0.28, + "eval_loss": 2.49422550201416, + "eval_runtime": 176.3748, + "eval_samples_per_second": 56.697, + "eval_steps_per_second": 3.544, + "step": 439000 + }, + { + "epoch": 0.28, + "learning_rate": 3.597182196605828e-05, + "loss": 2.4801, + "step": 439100 + }, + { + "epoch": 0.28, + "learning_rate": 3.596861991674672e-05, + "loss": 2.4789, + "step": 439200 + }, + { + "epoch": 0.28, + "learning_rate": 3.596541786743516e-05, + "loss": 2.4716, + "step": 439300 + }, + { + "epoch": 0.28, + "learning_rate": 3.59622158181236e-05, + "loss": 2.4616, + "step": 439400 + }, + { + "epoch": 0.28, + "learning_rate": 3.595901376881204e-05, + "loss": 2.4795, + "step": 439500 + }, + { + "epoch": 0.28, + "learning_rate": 3.5955811719500484e-05, + "loss": 2.482, + "step": 439600 + }, + { + "epoch": 0.28, + "learning_rate": 3.5952609670188924e-05, + "loss": 2.48, + "step": 439700 + }, + { + "epoch": 0.28, + "learning_rate": 3.5949407620877364e-05, + "loss": 2.4779, + "step": 439800 + }, + { + "epoch": 0.28, + "learning_rate": 3.5946205571565803e-05, + "loss": 2.471, + "step": 439900 + }, + { + "epoch": 0.28, + "learning_rate": 3.594300352225424e-05, + "loss": 2.4799, + "step": 440000 + }, + { + "epoch": 0.28, + "eval_loss": 2.494257926940918, + "eval_runtime": 177.2632, + "eval_samples_per_second": 56.413, + "eval_steps_per_second": 3.526, + "step": 440000 + }, + { + "epoch": 0.28, + "learning_rate": 3.593980147294268e-05, + "loss": 2.4841, + "step": 440100 + }, + { + "epoch": 0.28, + "learning_rate": 3.593659942363112e-05, + "loss": 2.4754, + "step": 440200 + }, + { + "epoch": 0.28, + "learning_rate": 3.593339737431957e-05, + "loss": 2.4856, + "step": 440300 + }, + { + "epoch": 0.28, + "learning_rate": 3.593019532500801e-05, + "loss": 2.4596, + "step": 440400 + }, + { + "epoch": 0.28, + "learning_rate": 3.592699327569645e-05, + "loss": 2.4729, + "step": 440500 + }, + { + "epoch": 0.28, + "learning_rate": 3.592379122638489e-05, + "loss": 2.4772, + "step": 440600 + }, + { + "epoch": 0.28, + "learning_rate": 3.592058917707333e-05, + "loss": 2.4668, + "step": 440700 + }, + { + "epoch": 0.28, + "learning_rate": 3.591738712776177e-05, + "loss": 2.4635, + "step": 440800 + }, + { + "epoch": 0.28, + "learning_rate": 3.591418507845021e-05, + "loss": 2.481, + "step": 440900 + }, + { + "epoch": 0.28, + "learning_rate": 3.591098302913865e-05, + "loss": 2.4808, + "step": 441000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4937944412231445, + "eval_runtime": 176.7561, + "eval_samples_per_second": 56.575, + "eval_steps_per_second": 3.536, + "step": 441000 + }, + { + "epoch": 0.28, + "learning_rate": 3.5907780979827094e-05, + "loss": 2.4661, + "step": 441100 + }, + { + "epoch": 0.28, + "learning_rate": 3.5904578930515534e-05, + "loss": 2.4612, + "step": 441200 + }, + { + "epoch": 0.28, + "learning_rate": 3.590137688120397e-05, + "loss": 2.4688, + "step": 441300 + }, + { + "epoch": 0.28, + "learning_rate": 3.589817483189241e-05, + "loss": 2.4639, + "step": 441400 + }, + { + "epoch": 0.28, + "learning_rate": 3.589497278258085e-05, + "loss": 2.4823, + "step": 441500 + }, + { + "epoch": 0.28, + "learning_rate": 3.589177073326929e-05, + "loss": 2.464, + "step": 441600 + }, + { + "epoch": 0.28, + "learning_rate": 3.588856868395773e-05, + "loss": 2.4781, + "step": 441700 + }, + { + "epoch": 0.28, + "learning_rate": 3.588536663464617e-05, + "loss": 2.476, + "step": 441800 + }, + { + "epoch": 0.28, + "learning_rate": 3.588216458533462e-05, + "loss": 2.4694, + "step": 441900 + }, + { + "epoch": 0.28, + "learning_rate": 3.587896253602306e-05, + "loss": 2.4791, + "step": 442000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4932377338409424, + "eval_runtime": 177.3647, + "eval_samples_per_second": 56.381, + "eval_steps_per_second": 3.524, + "step": 442000 + }, + { + "epoch": 0.28, + "learning_rate": 3.58757604867115e-05, + "loss": 2.4855, + "step": 442100 + }, + { + "epoch": 0.28, + "learning_rate": 3.587255843739994e-05, + "loss": 2.4666, + "step": 442200 + }, + { + "epoch": 0.28, + "learning_rate": 3.586935638808838e-05, + "loss": 2.4782, + "step": 442300 + }, + { + "epoch": 0.28, + "learning_rate": 3.586615433877682e-05, + "loss": 2.4878, + "step": 442400 + }, + { + "epoch": 0.28, + "learning_rate": 3.586295228946526e-05, + "loss": 2.4789, + "step": 442500 + }, + { + "epoch": 0.28, + "learning_rate": 3.58597502401537e-05, + "loss": 2.4673, + "step": 442600 + }, + { + "epoch": 0.28, + "learning_rate": 3.585654819084214e-05, + "loss": 2.4407, + "step": 442700 + }, + { + "epoch": 0.28, + "learning_rate": 3.585334614153058e-05, + "loss": 2.4581, + "step": 442800 + }, + { + "epoch": 0.28, + "learning_rate": 3.585014409221902e-05, + "loss": 2.4714, + "step": 442900 + }, + { + "epoch": 0.28, + "learning_rate": 3.584694204290746e-05, + "loss": 2.4655, + "step": 443000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4938127994537354, + "eval_runtime": 178.5522, + "eval_samples_per_second": 56.006, + "eval_steps_per_second": 3.5, + "step": 443000 + }, + { + "epoch": 0.28, + "learning_rate": 3.58437399935959e-05, + "loss": 2.4742, + "step": 443100 + }, + { + "epoch": 0.28, + "learning_rate": 3.584053794428434e-05, + "loss": 2.4513, + "step": 443200 + }, + { + "epoch": 0.28, + "learning_rate": 3.583733589497278e-05, + "loss": 2.4779, + "step": 443300 + }, + { + "epoch": 0.28, + "learning_rate": 3.583413384566123e-05, + "loss": 2.4659, + "step": 443400 + }, + { + "epoch": 0.28, + "learning_rate": 3.583093179634967e-05, + "loss": 2.4824, + "step": 443500 + }, + { + "epoch": 0.28, + "learning_rate": 3.582772974703811e-05, + "loss": 2.4671, + "step": 443600 + }, + { + "epoch": 0.28, + "learning_rate": 3.582452769772655e-05, + "loss": 2.4798, + "step": 443700 + }, + { + "epoch": 0.28, + "learning_rate": 3.582132564841499e-05, + "loss": 2.4541, + "step": 443800 + }, + { + "epoch": 0.28, + "learning_rate": 3.581812359910343e-05, + "loss": 2.4834, + "step": 443900 + }, + { + "epoch": 0.28, + "learning_rate": 3.5814921549791867e-05, + "loss": 2.468, + "step": 444000 + }, + { + "epoch": 0.28, + "eval_loss": 2.492299795150757, + "eval_runtime": 178.808, + "eval_samples_per_second": 55.926, + "eval_steps_per_second": 3.495, + "step": 444000 + }, + { + "epoch": 0.28, + "learning_rate": 3.581171950048031e-05, + "loss": 2.4625, + "step": 444100 + }, + { + "epoch": 0.28, + "learning_rate": 3.5808517451168746e-05, + "loss": 2.4731, + "step": 444200 + }, + { + "epoch": 0.28, + "learning_rate": 3.580531540185719e-05, + "loss": 2.4681, + "step": 444300 + }, + { + "epoch": 0.28, + "learning_rate": 3.580211335254563e-05, + "loss": 2.4664, + "step": 444400 + }, + { + "epoch": 0.28, + "learning_rate": 3.579891130323407e-05, + "loss": 2.4502, + "step": 444500 + }, + { + "epoch": 0.28, + "learning_rate": 3.579570925392251e-05, + "loss": 2.4593, + "step": 444600 + }, + { + "epoch": 0.28, + "learning_rate": 3.579250720461095e-05, + "loss": 2.4739, + "step": 444700 + }, + { + "epoch": 0.28, + "learning_rate": 3.578930515529939e-05, + "loss": 2.4491, + "step": 444800 + }, + { + "epoch": 0.28, + "learning_rate": 3.578610310598783e-05, + "loss": 2.4718, + "step": 444900 + }, + { + "epoch": 0.28, + "learning_rate": 3.578290105667628e-05, + "loss": 2.4559, + "step": 445000 + }, + { + "epoch": 0.28, + "eval_loss": 2.4955239295959473, + "eval_runtime": 177.5848, + "eval_samples_per_second": 56.311, + "eval_steps_per_second": 3.519, + "step": 445000 + }, + { + "epoch": 0.28, + "learning_rate": 3.577969900736472e-05, + "loss": 2.4634, + "step": 445100 + }, + { + "epoch": 0.28, + "learning_rate": 3.577649695805316e-05, + "loss": 2.4743, + "step": 445200 + }, + { + "epoch": 0.28, + "learning_rate": 3.57732949087416e-05, + "loss": 2.4673, + "step": 445300 + }, + { + "epoch": 0.29, + "learning_rate": 3.5770092859430036e-05, + "loss": 2.469, + "step": 445400 + }, + { + "epoch": 0.29, + "learning_rate": 3.5766890810118476e-05, + "loss": 2.4642, + "step": 445500 + }, + { + "epoch": 0.29, + "learning_rate": 3.5763688760806916e-05, + "loss": 2.4439, + "step": 445600 + }, + { + "epoch": 0.29, + "learning_rate": 3.576048671149536e-05, + "loss": 2.4756, + "step": 445700 + }, + { + "epoch": 0.29, + "learning_rate": 3.5757284662183795e-05, + "loss": 2.4627, + "step": 445800 + }, + { + "epoch": 0.29, + "learning_rate": 3.575408261287224e-05, + "loss": 2.457, + "step": 445900 + }, + { + "epoch": 0.29, + "learning_rate": 3.575088056356068e-05, + "loss": 2.4831, + "step": 446000 + }, + { + "epoch": 0.29, + "eval_loss": 2.49298095703125, + "eval_runtime": 176.8362, + "eval_samples_per_second": 56.55, + "eval_steps_per_second": 3.534, + "step": 446000 + }, + { + "epoch": 0.29, + "learning_rate": 3.574767851424912e-05, + "loss": 2.4626, + "step": 446100 + }, + { + "epoch": 0.29, + "learning_rate": 3.574447646493756e-05, + "loss": 2.4478, + "step": 446200 + }, + { + "epoch": 0.29, + "learning_rate": 3.5741274415626e-05, + "loss": 2.4569, + "step": 446300 + }, + { + "epoch": 0.29, + "learning_rate": 3.573807236631445e-05, + "loss": 2.4704, + "step": 446400 + }, + { + "epoch": 0.29, + "learning_rate": 3.573487031700288e-05, + "loss": 2.4993, + "step": 446500 + }, + { + "epoch": 0.29, + "learning_rate": 3.573166826769133e-05, + "loss": 2.4508, + "step": 446600 + }, + { + "epoch": 0.29, + "learning_rate": 3.572846621837977e-05, + "loss": 2.4603, + "step": 446700 + }, + { + "epoch": 0.29, + "learning_rate": 3.5725264169068206e-05, + "loss": 2.4818, + "step": 446800 + }, + { + "epoch": 0.29, + "learning_rate": 3.5722062119756646e-05, + "loss": 2.4838, + "step": 446900 + }, + { + "epoch": 0.29, + "learning_rate": 3.5718860070445086e-05, + "loss": 2.4736, + "step": 447000 + }, + { + "epoch": 0.29, + "eval_loss": 2.4949324131011963, + "eval_runtime": 177.2099, + "eval_samples_per_second": 56.43, + "eval_steps_per_second": 3.527, + "step": 447000 + }, + { + "epoch": 0.29, + "learning_rate": 3.5715658021133526e-05, + "loss": 2.4481, + "step": 447100 + }, + { + "epoch": 0.29, + "learning_rate": 3.5712455971821965e-05, + "loss": 2.4844, + "step": 447200 + }, + { + "epoch": 0.29, + "learning_rate": 3.570925392251041e-05, + "loss": 2.4795, + "step": 447300 + }, + { + "epoch": 0.29, + "learning_rate": 3.5706051873198845e-05, + "loss": 2.46, + "step": 447400 + }, + { + "epoch": 0.29, + "learning_rate": 3.570284982388729e-05, + "loss": 2.4569, + "step": 447500 + }, + { + "epoch": 0.29, + "learning_rate": 3.569964777457573e-05, + "loss": 2.4753, + "step": 447600 + }, + { + "epoch": 0.29, + "learning_rate": 3.569644572526417e-05, + "loss": 2.4581, + "step": 447700 + }, + { + "epoch": 0.29, + "learning_rate": 3.569324367595261e-05, + "loss": 2.4456, + "step": 447800 + }, + { + "epoch": 0.29, + "learning_rate": 3.569004162664105e-05, + "loss": 2.4647, + "step": 447900 + }, + { + "epoch": 0.29, + "learning_rate": 3.56868395773295e-05, + "loss": 2.4473, + "step": 448000 + }, + { + "epoch": 0.29, + "eval_loss": 2.496084690093994, + "eval_runtime": 173.9796, + "eval_samples_per_second": 57.478, + "eval_steps_per_second": 3.592, + "step": 448000 + }, + { + "epoch": 0.29, + "learning_rate": 3.568363752801793e-05, + "loss": 2.4676, + "step": 448100 + }, + { + "epoch": 0.29, + "learning_rate": 3.5680435478706376e-05, + "loss": 2.4709, + "step": 448200 + }, + { + "epoch": 0.29, + "learning_rate": 3.5677233429394816e-05, + "loss": 2.4867, + "step": 448300 + }, + { + "epoch": 0.29, + "learning_rate": 3.5674031380083256e-05, + "loss": 2.4506, + "step": 448400 + }, + { + "epoch": 0.29, + "learning_rate": 3.5670829330771695e-05, + "loss": 2.4687, + "step": 448500 + }, + { + "epoch": 0.29, + "learning_rate": 3.5667627281460135e-05, + "loss": 2.4423, + "step": 448600 + }, + { + "epoch": 0.29, + "learning_rate": 3.566442523214858e-05, + "loss": 2.4716, + "step": 448700 + }, + { + "epoch": 0.29, + "learning_rate": 3.5661223182837015e-05, + "loss": 2.4638, + "step": 448800 + }, + { + "epoch": 0.29, + "learning_rate": 3.565802113352546e-05, + "loss": 2.4549, + "step": 448900 + }, + { + "epoch": 0.29, + "learning_rate": 3.5654819084213894e-05, + "loss": 2.4655, + "step": 449000 + }, + { + "epoch": 0.29, + "eval_loss": 2.494385242462158, + "eval_runtime": 173.7929, + "eval_samples_per_second": 57.54, + "eval_steps_per_second": 3.596, + "step": 449000 + } + ], + "max_steps": 1562500, + "num_train_epochs": 1, + "total_flos": 1.229824204996608e+19, + "trial_name": null, + "trial_params": null +}