{ "best_metric": 1.3208675384521484, "best_model_checkpoint": "saved_model/c2s_jan2025/checkpoint-17636", "epoch": 1.9999716493018642, "eval_steps": 500, "global_step": 35272, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": NaN, "learning_rate": 0.0, "loss": 75.2365, "step": 1 }, { "epoch": 0.0, "grad_norm": 17.601116180419922, "learning_rate": 2.5e-06, "loss": 73.7197, "step": 10 }, { "epoch": 0.0, "grad_norm": 14.332148551940918, "learning_rate": 7.5e-06, "loss": 73.8353, "step": 20 }, { "epoch": 0.0, "grad_norm": NaN, "learning_rate": 1.1000000000000001e-05, "loss": 71.5395, "step": 30 }, { "epoch": 0.0, "grad_norm": 12.998903274536133, "learning_rate": 1.6000000000000003e-05, "loss": 70.4902, "step": 40 }, { "epoch": 0.0, "grad_norm": 13.739825248718262, "learning_rate": 2.05e-05, "loss": 68.2056, "step": 50 }, { "epoch": 0.0, "grad_norm": 28.420326232910156, "learning_rate": 2.5500000000000003e-05, "loss": 63.6571, "step": 60 }, { "epoch": 0.0, "grad_norm": 24.43354034423828, "learning_rate": 3.05e-05, "loss": 56.1449, "step": 70 }, { "epoch": 0.0, "grad_norm": 29.866273880004883, "learning_rate": 3.55e-05, "loss": 37.3549, "step": 80 }, { "epoch": 0.01, "grad_norm": 12.376081466674805, "learning_rate": 4.05e-05, "loss": 15.4044, "step": 90 }, { "epoch": 0.01, "grad_norm": 6.918032646179199, "learning_rate": 4.55e-05, "loss": 7.8667, "step": 100 }, { "epoch": 0.01, "grad_norm": 6.051260471343994, "learning_rate": 5.05e-05, "loss": 5.5636, "step": 110 }, { "epoch": 0.01, "grad_norm": 9.028839111328125, "learning_rate": 5.550000000000001e-05, "loss": 4.7794, "step": 120 }, { "epoch": 0.01, "grad_norm": 6.544124603271484, "learning_rate": 6.05e-05, "loss": 4.2496, "step": 130 }, { "epoch": 0.01, "grad_norm": 7.263061046600342, "learning_rate": 6.55e-05, "loss": 3.9991, "step": 140 }, { "epoch": 0.01, "grad_norm": 6.651381015777588, "learning_rate": 7.05e-05, "loss": 3.7571, "step": 150 }, { "epoch": 0.01, "grad_norm": 6.244420528411865, "learning_rate": 7.55e-05, "loss": 3.6719, "step": 160 }, { "epoch": 0.01, "grad_norm": 7.223423957824707, "learning_rate": 8.05e-05, "loss": 3.5676, "step": 170 }, { "epoch": 0.01, "grad_norm": 5.794151306152344, "learning_rate": 8.55e-05, "loss": 3.4162, "step": 180 }, { "epoch": 0.01, "grad_norm": 6.603245735168457, "learning_rate": 9.05e-05, "loss": 3.365, "step": 190 }, { "epoch": 0.01, "grad_norm": 5.7261199951171875, "learning_rate": 9.55e-05, "loss": 3.2593, "step": 200 }, { "epoch": 0.01, "grad_norm": 5.978001594543457, "learning_rate": 9.999943233424161e-05, "loss": 3.245, "step": 210 }, { "epoch": 0.01, "grad_norm": 5.7439374923706055, "learning_rate": 9.999375567665759e-05, "loss": 3.1241, "step": 220 }, { "epoch": 0.01, "grad_norm": 5.832992076873779, "learning_rate": 9.998807901907357e-05, "loss": 3.19, "step": 230 }, { "epoch": 0.01, "grad_norm": 5.574953556060791, "learning_rate": 9.998240236148956e-05, "loss": 3.0943, "step": 240 }, { "epoch": 0.01, "grad_norm": 6.619871139526367, "learning_rate": 9.997672570390554e-05, "loss": 3.1124, "step": 250 }, { "epoch": 0.01, "grad_norm": 5.600893497467041, "learning_rate": 9.997104904632152e-05, "loss": 3.0763, "step": 260 }, { "epoch": 0.02, "grad_norm": 4.82636833190918, "learning_rate": 9.996537238873752e-05, "loss": 3.0789, "step": 270 }, { "epoch": 0.02, "grad_norm": 5.087795734405518, "learning_rate": 9.99596957311535e-05, "loss": 2.9761, "step": 280 }, { "epoch": 0.02, "grad_norm": 4.215710639953613, "learning_rate": 9.995401907356948e-05, "loss": 3.0284, "step": 290 }, { "epoch": 0.02, "grad_norm": 5.010191440582275, "learning_rate": 9.994834241598547e-05, "loss": 3.0684, "step": 300 }, { "epoch": 0.02, "grad_norm": 4.843348026275635, "learning_rate": 9.994266575840145e-05, "loss": 2.981, "step": 310 }, { "epoch": 0.02, "grad_norm": 5.19581413269043, "learning_rate": 9.993698910081744e-05, "loss": 3.0014, "step": 320 }, { "epoch": 0.02, "grad_norm": 4.199511528015137, "learning_rate": 9.993131244323342e-05, "loss": 2.9341, "step": 330 }, { "epoch": 0.02, "grad_norm": 4.261699199676514, "learning_rate": 9.992563578564942e-05, "loss": 2.9334, "step": 340 }, { "epoch": 0.02, "grad_norm": 4.278054714202881, "learning_rate": 9.99199591280654e-05, "loss": 2.9098, "step": 350 }, { "epoch": 0.02, "grad_norm": 4.84391450881958, "learning_rate": 9.991428247048139e-05, "loss": 2.8694, "step": 360 }, { "epoch": 0.02, "grad_norm": 4.222662925720215, "learning_rate": 9.990860581289737e-05, "loss": 2.8555, "step": 370 }, { "epoch": 0.02, "grad_norm": 4.368465423583984, "learning_rate": 9.990292915531335e-05, "loss": 2.8884, "step": 380 }, { "epoch": 0.02, "grad_norm": 4.3050217628479, "learning_rate": 9.989725249772935e-05, "loss": 2.8811, "step": 390 }, { "epoch": 0.02, "grad_norm": 5.0246453285217285, "learning_rate": 9.989157584014533e-05, "loss": 2.822, "step": 400 }, { "epoch": 0.02, "grad_norm": 4.739973068237305, "learning_rate": 9.988589918256132e-05, "loss": 2.795, "step": 410 }, { "epoch": 0.02, "grad_norm": 4.35971736907959, "learning_rate": 9.98802225249773e-05, "loss": 2.7265, "step": 420 }, { "epoch": 0.02, "grad_norm": 4.430526256561279, "learning_rate": 9.987454586739329e-05, "loss": 2.7514, "step": 430 }, { "epoch": 0.02, "grad_norm": 4.301177501678467, "learning_rate": 9.986886920980927e-05, "loss": 2.6568, "step": 440 }, { "epoch": 0.03, "grad_norm": 3.539289712905884, "learning_rate": 9.986319255222527e-05, "loss": 2.643, "step": 450 }, { "epoch": 0.03, "grad_norm": 3.6616992950439453, "learning_rate": 9.985751589464125e-05, "loss": 2.5689, "step": 460 }, { "epoch": 0.03, "grad_norm": 3.811377763748169, "learning_rate": 9.985183923705723e-05, "loss": 2.6046, "step": 470 }, { "epoch": 0.03, "grad_norm": 3.795743942260742, "learning_rate": 9.984616257947322e-05, "loss": 2.4734, "step": 480 }, { "epoch": 0.03, "grad_norm": 2.9367635250091553, "learning_rate": 9.98404859218892e-05, "loss": 2.3111, "step": 490 }, { "epoch": 0.03, "grad_norm": 3.0315542221069336, "learning_rate": 9.983480926430518e-05, "loss": 2.1928, "step": 500 }, { "epoch": 0.03, "grad_norm": 2.4590466022491455, "learning_rate": 9.982913260672116e-05, "loss": 2.0721, "step": 510 }, { "epoch": 0.03, "grad_norm": 2.110217332839966, "learning_rate": 9.982345594913715e-05, "loss": 1.9357, "step": 520 }, { "epoch": 0.03, "grad_norm": 2.280344247817993, "learning_rate": 9.981777929155313e-05, "loss": 1.8419, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.9060053825378418, "learning_rate": 9.981210263396913e-05, "loss": 1.8254, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.814399003982544, "learning_rate": 9.98064259763851e-05, "loss": 1.7558, "step": 550 }, { "epoch": 0.03, "grad_norm": 1.9476678371429443, "learning_rate": 9.980074931880109e-05, "loss": 1.7916, "step": 560 }, { "epoch": 0.03, "grad_norm": 1.7333909273147583, "learning_rate": 9.979507266121708e-05, "loss": 1.7442, "step": 570 }, { "epoch": 0.03, "grad_norm": 1.657645344734192, "learning_rate": 9.978939600363306e-05, "loss": 1.6894, "step": 580 }, { "epoch": 0.03, "grad_norm": 1.7111319303512573, "learning_rate": 9.978371934604905e-05, "loss": 1.6551, "step": 590 }, { "epoch": 0.03, "grad_norm": 1.672993540763855, "learning_rate": 9.977804268846503e-05, "loss": 1.6807, "step": 600 }, { "epoch": 0.03, "grad_norm": 1.8577607870101929, "learning_rate": 9.977236603088103e-05, "loss": 1.6378, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.7955611944198608, "learning_rate": 9.9766689373297e-05, "loss": 1.6549, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.683457851409912, "learning_rate": 9.9761012715713e-05, "loss": 1.6408, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.5391137599945068, "learning_rate": 9.975533605812898e-05, "loss": 1.6117, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.6177318096160889, "learning_rate": 9.974965940054496e-05, "loss": 1.6058, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.5189566612243652, "learning_rate": 9.974398274296095e-05, "loss": 1.5888, "step": 660 }, { "epoch": 0.04, "grad_norm": 1.513979434967041, "learning_rate": 9.973830608537693e-05, "loss": 1.57, "step": 670 }, { "epoch": 0.04, "grad_norm": 1.529478669166565, "learning_rate": 9.973262942779293e-05, "loss": 1.585, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.6714704036712646, "learning_rate": 9.97269527702089e-05, "loss": 1.5865, "step": 690 }, { "epoch": 0.04, "grad_norm": 1.4285169839859009, "learning_rate": 9.97212761126249e-05, "loss": 1.6109, "step": 700 }, { "epoch": 0.04, "grad_norm": 1.5364874601364136, "learning_rate": 9.971559945504088e-05, "loss": 1.5768, "step": 710 }, { "epoch": 0.04, "grad_norm": 1.5872305631637573, "learning_rate": 9.970992279745687e-05, "loss": 1.5879, "step": 720 }, { "epoch": 0.04, "grad_norm": 1.5019173622131348, "learning_rate": 9.970424613987285e-05, "loss": 1.5452, "step": 730 }, { "epoch": 0.04, "grad_norm": 1.5767675638198853, "learning_rate": 9.969856948228883e-05, "loss": 1.5838, "step": 740 }, { "epoch": 0.04, "grad_norm": 1.429320216178894, "learning_rate": 9.969289282470481e-05, "loss": 1.5672, "step": 750 }, { "epoch": 0.04, "grad_norm": 1.495836853981018, "learning_rate": 9.968721616712081e-05, "loss": 1.5678, "step": 760 }, { "epoch": 0.04, "grad_norm": 1.7393364906311035, "learning_rate": 9.968153950953679e-05, "loss": 1.5624, "step": 770 }, { "epoch": 0.04, "grad_norm": 1.5137113332748413, "learning_rate": 9.967586285195277e-05, "loss": 1.5604, "step": 780 }, { "epoch": 0.04, "grad_norm": 1.5822861194610596, "learning_rate": 9.967018619436876e-05, "loss": 1.5298, "step": 790 }, { "epoch": 0.05, "grad_norm": 1.3643122911453247, "learning_rate": 9.966450953678474e-05, "loss": 1.4915, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.3700604438781738, "learning_rate": 9.965883287920073e-05, "loss": 1.5018, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.3062816858291626, "learning_rate": 9.965315622161671e-05, "loss": 1.4884, "step": 820 }, { "epoch": 0.05, "grad_norm": 1.462350845336914, "learning_rate": 9.96474795640327e-05, "loss": 1.5164, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.4391170740127563, "learning_rate": 9.964180290644869e-05, "loss": 1.4902, "step": 840 }, { "epoch": 0.05, "grad_norm": 1.4623042345046997, "learning_rate": 9.963612624886467e-05, "loss": 1.4873, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.5488314628601074, "learning_rate": 9.963044959128066e-05, "loss": 1.5155, "step": 860 }, { "epoch": 0.05, "grad_norm": 1.3980971574783325, "learning_rate": 9.962477293369664e-05, "loss": 1.5174, "step": 870 }, { "epoch": 0.05, "grad_norm": 1.369452714920044, "learning_rate": 9.961909627611263e-05, "loss": 1.4942, "step": 880 }, { "epoch": 0.05, "grad_norm": 1.4750287532806396, "learning_rate": 9.961341961852861e-05, "loss": 1.4891, "step": 890 }, { "epoch": 0.05, "grad_norm": 1.3880773782730103, "learning_rate": 9.960774296094461e-05, "loss": 1.5045, "step": 900 }, { "epoch": 0.05, "grad_norm": 1.3842027187347412, "learning_rate": 9.960206630336059e-05, "loss": 1.4988, "step": 910 }, { "epoch": 0.05, "grad_norm": 1.3961899280548096, "learning_rate": 9.959638964577657e-05, "loss": 1.526, "step": 920 }, { "epoch": 0.05, "grad_norm": 1.3435640335083008, "learning_rate": 9.959071298819256e-05, "loss": 1.4984, "step": 930 }, { "epoch": 0.05, "grad_norm": 1.4240602254867554, "learning_rate": 9.958503633060854e-05, "loss": 1.4739, "step": 940 }, { "epoch": 0.05, "grad_norm": 1.476125717163086, "learning_rate": 9.957935967302453e-05, "loss": 1.4586, "step": 950 }, { "epoch": 0.05, "grad_norm": 1.3862614631652832, "learning_rate": 9.957368301544051e-05, "loss": 1.4851, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.3513296842575073, "learning_rate": 9.956800635785651e-05, "loss": 1.4765, "step": 970 }, { "epoch": 0.06, "grad_norm": 1.3960676193237305, "learning_rate": 9.956232970027249e-05, "loss": 1.4854, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.3700183629989624, "learning_rate": 9.955665304268847e-05, "loss": 1.4588, "step": 990 }, { "epoch": 0.06, "grad_norm": 1.441102147102356, "learning_rate": 9.955097638510446e-05, "loss": 1.462, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.3054773807525635, "learning_rate": 9.954529972752044e-05, "loss": 1.4755, "step": 1010 }, { "epoch": 0.06, "grad_norm": 1.3567821979522705, "learning_rate": 9.953962306993642e-05, "loss": 1.4505, "step": 1020 }, { "epoch": 0.06, "grad_norm": 1.3063576221466064, "learning_rate": 9.95339464123524e-05, "loss": 1.478, "step": 1030 }, { "epoch": 0.06, "grad_norm": 1.301519751548767, "learning_rate": 9.95282697547684e-05, "loss": 1.4347, "step": 1040 }, { "epoch": 0.06, "grad_norm": 1.4010707139968872, "learning_rate": 9.952259309718438e-05, "loss": 1.4355, "step": 1050 }, { "epoch": 0.06, "grad_norm": 1.3830599784851074, "learning_rate": 9.951691643960037e-05, "loss": 1.467, "step": 1060 }, { "epoch": 0.06, "grad_norm": 1.2570585012435913, "learning_rate": 9.951123978201635e-05, "loss": 1.4781, "step": 1070 }, { "epoch": 0.06, "grad_norm": 1.3248127698898315, "learning_rate": 9.950556312443234e-05, "loss": 1.4363, "step": 1080 }, { "epoch": 0.06, "grad_norm": 1.3226598501205444, "learning_rate": 9.949988646684832e-05, "loss": 1.4613, "step": 1090 }, { "epoch": 0.06, "grad_norm": 1.140259861946106, "learning_rate": 9.94942098092643e-05, "loss": 1.4284, "step": 1100 }, { "epoch": 0.06, "grad_norm": 1.2090774774551392, "learning_rate": 9.94885331516803e-05, "loss": 1.4481, "step": 1110 }, { "epoch": 0.06, "grad_norm": 1.191074252128601, "learning_rate": 9.948285649409628e-05, "loss": 1.4493, "step": 1120 }, { "epoch": 0.06, "grad_norm": 1.225002646446228, "learning_rate": 9.947717983651227e-05, "loss": 1.4592, "step": 1130 }, { "epoch": 0.06, "grad_norm": 1.3696708679199219, "learning_rate": 9.947150317892825e-05, "loss": 1.4407, "step": 1140 }, { "epoch": 0.07, "grad_norm": 1.3406765460968018, "learning_rate": 9.946582652134424e-05, "loss": 1.4228, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.354957938194275, "learning_rate": 9.946014986376022e-05, "loss": 1.4362, "step": 1160 }, { "epoch": 0.07, "grad_norm": 1.2042971849441528, "learning_rate": 9.945447320617622e-05, "loss": 1.4769, "step": 1170 }, { "epoch": 0.07, "grad_norm": 1.3324471712112427, "learning_rate": 9.94487965485922e-05, "loss": 1.4438, "step": 1180 }, { "epoch": 0.07, "grad_norm": 1.278115153312683, "learning_rate": 9.944311989100818e-05, "loss": 1.4399, "step": 1190 }, { "epoch": 0.07, "grad_norm": 1.267648458480835, "learning_rate": 9.943744323342417e-05, "loss": 1.4158, "step": 1200 }, { "epoch": 0.07, "grad_norm": 1.1339918375015259, "learning_rate": 9.943176657584015e-05, "loss": 1.4498, "step": 1210 }, { "epoch": 0.07, "grad_norm": 1.2202574014663696, "learning_rate": 9.942608991825614e-05, "loss": 1.4339, "step": 1220 }, { "epoch": 0.07, "grad_norm": 1.2328014373779297, "learning_rate": 9.942041326067212e-05, "loss": 1.4348, "step": 1230 }, { "epoch": 0.07, "grad_norm": 1.1617897748947144, "learning_rate": 9.941473660308812e-05, "loss": 1.412, "step": 1240 }, { "epoch": 0.07, "grad_norm": 1.2108471393585205, "learning_rate": 9.94090599455041e-05, "loss": 1.4324, "step": 1250 }, { "epoch": 0.07, "grad_norm": 1.190739393234253, "learning_rate": 9.940338328792008e-05, "loss": 1.3694, "step": 1260 }, { "epoch": 0.07, "grad_norm": 1.1757116317749023, "learning_rate": 9.939770663033606e-05, "loss": 1.418, "step": 1270 }, { "epoch": 0.07, "grad_norm": 1.1638460159301758, "learning_rate": 9.939202997275205e-05, "loss": 1.4174, "step": 1280 }, { "epoch": 0.07, "grad_norm": 1.210571527481079, "learning_rate": 9.938635331516803e-05, "loss": 1.4349, "step": 1290 }, { "epoch": 0.07, "grad_norm": 1.1896806955337524, "learning_rate": 9.938067665758401e-05, "loss": 1.41, "step": 1300 }, { "epoch": 0.07, "grad_norm": 1.3075348138809204, "learning_rate": 9.9375e-05, "loss": 1.4036, "step": 1310 }, { "epoch": 0.07, "grad_norm": 1.2618632316589355, "learning_rate": 9.936932334241598e-05, "loss": 1.446, "step": 1320 }, { "epoch": 0.08, "grad_norm": 1.1855167150497437, "learning_rate": 9.936364668483198e-05, "loss": 1.4341, "step": 1330 }, { "epoch": 0.08, "grad_norm": 1.2698819637298584, "learning_rate": 9.935797002724796e-05, "loss": 1.4053, "step": 1340 }, { "epoch": 0.08, "grad_norm": 1.2201569080352783, "learning_rate": 9.935229336966395e-05, "loss": 1.4174, "step": 1350 }, { "epoch": 0.08, "grad_norm": 1.307070016860962, "learning_rate": 9.934661671207993e-05, "loss": 1.4147, "step": 1360 }, { "epoch": 0.08, "grad_norm": 1.2106343507766724, "learning_rate": 9.934094005449591e-05, "loss": 1.3672, "step": 1370 }, { "epoch": 0.08, "grad_norm": 1.323914885520935, "learning_rate": 9.93352633969119e-05, "loss": 1.3932, "step": 1380 }, { "epoch": 0.08, "grad_norm": 1.2546958923339844, "learning_rate": 9.932958673932788e-05, "loss": 1.4205, "step": 1390 }, { "epoch": 0.08, "grad_norm": 1.1377944946289062, "learning_rate": 9.932391008174388e-05, "loss": 1.4253, "step": 1400 }, { "epoch": 0.08, "grad_norm": 1.1831437349319458, "learning_rate": 9.931823342415986e-05, "loss": 1.4195, "step": 1410 }, { "epoch": 0.08, "grad_norm": 1.1202658414840698, "learning_rate": 9.931255676657585e-05, "loss": 1.3935, "step": 1420 }, { "epoch": 0.08, "grad_norm": 1.1810835599899292, "learning_rate": 9.930688010899183e-05, "loss": 1.3804, "step": 1430 }, { "epoch": 0.08, "grad_norm": 1.2228286266326904, "learning_rate": 9.930120345140782e-05, "loss": 1.3884, "step": 1440 }, { "epoch": 0.08, "grad_norm": 1.2651171684265137, "learning_rate": 9.92955267938238e-05, "loss": 1.3962, "step": 1450 }, { "epoch": 0.08, "grad_norm": 1.8477009534835815, "learning_rate": 9.928985013623978e-05, "loss": 1.3733, "step": 1460 }, { "epoch": 0.08, "grad_norm": 1.1564894914627075, "learning_rate": 9.928417347865578e-05, "loss": 1.3848, "step": 1470 }, { "epoch": 0.08, "grad_norm": 1.2132364511489868, "learning_rate": 9.927849682107176e-05, "loss": 1.3956, "step": 1480 }, { "epoch": 0.08, "grad_norm": 1.191025972366333, "learning_rate": 9.927282016348775e-05, "loss": 1.4016, "step": 1490 }, { "epoch": 0.09, "grad_norm": 1.096326231956482, "learning_rate": 9.926714350590373e-05, "loss": 1.4115, "step": 1500 }, { "epoch": 0.09, "grad_norm": 1.1612204313278198, "learning_rate": 9.926146684831971e-05, "loss": 1.3886, "step": 1510 }, { "epoch": 0.09, "grad_norm": 1.1434056758880615, "learning_rate": 9.92557901907357e-05, "loss": 1.4206, "step": 1520 }, { "epoch": 0.09, "grad_norm": 1.1720579862594604, "learning_rate": 9.925011353315168e-05, "loss": 1.421, "step": 1530 }, { "epoch": 0.09, "grad_norm": 1.2896806001663208, "learning_rate": 9.924443687556766e-05, "loss": 1.3829, "step": 1540 }, { "epoch": 0.09, "grad_norm": 1.1180717945098877, "learning_rate": 9.923876021798364e-05, "loss": 1.3822, "step": 1550 }, { "epoch": 0.09, "grad_norm": 1.1078883409500122, "learning_rate": 9.923308356039964e-05, "loss": 1.3998, "step": 1560 }, { "epoch": 0.09, "grad_norm": 1.2296640872955322, "learning_rate": 9.922740690281562e-05, "loss": 1.3463, "step": 1570 }, { "epoch": 0.09, "grad_norm": 1.1031551361083984, "learning_rate": 9.922173024523161e-05, "loss": 1.3944, "step": 1580 }, { "epoch": 0.09, "grad_norm": 1.190901279449463, "learning_rate": 9.921605358764759e-05, "loss": 1.3996, "step": 1590 }, { "epoch": 0.09, "grad_norm": 1.2299946546554565, "learning_rate": 9.921037693006358e-05, "loss": 1.4188, "step": 1600 }, { "epoch": 0.09, "grad_norm": 1.0972840785980225, "learning_rate": 9.920470027247956e-05, "loss": 1.356, "step": 1610 }, { "epoch": 0.09, "grad_norm": 1.060860514640808, "learning_rate": 9.919902361489556e-05, "loss": 1.3576, "step": 1620 }, { "epoch": 0.09, "grad_norm": 1.0994012355804443, "learning_rate": 9.919334695731154e-05, "loss": 1.3573, "step": 1630 }, { "epoch": 0.09, "grad_norm": 1.2150131464004517, "learning_rate": 9.918767029972752e-05, "loss": 1.3651, "step": 1640 }, { "epoch": 0.09, "grad_norm": 1.1189353466033936, "learning_rate": 9.918199364214351e-05, "loss": 1.3459, "step": 1650 }, { "epoch": 0.09, "grad_norm": 1.272196888923645, "learning_rate": 9.917631698455949e-05, "loss": 1.3399, "step": 1660 }, { "epoch": 0.09, "grad_norm": 1.170344591140747, "learning_rate": 9.917064032697549e-05, "loss": 1.3671, "step": 1670 }, { "epoch": 0.1, "grad_norm": 1.2262731790542603, "learning_rate": 9.916496366939147e-05, "loss": 1.3833, "step": 1680 }, { "epoch": 0.1, "grad_norm": 1.109778642654419, "learning_rate": 9.915928701180746e-05, "loss": 1.3743, "step": 1690 }, { "epoch": 0.1, "grad_norm": 1.1340208053588867, "learning_rate": 9.915361035422344e-05, "loss": 1.3517, "step": 1700 }, { "epoch": 0.1, "grad_norm": 1.1593152284622192, "learning_rate": 9.914793369663943e-05, "loss": 1.3727, "step": 1710 }, { "epoch": 0.1, "grad_norm": 1.076449990272522, "learning_rate": 9.914225703905541e-05, "loss": 1.3439, "step": 1720 }, { "epoch": 0.1, "grad_norm": 1.1183741092681885, "learning_rate": 9.913658038147139e-05, "loss": 1.3379, "step": 1730 }, { "epoch": 0.1, "grad_norm": 1.0741314888000488, "learning_rate": 9.913090372388739e-05, "loss": 1.3744, "step": 1740 }, { "epoch": 0.1, "grad_norm": 1.1001273393630981, "learning_rate": 9.912522706630337e-05, "loss": 1.3605, "step": 1750 }, { "epoch": 0.1, "grad_norm": 1.2605596780776978, "learning_rate": 9.911955040871936e-05, "loss": 1.374, "step": 1760 }, { "epoch": 0.1, "grad_norm": 1.053250789642334, "learning_rate": 9.911387375113534e-05, "loss": 1.4, "step": 1770 }, { "epoch": 0.1, "grad_norm": 1.10549795627594, "learning_rate": 9.910819709355132e-05, "loss": 1.3515, "step": 1780 }, { "epoch": 0.1, "grad_norm": 1.0442265272140503, "learning_rate": 9.91025204359673e-05, "loss": 1.3694, "step": 1790 }, { "epoch": 0.1, "grad_norm": 1.1511539220809937, "learning_rate": 9.909684377838329e-05, "loss": 1.3563, "step": 1800 }, { "epoch": 0.1, "grad_norm": 1.2317103147506714, "learning_rate": 9.909116712079927e-05, "loss": 1.359, "step": 1810 }, { "epoch": 0.1, "grad_norm": 1.132761001586914, "learning_rate": 9.908549046321525e-05, "loss": 1.3769, "step": 1820 }, { "epoch": 0.1, "grad_norm": 1.0808706283569336, "learning_rate": 9.907981380563125e-05, "loss": 1.3658, "step": 1830 }, { "epoch": 0.1, "grad_norm": 1.0977706909179688, "learning_rate": 9.907413714804723e-05, "loss": 1.3761, "step": 1840 }, { "epoch": 0.1, "grad_norm": 1.165120005607605, "learning_rate": 9.906846049046322e-05, "loss": 1.3644, "step": 1850 }, { "epoch": 0.11, "grad_norm": 1.1937764883041382, "learning_rate": 9.90627838328792e-05, "loss": 1.3589, "step": 1860 }, { "epoch": 0.11, "grad_norm": 1.1666409969329834, "learning_rate": 9.905710717529519e-05, "loss": 1.3522, "step": 1870 }, { "epoch": 0.11, "grad_norm": 1.072387933731079, "learning_rate": 9.905143051771117e-05, "loss": 1.3471, "step": 1880 }, { "epoch": 0.11, "grad_norm": 1.0204044580459595, "learning_rate": 9.904575386012717e-05, "loss": 1.3821, "step": 1890 }, { "epoch": 0.11, "grad_norm": 1.343366026878357, "learning_rate": 9.904007720254315e-05, "loss": 1.3558, "step": 1900 }, { "epoch": 0.11, "grad_norm": 1.0841014385223389, "learning_rate": 9.903440054495914e-05, "loss": 1.3585, "step": 1910 }, { "epoch": 0.11, "grad_norm": 1.0932461023330688, "learning_rate": 9.902872388737512e-05, "loss": 1.3549, "step": 1920 }, { "epoch": 0.11, "grad_norm": 1.0415698289871216, "learning_rate": 9.90230472297911e-05, "loss": 1.3729, "step": 1930 }, { "epoch": 0.11, "grad_norm": 1.0276919603347778, "learning_rate": 9.901737057220709e-05, "loss": 1.3505, "step": 1940 }, { "epoch": 0.11, "grad_norm": 1.190290093421936, "learning_rate": 9.901169391462307e-05, "loss": 1.3474, "step": 1950 }, { "epoch": 0.11, "grad_norm": 1.2268683910369873, "learning_rate": 9.900601725703907e-05, "loss": 1.3449, "step": 1960 }, { "epoch": 0.11, "grad_norm": 1.104063868522644, "learning_rate": 9.900034059945505e-05, "loss": 1.362, "step": 1970 }, { "epoch": 0.11, "grad_norm": 1.1939626932144165, "learning_rate": 9.899466394187104e-05, "loss": 1.3404, "step": 1980 }, { "epoch": 0.11, "grad_norm": 1.1183202266693115, "learning_rate": 9.898898728428702e-05, "loss": 1.3747, "step": 1990 }, { "epoch": 0.11, "grad_norm": 1.1427550315856934, "learning_rate": 9.898331062670301e-05, "loss": 1.323, "step": 2000 }, { "epoch": 0.11, "grad_norm": 1.0319037437438965, "learning_rate": 9.8977633969119e-05, "loss": 1.3564, "step": 2010 }, { "epoch": 0.11, "grad_norm": 1.1494090557098389, "learning_rate": 9.897195731153497e-05, "loss": 1.338, "step": 2020 }, { "epoch": 0.12, "grad_norm": 1.112302303314209, "learning_rate": 9.896628065395095e-05, "loss": 1.3362, "step": 2030 }, { "epoch": 0.12, "grad_norm": 1.2843081951141357, "learning_rate": 9.896060399636695e-05, "loss": 1.3636, "step": 2040 }, { "epoch": 0.12, "grad_norm": 1.1213219165802002, "learning_rate": 9.895492733878293e-05, "loss": 1.3115, "step": 2050 }, { "epoch": 0.12, "grad_norm": 1.1535017490386963, "learning_rate": 9.894925068119891e-05, "loss": 1.3385, "step": 2060 }, { "epoch": 0.12, "grad_norm": 1.0788516998291016, "learning_rate": 9.89435740236149e-05, "loss": 1.3603, "step": 2070 }, { "epoch": 0.12, "grad_norm": 1.1347218751907349, "learning_rate": 9.893789736603088e-05, "loss": 1.3606, "step": 2080 }, { "epoch": 0.12, "grad_norm": 1.1195112466812134, "learning_rate": 9.893222070844687e-05, "loss": 1.3422, "step": 2090 }, { "epoch": 0.12, "grad_norm": 1.0394469499588013, "learning_rate": 9.892654405086285e-05, "loss": 1.3517, "step": 2100 }, { "epoch": 0.12, "grad_norm": 1.1114802360534668, "learning_rate": 9.892086739327883e-05, "loss": 1.3232, "step": 2110 }, { "epoch": 0.12, "grad_norm": 1.0926640033721924, "learning_rate": 9.891519073569483e-05, "loss": 1.3521, "step": 2120 }, { "epoch": 0.12, "grad_norm": 1.1456726789474487, "learning_rate": 9.890951407811081e-05, "loss": 1.3254, "step": 2130 }, { "epoch": 0.12, "grad_norm": 1.0451672077178955, "learning_rate": 9.89038374205268e-05, "loss": 1.3217, "step": 2140 }, { "epoch": 0.12, "grad_norm": 1.1492669582366943, "learning_rate": 9.889816076294278e-05, "loss": 1.3368, "step": 2150 }, { "epoch": 0.12, "grad_norm": 1.0062178373336792, "learning_rate": 9.889248410535877e-05, "loss": 1.3204, "step": 2160 }, { "epoch": 0.12, "grad_norm": 1.0931153297424316, "learning_rate": 9.888680744777475e-05, "loss": 1.3494, "step": 2170 }, { "epoch": 0.12, "grad_norm": 1.0552350282669067, "learning_rate": 9.888113079019075e-05, "loss": 1.3345, "step": 2180 }, { "epoch": 0.12, "grad_norm": 1.1342849731445312, "learning_rate": 9.887545413260673e-05, "loss": 1.3174, "step": 2190 }, { "epoch": 0.12, "grad_norm": 1.043104887008667, "learning_rate": 9.886977747502271e-05, "loss": 1.3411, "step": 2200 }, { "epoch": 0.13, "grad_norm": 1.1673355102539062, "learning_rate": 9.88641008174387e-05, "loss": 1.3608, "step": 2210 }, { "epoch": 0.13, "grad_norm": 1.079847812652588, "learning_rate": 9.885842415985468e-05, "loss": 1.3346, "step": 2220 }, { "epoch": 0.13, "grad_norm": 1.097606897354126, "learning_rate": 9.885274750227067e-05, "loss": 1.2973, "step": 2230 }, { "epoch": 0.13, "grad_norm": 1.0822465419769287, "learning_rate": 9.884707084468665e-05, "loss": 1.3169, "step": 2240 }, { "epoch": 0.13, "grad_norm": 1.0520752668380737, "learning_rate": 9.884139418710265e-05, "loss": 1.3149, "step": 2250 }, { "epoch": 0.13, "grad_norm": 1.1001169681549072, "learning_rate": 9.883571752951863e-05, "loss": 1.33, "step": 2260 }, { "epoch": 0.13, "grad_norm": 1.1592305898666382, "learning_rate": 9.883004087193461e-05, "loss": 1.3476, "step": 2270 }, { "epoch": 0.13, "grad_norm": 1.0532255172729492, "learning_rate": 9.88243642143506e-05, "loss": 1.3283, "step": 2280 }, { "epoch": 0.13, "grad_norm": 1.0762512683868408, "learning_rate": 9.881868755676658e-05, "loss": 1.3119, "step": 2290 }, { "epoch": 0.13, "grad_norm": 1.108137607574463, "learning_rate": 9.881301089918256e-05, "loss": 1.3329, "step": 2300 }, { "epoch": 0.13, "grad_norm": 1.0257203578948975, "learning_rate": 9.880733424159854e-05, "loss": 1.3236, "step": 2310 }, { "epoch": 0.13, "grad_norm": 1.0732356309890747, "learning_rate": 9.880165758401454e-05, "loss": 1.3174, "step": 2320 }, { "epoch": 0.13, "grad_norm": 1.1318949460983276, "learning_rate": 9.879598092643052e-05, "loss": 1.3126, "step": 2330 }, { "epoch": 0.13, "grad_norm": 1.1428983211517334, "learning_rate": 9.879030426884651e-05, "loss": 1.3322, "step": 2340 }, { "epoch": 0.13, "grad_norm": 1.1009504795074463, "learning_rate": 9.878462761126249e-05, "loss": 1.3417, "step": 2350 }, { "epoch": 0.13, "grad_norm": 1.08405601978302, "learning_rate": 9.877895095367848e-05, "loss": 1.3062, "step": 2360 }, { "epoch": 0.13, "grad_norm": 1.0670369863510132, "learning_rate": 9.877327429609446e-05, "loss": 1.3119, "step": 2370 }, { "epoch": 0.13, "grad_norm": 1.0442290306091309, "learning_rate": 9.876759763851044e-05, "loss": 1.3394, "step": 2380 }, { "epoch": 0.14, "grad_norm": 1.0600146055221558, "learning_rate": 9.876192098092644e-05, "loss": 1.3151, "step": 2390 }, { "epoch": 0.14, "grad_norm": 1.0546330213546753, "learning_rate": 9.875624432334242e-05, "loss": 1.3451, "step": 2400 }, { "epoch": 0.14, "grad_norm": 1.2398649454116821, "learning_rate": 9.875056766575841e-05, "loss": 1.3313, "step": 2410 }, { "epoch": 0.14, "grad_norm": 1.0660555362701416, "learning_rate": 9.874489100817439e-05, "loss": 1.3289, "step": 2420 }, { "epoch": 0.14, "grad_norm": 1.195712924003601, "learning_rate": 9.873921435059038e-05, "loss": 1.3221, "step": 2430 }, { "epoch": 0.14, "grad_norm": 1.0925532579421997, "learning_rate": 9.873353769300636e-05, "loss": 1.3239, "step": 2440 }, { "epoch": 0.14, "grad_norm": 1.046752691268921, "learning_rate": 9.872786103542236e-05, "loss": 1.3184, "step": 2450 }, { "epoch": 0.14, "grad_norm": 1.0557363033294678, "learning_rate": 9.872218437783834e-05, "loss": 1.317, "step": 2460 }, { "epoch": 0.14, "grad_norm": 1.0574841499328613, "learning_rate": 9.871650772025432e-05, "loss": 1.2897, "step": 2470 }, { "epoch": 0.14, "grad_norm": 1.1476939916610718, "learning_rate": 9.871083106267031e-05, "loss": 1.3501, "step": 2480 }, { "epoch": 0.14, "grad_norm": 1.0368316173553467, "learning_rate": 9.870515440508629e-05, "loss": 1.3233, "step": 2490 }, { "epoch": 0.14, "grad_norm": 1.000741958618164, "learning_rate": 9.869947774750228e-05, "loss": 1.3407, "step": 2500 }, { "epoch": 0.14, "grad_norm": 1.0677701234817505, "learning_rate": 9.869380108991826e-05, "loss": 1.3148, "step": 2510 }, { "epoch": 0.14, "grad_norm": 1.0548089742660522, "learning_rate": 9.868812443233426e-05, "loss": 1.2771, "step": 2520 }, { "epoch": 0.14, "grad_norm": 0.9509639143943787, "learning_rate": 9.868244777475024e-05, "loss": 1.3341, "step": 2530 }, { "epoch": 0.14, "grad_norm": 1.1564607620239258, "learning_rate": 9.867677111716622e-05, "loss": 1.308, "step": 2540 }, { "epoch": 0.14, "grad_norm": 0.9862377643585205, "learning_rate": 9.86710944595822e-05, "loss": 1.291, "step": 2550 }, { "epoch": 0.15, "grad_norm": 1.0758394002914429, "learning_rate": 9.866541780199819e-05, "loss": 1.3208, "step": 2560 }, { "epoch": 0.15, "grad_norm": 1.0198851823806763, "learning_rate": 9.865974114441417e-05, "loss": 1.3329, "step": 2570 }, { "epoch": 0.15, "grad_norm": 1.0363171100616455, "learning_rate": 9.865406448683015e-05, "loss": 1.3248, "step": 2580 }, { "epoch": 0.15, "grad_norm": 1.0032418966293335, "learning_rate": 9.864838782924614e-05, "loss": 1.3515, "step": 2590 }, { "epoch": 0.15, "grad_norm": 1.114959716796875, "learning_rate": 9.864271117166212e-05, "loss": 1.3216, "step": 2600 }, { "epoch": 0.15, "grad_norm": 1.0095750093460083, "learning_rate": 9.863703451407812e-05, "loss": 1.3297, "step": 2610 }, { "epoch": 0.15, "grad_norm": 1.0897860527038574, "learning_rate": 9.86313578564941e-05, "loss": 1.3054, "step": 2620 }, { "epoch": 0.15, "grad_norm": 1.0434647798538208, "learning_rate": 9.862568119891009e-05, "loss": 1.2993, "step": 2630 }, { "epoch": 0.15, "grad_norm": 1.0443435907363892, "learning_rate": 9.862000454132607e-05, "loss": 1.309, "step": 2640 }, { "epoch": 0.15, "grad_norm": 1.1580283641815186, "learning_rate": 9.861432788374205e-05, "loss": 1.2743, "step": 2650 }, { "epoch": 0.15, "grad_norm": 1.0595611333847046, "learning_rate": 9.860865122615804e-05, "loss": 1.2987, "step": 2660 }, { "epoch": 0.15, "grad_norm": 0.9997444152832031, "learning_rate": 9.860297456857402e-05, "loss": 1.2969, "step": 2670 }, { "epoch": 0.15, "grad_norm": 1.05143404006958, "learning_rate": 9.859729791099002e-05, "loss": 1.3039, "step": 2680 }, { "epoch": 0.15, "grad_norm": 1.0704314708709717, "learning_rate": 9.8591621253406e-05, "loss": 1.3007, "step": 2690 }, { "epoch": 0.15, "grad_norm": 1.022567629814148, "learning_rate": 9.858594459582199e-05, "loss": 1.3231, "step": 2700 }, { "epoch": 0.15, "grad_norm": 1.0851246118545532, "learning_rate": 9.858026793823797e-05, "loss": 1.3091, "step": 2710 }, { "epoch": 0.15, "grad_norm": 1.0055643320083618, "learning_rate": 9.857459128065396e-05, "loss": 1.2883, "step": 2720 }, { "epoch": 0.15, "grad_norm": 1.0326086282730103, "learning_rate": 9.856891462306994e-05, "loss": 1.294, "step": 2730 }, { "epoch": 0.16, "grad_norm": 1.0606677532196045, "learning_rate": 9.856323796548592e-05, "loss": 1.322, "step": 2740 }, { "epoch": 0.16, "grad_norm": 1.0349372625350952, "learning_rate": 9.855756130790192e-05, "loss": 1.3068, "step": 2750 }, { "epoch": 0.16, "grad_norm": 1.07005774974823, "learning_rate": 9.85518846503179e-05, "loss": 1.2976, "step": 2760 }, { "epoch": 0.16, "grad_norm": 1.1033165454864502, "learning_rate": 9.854620799273389e-05, "loss": 1.2753, "step": 2770 }, { "epoch": 0.16, "grad_norm": 1.1739461421966553, "learning_rate": 9.854053133514987e-05, "loss": 1.2628, "step": 2780 }, { "epoch": 0.16, "grad_norm": 1.0010274648666382, "learning_rate": 9.853485467756586e-05, "loss": 1.3087, "step": 2790 }, { "epoch": 0.16, "grad_norm": 1.0188254117965698, "learning_rate": 9.852917801998184e-05, "loss": 1.2723, "step": 2800 }, { "epoch": 0.16, "grad_norm": 1.0677130222320557, "learning_rate": 9.852350136239782e-05, "loss": 1.2538, "step": 2810 }, { "epoch": 0.16, "grad_norm": 1.005825400352478, "learning_rate": 9.85178247048138e-05, "loss": 1.3073, "step": 2820 }, { "epoch": 0.16, "grad_norm": 1.1073874235153198, "learning_rate": 9.851214804722978e-05, "loss": 1.2568, "step": 2830 }, { "epoch": 0.16, "grad_norm": 1.0428448915481567, "learning_rate": 9.850647138964578e-05, "loss": 1.2859, "step": 2840 }, { "epoch": 0.16, "grad_norm": 1.0172590017318726, "learning_rate": 9.850079473206176e-05, "loss": 1.2561, "step": 2850 }, { "epoch": 0.16, "grad_norm": 1.076541781425476, "learning_rate": 9.849511807447775e-05, "loss": 1.2904, "step": 2860 }, { "epoch": 0.16, "grad_norm": 1.1356227397918701, "learning_rate": 9.848944141689373e-05, "loss": 1.2982, "step": 2870 }, { "epoch": 0.16, "grad_norm": 1.0827975273132324, "learning_rate": 9.848376475930972e-05, "loss": 1.2762, "step": 2880 }, { "epoch": 0.16, "grad_norm": 1.084412693977356, "learning_rate": 9.84780881017257e-05, "loss": 1.3074, "step": 2890 }, { "epoch": 0.16, "grad_norm": 1.025355577468872, "learning_rate": 9.84724114441417e-05, "loss": 1.27, "step": 2900 }, { "epoch": 0.17, "grad_norm": 1.1133872270584106, "learning_rate": 9.846673478655768e-05, "loss": 1.3064, "step": 2910 }, { "epoch": 0.17, "grad_norm": 1.0112468004226685, "learning_rate": 9.846105812897366e-05, "loss": 1.3254, "step": 2920 }, { "epoch": 0.17, "grad_norm": 1.0576187372207642, "learning_rate": 9.845538147138965e-05, "loss": 1.2738, "step": 2930 }, { "epoch": 0.17, "grad_norm": 1.1225559711456299, "learning_rate": 9.844970481380563e-05, "loss": 1.2738, "step": 2940 }, { "epoch": 0.17, "grad_norm": 1.1187222003936768, "learning_rate": 9.844402815622163e-05, "loss": 1.2845, "step": 2950 }, { "epoch": 0.17, "grad_norm": 0.9986518025398254, "learning_rate": 9.84383514986376e-05, "loss": 1.2865, "step": 2960 }, { "epoch": 0.17, "grad_norm": 1.0404571294784546, "learning_rate": 9.84326748410536e-05, "loss": 1.2742, "step": 2970 }, { "epoch": 0.17, "grad_norm": 0.9464170336723328, "learning_rate": 9.842699818346958e-05, "loss": 1.2849, "step": 2980 }, { "epoch": 0.17, "grad_norm": 1.0966501235961914, "learning_rate": 9.842132152588557e-05, "loss": 1.2846, "step": 2990 }, { "epoch": 0.17, "grad_norm": 1.0356130599975586, "learning_rate": 9.841564486830155e-05, "loss": 1.2675, "step": 3000 }, { "epoch": 0.17, "grad_norm": 0.9848985075950623, "learning_rate": 9.840996821071753e-05, "loss": 1.2976, "step": 3010 }, { "epoch": 0.17, "grad_norm": 0.9767049551010132, "learning_rate": 9.840429155313353e-05, "loss": 1.2826, "step": 3020 }, { "epoch": 0.17, "grad_norm": 0.9843510389328003, "learning_rate": 9.83986148955495e-05, "loss": 1.3139, "step": 3030 }, { "epoch": 0.17, "grad_norm": 0.9942503571510315, "learning_rate": 9.83929382379655e-05, "loss": 1.2821, "step": 3040 }, { "epoch": 0.17, "grad_norm": 0.9316833019256592, "learning_rate": 9.838726158038148e-05, "loss": 1.274, "step": 3050 }, { "epoch": 0.17, "grad_norm": 1.1236603260040283, "learning_rate": 9.838158492279746e-05, "loss": 1.2758, "step": 3060 }, { "epoch": 0.17, "grad_norm": 0.9830310344696045, "learning_rate": 9.837590826521344e-05, "loss": 1.2803, "step": 3070 }, { "epoch": 0.17, "grad_norm": 1.0139652490615845, "learning_rate": 9.837023160762943e-05, "loss": 1.2997, "step": 3080 }, { "epoch": 0.18, "grad_norm": 1.055843710899353, "learning_rate": 9.836455495004541e-05, "loss": 1.2921, "step": 3090 }, { "epoch": 0.18, "grad_norm": 0.970291256904602, "learning_rate": 9.835887829246139e-05, "loss": 1.2557, "step": 3100 }, { "epoch": 0.18, "grad_norm": 1.0143458843231201, "learning_rate": 9.835320163487739e-05, "loss": 1.284, "step": 3110 }, { "epoch": 0.18, "grad_norm": 1.066016435623169, "learning_rate": 9.834752497729337e-05, "loss": 1.2781, "step": 3120 }, { "epoch": 0.18, "grad_norm": 0.906440019607544, "learning_rate": 9.834184831970936e-05, "loss": 1.2341, "step": 3130 }, { "epoch": 0.18, "grad_norm": 0.9243439435958862, "learning_rate": 9.833617166212534e-05, "loss": 1.2866, "step": 3140 }, { "epoch": 0.18, "grad_norm": 1.0565030574798584, "learning_rate": 9.833049500454133e-05, "loss": 1.2586, "step": 3150 }, { "epoch": 0.18, "grad_norm": 1.0760706663131714, "learning_rate": 9.832481834695731e-05, "loss": 1.2827, "step": 3160 }, { "epoch": 0.18, "grad_norm": 1.0349349975585938, "learning_rate": 9.83191416893733e-05, "loss": 1.3134, "step": 3170 }, { "epoch": 0.18, "grad_norm": 0.8969243168830872, "learning_rate": 9.831346503178929e-05, "loss": 1.2756, "step": 3180 }, { "epoch": 0.18, "grad_norm": 1.0531789064407349, "learning_rate": 9.830778837420527e-05, "loss": 1.276, "step": 3190 }, { "epoch": 0.18, "grad_norm": 1.0125030279159546, "learning_rate": 9.830211171662126e-05, "loss": 1.253, "step": 3200 }, { "epoch": 0.18, "grad_norm": 0.999312698841095, "learning_rate": 9.829643505903724e-05, "loss": 1.2747, "step": 3210 }, { "epoch": 0.18, "grad_norm": 1.05613374710083, "learning_rate": 9.829075840145323e-05, "loss": 1.2652, "step": 3220 }, { "epoch": 0.18, "grad_norm": 0.9635623097419739, "learning_rate": 9.828508174386921e-05, "loss": 1.2896, "step": 3230 }, { "epoch": 0.18, "grad_norm": 1.000622272491455, "learning_rate": 9.82794050862852e-05, "loss": 1.2727, "step": 3240 }, { "epoch": 0.18, "grad_norm": 1.0221668481826782, "learning_rate": 9.827372842870119e-05, "loss": 1.2748, "step": 3250 }, { "epoch": 0.18, "grad_norm": 1.101860761642456, "learning_rate": 9.826805177111718e-05, "loss": 1.2617, "step": 3260 }, { "epoch": 0.19, "grad_norm": 1.0108023881912231, "learning_rate": 9.826237511353316e-05, "loss": 1.2671, "step": 3270 }, { "epoch": 0.19, "grad_norm": 0.9930527806282043, "learning_rate": 9.825669845594914e-05, "loss": 1.275, "step": 3280 }, { "epoch": 0.19, "grad_norm": 1.0418983697891235, "learning_rate": 9.825102179836513e-05, "loss": 1.2801, "step": 3290 }, { "epoch": 0.19, "grad_norm": 1.0474895238876343, "learning_rate": 9.824534514078111e-05, "loss": 1.2993, "step": 3300 }, { "epoch": 0.19, "grad_norm": 1.0194941759109497, "learning_rate": 9.823966848319711e-05, "loss": 1.2438, "step": 3310 }, { "epoch": 0.19, "grad_norm": 1.2835735082626343, "learning_rate": 9.823399182561309e-05, "loss": 1.3017, "step": 3320 }, { "epoch": 0.19, "grad_norm": 1.02116060256958, "learning_rate": 9.822831516802907e-05, "loss": 1.2707, "step": 3330 }, { "epoch": 0.19, "grad_norm": 0.9771260023117065, "learning_rate": 9.822263851044505e-05, "loss": 1.2433, "step": 3340 }, { "epoch": 0.19, "grad_norm": 1.1031745672225952, "learning_rate": 9.821696185286104e-05, "loss": 1.2715, "step": 3350 }, { "epoch": 0.19, "grad_norm": 1.0150822401046753, "learning_rate": 9.821128519527702e-05, "loss": 1.2965, "step": 3360 }, { "epoch": 0.19, "grad_norm": 1.0167672634124756, "learning_rate": 9.8205608537693e-05, "loss": 1.2958, "step": 3370 }, { "epoch": 0.19, "grad_norm": 0.9208256602287292, "learning_rate": 9.8199931880109e-05, "loss": 1.2764, "step": 3380 }, { "epoch": 0.19, "grad_norm": 1.0258420705795288, "learning_rate": 9.819425522252497e-05, "loss": 1.2695, "step": 3390 }, { "epoch": 0.19, "grad_norm": 0.978565514087677, "learning_rate": 9.818857856494097e-05, "loss": 1.2503, "step": 3400 }, { "epoch": 0.19, "grad_norm": 1.01639986038208, "learning_rate": 9.818290190735695e-05, "loss": 1.2773, "step": 3410 }, { "epoch": 0.19, "grad_norm": 0.933590292930603, "learning_rate": 9.817722524977294e-05, "loss": 1.2918, "step": 3420 }, { "epoch": 0.19, "grad_norm": 1.0528416633605957, "learning_rate": 9.817154859218892e-05, "loss": 1.2703, "step": 3430 }, { "epoch": 0.2, "grad_norm": 1.0193231105804443, "learning_rate": 9.816587193460491e-05, "loss": 1.2489, "step": 3440 }, { "epoch": 0.2, "grad_norm": 0.9132826924324036, "learning_rate": 9.81601952770209e-05, "loss": 1.2963, "step": 3450 }, { "epoch": 0.2, "grad_norm": 1.031947135925293, "learning_rate": 9.815451861943687e-05, "loss": 1.2678, "step": 3460 }, { "epoch": 0.2, "grad_norm": 1.0326824188232422, "learning_rate": 9.814884196185287e-05, "loss": 1.2752, "step": 3470 }, { "epoch": 0.2, "grad_norm": 0.9925066828727722, "learning_rate": 9.814316530426885e-05, "loss": 1.2646, "step": 3480 }, { "epoch": 0.2, "grad_norm": 1.037388801574707, "learning_rate": 9.813748864668484e-05, "loss": 1.2664, "step": 3490 }, { "epoch": 0.2, "grad_norm": 0.9593036770820618, "learning_rate": 9.813181198910082e-05, "loss": 1.2551, "step": 3500 }, { "epoch": 0.2, "grad_norm": 1.0785013437271118, "learning_rate": 9.812613533151681e-05, "loss": 1.2443, "step": 3510 }, { "epoch": 0.2, "grad_norm": 1.0577926635742188, "learning_rate": 9.81204586739328e-05, "loss": 1.2621, "step": 3520 }, { "epoch": 0.2, "grad_norm": 0.9940651655197144, "learning_rate": 9.811478201634879e-05, "loss": 1.291, "step": 3530 }, { "epoch": 0.2, "grad_norm": 0.9882029294967651, "learning_rate": 9.810910535876477e-05, "loss": 1.2523, "step": 3540 }, { "epoch": 0.2, "grad_norm": 1.0596500635147095, "learning_rate": 9.810342870118075e-05, "loss": 1.2313, "step": 3550 }, { "epoch": 0.2, "grad_norm": 1.0946542024612427, "learning_rate": 9.809775204359674e-05, "loss": 1.2734, "step": 3560 }, { "epoch": 0.2, "grad_norm": 1.0859044790267944, "learning_rate": 9.809207538601272e-05, "loss": 1.2858, "step": 3570 }, { "epoch": 0.2, "grad_norm": 1.0236567258834839, "learning_rate": 9.80863987284287e-05, "loss": 1.2734, "step": 3580 }, { "epoch": 0.2, "grad_norm": 0.9605791568756104, "learning_rate": 9.80807220708447e-05, "loss": 1.2841, "step": 3590 }, { "epoch": 0.2, "grad_norm": 0.9978966116905212, "learning_rate": 9.807504541326068e-05, "loss": 1.2544, "step": 3600 }, { "epoch": 0.2, "grad_norm": 1.0181249380111694, "learning_rate": 9.806936875567666e-05, "loss": 1.2384, "step": 3610 }, { "epoch": 0.21, "grad_norm": 1.0088882446289062, "learning_rate": 9.806369209809265e-05, "loss": 1.2627, "step": 3620 }, { "epoch": 0.21, "grad_norm": 0.9831598401069641, "learning_rate": 9.805801544050863e-05, "loss": 1.2512, "step": 3630 }, { "epoch": 0.21, "grad_norm": 1.0169237852096558, "learning_rate": 9.805233878292461e-05, "loss": 1.2802, "step": 3640 }, { "epoch": 0.21, "grad_norm": 1.0119333267211914, "learning_rate": 9.80466621253406e-05, "loss": 1.2771, "step": 3650 }, { "epoch": 0.21, "grad_norm": 0.9895005226135254, "learning_rate": 9.804098546775658e-05, "loss": 1.2587, "step": 3660 }, { "epoch": 0.21, "grad_norm": 1.0102922916412354, "learning_rate": 9.803530881017258e-05, "loss": 1.2229, "step": 3670 }, { "epoch": 0.21, "grad_norm": 1.0000123977661133, "learning_rate": 9.802963215258856e-05, "loss": 1.2632, "step": 3680 }, { "epoch": 0.21, "grad_norm": 1.0000272989273071, "learning_rate": 9.802395549500455e-05, "loss": 1.2465, "step": 3690 }, { "epoch": 0.21, "grad_norm": 0.9728595614433289, "learning_rate": 9.801827883742053e-05, "loss": 1.2662, "step": 3700 }, { "epoch": 0.21, "grad_norm": 0.9513252377510071, "learning_rate": 9.801260217983652e-05, "loss": 1.2522, "step": 3710 }, { "epoch": 0.21, "grad_norm": 1.0674374103546143, "learning_rate": 9.80069255222525e-05, "loss": 1.2587, "step": 3720 }, { "epoch": 0.21, "grad_norm": 1.0710309743881226, "learning_rate": 9.80012488646685e-05, "loss": 1.2953, "step": 3730 }, { "epoch": 0.21, "grad_norm": 0.9801631569862366, "learning_rate": 9.799557220708448e-05, "loss": 1.2381, "step": 3740 }, { "epoch": 0.21, "grad_norm": 0.9497891068458557, "learning_rate": 9.798989554950046e-05, "loss": 1.2503, "step": 3750 }, { "epoch": 0.21, "grad_norm": 0.9248577356338501, "learning_rate": 9.798421889191645e-05, "loss": 1.2396, "step": 3760 }, { "epoch": 0.21, "grad_norm": 0.9961555600166321, "learning_rate": 9.797854223433243e-05, "loss": 1.2568, "step": 3770 }, { "epoch": 0.21, "grad_norm": 1.0121487379074097, "learning_rate": 9.797286557674842e-05, "loss": 1.2641, "step": 3780 }, { "epoch": 0.21, "grad_norm": 0.9560475945472717, "learning_rate": 9.79671889191644e-05, "loss": 1.2473, "step": 3790 }, { "epoch": 0.22, "grad_norm": 0.9593220949172974, "learning_rate": 9.79615122615804e-05, "loss": 1.2651, "step": 3800 }, { "epoch": 0.22, "grad_norm": 1.0036102533340454, "learning_rate": 9.795583560399638e-05, "loss": 1.2659, "step": 3810 }, { "epoch": 0.22, "grad_norm": 0.9199342727661133, "learning_rate": 9.795015894641236e-05, "loss": 1.2112, "step": 3820 }, { "epoch": 0.22, "grad_norm": 1.0517109632492065, "learning_rate": 9.794448228882835e-05, "loss": 1.2604, "step": 3830 }, { "epoch": 0.22, "grad_norm": 0.9386459589004517, "learning_rate": 9.793880563124433e-05, "loss": 1.2421, "step": 3840 }, { "epoch": 0.22, "grad_norm": 0.9735701084136963, "learning_rate": 9.793312897366031e-05, "loss": 1.2288, "step": 3850 }, { "epoch": 0.22, "grad_norm": 0.9791916608810425, "learning_rate": 9.792745231607629e-05, "loss": 1.2554, "step": 3860 }, { "epoch": 0.22, "grad_norm": 0.9774475693702698, "learning_rate": 9.792177565849228e-05, "loss": 1.2563, "step": 3870 }, { "epoch": 0.22, "grad_norm": 0.9960165023803711, "learning_rate": 9.791609900090826e-05, "loss": 1.2423, "step": 3880 }, { "epoch": 0.22, "grad_norm": 0.9634711742401123, "learning_rate": 9.791042234332426e-05, "loss": 1.2416, "step": 3890 }, { "epoch": 0.22, "grad_norm": 1.0329073667526245, "learning_rate": 9.790474568574024e-05, "loss": 1.2628, "step": 3900 }, { "epoch": 0.22, "grad_norm": 1.00831937789917, "learning_rate": 9.789906902815623e-05, "loss": 1.2437, "step": 3910 }, { "epoch": 0.22, "grad_norm": 0.9698894023895264, "learning_rate": 9.789339237057221e-05, "loss": 1.2298, "step": 3920 }, { "epoch": 0.22, "grad_norm": 0.974982738494873, "learning_rate": 9.788771571298819e-05, "loss": 1.2432, "step": 3930 }, { "epoch": 0.22, "grad_norm": 0.9949013590812683, "learning_rate": 9.788203905540418e-05, "loss": 1.2647, "step": 3940 }, { "epoch": 0.22, "grad_norm": 1.0812668800354004, "learning_rate": 9.787636239782016e-05, "loss": 1.2141, "step": 3950 }, { "epoch": 0.22, "grad_norm": 0.9531967639923096, "learning_rate": 9.787068574023616e-05, "loss": 1.2259, "step": 3960 }, { "epoch": 0.23, "grad_norm": 0.9319901466369629, "learning_rate": 9.786500908265214e-05, "loss": 1.263, "step": 3970 }, { "epoch": 0.23, "grad_norm": 0.9619455933570862, "learning_rate": 9.785933242506813e-05, "loss": 1.2743, "step": 3980 }, { "epoch": 0.23, "grad_norm": 1.0070489645004272, "learning_rate": 9.785365576748411e-05, "loss": 1.2527, "step": 3990 }, { "epoch": 0.23, "grad_norm": 1.0030869245529175, "learning_rate": 9.78479791099001e-05, "loss": 1.2691, "step": 4000 }, { "epoch": 0.23, "grad_norm": 0.9854745864868164, "learning_rate": 9.784230245231608e-05, "loss": 1.2389, "step": 4010 }, { "epoch": 0.23, "grad_norm": 1.052079677581787, "learning_rate": 9.783662579473206e-05, "loss": 1.2533, "step": 4020 }, { "epoch": 0.23, "grad_norm": 1.0434256792068481, "learning_rate": 9.783094913714806e-05, "loss": 1.2785, "step": 4030 }, { "epoch": 0.23, "grad_norm": 1.487256407737732, "learning_rate": 9.782527247956404e-05, "loss": 1.2476, "step": 4040 }, { "epoch": 0.23, "grad_norm": 0.975676417350769, "learning_rate": 9.781959582198003e-05, "loss": 1.2652, "step": 4050 }, { "epoch": 0.23, "grad_norm": 0.995024561882019, "learning_rate": 9.781391916439601e-05, "loss": 1.2442, "step": 4060 }, { "epoch": 0.23, "grad_norm": 0.9742059707641602, "learning_rate": 9.7808242506812e-05, "loss": 1.2561, "step": 4070 }, { "epoch": 0.23, "grad_norm": 0.9914084076881409, "learning_rate": 9.780256584922798e-05, "loss": 1.2319, "step": 4080 }, { "epoch": 0.23, "grad_norm": 1.0362884998321533, "learning_rate": 9.779688919164396e-05, "loss": 1.2468, "step": 4090 }, { "epoch": 0.23, "grad_norm": 0.9930813908576965, "learning_rate": 9.779121253405994e-05, "loss": 1.2309, "step": 4100 }, { "epoch": 0.23, "grad_norm": 0.9847789406776428, "learning_rate": 9.778553587647594e-05, "loss": 1.2653, "step": 4110 }, { "epoch": 0.23, "grad_norm": 0.9696356654167175, "learning_rate": 9.777985921889192e-05, "loss": 1.2304, "step": 4120 }, { "epoch": 0.23, "grad_norm": 1.0458033084869385, "learning_rate": 9.77741825613079e-05, "loss": 1.2616, "step": 4130 }, { "epoch": 0.23, "grad_norm": 0.9498910307884216, "learning_rate": 9.776850590372389e-05, "loss": 1.2607, "step": 4140 }, { "epoch": 0.24, "grad_norm": 1.0175129175186157, "learning_rate": 9.776282924613987e-05, "loss": 1.2315, "step": 4150 }, { "epoch": 0.24, "grad_norm": 0.9869800209999084, "learning_rate": 9.775715258855586e-05, "loss": 1.218, "step": 4160 }, { "epoch": 0.24, "grad_norm": 0.9384220838546753, "learning_rate": 9.775147593097184e-05, "loss": 1.2612, "step": 4170 }, { "epoch": 0.24, "grad_norm": 0.9256226420402527, "learning_rate": 9.774579927338784e-05, "loss": 1.2519, "step": 4180 }, { "epoch": 0.24, "grad_norm": 1.0034139156341553, "learning_rate": 9.774012261580382e-05, "loss": 1.2647, "step": 4190 }, { "epoch": 0.24, "grad_norm": 0.9973955154418945, "learning_rate": 9.77344459582198e-05, "loss": 1.2363, "step": 4200 }, { "epoch": 0.24, "grad_norm": 0.9063628315925598, "learning_rate": 9.772876930063579e-05, "loss": 1.2461, "step": 4210 }, { "epoch": 0.24, "grad_norm": 1.0314080715179443, "learning_rate": 9.772309264305177e-05, "loss": 1.2768, "step": 4220 }, { "epoch": 0.24, "grad_norm": 0.9746389389038086, "learning_rate": 9.771741598546777e-05, "loss": 1.2708, "step": 4230 }, { "epoch": 0.24, "grad_norm": 1.0959221124649048, "learning_rate": 9.771173932788375e-05, "loss": 1.2273, "step": 4240 }, { "epoch": 0.24, "grad_norm": 1.0830498933792114, "learning_rate": 9.770606267029974e-05, "loss": 1.2117, "step": 4250 }, { "epoch": 0.24, "grad_norm": 0.9578700661659241, "learning_rate": 9.770038601271572e-05, "loss": 1.2455, "step": 4260 }, { "epoch": 0.24, "grad_norm": 0.9718576073646545, "learning_rate": 9.769470935513171e-05, "loss": 1.2622, "step": 4270 }, { "epoch": 0.24, "grad_norm": 0.9609904885292053, "learning_rate": 9.768903269754769e-05, "loss": 1.2414, "step": 4280 }, { "epoch": 0.24, "grad_norm": 0.9413146376609802, "learning_rate": 9.768335603996367e-05, "loss": 1.2496, "step": 4290 }, { "epoch": 0.24, "grad_norm": 0.9959927201271057, "learning_rate": 9.767767938237967e-05, "loss": 1.2367, "step": 4300 }, { "epoch": 0.24, "grad_norm": 1.1118781566619873, "learning_rate": 9.767200272479565e-05, "loss": 1.251, "step": 4310 }, { "epoch": 0.24, "grad_norm": 0.9655351042747498, "learning_rate": 9.766632606721164e-05, "loss": 1.2225, "step": 4320 }, { "epoch": 0.25, "grad_norm": 0.9416446685791016, "learning_rate": 9.766064940962762e-05, "loss": 1.2597, "step": 4330 }, { "epoch": 0.25, "grad_norm": 0.9980165362358093, "learning_rate": 9.76549727520436e-05, "loss": 1.2457, "step": 4340 }, { "epoch": 0.25, "grad_norm": 1.0602681636810303, "learning_rate": 9.764929609445959e-05, "loss": 1.2325, "step": 4350 }, { "epoch": 0.25, "grad_norm": 0.9713975191116333, "learning_rate": 9.764361943687557e-05, "loss": 1.2565, "step": 4360 }, { "epoch": 0.25, "grad_norm": 0.9709974527359009, "learning_rate": 9.763794277929155e-05, "loss": 1.2473, "step": 4370 }, { "epoch": 0.25, "grad_norm": 0.9736787676811218, "learning_rate": 9.763226612170753e-05, "loss": 1.2234, "step": 4380 }, { "epoch": 0.25, "grad_norm": 0.943060576915741, "learning_rate": 9.762658946412353e-05, "loss": 1.223, "step": 4390 }, { "epoch": 0.25, "grad_norm": 0.9659847021102905, "learning_rate": 9.76209128065395e-05, "loss": 1.2455, "step": 4400 }, { "epoch": 0.25, "grad_norm": 0.9999145865440369, "learning_rate": 9.76152361489555e-05, "loss": 1.2512, "step": 4410 }, { "epoch": 0.25, "grad_norm": 1.0362340211868286, "learning_rate": 9.760955949137148e-05, "loss": 1.2581, "step": 4420 }, { "epoch": 0.25, "grad_norm": 0.9700642824172974, "learning_rate": 9.760388283378747e-05, "loss": 1.2521, "step": 4430 }, { "epoch": 0.25, "grad_norm": 0.8884727954864502, "learning_rate": 9.759820617620345e-05, "loss": 1.2439, "step": 4440 }, { "epoch": 0.25, "grad_norm": 1.0720523595809937, "learning_rate": 9.759252951861945e-05, "loss": 1.2362, "step": 4450 }, { "epoch": 0.25, "grad_norm": 0.9750374555587769, "learning_rate": 9.758685286103543e-05, "loss": 1.2522, "step": 4460 }, { "epoch": 0.25, "grad_norm": 0.9445420503616333, "learning_rate": 9.75811762034514e-05, "loss": 1.222, "step": 4470 }, { "epoch": 0.25, "grad_norm": 1.0326794385910034, "learning_rate": 9.75754995458674e-05, "loss": 1.2522, "step": 4480 }, { "epoch": 0.25, "grad_norm": 1.0989222526550293, "learning_rate": 9.756982288828338e-05, "loss": 1.2434, "step": 4490 }, { "epoch": 0.26, "grad_norm": 0.999760627746582, "learning_rate": 9.756414623069937e-05, "loss": 1.2434, "step": 4500 }, { "epoch": 0.26, "grad_norm": 0.9860814213752747, "learning_rate": 9.755846957311535e-05, "loss": 1.2545, "step": 4510 }, { "epoch": 0.26, "grad_norm": 0.9508693218231201, "learning_rate": 9.755279291553135e-05, "loss": 1.269, "step": 4520 }, { "epoch": 0.26, "grad_norm": 0.9584646224975586, "learning_rate": 9.754711625794733e-05, "loss": 1.2259, "step": 4530 }, { "epoch": 0.26, "grad_norm": 1.0930712223052979, "learning_rate": 9.754143960036332e-05, "loss": 1.2224, "step": 4540 }, { "epoch": 0.26, "grad_norm": 1.0517632961273193, "learning_rate": 9.75357629427793e-05, "loss": 1.2681, "step": 4550 }, { "epoch": 0.26, "grad_norm": 0.9799410700798035, "learning_rate": 9.753008628519528e-05, "loss": 1.2527, "step": 4560 }, { "epoch": 0.26, "grad_norm": 0.9678217768669128, "learning_rate": 9.752440962761127e-05, "loss": 1.2095, "step": 4570 }, { "epoch": 0.26, "grad_norm": 0.9173681735992432, "learning_rate": 9.751873297002725e-05, "loss": 1.2388, "step": 4580 }, { "epoch": 0.26, "grad_norm": 0.9416786432266235, "learning_rate": 9.751305631244325e-05, "loss": 1.2436, "step": 4590 }, { "epoch": 0.26, "grad_norm": 1.0364335775375366, "learning_rate": 9.750737965485923e-05, "loss": 1.2318, "step": 4600 }, { "epoch": 0.26, "grad_norm": 0.9562888145446777, "learning_rate": 9.750170299727521e-05, "loss": 1.2348, "step": 4610 }, { "epoch": 0.26, "grad_norm": 1.0197279453277588, "learning_rate": 9.749602633969119e-05, "loss": 1.2674, "step": 4620 }, { "epoch": 0.26, "grad_norm": 1.023374319076538, "learning_rate": 9.749034968210718e-05, "loss": 1.2358, "step": 4630 }, { "epoch": 0.26, "grad_norm": 0.9374992847442627, "learning_rate": 9.748467302452316e-05, "loss": 1.2125, "step": 4640 }, { "epoch": 0.26, "grad_norm": 0.9955867528915405, "learning_rate": 9.747899636693914e-05, "loss": 1.2468, "step": 4650 }, { "epoch": 0.26, "grad_norm": 1.0048675537109375, "learning_rate": 9.747331970935513e-05, "loss": 1.2418, "step": 4660 }, { "epoch": 0.26, "grad_norm": 0.9523321986198425, "learning_rate": 9.746764305177111e-05, "loss": 1.2218, "step": 4670 }, { "epoch": 0.27, "grad_norm": 0.9310972690582275, "learning_rate": 9.746196639418711e-05, "loss": 1.2445, "step": 4680 }, { "epoch": 0.27, "grad_norm": 1.0277653932571411, "learning_rate": 9.745628973660309e-05, "loss": 1.2317, "step": 4690 }, { "epoch": 0.27, "grad_norm": 0.9962049126625061, "learning_rate": 9.745061307901908e-05, "loss": 1.2318, "step": 4700 }, { "epoch": 0.27, "grad_norm": 0.9292561411857605, "learning_rate": 9.744493642143506e-05, "loss": 1.2183, "step": 4710 }, { "epoch": 0.27, "grad_norm": 0.9764232635498047, "learning_rate": 9.743925976385105e-05, "loss": 1.2342, "step": 4720 }, { "epoch": 0.27, "grad_norm": 0.9892465472221375, "learning_rate": 9.743358310626703e-05, "loss": 1.2256, "step": 4730 }, { "epoch": 0.27, "grad_norm": 0.9032623767852783, "learning_rate": 9.742790644868301e-05, "loss": 1.2446, "step": 4740 }, { "epoch": 0.27, "grad_norm": 0.9002546668052673, "learning_rate": 9.742222979109901e-05, "loss": 1.2363, "step": 4750 }, { "epoch": 0.27, "grad_norm": 0.9547824263572693, "learning_rate": 9.741655313351499e-05, "loss": 1.2432, "step": 4760 }, { "epoch": 0.27, "grad_norm": 0.895626425743103, "learning_rate": 9.741087647593098e-05, "loss": 1.2335, "step": 4770 }, { "epoch": 0.27, "grad_norm": 0.8817634582519531, "learning_rate": 9.740519981834696e-05, "loss": 1.2445, "step": 4780 }, { "epoch": 0.27, "grad_norm": 0.9708264470100403, "learning_rate": 9.739952316076295e-05, "loss": 1.2076, "step": 4790 }, { "epoch": 0.27, "grad_norm": 0.9782925248146057, "learning_rate": 9.739384650317893e-05, "loss": 1.2041, "step": 4800 }, { "epoch": 0.27, "grad_norm": 0.9433874487876892, "learning_rate": 9.738816984559493e-05, "loss": 1.2272, "step": 4810 }, { "epoch": 0.27, "grad_norm": 0.9774789810180664, "learning_rate": 9.738249318801091e-05, "loss": 1.2197, "step": 4820 }, { "epoch": 0.27, "grad_norm": 1.0384457111358643, "learning_rate": 9.737681653042689e-05, "loss": 1.2216, "step": 4830 }, { "epoch": 0.27, "grad_norm": 0.9714308381080627, "learning_rate": 9.737113987284288e-05, "loss": 1.2305, "step": 4840 }, { "epoch": 0.28, "grad_norm": 0.9416885375976562, "learning_rate": 9.736546321525886e-05, "loss": 1.2136, "step": 4850 }, { "epoch": 0.28, "grad_norm": 0.9692510366439819, "learning_rate": 9.735978655767484e-05, "loss": 1.2674, "step": 4860 }, { "epoch": 0.28, "grad_norm": 0.9739055633544922, "learning_rate": 9.735410990009084e-05, "loss": 1.222, "step": 4870 }, { "epoch": 0.28, "grad_norm": 0.9053775668144226, "learning_rate": 9.734843324250682e-05, "loss": 1.2503, "step": 4880 }, { "epoch": 0.28, "grad_norm": 0.9749047756195068, "learning_rate": 9.73427565849228e-05, "loss": 1.2175, "step": 4890 }, { "epoch": 0.28, "grad_norm": 0.9847483038902283, "learning_rate": 9.733707992733879e-05, "loss": 1.2511, "step": 4900 }, { "epoch": 0.28, "grad_norm": 1.0591623783111572, "learning_rate": 9.733140326975477e-05, "loss": 1.2381, "step": 4910 }, { "epoch": 0.28, "grad_norm": 1.0497854948043823, "learning_rate": 9.732572661217075e-05, "loss": 1.2235, "step": 4920 }, { "epoch": 0.28, "grad_norm": 1.0532983541488647, "learning_rate": 9.732004995458674e-05, "loss": 1.2294, "step": 4930 }, { "epoch": 0.28, "grad_norm": 0.9762168526649475, "learning_rate": 9.731437329700272e-05, "loss": 1.229, "step": 4940 }, { "epoch": 0.28, "grad_norm": 1.010563611984253, "learning_rate": 9.730869663941872e-05, "loss": 1.211, "step": 4950 }, { "epoch": 0.28, "grad_norm": 0.9877942800521851, "learning_rate": 9.73030199818347e-05, "loss": 1.2382, "step": 4960 }, { "epoch": 0.28, "grad_norm": 1.0096025466918945, "learning_rate": 9.729734332425069e-05, "loss": 1.2184, "step": 4970 }, { "epoch": 0.28, "grad_norm": 0.9563623070716858, "learning_rate": 9.729166666666667e-05, "loss": 1.2103, "step": 4980 }, { "epoch": 0.28, "grad_norm": 1.011098027229309, "learning_rate": 9.728599000908266e-05, "loss": 1.2305, "step": 4990 }, { "epoch": 0.28, "grad_norm": 1.0213693380355835, "learning_rate": 9.728031335149864e-05, "loss": 1.2157, "step": 5000 }, { "epoch": 0.28, "grad_norm": 0.9660182595252991, "learning_rate": 9.727463669391462e-05, "loss": 1.2053, "step": 5010 }, { "epoch": 0.28, "grad_norm": 1.0043233633041382, "learning_rate": 9.726896003633062e-05, "loss": 1.2411, "step": 5020 }, { "epoch": 0.29, "grad_norm": 0.9700961112976074, "learning_rate": 9.72632833787466e-05, "loss": 1.2298, "step": 5030 }, { "epoch": 0.29, "grad_norm": 0.8622508645057678, "learning_rate": 9.725760672116259e-05, "loss": 1.2424, "step": 5040 }, { "epoch": 0.29, "grad_norm": 0.9924274682998657, "learning_rate": 9.725193006357857e-05, "loss": 1.1989, "step": 5050 }, { "epoch": 0.29, "grad_norm": 1.0462549924850464, "learning_rate": 9.724625340599456e-05, "loss": 1.2194, "step": 5060 }, { "epoch": 0.29, "grad_norm": 1.0608320236206055, "learning_rate": 9.724057674841054e-05, "loss": 1.2057, "step": 5070 }, { "epoch": 0.29, "grad_norm": 0.9545693397521973, "learning_rate": 9.723490009082654e-05, "loss": 1.2149, "step": 5080 }, { "epoch": 0.29, "grad_norm": 0.9600771069526672, "learning_rate": 9.722922343324252e-05, "loss": 1.2158, "step": 5090 }, { "epoch": 0.29, "grad_norm": 0.9364641308784485, "learning_rate": 9.72235467756585e-05, "loss": 1.2305, "step": 5100 }, { "epoch": 0.29, "grad_norm": 0.9675828218460083, "learning_rate": 9.721787011807449e-05, "loss": 1.218, "step": 5110 }, { "epoch": 0.29, "grad_norm": 0.9879128932952881, "learning_rate": 9.721219346049047e-05, "loss": 1.2377, "step": 5120 }, { "epoch": 0.29, "grad_norm": 1.04416024684906, "learning_rate": 9.720651680290645e-05, "loss": 1.2243, "step": 5130 }, { "epoch": 0.29, "grad_norm": 1.0124753713607788, "learning_rate": 9.720084014532243e-05, "loss": 1.2546, "step": 5140 }, { "epoch": 0.29, "grad_norm": 0.9883249998092651, "learning_rate": 9.719516348773842e-05, "loss": 1.2513, "step": 5150 }, { "epoch": 0.29, "grad_norm": 1.0172793865203857, "learning_rate": 9.71894868301544e-05, "loss": 1.2317, "step": 5160 }, { "epoch": 0.29, "grad_norm": 0.9647625088691711, "learning_rate": 9.71838101725704e-05, "loss": 1.1994, "step": 5170 }, { "epoch": 0.29, "grad_norm": 1.022866129875183, "learning_rate": 9.717813351498638e-05, "loss": 1.2259, "step": 5180 }, { "epoch": 0.29, "grad_norm": 0.9378897547721863, "learning_rate": 9.717245685740236e-05, "loss": 1.2417, "step": 5190 }, { "epoch": 0.29, "grad_norm": 1.0504844188690186, "learning_rate": 9.716678019981835e-05, "loss": 1.22, "step": 5200 }, { "epoch": 0.3, "grad_norm": 1.0217347145080566, "learning_rate": 9.716110354223433e-05, "loss": 1.209, "step": 5210 }, { "epoch": 0.3, "grad_norm": 1.0693106651306152, "learning_rate": 9.715542688465032e-05, "loss": 1.2251, "step": 5220 }, { "epoch": 0.3, "grad_norm": 0.9852681756019592, "learning_rate": 9.71497502270663e-05, "loss": 1.2152, "step": 5230 }, { "epoch": 0.3, "grad_norm": 1.006651520729065, "learning_rate": 9.71440735694823e-05, "loss": 1.2269, "step": 5240 }, { "epoch": 0.3, "grad_norm": 0.967632532119751, "learning_rate": 9.713839691189828e-05, "loss": 1.206, "step": 5250 }, { "epoch": 0.3, "grad_norm": 0.945203959941864, "learning_rate": 9.713272025431427e-05, "loss": 1.243, "step": 5260 }, { "epoch": 0.3, "grad_norm": 0.9939317107200623, "learning_rate": 9.712704359673025e-05, "loss": 1.2467, "step": 5270 }, { "epoch": 0.3, "grad_norm": 1.6729995012283325, "learning_rate": 9.712136693914623e-05, "loss": 1.2109, "step": 5280 }, { "epoch": 0.3, "grad_norm": 0.9905920624732971, "learning_rate": 9.711569028156222e-05, "loss": 1.1974, "step": 5290 }, { "epoch": 0.3, "grad_norm": 0.9968215227127075, "learning_rate": 9.71100136239782e-05, "loss": 1.2056, "step": 5300 }, { "epoch": 0.3, "grad_norm": 0.935305118560791, "learning_rate": 9.71043369663942e-05, "loss": 1.2342, "step": 5310 }, { "epoch": 0.3, "grad_norm": 1.000004768371582, "learning_rate": 9.709866030881018e-05, "loss": 1.2049, "step": 5320 }, { "epoch": 0.3, "grad_norm": 0.9154984951019287, "learning_rate": 9.709298365122617e-05, "loss": 1.2222, "step": 5330 }, { "epoch": 0.3, "grad_norm": 0.9942060708999634, "learning_rate": 9.708730699364215e-05, "loss": 1.2295, "step": 5340 }, { "epoch": 0.3, "grad_norm": 0.9692301154136658, "learning_rate": 9.708163033605814e-05, "loss": 1.2116, "step": 5350 }, { "epoch": 0.3, "grad_norm": 0.9954958558082581, "learning_rate": 9.707595367847412e-05, "loss": 1.2235, "step": 5360 }, { "epoch": 0.3, "grad_norm": 0.914152204990387, "learning_rate": 9.70702770208901e-05, "loss": 1.2113, "step": 5370 }, { "epoch": 0.31, "grad_norm": 1.049073576927185, "learning_rate": 9.706460036330608e-05, "loss": 1.2496, "step": 5380 }, { "epoch": 0.31, "grad_norm": 0.915627658367157, "learning_rate": 9.705892370572208e-05, "loss": 1.224, "step": 5390 }, { "epoch": 0.31, "grad_norm": 1.0351252555847168, "learning_rate": 9.705324704813806e-05, "loss": 1.2074, "step": 5400 }, { "epoch": 0.31, "grad_norm": 0.9773271083831787, "learning_rate": 9.704757039055404e-05, "loss": 1.2242, "step": 5410 }, { "epoch": 0.31, "grad_norm": 1.0038673877716064, "learning_rate": 9.704189373297003e-05, "loss": 1.2456, "step": 5420 }, { "epoch": 0.31, "grad_norm": 0.9619837999343872, "learning_rate": 9.703621707538601e-05, "loss": 1.2287, "step": 5430 }, { "epoch": 0.31, "grad_norm": 0.9694613814353943, "learning_rate": 9.7030540417802e-05, "loss": 1.1979, "step": 5440 }, { "epoch": 0.31, "grad_norm": 0.9587101936340332, "learning_rate": 9.702486376021798e-05, "loss": 1.2291, "step": 5450 }, { "epoch": 0.31, "grad_norm": 0.9130726456642151, "learning_rate": 9.701918710263396e-05, "loss": 1.2094, "step": 5460 }, { "epoch": 0.31, "grad_norm": 0.9977928996086121, "learning_rate": 9.701351044504996e-05, "loss": 1.2375, "step": 5470 }, { "epoch": 0.31, "grad_norm": 1.0406864881515503, "learning_rate": 9.700783378746594e-05, "loss": 1.211, "step": 5480 }, { "epoch": 0.31, "grad_norm": 1.0213518142700195, "learning_rate": 9.700215712988193e-05, "loss": 1.2243, "step": 5490 }, { "epoch": 0.31, "grad_norm": 0.9878332614898682, "learning_rate": 9.699648047229791e-05, "loss": 1.2299, "step": 5500 }, { "epoch": 0.31, "grad_norm": 0.9144527316093445, "learning_rate": 9.69908038147139e-05, "loss": 1.2222, "step": 5510 }, { "epoch": 0.31, "grad_norm": 0.9710639715194702, "learning_rate": 9.698512715712989e-05, "loss": 1.1999, "step": 5520 }, { "epoch": 0.31, "grad_norm": 0.9447893500328064, "learning_rate": 9.697945049954588e-05, "loss": 1.1988, "step": 5530 }, { "epoch": 0.31, "grad_norm": 0.9952858686447144, "learning_rate": 9.697377384196186e-05, "loss": 1.2344, "step": 5540 }, { "epoch": 0.31, "grad_norm": 0.9730527400970459, "learning_rate": 9.696809718437785e-05, "loss": 1.2206, "step": 5550 }, { "epoch": 0.32, "grad_norm": 0.9833120703697205, "learning_rate": 9.696242052679383e-05, "loss": 1.2254, "step": 5560 }, { "epoch": 0.32, "grad_norm": 0.9918665885925293, "learning_rate": 9.695674386920981e-05, "loss": 1.2254, "step": 5570 }, { "epoch": 0.32, "grad_norm": 0.9493137001991272, "learning_rate": 9.69510672116258e-05, "loss": 1.2148, "step": 5580 }, { "epoch": 0.32, "grad_norm": 0.9510250687599182, "learning_rate": 9.694539055404179e-05, "loss": 1.201, "step": 5590 }, { "epoch": 0.32, "grad_norm": 0.976383626461029, "learning_rate": 9.693971389645778e-05, "loss": 1.2228, "step": 5600 }, { "epoch": 0.32, "grad_norm": 0.9054012894630432, "learning_rate": 9.693403723887376e-05, "loss": 1.24, "step": 5610 }, { "epoch": 0.32, "grad_norm": 0.9748900532722473, "learning_rate": 9.692836058128974e-05, "loss": 1.2041, "step": 5620 }, { "epoch": 0.32, "grad_norm": 0.9594511985778809, "learning_rate": 9.692268392370573e-05, "loss": 1.1974, "step": 5630 }, { "epoch": 0.32, "grad_norm": 1.047252893447876, "learning_rate": 9.691700726612171e-05, "loss": 1.2012, "step": 5640 }, { "epoch": 0.32, "grad_norm": 0.8878451585769653, "learning_rate": 9.691133060853769e-05, "loss": 1.197, "step": 5650 }, { "epoch": 0.32, "grad_norm": 1.0529465675354004, "learning_rate": 9.690565395095367e-05, "loss": 1.2226, "step": 5660 }, { "epoch": 0.32, "grad_norm": 0.9330804347991943, "learning_rate": 9.689997729336967e-05, "loss": 1.2256, "step": 5670 }, { "epoch": 0.32, "grad_norm": 1.2960147857666016, "learning_rate": 9.689430063578565e-05, "loss": 1.2246, "step": 5680 }, { "epoch": 0.32, "grad_norm": 0.9271606206893921, "learning_rate": 9.688862397820164e-05, "loss": 1.2086, "step": 5690 }, { "epoch": 0.32, "grad_norm": 0.8849117755889893, "learning_rate": 9.688294732061762e-05, "loss": 1.209, "step": 5700 }, { "epoch": 0.32, "grad_norm": 0.9681224226951599, "learning_rate": 9.687727066303361e-05, "loss": 1.2121, "step": 5710 }, { "epoch": 0.32, "grad_norm": 0.9691559672355652, "learning_rate": 9.687159400544959e-05, "loss": 1.2058, "step": 5720 }, { "epoch": 0.32, "grad_norm": 1.0708140134811401, "learning_rate": 9.686591734786559e-05, "loss": 1.2344, "step": 5730 }, { "epoch": 0.33, "grad_norm": 0.9036786556243896, "learning_rate": 9.686024069028157e-05, "loss": 1.2286, "step": 5740 }, { "epoch": 0.33, "grad_norm": 0.9920363426208496, "learning_rate": 9.685456403269755e-05, "loss": 1.193, "step": 5750 }, { "epoch": 0.33, "grad_norm": 0.9138022065162659, "learning_rate": 9.684888737511354e-05, "loss": 1.2343, "step": 5760 }, { "epoch": 0.33, "grad_norm": 0.993645966053009, "learning_rate": 9.684321071752952e-05, "loss": 1.218, "step": 5770 }, { "epoch": 0.33, "grad_norm": 0.9414227604866028, "learning_rate": 9.683753405994551e-05, "loss": 1.2135, "step": 5780 }, { "epoch": 0.33, "grad_norm": 0.9698729515075684, "learning_rate": 9.683185740236149e-05, "loss": 1.205, "step": 5790 }, { "epoch": 0.33, "grad_norm": 0.9810023903846741, "learning_rate": 9.682618074477749e-05, "loss": 1.2262, "step": 5800 }, { "epoch": 0.33, "grad_norm": 0.9742171764373779, "learning_rate": 9.682050408719347e-05, "loss": 1.1811, "step": 5810 }, { "epoch": 0.33, "grad_norm": 1.013816237449646, "learning_rate": 9.681482742960946e-05, "loss": 1.2217, "step": 5820 }, { "epoch": 0.33, "grad_norm": 0.9264166355133057, "learning_rate": 9.680915077202544e-05, "loss": 1.2466, "step": 5830 }, { "epoch": 0.33, "grad_norm": 0.9824050068855286, "learning_rate": 9.680347411444142e-05, "loss": 1.1987, "step": 5840 }, { "epoch": 0.33, "grad_norm": 1.004480242729187, "learning_rate": 9.679779745685741e-05, "loss": 1.2092, "step": 5850 }, { "epoch": 0.33, "grad_norm": 0.9058519005775452, "learning_rate": 9.67921207992734e-05, "loss": 1.2044, "step": 5860 }, { "epoch": 0.33, "grad_norm": 0.9383645057678223, "learning_rate": 9.678644414168939e-05, "loss": 1.2061, "step": 5870 }, { "epoch": 0.33, "grad_norm": 1.008074402809143, "learning_rate": 9.678076748410537e-05, "loss": 1.1986, "step": 5880 }, { "epoch": 0.33, "grad_norm": 0.9307847619056702, "learning_rate": 9.677509082652135e-05, "loss": 1.1771, "step": 5890 }, { "epoch": 0.33, "grad_norm": 1.007521390914917, "learning_rate": 9.676941416893733e-05, "loss": 1.2027, "step": 5900 }, { "epoch": 0.34, "grad_norm": 0.9314125180244446, "learning_rate": 9.676373751135332e-05, "loss": 1.2086, "step": 5910 }, { "epoch": 0.34, "grad_norm": 0.969463050365448, "learning_rate": 9.67580608537693e-05, "loss": 1.1974, "step": 5920 }, { "epoch": 0.34, "grad_norm": 0.9360992312431335, "learning_rate": 9.675238419618528e-05, "loss": 1.1817, "step": 5930 }, { "epoch": 0.34, "grad_norm": 1.0412075519561768, "learning_rate": 9.674670753860127e-05, "loss": 1.2116, "step": 5940 }, { "epoch": 0.34, "grad_norm": 1.011391520500183, "learning_rate": 9.674103088101725e-05, "loss": 1.2086, "step": 5950 }, { "epoch": 0.34, "grad_norm": 0.9717627763748169, "learning_rate": 9.673535422343325e-05, "loss": 1.2368, "step": 5960 }, { "epoch": 0.34, "grad_norm": 0.8991236090660095, "learning_rate": 9.672967756584923e-05, "loss": 1.2324, "step": 5970 }, { "epoch": 0.34, "grad_norm": 0.9216236472129822, "learning_rate": 9.672400090826522e-05, "loss": 1.2134, "step": 5980 }, { "epoch": 0.34, "grad_norm": 1.0368611812591553, "learning_rate": 9.67183242506812e-05, "loss": 1.1791, "step": 5990 }, { "epoch": 0.34, "grad_norm": 0.9596677422523499, "learning_rate": 9.67126475930972e-05, "loss": 1.212, "step": 6000 }, { "epoch": 0.34, "grad_norm": 1.0157121419906616, "learning_rate": 9.670697093551317e-05, "loss": 1.2269, "step": 6010 }, { "epoch": 0.34, "grad_norm": 0.9914820194244385, "learning_rate": 9.670129427792915e-05, "loss": 1.2151, "step": 6020 }, { "epoch": 0.34, "grad_norm": 0.9580597877502441, "learning_rate": 9.669561762034515e-05, "loss": 1.2114, "step": 6030 }, { "epoch": 0.34, "grad_norm": 0.9770675301551819, "learning_rate": 9.668994096276113e-05, "loss": 1.2037, "step": 6040 }, { "epoch": 0.34, "grad_norm": 0.9368177652359009, "learning_rate": 9.668426430517712e-05, "loss": 1.2194, "step": 6050 }, { "epoch": 0.34, "grad_norm": 0.9760795831680298, "learning_rate": 9.66785876475931e-05, "loss": 1.1955, "step": 6060 }, { "epoch": 0.34, "grad_norm": 0.9341722130775452, "learning_rate": 9.66729109900091e-05, "loss": 1.1955, "step": 6070 }, { "epoch": 0.34, "grad_norm": 1.589320421218872, "learning_rate": 9.666723433242507e-05, "loss": 1.1883, "step": 6080 }, { "epoch": 0.35, "grad_norm": 1.0216847658157349, "learning_rate": 9.666155767484107e-05, "loss": 1.2105, "step": 6090 }, { "epoch": 0.35, "grad_norm": 1.0454490184783936, "learning_rate": 9.665588101725705e-05, "loss": 1.2017, "step": 6100 }, { "epoch": 0.35, "grad_norm": 0.9043789505958557, "learning_rate": 9.665020435967303e-05, "loss": 1.1985, "step": 6110 }, { "epoch": 0.35, "grad_norm": 0.9450429081916809, "learning_rate": 9.664452770208902e-05, "loss": 1.2118, "step": 6120 }, { "epoch": 0.35, "grad_norm": 0.9617353677749634, "learning_rate": 9.6638851044505e-05, "loss": 1.1819, "step": 6130 }, { "epoch": 0.35, "grad_norm": 0.8988189697265625, "learning_rate": 9.663317438692098e-05, "loss": 1.1763, "step": 6140 }, { "epoch": 0.35, "grad_norm": 1.0122365951538086, "learning_rate": 9.662749772933698e-05, "loss": 1.2052, "step": 6150 }, { "epoch": 0.35, "grad_norm": 0.9477173089981079, "learning_rate": 9.662182107175296e-05, "loss": 1.2115, "step": 6160 }, { "epoch": 0.35, "grad_norm": 0.892354428768158, "learning_rate": 9.661614441416893e-05, "loss": 1.231, "step": 6170 }, { "epoch": 0.35, "grad_norm": 1.0030262470245361, "learning_rate": 9.661046775658493e-05, "loss": 1.2043, "step": 6180 }, { "epoch": 0.35, "grad_norm": 0.975794792175293, "learning_rate": 9.660479109900091e-05, "loss": 1.214, "step": 6190 }, { "epoch": 0.35, "grad_norm": 1.0083255767822266, "learning_rate": 9.659911444141689e-05, "loss": 1.2073, "step": 6200 }, { "epoch": 0.35, "grad_norm": 0.9600338339805603, "learning_rate": 9.659343778383288e-05, "loss": 1.1972, "step": 6210 }, { "epoch": 0.35, "grad_norm": 0.9312694072723389, "learning_rate": 9.658776112624886e-05, "loss": 1.2027, "step": 6220 }, { "epoch": 0.35, "grad_norm": 1.0393730401992798, "learning_rate": 9.658208446866486e-05, "loss": 1.1864, "step": 6230 }, { "epoch": 0.35, "grad_norm": 0.9344609975814819, "learning_rate": 9.657640781108084e-05, "loss": 1.2092, "step": 6240 }, { "epoch": 0.35, "grad_norm": 0.9619960784912109, "learning_rate": 9.657073115349683e-05, "loss": 1.2028, "step": 6250 }, { "epoch": 0.35, "grad_norm": 1.2705780267715454, "learning_rate": 9.656505449591281e-05, "loss": 1.2087, "step": 6260 }, { "epoch": 0.36, "grad_norm": 1.0109564065933228, "learning_rate": 9.65593778383288e-05, "loss": 1.2058, "step": 6270 }, { "epoch": 0.36, "grad_norm": 0.959254264831543, "learning_rate": 9.655370118074478e-05, "loss": 1.2098, "step": 6280 }, { "epoch": 0.36, "grad_norm": 0.9391984343528748, "learning_rate": 9.654802452316076e-05, "loss": 1.2006, "step": 6290 }, { "epoch": 0.36, "grad_norm": 0.9803739786148071, "learning_rate": 9.654234786557676e-05, "loss": 1.2161, "step": 6300 }, { "epoch": 0.36, "grad_norm": 0.9044747352600098, "learning_rate": 9.653667120799274e-05, "loss": 1.2218, "step": 6310 }, { "epoch": 0.36, "grad_norm": 0.9341353178024292, "learning_rate": 9.653099455040873e-05, "loss": 1.2113, "step": 6320 }, { "epoch": 0.36, "grad_norm": 1.0371884107589722, "learning_rate": 9.652531789282471e-05, "loss": 1.1963, "step": 6330 }, { "epoch": 0.36, "grad_norm": 0.9064238667488098, "learning_rate": 9.65196412352407e-05, "loss": 1.2015, "step": 6340 }, { "epoch": 0.36, "grad_norm": 1.009519338607788, "learning_rate": 9.651396457765668e-05, "loss": 1.196, "step": 6350 }, { "epoch": 0.36, "grad_norm": 0.9547874927520752, "learning_rate": 9.650828792007268e-05, "loss": 1.2, "step": 6360 }, { "epoch": 0.36, "grad_norm": 0.945046067237854, "learning_rate": 9.650261126248866e-05, "loss": 1.2111, "step": 6370 }, { "epoch": 0.36, "grad_norm": 0.98404461145401, "learning_rate": 9.649693460490464e-05, "loss": 1.1894, "step": 6380 }, { "epoch": 0.36, "grad_norm": 0.9261896014213562, "learning_rate": 9.649125794732063e-05, "loss": 1.2089, "step": 6390 }, { "epoch": 0.36, "grad_norm": 0.9798639416694641, "learning_rate": 9.648558128973661e-05, "loss": 1.2277, "step": 6400 }, { "epoch": 0.36, "grad_norm": 0.970360517501831, "learning_rate": 9.647990463215259e-05, "loss": 1.1943, "step": 6410 }, { "epoch": 0.36, "grad_norm": 1.0123772621154785, "learning_rate": 9.647422797456857e-05, "loss": 1.207, "step": 6420 }, { "epoch": 0.36, "grad_norm": 0.9619002342224121, "learning_rate": 9.646855131698456e-05, "loss": 1.2121, "step": 6430 }, { "epoch": 0.37, "grad_norm": 0.9245018362998962, "learning_rate": 9.646287465940054e-05, "loss": 1.2037, "step": 6440 }, { "epoch": 0.37, "grad_norm": 0.9852418899536133, "learning_rate": 9.645719800181654e-05, "loss": 1.2097, "step": 6450 }, { "epoch": 0.37, "grad_norm": 0.9481627345085144, "learning_rate": 9.645152134423252e-05, "loss": 1.1729, "step": 6460 }, { "epoch": 0.37, "grad_norm": 0.9178306460380554, "learning_rate": 9.64458446866485e-05, "loss": 1.2055, "step": 6470 }, { "epoch": 0.37, "grad_norm": 0.9035511016845703, "learning_rate": 9.644016802906449e-05, "loss": 1.2364, "step": 6480 }, { "epoch": 0.37, "grad_norm": 1.0383960008621216, "learning_rate": 9.643449137148047e-05, "loss": 1.1763, "step": 6490 }, { "epoch": 0.37, "grad_norm": 0.9368672966957092, "learning_rate": 9.642881471389646e-05, "loss": 1.2373, "step": 6500 }, { "epoch": 0.37, "grad_norm": 0.9455004930496216, "learning_rate": 9.642313805631244e-05, "loss": 1.1938, "step": 6510 }, { "epoch": 0.37, "grad_norm": 1.0616569519042969, "learning_rate": 9.641746139872844e-05, "loss": 1.2174, "step": 6520 }, { "epoch": 0.37, "grad_norm": 1.0085340738296509, "learning_rate": 9.641178474114442e-05, "loss": 1.1765, "step": 6530 }, { "epoch": 0.37, "grad_norm": 0.9946673512458801, "learning_rate": 9.640610808356041e-05, "loss": 1.198, "step": 6540 }, { "epoch": 0.37, "grad_norm": 0.8995618224143982, "learning_rate": 9.640043142597639e-05, "loss": 1.184, "step": 6550 }, { "epoch": 0.37, "grad_norm": 0.9680418968200684, "learning_rate": 9.639475476839237e-05, "loss": 1.2069, "step": 6560 }, { "epoch": 0.37, "grad_norm": 0.9700941443443298, "learning_rate": 9.638907811080836e-05, "loss": 1.2309, "step": 6570 }, { "epoch": 0.37, "grad_norm": 1.018792748451233, "learning_rate": 9.638340145322434e-05, "loss": 1.2212, "step": 6580 }, { "epoch": 0.37, "grad_norm": 0.902732789516449, "learning_rate": 9.637772479564034e-05, "loss": 1.1708, "step": 6590 }, { "epoch": 0.37, "grad_norm": 1.0059360265731812, "learning_rate": 9.637204813805632e-05, "loss": 1.1925, "step": 6600 }, { "epoch": 0.37, "grad_norm": 0.9479530453681946, "learning_rate": 9.636637148047231e-05, "loss": 1.1929, "step": 6610 }, { "epoch": 0.38, "grad_norm": 0.9971259832382202, "learning_rate": 9.636069482288829e-05, "loss": 1.2065, "step": 6620 }, { "epoch": 0.38, "grad_norm": 1.028159260749817, "learning_rate": 9.635501816530428e-05, "loss": 1.1913, "step": 6630 }, { "epoch": 0.38, "grad_norm": 0.9953494071960449, "learning_rate": 9.634934150772026e-05, "loss": 1.1913, "step": 6640 }, { "epoch": 0.38, "grad_norm": 0.9348169565200806, "learning_rate": 9.634366485013624e-05, "loss": 1.2012, "step": 6650 }, { "epoch": 0.38, "grad_norm": 1.0567855834960938, "learning_rate": 9.633798819255222e-05, "loss": 1.2074, "step": 6660 }, { "epoch": 0.38, "grad_norm": 0.9314650893211365, "learning_rate": 9.633231153496822e-05, "loss": 1.1911, "step": 6670 }, { "epoch": 0.38, "grad_norm": 0.9716767072677612, "learning_rate": 9.63266348773842e-05, "loss": 1.2043, "step": 6680 }, { "epoch": 0.38, "grad_norm": 0.9639903903007507, "learning_rate": 9.632095821980018e-05, "loss": 1.1951, "step": 6690 }, { "epoch": 0.38, "grad_norm": 0.9613707661628723, "learning_rate": 9.631528156221617e-05, "loss": 1.2059, "step": 6700 }, { "epoch": 0.38, "grad_norm": 1.0148128271102905, "learning_rate": 9.630960490463215e-05, "loss": 1.2102, "step": 6710 }, { "epoch": 0.38, "grad_norm": 0.94298255443573, "learning_rate": 9.630392824704814e-05, "loss": 1.1922, "step": 6720 }, { "epoch": 0.38, "grad_norm": 1.003740906715393, "learning_rate": 9.629825158946412e-05, "loss": 1.2025, "step": 6730 }, { "epoch": 0.38, "grad_norm": 0.977586030960083, "learning_rate": 9.62925749318801e-05, "loss": 1.1821, "step": 6740 }, { "epoch": 0.38, "grad_norm": 0.9562227129936218, "learning_rate": 9.62868982742961e-05, "loss": 1.1948, "step": 6750 }, { "epoch": 0.38, "grad_norm": 0.9349279999732971, "learning_rate": 9.628122161671208e-05, "loss": 1.2066, "step": 6760 }, { "epoch": 0.38, "grad_norm": 0.9206972122192383, "learning_rate": 9.627554495912807e-05, "loss": 1.1877, "step": 6770 }, { "epoch": 0.38, "grad_norm": 0.9736289381980896, "learning_rate": 9.626986830154405e-05, "loss": 1.1861, "step": 6780 }, { "epoch": 0.39, "grad_norm": 0.9641215801239014, "learning_rate": 9.626419164396005e-05, "loss": 1.1864, "step": 6790 }, { "epoch": 0.39, "grad_norm": 0.9817414283752441, "learning_rate": 9.625851498637603e-05, "loss": 1.1882, "step": 6800 }, { "epoch": 0.39, "grad_norm": 0.9583709836006165, "learning_rate": 9.625283832879202e-05, "loss": 1.1706, "step": 6810 }, { "epoch": 0.39, "grad_norm": 0.876564621925354, "learning_rate": 9.6247161671208e-05, "loss": 1.2117, "step": 6820 }, { "epoch": 0.39, "grad_norm": 1.05660080909729, "learning_rate": 9.624148501362398e-05, "loss": 1.2145, "step": 6830 }, { "epoch": 0.39, "grad_norm": 0.9505444765090942, "learning_rate": 9.623580835603997e-05, "loss": 1.193, "step": 6840 }, { "epoch": 0.39, "grad_norm": 0.9954424500465393, "learning_rate": 9.623013169845595e-05, "loss": 1.1813, "step": 6850 }, { "epoch": 0.39, "grad_norm": 0.9649336338043213, "learning_rate": 9.622445504087195e-05, "loss": 1.1765, "step": 6860 }, { "epoch": 0.39, "grad_norm": 0.9583410024642944, "learning_rate": 9.621877838328793e-05, "loss": 1.1897, "step": 6870 }, { "epoch": 0.39, "grad_norm": 0.9787657856941223, "learning_rate": 9.621310172570392e-05, "loss": 1.1947, "step": 6880 }, { "epoch": 0.39, "grad_norm": 0.9325912594795227, "learning_rate": 9.62074250681199e-05, "loss": 1.1955, "step": 6890 }, { "epoch": 0.39, "grad_norm": 0.9560771584510803, "learning_rate": 9.620174841053589e-05, "loss": 1.1906, "step": 6900 }, { "epoch": 0.39, "grad_norm": 1.083975911140442, "learning_rate": 9.619607175295187e-05, "loss": 1.2224, "step": 6910 }, { "epoch": 0.39, "grad_norm": 1.0300383567810059, "learning_rate": 9.619039509536785e-05, "loss": 1.1914, "step": 6920 }, { "epoch": 0.39, "grad_norm": 1.0193012952804565, "learning_rate": 9.618471843778383e-05, "loss": 1.2062, "step": 6930 }, { "epoch": 0.39, "grad_norm": 0.9946755170822144, "learning_rate": 9.617904178019981e-05, "loss": 1.2013, "step": 6940 }, { "epoch": 0.39, "grad_norm": 0.9372959136962891, "learning_rate": 9.61733651226158e-05, "loss": 1.1974, "step": 6950 }, { "epoch": 0.39, "grad_norm": 0.9849737882614136, "learning_rate": 9.616768846503179e-05, "loss": 1.2059, "step": 6960 }, { "epoch": 0.4, "grad_norm": 0.9735512137413025, "learning_rate": 9.616201180744778e-05, "loss": 1.1845, "step": 6970 }, { "epoch": 0.4, "grad_norm": 1.048280119895935, "learning_rate": 9.615633514986376e-05, "loss": 1.1946, "step": 6980 }, { "epoch": 0.4, "grad_norm": 0.9489299654960632, "learning_rate": 9.615065849227975e-05, "loss": 1.1856, "step": 6990 }, { "epoch": 0.4, "grad_norm": 1.0619897842407227, "learning_rate": 9.614498183469573e-05, "loss": 1.1874, "step": 7000 }, { "epoch": 0.4, "grad_norm": 1.0205270051956177, "learning_rate": 9.613930517711171e-05, "loss": 1.2166, "step": 7010 }, { "epoch": 0.4, "grad_norm": 0.9465163350105286, "learning_rate": 9.61336285195277e-05, "loss": 1.1804, "step": 7020 }, { "epoch": 0.4, "grad_norm": 0.969622015953064, "learning_rate": 9.612795186194369e-05, "loss": 1.2032, "step": 7030 }, { "epoch": 0.4, "grad_norm": 1.024182915687561, "learning_rate": 9.612227520435968e-05, "loss": 1.2344, "step": 7040 }, { "epoch": 0.4, "grad_norm": 0.9870299100875854, "learning_rate": 9.611659854677566e-05, "loss": 1.2062, "step": 7050 }, { "epoch": 0.4, "grad_norm": 0.9293481707572937, "learning_rate": 9.611092188919165e-05, "loss": 1.1856, "step": 7060 }, { "epoch": 0.4, "grad_norm": 0.9442258477210999, "learning_rate": 9.610524523160763e-05, "loss": 1.2149, "step": 7070 }, { "epoch": 0.4, "grad_norm": 1.0450546741485596, "learning_rate": 9.609956857402363e-05, "loss": 1.1811, "step": 7080 }, { "epoch": 0.4, "grad_norm": 0.9862436056137085, "learning_rate": 9.60938919164396e-05, "loss": 1.2017, "step": 7090 }, { "epoch": 0.4, "grad_norm": 0.9870041608810425, "learning_rate": 9.608821525885559e-05, "loss": 1.1802, "step": 7100 }, { "epoch": 0.4, "grad_norm": 0.9738830924034119, "learning_rate": 9.608253860127158e-05, "loss": 1.211, "step": 7110 }, { "epoch": 0.4, "grad_norm": 1.020713210105896, "learning_rate": 9.607686194368756e-05, "loss": 1.1898, "step": 7120 }, { "epoch": 0.4, "grad_norm": 1.0476644039154053, "learning_rate": 9.607118528610355e-05, "loss": 1.1619, "step": 7130 }, { "epoch": 0.4, "grad_norm": 0.9442864656448364, "learning_rate": 9.606550862851953e-05, "loss": 1.1993, "step": 7140 }, { "epoch": 0.41, "grad_norm": 0.8963363170623779, "learning_rate": 9.605983197093553e-05, "loss": 1.2007, "step": 7150 }, { "epoch": 0.41, "grad_norm": 1.0123989582061768, "learning_rate": 9.605415531335151e-05, "loss": 1.2219, "step": 7160 }, { "epoch": 0.41, "grad_norm": 0.9460508823394775, "learning_rate": 9.604847865576749e-05, "loss": 1.2341, "step": 7170 }, { "epoch": 0.41, "grad_norm": 0.9552776217460632, "learning_rate": 9.604280199818348e-05, "loss": 1.1892, "step": 7180 }, { "epoch": 0.41, "grad_norm": 0.9770448803901672, "learning_rate": 9.603712534059946e-05, "loss": 1.2168, "step": 7190 }, { "epoch": 0.41, "grad_norm": 0.9128080606460571, "learning_rate": 9.603144868301544e-05, "loss": 1.1958, "step": 7200 }, { "epoch": 0.41, "grad_norm": 0.9979309439659119, "learning_rate": 9.602577202543142e-05, "loss": 1.1664, "step": 7210 }, { "epoch": 0.41, "grad_norm": 0.970644474029541, "learning_rate": 9.602009536784741e-05, "loss": 1.18, "step": 7220 }, { "epoch": 0.41, "grad_norm": 0.9744210243225098, "learning_rate": 9.60144187102634e-05, "loss": 1.1965, "step": 7230 }, { "epoch": 0.41, "grad_norm": 0.9579272866249084, "learning_rate": 9.600874205267939e-05, "loss": 1.1807, "step": 7240 }, { "epoch": 0.41, "grad_norm": 0.9710751175880432, "learning_rate": 9.600306539509537e-05, "loss": 1.1887, "step": 7250 }, { "epoch": 0.41, "grad_norm": 0.939027726650238, "learning_rate": 9.599738873751136e-05, "loss": 1.1958, "step": 7260 }, { "epoch": 0.41, "grad_norm": 0.9103739857673645, "learning_rate": 9.599171207992734e-05, "loss": 1.2173, "step": 7270 }, { "epoch": 0.41, "grad_norm": 1.0057470798492432, "learning_rate": 9.598603542234332e-05, "loss": 1.1917, "step": 7280 }, { "epoch": 0.41, "grad_norm": 0.9669437408447266, "learning_rate": 9.598035876475931e-05, "loss": 1.1857, "step": 7290 }, { "epoch": 0.41, "grad_norm": 0.9418566226959229, "learning_rate": 9.59746821071753e-05, "loss": 1.1688, "step": 7300 }, { "epoch": 0.41, "grad_norm": 0.9476494193077087, "learning_rate": 9.596900544959129e-05, "loss": 1.1767, "step": 7310 }, { "epoch": 0.42, "grad_norm": 1.0473417043685913, "learning_rate": 9.596332879200727e-05, "loss": 1.1803, "step": 7320 }, { "epoch": 0.42, "grad_norm": 0.9744994640350342, "learning_rate": 9.595765213442326e-05, "loss": 1.1918, "step": 7330 }, { "epoch": 0.42, "grad_norm": 1.0140069723129272, "learning_rate": 9.595197547683924e-05, "loss": 1.1796, "step": 7340 }, { "epoch": 0.42, "grad_norm": 0.9530301690101624, "learning_rate": 9.594629881925523e-05, "loss": 1.2113, "step": 7350 }, { "epoch": 0.42, "grad_norm": 0.9623181819915771, "learning_rate": 9.594062216167121e-05, "loss": 1.1998, "step": 7360 }, { "epoch": 0.42, "grad_norm": 1.007694125175476, "learning_rate": 9.593494550408721e-05, "loss": 1.2068, "step": 7370 }, { "epoch": 0.42, "grad_norm": 0.9598998427391052, "learning_rate": 9.592926884650319e-05, "loss": 1.1969, "step": 7380 }, { "epoch": 0.42, "grad_norm": 0.9235202670097351, "learning_rate": 9.592359218891917e-05, "loss": 1.1737, "step": 7390 }, { "epoch": 0.42, "grad_norm": 1.044347882270813, "learning_rate": 9.591791553133516e-05, "loss": 1.1799, "step": 7400 }, { "epoch": 0.42, "grad_norm": 0.8898642063140869, "learning_rate": 9.591223887375114e-05, "loss": 1.2098, "step": 7410 }, { "epoch": 0.42, "grad_norm": 1.0139752626419067, "learning_rate": 9.590656221616714e-05, "loss": 1.1784, "step": 7420 }, { "epoch": 0.42, "grad_norm": 1.036505937576294, "learning_rate": 9.590088555858312e-05, "loss": 1.1719, "step": 7430 }, { "epoch": 0.42, "grad_norm": 1.042585849761963, "learning_rate": 9.58952089009991e-05, "loss": 1.2029, "step": 7440 }, { "epoch": 0.42, "grad_norm": 1.003122329711914, "learning_rate": 9.588953224341507e-05, "loss": 1.1513, "step": 7450 }, { "epoch": 0.42, "grad_norm": 0.9696173667907715, "learning_rate": 9.588385558583107e-05, "loss": 1.1796, "step": 7460 }, { "epoch": 0.42, "grad_norm": 0.9967196583747864, "learning_rate": 9.587817892824705e-05, "loss": 1.1861, "step": 7470 }, { "epoch": 0.42, "grad_norm": 1.0102577209472656, "learning_rate": 9.587250227066303e-05, "loss": 1.2014, "step": 7480 }, { "epoch": 0.42, "grad_norm": 0.9935798048973083, "learning_rate": 9.586682561307902e-05, "loss": 1.1972, "step": 7490 }, { "epoch": 0.43, "grad_norm": 0.9446285963058472, "learning_rate": 9.5861148955495e-05, "loss": 1.1885, "step": 7500 }, { "epoch": 0.43, "grad_norm": 0.9883187413215637, "learning_rate": 9.5855472297911e-05, "loss": 1.1942, "step": 7510 }, { "epoch": 0.43, "grad_norm": 0.9677265286445618, "learning_rate": 9.584979564032698e-05, "loss": 1.2083, "step": 7520 }, { "epoch": 0.43, "grad_norm": 0.9447225332260132, "learning_rate": 9.584411898274297e-05, "loss": 1.1712, "step": 7530 }, { "epoch": 0.43, "grad_norm": 1.00504469871521, "learning_rate": 9.583844232515895e-05, "loss": 1.1845, "step": 7540 }, { "epoch": 0.43, "grad_norm": 0.9601876735687256, "learning_rate": 9.583276566757494e-05, "loss": 1.1812, "step": 7550 }, { "epoch": 0.43, "grad_norm": 0.9935469031333923, "learning_rate": 9.582708900999092e-05, "loss": 1.1777, "step": 7560 }, { "epoch": 0.43, "grad_norm": 1.0669876337051392, "learning_rate": 9.58214123524069e-05, "loss": 1.1754, "step": 7570 }, { "epoch": 0.43, "grad_norm": 1.0002832412719727, "learning_rate": 9.58157356948229e-05, "loss": 1.2002, "step": 7580 }, { "epoch": 0.43, "grad_norm": 0.9820768237113953, "learning_rate": 9.581005903723888e-05, "loss": 1.1966, "step": 7590 }, { "epoch": 0.43, "grad_norm": 1.0214369297027588, "learning_rate": 9.580438237965487e-05, "loss": 1.1972, "step": 7600 }, { "epoch": 0.43, "grad_norm": 1.0456993579864502, "learning_rate": 9.579870572207085e-05, "loss": 1.1918, "step": 7610 }, { "epoch": 0.43, "grad_norm": 0.9424342513084412, "learning_rate": 9.579302906448684e-05, "loss": 1.1561, "step": 7620 }, { "epoch": 0.43, "grad_norm": 0.9495410323143005, "learning_rate": 9.578735240690282e-05, "loss": 1.1806, "step": 7630 }, { "epoch": 0.43, "grad_norm": 0.9518994092941284, "learning_rate": 9.578167574931882e-05, "loss": 1.1736, "step": 7640 }, { "epoch": 0.43, "grad_norm": 1.0274769067764282, "learning_rate": 9.57759990917348e-05, "loss": 1.1783, "step": 7650 }, { "epoch": 0.43, "grad_norm": 1.0021309852600098, "learning_rate": 9.577032243415078e-05, "loss": 1.1593, "step": 7660 }, { "epoch": 0.43, "grad_norm": 0.9454078078269958, "learning_rate": 9.576464577656677e-05, "loss": 1.1629, "step": 7670 }, { "epoch": 0.44, "grad_norm": 0.9240717887878418, "learning_rate": 9.575896911898275e-05, "loss": 1.2129, "step": 7680 }, { "epoch": 0.44, "grad_norm": 0.9865195155143738, "learning_rate": 9.575329246139873e-05, "loss": 1.2169, "step": 7690 }, { "epoch": 0.44, "grad_norm": 1.0080214738845825, "learning_rate": 9.574761580381472e-05, "loss": 1.1668, "step": 7700 }, { "epoch": 0.44, "grad_norm": 0.9322482347488403, "learning_rate": 9.57419391462307e-05, "loss": 1.1759, "step": 7710 }, { "epoch": 0.44, "grad_norm": 0.9918451309204102, "learning_rate": 9.573626248864668e-05, "loss": 1.1781, "step": 7720 }, { "epoch": 0.44, "grad_norm": 1.017480492591858, "learning_rate": 9.573058583106268e-05, "loss": 1.1441, "step": 7730 }, { "epoch": 0.44, "grad_norm": 0.9910601377487183, "learning_rate": 9.572490917347866e-05, "loss": 1.2011, "step": 7740 }, { "epoch": 0.44, "grad_norm": 0.9818063378334045, "learning_rate": 9.571923251589464e-05, "loss": 1.1457, "step": 7750 }, { "epoch": 0.44, "grad_norm": 0.9974391460418701, "learning_rate": 9.571355585831063e-05, "loss": 1.1904, "step": 7760 }, { "epoch": 0.44, "grad_norm": 0.9524908065795898, "learning_rate": 9.570787920072661e-05, "loss": 1.1711, "step": 7770 }, { "epoch": 0.44, "grad_norm": 1.0547486543655396, "learning_rate": 9.57022025431426e-05, "loss": 1.1968, "step": 7780 }, { "epoch": 0.44, "grad_norm": 1.0281764268875122, "learning_rate": 9.569652588555858e-05, "loss": 1.1508, "step": 7790 }, { "epoch": 0.44, "grad_norm": 0.9502371549606323, "learning_rate": 9.569084922797458e-05, "loss": 1.1916, "step": 7800 }, { "epoch": 0.44, "grad_norm": 1.0216362476348877, "learning_rate": 9.568517257039056e-05, "loss": 1.1936, "step": 7810 }, { "epoch": 0.44, "grad_norm": 1.0183351039886475, "learning_rate": 9.567949591280655e-05, "loss": 1.1803, "step": 7820 }, { "epoch": 0.44, "grad_norm": 0.9858915209770203, "learning_rate": 9.567381925522253e-05, "loss": 1.1926, "step": 7830 }, { "epoch": 0.44, "grad_norm": 1.0373444557189941, "learning_rate": 9.566814259763851e-05, "loss": 1.1749, "step": 7840 }, { "epoch": 0.45, "grad_norm": 1.0373564958572388, "learning_rate": 9.56624659400545e-05, "loss": 1.1888, "step": 7850 }, { "epoch": 0.45, "grad_norm": 1.027773141860962, "learning_rate": 9.565678928247048e-05, "loss": 1.1503, "step": 7860 }, { "epoch": 0.45, "grad_norm": 1.0240588188171387, "learning_rate": 9.565111262488648e-05, "loss": 1.1611, "step": 7870 }, { "epoch": 0.45, "grad_norm": 0.9764520525932312, "learning_rate": 9.564543596730246e-05, "loss": 1.1806, "step": 7880 }, { "epoch": 0.45, "grad_norm": 1.013033390045166, "learning_rate": 9.563975930971845e-05, "loss": 1.1729, "step": 7890 }, { "epoch": 0.45, "grad_norm": 0.924264132976532, "learning_rate": 9.563408265213443e-05, "loss": 1.1818, "step": 7900 }, { "epoch": 0.45, "grad_norm": 1.045018196105957, "learning_rate": 9.562840599455042e-05, "loss": 1.2013, "step": 7910 }, { "epoch": 0.45, "grad_norm": 0.9599833488464355, "learning_rate": 9.56227293369664e-05, "loss": 1.1506, "step": 7920 }, { "epoch": 0.45, "grad_norm": 0.9783795475959778, "learning_rate": 9.561705267938238e-05, "loss": 1.186, "step": 7930 }, { "epoch": 0.45, "grad_norm": 0.960241436958313, "learning_rate": 9.561137602179838e-05, "loss": 1.2077, "step": 7940 }, { "epoch": 0.45, "grad_norm": 0.9935620427131653, "learning_rate": 9.560569936421436e-05, "loss": 1.1623, "step": 7950 }, { "epoch": 0.45, "grad_norm": 0.956610381603241, "learning_rate": 9.560002270663034e-05, "loss": 1.1829, "step": 7960 }, { "epoch": 0.45, "grad_norm": 0.9784103631973267, "learning_rate": 9.559434604904632e-05, "loss": 1.2054, "step": 7970 }, { "epoch": 0.45, "grad_norm": 1.0240082740783691, "learning_rate": 9.558866939146231e-05, "loss": 1.192, "step": 7980 }, { "epoch": 0.45, "grad_norm": 0.9975481629371643, "learning_rate": 9.558299273387829e-05, "loss": 1.1629, "step": 7990 }, { "epoch": 0.45, "grad_norm": 0.9417718052864075, "learning_rate": 9.557731607629428e-05, "loss": 1.1764, "step": 8000 }, { "epoch": 0.45, "grad_norm": 1.017586350440979, "learning_rate": 9.557163941871026e-05, "loss": 1.1717, "step": 8010 }, { "epoch": 0.45, "grad_norm": 0.8856431841850281, "learning_rate": 9.556596276112624e-05, "loss": 1.2125, "step": 8020 }, { "epoch": 0.46, "grad_norm": 1.0078914165496826, "learning_rate": 9.556028610354224e-05, "loss": 1.1551, "step": 8030 }, { "epoch": 0.46, "grad_norm": 1.0039008855819702, "learning_rate": 9.555460944595822e-05, "loss": 1.1759, "step": 8040 }, { "epoch": 0.46, "grad_norm": 0.9318314790725708, "learning_rate": 9.554893278837421e-05, "loss": 1.1866, "step": 8050 }, { "epoch": 0.46, "grad_norm": 0.9513652920722961, "learning_rate": 9.554325613079019e-05, "loss": 1.1628, "step": 8060 }, { "epoch": 0.46, "grad_norm": 0.9595157504081726, "learning_rate": 9.553757947320619e-05, "loss": 1.1819, "step": 8070 }, { "epoch": 0.46, "grad_norm": 0.9132928848266602, "learning_rate": 9.553190281562216e-05, "loss": 1.1931, "step": 8080 }, { "epoch": 0.46, "grad_norm": 0.9688674807548523, "learning_rate": 9.552622615803816e-05, "loss": 1.188, "step": 8090 }, { "epoch": 0.46, "grad_norm": 1.0429368019104004, "learning_rate": 9.552054950045414e-05, "loss": 1.1668, "step": 8100 }, { "epoch": 0.46, "grad_norm": 0.9210917949676514, "learning_rate": 9.551487284287012e-05, "loss": 1.1734, "step": 8110 }, { "epoch": 0.46, "grad_norm": 0.9803915619850159, "learning_rate": 9.550919618528611e-05, "loss": 1.1762, "step": 8120 }, { "epoch": 0.46, "grad_norm": 0.9634639620780945, "learning_rate": 9.550351952770209e-05, "loss": 1.2012, "step": 8130 }, { "epoch": 0.46, "grad_norm": 0.9697930216789246, "learning_rate": 9.549784287011809e-05, "loss": 1.1904, "step": 8140 }, { "epoch": 0.46, "grad_norm": 0.9612547755241394, "learning_rate": 9.549216621253407e-05, "loss": 1.1726, "step": 8150 }, { "epoch": 0.46, "grad_norm": 0.9414181709289551, "learning_rate": 9.548648955495006e-05, "loss": 1.1723, "step": 8160 }, { "epoch": 0.46, "grad_norm": 0.9059590101242065, "learning_rate": 9.548081289736604e-05, "loss": 1.2029, "step": 8170 }, { "epoch": 0.46, "grad_norm": 0.9577644467353821, "learning_rate": 9.547513623978203e-05, "loss": 1.1849, "step": 8180 }, { "epoch": 0.46, "grad_norm": 0.8785077333450317, "learning_rate": 9.546945958219801e-05, "loss": 1.1739, "step": 8190 }, { "epoch": 0.46, "grad_norm": 0.9505687952041626, "learning_rate": 9.546378292461399e-05, "loss": 1.1859, "step": 8200 }, { "epoch": 0.47, "grad_norm": 1.0428407192230225, "learning_rate": 9.545810626702997e-05, "loss": 1.1668, "step": 8210 }, { "epoch": 0.47, "grad_norm": 1.0240697860717773, "learning_rate": 9.545242960944597e-05, "loss": 1.1641, "step": 8220 }, { "epoch": 0.47, "grad_norm": 0.9424143433570862, "learning_rate": 9.544675295186195e-05, "loss": 1.1798, "step": 8230 }, { "epoch": 0.47, "grad_norm": 0.9830737113952637, "learning_rate": 9.544107629427793e-05, "loss": 1.1716, "step": 8240 }, { "epoch": 0.47, "grad_norm": 0.9692713022232056, "learning_rate": 9.543539963669392e-05, "loss": 1.1772, "step": 8250 }, { "epoch": 0.47, "grad_norm": 0.9477458596229553, "learning_rate": 9.54297229791099e-05, "loss": 1.2004, "step": 8260 }, { "epoch": 0.47, "grad_norm": 0.931882917881012, "learning_rate": 9.542404632152589e-05, "loss": 1.1737, "step": 8270 }, { "epoch": 0.47, "grad_norm": 1.0921721458435059, "learning_rate": 9.541836966394187e-05, "loss": 1.1852, "step": 8280 }, { "epoch": 0.47, "grad_norm": 1.0063748359680176, "learning_rate": 9.541269300635785e-05, "loss": 1.1533, "step": 8290 }, { "epoch": 0.47, "grad_norm": 1.0849257707595825, "learning_rate": 9.540701634877385e-05, "loss": 1.1762, "step": 8300 }, { "epoch": 0.47, "grad_norm": 0.9301006197929382, "learning_rate": 9.540133969118983e-05, "loss": 1.1757, "step": 8310 }, { "epoch": 0.47, "grad_norm": 1.0004489421844482, "learning_rate": 9.539566303360582e-05, "loss": 1.1748, "step": 8320 }, { "epoch": 0.47, "grad_norm": 0.9480199217796326, "learning_rate": 9.53899863760218e-05, "loss": 1.1873, "step": 8330 }, { "epoch": 0.47, "grad_norm": 1.0081839561462402, "learning_rate": 9.538430971843779e-05, "loss": 1.1679, "step": 8340 }, { "epoch": 0.47, "grad_norm": 0.9619608521461487, "learning_rate": 9.537863306085377e-05, "loss": 1.1903, "step": 8350 }, { "epoch": 0.47, "grad_norm": 0.9807209372520447, "learning_rate": 9.537295640326977e-05, "loss": 1.1734, "step": 8360 }, { "epoch": 0.47, "grad_norm": 0.9918584823608398, "learning_rate": 9.536727974568575e-05, "loss": 1.1722, "step": 8370 }, { "epoch": 0.48, "grad_norm": 1.0077104568481445, "learning_rate": 9.536160308810173e-05, "loss": 1.2017, "step": 8380 }, { "epoch": 0.48, "grad_norm": 1.005751609802246, "learning_rate": 9.535592643051772e-05, "loss": 1.1581, "step": 8390 }, { "epoch": 0.48, "grad_norm": 1.0079874992370605, "learning_rate": 9.53502497729337e-05, "loss": 1.1829, "step": 8400 }, { "epoch": 0.48, "grad_norm": 0.9474384784698486, "learning_rate": 9.53445731153497e-05, "loss": 1.1793, "step": 8410 }, { "epoch": 0.48, "grad_norm": 0.9797519445419312, "learning_rate": 9.533889645776567e-05, "loss": 1.1957, "step": 8420 }, { "epoch": 0.48, "grad_norm": 0.9810649752616882, "learning_rate": 9.533321980018167e-05, "loss": 1.1448, "step": 8430 }, { "epoch": 0.48, "grad_norm": 0.9460911750793457, "learning_rate": 9.532754314259765e-05, "loss": 1.1547, "step": 8440 }, { "epoch": 0.48, "grad_norm": 0.9836834669113159, "learning_rate": 9.532186648501363e-05, "loss": 1.1641, "step": 8450 }, { "epoch": 0.48, "grad_norm": 0.9666243195533752, "learning_rate": 9.531618982742962e-05, "loss": 1.1475, "step": 8460 }, { "epoch": 0.48, "grad_norm": 1.0395878553390503, "learning_rate": 9.53105131698456e-05, "loss": 1.1912, "step": 8470 }, { "epoch": 0.48, "grad_norm": 1.1445064544677734, "learning_rate": 9.530483651226158e-05, "loss": 1.1846, "step": 8480 }, { "epoch": 0.48, "grad_norm": 0.9115649461746216, "learning_rate": 9.529915985467756e-05, "loss": 1.1906, "step": 8490 }, { "epoch": 0.48, "grad_norm": 0.9402968287467957, "learning_rate": 9.529348319709355e-05, "loss": 1.1803, "step": 8500 }, { "epoch": 0.48, "grad_norm": 0.937130331993103, "learning_rate": 9.528780653950953e-05, "loss": 1.1538, "step": 8510 }, { "epoch": 0.48, "grad_norm": 0.9884362816810608, "learning_rate": 9.528212988192553e-05, "loss": 1.1691, "step": 8520 }, { "epoch": 0.48, "grad_norm": 1.0198137760162354, "learning_rate": 9.527645322434151e-05, "loss": 1.1517, "step": 8530 }, { "epoch": 0.48, "grad_norm": 1.0019257068634033, "learning_rate": 9.52707765667575e-05, "loss": 1.1644, "step": 8540 }, { "epoch": 0.48, "grad_norm": 1.0177230834960938, "learning_rate": 9.526509990917348e-05, "loss": 1.1979, "step": 8550 }, { "epoch": 0.49, "grad_norm": 0.9867101907730103, "learning_rate": 9.525942325158946e-05, "loss": 1.1458, "step": 8560 }, { "epoch": 0.49, "grad_norm": 0.9933324456214905, "learning_rate": 9.525374659400545e-05, "loss": 1.1709, "step": 8570 }, { "epoch": 0.49, "grad_norm": 0.9596146941184998, "learning_rate": 9.524806993642143e-05, "loss": 1.1522, "step": 8580 }, { "epoch": 0.49, "grad_norm": 0.9486159682273865, "learning_rate": 9.524239327883743e-05, "loss": 1.1664, "step": 8590 }, { "epoch": 0.49, "grad_norm": 1.0523295402526855, "learning_rate": 9.523671662125341e-05, "loss": 1.1563, "step": 8600 }, { "epoch": 0.49, "grad_norm": 0.9608811736106873, "learning_rate": 9.52310399636694e-05, "loss": 1.1621, "step": 8610 }, { "epoch": 0.49, "grad_norm": 1.0165441036224365, "learning_rate": 9.522536330608538e-05, "loss": 1.1751, "step": 8620 }, { "epoch": 0.49, "grad_norm": 0.9358837604522705, "learning_rate": 9.521968664850137e-05, "loss": 1.1737, "step": 8630 }, { "epoch": 0.49, "grad_norm": 1.177914023399353, "learning_rate": 9.521400999091735e-05, "loss": 1.1875, "step": 8640 }, { "epoch": 0.49, "grad_norm": 0.9603959321975708, "learning_rate": 9.520833333333333e-05, "loss": 1.184, "step": 8650 }, { "epoch": 0.49, "grad_norm": 0.9914448857307434, "learning_rate": 9.520265667574933e-05, "loss": 1.1543, "step": 8660 }, { "epoch": 0.49, "grad_norm": 0.9682386517524719, "learning_rate": 9.519698001816531e-05, "loss": 1.1911, "step": 8670 }, { "epoch": 0.49, "grad_norm": 0.9031766057014465, "learning_rate": 9.51913033605813e-05, "loss": 1.1774, "step": 8680 }, { "epoch": 0.49, "grad_norm": 0.9875689744949341, "learning_rate": 9.518562670299728e-05, "loss": 1.1486, "step": 8690 }, { "epoch": 0.49, "grad_norm": 0.9976431131362915, "learning_rate": 9.517995004541328e-05, "loss": 1.1585, "step": 8700 }, { "epoch": 0.49, "grad_norm": 1.0555423498153687, "learning_rate": 9.517427338782926e-05, "loss": 1.1754, "step": 8710 }, { "epoch": 0.49, "grad_norm": 0.9725318551063538, "learning_rate": 9.516859673024523e-05, "loss": 1.1781, "step": 8720 }, { "epoch": 0.5, "grad_norm": 0.9613460302352905, "learning_rate": 9.516292007266121e-05, "loss": 1.2012, "step": 8730 }, { "epoch": 0.5, "grad_norm": 0.922593355178833, "learning_rate": 9.515724341507721e-05, "loss": 1.153, "step": 8740 }, { "epoch": 0.5, "grad_norm": 1.004565954208374, "learning_rate": 9.515156675749319e-05, "loss": 1.1717, "step": 8750 }, { "epoch": 0.5, "grad_norm": 0.9826274514198303, "learning_rate": 9.514589009990917e-05, "loss": 1.1656, "step": 8760 }, { "epoch": 0.5, "grad_norm": 0.9717330932617188, "learning_rate": 9.514021344232516e-05, "loss": 1.188, "step": 8770 }, { "epoch": 0.5, "grad_norm": 1.0705103874206543, "learning_rate": 9.513453678474114e-05, "loss": 1.1603, "step": 8780 }, { "epoch": 0.5, "grad_norm": 0.9652580618858337, "learning_rate": 9.512886012715714e-05, "loss": 1.1595, "step": 8790 }, { "epoch": 0.5, "grad_norm": 0.9758157134056091, "learning_rate": 9.512318346957312e-05, "loss": 1.1689, "step": 8800 }, { "epoch": 0.5, "grad_norm": 0.9781925082206726, "learning_rate": 9.511750681198911e-05, "loss": 1.1666, "step": 8810 }, { "epoch": 0.5, "grad_norm": 0.9680438041687012, "learning_rate": 9.511183015440509e-05, "loss": 1.1661, "step": 8820 }, { "epoch": 0.5, "grad_norm": 1.0028702020645142, "learning_rate": 9.510615349682107e-05, "loss": 1.1683, "step": 8830 }, { "epoch": 0.5, "grad_norm": 0.9377083778381348, "learning_rate": 9.510047683923706e-05, "loss": 1.1757, "step": 8840 }, { "epoch": 0.5, "grad_norm": 1.0374672412872314, "learning_rate": 9.509480018165304e-05, "loss": 1.1838, "step": 8850 }, { "epoch": 0.5, "grad_norm": 0.940727949142456, "learning_rate": 9.508912352406904e-05, "loss": 1.1485, "step": 8860 }, { "epoch": 0.5, "grad_norm": 0.9429153800010681, "learning_rate": 9.508344686648502e-05, "loss": 1.1797, "step": 8870 }, { "epoch": 0.5, "grad_norm": 0.950762152671814, "learning_rate": 9.507777020890101e-05, "loss": 1.1945, "step": 8880 }, { "epoch": 0.5, "grad_norm": 0.9927104115486145, "learning_rate": 9.507209355131699e-05, "loss": 1.1765, "step": 8890 }, { "epoch": 0.5, "grad_norm": 0.9556518793106079, "learning_rate": 9.506641689373298e-05, "loss": 1.1837, "step": 8900 }, { "epoch": 0.51, "grad_norm": 0.9867491126060486, "learning_rate": 9.506074023614896e-05, "loss": 1.1654, "step": 8910 }, { "epoch": 0.51, "grad_norm": 0.9314953684806824, "learning_rate": 9.505506357856494e-05, "loss": 1.148, "step": 8920 }, { "epoch": 0.51, "grad_norm": 0.9624485969543457, "learning_rate": 9.504938692098094e-05, "loss": 1.1579, "step": 8930 }, { "epoch": 0.51, "grad_norm": 0.9782319664955139, "learning_rate": 9.504371026339692e-05, "loss": 1.1735, "step": 8940 }, { "epoch": 0.51, "grad_norm": 1.0383247137069702, "learning_rate": 9.503803360581291e-05, "loss": 1.1569, "step": 8950 }, { "epoch": 0.51, "grad_norm": 0.989554762840271, "learning_rate": 9.503235694822889e-05, "loss": 1.1796, "step": 8960 }, { "epoch": 0.51, "grad_norm": 0.9254682064056396, "learning_rate": 9.502668029064487e-05, "loss": 1.1778, "step": 8970 }, { "epoch": 0.51, "grad_norm": 0.9924626350402832, "learning_rate": 9.502100363306086e-05, "loss": 1.1613, "step": 8980 }, { "epoch": 0.51, "grad_norm": 1.0077202320098877, "learning_rate": 9.501532697547684e-05, "loss": 1.1731, "step": 8990 }, { "epoch": 0.51, "grad_norm": 0.944053053855896, "learning_rate": 9.500965031789282e-05, "loss": 1.1842, "step": 9000 }, { "epoch": 0.51, "grad_norm": 0.9369684457778931, "learning_rate": 9.50039736603088e-05, "loss": 1.1787, "step": 9010 }, { "epoch": 0.51, "grad_norm": 0.9352824687957764, "learning_rate": 9.49982970027248e-05, "loss": 1.1834, "step": 9020 }, { "epoch": 0.51, "grad_norm": 0.9312922358512878, "learning_rate": 9.499262034514078e-05, "loss": 1.1736, "step": 9030 }, { "epoch": 0.51, "grad_norm": 0.9691975116729736, "learning_rate": 9.498694368755677e-05, "loss": 1.1845, "step": 9040 }, { "epoch": 0.51, "grad_norm": 0.9689831733703613, "learning_rate": 9.498126702997275e-05, "loss": 1.1765, "step": 9050 }, { "epoch": 0.51, "grad_norm": 1.0389455556869507, "learning_rate": 9.497559037238874e-05, "loss": 1.1603, "step": 9060 }, { "epoch": 0.51, "grad_norm": 0.9843589663505554, "learning_rate": 9.496991371480472e-05, "loss": 1.1693, "step": 9070 }, { "epoch": 0.51, "grad_norm": 0.9837065935134888, "learning_rate": 9.496423705722072e-05, "loss": 1.1546, "step": 9080 }, { "epoch": 0.52, "grad_norm": 0.9470362067222595, "learning_rate": 9.49585603996367e-05, "loss": 1.1666, "step": 9090 }, { "epoch": 0.52, "grad_norm": 1.0865381956100464, "learning_rate": 9.495288374205268e-05, "loss": 1.1795, "step": 9100 }, { "epoch": 0.52, "grad_norm": 0.996343731880188, "learning_rate": 9.494720708446867e-05, "loss": 1.1391, "step": 9110 }, { "epoch": 0.52, "grad_norm": 1.0870699882507324, "learning_rate": 9.494153042688465e-05, "loss": 1.1549, "step": 9120 }, { "epoch": 0.52, "grad_norm": 0.9308897852897644, "learning_rate": 9.493585376930064e-05, "loss": 1.1707, "step": 9130 }, { "epoch": 0.52, "grad_norm": 0.9566501975059509, "learning_rate": 9.493017711171662e-05, "loss": 1.1765, "step": 9140 }, { "epoch": 0.52, "grad_norm": 0.9714122414588928, "learning_rate": 9.492450045413262e-05, "loss": 1.1831, "step": 9150 }, { "epoch": 0.52, "grad_norm": 0.9222943782806396, "learning_rate": 9.49188237965486e-05, "loss": 1.1524, "step": 9160 }, { "epoch": 0.52, "grad_norm": 0.9806659817695618, "learning_rate": 9.491314713896459e-05, "loss": 1.1753, "step": 9170 }, { "epoch": 0.52, "grad_norm": 0.9818117618560791, "learning_rate": 9.490747048138057e-05, "loss": 1.1711, "step": 9180 }, { "epoch": 0.52, "grad_norm": 0.9587385058403015, "learning_rate": 9.490179382379655e-05, "loss": 1.1839, "step": 9190 }, { "epoch": 0.52, "grad_norm": 0.9912527799606323, "learning_rate": 9.489611716621254e-05, "loss": 1.1581, "step": 9200 }, { "epoch": 0.52, "grad_norm": 0.9589125514030457, "learning_rate": 9.489044050862852e-05, "loss": 1.162, "step": 9210 }, { "epoch": 0.52, "grad_norm": 1.0010898113250732, "learning_rate": 9.488476385104452e-05, "loss": 1.1605, "step": 9220 }, { "epoch": 0.52, "grad_norm": 0.9928580522537231, "learning_rate": 9.48790871934605e-05, "loss": 1.1732, "step": 9230 }, { "epoch": 0.52, "grad_norm": 0.9527132511138916, "learning_rate": 9.487341053587648e-05, "loss": 1.154, "step": 9240 }, { "epoch": 0.52, "grad_norm": 1.0326685905456543, "learning_rate": 9.486773387829246e-05, "loss": 1.1546, "step": 9250 }, { "epoch": 0.53, "grad_norm": 0.9382330179214478, "learning_rate": 9.486205722070845e-05, "loss": 1.1512, "step": 9260 }, { "epoch": 0.53, "grad_norm": 0.9664940237998962, "learning_rate": 9.485638056312443e-05, "loss": 1.1545, "step": 9270 }, { "epoch": 0.53, "grad_norm": 1.043082356452942, "learning_rate": 9.485070390554042e-05, "loss": 1.1209, "step": 9280 }, { "epoch": 0.53, "grad_norm": 1.0264842510223389, "learning_rate": 9.48450272479564e-05, "loss": 1.1592, "step": 9290 }, { "epoch": 0.53, "grad_norm": 0.9739967584609985, "learning_rate": 9.483935059037238e-05, "loss": 1.1657, "step": 9300 }, { "epoch": 0.53, "grad_norm": 0.9986457228660583, "learning_rate": 9.483367393278838e-05, "loss": 1.149, "step": 9310 }, { "epoch": 0.53, "grad_norm": 0.9587652087211609, "learning_rate": 9.482799727520436e-05, "loss": 1.1501, "step": 9320 }, { "epoch": 0.53, "grad_norm": 1.0092830657958984, "learning_rate": 9.482232061762035e-05, "loss": 1.1966, "step": 9330 }, { "epoch": 0.53, "grad_norm": 1.0331405401229858, "learning_rate": 9.481664396003633e-05, "loss": 1.1393, "step": 9340 }, { "epoch": 0.53, "grad_norm": 0.9400890469551086, "learning_rate": 9.481096730245233e-05, "loss": 1.1464, "step": 9350 }, { "epoch": 0.53, "grad_norm": 0.9825927019119263, "learning_rate": 9.48052906448683e-05, "loss": 1.1861, "step": 9360 }, { "epoch": 0.53, "grad_norm": 0.9908943176269531, "learning_rate": 9.47996139872843e-05, "loss": 1.1549, "step": 9370 }, { "epoch": 0.53, "grad_norm": 0.9715225696563721, "learning_rate": 9.479393732970028e-05, "loss": 1.1693, "step": 9380 }, { "epoch": 0.53, "grad_norm": 1.013728380203247, "learning_rate": 9.478826067211626e-05, "loss": 1.1559, "step": 9390 }, { "epoch": 0.53, "grad_norm": 1.0478010177612305, "learning_rate": 9.478258401453225e-05, "loss": 1.1622, "step": 9400 }, { "epoch": 0.53, "grad_norm": 0.9791405200958252, "learning_rate": 9.477690735694823e-05, "loss": 1.1397, "step": 9410 }, { "epoch": 0.53, "grad_norm": 1.036456823348999, "learning_rate": 9.477123069936423e-05, "loss": 1.146, "step": 9420 }, { "epoch": 0.53, "grad_norm": 0.9669798612594604, "learning_rate": 9.47655540417802e-05, "loss": 1.148, "step": 9430 }, { "epoch": 0.54, "grad_norm": 1.0143091678619385, "learning_rate": 9.47598773841962e-05, "loss": 1.1436, "step": 9440 }, { "epoch": 0.54, "grad_norm": 0.9751670360565186, "learning_rate": 9.475420072661218e-05, "loss": 1.1431, "step": 9450 }, { "epoch": 0.54, "grad_norm": 1.0133594274520874, "learning_rate": 9.474852406902817e-05, "loss": 1.134, "step": 9460 }, { "epoch": 0.54, "grad_norm": 0.980779230594635, "learning_rate": 9.474284741144415e-05, "loss": 1.1758, "step": 9470 }, { "epoch": 0.54, "grad_norm": 1.0387659072875977, "learning_rate": 9.473717075386013e-05, "loss": 1.1578, "step": 9480 }, { "epoch": 0.54, "grad_norm": 1.0302485227584839, "learning_rate": 9.473149409627611e-05, "loss": 1.1702, "step": 9490 }, { "epoch": 0.54, "grad_norm": 1.0035736560821533, "learning_rate": 9.47258174386921e-05, "loss": 1.1518, "step": 9500 }, { "epoch": 0.54, "grad_norm": 0.9978141188621521, "learning_rate": 9.472014078110809e-05, "loss": 1.1512, "step": 9510 }, { "epoch": 0.54, "grad_norm": 0.9570597410202026, "learning_rate": 9.471446412352407e-05, "loss": 1.1393, "step": 9520 }, { "epoch": 0.54, "grad_norm": 1.005622386932373, "learning_rate": 9.470878746594006e-05, "loss": 1.159, "step": 9530 }, { "epoch": 0.54, "grad_norm": 0.9532175064086914, "learning_rate": 9.470311080835604e-05, "loss": 1.1629, "step": 9540 }, { "epoch": 0.54, "grad_norm": 0.9887433648109436, "learning_rate": 9.469743415077203e-05, "loss": 1.1923, "step": 9550 }, { "epoch": 0.54, "grad_norm": 0.9804301857948303, "learning_rate": 9.469175749318801e-05, "loss": 1.1657, "step": 9560 }, { "epoch": 0.54, "grad_norm": 1.0526161193847656, "learning_rate": 9.468608083560399e-05, "loss": 1.1551, "step": 9570 }, { "epoch": 0.54, "grad_norm": 0.9017874002456665, "learning_rate": 9.468040417801999e-05, "loss": 1.1848, "step": 9580 }, { "epoch": 0.54, "grad_norm": 1.0008541345596313, "learning_rate": 9.467472752043597e-05, "loss": 1.1575, "step": 9590 }, { "epoch": 0.54, "grad_norm": 1.0143426656723022, "learning_rate": 9.466905086285196e-05, "loss": 1.1968, "step": 9600 }, { "epoch": 0.54, "grad_norm": 0.9785216450691223, "learning_rate": 9.466337420526794e-05, "loss": 1.1507, "step": 9610 }, { "epoch": 0.55, "grad_norm": 1.0327850580215454, "learning_rate": 9.465769754768393e-05, "loss": 1.1724, "step": 9620 }, { "epoch": 0.55, "grad_norm": 0.9984729290008545, "learning_rate": 9.465202089009991e-05, "loss": 1.158, "step": 9630 }, { "epoch": 0.55, "grad_norm": 0.9387264847755432, "learning_rate": 9.46463442325159e-05, "loss": 1.174, "step": 9640 }, { "epoch": 0.55, "grad_norm": 1.1018481254577637, "learning_rate": 9.464066757493189e-05, "loss": 1.1858, "step": 9650 }, { "epoch": 0.55, "grad_norm": 0.9828536510467529, "learning_rate": 9.463499091734787e-05, "loss": 1.1698, "step": 9660 }, { "epoch": 0.55, "grad_norm": 1.0482219457626343, "learning_rate": 9.462931425976386e-05, "loss": 1.1526, "step": 9670 }, { "epoch": 0.55, "grad_norm": 0.9513322710990906, "learning_rate": 9.462363760217984e-05, "loss": 1.1745, "step": 9680 }, { "epoch": 0.55, "grad_norm": 1.0229015350341797, "learning_rate": 9.461796094459583e-05, "loss": 1.1414, "step": 9690 }, { "epoch": 0.55, "grad_norm": 1.0006420612335205, "learning_rate": 9.461228428701181e-05, "loss": 1.169, "step": 9700 }, { "epoch": 0.55, "grad_norm": 0.9120140075683594, "learning_rate": 9.460660762942781e-05, "loss": 1.1611, "step": 9710 }, { "epoch": 0.55, "grad_norm": 1.1213351488113403, "learning_rate": 9.460093097184379e-05, "loss": 1.1601, "step": 9720 }, { "epoch": 0.55, "grad_norm": 1.0536513328552246, "learning_rate": 9.459525431425977e-05, "loss": 1.1771, "step": 9730 }, { "epoch": 0.55, "grad_norm": 0.9669517874717712, "learning_rate": 9.458957765667576e-05, "loss": 1.1622, "step": 9740 }, { "epoch": 0.55, "grad_norm": 0.9467039108276367, "learning_rate": 9.458390099909174e-05, "loss": 1.1502, "step": 9750 }, { "epoch": 0.55, "grad_norm": 0.9067078232765198, "learning_rate": 9.457822434150772e-05, "loss": 1.1401, "step": 9760 }, { "epoch": 0.55, "grad_norm": 0.9830605983734131, "learning_rate": 9.45725476839237e-05, "loss": 1.1366, "step": 9770 }, { "epoch": 0.55, "grad_norm": 0.9391667246818542, "learning_rate": 9.45668710263397e-05, "loss": 1.1469, "step": 9780 }, { "epoch": 0.56, "grad_norm": 0.9808118343353271, "learning_rate": 9.456119436875567e-05, "loss": 1.1839, "step": 9790 }, { "epoch": 0.56, "grad_norm": 0.9593546390533447, "learning_rate": 9.455551771117167e-05, "loss": 1.163, "step": 9800 }, { "epoch": 0.56, "grad_norm": 0.9397660493850708, "learning_rate": 9.454984105358765e-05, "loss": 1.1583, "step": 9810 }, { "epoch": 0.56, "grad_norm": 1.0307159423828125, "learning_rate": 9.454416439600364e-05, "loss": 1.1437, "step": 9820 }, { "epoch": 0.56, "grad_norm": 0.9391475915908813, "learning_rate": 9.453848773841962e-05, "loss": 1.1516, "step": 9830 }, { "epoch": 0.56, "grad_norm": 1.0255663394927979, "learning_rate": 9.45328110808356e-05, "loss": 1.1576, "step": 9840 }, { "epoch": 0.56, "grad_norm": 0.9064210653305054, "learning_rate": 9.45271344232516e-05, "loss": 1.1472, "step": 9850 }, { "epoch": 0.56, "grad_norm": 1.007863998413086, "learning_rate": 9.452145776566757e-05, "loss": 1.1699, "step": 9860 }, { "epoch": 0.56, "grad_norm": 0.9377842545509338, "learning_rate": 9.451578110808357e-05, "loss": 1.15, "step": 9870 }, { "epoch": 0.56, "grad_norm": 0.9416317939758301, "learning_rate": 9.451010445049955e-05, "loss": 1.1255, "step": 9880 }, { "epoch": 0.56, "grad_norm": 1.015101432800293, "learning_rate": 9.450442779291554e-05, "loss": 1.1653, "step": 9890 }, { "epoch": 0.56, "grad_norm": 0.961009681224823, "learning_rate": 9.449875113533152e-05, "loss": 1.1645, "step": 9900 }, { "epoch": 0.56, "grad_norm": 0.9696190357208252, "learning_rate": 9.449307447774751e-05, "loss": 1.1472, "step": 9910 }, { "epoch": 0.56, "grad_norm": 0.976368248462677, "learning_rate": 9.44873978201635e-05, "loss": 1.1688, "step": 9920 }, { "epoch": 0.56, "grad_norm": 0.9839035868644714, "learning_rate": 9.448172116257947e-05, "loss": 1.1426, "step": 9930 }, { "epoch": 0.56, "grad_norm": 1.0188510417938232, "learning_rate": 9.447604450499547e-05, "loss": 1.1683, "step": 9940 }, { "epoch": 0.56, "grad_norm": 0.9617220163345337, "learning_rate": 9.447036784741145e-05, "loss": 1.1469, "step": 9950 }, { "epoch": 0.56, "grad_norm": 0.9511649012565613, "learning_rate": 9.446469118982744e-05, "loss": 1.1759, "step": 9960 }, { "epoch": 0.57, "grad_norm": 0.9769867658615112, "learning_rate": 9.445901453224342e-05, "loss": 1.1767, "step": 9970 }, { "epoch": 0.57, "grad_norm": 1.041967749595642, "learning_rate": 9.445333787465942e-05, "loss": 1.1291, "step": 9980 }, { "epoch": 0.57, "grad_norm": 0.9762113094329834, "learning_rate": 9.44476612170754e-05, "loss": 1.1607, "step": 9990 }, { "epoch": 0.57, "grad_norm": 1.00654935836792, "learning_rate": 9.444198455949137e-05, "loss": 1.1645, "step": 10000 }, { "epoch": 0.57, "grad_norm": 0.9714931845664978, "learning_rate": 9.443630790190735e-05, "loss": 1.1621, "step": 10010 }, { "epoch": 0.57, "grad_norm": 0.9587466716766357, "learning_rate": 9.443063124432335e-05, "loss": 1.1721, "step": 10020 }, { "epoch": 0.57, "grad_norm": 0.9643003344535828, "learning_rate": 9.442495458673933e-05, "loss": 1.1511, "step": 10030 }, { "epoch": 0.57, "grad_norm": 0.948980987071991, "learning_rate": 9.441927792915531e-05, "loss": 1.1653, "step": 10040 }, { "epoch": 0.57, "grad_norm": 0.9937887191772461, "learning_rate": 9.44136012715713e-05, "loss": 1.1367, "step": 10050 }, { "epoch": 0.57, "grad_norm": 1.022507667541504, "learning_rate": 9.440792461398728e-05, "loss": 1.1607, "step": 10060 }, { "epoch": 0.57, "grad_norm": 1.0031214952468872, "learning_rate": 9.440224795640328e-05, "loss": 1.1449, "step": 10070 }, { "epoch": 0.57, "grad_norm": 0.9895460605621338, "learning_rate": 9.439657129881926e-05, "loss": 1.1394, "step": 10080 }, { "epoch": 0.57, "grad_norm": 1.0605047941207886, "learning_rate": 9.439089464123525e-05, "loss": 1.1469, "step": 10090 }, { "epoch": 0.57, "grad_norm": 1.033028483390808, "learning_rate": 9.438521798365123e-05, "loss": 1.1629, "step": 10100 }, { "epoch": 0.57, "grad_norm": 1.08809494972229, "learning_rate": 9.437954132606721e-05, "loss": 1.1466, "step": 10110 }, { "epoch": 0.57, "grad_norm": 1.0141315460205078, "learning_rate": 9.43744323342416e-05, "loss": 1.1666, "step": 10120 }, { "epoch": 0.57, "grad_norm": 0.9543845057487488, "learning_rate": 9.436875567665759e-05, "loss": 1.1585, "step": 10130 }, { "epoch": 0.57, "grad_norm": 0.9781716465950012, "learning_rate": 9.436307901907357e-05, "loss": 1.1502, "step": 10140 }, { "epoch": 0.58, "grad_norm": 1.0163137912750244, "learning_rate": 9.435740236148955e-05, "loss": 1.1611, "step": 10150 }, { "epoch": 0.58, "grad_norm": 0.973810076713562, "learning_rate": 9.435172570390554e-05, "loss": 1.1765, "step": 10160 }, { "epoch": 0.58, "grad_norm": 1.0479021072387695, "learning_rate": 9.434604904632152e-05, "loss": 1.1459, "step": 10170 }, { "epoch": 0.58, "grad_norm": 1.0721889734268188, "learning_rate": 9.434037238873752e-05, "loss": 1.1688, "step": 10180 }, { "epoch": 0.58, "grad_norm": 0.9661620259284973, "learning_rate": 9.43346957311535e-05, "loss": 1.1357, "step": 10190 }, { "epoch": 0.58, "grad_norm": 1.0208302736282349, "learning_rate": 9.432901907356949e-05, "loss": 1.1449, "step": 10200 }, { "epoch": 0.58, "grad_norm": 1.0438979864120483, "learning_rate": 9.432334241598547e-05, "loss": 1.1592, "step": 10210 }, { "epoch": 0.58, "grad_norm": 0.9835490584373474, "learning_rate": 9.431766575840145e-05, "loss": 1.1235, "step": 10220 }, { "epoch": 0.58, "grad_norm": 0.9627678990364075, "learning_rate": 9.431198910081744e-05, "loss": 1.1412, "step": 10230 }, { "epoch": 0.58, "grad_norm": 1.0610265731811523, "learning_rate": 9.430631244323342e-05, "loss": 1.1691, "step": 10240 }, { "epoch": 0.58, "grad_norm": 0.9271091818809509, "learning_rate": 9.430063578564942e-05, "loss": 1.1554, "step": 10250 }, { "epoch": 0.58, "grad_norm": 0.9506227970123291, "learning_rate": 9.42949591280654e-05, "loss": 1.1544, "step": 10260 }, { "epoch": 0.58, "grad_norm": 0.9379181861877441, "learning_rate": 9.428928247048139e-05, "loss": 1.1564, "step": 10270 }, { "epoch": 0.58, "grad_norm": 0.9782284498214722, "learning_rate": 9.428360581289737e-05, "loss": 1.1564, "step": 10280 }, { "epoch": 0.58, "grad_norm": 1.0011905431747437, "learning_rate": 9.427792915531336e-05, "loss": 1.1636, "step": 10290 }, { "epoch": 0.58, "grad_norm": 0.99295973777771, "learning_rate": 9.427225249772934e-05, "loss": 1.1727, "step": 10300 }, { "epoch": 0.58, "grad_norm": 0.9479188323020935, "learning_rate": 9.426657584014534e-05, "loss": 1.1599, "step": 10310 }, { "epoch": 0.59, "grad_norm": 0.9435503482818604, "learning_rate": 9.426089918256132e-05, "loss": 1.1628, "step": 10320 }, { "epoch": 0.59, "grad_norm": 0.9509761333465576, "learning_rate": 9.42552225249773e-05, "loss": 1.1439, "step": 10330 }, { "epoch": 0.59, "grad_norm": 1.088510274887085, "learning_rate": 9.424954586739329e-05, "loss": 1.1381, "step": 10340 }, { "epoch": 0.59, "grad_norm": 0.9839029312133789, "learning_rate": 9.424386920980927e-05, "loss": 1.1326, "step": 10350 }, { "epoch": 0.59, "grad_norm": 0.9487875699996948, "learning_rate": 9.423819255222525e-05, "loss": 1.1597, "step": 10360 }, { "epoch": 0.59, "grad_norm": 1.0027797222137451, "learning_rate": 9.423251589464124e-05, "loss": 1.1372, "step": 10370 }, { "epoch": 0.59, "grad_norm": 0.927510678768158, "learning_rate": 9.422683923705722e-05, "loss": 1.1479, "step": 10380 }, { "epoch": 0.59, "grad_norm": 0.9859471917152405, "learning_rate": 9.42211625794732e-05, "loss": 1.1326, "step": 10390 }, { "epoch": 0.59, "grad_norm": 0.9949334263801575, "learning_rate": 9.42154859218892e-05, "loss": 1.1783, "step": 10400 }, { "epoch": 0.59, "grad_norm": 0.9241623282432556, "learning_rate": 9.420980926430518e-05, "loss": 1.1432, "step": 10410 }, { "epoch": 0.59, "grad_norm": 0.9705997109413147, "learning_rate": 9.420413260672116e-05, "loss": 1.1629, "step": 10420 }, { "epoch": 0.59, "grad_norm": 0.9613255262374878, "learning_rate": 9.419845594913715e-05, "loss": 1.1823, "step": 10430 }, { "epoch": 0.59, "grad_norm": 1.0240172147750854, "learning_rate": 9.419277929155313e-05, "loss": 1.1486, "step": 10440 }, { "epoch": 0.59, "grad_norm": 1.011834740638733, "learning_rate": 9.418710263396912e-05, "loss": 1.1551, "step": 10450 }, { "epoch": 0.59, "grad_norm": 1.0249963998794556, "learning_rate": 9.41814259763851e-05, "loss": 1.1576, "step": 10460 }, { "epoch": 0.59, "grad_norm": 1.0104044675827026, "learning_rate": 9.41757493188011e-05, "loss": 1.1752, "step": 10470 }, { "epoch": 0.59, "grad_norm": 1.0388237237930298, "learning_rate": 9.417007266121708e-05, "loss": 1.141, "step": 10480 }, { "epoch": 0.59, "grad_norm": 1.0170191526412964, "learning_rate": 9.416439600363307e-05, "loss": 1.1785, "step": 10490 }, { "epoch": 0.6, "grad_norm": 1.0371077060699463, "learning_rate": 9.415871934604905e-05, "loss": 1.1434, "step": 10500 }, { "epoch": 0.6, "grad_norm": 1.0628021955490112, "learning_rate": 9.415304268846503e-05, "loss": 1.157, "step": 10510 }, { "epoch": 0.6, "grad_norm": 0.9731179475784302, "learning_rate": 9.414736603088103e-05, "loss": 1.1616, "step": 10520 }, { "epoch": 0.6, "grad_norm": 1.032436728477478, "learning_rate": 9.4141689373297e-05, "loss": 1.1722, "step": 10530 }, { "epoch": 0.6, "grad_norm": 0.9102173447608948, "learning_rate": 9.4136012715713e-05, "loss": 1.1397, "step": 10540 }, { "epoch": 0.6, "grad_norm": 0.9598124027252197, "learning_rate": 9.413033605812898e-05, "loss": 1.1563, "step": 10550 }, { "epoch": 0.6, "grad_norm": 0.8975786566734314, "learning_rate": 9.412465940054497e-05, "loss": 1.1388, "step": 10560 }, { "epoch": 0.6, "grad_norm": 0.9144327044487, "learning_rate": 9.411898274296095e-05, "loss": 1.1489, "step": 10570 }, { "epoch": 0.6, "grad_norm": 0.9712315201759338, "learning_rate": 9.411330608537695e-05, "loss": 1.1682, "step": 10580 }, { "epoch": 0.6, "grad_norm": 1.0741809606552124, "learning_rate": 9.410762942779293e-05, "loss": 1.1481, "step": 10590 }, { "epoch": 0.6, "grad_norm": 1.1330828666687012, "learning_rate": 9.41019527702089e-05, "loss": 1.1462, "step": 10600 }, { "epoch": 0.6, "grad_norm": 0.9552538990974426, "learning_rate": 9.40962761126249e-05, "loss": 1.1573, "step": 10610 }, { "epoch": 0.6, "grad_norm": 0.9816569089889526, "learning_rate": 9.409059945504088e-05, "loss": 1.1335, "step": 10620 }, { "epoch": 0.6, "grad_norm": 0.9989336729049683, "learning_rate": 9.408492279745686e-05, "loss": 1.139, "step": 10630 }, { "epoch": 0.6, "grad_norm": 0.9570315480232239, "learning_rate": 9.407924613987284e-05, "loss": 1.1628, "step": 10640 }, { "epoch": 0.6, "grad_norm": 0.9327189326286316, "learning_rate": 9.407356948228883e-05, "loss": 1.1781, "step": 10650 }, { "epoch": 0.6, "grad_norm": 1.0181310176849365, "learning_rate": 9.406789282470481e-05, "loss": 1.1481, "step": 10660 }, { "epoch": 0.61, "grad_norm": 1.018066644668579, "learning_rate": 9.40622161671208e-05, "loss": 1.1604, "step": 10670 }, { "epoch": 0.61, "grad_norm": 1.0140725374221802, "learning_rate": 9.405653950953679e-05, "loss": 1.1337, "step": 10680 }, { "epoch": 0.61, "grad_norm": 1.0203570127487183, "learning_rate": 9.405086285195277e-05, "loss": 1.1464, "step": 10690 }, { "epoch": 0.61, "grad_norm": 0.9813259840011597, "learning_rate": 9.404518619436876e-05, "loss": 1.1223, "step": 10700 }, { "epoch": 0.61, "grad_norm": 1.018497109413147, "learning_rate": 9.403950953678474e-05, "loss": 1.1416, "step": 10710 }, { "epoch": 0.61, "grad_norm": 1.087365984916687, "learning_rate": 9.403383287920073e-05, "loss": 1.119, "step": 10720 }, { "epoch": 0.61, "grad_norm": 0.9784966707229614, "learning_rate": 9.402815622161671e-05, "loss": 1.1345, "step": 10730 }, { "epoch": 0.61, "grad_norm": 0.9157513976097107, "learning_rate": 9.40224795640327e-05, "loss": 1.1571, "step": 10740 }, { "epoch": 0.61, "grad_norm": 1.0136699676513672, "learning_rate": 9.401680290644869e-05, "loss": 1.1241, "step": 10750 }, { "epoch": 0.61, "grad_norm": 0.9623086452484131, "learning_rate": 9.401112624886468e-05, "loss": 1.137, "step": 10760 }, { "epoch": 0.61, "grad_norm": 1.0080710649490356, "learning_rate": 9.400544959128066e-05, "loss": 1.1261, "step": 10770 }, { "epoch": 0.61, "grad_norm": 0.9755554795265198, "learning_rate": 9.399977293369664e-05, "loss": 1.1348, "step": 10780 }, { "epoch": 0.61, "grad_norm": 0.9977463483810425, "learning_rate": 9.399409627611263e-05, "loss": 1.1633, "step": 10790 }, { "epoch": 0.61, "grad_norm": 0.9958288073539734, "learning_rate": 9.398841961852861e-05, "loss": 1.1406, "step": 10800 }, { "epoch": 0.61, "grad_norm": 0.9545509219169617, "learning_rate": 9.39827429609446e-05, "loss": 1.1378, "step": 10810 }, { "epoch": 0.61, "grad_norm": 0.9518715143203735, "learning_rate": 9.397706630336059e-05, "loss": 1.1521, "step": 10820 }, { "epoch": 0.61, "grad_norm": 0.9854986667633057, "learning_rate": 9.397138964577658e-05, "loss": 1.1234, "step": 10830 }, { "epoch": 0.61, "grad_norm": 1.0162800550460815, "learning_rate": 9.396571298819256e-05, "loss": 1.1334, "step": 10840 }, { "epoch": 0.62, "grad_norm": 0.8939637541770935, "learning_rate": 9.396003633060855e-05, "loss": 1.1641, "step": 10850 }, { "epoch": 0.62, "grad_norm": 0.9664207100868225, "learning_rate": 9.395435967302453e-05, "loss": 1.1462, "step": 10860 }, { "epoch": 0.62, "grad_norm": 0.980857253074646, "learning_rate": 9.394868301544051e-05, "loss": 1.1494, "step": 10870 }, { "epoch": 0.62, "grad_norm": 1.0440709590911865, "learning_rate": 9.39430063578565e-05, "loss": 1.2009, "step": 10880 }, { "epoch": 0.62, "grad_norm": 0.9828513264656067, "learning_rate": 9.393732970027249e-05, "loss": 1.1365, "step": 10890 }, { "epoch": 0.62, "grad_norm": 0.9722577333450317, "learning_rate": 9.393165304268847e-05, "loss": 1.1195, "step": 10900 }, { "epoch": 0.62, "grad_norm": 1.0124742984771729, "learning_rate": 9.392597638510445e-05, "loss": 1.1539, "step": 10910 }, { "epoch": 0.62, "grad_norm": 0.9653096795082092, "learning_rate": 9.392029972752044e-05, "loss": 1.146, "step": 10920 }, { "epoch": 0.62, "grad_norm": 0.9911755919456482, "learning_rate": 9.391462306993642e-05, "loss": 1.1525, "step": 10930 }, { "epoch": 0.62, "grad_norm": 0.9338049292564392, "learning_rate": 9.390894641235241e-05, "loss": 1.1397, "step": 10940 }, { "epoch": 0.62, "grad_norm": 0.9323024153709412, "learning_rate": 9.39032697547684e-05, "loss": 1.1555, "step": 10950 }, { "epoch": 0.62, "grad_norm": 0.972978413105011, "learning_rate": 9.389759309718437e-05, "loss": 1.1339, "step": 10960 }, { "epoch": 0.62, "grad_norm": 0.9991594552993774, "learning_rate": 9.389191643960037e-05, "loss": 1.1756, "step": 10970 }, { "epoch": 0.62, "grad_norm": 0.9671734571456909, "learning_rate": 9.388623978201635e-05, "loss": 1.1247, "step": 10980 }, { "epoch": 0.62, "grad_norm": 1.0239403247833252, "learning_rate": 9.388056312443234e-05, "loss": 1.1491, "step": 10990 }, { "epoch": 0.62, "grad_norm": 1.0251514911651611, "learning_rate": 9.387488646684832e-05, "loss": 1.1496, "step": 11000 }, { "epoch": 0.62, "grad_norm": 0.9392321109771729, "learning_rate": 9.386920980926431e-05, "loss": 1.1721, "step": 11010 }, { "epoch": 0.62, "grad_norm": 0.9865664839744568, "learning_rate": 9.38635331516803e-05, "loss": 1.1498, "step": 11020 }, { "epoch": 0.63, "grad_norm": 0.9404027462005615, "learning_rate": 9.385785649409629e-05, "loss": 1.1359, "step": 11030 }, { "epoch": 0.63, "grad_norm": 1.121880292892456, "learning_rate": 9.385217983651227e-05, "loss": 1.1516, "step": 11040 }, { "epoch": 0.63, "grad_norm": 0.9269070625305176, "learning_rate": 9.384650317892825e-05, "loss": 1.1461, "step": 11050 }, { "epoch": 0.63, "grad_norm": 0.991563618183136, "learning_rate": 9.384082652134424e-05, "loss": 1.1481, "step": 11060 }, { "epoch": 0.63, "grad_norm": 0.9974544048309326, "learning_rate": 9.383514986376022e-05, "loss": 1.1212, "step": 11070 }, { "epoch": 0.63, "grad_norm": 0.9152713418006897, "learning_rate": 9.382947320617621e-05, "loss": 1.1338, "step": 11080 }, { "epoch": 0.63, "grad_norm": 1.0938050746917725, "learning_rate": 9.38237965485922e-05, "loss": 1.1285, "step": 11090 }, { "epoch": 0.63, "grad_norm": 1.0209763050079346, "learning_rate": 9.381811989100819e-05, "loss": 1.1765, "step": 11100 }, { "epoch": 0.63, "grad_norm": 1.0133930444717407, "learning_rate": 9.381244323342417e-05, "loss": 1.1266, "step": 11110 }, { "epoch": 0.63, "grad_norm": 0.9963176846504211, "learning_rate": 9.380676657584015e-05, "loss": 1.1566, "step": 11120 }, { "epoch": 0.63, "grad_norm": 0.9436922073364258, "learning_rate": 9.380108991825614e-05, "loss": 1.1082, "step": 11130 }, { "epoch": 0.63, "grad_norm": 1.0831457376480103, "learning_rate": 9.379541326067212e-05, "loss": 1.1311, "step": 11140 }, { "epoch": 0.63, "grad_norm": 1.0131016969680786, "learning_rate": 9.37897366030881e-05, "loss": 1.1286, "step": 11150 }, { "epoch": 0.63, "grad_norm": 1.0338177680969238, "learning_rate": 9.378405994550408e-05, "loss": 1.1337, "step": 11160 }, { "epoch": 0.63, "grad_norm": 1.0365561246871948, "learning_rate": 9.377838328792008e-05, "loss": 1.1539, "step": 11170 }, { "epoch": 0.63, "grad_norm": 0.9711923003196716, "learning_rate": 9.377270663033605e-05, "loss": 1.1349, "step": 11180 }, { "epoch": 0.63, "grad_norm": 1.0068745613098145, "learning_rate": 9.376702997275205e-05, "loss": 1.1473, "step": 11190 }, { "epoch": 0.64, "grad_norm": 1.030835747718811, "learning_rate": 9.376135331516803e-05, "loss": 1.1774, "step": 11200 }, { "epoch": 0.64, "grad_norm": 1.0082447528839111, "learning_rate": 9.375567665758402e-05, "loss": 1.147, "step": 11210 }, { "epoch": 0.64, "grad_norm": 0.9309071898460388, "learning_rate": 9.375e-05, "loss": 1.1435, "step": 11220 }, { "epoch": 0.64, "grad_norm": 0.9685840606689453, "learning_rate": 9.374432334241598e-05, "loss": 1.151, "step": 11230 }, { "epoch": 0.64, "grad_norm": 1.0632404088974, "learning_rate": 9.373864668483198e-05, "loss": 1.1378, "step": 11240 }, { "epoch": 0.64, "grad_norm": 0.9536535143852234, "learning_rate": 9.373297002724796e-05, "loss": 1.1595, "step": 11250 }, { "epoch": 0.64, "grad_norm": 1.0298157930374146, "learning_rate": 9.372729336966395e-05, "loss": 1.1232, "step": 11260 }, { "epoch": 0.64, "grad_norm": 1.0255874395370483, "learning_rate": 9.372161671207993e-05, "loss": 1.1299, "step": 11270 }, { "epoch": 0.64, "grad_norm": 0.9828300476074219, "learning_rate": 9.371594005449592e-05, "loss": 1.1335, "step": 11280 }, { "epoch": 0.64, "grad_norm": 1.0111970901489258, "learning_rate": 9.37102633969119e-05, "loss": 1.1527, "step": 11290 }, { "epoch": 0.64, "grad_norm": 1.0049268007278442, "learning_rate": 9.37045867393279e-05, "loss": 1.1585, "step": 11300 }, { "epoch": 0.64, "grad_norm": 1.0037709474563599, "learning_rate": 9.369891008174388e-05, "loss": 1.1595, "step": 11310 }, { "epoch": 0.64, "grad_norm": 1.0081613063812256, "learning_rate": 9.369323342415986e-05, "loss": 1.1302, "step": 11320 }, { "epoch": 0.64, "grad_norm": 1.0493674278259277, "learning_rate": 9.368755676657585e-05, "loss": 1.1426, "step": 11330 }, { "epoch": 0.64, "grad_norm": 0.9283673167228699, "learning_rate": 9.368188010899183e-05, "loss": 1.1575, "step": 11340 }, { "epoch": 0.64, "grad_norm": 0.9885026812553406, "learning_rate": 9.367620345140782e-05, "loss": 1.114, "step": 11350 }, { "epoch": 0.64, "grad_norm": 0.9218712449073792, "learning_rate": 9.36705267938238e-05, "loss": 1.1259, "step": 11360 }, { "epoch": 0.64, "grad_norm": 0.9795019626617432, "learning_rate": 9.36648501362398e-05, "loss": 1.13, "step": 11370 }, { "epoch": 0.65, "grad_norm": 0.9776854515075684, "learning_rate": 9.365917347865578e-05, "loss": 1.1608, "step": 11380 }, { "epoch": 0.65, "grad_norm": 0.9655928611755371, "learning_rate": 9.365349682107176e-05, "loss": 1.1244, "step": 11390 }, { "epoch": 0.65, "grad_norm": 1.128403902053833, "learning_rate": 9.364782016348774e-05, "loss": 1.1461, "step": 11400 }, { "epoch": 0.65, "grad_norm": 0.9621775150299072, "learning_rate": 9.364214350590373e-05, "loss": 1.1431, "step": 11410 }, { "epoch": 0.65, "grad_norm": 4.147283554077148, "learning_rate": 9.363646684831971e-05, "loss": 1.1422, "step": 11420 }, { "epoch": 0.65, "grad_norm": 0.954611599445343, "learning_rate": 9.363079019073569e-05, "loss": 1.1569, "step": 11430 }, { "epoch": 0.65, "grad_norm": 0.8977065682411194, "learning_rate": 9.362511353315168e-05, "loss": 1.1401, "step": 11440 }, { "epoch": 0.65, "grad_norm": 0.9612804651260376, "learning_rate": 9.361943687556766e-05, "loss": 1.134, "step": 11450 }, { "epoch": 0.65, "grad_norm": 1.0569292306900024, "learning_rate": 9.361376021798366e-05, "loss": 1.167, "step": 11460 }, { "epoch": 0.65, "grad_norm": 1.0299201011657715, "learning_rate": 9.360808356039964e-05, "loss": 1.1261, "step": 11470 }, { "epoch": 0.65, "grad_norm": 1.1393407583236694, "learning_rate": 9.360240690281563e-05, "loss": 1.1235, "step": 11480 }, { "epoch": 0.65, "grad_norm": 1.0172597169876099, "learning_rate": 9.359673024523161e-05, "loss": 1.1337, "step": 11490 }, { "epoch": 0.65, "grad_norm": 1.044114351272583, "learning_rate": 9.359105358764759e-05, "loss": 1.1438, "step": 11500 }, { "epoch": 0.65, "grad_norm": 1.0102729797363281, "learning_rate": 9.358537693006358e-05, "loss": 1.158, "step": 11510 }, { "epoch": 0.65, "grad_norm": 0.9595967531204224, "learning_rate": 9.357970027247956e-05, "loss": 1.1371, "step": 11520 }, { "epoch": 0.65, "grad_norm": 0.9766552448272705, "learning_rate": 9.357402361489556e-05, "loss": 1.1071, "step": 11530 }, { "epoch": 0.65, "grad_norm": 1.0751302242279053, "learning_rate": 9.356834695731154e-05, "loss": 1.1439, "step": 11540 }, { "epoch": 0.65, "grad_norm": 1.0136288404464722, "learning_rate": 9.356267029972753e-05, "loss": 1.1578, "step": 11550 }, { "epoch": 0.66, "grad_norm": 0.9902289509773254, "learning_rate": 9.355699364214351e-05, "loss": 1.1511, "step": 11560 }, { "epoch": 0.66, "grad_norm": 1.151686429977417, "learning_rate": 9.35513169845595e-05, "loss": 1.1252, "step": 11570 }, { "epoch": 0.66, "grad_norm": 1.0288395881652832, "learning_rate": 9.354564032697548e-05, "loss": 1.1456, "step": 11580 }, { "epoch": 0.66, "grad_norm": 0.9831141233444214, "learning_rate": 9.353996366939146e-05, "loss": 1.1454, "step": 11590 }, { "epoch": 0.66, "grad_norm": 0.9215033650398254, "learning_rate": 9.353428701180746e-05, "loss": 1.1366, "step": 11600 }, { "epoch": 0.66, "grad_norm": 0.9723914861679077, "learning_rate": 9.352861035422344e-05, "loss": 1.1538, "step": 11610 }, { "epoch": 0.66, "grad_norm": 1.008786678314209, "learning_rate": 9.352293369663943e-05, "loss": 1.1572, "step": 11620 }, { "epoch": 0.66, "grad_norm": 0.9975411295890808, "learning_rate": 9.351725703905541e-05, "loss": 1.1478, "step": 11630 }, { "epoch": 0.66, "grad_norm": 0.9875149130821228, "learning_rate": 9.351158038147139e-05, "loss": 1.1487, "step": 11640 }, { "epoch": 0.66, "grad_norm": 1.003165364265442, "learning_rate": 9.350590372388738e-05, "loss": 1.1217, "step": 11650 }, { "epoch": 0.66, "grad_norm": 0.9419116973876953, "learning_rate": 9.350022706630336e-05, "loss": 1.1269, "step": 11660 }, { "epoch": 0.66, "grad_norm": 0.971165120601654, "learning_rate": 9.349455040871934e-05, "loss": 1.1272, "step": 11670 }, { "epoch": 0.66, "grad_norm": 0.9652339816093445, "learning_rate": 9.348887375113532e-05, "loss": 1.1153, "step": 11680 }, { "epoch": 0.66, "grad_norm": 1.0533803701400757, "learning_rate": 9.348319709355132e-05, "loss": 1.1563, "step": 11690 }, { "epoch": 0.66, "grad_norm": 1.0036250352859497, "learning_rate": 9.34775204359673e-05, "loss": 1.1136, "step": 11700 }, { "epoch": 0.66, "grad_norm": 0.9456911087036133, "learning_rate": 9.347184377838329e-05, "loss": 1.1453, "step": 11710 }, { "epoch": 0.66, "grad_norm": 0.9687916040420532, "learning_rate": 9.346616712079927e-05, "loss": 1.1243, "step": 11720 }, { "epoch": 0.67, "grad_norm": 1.025870680809021, "learning_rate": 9.346049046321526e-05, "loss": 1.1169, "step": 11730 }, { "epoch": 0.67, "grad_norm": 1.0889047384262085, "learning_rate": 9.345481380563124e-05, "loss": 1.133, "step": 11740 }, { "epoch": 0.67, "grad_norm": 1.0131717920303345, "learning_rate": 9.344913714804724e-05, "loss": 1.1365, "step": 11750 }, { "epoch": 0.67, "grad_norm": 1.000176191329956, "learning_rate": 9.344346049046322e-05, "loss": 1.1465, "step": 11760 }, { "epoch": 0.67, "grad_norm": 0.9969251751899719, "learning_rate": 9.34377838328792e-05, "loss": 1.1429, "step": 11770 }, { "epoch": 0.67, "grad_norm": 0.9617388844490051, "learning_rate": 9.343210717529519e-05, "loss": 1.1494, "step": 11780 }, { "epoch": 0.67, "grad_norm": 1.0743606090545654, "learning_rate": 9.342643051771117e-05, "loss": 1.1487, "step": 11790 }, { "epoch": 0.67, "grad_norm": 0.9852492809295654, "learning_rate": 9.342075386012717e-05, "loss": 1.1333, "step": 11800 }, { "epoch": 0.67, "grad_norm": 1.0446208715438843, "learning_rate": 9.341507720254315e-05, "loss": 1.1543, "step": 11810 }, { "epoch": 0.67, "grad_norm": 0.9246658682823181, "learning_rate": 9.340940054495914e-05, "loss": 1.1397, "step": 11820 }, { "epoch": 0.67, "grad_norm": 1.0332772731781006, "learning_rate": 9.340372388737512e-05, "loss": 1.1519, "step": 11830 }, { "epoch": 0.67, "grad_norm": 1.021146297454834, "learning_rate": 9.339804722979111e-05, "loss": 1.1319, "step": 11840 }, { "epoch": 0.67, "grad_norm": 1.0231304168701172, "learning_rate": 9.339237057220709e-05, "loss": 1.1323, "step": 11850 }, { "epoch": 0.67, "grad_norm": 1.011443853378296, "learning_rate": 9.338669391462307e-05, "loss": 1.1183, "step": 11860 }, { "epoch": 0.67, "grad_norm": 0.9623959064483643, "learning_rate": 9.338101725703907e-05, "loss": 1.1573, "step": 11870 }, { "epoch": 0.67, "grad_norm": 1.0112462043762207, "learning_rate": 9.337534059945505e-05, "loss": 1.1205, "step": 11880 }, { "epoch": 0.67, "grad_norm": 1.095661997795105, "learning_rate": 9.336966394187104e-05, "loss": 1.1045, "step": 11890 }, { "epoch": 0.67, "grad_norm": 1.022985816001892, "learning_rate": 9.336398728428702e-05, "loss": 1.1314, "step": 11900 }, { "epoch": 0.68, "grad_norm": 0.9209020137786865, "learning_rate": 9.3358310626703e-05, "loss": 1.1255, "step": 11910 }, { "epoch": 0.68, "grad_norm": 1.0182536840438843, "learning_rate": 9.335263396911898e-05, "loss": 1.1297, "step": 11920 }, { "epoch": 0.68, "grad_norm": 0.9707931280136108, "learning_rate": 9.334695731153497e-05, "loss": 1.1626, "step": 11930 }, { "epoch": 0.68, "grad_norm": 0.9366953372955322, "learning_rate": 9.334128065395095e-05, "loss": 1.1335, "step": 11940 }, { "epoch": 0.68, "grad_norm": 1.0299640893936157, "learning_rate": 9.333560399636693e-05, "loss": 1.1444, "step": 11950 }, { "epoch": 0.68, "grad_norm": 1.0496751070022583, "learning_rate": 9.332992733878293e-05, "loss": 1.1551, "step": 11960 }, { "epoch": 0.68, "grad_norm": 0.9577037692070007, "learning_rate": 9.33242506811989e-05, "loss": 1.1428, "step": 11970 }, { "epoch": 0.68, "grad_norm": 0.9349406957626343, "learning_rate": 9.33185740236149e-05, "loss": 1.1189, "step": 11980 }, { "epoch": 0.68, "grad_norm": 1.1421507596969604, "learning_rate": 9.331289736603088e-05, "loss": 1.1354, "step": 11990 }, { "epoch": 0.68, "grad_norm": 0.8703690767288208, "learning_rate": 9.330722070844687e-05, "loss": 1.1539, "step": 12000 }, { "epoch": 0.68, "grad_norm": 0.9846386313438416, "learning_rate": 9.330154405086285e-05, "loss": 1.1473, "step": 12010 }, { "epoch": 0.68, "grad_norm": 0.9649510979652405, "learning_rate": 9.329586739327885e-05, "loss": 1.1598, "step": 12020 }, { "epoch": 0.68, "grad_norm": 1.070156216621399, "learning_rate": 9.329019073569483e-05, "loss": 1.1492, "step": 12030 }, { "epoch": 0.68, "grad_norm": 1.0317492485046387, "learning_rate": 9.32845140781108e-05, "loss": 1.1537, "step": 12040 }, { "epoch": 0.68, "grad_norm": 0.9810558557510376, "learning_rate": 9.32788374205268e-05, "loss": 1.1396, "step": 12050 }, { "epoch": 0.68, "grad_norm": 1.0284616947174072, "learning_rate": 9.327316076294278e-05, "loss": 1.1389, "step": 12060 }, { "epoch": 0.68, "grad_norm": 0.9653558135032654, "learning_rate": 9.326748410535877e-05, "loss": 1.1667, "step": 12070 }, { "epoch": 0.68, "grad_norm": 1.060706377029419, "learning_rate": 9.326180744777475e-05, "loss": 1.1327, "step": 12080 }, { "epoch": 0.69, "grad_norm": 1.0040075778961182, "learning_rate": 9.325613079019075e-05, "loss": 1.1429, "step": 12090 }, { "epoch": 0.69, "grad_norm": 1.0059195756912231, "learning_rate": 9.325045413260673e-05, "loss": 1.1206, "step": 12100 }, { "epoch": 0.69, "grad_norm": 1.0958690643310547, "learning_rate": 9.324477747502272e-05, "loss": 1.1284, "step": 12110 }, { "epoch": 0.69, "grad_norm": 1.0155565738677979, "learning_rate": 9.32391008174387e-05, "loss": 1.1472, "step": 12120 }, { "epoch": 0.69, "grad_norm": 1.0269426107406616, "learning_rate": 9.323342415985468e-05, "loss": 1.1392, "step": 12130 }, { "epoch": 0.69, "grad_norm": 0.9821152687072754, "learning_rate": 9.322774750227067e-05, "loss": 1.1105, "step": 12140 }, { "epoch": 0.69, "grad_norm": 0.9423434138298035, "learning_rate": 9.322207084468665e-05, "loss": 1.1446, "step": 12150 }, { "epoch": 0.69, "grad_norm": 1.0154529809951782, "learning_rate": 9.321639418710265e-05, "loss": 1.1185, "step": 12160 }, { "epoch": 0.69, "grad_norm": 1.0229703187942505, "learning_rate": 9.321071752951863e-05, "loss": 1.142, "step": 12170 }, { "epoch": 0.69, "grad_norm": 1.0485339164733887, "learning_rate": 9.320504087193461e-05, "loss": 1.143, "step": 12180 }, { "epoch": 0.69, "grad_norm": 1.0023962259292603, "learning_rate": 9.319936421435059e-05, "loss": 1.1591, "step": 12190 }, { "epoch": 0.69, "grad_norm": 0.9803469181060791, "learning_rate": 9.319368755676658e-05, "loss": 1.164, "step": 12200 }, { "epoch": 0.69, "grad_norm": 1.02930748462677, "learning_rate": 9.318801089918256e-05, "loss": 1.1159, "step": 12210 }, { "epoch": 0.69, "grad_norm": 1.032214879989624, "learning_rate": 9.318233424159855e-05, "loss": 1.1332, "step": 12220 }, { "epoch": 0.69, "grad_norm": 0.9512103796005249, "learning_rate": 9.317665758401453e-05, "loss": 1.105, "step": 12230 }, { "epoch": 0.69, "grad_norm": 1.154041051864624, "learning_rate": 9.317098092643051e-05, "loss": 1.1364, "step": 12240 }, { "epoch": 0.69, "grad_norm": 0.9030895829200745, "learning_rate": 9.316530426884651e-05, "loss": 1.137, "step": 12250 }, { "epoch": 0.7, "grad_norm": 1.006066083908081, "learning_rate": 9.315962761126249e-05, "loss": 1.1292, "step": 12260 }, { "epoch": 0.7, "grad_norm": 0.9661989212036133, "learning_rate": 9.315395095367848e-05, "loss": 1.132, "step": 12270 }, { "epoch": 0.7, "grad_norm": 1.0355207920074463, "learning_rate": 9.314827429609446e-05, "loss": 1.1435, "step": 12280 }, { "epoch": 0.7, "grad_norm": 1.021599292755127, "learning_rate": 9.314259763851045e-05, "loss": 1.1263, "step": 12290 }, { "epoch": 0.7, "grad_norm": 0.9850451946258545, "learning_rate": 9.313692098092643e-05, "loss": 1.1337, "step": 12300 }, { "epoch": 0.7, "grad_norm": 1.0030597448349, "learning_rate": 9.313124432334243e-05, "loss": 1.1013, "step": 12310 }, { "epoch": 0.7, "grad_norm": 0.9009820222854614, "learning_rate": 9.312556766575841e-05, "loss": 1.1147, "step": 12320 }, { "epoch": 0.7, "grad_norm": 0.9985597729682922, "learning_rate": 9.311989100817439e-05, "loss": 1.1588, "step": 12330 }, { "epoch": 0.7, "grad_norm": 1.0422958135604858, "learning_rate": 9.311421435059038e-05, "loss": 1.1224, "step": 12340 }, { "epoch": 0.7, "grad_norm": 0.9785307049751282, "learning_rate": 9.310853769300636e-05, "loss": 1.1406, "step": 12350 }, { "epoch": 0.7, "grad_norm": 0.9783348441123962, "learning_rate": 9.310286103542235e-05, "loss": 1.1345, "step": 12360 }, { "epoch": 0.7, "grad_norm": 0.9617540240287781, "learning_rate": 9.309718437783833e-05, "loss": 1.1223, "step": 12370 }, { "epoch": 0.7, "grad_norm": 1.0083848237991333, "learning_rate": 9.309150772025433e-05, "loss": 1.1521, "step": 12380 }, { "epoch": 0.7, "grad_norm": 1.0029990673065186, "learning_rate": 9.308583106267031e-05, "loss": 1.1175, "step": 12390 }, { "epoch": 0.7, "grad_norm": 1.037126898765564, "learning_rate": 9.30801544050863e-05, "loss": 1.1342, "step": 12400 }, { "epoch": 0.7, "grad_norm": 1.0602080821990967, "learning_rate": 9.307447774750228e-05, "loss": 1.1528, "step": 12410 }, { "epoch": 0.7, "grad_norm": 1.0063506364822388, "learning_rate": 9.306880108991826e-05, "loss": 1.1343, "step": 12420 }, { "epoch": 0.7, "grad_norm": 1.029363989830017, "learning_rate": 9.306312443233424e-05, "loss": 1.1418, "step": 12430 }, { "epoch": 0.71, "grad_norm": 0.9840766787528992, "learning_rate": 9.305744777475022e-05, "loss": 1.1449, "step": 12440 }, { "epoch": 0.71, "grad_norm": 0.9808806777000427, "learning_rate": 9.305177111716622e-05, "loss": 1.1302, "step": 12450 }, { "epoch": 0.71, "grad_norm": 1.0218464136123657, "learning_rate": 9.30460944595822e-05, "loss": 1.1487, "step": 12460 }, { "epoch": 0.71, "grad_norm": 1.0032578706741333, "learning_rate": 9.304041780199819e-05, "loss": 1.1619, "step": 12470 }, { "epoch": 0.71, "grad_norm": 0.9812183976173401, "learning_rate": 9.303474114441417e-05, "loss": 1.143, "step": 12480 }, { "epoch": 0.71, "grad_norm": 0.9862493872642517, "learning_rate": 9.302906448683016e-05, "loss": 1.1315, "step": 12490 }, { "epoch": 0.71, "grad_norm": 1.009459137916565, "learning_rate": 9.302338782924614e-05, "loss": 1.1255, "step": 12500 }, { "epoch": 0.71, "grad_norm": 0.9555947780609131, "learning_rate": 9.301771117166212e-05, "loss": 1.1337, "step": 12510 }, { "epoch": 0.71, "grad_norm": 0.9611344337463379, "learning_rate": 9.301203451407812e-05, "loss": 1.1516, "step": 12520 }, { "epoch": 0.71, "grad_norm": 1.0089364051818848, "learning_rate": 9.30063578564941e-05, "loss": 1.1043, "step": 12530 }, { "epoch": 0.71, "grad_norm": 1.088699460029602, "learning_rate": 9.300068119891009e-05, "loss": 1.0955, "step": 12540 }, { "epoch": 0.71, "grad_norm": 0.9295605421066284, "learning_rate": 9.299500454132607e-05, "loss": 1.1215, "step": 12550 }, { "epoch": 0.71, "grad_norm": 1.0070425271987915, "learning_rate": 9.298932788374206e-05, "loss": 1.1036, "step": 12560 }, { "epoch": 0.71, "grad_norm": 0.9883787631988525, "learning_rate": 9.298365122615804e-05, "loss": 1.1148, "step": 12570 }, { "epoch": 0.71, "grad_norm": 1.057974100112915, "learning_rate": 9.297797456857404e-05, "loss": 1.1434, "step": 12580 }, { "epoch": 0.71, "grad_norm": 0.9262000918388367, "learning_rate": 9.297229791099002e-05, "loss": 1.1233, "step": 12590 }, { "epoch": 0.71, "grad_norm": 1.0516611337661743, "learning_rate": 9.2966621253406e-05, "loss": 1.135, "step": 12600 }, { "epoch": 0.72, "grad_norm": 1.02475905418396, "learning_rate": 9.296094459582199e-05, "loss": 1.1216, "step": 12610 }, { "epoch": 0.72, "grad_norm": 0.9131384491920471, "learning_rate": 9.295526793823797e-05, "loss": 1.0909, "step": 12620 }, { "epoch": 0.72, "grad_norm": 0.9969770908355713, "learning_rate": 9.294959128065396e-05, "loss": 1.1458, "step": 12630 }, { "epoch": 0.72, "grad_norm": 1.0411813259124756, "learning_rate": 9.294391462306994e-05, "loss": 1.1381, "step": 12640 }, { "epoch": 0.72, "grad_norm": 0.9708327651023865, "learning_rate": 9.293823796548594e-05, "loss": 1.1142, "step": 12650 }, { "epoch": 0.72, "grad_norm": 0.9975429177284241, "learning_rate": 9.293256130790192e-05, "loss": 1.1337, "step": 12660 }, { "epoch": 0.72, "grad_norm": 0.9593037366867065, "learning_rate": 9.29268846503179e-05, "loss": 1.1294, "step": 12670 }, { "epoch": 0.72, "grad_norm": 0.9615362286567688, "learning_rate": 9.292120799273389e-05, "loss": 1.1538, "step": 12680 }, { "epoch": 0.72, "grad_norm": 1.0532574653625488, "learning_rate": 9.291553133514987e-05, "loss": 1.1235, "step": 12690 }, { "epoch": 0.72, "grad_norm": 1.0758750438690186, "learning_rate": 9.290985467756585e-05, "loss": 1.1222, "step": 12700 }, { "epoch": 0.72, "grad_norm": 0.9290038347244263, "learning_rate": 9.290417801998183e-05, "loss": 1.1499, "step": 12710 }, { "epoch": 0.72, "grad_norm": 0.8857769966125488, "learning_rate": 9.289850136239782e-05, "loss": 1.1335, "step": 12720 }, { "epoch": 0.72, "grad_norm": 1.0771281719207764, "learning_rate": 9.28928247048138e-05, "loss": 1.103, "step": 12730 }, { "epoch": 0.72, "grad_norm": 0.984920859336853, "learning_rate": 9.28871480472298e-05, "loss": 1.1274, "step": 12740 }, { "epoch": 0.72, "grad_norm": 0.9737094640731812, "learning_rate": 9.288147138964578e-05, "loss": 1.1273, "step": 12750 }, { "epoch": 0.72, "grad_norm": 0.9753016829490662, "learning_rate": 9.287579473206177e-05, "loss": 1.1138, "step": 12760 }, { "epoch": 0.72, "grad_norm": 0.9585466980934143, "learning_rate": 9.287011807447775e-05, "loss": 1.1304, "step": 12770 }, { "epoch": 0.72, "grad_norm": 0.9780167937278748, "learning_rate": 9.286444141689373e-05, "loss": 1.1276, "step": 12780 }, { "epoch": 0.73, "grad_norm": 1.016771912574768, "learning_rate": 9.285876475930972e-05, "loss": 1.1307, "step": 12790 }, { "epoch": 0.73, "grad_norm": 0.955298662185669, "learning_rate": 9.28530881017257e-05, "loss": 1.135, "step": 12800 }, { "epoch": 0.73, "grad_norm": 1.0209823846817017, "learning_rate": 9.28474114441417e-05, "loss": 1.113, "step": 12810 }, { "epoch": 0.73, "grad_norm": 0.9464651942253113, "learning_rate": 9.284173478655768e-05, "loss": 1.1239, "step": 12820 }, { "epoch": 0.73, "grad_norm": 0.9967359304428101, "learning_rate": 9.283605812897367e-05, "loss": 1.124, "step": 12830 }, { "epoch": 0.73, "grad_norm": 1.0220355987548828, "learning_rate": 9.283038147138965e-05, "loss": 1.0998, "step": 12840 }, { "epoch": 0.73, "grad_norm": 0.9760149121284485, "learning_rate": 9.282470481380564e-05, "loss": 1.1623, "step": 12850 }, { "epoch": 0.73, "grad_norm": 1.0367529392242432, "learning_rate": 9.281902815622162e-05, "loss": 1.1057, "step": 12860 }, { "epoch": 0.73, "grad_norm": 1.0317234992980957, "learning_rate": 9.28133514986376e-05, "loss": 1.1114, "step": 12870 }, { "epoch": 0.73, "grad_norm": 1.0234925746917725, "learning_rate": 9.28076748410536e-05, "loss": 1.1402, "step": 12880 }, { "epoch": 0.73, "grad_norm": 1.0895369052886963, "learning_rate": 9.280199818346958e-05, "loss": 1.1466, "step": 12890 }, { "epoch": 0.73, "grad_norm": 1.0651042461395264, "learning_rate": 9.279632152588557e-05, "loss": 1.1125, "step": 12900 }, { "epoch": 0.73, "grad_norm": 1.0335577726364136, "learning_rate": 9.279064486830155e-05, "loss": 1.1339, "step": 12910 }, { "epoch": 0.73, "grad_norm": 0.9850559234619141, "learning_rate": 9.278496821071754e-05, "loss": 1.1353, "step": 12920 }, { "epoch": 0.73, "grad_norm": 0.9969815015792847, "learning_rate": 9.277929155313352e-05, "loss": 1.1345, "step": 12930 }, { "epoch": 0.73, "grad_norm": 1.0012595653533936, "learning_rate": 9.27736148955495e-05, "loss": 1.1512, "step": 12940 }, { "epoch": 0.73, "grad_norm": 1.011484146118164, "learning_rate": 9.276793823796548e-05, "loss": 1.1214, "step": 12950 }, { "epoch": 0.73, "grad_norm": 1.0687730312347412, "learning_rate": 9.276226158038148e-05, "loss": 1.1262, "step": 12960 }, { "epoch": 0.74, "grad_norm": 0.9885759949684143, "learning_rate": 9.275658492279746e-05, "loss": 1.1042, "step": 12970 }, { "epoch": 0.74, "grad_norm": 1.0457040071487427, "learning_rate": 9.275090826521344e-05, "loss": 1.1087, "step": 12980 }, { "epoch": 0.74, "grad_norm": 1.022881031036377, "learning_rate": 9.274523160762943e-05, "loss": 1.1136, "step": 12990 }, { "epoch": 0.74, "grad_norm": 0.970537543296814, "learning_rate": 9.273955495004541e-05, "loss": 1.107, "step": 13000 }, { "epoch": 0.74, "grad_norm": 0.9963802695274353, "learning_rate": 9.27338782924614e-05, "loss": 1.1478, "step": 13010 }, { "epoch": 0.74, "grad_norm": 0.9523242712020874, "learning_rate": 9.272820163487738e-05, "loss": 1.1384, "step": 13020 }, { "epoch": 0.74, "grad_norm": 0.9758188724517822, "learning_rate": 9.272252497729338e-05, "loss": 1.1301, "step": 13030 }, { "epoch": 0.74, "grad_norm": 0.9975390434265137, "learning_rate": 9.271684831970936e-05, "loss": 1.1128, "step": 13040 }, { "epoch": 0.74, "grad_norm": 1.0296211242675781, "learning_rate": 9.271117166212534e-05, "loss": 1.129, "step": 13050 }, { "epoch": 0.74, "grad_norm": 0.9724248051643372, "learning_rate": 9.270549500454133e-05, "loss": 1.0969, "step": 13060 }, { "epoch": 0.74, "grad_norm": 1.0166122913360596, "learning_rate": 9.269981834695731e-05, "loss": 1.146, "step": 13070 }, { "epoch": 0.74, "grad_norm": 1.0212044715881348, "learning_rate": 9.26941416893733e-05, "loss": 1.14, "step": 13080 }, { "epoch": 0.74, "grad_norm": 0.9906020164489746, "learning_rate": 9.268846503178929e-05, "loss": 1.1641, "step": 13090 }, { "epoch": 0.74, "grad_norm": 0.9545894861221313, "learning_rate": 9.268278837420528e-05, "loss": 1.1154, "step": 13100 }, { "epoch": 0.74, "grad_norm": 1.0101183652877808, "learning_rate": 9.267711171662126e-05, "loss": 1.132, "step": 13110 }, { "epoch": 0.74, "grad_norm": 1.0166221857070923, "learning_rate": 9.267143505903725e-05, "loss": 1.0996, "step": 13120 }, { "epoch": 0.74, "grad_norm": 1.0745973587036133, "learning_rate": 9.266575840145323e-05, "loss": 1.1315, "step": 13130 }, { "epoch": 0.75, "grad_norm": 0.9956825971603394, "learning_rate": 9.266008174386921e-05, "loss": 1.1375, "step": 13140 }, { "epoch": 0.75, "grad_norm": 0.9925455451011658, "learning_rate": 9.26544050862852e-05, "loss": 1.1196, "step": 13150 }, { "epoch": 0.75, "grad_norm": 1.0045737028121948, "learning_rate": 9.264872842870119e-05, "loss": 1.1112, "step": 13160 }, { "epoch": 0.75, "grad_norm": 0.9905725717544556, "learning_rate": 9.264305177111718e-05, "loss": 1.1348, "step": 13170 }, { "epoch": 0.75, "grad_norm": 0.9582028985023499, "learning_rate": 9.263737511353316e-05, "loss": 1.1399, "step": 13180 }, { "epoch": 0.75, "grad_norm": 0.989618718624115, "learning_rate": 9.263169845594914e-05, "loss": 1.1205, "step": 13190 }, { "epoch": 0.75, "grad_norm": 1.0315896272659302, "learning_rate": 9.262602179836513e-05, "loss": 1.123, "step": 13200 }, { "epoch": 0.75, "grad_norm": 1.0654592514038086, "learning_rate": 9.262034514078111e-05, "loss": 1.1244, "step": 13210 }, { "epoch": 0.75, "grad_norm": 0.9766521453857422, "learning_rate": 9.261466848319709e-05, "loss": 1.1151, "step": 13220 }, { "epoch": 0.75, "grad_norm": 1.0658446550369263, "learning_rate": 9.260899182561307e-05, "loss": 1.1586, "step": 13230 }, { "epoch": 0.75, "grad_norm": 0.9909529089927673, "learning_rate": 9.260331516802907e-05, "loss": 1.1076, "step": 13240 }, { "epoch": 0.75, "grad_norm": 1.0159281492233276, "learning_rate": 9.259763851044505e-05, "loss": 1.1161, "step": 13250 }, { "epoch": 0.75, "grad_norm": 1.015122413635254, "learning_rate": 9.259196185286104e-05, "loss": 1.113, "step": 13260 }, { "epoch": 0.75, "grad_norm": 0.9504070281982422, "learning_rate": 9.258628519527702e-05, "loss": 1.1265, "step": 13270 }, { "epoch": 0.75, "grad_norm": 0.9702829718589783, "learning_rate": 9.258060853769301e-05, "loss": 1.1304, "step": 13280 }, { "epoch": 0.75, "grad_norm": 1.0008254051208496, "learning_rate": 9.257493188010899e-05, "loss": 1.1293, "step": 13290 }, { "epoch": 0.75, "grad_norm": 1.0942738056182861, "learning_rate": 9.256925522252499e-05, "loss": 1.1133, "step": 13300 }, { "epoch": 0.75, "grad_norm": 0.9874013066291809, "learning_rate": 9.256357856494097e-05, "loss": 1.1317, "step": 13310 }, { "epoch": 0.76, "grad_norm": 0.9267258048057556, "learning_rate": 9.255790190735695e-05, "loss": 1.1233, "step": 13320 }, { "epoch": 0.76, "grad_norm": 0.9503917694091797, "learning_rate": 9.255222524977294e-05, "loss": 1.1149, "step": 13330 }, { "epoch": 0.76, "grad_norm": 1.0026260614395142, "learning_rate": 9.254654859218892e-05, "loss": 1.1227, "step": 13340 }, { "epoch": 0.76, "grad_norm": 1.0162370204925537, "learning_rate": 9.254087193460491e-05, "loss": 1.1507, "step": 13350 }, { "epoch": 0.76, "grad_norm": 1.0516648292541504, "learning_rate": 9.253519527702089e-05, "loss": 1.1157, "step": 13360 }, { "epoch": 0.76, "grad_norm": 1.002819538116455, "learning_rate": 9.252951861943689e-05, "loss": 1.1206, "step": 13370 }, { "epoch": 0.76, "grad_norm": 1.00905179977417, "learning_rate": 9.252384196185287e-05, "loss": 1.1361, "step": 13380 }, { "epoch": 0.76, "grad_norm": 0.9493829607963562, "learning_rate": 9.251816530426886e-05, "loss": 1.1063, "step": 13390 }, { "epoch": 0.76, "grad_norm": 1.0229923725128174, "learning_rate": 9.251248864668484e-05, "loss": 1.119, "step": 13400 }, { "epoch": 0.76, "grad_norm": 1.0415233373641968, "learning_rate": 9.250681198910082e-05, "loss": 1.1418, "step": 13410 }, { "epoch": 0.76, "grad_norm": 1.0043052434921265, "learning_rate": 9.250113533151681e-05, "loss": 1.1236, "step": 13420 }, { "epoch": 0.76, "grad_norm": 0.9874782562255859, "learning_rate": 9.24954586739328e-05, "loss": 1.1095, "step": 13430 }, { "epoch": 0.76, "grad_norm": 1.0400551557540894, "learning_rate": 9.248978201634879e-05, "loss": 1.1153, "step": 13440 }, { "epoch": 0.76, "grad_norm": 0.9278976321220398, "learning_rate": 9.248410535876477e-05, "loss": 1.129, "step": 13450 }, { "epoch": 0.76, "grad_norm": 0.9968656301498413, "learning_rate": 9.247842870118075e-05, "loss": 1.1288, "step": 13460 }, { "epoch": 0.76, "grad_norm": 1.0796774625778198, "learning_rate": 9.247275204359673e-05, "loss": 1.1267, "step": 13470 }, { "epoch": 0.76, "grad_norm": 0.9293530583381653, "learning_rate": 9.246707538601272e-05, "loss": 1.1255, "step": 13480 }, { "epoch": 0.76, "grad_norm": 1.0788495540618896, "learning_rate": 9.24613987284287e-05, "loss": 1.1307, "step": 13490 }, { "epoch": 0.77, "grad_norm": 0.9997620582580566, "learning_rate": 9.245572207084468e-05, "loss": 1.1207, "step": 13500 }, { "epoch": 0.77, "grad_norm": 1.001564860343933, "learning_rate": 9.245004541326067e-05, "loss": 1.1231, "step": 13510 }, { "epoch": 0.77, "grad_norm": 0.9855202436447144, "learning_rate": 9.244436875567665e-05, "loss": 1.0995, "step": 13520 }, { "epoch": 0.77, "grad_norm": 1.023049235343933, "learning_rate": 9.243869209809265e-05, "loss": 1.133, "step": 13530 }, { "epoch": 0.77, "grad_norm": 1.0153595209121704, "learning_rate": 9.243301544050863e-05, "loss": 1.1239, "step": 13540 }, { "epoch": 0.77, "grad_norm": 0.967313289642334, "learning_rate": 9.242733878292462e-05, "loss": 1.1328, "step": 13550 }, { "epoch": 0.77, "grad_norm": 0.9344214200973511, "learning_rate": 9.24216621253406e-05, "loss": 1.0926, "step": 13560 }, { "epoch": 0.77, "grad_norm": 1.0503844022750854, "learning_rate": 9.24159854677566e-05, "loss": 1.0949, "step": 13570 }, { "epoch": 0.77, "grad_norm": 1.157586693763733, "learning_rate": 9.241030881017257e-05, "loss": 1.1294, "step": 13580 }, { "epoch": 0.77, "grad_norm": 1.0961414575576782, "learning_rate": 9.240463215258855e-05, "loss": 1.1041, "step": 13590 }, { "epoch": 0.77, "grad_norm": 1.126868486404419, "learning_rate": 9.239895549500455e-05, "loss": 1.1168, "step": 13600 }, { "epoch": 0.77, "grad_norm": 1.0285909175872803, "learning_rate": 9.239327883742053e-05, "loss": 1.1504, "step": 13610 }, { "epoch": 0.77, "grad_norm": 1.0518323183059692, "learning_rate": 9.238760217983652e-05, "loss": 1.1201, "step": 13620 }, { "epoch": 0.77, "grad_norm": 0.9983231425285339, "learning_rate": 9.23819255222525e-05, "loss": 1.1295, "step": 13630 }, { "epoch": 0.77, "grad_norm": 1.04630446434021, "learning_rate": 9.23762488646685e-05, "loss": 1.0992, "step": 13640 }, { "epoch": 0.77, "grad_norm": 0.9806832075119019, "learning_rate": 9.237057220708447e-05, "loss": 1.1212, "step": 13650 }, { "epoch": 0.77, "grad_norm": 0.9897171258926392, "learning_rate": 9.236489554950047e-05, "loss": 1.1457, "step": 13660 }, { "epoch": 0.78, "grad_norm": 0.9621373414993286, "learning_rate": 9.235921889191645e-05, "loss": 1.1084, "step": 13670 }, { "epoch": 0.78, "grad_norm": 0.9922381043434143, "learning_rate": 9.235354223433243e-05, "loss": 1.1264, "step": 13680 }, { "epoch": 0.78, "grad_norm": 1.0514843463897705, "learning_rate": 9.234786557674842e-05, "loss": 1.1425, "step": 13690 }, { "epoch": 0.78, "grad_norm": 0.955376923084259, "learning_rate": 9.23421889191644e-05, "loss": 1.1178, "step": 13700 }, { "epoch": 0.78, "grad_norm": 1.0003074407577515, "learning_rate": 9.233651226158038e-05, "loss": 1.1137, "step": 13710 }, { "epoch": 0.78, "grad_norm": 0.9716780781745911, "learning_rate": 9.233083560399638e-05, "loss": 1.1007, "step": 13720 }, { "epoch": 0.78, "grad_norm": 1.0509406328201294, "learning_rate": 9.232515894641236e-05, "loss": 1.107, "step": 13730 }, { "epoch": 0.78, "grad_norm": 0.9714393019676208, "learning_rate": 9.231948228882833e-05, "loss": 1.0981, "step": 13740 }, { "epoch": 0.78, "grad_norm": 0.9100378155708313, "learning_rate": 9.231380563124433e-05, "loss": 1.1303, "step": 13750 }, { "epoch": 0.78, "grad_norm": 1.016874074935913, "learning_rate": 9.230812897366031e-05, "loss": 1.1293, "step": 13760 }, { "epoch": 0.78, "grad_norm": 0.9573549628257751, "learning_rate": 9.230245231607629e-05, "loss": 1.1213, "step": 13770 }, { "epoch": 0.78, "grad_norm": 0.9816476702690125, "learning_rate": 9.229677565849228e-05, "loss": 1.1209, "step": 13780 }, { "epoch": 0.78, "grad_norm": 0.9707009196281433, "learning_rate": 9.229109900090826e-05, "loss": 1.1257, "step": 13790 }, { "epoch": 0.78, "grad_norm": 1.0320913791656494, "learning_rate": 9.228542234332426e-05, "loss": 1.1001, "step": 13800 }, { "epoch": 0.78, "grad_norm": 0.9946555495262146, "learning_rate": 9.227974568574024e-05, "loss": 1.1188, "step": 13810 }, { "epoch": 0.78, "grad_norm": 0.9929690957069397, "learning_rate": 9.227406902815623e-05, "loss": 1.1047, "step": 13820 }, { "epoch": 0.78, "grad_norm": 1.0483194589614868, "learning_rate": 9.226839237057221e-05, "loss": 1.1233, "step": 13830 }, { "epoch": 0.78, "grad_norm": 0.9883351922035217, "learning_rate": 9.22627157129882e-05, "loss": 1.1241, "step": 13840 }, { "epoch": 0.79, "grad_norm": 1.0293415784835815, "learning_rate": 9.225703905540418e-05, "loss": 1.0953, "step": 13850 }, { "epoch": 0.79, "grad_norm": 0.9924306869506836, "learning_rate": 9.225136239782016e-05, "loss": 1.136, "step": 13860 }, { "epoch": 0.79, "grad_norm": 0.9578368663787842, "learning_rate": 9.224568574023616e-05, "loss": 1.1157, "step": 13870 }, { "epoch": 0.79, "grad_norm": 1.00650954246521, "learning_rate": 9.224000908265214e-05, "loss": 1.1047, "step": 13880 }, { "epoch": 0.79, "grad_norm": 0.9723221063613892, "learning_rate": 9.223433242506813e-05, "loss": 1.1138, "step": 13890 }, { "epoch": 0.79, "grad_norm": 0.9543737173080444, "learning_rate": 9.222865576748411e-05, "loss": 1.1356, "step": 13900 }, { "epoch": 0.79, "grad_norm": 1.0746557712554932, "learning_rate": 9.22229791099001e-05, "loss": 1.0926, "step": 13910 }, { "epoch": 0.79, "grad_norm": 0.9825300574302673, "learning_rate": 9.221730245231608e-05, "loss": 1.128, "step": 13920 }, { "epoch": 0.79, "grad_norm": 0.959663450717926, "learning_rate": 9.221162579473208e-05, "loss": 1.1403, "step": 13930 }, { "epoch": 0.79, "grad_norm": 0.9351370334625244, "learning_rate": 9.220594913714806e-05, "loss": 1.1394, "step": 13940 }, { "epoch": 0.79, "grad_norm": 1.0480303764343262, "learning_rate": 9.220027247956404e-05, "loss": 1.1175, "step": 13950 }, { "epoch": 0.79, "grad_norm": 0.9621829986572266, "learning_rate": 9.219459582198003e-05, "loss": 1.0962, "step": 13960 }, { "epoch": 0.79, "grad_norm": 0.9936897158622742, "learning_rate": 9.218891916439601e-05, "loss": 1.1222, "step": 13970 }, { "epoch": 0.79, "grad_norm": 1.0394905805587769, "learning_rate": 9.218324250681199e-05, "loss": 1.1104, "step": 13980 }, { "epoch": 0.79, "grad_norm": 1.069922685623169, "learning_rate": 9.217756584922797e-05, "loss": 1.1159, "step": 13990 }, { "epoch": 0.79, "grad_norm": 0.9766385555267334, "learning_rate": 9.217188919164396e-05, "loss": 1.1187, "step": 14000 }, { "epoch": 0.79, "grad_norm": 0.9776850342750549, "learning_rate": 9.216621253405994e-05, "loss": 1.1065, "step": 14010 }, { "epoch": 0.79, "grad_norm": 0.9195267558097839, "learning_rate": 9.216053587647594e-05, "loss": 1.1181, "step": 14020 }, { "epoch": 0.8, "grad_norm": 1.0584040880203247, "learning_rate": 9.215485921889192e-05, "loss": 1.1067, "step": 14030 }, { "epoch": 0.8, "grad_norm": 1.055219054222107, "learning_rate": 9.214918256130791e-05, "loss": 1.1453, "step": 14040 }, { "epoch": 0.8, "grad_norm": 0.9629987478256226, "learning_rate": 9.214350590372389e-05, "loss": 1.1143, "step": 14050 }, { "epoch": 0.8, "grad_norm": 1.1212327480316162, "learning_rate": 9.213782924613987e-05, "loss": 1.1392, "step": 14060 }, { "epoch": 0.8, "grad_norm": 0.9892047643661499, "learning_rate": 9.213215258855586e-05, "loss": 1.1189, "step": 14070 }, { "epoch": 0.8, "grad_norm": 1.0217310190200806, "learning_rate": 9.212704359673025e-05, "loss": 1.1042, "step": 14080 }, { "epoch": 0.8, "grad_norm": 1.0401531457901, "learning_rate": 9.212136693914623e-05, "loss": 1.1306, "step": 14090 }, { "epoch": 0.8, "grad_norm": 1.0513296127319336, "learning_rate": 9.211569028156221e-05, "loss": 1.1364, "step": 14100 }, { "epoch": 0.8, "grad_norm": 1.0199244022369385, "learning_rate": 9.21100136239782e-05, "loss": 1.1069, "step": 14110 }, { "epoch": 0.8, "grad_norm": 0.9920427203178406, "learning_rate": 9.210433696639418e-05, "loss": 1.1164, "step": 14120 }, { "epoch": 0.8, "grad_norm": 1.0810894966125488, "learning_rate": 9.209866030881018e-05, "loss": 1.1544, "step": 14130 }, { "epoch": 0.8, "grad_norm": 0.9708192348480225, "learning_rate": 9.209298365122616e-05, "loss": 1.1228, "step": 14140 }, { "epoch": 0.8, "grad_norm": 0.944333553314209, "learning_rate": 9.208730699364215e-05, "loss": 1.1029, "step": 14150 }, { "epoch": 0.8, "grad_norm": 0.9762970805168152, "learning_rate": 9.208163033605813e-05, "loss": 1.1403, "step": 14160 }, { "epoch": 0.8, "grad_norm": 0.987148642539978, "learning_rate": 9.207595367847411e-05, "loss": 1.1162, "step": 14170 }, { "epoch": 0.8, "grad_norm": 1.0010600090026855, "learning_rate": 9.20702770208901e-05, "loss": 1.126, "step": 14180 }, { "epoch": 0.8, "grad_norm": 0.9994584918022156, "learning_rate": 9.206460036330608e-05, "loss": 1.1186, "step": 14190 }, { "epoch": 0.81, "grad_norm": 0.9381515383720398, "learning_rate": 9.205892370572208e-05, "loss": 1.1044, "step": 14200 }, { "epoch": 0.81, "grad_norm": 0.9469918608665466, "learning_rate": 9.205324704813806e-05, "loss": 1.1427, "step": 14210 }, { "epoch": 0.81, "grad_norm": 1.0264743566513062, "learning_rate": 9.204757039055405e-05, "loss": 1.1421, "step": 14220 }, { "epoch": 0.81, "grad_norm": 0.9414647221565247, "learning_rate": 9.204189373297003e-05, "loss": 1.1047, "step": 14230 }, { "epoch": 0.81, "grad_norm": 1.0229878425598145, "learning_rate": 9.203621707538603e-05, "loss": 1.1359, "step": 14240 }, { "epoch": 0.81, "grad_norm": 1.0691195726394653, "learning_rate": 9.2030540417802e-05, "loss": 1.0895, "step": 14250 }, { "epoch": 0.81, "grad_norm": 1.036987543106079, "learning_rate": 9.202486376021799e-05, "loss": 1.1178, "step": 14260 }, { "epoch": 0.81, "grad_norm": 0.9719197154045105, "learning_rate": 9.201918710263398e-05, "loss": 1.1092, "step": 14270 }, { "epoch": 0.81, "grad_norm": 1.0636261701583862, "learning_rate": 9.201351044504996e-05, "loss": 1.1011, "step": 14280 }, { "epoch": 0.81, "grad_norm": 0.9389846324920654, "learning_rate": 9.200783378746595e-05, "loss": 1.1337, "step": 14290 }, { "epoch": 0.81, "grad_norm": 0.9949506521224976, "learning_rate": 9.200215712988193e-05, "loss": 1.1223, "step": 14300 }, { "epoch": 0.81, "grad_norm": 1.041110634803772, "learning_rate": 9.199648047229793e-05, "loss": 1.1003, "step": 14310 }, { "epoch": 0.81, "grad_norm": 0.9666817784309387, "learning_rate": 9.19908038147139e-05, "loss": 1.1339, "step": 14320 }, { "epoch": 0.81, "grad_norm": 1.0057296752929688, "learning_rate": 9.198512715712989e-05, "loss": 1.0905, "step": 14330 }, { "epoch": 0.81, "grad_norm": 0.9841536283493042, "learning_rate": 9.197945049954587e-05, "loss": 1.1212, "step": 14340 }, { "epoch": 0.81, "grad_norm": 1.0078221559524536, "learning_rate": 9.197377384196186e-05, "loss": 1.123, "step": 14350 }, { "epoch": 0.81, "grad_norm": 1.022194504737854, "learning_rate": 9.196809718437784e-05, "loss": 1.1127, "step": 14360 }, { "epoch": 0.81, "grad_norm": 1.0105266571044922, "learning_rate": 9.196242052679382e-05, "loss": 1.1112, "step": 14370 }, { "epoch": 0.82, "grad_norm": 1.0545947551727295, "learning_rate": 9.195674386920981e-05, "loss": 1.1194, "step": 14380 }, { "epoch": 0.82, "grad_norm": 0.9760921597480774, "learning_rate": 9.195106721162579e-05, "loss": 1.1391, "step": 14390 }, { "epoch": 0.82, "grad_norm": 1.014750599861145, "learning_rate": 9.194539055404179e-05, "loss": 1.1193, "step": 14400 }, { "epoch": 0.82, "grad_norm": 0.991804301738739, "learning_rate": 9.193971389645777e-05, "loss": 1.103, "step": 14410 }, { "epoch": 0.82, "grad_norm": 0.9869982004165649, "learning_rate": 9.193403723887376e-05, "loss": 1.1472, "step": 14420 }, { "epoch": 0.82, "grad_norm": 1.14600670337677, "learning_rate": 9.192836058128974e-05, "loss": 1.1447, "step": 14430 }, { "epoch": 0.82, "grad_norm": 1.0702942609786987, "learning_rate": 9.192268392370572e-05, "loss": 1.1319, "step": 14440 }, { "epoch": 0.82, "grad_norm": 0.9814661741256714, "learning_rate": 9.191700726612171e-05, "loss": 1.1267, "step": 14450 }, { "epoch": 0.82, "grad_norm": 1.0085461139678955, "learning_rate": 9.191133060853769e-05, "loss": 1.1103, "step": 14460 }, { "epoch": 0.82, "grad_norm": 1.079637885093689, "learning_rate": 9.190565395095369e-05, "loss": 1.1302, "step": 14470 }, { "epoch": 0.82, "grad_norm": 1.04889976978302, "learning_rate": 9.189997729336967e-05, "loss": 1.1296, "step": 14480 }, { "epoch": 0.82, "grad_norm": 0.9940330982208252, "learning_rate": 9.189430063578566e-05, "loss": 1.0938, "step": 14490 }, { "epoch": 0.82, "grad_norm": 1.0063358545303345, "learning_rate": 9.188862397820164e-05, "loss": 1.1132, "step": 14500 }, { "epoch": 0.82, "grad_norm": 1.0436004400253296, "learning_rate": 9.188294732061763e-05, "loss": 1.1148, "step": 14510 }, { "epoch": 0.82, "grad_norm": 1.015537977218628, "learning_rate": 9.187727066303361e-05, "loss": 1.1141, "step": 14520 }, { "epoch": 0.82, "grad_norm": 0.991378903388977, "learning_rate": 9.187159400544959e-05, "loss": 1.1696, "step": 14530 }, { "epoch": 0.82, "grad_norm": 1.131270170211792, "learning_rate": 9.186591734786559e-05, "loss": 1.1031, "step": 14540 }, { "epoch": 0.83, "grad_norm": 0.9476028084754944, "learning_rate": 9.186024069028157e-05, "loss": 1.1289, "step": 14550 }, { "epoch": 0.83, "grad_norm": 0.9782541990280151, "learning_rate": 9.185456403269756e-05, "loss": 1.0914, "step": 14560 }, { "epoch": 0.83, "grad_norm": 0.999586820602417, "learning_rate": 9.184888737511354e-05, "loss": 1.1077, "step": 14570 }, { "epoch": 0.83, "grad_norm": 1.048227071762085, "learning_rate": 9.184321071752952e-05, "loss": 1.1026, "step": 14580 }, { "epoch": 0.83, "grad_norm": 0.9667967557907104, "learning_rate": 9.183753405994551e-05, "loss": 1.1197, "step": 14590 }, { "epoch": 0.83, "grad_norm": 0.9812629222869873, "learning_rate": 9.18318574023615e-05, "loss": 1.1357, "step": 14600 }, { "epoch": 0.83, "grad_norm": 1.077944040298462, "learning_rate": 9.182618074477747e-05, "loss": 1.1056, "step": 14610 }, { "epoch": 0.83, "grad_norm": 0.9812082648277283, "learning_rate": 9.182050408719345e-05, "loss": 1.1074, "step": 14620 }, { "epoch": 0.83, "grad_norm": 0.9402799010276794, "learning_rate": 9.181482742960945e-05, "loss": 1.1265, "step": 14630 }, { "epoch": 0.83, "grad_norm": 0.9863848686218262, "learning_rate": 9.180915077202543e-05, "loss": 1.1311, "step": 14640 }, { "epoch": 0.83, "grad_norm": 1.0593854188919067, "learning_rate": 9.180347411444142e-05, "loss": 1.1225, "step": 14650 }, { "epoch": 0.83, "grad_norm": 1.0075647830963135, "learning_rate": 9.17977974568574e-05, "loss": 1.098, "step": 14660 }, { "epoch": 0.83, "grad_norm": 1.0110039710998535, "learning_rate": 9.17921207992734e-05, "loss": 1.1031, "step": 14670 }, { "epoch": 0.83, "grad_norm": 1.0020732879638672, "learning_rate": 9.178644414168937e-05, "loss": 1.1108, "step": 14680 }, { "epoch": 0.83, "grad_norm": 1.2904176712036133, "learning_rate": 9.178076748410537e-05, "loss": 1.0998, "step": 14690 }, { "epoch": 0.83, "grad_norm": 0.9576265811920166, "learning_rate": 9.177509082652135e-05, "loss": 1.1081, "step": 14700 }, { "epoch": 0.83, "grad_norm": 0.9859285354614258, "learning_rate": 9.176941416893733e-05, "loss": 1.1089, "step": 14710 }, { "epoch": 0.83, "grad_norm": 0.9839569926261902, "learning_rate": 9.176373751135332e-05, "loss": 1.1286, "step": 14720 }, { "epoch": 0.84, "grad_norm": 0.984190821647644, "learning_rate": 9.17580608537693e-05, "loss": 1.1247, "step": 14730 }, { "epoch": 0.84, "grad_norm": 1.04172682762146, "learning_rate": 9.17523841961853e-05, "loss": 1.0937, "step": 14740 }, { "epoch": 0.84, "grad_norm": 0.9725020527839661, "learning_rate": 9.174670753860127e-05, "loss": 1.1189, "step": 14750 }, { "epoch": 0.84, "grad_norm": 1.0178797245025635, "learning_rate": 9.174103088101727e-05, "loss": 1.112, "step": 14760 }, { "epoch": 0.84, "grad_norm": 1.0414918661117554, "learning_rate": 9.173535422343325e-05, "loss": 1.1306, "step": 14770 }, { "epoch": 0.84, "grad_norm": 1.0190048217773438, "learning_rate": 9.172967756584924e-05, "loss": 1.112, "step": 14780 }, { "epoch": 0.84, "grad_norm": 0.9426143765449524, "learning_rate": 9.172400090826522e-05, "loss": 1.1114, "step": 14790 }, { "epoch": 0.84, "grad_norm": 1.016837477684021, "learning_rate": 9.17183242506812e-05, "loss": 1.0975, "step": 14800 }, { "epoch": 0.84, "grad_norm": 1.0125977993011475, "learning_rate": 9.17126475930972e-05, "loss": 1.1084, "step": 14810 }, { "epoch": 0.84, "grad_norm": 1.0121351480484009, "learning_rate": 9.170697093551317e-05, "loss": 1.1105, "step": 14820 }, { "epoch": 0.84, "grad_norm": 1.0159473419189453, "learning_rate": 9.170129427792917e-05, "loss": 1.1171, "step": 14830 }, { "epoch": 0.84, "grad_norm": 0.9714972376823425, "learning_rate": 9.169561762034515e-05, "loss": 1.1045, "step": 14840 }, { "epoch": 0.84, "grad_norm": 1.0369690656661987, "learning_rate": 9.168994096276113e-05, "loss": 1.1134, "step": 14850 }, { "epoch": 0.84, "grad_norm": 1.007424235343933, "learning_rate": 9.168426430517711e-05, "loss": 1.1324, "step": 14860 }, { "epoch": 0.84, "grad_norm": 1.063423991203308, "learning_rate": 9.16785876475931e-05, "loss": 1.0976, "step": 14870 }, { "epoch": 0.84, "grad_norm": 0.9873882532119751, "learning_rate": 9.167291099000908e-05, "loss": 1.0978, "step": 14880 }, { "epoch": 0.84, "grad_norm": 1.1501818895339966, "learning_rate": 9.166723433242506e-05, "loss": 1.1164, "step": 14890 }, { "epoch": 0.84, "grad_norm": 1.052310824394226, "learning_rate": 9.166155767484106e-05, "loss": 1.0752, "step": 14900 }, { "epoch": 0.85, "grad_norm": 0.9920529127120972, "learning_rate": 9.165588101725704e-05, "loss": 1.1065, "step": 14910 }, { "epoch": 0.85, "grad_norm": 1.0247036218643188, "learning_rate": 9.165020435967303e-05, "loss": 1.1304, "step": 14920 }, { "epoch": 0.85, "grad_norm": 0.9717522263526917, "learning_rate": 9.164452770208901e-05, "loss": 1.1347, "step": 14930 }, { "epoch": 0.85, "grad_norm": 1.0294815301895142, "learning_rate": 9.1638851044505e-05, "loss": 1.1129, "step": 14940 }, { "epoch": 0.85, "grad_norm": 1.0402541160583496, "learning_rate": 9.163317438692098e-05, "loss": 1.1163, "step": 14950 }, { "epoch": 0.85, "grad_norm": 1.021889567375183, "learning_rate": 9.162749772933698e-05, "loss": 1.089, "step": 14960 }, { "epoch": 0.85, "grad_norm": 0.9911003112792969, "learning_rate": 9.162182107175296e-05, "loss": 1.1052, "step": 14970 }, { "epoch": 0.85, "grad_norm": 0.9876708984375, "learning_rate": 9.161614441416894e-05, "loss": 1.1038, "step": 14980 }, { "epoch": 0.85, "grad_norm": 0.9870926737785339, "learning_rate": 9.161046775658493e-05, "loss": 1.1235, "step": 14990 }, { "epoch": 0.85, "grad_norm": 1.0747051239013672, "learning_rate": 9.160479109900091e-05, "loss": 1.119, "step": 15000 }, { "epoch": 0.85, "grad_norm": 0.9977317452430725, "learning_rate": 9.15991144414169e-05, "loss": 1.1139, "step": 15010 }, { "epoch": 0.85, "grad_norm": 1.0255883932113647, "learning_rate": 9.159343778383288e-05, "loss": 1.1132, "step": 15020 }, { "epoch": 0.85, "grad_norm": 1.0029537677764893, "learning_rate": 9.158776112624888e-05, "loss": 1.1141, "step": 15030 }, { "epoch": 0.85, "grad_norm": 0.9406919479370117, "learning_rate": 9.158208446866486e-05, "loss": 1.1285, "step": 15040 }, { "epoch": 0.85, "grad_norm": 1.0196586847305298, "learning_rate": 9.157640781108085e-05, "loss": 1.1239, "step": 15050 }, { "epoch": 0.85, "grad_norm": 1.0043028593063354, "learning_rate": 9.157073115349683e-05, "loss": 1.1051, "step": 15060 }, { "epoch": 0.85, "grad_norm": 0.9687239527702332, "learning_rate": 9.156505449591281e-05, "loss": 1.0895, "step": 15070 }, { "epoch": 0.86, "grad_norm": 0.9783738255500793, "learning_rate": 9.15593778383288e-05, "loss": 1.1082, "step": 15080 }, { "epoch": 0.86, "grad_norm": 1.0274312496185303, "learning_rate": 9.155370118074478e-05, "loss": 1.1356, "step": 15090 }, { "epoch": 0.86, "grad_norm": 1.0200538635253906, "learning_rate": 9.154802452316076e-05, "loss": 1.1263, "step": 15100 }, { "epoch": 0.86, "grad_norm": 0.9787881970405579, "learning_rate": 9.154234786557676e-05, "loss": 1.1384, "step": 15110 }, { "epoch": 0.86, "grad_norm": 1.0776710510253906, "learning_rate": 9.153667120799274e-05, "loss": 1.126, "step": 15120 }, { "epoch": 0.86, "grad_norm": 1.0407251119613647, "learning_rate": 9.153099455040872e-05, "loss": 1.1166, "step": 15130 }, { "epoch": 0.86, "grad_norm": 0.9796594977378845, "learning_rate": 9.152531789282471e-05, "loss": 1.1033, "step": 15140 }, { "epoch": 0.86, "grad_norm": 1.0773029327392578, "learning_rate": 9.151964123524069e-05, "loss": 1.0947, "step": 15150 }, { "epoch": 0.86, "grad_norm": 1.0034475326538086, "learning_rate": 9.151396457765668e-05, "loss": 1.0864, "step": 15160 }, { "epoch": 0.86, "grad_norm": 1.0408823490142822, "learning_rate": 9.150828792007266e-05, "loss": 1.1008, "step": 15170 }, { "epoch": 0.86, "grad_norm": 0.9979718923568726, "learning_rate": 9.150261126248864e-05, "loss": 1.0991, "step": 15180 }, { "epoch": 0.86, "grad_norm": 1.0188885927200317, "learning_rate": 9.149693460490464e-05, "loss": 1.098, "step": 15190 }, { "epoch": 0.86, "grad_norm": 0.9854590892791748, "learning_rate": 9.149125794732062e-05, "loss": 1.1021, "step": 15200 }, { "epoch": 0.86, "grad_norm": 0.9712744951248169, "learning_rate": 9.148558128973661e-05, "loss": 1.1226, "step": 15210 }, { "epoch": 0.86, "grad_norm": 1.0301159620285034, "learning_rate": 9.147990463215259e-05, "loss": 1.1102, "step": 15220 }, { "epoch": 0.86, "grad_norm": 0.9843093156814575, "learning_rate": 9.147422797456858e-05, "loss": 1.083, "step": 15230 }, { "epoch": 0.86, "grad_norm": 0.9823591113090515, "learning_rate": 9.146855131698456e-05, "loss": 1.1173, "step": 15240 }, { "epoch": 0.86, "grad_norm": 0.991837739944458, "learning_rate": 9.146287465940056e-05, "loss": 1.0965, "step": 15250 }, { "epoch": 0.87, "grad_norm": 0.9655587077140808, "learning_rate": 9.145719800181654e-05, "loss": 1.12, "step": 15260 }, { "epoch": 0.87, "grad_norm": 0.9382675886154175, "learning_rate": 9.145152134423252e-05, "loss": 1.1087, "step": 15270 }, { "epoch": 0.87, "grad_norm": 0.9779953360557556, "learning_rate": 9.144584468664851e-05, "loss": 1.1119, "step": 15280 }, { "epoch": 0.87, "grad_norm": 1.0198532342910767, "learning_rate": 9.144016802906449e-05, "loss": 1.0936, "step": 15290 }, { "epoch": 0.87, "grad_norm": 0.9969067573547363, "learning_rate": 9.143449137148048e-05, "loss": 1.1027, "step": 15300 }, { "epoch": 0.87, "grad_norm": 0.9901171922683716, "learning_rate": 9.142881471389646e-05, "loss": 1.1009, "step": 15310 }, { "epoch": 0.87, "grad_norm": 1.0412951707839966, "learning_rate": 9.142313805631246e-05, "loss": 1.1146, "step": 15320 }, { "epoch": 0.87, "grad_norm": 0.9761418700218201, "learning_rate": 9.141746139872844e-05, "loss": 1.1227, "step": 15330 }, { "epoch": 0.87, "grad_norm": 0.9998602867126465, "learning_rate": 9.141178474114442e-05, "loss": 1.0971, "step": 15340 }, { "epoch": 0.87, "grad_norm": 1.0863254070281982, "learning_rate": 9.140610808356041e-05, "loss": 1.1012, "step": 15350 }, { "epoch": 0.87, "grad_norm": 1.0403858423233032, "learning_rate": 9.140043142597639e-05, "loss": 1.1039, "step": 15360 }, { "epoch": 0.87, "grad_norm": 1.06740403175354, "learning_rate": 9.139475476839237e-05, "loss": 1.1049, "step": 15370 }, { "epoch": 0.87, "grad_norm": 0.9717476963996887, "learning_rate": 9.138907811080835e-05, "loss": 1.1201, "step": 15380 }, { "epoch": 0.87, "grad_norm": 1.007228970527649, "learning_rate": 9.138340145322434e-05, "loss": 1.0813, "step": 15390 }, { "epoch": 0.87, "grad_norm": 0.9729843139648438, "learning_rate": 9.137772479564032e-05, "loss": 1.1163, "step": 15400 }, { "epoch": 0.87, "grad_norm": 0.9905862808227539, "learning_rate": 9.137204813805632e-05, "loss": 1.1351, "step": 15410 }, { "epoch": 0.87, "grad_norm": 1.0035568475723267, "learning_rate": 9.13663714804723e-05, "loss": 1.1203, "step": 15420 }, { "epoch": 0.87, "grad_norm": 0.9934986233711243, "learning_rate": 9.136069482288829e-05, "loss": 1.0878, "step": 15430 }, { "epoch": 0.88, "grad_norm": 0.9727883338928223, "learning_rate": 9.135501816530427e-05, "loss": 1.1317, "step": 15440 }, { "epoch": 0.88, "grad_norm": 0.9576403498649597, "learning_rate": 9.134934150772025e-05, "loss": 1.1094, "step": 15450 }, { "epoch": 0.88, "grad_norm": 0.9829134941101074, "learning_rate": 9.134366485013624e-05, "loss": 1.1403, "step": 15460 }, { "epoch": 0.88, "grad_norm": 1.0177232027053833, "learning_rate": 9.133798819255222e-05, "loss": 1.0885, "step": 15470 }, { "epoch": 0.88, "grad_norm": 0.97464519739151, "learning_rate": 9.133231153496822e-05, "loss": 1.1153, "step": 15480 }, { "epoch": 0.88, "grad_norm": 0.9820401072502136, "learning_rate": 9.13266348773842e-05, "loss": 1.0935, "step": 15490 }, { "epoch": 0.88, "grad_norm": 0.9464737176895142, "learning_rate": 9.132095821980019e-05, "loss": 1.1151, "step": 15500 }, { "epoch": 0.88, "grad_norm": 0.9699311256408691, "learning_rate": 9.131528156221617e-05, "loss": 1.0937, "step": 15510 }, { "epoch": 0.88, "grad_norm": 1.0071078538894653, "learning_rate": 9.130960490463217e-05, "loss": 1.1034, "step": 15520 }, { "epoch": 0.88, "grad_norm": 0.9282706379890442, "learning_rate": 9.130392824704815e-05, "loss": 1.096, "step": 15530 }, { "epoch": 0.88, "grad_norm": 1.1596356630325317, "learning_rate": 9.129825158946413e-05, "loss": 1.1267, "step": 15540 }, { "epoch": 0.88, "grad_norm": 0.9299152493476868, "learning_rate": 9.129257493188012e-05, "loss": 1.0756, "step": 15550 }, { "epoch": 0.88, "grad_norm": 0.9721208810806274, "learning_rate": 9.12868982742961e-05, "loss": 1.1086, "step": 15560 }, { "epoch": 0.88, "grad_norm": 0.9402008056640625, "learning_rate": 9.128122161671209e-05, "loss": 1.1077, "step": 15570 }, { "epoch": 0.88, "grad_norm": 0.9763596653938293, "learning_rate": 9.127554495912807e-05, "loss": 1.098, "step": 15580 }, { "epoch": 0.88, "grad_norm": 1.0196951627731323, "learning_rate": 9.126986830154407e-05, "loss": 1.0968, "step": 15590 }, { "epoch": 0.88, "grad_norm": 0.9128172397613525, "learning_rate": 9.126419164396005e-05, "loss": 1.0858, "step": 15600 }, { "epoch": 0.89, "grad_norm": 1.0091369152069092, "learning_rate": 9.125851498637603e-05, "loss": 1.0816, "step": 15610 }, { "epoch": 0.89, "grad_norm": 0.9995017051696777, "learning_rate": 9.1252838328792e-05, "loss": 1.1047, "step": 15620 }, { "epoch": 0.89, "grad_norm": 1.0964733362197876, "learning_rate": 9.1247161671208e-05, "loss": 1.1229, "step": 15630 }, { "epoch": 0.89, "grad_norm": 1.0931124687194824, "learning_rate": 9.124148501362398e-05, "loss": 1.1209, "step": 15640 }, { "epoch": 0.89, "grad_norm": 0.9235308766365051, "learning_rate": 9.123580835603996e-05, "loss": 1.1262, "step": 15650 }, { "epoch": 0.89, "grad_norm": 0.9412137866020203, "learning_rate": 9.123013169845595e-05, "loss": 1.1142, "step": 15660 }, { "epoch": 0.89, "grad_norm": 0.9502237439155579, "learning_rate": 9.122445504087193e-05, "loss": 1.1224, "step": 15670 }, { "epoch": 0.89, "grad_norm": 1.0503579378128052, "learning_rate": 9.121877838328793e-05, "loss": 1.1176, "step": 15680 }, { "epoch": 0.89, "grad_norm": 0.9857829809188843, "learning_rate": 9.12131017257039e-05, "loss": 1.1079, "step": 15690 }, { "epoch": 0.89, "grad_norm": 1.0332545042037964, "learning_rate": 9.12074250681199e-05, "loss": 1.0934, "step": 15700 }, { "epoch": 0.89, "grad_norm": 1.0070550441741943, "learning_rate": 9.120174841053588e-05, "loss": 1.0822, "step": 15710 }, { "epoch": 0.89, "grad_norm": 1.007931113243103, "learning_rate": 9.119607175295186e-05, "loss": 1.0844, "step": 15720 }, { "epoch": 0.89, "grad_norm": 1.00246000289917, "learning_rate": 9.119039509536785e-05, "loss": 1.1013, "step": 15730 }, { "epoch": 0.89, "grad_norm": 1.0666040182113647, "learning_rate": 9.118471843778383e-05, "loss": 1.1173, "step": 15740 }, { "epoch": 0.89, "grad_norm": 1.0196809768676758, "learning_rate": 9.117904178019983e-05, "loss": 1.1265, "step": 15750 }, { "epoch": 0.89, "grad_norm": 0.9277765154838562, "learning_rate": 9.11733651226158e-05, "loss": 1.1137, "step": 15760 }, { "epoch": 0.89, "grad_norm": 1.0882501602172852, "learning_rate": 9.11676884650318e-05, "loss": 1.1021, "step": 15770 }, { "epoch": 0.89, "grad_norm": 1.043147087097168, "learning_rate": 9.116201180744778e-05, "loss": 1.068, "step": 15780 }, { "epoch": 0.9, "grad_norm": 0.9894164204597473, "learning_rate": 9.115633514986377e-05, "loss": 1.0906, "step": 15790 }, { "epoch": 0.9, "grad_norm": 0.9724876284599304, "learning_rate": 9.115065849227975e-05, "loss": 1.1118, "step": 15800 }, { "epoch": 0.9, "grad_norm": 1.0383309125900269, "learning_rate": 9.114498183469573e-05, "loss": 1.1049, "step": 15810 }, { "epoch": 0.9, "grad_norm": 1.0055935382843018, "learning_rate": 9.113930517711173e-05, "loss": 1.1074, "step": 15820 }, { "epoch": 0.9, "grad_norm": 0.9889771342277527, "learning_rate": 9.11336285195277e-05, "loss": 1.086, "step": 15830 }, { "epoch": 0.9, "grad_norm": 0.9921349883079529, "learning_rate": 9.11279518619437e-05, "loss": 1.0873, "step": 15840 }, { "epoch": 0.9, "grad_norm": 1.0478415489196777, "learning_rate": 9.112227520435968e-05, "loss": 1.0952, "step": 15850 }, { "epoch": 0.9, "grad_norm": 1.0900003910064697, "learning_rate": 9.111659854677566e-05, "loss": 1.1096, "step": 15860 }, { "epoch": 0.9, "grad_norm": 1.068149447441101, "learning_rate": 9.111092188919165e-05, "loss": 1.1121, "step": 15870 }, { "epoch": 0.9, "grad_norm": 0.9784067869186401, "learning_rate": 9.110524523160763e-05, "loss": 1.1065, "step": 15880 }, { "epoch": 0.9, "grad_norm": 0.9648650884628296, "learning_rate": 9.109956857402361e-05, "loss": 1.0914, "step": 15890 }, { "epoch": 0.9, "grad_norm": 1.0062381029129028, "learning_rate": 9.10938919164396e-05, "loss": 1.1184, "step": 15900 }, { "epoch": 0.9, "grad_norm": 1.0883089303970337, "learning_rate": 9.108821525885559e-05, "loss": 1.1053, "step": 15910 }, { "epoch": 0.9, "grad_norm": 1.017722725868225, "learning_rate": 9.108253860127157e-05, "loss": 1.1114, "step": 15920 }, { "epoch": 0.9, "grad_norm": 1.088148832321167, "learning_rate": 9.107686194368756e-05, "loss": 1.1011, "step": 15930 }, { "epoch": 0.9, "grad_norm": 0.9899068474769592, "learning_rate": 9.107118528610354e-05, "loss": 1.1101, "step": 15940 }, { "epoch": 0.9, "grad_norm": 0.972322940826416, "learning_rate": 9.106550862851953e-05, "loss": 1.0967, "step": 15950 }, { "epoch": 0.9, "grad_norm": 1.0191304683685303, "learning_rate": 9.105983197093551e-05, "loss": 1.097, "step": 15960 }, { "epoch": 0.91, "grad_norm": 1.044387698173523, "learning_rate": 9.105415531335151e-05, "loss": 1.1252, "step": 15970 }, { "epoch": 0.91, "grad_norm": 1.0123069286346436, "learning_rate": 9.104847865576749e-05, "loss": 1.0933, "step": 15980 }, { "epoch": 0.91, "grad_norm": 1.0085434913635254, "learning_rate": 9.104280199818347e-05, "loss": 1.0865, "step": 15990 }, { "epoch": 0.91, "grad_norm": 1.0436115264892578, "learning_rate": 9.103712534059946e-05, "loss": 1.1167, "step": 16000 }, { "epoch": 0.91, "grad_norm": 1.0386344194412231, "learning_rate": 9.103144868301544e-05, "loss": 1.119, "step": 16010 }, { "epoch": 0.91, "grad_norm": 1.007534384727478, "learning_rate": 9.102577202543143e-05, "loss": 1.1085, "step": 16020 }, { "epoch": 0.91, "grad_norm": 1.0097370147705078, "learning_rate": 9.102009536784741e-05, "loss": 1.0852, "step": 16030 }, { "epoch": 0.91, "grad_norm": 0.9968022704124451, "learning_rate": 9.101441871026341e-05, "loss": 1.0748, "step": 16040 }, { "epoch": 0.91, "grad_norm": 0.9724341630935669, "learning_rate": 9.100874205267939e-05, "loss": 1.092, "step": 16050 }, { "epoch": 0.91, "grad_norm": 0.9989782571792603, "learning_rate": 9.100306539509538e-05, "loss": 1.0981, "step": 16060 }, { "epoch": 0.91, "grad_norm": 0.9777481555938721, "learning_rate": 9.099738873751136e-05, "loss": 1.1307, "step": 16070 }, { "epoch": 0.91, "grad_norm": 1.0234135389328003, "learning_rate": 9.099171207992734e-05, "loss": 1.1415, "step": 16080 }, { "epoch": 0.91, "grad_norm": 1.0307340621948242, "learning_rate": 9.098603542234333e-05, "loss": 1.1146, "step": 16090 }, { "epoch": 0.91, "grad_norm": 1.0201928615570068, "learning_rate": 9.098035876475931e-05, "loss": 1.0931, "step": 16100 }, { "epoch": 0.91, "grad_norm": 1.0517903566360474, "learning_rate": 9.097468210717531e-05, "loss": 1.1166, "step": 16110 }, { "epoch": 0.91, "grad_norm": 0.9974765181541443, "learning_rate": 9.096900544959129e-05, "loss": 1.0927, "step": 16120 }, { "epoch": 0.91, "grad_norm": 0.9997950792312622, "learning_rate": 9.096332879200727e-05, "loss": 1.1241, "step": 16130 }, { "epoch": 0.92, "grad_norm": 0.9676579236984253, "learning_rate": 9.095765213442325e-05, "loss": 1.1386, "step": 16140 }, { "epoch": 0.92, "grad_norm": 1.0225988626480103, "learning_rate": 9.095197547683924e-05, "loss": 1.1189, "step": 16150 }, { "epoch": 0.92, "grad_norm": 1.1289271116256714, "learning_rate": 9.094629881925522e-05, "loss": 1.1279, "step": 16160 }, { "epoch": 0.92, "grad_norm": 1.0318282842636108, "learning_rate": 9.09406221616712e-05, "loss": 1.0985, "step": 16170 }, { "epoch": 0.92, "grad_norm": 1.0440020561218262, "learning_rate": 9.09349455040872e-05, "loss": 1.0883, "step": 16180 }, { "epoch": 0.92, "grad_norm": 1.0131661891937256, "learning_rate": 9.092926884650318e-05, "loss": 1.0925, "step": 16190 }, { "epoch": 0.92, "grad_norm": 1.1270027160644531, "learning_rate": 9.092359218891917e-05, "loss": 1.1077, "step": 16200 }, { "epoch": 0.92, "grad_norm": 0.9565750956535339, "learning_rate": 9.091791553133515e-05, "loss": 1.1056, "step": 16210 }, { "epoch": 0.92, "grad_norm": 1.0218476057052612, "learning_rate": 9.091223887375114e-05, "loss": 1.111, "step": 16220 }, { "epoch": 0.92, "grad_norm": 1.000506043434143, "learning_rate": 9.090656221616712e-05, "loss": 1.1337, "step": 16230 }, { "epoch": 0.92, "grad_norm": 0.9499496817588806, "learning_rate": 9.090088555858312e-05, "loss": 1.0773, "step": 16240 }, { "epoch": 0.92, "grad_norm": 1.0095534324645996, "learning_rate": 9.08952089009991e-05, "loss": 1.1108, "step": 16250 }, { "epoch": 0.92, "grad_norm": 0.9470630884170532, "learning_rate": 9.088953224341508e-05, "loss": 1.1164, "step": 16260 }, { "epoch": 0.92, "grad_norm": 1.0566656589508057, "learning_rate": 9.088385558583107e-05, "loss": 1.0911, "step": 16270 }, { "epoch": 0.92, "grad_norm": 1.0246098041534424, "learning_rate": 9.087817892824705e-05, "loss": 1.1054, "step": 16280 }, { "epoch": 0.92, "grad_norm": 1.0071094036102295, "learning_rate": 9.087250227066304e-05, "loss": 1.1089, "step": 16290 }, { "epoch": 0.92, "grad_norm": 0.9881753325462341, "learning_rate": 9.086682561307902e-05, "loss": 1.1266, "step": 16300 }, { "epoch": 0.92, "grad_norm": 1.035309910774231, "learning_rate": 9.086114895549502e-05, "loss": 1.083, "step": 16310 }, { "epoch": 0.93, "grad_norm": 1.085066556930542, "learning_rate": 9.0855472297911e-05, "loss": 1.1295, "step": 16320 }, { "epoch": 0.93, "grad_norm": 1.027355670928955, "learning_rate": 9.084979564032699e-05, "loss": 1.0911, "step": 16330 }, { "epoch": 0.93, "grad_norm": 0.9752262234687805, "learning_rate": 9.084411898274297e-05, "loss": 1.095, "step": 16340 }, { "epoch": 0.93, "grad_norm": 1.0339298248291016, "learning_rate": 9.083844232515895e-05, "loss": 1.0774, "step": 16350 }, { "epoch": 0.93, "grad_norm": 0.9961239695549011, "learning_rate": 9.083276566757494e-05, "loss": 1.0819, "step": 16360 }, { "epoch": 0.93, "grad_norm": 1.0506178140640259, "learning_rate": 9.082708900999092e-05, "loss": 1.0964, "step": 16370 }, { "epoch": 0.93, "grad_norm": 0.9840331077575684, "learning_rate": 9.08214123524069e-05, "loss": 1.0842, "step": 16380 }, { "epoch": 0.93, "grad_norm": 1.0637125968933105, "learning_rate": 9.08157356948229e-05, "loss": 1.1141, "step": 16390 }, { "epoch": 0.93, "grad_norm": 1.0476300716400146, "learning_rate": 9.081005903723888e-05, "loss": 1.0729, "step": 16400 }, { "epoch": 0.93, "grad_norm": 1.033827304840088, "learning_rate": 9.080438237965486e-05, "loss": 1.0799, "step": 16410 }, { "epoch": 0.93, "grad_norm": 1.0318756103515625, "learning_rate": 9.079870572207085e-05, "loss": 1.1009, "step": 16420 }, { "epoch": 0.93, "grad_norm": 1.0071394443511963, "learning_rate": 9.079302906448683e-05, "loss": 1.0751, "step": 16430 }, { "epoch": 0.93, "grad_norm": 1.0451158285140991, "learning_rate": 9.078735240690281e-05, "loss": 1.1002, "step": 16440 }, { "epoch": 0.93, "grad_norm": 1.0250425338745117, "learning_rate": 9.07816757493188e-05, "loss": 1.0867, "step": 16450 }, { "epoch": 0.93, "grad_norm": 1.0194082260131836, "learning_rate": 9.077599909173478e-05, "loss": 1.0818, "step": 16460 }, { "epoch": 0.93, "grad_norm": 1.0849069356918335, "learning_rate": 9.077032243415078e-05, "loss": 1.1015, "step": 16470 }, { "epoch": 0.93, "grad_norm": 1.0635998249053955, "learning_rate": 9.076464577656676e-05, "loss": 1.0977, "step": 16480 }, { "epoch": 0.94, "grad_norm": 1.0882033109664917, "learning_rate": 9.075896911898275e-05, "loss": 1.0948, "step": 16490 }, { "epoch": 0.94, "grad_norm": 1.017020583152771, "learning_rate": 9.075329246139873e-05, "loss": 1.103, "step": 16500 }, { "epoch": 0.94, "grad_norm": 0.9480723142623901, "learning_rate": 9.074761580381472e-05, "loss": 1.1073, "step": 16510 }, { "epoch": 0.94, "grad_norm": 0.9871799945831299, "learning_rate": 9.07419391462307e-05, "loss": 1.0941, "step": 16520 }, { "epoch": 0.94, "grad_norm": 1.04385244846344, "learning_rate": 9.073626248864668e-05, "loss": 1.0923, "step": 16530 }, { "epoch": 0.94, "grad_norm": 0.9880574345588684, "learning_rate": 9.073058583106268e-05, "loss": 1.0919, "step": 16540 }, { "epoch": 0.94, "grad_norm": 0.9654357433319092, "learning_rate": 9.072490917347866e-05, "loss": 1.0692, "step": 16550 }, { "epoch": 0.94, "grad_norm": 1.000730037689209, "learning_rate": 9.071923251589465e-05, "loss": 1.0901, "step": 16560 }, { "epoch": 0.94, "grad_norm": 0.9684690237045288, "learning_rate": 9.071355585831063e-05, "loss": 1.0672, "step": 16570 }, { "epoch": 0.94, "grad_norm": 0.9954023957252502, "learning_rate": 9.070787920072662e-05, "loss": 1.0788, "step": 16580 }, { "epoch": 0.94, "grad_norm": 1.0485107898712158, "learning_rate": 9.07022025431426e-05, "loss": 1.1066, "step": 16590 }, { "epoch": 0.94, "grad_norm": 0.9173890948295593, "learning_rate": 9.06965258855586e-05, "loss": 1.1112, "step": 16600 }, { "epoch": 0.94, "grad_norm": 1.0863655805587769, "learning_rate": 9.069084922797458e-05, "loss": 1.0878, "step": 16610 }, { "epoch": 0.94, "grad_norm": 0.9664111137390137, "learning_rate": 9.068517257039056e-05, "loss": 1.1347, "step": 16620 }, { "epoch": 0.94, "grad_norm": 0.9118823409080505, "learning_rate": 9.067949591280655e-05, "loss": 1.1131, "step": 16630 }, { "epoch": 0.94, "grad_norm": 1.0825508832931519, "learning_rate": 9.067381925522253e-05, "loss": 1.0686, "step": 16640 }, { "epoch": 0.94, "grad_norm": 1.0472077131271362, "learning_rate": 9.066814259763851e-05, "loss": 1.0934, "step": 16650 }, { "epoch": 0.94, "grad_norm": 0.9876842498779297, "learning_rate": 9.066246594005449e-05, "loss": 1.1036, "step": 16660 }, { "epoch": 0.95, "grad_norm": 1.0723121166229248, "learning_rate": 9.065678928247048e-05, "loss": 1.1239, "step": 16670 }, { "epoch": 0.95, "grad_norm": 1.0293035507202148, "learning_rate": 9.065111262488646e-05, "loss": 1.1212, "step": 16680 }, { "epoch": 0.95, "grad_norm": 0.9841594696044922, "learning_rate": 9.064543596730246e-05, "loss": 1.1068, "step": 16690 }, { "epoch": 0.95, "grad_norm": 1.0393431186676025, "learning_rate": 9.063975930971844e-05, "loss": 1.1048, "step": 16700 }, { "epoch": 0.95, "grad_norm": 1.0287729501724243, "learning_rate": 9.063408265213442e-05, "loss": 1.0749, "step": 16710 }, { "epoch": 0.95, "grad_norm": 0.9889966249465942, "learning_rate": 9.062840599455041e-05, "loss": 1.0951, "step": 16720 }, { "epoch": 0.95, "grad_norm": 0.9588437676429749, "learning_rate": 9.062272933696639e-05, "loss": 1.1172, "step": 16730 }, { "epoch": 0.95, "grad_norm": 1.0642303228378296, "learning_rate": 9.061705267938238e-05, "loss": 1.1044, "step": 16740 }, { "epoch": 0.95, "grad_norm": 1.082733392715454, "learning_rate": 9.061137602179836e-05, "loss": 1.085, "step": 16750 }, { "epoch": 0.95, "grad_norm": 1.0009549856185913, "learning_rate": 9.060569936421436e-05, "loss": 1.1116, "step": 16760 }, { "epoch": 0.95, "grad_norm": 1.0134121179580688, "learning_rate": 9.060002270663034e-05, "loss": 1.0861, "step": 16770 }, { "epoch": 0.95, "grad_norm": 1.0432090759277344, "learning_rate": 9.059434604904633e-05, "loss": 1.0765, "step": 16780 }, { "epoch": 0.95, "grad_norm": 1.0448909997940063, "learning_rate": 9.058866939146231e-05, "loss": 1.1201, "step": 16790 }, { "epoch": 0.95, "grad_norm": 0.9981644153594971, "learning_rate": 9.058299273387829e-05, "loss": 1.0958, "step": 16800 }, { "epoch": 0.95, "grad_norm": 1.035082221031189, "learning_rate": 9.057731607629429e-05, "loss": 1.1133, "step": 16810 }, { "epoch": 0.95, "grad_norm": 1.0626424551010132, "learning_rate": 9.057163941871027e-05, "loss": 1.1133, "step": 16820 }, { "epoch": 0.95, "grad_norm": 1.0205612182617188, "learning_rate": 9.056596276112626e-05, "loss": 1.0862, "step": 16830 }, { "epoch": 0.95, "grad_norm": 0.9773542881011963, "learning_rate": 9.056028610354224e-05, "loss": 1.0968, "step": 16840 }, { "epoch": 0.96, "grad_norm": 0.980117678642273, "learning_rate": 9.055460944595823e-05, "loss": 1.0933, "step": 16850 }, { "epoch": 0.96, "grad_norm": 1.0402412414550781, "learning_rate": 9.054893278837421e-05, "loss": 1.1189, "step": 16860 }, { "epoch": 0.96, "grad_norm": 0.973577618598938, "learning_rate": 9.05432561307902e-05, "loss": 1.1054, "step": 16870 }, { "epoch": 0.96, "grad_norm": 1.0160834789276123, "learning_rate": 9.053757947320619e-05, "loss": 1.1092, "step": 16880 }, { "epoch": 0.96, "grad_norm": 0.9204998016357422, "learning_rate": 9.053190281562217e-05, "loss": 1.0974, "step": 16890 }, { "epoch": 0.96, "grad_norm": 1.1085889339447021, "learning_rate": 9.052622615803815e-05, "loss": 1.0856, "step": 16900 }, { "epoch": 0.96, "grad_norm": 1.0582939386367798, "learning_rate": 9.052054950045414e-05, "loss": 1.1278, "step": 16910 }, { "epoch": 0.96, "grad_norm": 1.0668084621429443, "learning_rate": 9.051487284287012e-05, "loss": 1.0987, "step": 16920 }, { "epoch": 0.96, "grad_norm": 0.9617071747779846, "learning_rate": 9.05091961852861e-05, "loss": 1.0852, "step": 16930 }, { "epoch": 0.96, "grad_norm": 1.0114076137542725, "learning_rate": 9.050351952770209e-05, "loss": 1.091, "step": 16940 }, { "epoch": 0.96, "grad_norm": 1.0092146396636963, "learning_rate": 9.049784287011807e-05, "loss": 1.0902, "step": 16950 }, { "epoch": 0.96, "grad_norm": 1.0551519393920898, "learning_rate": 9.049216621253407e-05, "loss": 1.089, "step": 16960 }, { "epoch": 0.96, "grad_norm": 0.9967354536056519, "learning_rate": 9.048648955495005e-05, "loss": 1.1306, "step": 16970 }, { "epoch": 0.96, "grad_norm": 0.8968470692634583, "learning_rate": 9.048081289736604e-05, "loss": 1.0836, "step": 16980 }, { "epoch": 0.96, "grad_norm": 1.001888632774353, "learning_rate": 9.047513623978202e-05, "loss": 1.0971, "step": 16990 }, { "epoch": 0.96, "grad_norm": 1.0741177797317505, "learning_rate": 9.0469459582198e-05, "loss": 1.098, "step": 17000 }, { "epoch": 0.96, "grad_norm": 1.0731189250946045, "learning_rate": 9.046378292461399e-05, "loss": 1.0688, "step": 17010 }, { "epoch": 0.97, "grad_norm": 0.9876343011856079, "learning_rate": 9.045810626702997e-05, "loss": 1.0867, "step": 17020 }, { "epoch": 0.97, "grad_norm": 0.9558156132698059, "learning_rate": 9.045242960944597e-05, "loss": 1.0816, "step": 17030 }, { "epoch": 0.97, "grad_norm": 1.0600016117095947, "learning_rate": 9.044675295186195e-05, "loss": 1.0704, "step": 17040 }, { "epoch": 0.97, "grad_norm": 1.0841532945632935, "learning_rate": 9.044107629427794e-05, "loss": 1.1014, "step": 17050 }, { "epoch": 0.97, "grad_norm": 1.0159236192703247, "learning_rate": 9.043539963669392e-05, "loss": 1.0973, "step": 17060 }, { "epoch": 0.97, "grad_norm": 0.9998457431793213, "learning_rate": 9.042972297910991e-05, "loss": 1.1017, "step": 17070 }, { "epoch": 0.97, "grad_norm": 1.0251778364181519, "learning_rate": 9.042404632152589e-05, "loss": 1.0932, "step": 17080 }, { "epoch": 0.97, "grad_norm": 0.9856871366500854, "learning_rate": 9.041836966394187e-05, "loss": 1.0933, "step": 17090 }, { "epoch": 0.97, "grad_norm": 1.0353485345840454, "learning_rate": 9.041269300635787e-05, "loss": 1.0815, "step": 17100 }, { "epoch": 0.97, "grad_norm": 1.0373533964157104, "learning_rate": 9.040701634877385e-05, "loss": 1.1156, "step": 17110 }, { "epoch": 0.97, "grad_norm": 1.0101714134216309, "learning_rate": 9.040133969118984e-05, "loss": 1.0846, "step": 17120 }, { "epoch": 0.97, "grad_norm": 1.0176833868026733, "learning_rate": 9.039566303360582e-05, "loss": 1.1061, "step": 17130 }, { "epoch": 0.97, "grad_norm": 0.9944831728935242, "learning_rate": 9.038998637602181e-05, "loss": 1.0621, "step": 17140 }, { "epoch": 0.97, "grad_norm": 1.041317343711853, "learning_rate": 9.03843097184378e-05, "loss": 1.1002, "step": 17150 }, { "epoch": 0.97, "grad_norm": 0.995404839515686, "learning_rate": 9.037863306085377e-05, "loss": 1.1079, "step": 17160 }, { "epoch": 0.97, "grad_norm": 0.944424033164978, "learning_rate": 9.037295640326975e-05, "loss": 1.089, "step": 17170 }, { "epoch": 0.97, "grad_norm": 1.1730425357818604, "learning_rate": 9.036727974568573e-05, "loss": 1.0898, "step": 17180 }, { "epoch": 0.97, "grad_norm": 1.0295026302337646, "learning_rate": 9.036160308810173e-05, "loss": 1.1209, "step": 17190 }, { "epoch": 0.98, "grad_norm": 1.0284481048583984, "learning_rate": 9.035592643051771e-05, "loss": 1.1201, "step": 17200 }, { "epoch": 0.98, "grad_norm": 0.9916067719459534, "learning_rate": 9.03502497729337e-05, "loss": 1.0755, "step": 17210 }, { "epoch": 0.98, "grad_norm": 0.9887465238571167, "learning_rate": 9.034457311534968e-05, "loss": 1.0737, "step": 17220 }, { "epoch": 0.98, "grad_norm": 1.05711829662323, "learning_rate": 9.033889645776567e-05, "loss": 1.1007, "step": 17230 }, { "epoch": 0.98, "grad_norm": 0.9847268462181091, "learning_rate": 9.033321980018165e-05, "loss": 1.1078, "step": 17240 }, { "epoch": 0.98, "grad_norm": 1.0585123300552368, "learning_rate": 9.032754314259765e-05, "loss": 1.1095, "step": 17250 }, { "epoch": 0.98, "grad_norm": 1.003151297569275, "learning_rate": 9.032186648501363e-05, "loss": 1.0729, "step": 17260 }, { "epoch": 0.98, "grad_norm": 0.992611289024353, "learning_rate": 9.031618982742961e-05, "loss": 1.0906, "step": 17270 }, { "epoch": 0.98, "grad_norm": 1.0341038703918457, "learning_rate": 9.03105131698456e-05, "loss": 1.123, "step": 17280 }, { "epoch": 0.98, "grad_norm": 1.0193891525268555, "learning_rate": 9.030483651226158e-05, "loss": 1.0853, "step": 17290 }, { "epoch": 0.98, "grad_norm": 1.0172853469848633, "learning_rate": 9.029915985467757e-05, "loss": 1.1002, "step": 17300 }, { "epoch": 0.98, "grad_norm": 0.9676674008369446, "learning_rate": 9.029348319709355e-05, "loss": 1.0922, "step": 17310 }, { "epoch": 0.98, "grad_norm": 1.0233944654464722, "learning_rate": 9.028780653950955e-05, "loss": 1.1404, "step": 17320 }, { "epoch": 0.98, "grad_norm": 1.0414488315582275, "learning_rate": 9.028212988192553e-05, "loss": 1.0803, "step": 17330 }, { "epoch": 0.98, "grad_norm": 1.029581904411316, "learning_rate": 9.027645322434152e-05, "loss": 1.0939, "step": 17340 }, { "epoch": 0.98, "grad_norm": 1.0005854368209839, "learning_rate": 9.02707765667575e-05, "loss": 1.106, "step": 17350 }, { "epoch": 0.98, "grad_norm": 1.03742253780365, "learning_rate": 9.026509990917348e-05, "loss": 1.0683, "step": 17360 }, { "epoch": 0.98, "grad_norm": 1.039552092552185, "learning_rate": 9.025942325158947e-05, "loss": 1.094, "step": 17370 }, { "epoch": 0.99, "grad_norm": 0.9637312889099121, "learning_rate": 9.025374659400545e-05, "loss": 1.1125, "step": 17380 }, { "epoch": 0.99, "grad_norm": 1.0168901681900024, "learning_rate": 9.024806993642145e-05, "loss": 1.1094, "step": 17390 }, { "epoch": 0.99, "grad_norm": 1.161018967628479, "learning_rate": 9.024239327883743e-05, "loss": 1.0823, "step": 17400 }, { "epoch": 0.99, "grad_norm": 1.1037890911102295, "learning_rate": 9.023671662125341e-05, "loss": 1.1066, "step": 17410 }, { "epoch": 0.99, "grad_norm": 1.009763240814209, "learning_rate": 9.023103996366939e-05, "loss": 1.0751, "step": 17420 }, { "epoch": 0.99, "grad_norm": 1.1191492080688477, "learning_rate": 9.022536330608538e-05, "loss": 1.0866, "step": 17430 }, { "epoch": 0.99, "grad_norm": 0.9924859404563904, "learning_rate": 9.021968664850136e-05, "loss": 1.0526, "step": 17440 }, { "epoch": 0.99, "grad_norm": 1.0444775819778442, "learning_rate": 9.021400999091734e-05, "loss": 1.0825, "step": 17450 }, { "epoch": 0.99, "grad_norm": 1.0255706310272217, "learning_rate": 9.020833333333334e-05, "loss": 1.0851, "step": 17460 }, { "epoch": 0.99, "grad_norm": 1.0679575204849243, "learning_rate": 9.020265667574931e-05, "loss": 1.0976, "step": 17470 }, { "epoch": 0.99, "grad_norm": 1.0215342044830322, "learning_rate": 9.019698001816531e-05, "loss": 1.0901, "step": 17480 }, { "epoch": 0.99, "grad_norm": 1.001754879951477, "learning_rate": 9.019130336058129e-05, "loss": 1.0755, "step": 17490 }, { "epoch": 0.99, "grad_norm": 1.0387197732925415, "learning_rate": 9.018562670299728e-05, "loss": 1.0974, "step": 17500 }, { "epoch": 0.99, "grad_norm": 1.0036932229995728, "learning_rate": 9.017995004541326e-05, "loss": 1.0985, "step": 17510 }, { "epoch": 0.99, "grad_norm": 0.926325798034668, "learning_rate": 9.017427338782926e-05, "loss": 1.0836, "step": 17520 }, { "epoch": 0.99, "grad_norm": 1.029331922531128, "learning_rate": 9.016859673024524e-05, "loss": 1.0849, "step": 17530 }, { "epoch": 0.99, "grad_norm": 1.014387845993042, "learning_rate": 9.016292007266122e-05, "loss": 1.0952, "step": 17540 }, { "epoch": 1.0, "grad_norm": 0.9838865399360657, "learning_rate": 9.015724341507721e-05, "loss": 1.0899, "step": 17550 }, { "epoch": 1.0, "grad_norm": 1.0315765142440796, "learning_rate": 9.015156675749319e-05, "loss": 1.096, "step": 17560 }, { "epoch": 1.0, "grad_norm": 1.0071032047271729, "learning_rate": 9.014589009990918e-05, "loss": 1.0821, "step": 17570 }, { "epoch": 1.0, "grad_norm": 0.9945970773696899, "learning_rate": 9.014021344232516e-05, "loss": 1.0856, "step": 17580 }, { "epoch": 1.0, "grad_norm": 0.979688286781311, "learning_rate": 9.013453678474116e-05, "loss": 1.0755, "step": 17590 }, { "epoch": 1.0, "grad_norm": 1.0206164121627808, "learning_rate": 9.012886012715714e-05, "loss": 1.1121, "step": 17600 }, { "epoch": 1.0, "grad_norm": 1.1330362558364868, "learning_rate": 9.012318346957313e-05, "loss": 1.0503, "step": 17610 }, { "epoch": 1.0, "grad_norm": 1.0378249883651733, "learning_rate": 9.011750681198911e-05, "loss": 1.1076, "step": 17620 }, { "epoch": 1.0, "grad_norm": 1.0391685962677002, "learning_rate": 9.011183015440509e-05, "loss": 1.1068, "step": 17630 }, { "epoch": 1.0, "eval_loss": 1.3208675384521484, "eval_runtime": 6200.7764, "eval_samples_per_second": 267.763, "eval_steps_per_second": 4.184, "step": 17636 }, { "epoch": 1.0, "grad_norm": 1.0272337198257446, "learning_rate": 9.010615349682108e-05, "loss": 1.0638, "step": 17640 }, { "epoch": 1.0, "grad_norm": 1.0140159130096436, "learning_rate": 9.010047683923706e-05, "loss": 1.0704, "step": 17650 }, { "epoch": 1.0, "grad_norm": 1.0640531778335571, "learning_rate": 9.009480018165306e-05, "loss": 1.1009, "step": 17660 }, { "epoch": 1.0, "grad_norm": 1.042861819267273, "learning_rate": 9.008912352406904e-05, "loss": 1.0742, "step": 17670 }, { "epoch": 1.0, "grad_norm": 1.0285511016845703, "learning_rate": 9.008344686648502e-05, "loss": 1.0888, "step": 17680 }, { "epoch": 1.0, "grad_norm": 0.9652631282806396, "learning_rate": 9.0077770208901e-05, "loss": 1.0655, "step": 17690 }, { "epoch": 1.0, "grad_norm": 0.9820543527603149, "learning_rate": 9.007209355131699e-05, "loss": 1.0624, "step": 17700 }, { "epoch": 1.0, "grad_norm": 1.0196304321289062, "learning_rate": 9.006641689373297e-05, "loss": 1.0978, "step": 17710 }, { "epoch": 1.0, "grad_norm": 1.012948751449585, "learning_rate": 9.006074023614895e-05, "loss": 1.0792, "step": 17720 }, { "epoch": 1.01, "grad_norm": 1.1765342950820923, "learning_rate": 9.005506357856494e-05, "loss": 1.067, "step": 17730 }, { "epoch": 1.01, "grad_norm": 1.0257614850997925, "learning_rate": 9.004938692098092e-05, "loss": 1.0455, "step": 17740 }, { "epoch": 1.01, "grad_norm": 1.0249483585357666, "learning_rate": 9.004371026339692e-05, "loss": 1.0867, "step": 17750 }, { "epoch": 1.01, "grad_norm": 1.0736576318740845, "learning_rate": 9.00380336058129e-05, "loss": 1.0689, "step": 17760 }, { "epoch": 1.01, "grad_norm": 1.0121577978134155, "learning_rate": 9.003235694822889e-05, "loss": 1.0402, "step": 17770 }, { "epoch": 1.01, "grad_norm": 1.074211835861206, "learning_rate": 9.002668029064487e-05, "loss": 1.0674, "step": 17780 }, { "epoch": 1.01, "grad_norm": 1.0516862869262695, "learning_rate": 9.002100363306086e-05, "loss": 1.0398, "step": 17790 }, { "epoch": 1.01, "grad_norm": 1.0739288330078125, "learning_rate": 9.001532697547684e-05, "loss": 1.0923, "step": 17800 }, { "epoch": 1.01, "grad_norm": 1.0997319221496582, "learning_rate": 9.000965031789282e-05, "loss": 1.064, "step": 17810 }, { "epoch": 1.01, "grad_norm": 1.0071208477020264, "learning_rate": 9.000397366030882e-05, "loss": 1.0634, "step": 17820 }, { "epoch": 1.01, "grad_norm": 1.0944613218307495, "learning_rate": 8.99982970027248e-05, "loss": 1.05, "step": 17830 }, { "epoch": 1.01, "grad_norm": 1.0223208665847778, "learning_rate": 8.999262034514079e-05, "loss": 1.0763, "step": 17840 }, { "epoch": 1.01, "grad_norm": 1.1282323598861694, "learning_rate": 8.998694368755677e-05, "loss": 1.0908, "step": 17850 }, { "epoch": 1.01, "grad_norm": 1.0192111730575562, "learning_rate": 8.998126702997276e-05, "loss": 1.0577, "step": 17860 }, { "epoch": 1.01, "grad_norm": 1.055091142654419, "learning_rate": 8.997559037238874e-05, "loss": 1.0782, "step": 17870 }, { "epoch": 1.01, "grad_norm": 0.9943811893463135, "learning_rate": 8.996991371480474e-05, "loss": 1.0907, "step": 17880 }, { "epoch": 1.01, "grad_norm": 0.9999253749847412, "learning_rate": 8.996423705722072e-05, "loss": 1.0562, "step": 17890 }, { "epoch": 1.01, "grad_norm": 1.0613141059875488, "learning_rate": 8.99585603996367e-05, "loss": 1.0628, "step": 17900 }, { "epoch": 1.02, "grad_norm": 1.0777662992477417, "learning_rate": 8.995288374205269e-05, "loss": 1.0986, "step": 17910 }, { "epoch": 1.02, "grad_norm": 1.059638261795044, "learning_rate": 8.994720708446867e-05, "loss": 1.0896, "step": 17920 }, { "epoch": 1.02, "grad_norm": 1.0488141775131226, "learning_rate": 8.994153042688465e-05, "loss": 1.0829, "step": 17930 }, { "epoch": 1.02, "grad_norm": 1.0094916820526123, "learning_rate": 8.993585376930064e-05, "loss": 1.0753, "step": 17940 }, { "epoch": 1.02, "grad_norm": 1.0711807012557983, "learning_rate": 8.993017711171662e-05, "loss": 1.0536, "step": 17950 }, { "epoch": 1.02, "grad_norm": 1.0353243350982666, "learning_rate": 8.99245004541326e-05, "loss": 1.0736, "step": 17960 }, { "epoch": 1.02, "grad_norm": 1.038089394569397, "learning_rate": 8.99188237965486e-05, "loss": 1.0663, "step": 17970 }, { "epoch": 1.02, "grad_norm": 1.0735338926315308, "learning_rate": 8.991314713896458e-05, "loss": 1.0551, "step": 17980 }, { "epoch": 1.02, "grad_norm": 1.0356054306030273, "learning_rate": 8.990747048138056e-05, "loss": 1.0542, "step": 17990 }, { "epoch": 1.02, "grad_norm": 1.0180166959762573, "learning_rate": 8.990179382379655e-05, "loss": 1.0637, "step": 18000 }, { "epoch": 1.02, "grad_norm": 1.0506871938705444, "learning_rate": 8.989611716621253e-05, "loss": 1.077, "step": 18010 }, { "epoch": 1.02, "grad_norm": 1.0578285455703735, "learning_rate": 8.989044050862852e-05, "loss": 1.0752, "step": 18020 }, { "epoch": 1.02, "grad_norm": 1.073859453201294, "learning_rate": 8.98847638510445e-05, "loss": 1.0771, "step": 18030 }, { "epoch": 1.02, "grad_norm": 1.0111383199691772, "learning_rate": 8.98790871934605e-05, "loss": 1.0904, "step": 18040 }, { "epoch": 1.02, "grad_norm": 1.1048519611358643, "learning_rate": 8.987341053587648e-05, "loss": 1.0655, "step": 18050 }, { "epoch": 1.02, "grad_norm": 1.0775214433670044, "learning_rate": 8.986773387829247e-05, "loss": 1.0644, "step": 18060 }, { "epoch": 1.02, "grad_norm": 0.9977869391441345, "learning_rate": 8.986205722070845e-05, "loss": 1.0918, "step": 18070 }, { "epoch": 1.03, "grad_norm": 1.061667799949646, "learning_rate": 8.985638056312443e-05, "loss": 1.0314, "step": 18080 }, { "epoch": 1.03, "grad_norm": 1.0475828647613525, "learning_rate": 8.985070390554043e-05, "loss": 1.0829, "step": 18090 }, { "epoch": 1.03, "grad_norm": 1.0815876722335815, "learning_rate": 8.98450272479564e-05, "loss": 1.0723, "step": 18100 }, { "epoch": 1.03, "grad_norm": 1.0456702709197998, "learning_rate": 8.98393505903724e-05, "loss": 1.0581, "step": 18110 }, { "epoch": 1.03, "grad_norm": 1.1460204124450684, "learning_rate": 8.983367393278838e-05, "loss": 1.0803, "step": 18120 }, { "epoch": 1.03, "grad_norm": 0.9976013898849487, "learning_rate": 8.982799727520437e-05, "loss": 1.0962, "step": 18130 }, { "epoch": 1.03, "grad_norm": 1.0714240074157715, "learning_rate": 8.982232061762035e-05, "loss": 1.0517, "step": 18140 }, { "epoch": 1.03, "grad_norm": 1.0284347534179688, "learning_rate": 8.981664396003635e-05, "loss": 1.0596, "step": 18150 }, { "epoch": 1.03, "grad_norm": 1.0487271547317505, "learning_rate": 8.981096730245233e-05, "loss": 1.0712, "step": 18160 }, { "epoch": 1.03, "grad_norm": 1.0549498796463013, "learning_rate": 8.98052906448683e-05, "loss": 1.082, "step": 18170 }, { "epoch": 1.03, "grad_norm": 1.1075046062469482, "learning_rate": 8.97996139872843e-05, "loss": 1.0957, "step": 18180 }, { "epoch": 1.03, "grad_norm": 1.034991979598999, "learning_rate": 8.979393732970028e-05, "loss": 1.0618, "step": 18190 }, { "epoch": 1.03, "grad_norm": 1.046815276145935, "learning_rate": 8.978826067211626e-05, "loss": 1.0902, "step": 18200 }, { "epoch": 1.03, "grad_norm": 1.002024531364441, "learning_rate": 8.978258401453224e-05, "loss": 1.0713, "step": 18210 }, { "epoch": 1.03, "grad_norm": 1.0288420915603638, "learning_rate": 8.977690735694823e-05, "loss": 1.0511, "step": 18220 }, { "epoch": 1.03, "grad_norm": 1.0884274244308472, "learning_rate": 8.977123069936421e-05, "loss": 1.0602, "step": 18230 }, { "epoch": 1.03, "grad_norm": 1.0289725065231323, "learning_rate": 8.97655540417802e-05, "loss": 1.0693, "step": 18240 }, { "epoch": 1.03, "grad_norm": 1.0350127220153809, "learning_rate": 8.975987738419619e-05, "loss": 1.0825, "step": 18250 }, { "epoch": 1.04, "grad_norm": 1.0942857265472412, "learning_rate": 8.975420072661217e-05, "loss": 1.0988, "step": 18260 }, { "epoch": 1.04, "grad_norm": 1.070473313331604, "learning_rate": 8.974852406902816e-05, "loss": 1.0662, "step": 18270 }, { "epoch": 1.04, "grad_norm": 1.0537196397781372, "learning_rate": 8.974284741144414e-05, "loss": 1.0691, "step": 18280 }, { "epoch": 1.04, "grad_norm": 1.0491708517074585, "learning_rate": 8.973717075386013e-05, "loss": 1.058, "step": 18290 }, { "epoch": 1.04, "grad_norm": 1.0347707271575928, "learning_rate": 8.973149409627611e-05, "loss": 1.0573, "step": 18300 }, { "epoch": 1.04, "grad_norm": 1.0887863636016846, "learning_rate": 8.97258174386921e-05, "loss": 1.0684, "step": 18310 }, { "epoch": 1.04, "grad_norm": 1.038752555847168, "learning_rate": 8.972014078110809e-05, "loss": 1.07, "step": 18320 }, { "epoch": 1.04, "grad_norm": 1.014475703239441, "learning_rate": 8.971446412352408e-05, "loss": 1.0827, "step": 18330 }, { "epoch": 1.04, "grad_norm": 1.1164541244506836, "learning_rate": 8.970878746594006e-05, "loss": 1.0691, "step": 18340 }, { "epoch": 1.04, "grad_norm": 0.9665988087654114, "learning_rate": 8.970311080835604e-05, "loss": 1.0599, "step": 18350 }, { "epoch": 1.04, "grad_norm": 1.0009914636611938, "learning_rate": 8.969743415077203e-05, "loss": 1.067, "step": 18360 }, { "epoch": 1.04, "grad_norm": 1.1242344379425049, "learning_rate": 8.969175749318801e-05, "loss": 1.1035, "step": 18370 }, { "epoch": 1.04, "grad_norm": 1.047084927558899, "learning_rate": 8.9686080835604e-05, "loss": 1.0523, "step": 18380 }, { "epoch": 1.04, "grad_norm": 1.108413815498352, "learning_rate": 8.968040417801999e-05, "loss": 1.0795, "step": 18390 }, { "epoch": 1.04, "grad_norm": 1.0000178813934326, "learning_rate": 8.967472752043598e-05, "loss": 1.0558, "step": 18400 }, { "epoch": 1.04, "grad_norm": 1.0701167583465576, "learning_rate": 8.966905086285196e-05, "loss": 1.0672, "step": 18410 }, { "epoch": 1.04, "grad_norm": 1.001745343208313, "learning_rate": 8.966337420526795e-05, "loss": 1.0823, "step": 18420 }, { "epoch": 1.05, "grad_norm": 1.0773652791976929, "learning_rate": 8.965769754768393e-05, "loss": 1.0735, "step": 18430 }, { "epoch": 1.05, "grad_norm": 0.9949339032173157, "learning_rate": 8.965202089009991e-05, "loss": 1.0779, "step": 18440 }, { "epoch": 1.05, "grad_norm": 1.084444522857666, "learning_rate": 8.96463442325159e-05, "loss": 1.0818, "step": 18450 }, { "epoch": 1.05, "grad_norm": 0.9539121985435486, "learning_rate": 8.964066757493189e-05, "loss": 1.078, "step": 18460 }, { "epoch": 1.05, "grad_norm": 1.049892544746399, "learning_rate": 8.963499091734787e-05, "loss": 1.0516, "step": 18470 }, { "epoch": 1.05, "grad_norm": 1.0421289205551147, "learning_rate": 8.962931425976385e-05, "loss": 1.066, "step": 18480 }, { "epoch": 1.05, "grad_norm": 1.0193041563034058, "learning_rate": 8.962363760217984e-05, "loss": 1.0889, "step": 18490 }, { "epoch": 1.05, "grad_norm": 1.0222270488739014, "learning_rate": 8.961796094459582e-05, "loss": 1.0589, "step": 18500 }, { "epoch": 1.05, "grad_norm": 1.0511577129364014, "learning_rate": 8.961228428701181e-05, "loss": 1.0918, "step": 18510 }, { "epoch": 1.05, "grad_norm": 1.0875929594039917, "learning_rate": 8.96066076294278e-05, "loss": 1.0481, "step": 18520 }, { "epoch": 1.05, "grad_norm": 1.0534061193466187, "learning_rate": 8.960093097184377e-05, "loss": 1.0609, "step": 18530 }, { "epoch": 1.05, "grad_norm": 1.0688360929489136, "learning_rate": 8.959525431425977e-05, "loss": 1.0547, "step": 18540 }, { "epoch": 1.05, "grad_norm": 1.019882321357727, "learning_rate": 8.958957765667575e-05, "loss": 1.0552, "step": 18550 }, { "epoch": 1.05, "grad_norm": 1.1323702335357666, "learning_rate": 8.958390099909174e-05, "loss": 1.0669, "step": 18560 }, { "epoch": 1.05, "grad_norm": 0.9842608571052551, "learning_rate": 8.957822434150772e-05, "loss": 1.0766, "step": 18570 }, { "epoch": 1.05, "grad_norm": 1.121148705482483, "learning_rate": 8.957254768392371e-05, "loss": 1.0518, "step": 18580 }, { "epoch": 1.05, "grad_norm": 1.0077377557754517, "learning_rate": 8.95668710263397e-05, "loss": 1.0768, "step": 18590 }, { "epoch": 1.05, "grad_norm": 1.0590720176696777, "learning_rate": 8.956119436875569e-05, "loss": 1.0761, "step": 18600 }, { "epoch": 1.06, "grad_norm": 1.0470386743545532, "learning_rate": 8.955551771117167e-05, "loss": 1.0918, "step": 18610 }, { "epoch": 1.06, "grad_norm": 1.0292167663574219, "learning_rate": 8.954984105358765e-05, "loss": 1.0846, "step": 18620 }, { "epoch": 1.06, "grad_norm": 1.035483956336975, "learning_rate": 8.954416439600364e-05, "loss": 1.0476, "step": 18630 }, { "epoch": 1.06, "grad_norm": 1.0064340829849243, "learning_rate": 8.953848773841962e-05, "loss": 1.0865, "step": 18640 }, { "epoch": 1.06, "grad_norm": 0.9858803153038025, "learning_rate": 8.953281108083561e-05, "loss": 1.0722, "step": 18650 }, { "epoch": 1.06, "grad_norm": 1.051945447921753, "learning_rate": 8.95271344232516e-05, "loss": 1.0672, "step": 18660 }, { "epoch": 1.06, "grad_norm": 1.0722295045852661, "learning_rate": 8.952145776566759e-05, "loss": 1.0731, "step": 18670 }, { "epoch": 1.06, "grad_norm": 1.0204280614852905, "learning_rate": 8.951578110808357e-05, "loss": 1.0963, "step": 18680 }, { "epoch": 1.06, "grad_norm": 1.0194505453109741, "learning_rate": 8.951010445049955e-05, "loss": 1.0698, "step": 18690 }, { "epoch": 1.06, "grad_norm": 1.034116268157959, "learning_rate": 8.950442779291554e-05, "loss": 1.0601, "step": 18700 }, { "epoch": 1.06, "grad_norm": 1.0689849853515625, "learning_rate": 8.949875113533152e-05, "loss": 1.0179, "step": 18710 }, { "epoch": 1.06, "grad_norm": 1.1031112670898438, "learning_rate": 8.94930744777475e-05, "loss": 1.0724, "step": 18720 }, { "epoch": 1.06, "grad_norm": 1.0775904655456543, "learning_rate": 8.948739782016348e-05, "loss": 1.0667, "step": 18730 }, { "epoch": 1.06, "grad_norm": 1.0699291229248047, "learning_rate": 8.948172116257948e-05, "loss": 1.0793, "step": 18740 }, { "epoch": 1.06, "grad_norm": 1.0025135278701782, "learning_rate": 8.947604450499545e-05, "loss": 1.0773, "step": 18750 }, { "epoch": 1.06, "grad_norm": 1.0680561065673828, "learning_rate": 8.947036784741145e-05, "loss": 1.0576, "step": 18760 }, { "epoch": 1.06, "grad_norm": 0.977552592754364, "learning_rate": 8.946469118982743e-05, "loss": 1.1056, "step": 18770 }, { "epoch": 1.06, "grad_norm": 1.0406081676483154, "learning_rate": 8.945901453224342e-05, "loss": 1.0708, "step": 18780 }, { "epoch": 1.07, "grad_norm": 1.0886889696121216, "learning_rate": 8.94533378746594e-05, "loss": 1.0388, "step": 18790 }, { "epoch": 1.07, "grad_norm": 1.0567853450775146, "learning_rate": 8.94476612170754e-05, "loss": 1.0588, "step": 18800 }, { "epoch": 1.07, "grad_norm": 0.9497102499008179, "learning_rate": 8.944198455949138e-05, "loss": 1.0768, "step": 18810 }, { "epoch": 1.07, "grad_norm": 1.059110164642334, "learning_rate": 8.943630790190736e-05, "loss": 1.0935, "step": 18820 }, { "epoch": 1.07, "grad_norm": 1.0902700424194336, "learning_rate": 8.943063124432335e-05, "loss": 1.0711, "step": 18830 }, { "epoch": 1.07, "grad_norm": 1.0138581991195679, "learning_rate": 8.942495458673933e-05, "loss": 1.0672, "step": 18840 }, { "epoch": 1.07, "grad_norm": 1.1148749589920044, "learning_rate": 8.941927792915532e-05, "loss": 1.0365, "step": 18850 }, { "epoch": 1.07, "grad_norm": 1.0308160781860352, "learning_rate": 8.94136012715713e-05, "loss": 1.0746, "step": 18860 }, { "epoch": 1.07, "grad_norm": 0.9958709478378296, "learning_rate": 8.94079246139873e-05, "loss": 1.0665, "step": 18870 }, { "epoch": 1.07, "grad_norm": 1.0176873207092285, "learning_rate": 8.940224795640328e-05, "loss": 1.0553, "step": 18880 }, { "epoch": 1.07, "grad_norm": 1.072497844696045, "learning_rate": 8.939657129881927e-05, "loss": 1.0611, "step": 18890 }, { "epoch": 1.07, "grad_norm": 1.07579505443573, "learning_rate": 8.939089464123525e-05, "loss": 1.0549, "step": 18900 }, { "epoch": 1.07, "grad_norm": 0.9873781204223633, "learning_rate": 8.938521798365123e-05, "loss": 1.0562, "step": 18910 }, { "epoch": 1.07, "grad_norm": 0.9587239027023315, "learning_rate": 8.937954132606722e-05, "loss": 1.0475, "step": 18920 }, { "epoch": 1.07, "grad_norm": 1.067460536956787, "learning_rate": 8.93738646684832e-05, "loss": 1.0559, "step": 18930 }, { "epoch": 1.07, "grad_norm": 1.0855406522750854, "learning_rate": 8.93681880108992e-05, "loss": 1.0829, "step": 18940 }, { "epoch": 1.07, "grad_norm": 1.0238043069839478, "learning_rate": 8.936251135331518e-05, "loss": 1.0884, "step": 18950 }, { "epoch": 1.08, "grad_norm": 1.063547134399414, "learning_rate": 8.935683469573116e-05, "loss": 1.0744, "step": 18960 }, { "epoch": 1.08, "grad_norm": 1.0184129476547241, "learning_rate": 8.935115803814714e-05, "loss": 1.0475, "step": 18970 }, { "epoch": 1.08, "grad_norm": 1.0359212160110474, "learning_rate": 8.934548138056313e-05, "loss": 1.0678, "step": 18980 }, { "epoch": 1.08, "grad_norm": 1.047886848449707, "learning_rate": 8.933980472297911e-05, "loss": 1.0656, "step": 18990 }, { "epoch": 1.08, "grad_norm": 1.0408378839492798, "learning_rate": 8.933412806539509e-05, "loss": 1.078, "step": 19000 }, { "epoch": 1.08, "grad_norm": 0.9728055596351624, "learning_rate": 8.932845140781108e-05, "loss": 1.084, "step": 19010 }, { "epoch": 1.08, "grad_norm": 0.999902606010437, "learning_rate": 8.932277475022706e-05, "loss": 1.0892, "step": 19020 }, { "epoch": 1.08, "grad_norm": 0.9725161194801331, "learning_rate": 8.931709809264306e-05, "loss": 1.0793, "step": 19030 }, { "epoch": 1.08, "grad_norm": 1.0521583557128906, "learning_rate": 8.931142143505904e-05, "loss": 1.089, "step": 19040 }, { "epoch": 1.08, "grad_norm": 1.0311439037322998, "learning_rate": 8.930574477747503e-05, "loss": 1.0646, "step": 19050 }, { "epoch": 1.08, "grad_norm": 1.0065784454345703, "learning_rate": 8.930006811989101e-05, "loss": 1.0619, "step": 19060 }, { "epoch": 1.08, "grad_norm": 1.0242995023727417, "learning_rate": 8.9294391462307e-05, "loss": 1.0608, "step": 19070 }, { "epoch": 1.08, "grad_norm": 1.0669872760772705, "learning_rate": 8.928871480472298e-05, "loss": 1.067, "step": 19080 }, { "epoch": 1.08, "grad_norm": 1.0203746557235718, "learning_rate": 8.928303814713896e-05, "loss": 1.0611, "step": 19090 }, { "epoch": 1.08, "grad_norm": 1.032788872718811, "learning_rate": 8.927736148955496e-05, "loss": 1.077, "step": 19100 }, { "epoch": 1.08, "grad_norm": 1.0482443571090698, "learning_rate": 8.927168483197094e-05, "loss": 1.0772, "step": 19110 }, { "epoch": 1.08, "grad_norm": 1.0732501745224, "learning_rate": 8.926600817438693e-05, "loss": 1.0498, "step": 19120 }, { "epoch": 1.08, "grad_norm": 1.025858998298645, "learning_rate": 8.926033151680291e-05, "loss": 1.0825, "step": 19130 }, { "epoch": 1.09, "grad_norm": 1.0182279348373413, "learning_rate": 8.92546548592189e-05, "loss": 1.0904, "step": 19140 }, { "epoch": 1.09, "grad_norm": 1.1223173141479492, "learning_rate": 8.924897820163488e-05, "loss": 1.0705, "step": 19150 }, { "epoch": 1.09, "grad_norm": 1.0868470668792725, "learning_rate": 8.924330154405088e-05, "loss": 1.0531, "step": 19160 }, { "epoch": 1.09, "grad_norm": 1.0468647480010986, "learning_rate": 8.923762488646686e-05, "loss": 1.0418, "step": 19170 }, { "epoch": 1.09, "grad_norm": 1.009442925453186, "learning_rate": 8.923194822888284e-05, "loss": 1.1078, "step": 19180 }, { "epoch": 1.09, "grad_norm": 1.1166343688964844, "learning_rate": 8.922627157129883e-05, "loss": 1.0675, "step": 19190 }, { "epoch": 1.09, "grad_norm": 1.004292368888855, "learning_rate": 8.922059491371481e-05, "loss": 1.0636, "step": 19200 }, { "epoch": 1.09, "grad_norm": 1.018625259399414, "learning_rate": 8.921491825613079e-05, "loss": 1.0579, "step": 19210 }, { "epoch": 1.09, "grad_norm": 1.0357282161712646, "learning_rate": 8.920924159854678e-05, "loss": 1.0572, "step": 19220 }, { "epoch": 1.09, "grad_norm": 1.098685383796692, "learning_rate": 8.920356494096276e-05, "loss": 1.07, "step": 19230 }, { "epoch": 1.09, "grad_norm": 1.0277825593948364, "learning_rate": 8.919788828337874e-05, "loss": 1.056, "step": 19240 }, { "epoch": 1.09, "grad_norm": 1.026738166809082, "learning_rate": 8.919221162579474e-05, "loss": 1.0929, "step": 19250 }, { "epoch": 1.09, "grad_norm": 1.071790099143982, "learning_rate": 8.918653496821072e-05, "loss": 1.073, "step": 19260 }, { "epoch": 1.09, "grad_norm": 1.056937575340271, "learning_rate": 8.91808583106267e-05, "loss": 1.074, "step": 19270 }, { "epoch": 1.09, "grad_norm": 1.060760736465454, "learning_rate": 8.917518165304269e-05, "loss": 1.0559, "step": 19280 }, { "epoch": 1.09, "grad_norm": 0.938668966293335, "learning_rate": 8.916950499545867e-05, "loss": 1.0572, "step": 19290 }, { "epoch": 1.09, "grad_norm": 1.1005709171295166, "learning_rate": 8.916382833787466e-05, "loss": 1.0681, "step": 19300 }, { "epoch": 1.09, "grad_norm": 1.0455455780029297, "learning_rate": 8.915815168029064e-05, "loss": 1.0916, "step": 19310 }, { "epoch": 1.1, "grad_norm": 1.056420922279358, "learning_rate": 8.915247502270664e-05, "loss": 1.0356, "step": 19320 }, { "epoch": 1.1, "grad_norm": 1.0560402870178223, "learning_rate": 8.914679836512262e-05, "loss": 1.091, "step": 19330 }, { "epoch": 1.1, "grad_norm": 1.0278939008712769, "learning_rate": 8.914112170753861e-05, "loss": 1.0673, "step": 19340 }, { "epoch": 1.1, "grad_norm": 1.0260425806045532, "learning_rate": 8.913544504995459e-05, "loss": 1.0751, "step": 19350 }, { "epoch": 1.1, "grad_norm": 1.0129364728927612, "learning_rate": 8.913033605812898e-05, "loss": 1.0716, "step": 19360 }, { "epoch": 1.1, "grad_norm": 1.039722204208374, "learning_rate": 8.912465940054496e-05, "loss": 1.061, "step": 19370 }, { "epoch": 1.1, "grad_norm": 1.1219229698181152, "learning_rate": 8.911898274296094e-05, "loss": 1.0353, "step": 19380 }, { "epoch": 1.1, "grad_norm": 1.124125361442566, "learning_rate": 8.911330608537693e-05, "loss": 1.068, "step": 19390 }, { "epoch": 1.1, "grad_norm": 1.072460412979126, "learning_rate": 8.910762942779291e-05, "loss": 1.0869, "step": 19400 }, { "epoch": 1.1, "grad_norm": 1.049027919769287, "learning_rate": 8.91019527702089e-05, "loss": 1.0549, "step": 19410 }, { "epoch": 1.1, "grad_norm": 1.0572046041488647, "learning_rate": 8.909627611262489e-05, "loss": 1.0487, "step": 19420 }, { "epoch": 1.1, "grad_norm": 1.1211833953857422, "learning_rate": 8.909059945504088e-05, "loss": 1.0773, "step": 19430 }, { "epoch": 1.1, "grad_norm": 1.0764585733413696, "learning_rate": 8.908492279745686e-05, "loss": 1.0582, "step": 19440 }, { "epoch": 1.1, "grad_norm": 1.037237286567688, "learning_rate": 8.907924613987285e-05, "loss": 1.0673, "step": 19450 }, { "epoch": 1.1, "grad_norm": 1.0807349681854248, "learning_rate": 8.907356948228883e-05, "loss": 1.0609, "step": 19460 }, { "epoch": 1.1, "grad_norm": 1.0746057033538818, "learning_rate": 8.906789282470481e-05, "loss": 1.0551, "step": 19470 }, { "epoch": 1.1, "grad_norm": 1.1632825136184692, "learning_rate": 8.90622161671208e-05, "loss": 1.068, "step": 19480 }, { "epoch": 1.11, "grad_norm": 1.0623642206192017, "learning_rate": 8.905653950953679e-05, "loss": 1.0736, "step": 19490 }, { "epoch": 1.11, "grad_norm": 1.1541050672531128, "learning_rate": 8.905086285195278e-05, "loss": 1.0896, "step": 19500 }, { "epoch": 1.11, "grad_norm": 1.0443987846374512, "learning_rate": 8.904518619436876e-05, "loss": 1.0611, "step": 19510 }, { "epoch": 1.11, "grad_norm": 1.0767040252685547, "learning_rate": 8.903950953678475e-05, "loss": 1.0521, "step": 19520 }, { "epoch": 1.11, "grad_norm": 1.045660138130188, "learning_rate": 8.903383287920073e-05, "loss": 1.0603, "step": 19530 }, { "epoch": 1.11, "grad_norm": 1.06366765499115, "learning_rate": 8.902815622161673e-05, "loss": 1.0703, "step": 19540 }, { "epoch": 1.11, "grad_norm": 1.018843412399292, "learning_rate": 8.90224795640327e-05, "loss": 1.0715, "step": 19550 }, { "epoch": 1.11, "grad_norm": 1.0958688259124756, "learning_rate": 8.901680290644869e-05, "loss": 1.0664, "step": 19560 }, { "epoch": 1.11, "grad_norm": 1.1187902688980103, "learning_rate": 8.901112624886468e-05, "loss": 1.0433, "step": 19570 }, { "epoch": 1.11, "grad_norm": 1.0355998277664185, "learning_rate": 8.900544959128066e-05, "loss": 1.0355, "step": 19580 }, { "epoch": 1.11, "grad_norm": 1.09615957736969, "learning_rate": 8.899977293369664e-05, "loss": 1.0769, "step": 19590 }, { "epoch": 1.11, "grad_norm": 0.9759981632232666, "learning_rate": 8.899409627611262e-05, "loss": 1.0892, "step": 19600 }, { "epoch": 1.11, "grad_norm": 1.0747500658035278, "learning_rate": 8.898841961852861e-05, "loss": 1.0769, "step": 19610 }, { "epoch": 1.11, "grad_norm": 1.0626169443130493, "learning_rate": 8.89827429609446e-05, "loss": 1.0505, "step": 19620 }, { "epoch": 1.11, "grad_norm": 1.0150731801986694, "learning_rate": 8.897706630336059e-05, "loss": 1.0605, "step": 19630 }, { "epoch": 1.11, "grad_norm": 1.0063129663467407, "learning_rate": 8.897138964577657e-05, "loss": 1.0591, "step": 19640 }, { "epoch": 1.11, "grad_norm": 1.0822914838790894, "learning_rate": 8.896571298819255e-05, "loss": 1.0626, "step": 19650 }, { "epoch": 1.11, "grad_norm": 1.0135700702667236, "learning_rate": 8.896003633060854e-05, "loss": 1.0659, "step": 19660 }, { "epoch": 1.12, "grad_norm": 1.0597819089889526, "learning_rate": 8.895435967302452e-05, "loss": 1.0557, "step": 19670 }, { "epoch": 1.12, "grad_norm": 0.9766944646835327, "learning_rate": 8.894868301544051e-05, "loss": 1.0851, "step": 19680 }, { "epoch": 1.12, "grad_norm": 1.0866427421569824, "learning_rate": 8.89430063578565e-05, "loss": 1.0582, "step": 19690 }, { "epoch": 1.12, "grad_norm": 1.0379681587219238, "learning_rate": 8.893732970027249e-05, "loss": 1.0821, "step": 19700 }, { "epoch": 1.12, "grad_norm": 1.0992668867111206, "learning_rate": 8.893165304268847e-05, "loss": 1.0658, "step": 19710 }, { "epoch": 1.12, "grad_norm": 1.0603681802749634, "learning_rate": 8.892597638510446e-05, "loss": 1.0962, "step": 19720 }, { "epoch": 1.12, "grad_norm": 1.1069761514663696, "learning_rate": 8.892029972752044e-05, "loss": 1.0898, "step": 19730 }, { "epoch": 1.12, "grad_norm": 1.043450117111206, "learning_rate": 8.891462306993642e-05, "loss": 1.0788, "step": 19740 }, { "epoch": 1.12, "grad_norm": 1.161283254623413, "learning_rate": 8.890894641235241e-05, "loss": 1.0783, "step": 19750 }, { "epoch": 1.12, "grad_norm": 1.0653133392333984, "learning_rate": 8.89032697547684e-05, "loss": 1.0726, "step": 19760 }, { "epoch": 1.12, "grad_norm": 1.0420210361480713, "learning_rate": 8.889759309718439e-05, "loss": 1.071, "step": 19770 }, { "epoch": 1.12, "grad_norm": 1.0725061893463135, "learning_rate": 8.889191643960037e-05, "loss": 1.0583, "step": 19780 }, { "epoch": 1.12, "grad_norm": 1.0285577774047852, "learning_rate": 8.888623978201636e-05, "loss": 1.0588, "step": 19790 }, { "epoch": 1.12, "grad_norm": 1.031090497970581, "learning_rate": 8.888056312443234e-05, "loss": 1.0732, "step": 19800 }, { "epoch": 1.12, "grad_norm": 1.0962963104248047, "learning_rate": 8.887488646684833e-05, "loss": 1.0798, "step": 19810 }, { "epoch": 1.12, "grad_norm": 1.0120975971221924, "learning_rate": 8.886920980926431e-05, "loss": 1.0654, "step": 19820 }, { "epoch": 1.12, "grad_norm": 0.9955295324325562, "learning_rate": 8.88635331516803e-05, "loss": 1.0559, "step": 19830 }, { "epoch": 1.12, "grad_norm": 1.0379774570465088, "learning_rate": 8.885785649409627e-05, "loss": 1.06, "step": 19840 }, { "epoch": 1.13, "grad_norm": 1.0369793176651, "learning_rate": 8.885217983651227e-05, "loss": 1.0777, "step": 19850 }, { "epoch": 1.13, "grad_norm": 1.0141618251800537, "learning_rate": 8.884650317892825e-05, "loss": 1.079, "step": 19860 }, { "epoch": 1.13, "grad_norm": 1.0548855066299438, "learning_rate": 8.884082652134423e-05, "loss": 1.0717, "step": 19870 }, { "epoch": 1.13, "grad_norm": 1.0729728937149048, "learning_rate": 8.883514986376022e-05, "loss": 1.0538, "step": 19880 }, { "epoch": 1.13, "grad_norm": 1.0060250759124756, "learning_rate": 8.88294732061762e-05, "loss": 1.0624, "step": 19890 }, { "epoch": 1.13, "grad_norm": 1.0456488132476807, "learning_rate": 8.88237965485922e-05, "loss": 1.0972, "step": 19900 }, { "epoch": 1.13, "grad_norm": 1.0597950220108032, "learning_rate": 8.881811989100818e-05, "loss": 1.0842, "step": 19910 }, { "epoch": 1.13, "grad_norm": 1.0420573949813843, "learning_rate": 8.881244323342417e-05, "loss": 1.0746, "step": 19920 }, { "epoch": 1.13, "grad_norm": 1.0527095794677734, "learning_rate": 8.880676657584015e-05, "loss": 1.0637, "step": 19930 }, { "epoch": 1.13, "grad_norm": 1.1177769899368286, "learning_rate": 8.880108991825613e-05, "loss": 1.0823, "step": 19940 }, { "epoch": 1.13, "grad_norm": 1.0376001596450806, "learning_rate": 8.879541326067212e-05, "loss": 1.0511, "step": 19950 }, { "epoch": 1.13, "grad_norm": 1.0334689617156982, "learning_rate": 8.87897366030881e-05, "loss": 1.0681, "step": 19960 }, { "epoch": 1.13, "grad_norm": 1.1020965576171875, "learning_rate": 8.87840599455041e-05, "loss": 1.0501, "step": 19970 }, { "epoch": 1.13, "grad_norm": 1.0422677993774414, "learning_rate": 8.877838328792008e-05, "loss": 1.0505, "step": 19980 }, { "epoch": 1.13, "grad_norm": 1.07109534740448, "learning_rate": 8.877270663033607e-05, "loss": 1.071, "step": 19990 }, { "epoch": 1.13, "grad_norm": 1.0466283559799194, "learning_rate": 8.876702997275205e-05, "loss": 1.0758, "step": 20000 }, { "epoch": 1.13, "grad_norm": 0.9863439798355103, "learning_rate": 8.876135331516804e-05, "loss": 1.0931, "step": 20010 }, { "epoch": 1.14, "grad_norm": 1.0229188203811646, "learning_rate": 8.875567665758402e-05, "loss": 1.058, "step": 20020 }, { "epoch": 1.14, "grad_norm": 1.140616536140442, "learning_rate": 8.875e-05, "loss": 1.0538, "step": 20030 }, { "epoch": 1.14, "grad_norm": 0.9950951337814331, "learning_rate": 8.8744323342416e-05, "loss": 1.0521, "step": 20040 }, { "epoch": 1.14, "grad_norm": 1.0319138765335083, "learning_rate": 8.873864668483198e-05, "loss": 1.0466, "step": 20050 }, { "epoch": 1.14, "grad_norm": 1.0607590675354004, "learning_rate": 8.873297002724797e-05, "loss": 1.031, "step": 20060 }, { "epoch": 1.14, "grad_norm": 1.0279523134231567, "learning_rate": 8.872729336966395e-05, "loss": 1.0911, "step": 20070 }, { "epoch": 1.14, "grad_norm": 1.057482123374939, "learning_rate": 8.872161671207993e-05, "loss": 1.0555, "step": 20080 }, { "epoch": 1.14, "grad_norm": 1.0835058689117432, "learning_rate": 8.871594005449592e-05, "loss": 1.0528, "step": 20090 }, { "epoch": 1.14, "grad_norm": 1.0165742635726929, "learning_rate": 8.87102633969119e-05, "loss": 1.0589, "step": 20100 }, { "epoch": 1.14, "grad_norm": 1.0637547969818115, "learning_rate": 8.870458673932788e-05, "loss": 1.0713, "step": 20110 }, { "epoch": 1.14, "grad_norm": 1.0264283418655396, "learning_rate": 8.869891008174386e-05, "loss": 1.077, "step": 20120 }, { "epoch": 1.14, "grad_norm": 1.0717065334320068, "learning_rate": 8.869323342415986e-05, "loss": 1.0636, "step": 20130 }, { "epoch": 1.14, "grad_norm": 1.0388652086257935, "learning_rate": 8.868755676657584e-05, "loss": 1.0479, "step": 20140 }, { "epoch": 1.14, "grad_norm": 1.0984303951263428, "learning_rate": 8.868188010899183e-05, "loss": 1.0486, "step": 20150 }, { "epoch": 1.14, "grad_norm": 1.028473138809204, "learning_rate": 8.867620345140781e-05, "loss": 1.0898, "step": 20160 }, { "epoch": 1.14, "grad_norm": 1.0935053825378418, "learning_rate": 8.86705267938238e-05, "loss": 1.0541, "step": 20170 }, { "epoch": 1.14, "grad_norm": 0.959526538848877, "learning_rate": 8.866485013623978e-05, "loss": 1.0511, "step": 20180 }, { "epoch": 1.14, "grad_norm": 1.0417803525924683, "learning_rate": 8.865917347865578e-05, "loss": 1.0599, "step": 20190 }, { "epoch": 1.15, "grad_norm": 1.0892553329467773, "learning_rate": 8.865349682107176e-05, "loss": 1.0546, "step": 20200 }, { "epoch": 1.15, "grad_norm": 1.0363308191299438, "learning_rate": 8.864782016348774e-05, "loss": 1.0563, "step": 20210 }, { "epoch": 1.15, "grad_norm": 1.0036818981170654, "learning_rate": 8.864214350590373e-05, "loss": 1.0532, "step": 20220 }, { "epoch": 1.15, "grad_norm": 1.0363349914550781, "learning_rate": 8.863646684831971e-05, "loss": 1.0717, "step": 20230 }, { "epoch": 1.15, "grad_norm": 1.062130331993103, "learning_rate": 8.86307901907357e-05, "loss": 1.0707, "step": 20240 }, { "epoch": 1.15, "grad_norm": 1.0914536714553833, "learning_rate": 8.862511353315168e-05, "loss": 1.0681, "step": 20250 }, { "epoch": 1.15, "grad_norm": 1.071374773979187, "learning_rate": 8.861943687556768e-05, "loss": 1.0496, "step": 20260 }, { "epoch": 1.15, "grad_norm": 1.0747919082641602, "learning_rate": 8.861376021798366e-05, "loss": 1.0442, "step": 20270 }, { "epoch": 1.15, "grad_norm": 0.9965943694114685, "learning_rate": 8.860808356039965e-05, "loss": 1.0585, "step": 20280 }, { "epoch": 1.15, "grad_norm": 1.0479921102523804, "learning_rate": 8.860240690281563e-05, "loss": 1.0784, "step": 20290 }, { "epoch": 1.15, "grad_norm": 1.0355370044708252, "learning_rate": 8.859673024523161e-05, "loss": 1.0768, "step": 20300 }, { "epoch": 1.15, "grad_norm": 1.0897995233535767, "learning_rate": 8.85910535876476e-05, "loss": 1.0551, "step": 20310 }, { "epoch": 1.15, "grad_norm": 1.0719703435897827, "learning_rate": 8.858537693006358e-05, "loss": 1.058, "step": 20320 }, { "epoch": 1.15, "grad_norm": 1.0728038549423218, "learning_rate": 8.857970027247958e-05, "loss": 1.062, "step": 20330 }, { "epoch": 1.15, "grad_norm": 0.9904671311378479, "learning_rate": 8.857402361489556e-05, "loss": 1.0442, "step": 20340 }, { "epoch": 1.15, "grad_norm": 1.0080974102020264, "learning_rate": 8.856834695731154e-05, "loss": 1.071, "step": 20350 }, { "epoch": 1.15, "grad_norm": 1.042209506034851, "learning_rate": 8.856267029972752e-05, "loss": 1.0532, "step": 20360 }, { "epoch": 1.16, "grad_norm": 1.0555109977722168, "learning_rate": 8.855699364214351e-05, "loss": 1.0516, "step": 20370 }, { "epoch": 1.16, "grad_norm": 1.1866790056228638, "learning_rate": 8.855131698455949e-05, "loss": 1.049, "step": 20380 }, { "epoch": 1.16, "grad_norm": 1.0470653772354126, "learning_rate": 8.854564032697547e-05, "loss": 1.0631, "step": 20390 }, { "epoch": 1.16, "grad_norm": 1.0241729021072388, "learning_rate": 8.853996366939146e-05, "loss": 1.0708, "step": 20400 }, { "epoch": 1.16, "grad_norm": 1.0123515129089355, "learning_rate": 8.853428701180744e-05, "loss": 1.0789, "step": 20410 }, { "epoch": 1.16, "grad_norm": 1.00401771068573, "learning_rate": 8.852861035422344e-05, "loss": 1.0538, "step": 20420 }, { "epoch": 1.16, "grad_norm": 1.027596354484558, "learning_rate": 8.852293369663942e-05, "loss": 1.0662, "step": 20430 }, { "epoch": 1.16, "grad_norm": 1.116837739944458, "learning_rate": 8.851725703905541e-05, "loss": 1.0377, "step": 20440 }, { "epoch": 1.16, "grad_norm": 1.035902738571167, "learning_rate": 8.851158038147139e-05, "loss": 1.0783, "step": 20450 }, { "epoch": 1.16, "grad_norm": 1.0052319765090942, "learning_rate": 8.850590372388738e-05, "loss": 1.0614, "step": 20460 }, { "epoch": 1.16, "grad_norm": 1.1572017669677734, "learning_rate": 8.850022706630336e-05, "loss": 1.0423, "step": 20470 }, { "epoch": 1.16, "grad_norm": 1.019801139831543, "learning_rate": 8.849455040871934e-05, "loss": 1.0834, "step": 20480 }, { "epoch": 1.16, "grad_norm": 1.0221452713012695, "learning_rate": 8.848887375113534e-05, "loss": 1.0582, "step": 20490 }, { "epoch": 1.16, "grad_norm": 1.0144776105880737, "learning_rate": 8.848319709355132e-05, "loss": 1.0577, "step": 20500 }, { "epoch": 1.16, "grad_norm": 1.0053555965423584, "learning_rate": 8.847752043596731e-05, "loss": 1.0198, "step": 20510 }, { "epoch": 1.16, "grad_norm": 1.086133360862732, "learning_rate": 8.847184377838329e-05, "loss": 1.0888, "step": 20520 }, { "epoch": 1.16, "grad_norm": 1.0212129354476929, "learning_rate": 8.846616712079929e-05, "loss": 1.0013, "step": 20530 }, { "epoch": 1.16, "grad_norm": 1.0772147178649902, "learning_rate": 8.846049046321527e-05, "loss": 1.0759, "step": 20540 }, { "epoch": 1.17, "grad_norm": 1.021315574645996, "learning_rate": 8.845481380563126e-05, "loss": 1.0585, "step": 20550 }, { "epoch": 1.17, "grad_norm": 1.1144574880599976, "learning_rate": 8.844913714804724e-05, "loss": 1.0525, "step": 20560 }, { "epoch": 1.17, "grad_norm": 1.078034520149231, "learning_rate": 8.844346049046322e-05, "loss": 1.0559, "step": 20570 }, { "epoch": 1.17, "grad_norm": 1.0857787132263184, "learning_rate": 8.843778383287921e-05, "loss": 1.0978, "step": 20580 }, { "epoch": 1.17, "grad_norm": 1.060692310333252, "learning_rate": 8.843210717529519e-05, "loss": 1.0616, "step": 20590 }, { "epoch": 1.17, "grad_norm": 1.1279404163360596, "learning_rate": 8.842643051771117e-05, "loss": 1.0551, "step": 20600 }, { "epoch": 1.17, "grad_norm": 1.0988376140594482, "learning_rate": 8.842075386012717e-05, "loss": 1.0474, "step": 20610 }, { "epoch": 1.17, "grad_norm": 1.0475634336471558, "learning_rate": 8.841507720254315e-05, "loss": 1.0572, "step": 20620 }, { "epoch": 1.17, "grad_norm": 1.0347323417663574, "learning_rate": 8.840940054495913e-05, "loss": 1.0537, "step": 20630 }, { "epoch": 1.17, "grad_norm": 1.0162173509597778, "learning_rate": 8.840372388737512e-05, "loss": 1.0695, "step": 20640 }, { "epoch": 1.17, "grad_norm": 1.0750162601470947, "learning_rate": 8.83980472297911e-05, "loss": 1.0656, "step": 20650 }, { "epoch": 1.17, "grad_norm": 1.0959526300430298, "learning_rate": 8.839237057220708e-05, "loss": 1.0924, "step": 20660 }, { "epoch": 1.17, "grad_norm": 1.1212882995605469, "learning_rate": 8.838669391462307e-05, "loss": 1.0619, "step": 20670 }, { "epoch": 1.17, "grad_norm": 1.0362157821655273, "learning_rate": 8.838101725703905e-05, "loss": 1.0661, "step": 20680 }, { "epoch": 1.17, "grad_norm": 1.0396943092346191, "learning_rate": 8.837534059945505e-05, "loss": 1.0394, "step": 20690 }, { "epoch": 1.17, "grad_norm": 1.0432844161987305, "learning_rate": 8.836966394187103e-05, "loss": 1.0677, "step": 20700 }, { "epoch": 1.17, "grad_norm": 1.0335136651992798, "learning_rate": 8.836398728428702e-05, "loss": 1.0636, "step": 20710 }, { "epoch": 1.17, "grad_norm": 1.0286593437194824, "learning_rate": 8.8358310626703e-05, "loss": 1.0761, "step": 20720 }, { "epoch": 1.18, "grad_norm": 1.0391358137130737, "learning_rate": 8.835263396911899e-05, "loss": 1.0748, "step": 20730 }, { "epoch": 1.18, "grad_norm": 1.0350191593170166, "learning_rate": 8.834695731153497e-05, "loss": 1.0497, "step": 20740 }, { "epoch": 1.18, "grad_norm": 1.0465182065963745, "learning_rate": 8.834128065395095e-05, "loss": 1.0364, "step": 20750 }, { "epoch": 1.18, "grad_norm": 1.1253176927566528, "learning_rate": 8.833560399636695e-05, "loss": 1.0948, "step": 20760 }, { "epoch": 1.18, "grad_norm": 1.1433619260787964, "learning_rate": 8.832992733878293e-05, "loss": 1.0642, "step": 20770 }, { "epoch": 1.18, "grad_norm": 1.0453286170959473, "learning_rate": 8.832425068119892e-05, "loss": 1.0308, "step": 20780 }, { "epoch": 1.18, "grad_norm": 1.0519074201583862, "learning_rate": 8.83185740236149e-05, "loss": 1.0713, "step": 20790 }, { "epoch": 1.18, "grad_norm": 1.0871578454971313, "learning_rate": 8.831289736603089e-05, "loss": 1.0801, "step": 20800 }, { "epoch": 1.18, "grad_norm": 1.02552330493927, "learning_rate": 8.830722070844687e-05, "loss": 1.0935, "step": 20810 }, { "epoch": 1.18, "grad_norm": 1.0069829225540161, "learning_rate": 8.830154405086287e-05, "loss": 1.0363, "step": 20820 }, { "epoch": 1.18, "grad_norm": 1.0679603815078735, "learning_rate": 8.829586739327885e-05, "loss": 1.0585, "step": 20830 }, { "epoch": 1.18, "grad_norm": 1.0579584836959839, "learning_rate": 8.829019073569483e-05, "loss": 1.0636, "step": 20840 }, { "epoch": 1.18, "grad_norm": 1.1083428859710693, "learning_rate": 8.828451407811082e-05, "loss": 1.0775, "step": 20850 }, { "epoch": 1.18, "grad_norm": 1.0444563627243042, "learning_rate": 8.82788374205268e-05, "loss": 1.0747, "step": 20860 }, { "epoch": 1.18, "grad_norm": 1.1259359121322632, "learning_rate": 8.827316076294278e-05, "loss": 1.0802, "step": 20870 }, { "epoch": 1.18, "grad_norm": 1.0273969173431396, "learning_rate": 8.826748410535876e-05, "loss": 1.0342, "step": 20880 }, { "epoch": 1.18, "grad_norm": 1.0938622951507568, "learning_rate": 8.826180744777475e-05, "loss": 1.0595, "step": 20890 }, { "epoch": 1.19, "grad_norm": 1.0152835845947266, "learning_rate": 8.825613079019073e-05, "loss": 1.0425, "step": 20900 }, { "epoch": 1.19, "grad_norm": 1.1038157939910889, "learning_rate": 8.825045413260673e-05, "loss": 1.0653, "step": 20910 }, { "epoch": 1.19, "grad_norm": 1.097157597541809, "learning_rate": 8.824477747502271e-05, "loss": 1.0609, "step": 20920 }, { "epoch": 1.19, "grad_norm": 1.100598931312561, "learning_rate": 8.823910081743869e-05, "loss": 1.0486, "step": 20930 }, { "epoch": 1.19, "grad_norm": 1.0213395357131958, "learning_rate": 8.823342415985468e-05, "loss": 1.0723, "step": 20940 }, { "epoch": 1.19, "grad_norm": 1.0603309869766235, "learning_rate": 8.822774750227066e-05, "loss": 1.0593, "step": 20950 }, { "epoch": 1.19, "grad_norm": 1.0598081350326538, "learning_rate": 8.822207084468665e-05, "loss": 1.04, "step": 20960 }, { "epoch": 1.19, "grad_norm": 1.039402961730957, "learning_rate": 8.821639418710263e-05, "loss": 1.06, "step": 20970 }, { "epoch": 1.19, "grad_norm": 1.1010510921478271, "learning_rate": 8.821071752951863e-05, "loss": 1.0619, "step": 20980 }, { "epoch": 1.19, "grad_norm": 1.0895475149154663, "learning_rate": 8.820504087193461e-05, "loss": 1.0634, "step": 20990 }, { "epoch": 1.19, "grad_norm": 1.0747750997543335, "learning_rate": 8.81993642143506e-05, "loss": 1.0303, "step": 21000 }, { "epoch": 1.19, "grad_norm": 1.106872320175171, "learning_rate": 8.819368755676658e-05, "loss": 1.0577, "step": 21010 }, { "epoch": 1.19, "grad_norm": 1.0355286598205566, "learning_rate": 8.818801089918256e-05, "loss": 1.0667, "step": 21020 }, { "epoch": 1.19, "grad_norm": 1.073618769645691, "learning_rate": 8.818233424159855e-05, "loss": 1.0507, "step": 21030 }, { "epoch": 1.19, "grad_norm": 1.0424822568893433, "learning_rate": 8.817665758401453e-05, "loss": 1.0855, "step": 21040 }, { "epoch": 1.19, "grad_norm": 1.1018362045288086, "learning_rate": 8.817098092643053e-05, "loss": 1.0462, "step": 21050 }, { "epoch": 1.19, "grad_norm": 1.0703846216201782, "learning_rate": 8.816530426884651e-05, "loss": 1.0553, "step": 21060 }, { "epoch": 1.19, "grad_norm": 1.1002042293548584, "learning_rate": 8.81596276112625e-05, "loss": 1.0536, "step": 21070 }, { "epoch": 1.2, "grad_norm": 1.1746025085449219, "learning_rate": 8.815395095367848e-05, "loss": 1.0404, "step": 21080 }, { "epoch": 1.2, "grad_norm": 0.9977022409439087, "learning_rate": 8.814827429609447e-05, "loss": 1.0639, "step": 21090 }, { "epoch": 1.2, "grad_norm": 1.12899911403656, "learning_rate": 8.814259763851045e-05, "loss": 1.0352, "step": 21100 }, { "epoch": 1.2, "grad_norm": 1.050251841545105, "learning_rate": 8.813692098092643e-05, "loss": 1.0441, "step": 21110 }, { "epoch": 1.2, "grad_norm": 1.0685515403747559, "learning_rate": 8.813124432334241e-05, "loss": 1.0757, "step": 21120 }, { "epoch": 1.2, "grad_norm": 1.1346689462661743, "learning_rate": 8.812556766575841e-05, "loss": 1.0566, "step": 21130 }, { "epoch": 1.2, "grad_norm": 1.0791813135147095, "learning_rate": 8.811989100817439e-05, "loss": 1.0612, "step": 21140 }, { "epoch": 1.2, "grad_norm": 1.0707634687423706, "learning_rate": 8.811421435059037e-05, "loss": 1.059, "step": 21150 }, { "epoch": 1.2, "grad_norm": 1.0008734464645386, "learning_rate": 8.810853769300636e-05, "loss": 1.0488, "step": 21160 }, { "epoch": 1.2, "grad_norm": 1.046141266822815, "learning_rate": 8.810286103542234e-05, "loss": 1.0365, "step": 21170 }, { "epoch": 1.2, "grad_norm": 1.1298972368240356, "learning_rate": 8.809718437783834e-05, "loss": 1.0708, "step": 21180 }, { "epoch": 1.2, "grad_norm": 1.1723045110702515, "learning_rate": 8.809150772025432e-05, "loss": 1.0599, "step": 21190 }, { "epoch": 1.2, "grad_norm": 1.0816832780838013, "learning_rate": 8.80858310626703e-05, "loss": 1.0439, "step": 21200 }, { "epoch": 1.2, "grad_norm": 1.0241034030914307, "learning_rate": 8.808015440508629e-05, "loss": 1.0516, "step": 21210 }, { "epoch": 1.2, "grad_norm": 1.0003905296325684, "learning_rate": 8.807447774750227e-05, "loss": 1.0506, "step": 21220 }, { "epoch": 1.2, "grad_norm": 1.046169400215149, "learning_rate": 8.806880108991826e-05, "loss": 1.0643, "step": 21230 }, { "epoch": 1.2, "grad_norm": 1.1029932498931885, "learning_rate": 8.806312443233424e-05, "loss": 1.0263, "step": 21240 }, { "epoch": 1.2, "grad_norm": 1.0916121006011963, "learning_rate": 8.805744777475024e-05, "loss": 1.0495, "step": 21250 }, { "epoch": 1.21, "grad_norm": 1.0759705305099487, "learning_rate": 8.805177111716622e-05, "loss": 1.0865, "step": 21260 }, { "epoch": 1.21, "grad_norm": 1.0537559986114502, "learning_rate": 8.804609445958221e-05, "loss": 1.0578, "step": 21270 }, { "epoch": 1.21, "grad_norm": 1.005778193473816, "learning_rate": 8.804041780199819e-05, "loss": 1.0384, "step": 21280 }, { "epoch": 1.21, "grad_norm": 1.0550323724746704, "learning_rate": 8.803474114441417e-05, "loss": 1.04, "step": 21290 }, { "epoch": 1.21, "grad_norm": 1.0878018140792847, "learning_rate": 8.802906448683016e-05, "loss": 1.0229, "step": 21300 }, { "epoch": 1.21, "grad_norm": 1.0474399328231812, "learning_rate": 8.802338782924614e-05, "loss": 1.0523, "step": 21310 }, { "epoch": 1.21, "grad_norm": 1.046255350112915, "learning_rate": 8.801771117166214e-05, "loss": 1.0503, "step": 21320 }, { "epoch": 1.21, "grad_norm": 1.0214650630950928, "learning_rate": 8.801203451407812e-05, "loss": 1.0435, "step": 21330 }, { "epoch": 1.21, "grad_norm": 1.0585654973983765, "learning_rate": 8.800635785649411e-05, "loss": 1.0358, "step": 21340 }, { "epoch": 1.21, "grad_norm": 1.0717859268188477, "learning_rate": 8.800068119891009e-05, "loss": 1.0493, "step": 21350 }, { "epoch": 1.21, "grad_norm": 1.0247430801391602, "learning_rate": 8.799500454132607e-05, "loss": 1.0534, "step": 21360 }, { "epoch": 1.21, "grad_norm": 1.0497373342514038, "learning_rate": 8.798932788374206e-05, "loss": 1.0771, "step": 21370 }, { "epoch": 1.21, "grad_norm": 1.1052316427230835, "learning_rate": 8.798365122615804e-05, "loss": 1.0294, "step": 21380 }, { "epoch": 1.21, "grad_norm": 1.0896515846252441, "learning_rate": 8.797797456857402e-05, "loss": 1.0567, "step": 21390 }, { "epoch": 1.21, "grad_norm": 1.1328537464141846, "learning_rate": 8.797229791099e-05, "loss": 1.0674, "step": 21400 }, { "epoch": 1.21, "grad_norm": 1.0873790979385376, "learning_rate": 8.7966621253406e-05, "loss": 1.0336, "step": 21410 }, { "epoch": 1.21, "grad_norm": 1.0667444467544556, "learning_rate": 8.796094459582198e-05, "loss": 1.056, "step": 21420 }, { "epoch": 1.22, "grad_norm": 1.0494977235794067, "learning_rate": 8.795526793823797e-05, "loss": 1.0622, "step": 21430 }, { "epoch": 1.22, "grad_norm": 1.07500422000885, "learning_rate": 8.794959128065395e-05, "loss": 1.0495, "step": 21440 }, { "epoch": 1.22, "grad_norm": 1.0444661378860474, "learning_rate": 8.794391462306994e-05, "loss": 1.0525, "step": 21450 }, { "epoch": 1.22, "grad_norm": 1.0150220394134521, "learning_rate": 8.793823796548592e-05, "loss": 1.076, "step": 21460 }, { "epoch": 1.22, "grad_norm": 1.0818216800689697, "learning_rate": 8.79325613079019e-05, "loss": 1.0238, "step": 21470 }, { "epoch": 1.22, "grad_norm": 1.0892986059188843, "learning_rate": 8.79268846503179e-05, "loss": 1.0483, "step": 21480 }, { "epoch": 1.22, "grad_norm": 1.031985878944397, "learning_rate": 8.792120799273388e-05, "loss": 1.0466, "step": 21490 }, { "epoch": 1.22, "grad_norm": 1.0227570533752441, "learning_rate": 8.791553133514987e-05, "loss": 1.0613, "step": 21500 }, { "epoch": 1.22, "grad_norm": 1.0108333826065063, "learning_rate": 8.790985467756585e-05, "loss": 1.051, "step": 21510 }, { "epoch": 1.22, "grad_norm": 1.0496141910552979, "learning_rate": 8.790417801998184e-05, "loss": 1.0599, "step": 21520 }, { "epoch": 1.22, "grad_norm": 1.0879873037338257, "learning_rate": 8.789850136239782e-05, "loss": 1.0815, "step": 21530 }, { "epoch": 1.22, "grad_norm": 1.1150298118591309, "learning_rate": 8.789282470481382e-05, "loss": 1.0801, "step": 21540 }, { "epoch": 1.22, "grad_norm": 1.0510164499282837, "learning_rate": 8.78871480472298e-05, "loss": 1.0507, "step": 21550 }, { "epoch": 1.22, "grad_norm": 1.1199082136154175, "learning_rate": 8.788147138964578e-05, "loss": 1.0299, "step": 21560 }, { "epoch": 1.22, "grad_norm": 1.0846716165542603, "learning_rate": 8.787579473206177e-05, "loss": 1.0489, "step": 21570 }, { "epoch": 1.22, "grad_norm": 1.044701099395752, "learning_rate": 8.787011807447775e-05, "loss": 1.0362, "step": 21580 }, { "epoch": 1.22, "grad_norm": 1.126043438911438, "learning_rate": 8.786444141689374e-05, "loss": 1.039, "step": 21590 }, { "epoch": 1.22, "grad_norm": 1.0378719568252563, "learning_rate": 8.785876475930972e-05, "loss": 1.0582, "step": 21600 }, { "epoch": 1.23, "grad_norm": 1.053046703338623, "learning_rate": 8.785308810172572e-05, "loss": 1.0615, "step": 21610 }, { "epoch": 1.23, "grad_norm": 1.149213433265686, "learning_rate": 8.78474114441417e-05, "loss": 1.0721, "step": 21620 }, { "epoch": 1.23, "grad_norm": 1.1079955101013184, "learning_rate": 8.784173478655768e-05, "loss": 1.057, "step": 21630 }, { "epoch": 1.23, "grad_norm": 1.0135488510131836, "learning_rate": 8.783605812897366e-05, "loss": 1.0537, "step": 21640 }, { "epoch": 1.23, "grad_norm": 1.0499823093414307, "learning_rate": 8.783038147138965e-05, "loss": 1.0387, "step": 21650 }, { "epoch": 1.23, "grad_norm": 1.0897066593170166, "learning_rate": 8.782470481380563e-05, "loss": 1.0476, "step": 21660 }, { "epoch": 1.23, "grad_norm": 1.108344316482544, "learning_rate": 8.781902815622161e-05, "loss": 1.0662, "step": 21670 }, { "epoch": 1.23, "grad_norm": 1.091840386390686, "learning_rate": 8.78133514986376e-05, "loss": 1.0613, "step": 21680 }, { "epoch": 1.23, "grad_norm": 1.0995441675186157, "learning_rate": 8.780767484105358e-05, "loss": 1.0849, "step": 21690 }, { "epoch": 1.23, "grad_norm": 1.0286363363265991, "learning_rate": 8.780199818346958e-05, "loss": 1.0771, "step": 21700 }, { "epoch": 1.23, "grad_norm": 1.0369468927383423, "learning_rate": 8.779632152588556e-05, "loss": 1.0224, "step": 21710 }, { "epoch": 1.23, "grad_norm": 1.0051324367523193, "learning_rate": 8.779064486830155e-05, "loss": 1.0311, "step": 21720 }, { "epoch": 1.23, "grad_norm": 1.042696237564087, "learning_rate": 8.778496821071753e-05, "loss": 1.0518, "step": 21730 }, { "epoch": 1.23, "grad_norm": 1.0493894815444946, "learning_rate": 8.777929155313352e-05, "loss": 1.0208, "step": 21740 }, { "epoch": 1.23, "grad_norm": 1.0441114902496338, "learning_rate": 8.77736148955495e-05, "loss": 1.0368, "step": 21750 }, { "epoch": 1.23, "grad_norm": 1.031522512435913, "learning_rate": 8.776793823796548e-05, "loss": 1.057, "step": 21760 }, { "epoch": 1.23, "grad_norm": 1.0086742639541626, "learning_rate": 8.776226158038148e-05, "loss": 1.0584, "step": 21770 }, { "epoch": 1.23, "grad_norm": 1.1084133386611938, "learning_rate": 8.775658492279746e-05, "loss": 1.0535, "step": 21780 }, { "epoch": 1.24, "grad_norm": 1.0854772329330444, "learning_rate": 8.775090826521345e-05, "loss": 1.0494, "step": 21790 }, { "epoch": 1.24, "grad_norm": 1.0493109226226807, "learning_rate": 8.774523160762943e-05, "loss": 1.0704, "step": 21800 }, { "epoch": 1.24, "grad_norm": 1.0642813444137573, "learning_rate": 8.773955495004543e-05, "loss": 1.0629, "step": 21810 }, { "epoch": 1.24, "grad_norm": 1.058044195175171, "learning_rate": 8.77338782924614e-05, "loss": 1.0366, "step": 21820 }, { "epoch": 1.24, "grad_norm": 1.1092636585235596, "learning_rate": 8.77282016348774e-05, "loss": 1.0449, "step": 21830 }, { "epoch": 1.24, "grad_norm": 1.086999773979187, "learning_rate": 8.772252497729338e-05, "loss": 1.0731, "step": 21840 }, { "epoch": 1.24, "grad_norm": 1.0340814590454102, "learning_rate": 8.771684831970936e-05, "loss": 1.0488, "step": 21850 }, { "epoch": 1.24, "grad_norm": 1.1131315231323242, "learning_rate": 8.771117166212535e-05, "loss": 1.0555, "step": 21860 }, { "epoch": 1.24, "grad_norm": 1.0814061164855957, "learning_rate": 8.770549500454133e-05, "loss": 1.0397, "step": 21870 }, { "epoch": 1.24, "grad_norm": 1.090725064277649, "learning_rate": 8.769981834695731e-05, "loss": 1.0444, "step": 21880 }, { "epoch": 1.24, "grad_norm": 1.0831105709075928, "learning_rate": 8.76941416893733e-05, "loss": 1.0548, "step": 21890 }, { "epoch": 1.24, "grad_norm": 1.0650908946990967, "learning_rate": 8.768846503178929e-05, "loss": 1.0497, "step": 21900 }, { "epoch": 1.24, "grad_norm": 1.0468180179595947, "learning_rate": 8.768278837420527e-05, "loss": 1.0363, "step": 21910 }, { "epoch": 1.24, "grad_norm": 1.0712534189224243, "learning_rate": 8.767711171662126e-05, "loss": 1.0129, "step": 21920 }, { "epoch": 1.24, "grad_norm": 1.1547163724899292, "learning_rate": 8.767143505903724e-05, "loss": 1.0411, "step": 21930 }, { "epoch": 1.24, "grad_norm": 1.0931527614593506, "learning_rate": 8.766575840145322e-05, "loss": 1.0583, "step": 21940 }, { "epoch": 1.24, "grad_norm": 1.0580841302871704, "learning_rate": 8.766008174386921e-05, "loss": 1.0459, "step": 21950 }, { "epoch": 1.25, "grad_norm": 1.089203953742981, "learning_rate": 8.765440508628519e-05, "loss": 1.0461, "step": 21960 }, { "epoch": 1.25, "grad_norm": 1.0031250715255737, "learning_rate": 8.764872842870119e-05, "loss": 1.0469, "step": 21970 }, { "epoch": 1.25, "grad_norm": 1.0511555671691895, "learning_rate": 8.764305177111717e-05, "loss": 1.0392, "step": 21980 }, { "epoch": 1.25, "grad_norm": 1.1257861852645874, "learning_rate": 8.763737511353316e-05, "loss": 1.0693, "step": 21990 }, { "epoch": 1.25, "grad_norm": 1.087302803993225, "learning_rate": 8.763169845594914e-05, "loss": 1.0509, "step": 22000 }, { "epoch": 1.25, "grad_norm": 1.0143836736679077, "learning_rate": 8.762602179836513e-05, "loss": 1.0189, "step": 22010 }, { "epoch": 1.25, "grad_norm": 1.0296688079833984, "learning_rate": 8.762034514078111e-05, "loss": 1.0769, "step": 22020 }, { "epoch": 1.25, "grad_norm": 1.0741559267044067, "learning_rate": 8.761466848319709e-05, "loss": 1.0392, "step": 22030 }, { "epoch": 1.25, "grad_norm": 1.1043363809585571, "learning_rate": 8.760899182561309e-05, "loss": 1.0388, "step": 22040 }, { "epoch": 1.25, "grad_norm": 1.0418915748596191, "learning_rate": 8.760331516802907e-05, "loss": 1.0616, "step": 22050 }, { "epoch": 1.25, "grad_norm": 1.104185700416565, "learning_rate": 8.759763851044506e-05, "loss": 1.0418, "step": 22060 }, { "epoch": 1.25, "grad_norm": 1.0336707830429077, "learning_rate": 8.759196185286104e-05, "loss": 1.0672, "step": 22070 }, { "epoch": 1.25, "grad_norm": 1.082141399383545, "learning_rate": 8.758628519527703e-05, "loss": 1.0627, "step": 22080 }, { "epoch": 1.25, "grad_norm": 1.0054153203964233, "learning_rate": 8.758060853769301e-05, "loss": 1.0794, "step": 22090 }, { "epoch": 1.25, "grad_norm": 1.0366487503051758, "learning_rate": 8.7574931880109e-05, "loss": 1.0225, "step": 22100 }, { "epoch": 1.25, "grad_norm": 1.0359135866165161, "learning_rate": 8.756925522252499e-05, "loss": 1.0589, "step": 22110 }, { "epoch": 1.25, "grad_norm": 1.0556676387786865, "learning_rate": 8.756357856494097e-05, "loss": 1.0518, "step": 22120 }, { "epoch": 1.25, "grad_norm": 1.0212721824645996, "learning_rate": 8.755790190735696e-05, "loss": 1.0514, "step": 22130 }, { "epoch": 1.26, "grad_norm": 1.0554033517837524, "learning_rate": 8.755222524977294e-05, "loss": 1.0714, "step": 22140 }, { "epoch": 1.26, "grad_norm": 1.0544931888580322, "learning_rate": 8.754654859218892e-05, "loss": 1.0415, "step": 22150 }, { "epoch": 1.26, "grad_norm": 1.084411859512329, "learning_rate": 8.75408719346049e-05, "loss": 1.0552, "step": 22160 }, { "epoch": 1.26, "grad_norm": 1.047290563583374, "learning_rate": 8.75351952770209e-05, "loss": 1.0484, "step": 22170 }, { "epoch": 1.26, "grad_norm": 1.0472489595413208, "learning_rate": 8.752951861943687e-05, "loss": 1.06, "step": 22180 }, { "epoch": 1.26, "grad_norm": 1.0274584293365479, "learning_rate": 8.752384196185287e-05, "loss": 1.053, "step": 22190 }, { "epoch": 1.26, "grad_norm": 1.0905218124389648, "learning_rate": 8.751816530426885e-05, "loss": 1.0681, "step": 22200 }, { "epoch": 1.26, "grad_norm": 1.0495814085006714, "learning_rate": 8.751248864668483e-05, "loss": 1.0634, "step": 22210 }, { "epoch": 1.26, "grad_norm": 1.0552217960357666, "learning_rate": 8.750681198910082e-05, "loss": 1.0365, "step": 22220 }, { "epoch": 1.26, "grad_norm": 1.0780365467071533, "learning_rate": 8.75011353315168e-05, "loss": 1.0671, "step": 22230 }, { "epoch": 1.26, "grad_norm": 0.9906680583953857, "learning_rate": 8.74954586739328e-05, "loss": 1.041, "step": 22240 }, { "epoch": 1.26, "grad_norm": 1.021528720855713, "learning_rate": 8.748978201634877e-05, "loss": 1.0548, "step": 22250 }, { "epoch": 1.26, "grad_norm": 0.9911982417106628, "learning_rate": 8.748410535876477e-05, "loss": 1.0488, "step": 22260 }, { "epoch": 1.26, "grad_norm": 1.0025520324707031, "learning_rate": 8.747842870118075e-05, "loss": 1.0426, "step": 22270 }, { "epoch": 1.26, "grad_norm": 1.065458059310913, "learning_rate": 8.747275204359674e-05, "loss": 1.0334, "step": 22280 }, { "epoch": 1.26, "grad_norm": 1.064314603805542, "learning_rate": 8.746707538601272e-05, "loss": 1.0601, "step": 22290 }, { "epoch": 1.26, "grad_norm": 1.0937182903289795, "learning_rate": 8.74613987284287e-05, "loss": 1.0618, "step": 22300 }, { "epoch": 1.27, "grad_norm": 1.0344494581222534, "learning_rate": 8.74557220708447e-05, "loss": 1.0827, "step": 22310 }, { "epoch": 1.27, "grad_norm": 1.075310230255127, "learning_rate": 8.745004541326067e-05, "loss": 1.0215, "step": 22320 }, { "epoch": 1.27, "grad_norm": 1.0111116170883179, "learning_rate": 8.744436875567667e-05, "loss": 1.0653, "step": 22330 }, { "epoch": 1.27, "grad_norm": 1.0129929780960083, "learning_rate": 8.743869209809265e-05, "loss": 1.0555, "step": 22340 }, { "epoch": 1.27, "grad_norm": 0.9826786518096924, "learning_rate": 8.743301544050864e-05, "loss": 1.0593, "step": 22350 }, { "epoch": 1.27, "grad_norm": 1.0274604558944702, "learning_rate": 8.742733878292462e-05, "loss": 1.0638, "step": 22360 }, { "epoch": 1.27, "grad_norm": 1.0747777223587036, "learning_rate": 8.742166212534061e-05, "loss": 1.0761, "step": 22370 }, { "epoch": 1.27, "grad_norm": 1.010561227798462, "learning_rate": 8.74159854677566e-05, "loss": 1.0786, "step": 22380 }, { "epoch": 1.27, "grad_norm": 1.0501755475997925, "learning_rate": 8.741030881017257e-05, "loss": 1.0382, "step": 22390 }, { "epoch": 1.27, "grad_norm": 1.0968793630599976, "learning_rate": 8.740463215258855e-05, "loss": 1.0727, "step": 22400 }, { "epoch": 1.27, "grad_norm": 1.0403860807418823, "learning_rate": 8.739895549500455e-05, "loss": 1.019, "step": 22410 }, { "epoch": 1.27, "grad_norm": 1.0473589897155762, "learning_rate": 8.739327883742053e-05, "loss": 1.0392, "step": 22420 }, { "epoch": 1.27, "grad_norm": 1.0767806768417358, "learning_rate": 8.738760217983651e-05, "loss": 1.0556, "step": 22430 }, { "epoch": 1.27, "grad_norm": 1.031253457069397, "learning_rate": 8.73819255222525e-05, "loss": 1.0707, "step": 22440 }, { "epoch": 1.27, "grad_norm": 1.0324368476867676, "learning_rate": 8.737624886466848e-05, "loss": 1.0735, "step": 22450 }, { "epoch": 1.27, "grad_norm": 1.0607309341430664, "learning_rate": 8.737057220708448e-05, "loss": 1.0163, "step": 22460 }, { "epoch": 1.27, "grad_norm": 1.0482069253921509, "learning_rate": 8.736489554950046e-05, "loss": 1.0487, "step": 22470 }, { "epoch": 1.27, "grad_norm": 1.0879185199737549, "learning_rate": 8.735921889191644e-05, "loss": 1.0445, "step": 22480 }, { "epoch": 1.28, "grad_norm": 1.0884209871292114, "learning_rate": 8.735354223433243e-05, "loss": 1.0567, "step": 22490 }, { "epoch": 1.28, "grad_norm": 1.0805174112319946, "learning_rate": 8.734786557674841e-05, "loss": 1.0266, "step": 22500 }, { "epoch": 1.28, "grad_norm": 1.0686906576156616, "learning_rate": 8.73421889191644e-05, "loss": 1.0176, "step": 22510 }, { "epoch": 1.28, "grad_norm": 1.0316359996795654, "learning_rate": 8.733651226158038e-05, "loss": 1.0419, "step": 22520 }, { "epoch": 1.28, "grad_norm": 1.0494099855422974, "learning_rate": 8.733083560399638e-05, "loss": 1.0353, "step": 22530 }, { "epoch": 1.28, "grad_norm": 1.1050044298171997, "learning_rate": 8.732515894641236e-05, "loss": 1.0728, "step": 22540 }, { "epoch": 1.28, "grad_norm": 1.156101107597351, "learning_rate": 8.731948228882835e-05, "loss": 1.0404, "step": 22550 }, { "epoch": 1.28, "grad_norm": 1.0945260524749756, "learning_rate": 8.731380563124433e-05, "loss": 1.024, "step": 22560 }, { "epoch": 1.28, "grad_norm": 1.0426337718963623, "learning_rate": 8.730812897366031e-05, "loss": 1.0562, "step": 22570 }, { "epoch": 1.28, "grad_norm": 1.076880693435669, "learning_rate": 8.73024523160763e-05, "loss": 1.0524, "step": 22580 }, { "epoch": 1.28, "grad_norm": 1.0732098817825317, "learning_rate": 8.729677565849228e-05, "loss": 1.038, "step": 22590 }, { "epoch": 1.28, "grad_norm": 1.0721722841262817, "learning_rate": 8.729109900090828e-05, "loss": 1.0486, "step": 22600 }, { "epoch": 1.28, "grad_norm": 1.1418843269348145, "learning_rate": 8.728542234332426e-05, "loss": 1.0423, "step": 22610 }, { "epoch": 1.28, "grad_norm": 1.0620827674865723, "learning_rate": 8.727974568574025e-05, "loss": 1.0612, "step": 22620 }, { "epoch": 1.28, "grad_norm": 1.1170167922973633, "learning_rate": 8.727406902815623e-05, "loss": 1.0529, "step": 22630 }, { "epoch": 1.28, "grad_norm": 1.0621514320373535, "learning_rate": 8.726839237057222e-05, "loss": 1.0477, "step": 22640 }, { "epoch": 1.28, "grad_norm": 1.0432491302490234, "learning_rate": 8.72627157129882e-05, "loss": 1.0607, "step": 22650 }, { "epoch": 1.28, "grad_norm": 0.9973015189170837, "learning_rate": 8.725703905540418e-05, "loss": 1.0545, "step": 22660 }, { "epoch": 1.29, "grad_norm": 1.0759618282318115, "learning_rate": 8.725136239782016e-05, "loss": 1.0375, "step": 22670 }, { "epoch": 1.29, "grad_norm": 1.0854226350784302, "learning_rate": 8.724568574023614e-05, "loss": 1.048, "step": 22680 }, { "epoch": 1.29, "grad_norm": 1.1225231885910034, "learning_rate": 8.724000908265214e-05, "loss": 1.0452, "step": 22690 }, { "epoch": 1.29, "grad_norm": 1.0626229047775269, "learning_rate": 8.723433242506812e-05, "loss": 1.0599, "step": 22700 }, { "epoch": 1.29, "grad_norm": 1.063279390335083, "learning_rate": 8.722865576748411e-05, "loss": 1.0645, "step": 22710 }, { "epoch": 1.29, "grad_norm": 1.106383204460144, "learning_rate": 8.722297910990009e-05, "loss": 1.0345, "step": 22720 }, { "epoch": 1.29, "grad_norm": 1.0699182748794556, "learning_rate": 8.721730245231608e-05, "loss": 1.0389, "step": 22730 }, { "epoch": 1.29, "grad_norm": 1.0921827554702759, "learning_rate": 8.721162579473206e-05, "loss": 1.0561, "step": 22740 }, { "epoch": 1.29, "grad_norm": 1.0636377334594727, "learning_rate": 8.720594913714804e-05, "loss": 1.041, "step": 22750 }, { "epoch": 1.29, "grad_norm": 1.1275712251663208, "learning_rate": 8.720027247956404e-05, "loss": 1.0303, "step": 22760 }, { "epoch": 1.29, "grad_norm": 1.076472520828247, "learning_rate": 8.719459582198002e-05, "loss": 1.0441, "step": 22770 }, { "epoch": 1.29, "grad_norm": 1.1047557592391968, "learning_rate": 8.718891916439601e-05, "loss": 1.0661, "step": 22780 }, { "epoch": 1.29, "grad_norm": 1.091522216796875, "learning_rate": 8.718324250681199e-05, "loss": 1.0372, "step": 22790 }, { "epoch": 1.29, "grad_norm": 1.0567039251327515, "learning_rate": 8.717756584922798e-05, "loss": 1.034, "step": 22800 }, { "epoch": 1.29, "grad_norm": 1.1389214992523193, "learning_rate": 8.717188919164396e-05, "loss": 1.0432, "step": 22810 }, { "epoch": 1.29, "grad_norm": 1.0980632305145264, "learning_rate": 8.716621253405996e-05, "loss": 1.0401, "step": 22820 }, { "epoch": 1.29, "grad_norm": 1.0791411399841309, "learning_rate": 8.716053587647594e-05, "loss": 1.0349, "step": 22830 }, { "epoch": 1.3, "grad_norm": 1.0683735609054565, "learning_rate": 8.715485921889192e-05, "loss": 1.0606, "step": 22840 }, { "epoch": 1.3, "grad_norm": 1.0887709856033325, "learning_rate": 8.714918256130791e-05, "loss": 1.0494, "step": 22850 }, { "epoch": 1.3, "grad_norm": 1.0852530002593994, "learning_rate": 8.714350590372389e-05, "loss": 1.0372, "step": 22860 }, { "epoch": 1.3, "grad_norm": 1.076922059059143, "learning_rate": 8.713782924613988e-05, "loss": 1.0541, "step": 22870 }, { "epoch": 1.3, "grad_norm": 1.078803539276123, "learning_rate": 8.713215258855586e-05, "loss": 1.0482, "step": 22880 }, { "epoch": 1.3, "grad_norm": 1.05330228805542, "learning_rate": 8.712647593097186e-05, "loss": 1.0574, "step": 22890 }, { "epoch": 1.3, "grad_norm": 1.0686010122299194, "learning_rate": 8.712079927338784e-05, "loss": 1.0784, "step": 22900 }, { "epoch": 1.3, "grad_norm": 1.0358515977859497, "learning_rate": 8.711512261580382e-05, "loss": 1.0656, "step": 22910 }, { "epoch": 1.3, "grad_norm": 1.0425788164138794, "learning_rate": 8.710944595821981e-05, "loss": 1.0533, "step": 22920 }, { "epoch": 1.3, "grad_norm": 1.0470019578933716, "learning_rate": 8.710376930063579e-05, "loss": 1.0487, "step": 22930 }, { "epoch": 1.3, "grad_norm": 1.0337601900100708, "learning_rate": 8.709809264305177e-05, "loss": 1.039, "step": 22940 }, { "epoch": 1.3, "grad_norm": 1.098320484161377, "learning_rate": 8.709241598546775e-05, "loss": 1.067, "step": 22950 }, { "epoch": 1.3, "grad_norm": 1.0496511459350586, "learning_rate": 8.708673932788374e-05, "loss": 1.0446, "step": 22960 }, { "epoch": 1.3, "grad_norm": 1.1226173639297485, "learning_rate": 8.708106267029972e-05, "loss": 1.0302, "step": 22970 }, { "epoch": 1.3, "grad_norm": 1.0779452323913574, "learning_rate": 8.707538601271572e-05, "loss": 1.0465, "step": 22980 }, { "epoch": 1.3, "grad_norm": 1.1430795192718506, "learning_rate": 8.70697093551317e-05, "loss": 1.0351, "step": 22990 }, { "epoch": 1.3, "grad_norm": 1.0133132934570312, "learning_rate": 8.706403269754769e-05, "loss": 1.03, "step": 23000 }, { "epoch": 1.3, "grad_norm": 1.054701566696167, "learning_rate": 8.705835603996367e-05, "loss": 1.0445, "step": 23010 }, { "epoch": 1.31, "grad_norm": 1.0278418064117432, "learning_rate": 8.705267938237965e-05, "loss": 1.0559, "step": 23020 }, { "epoch": 1.31, "grad_norm": 1.0459551811218262, "learning_rate": 8.704700272479564e-05, "loss": 1.0568, "step": 23030 }, { "epoch": 1.31, "grad_norm": 1.0651085376739502, "learning_rate": 8.704132606721162e-05, "loss": 1.039, "step": 23040 }, { "epoch": 1.31, "grad_norm": 1.107542634010315, "learning_rate": 8.703564940962762e-05, "loss": 1.0427, "step": 23050 }, { "epoch": 1.31, "grad_norm": 1.1733877658843994, "learning_rate": 8.70299727520436e-05, "loss": 1.0558, "step": 23060 }, { "epoch": 1.31, "grad_norm": 1.0430281162261963, "learning_rate": 8.702429609445959e-05, "loss": 1.0604, "step": 23070 }, { "epoch": 1.31, "grad_norm": 1.0284955501556396, "learning_rate": 8.701861943687557e-05, "loss": 1.0451, "step": 23080 }, { "epoch": 1.31, "grad_norm": 1.0393493175506592, "learning_rate": 8.701294277929157e-05, "loss": 1.0327, "step": 23090 }, { "epoch": 1.31, "grad_norm": 1.079905390739441, "learning_rate": 8.700726612170755e-05, "loss": 1.0304, "step": 23100 }, { "epoch": 1.31, "grad_norm": 1.020281195640564, "learning_rate": 8.700158946412353e-05, "loss": 1.0614, "step": 23110 }, { "epoch": 1.31, "grad_norm": 0.975426971912384, "learning_rate": 8.699591280653952e-05, "loss": 1.0646, "step": 23120 }, { "epoch": 1.31, "grad_norm": 0.9876058101654053, "learning_rate": 8.69902361489555e-05, "loss": 1.0429, "step": 23130 }, { "epoch": 1.31, "grad_norm": 1.0579097270965576, "learning_rate": 8.698455949137149e-05, "loss": 1.0362, "step": 23140 }, { "epoch": 1.31, "grad_norm": 1.0652434825897217, "learning_rate": 8.697888283378747e-05, "loss": 1.0374, "step": 23150 }, { "epoch": 1.31, "grad_norm": 1.021103858947754, "learning_rate": 8.697320617620347e-05, "loss": 1.0725, "step": 23160 }, { "epoch": 1.31, "grad_norm": 1.1098734140396118, "learning_rate": 8.696752951861945e-05, "loss": 1.051, "step": 23170 }, { "epoch": 1.31, "grad_norm": 1.1229329109191895, "learning_rate": 8.696185286103543e-05, "loss": 1.0446, "step": 23180 }, { "epoch": 1.31, "grad_norm": 1.0508487224578857, "learning_rate": 8.69561762034514e-05, "loss": 1.0446, "step": 23190 }, { "epoch": 1.32, "grad_norm": 1.0473641157150269, "learning_rate": 8.695049954586739e-05, "loss": 1.0485, "step": 23200 }, { "epoch": 1.32, "grad_norm": 1.0183296203613281, "learning_rate": 8.694482288828338e-05, "loss": 1.0527, "step": 23210 }, { "epoch": 1.32, "grad_norm": 1.0803182125091553, "learning_rate": 8.693914623069936e-05, "loss": 1.0526, "step": 23220 }, { "epoch": 1.32, "grad_norm": 1.1551694869995117, "learning_rate": 8.693346957311535e-05, "loss": 1.0511, "step": 23230 }, { "epoch": 1.32, "grad_norm": 1.1202346086502075, "learning_rate": 8.692779291553133e-05, "loss": 1.0392, "step": 23240 }, { "epoch": 1.32, "grad_norm": 1.1228097677230835, "learning_rate": 8.692211625794733e-05, "loss": 1.0635, "step": 23250 }, { "epoch": 1.32, "grad_norm": 1.0571916103363037, "learning_rate": 8.69164396003633e-05, "loss": 1.0539, "step": 23260 }, { "epoch": 1.32, "grad_norm": 0.9989543557167053, "learning_rate": 8.69107629427793e-05, "loss": 1.0535, "step": 23270 }, { "epoch": 1.32, "grad_norm": 1.0763503313064575, "learning_rate": 8.690508628519528e-05, "loss": 1.0618, "step": 23280 }, { "epoch": 1.32, "grad_norm": 1.0911791324615479, "learning_rate": 8.689940962761126e-05, "loss": 1.0386, "step": 23290 }, { "epoch": 1.32, "grad_norm": 1.0546287298202515, "learning_rate": 8.689373297002725e-05, "loss": 1.035, "step": 23300 }, { "epoch": 1.32, "grad_norm": 1.0167467594146729, "learning_rate": 8.688805631244323e-05, "loss": 1.0285, "step": 23310 }, { "epoch": 1.32, "grad_norm": 1.0985878705978394, "learning_rate": 8.688237965485923e-05, "loss": 1.0503, "step": 23320 }, { "epoch": 1.32, "grad_norm": 1.0918431282043457, "learning_rate": 8.68767029972752e-05, "loss": 1.0168, "step": 23330 }, { "epoch": 1.32, "grad_norm": 1.1093504428863525, "learning_rate": 8.68710263396912e-05, "loss": 1.0336, "step": 23340 }, { "epoch": 1.32, "grad_norm": 1.0766338109970093, "learning_rate": 8.686534968210718e-05, "loss": 1.0479, "step": 23350 }, { "epoch": 1.32, "grad_norm": 1.131386160850525, "learning_rate": 8.685967302452317e-05, "loss": 1.051, "step": 23360 }, { "epoch": 1.33, "grad_norm": 1.0829981565475464, "learning_rate": 8.685399636693915e-05, "loss": 1.054, "step": 23370 }, { "epoch": 1.33, "grad_norm": 1.123764991760254, "learning_rate": 8.684831970935513e-05, "loss": 1.0529, "step": 23380 }, { "epoch": 1.33, "grad_norm": 1.086255669593811, "learning_rate": 8.684264305177113e-05, "loss": 1.0428, "step": 23390 }, { "epoch": 1.33, "grad_norm": 1.0336748361587524, "learning_rate": 8.68369663941871e-05, "loss": 1.038, "step": 23400 }, { "epoch": 1.33, "grad_norm": 1.058127760887146, "learning_rate": 8.68312897366031e-05, "loss": 1.042, "step": 23410 }, { "epoch": 1.33, "grad_norm": 1.1488451957702637, "learning_rate": 8.682561307901908e-05, "loss": 1.0596, "step": 23420 }, { "epoch": 1.33, "grad_norm": 0.9978166818618774, "learning_rate": 8.681993642143506e-05, "loss": 1.0717, "step": 23430 }, { "epoch": 1.33, "grad_norm": 1.0719130039215088, "learning_rate": 8.681425976385105e-05, "loss": 1.0777, "step": 23440 }, { "epoch": 1.33, "grad_norm": 1.083740234375, "learning_rate": 8.680858310626703e-05, "loss": 1.0357, "step": 23450 }, { "epoch": 1.33, "grad_norm": 1.0403536558151245, "learning_rate": 8.680290644868301e-05, "loss": 1.0492, "step": 23460 }, { "epoch": 1.33, "grad_norm": 1.0357732772827148, "learning_rate": 8.6797229791099e-05, "loss": 1.0425, "step": 23470 }, { "epoch": 1.33, "grad_norm": 1.1109933853149414, "learning_rate": 8.679155313351499e-05, "loss": 1.0293, "step": 23480 }, { "epoch": 1.33, "grad_norm": 1.0918998718261719, "learning_rate": 8.678587647593097e-05, "loss": 1.0426, "step": 23490 }, { "epoch": 1.33, "grad_norm": 1.0781028270721436, "learning_rate": 8.678019981834696e-05, "loss": 1.0399, "step": 23500 }, { "epoch": 1.33, "grad_norm": 1.0853700637817383, "learning_rate": 8.677452316076294e-05, "loss": 1.0477, "step": 23510 }, { "epoch": 1.33, "grad_norm": 1.1100997924804688, "learning_rate": 8.676884650317893e-05, "loss": 1.0484, "step": 23520 }, { "epoch": 1.33, "grad_norm": 1.0924367904663086, "learning_rate": 8.676316984559491e-05, "loss": 1.0374, "step": 23530 }, { "epoch": 1.33, "grad_norm": 1.1034126281738281, "learning_rate": 8.675749318801091e-05, "loss": 1.0598, "step": 23540 }, { "epoch": 1.34, "grad_norm": 1.1098045110702515, "learning_rate": 8.675181653042689e-05, "loss": 1.0557, "step": 23550 }, { "epoch": 1.34, "grad_norm": 1.1400171518325806, "learning_rate": 8.674613987284288e-05, "loss": 1.062, "step": 23560 }, { "epoch": 1.34, "grad_norm": NaN, "learning_rate": 8.674103088101725e-05, "loss": 1.0628, "step": 23570 }, { "epoch": 1.34, "grad_norm": 1.057300329208374, "learning_rate": 8.673592188919166e-05, "loss": 1.0408, "step": 23580 }, { "epoch": 1.34, "grad_norm": 1.067442536354065, "learning_rate": 8.673024523160764e-05, "loss": 1.0248, "step": 23590 }, { "epoch": 1.34, "grad_norm": 1.0669045448303223, "learning_rate": 8.672456857402363e-05, "loss": 1.0334, "step": 23600 }, { "epoch": 1.34, "grad_norm": 1.0742230415344238, "learning_rate": 8.671889191643961e-05, "loss": 1.0417, "step": 23610 }, { "epoch": 1.34, "grad_norm": 1.0318858623504639, "learning_rate": 8.671321525885559e-05, "loss": 1.0239, "step": 23620 }, { "epoch": 1.34, "grad_norm": 1.199392557144165, "learning_rate": 8.670753860127158e-05, "loss": 1.0488, "step": 23630 }, { "epoch": 1.34, "grad_norm": 1.0697004795074463, "learning_rate": 8.670186194368756e-05, "loss": 1.0427, "step": 23640 }, { "epoch": 1.34, "grad_norm": 1.0457149744033813, "learning_rate": 8.669618528610354e-05, "loss": 1.0467, "step": 23650 }, { "epoch": 1.34, "grad_norm": 1.0821443796157837, "learning_rate": 8.669050862851952e-05, "loss": 1.0412, "step": 23660 }, { "epoch": 1.34, "grad_norm": 1.1249126195907593, "learning_rate": 8.668483197093552e-05, "loss": 1.0515, "step": 23670 }, { "epoch": 1.34, "grad_norm": 1.090413212776184, "learning_rate": 8.66791553133515e-05, "loss": 1.0801, "step": 23680 }, { "epoch": 1.34, "grad_norm": 1.0087432861328125, "learning_rate": 8.667347865576749e-05, "loss": 1.0613, "step": 23690 }, { "epoch": 1.34, "grad_norm": 1.0466498136520386, "learning_rate": 8.666780199818347e-05, "loss": 1.0531, "step": 23700 }, { "epoch": 1.34, "grad_norm": 1.0330034494400024, "learning_rate": 8.666212534059945e-05, "loss": 1.0736, "step": 23710 }, { "epoch": 1.34, "grad_norm": 0.9712209105491638, "learning_rate": 8.665644868301544e-05, "loss": 1.0335, "step": 23720 }, { "epoch": 1.35, "grad_norm": 1.1318464279174805, "learning_rate": 8.665077202543142e-05, "loss": 1.0573, "step": 23730 }, { "epoch": 1.35, "grad_norm": 1.0932565927505493, "learning_rate": 8.664509536784742e-05, "loss": 1.0498, "step": 23740 }, { "epoch": 1.35, "grad_norm": 1.0107802152633667, "learning_rate": 8.66394187102634e-05, "loss": 1.0362, "step": 23750 }, { "epoch": 1.35, "grad_norm": 1.0634331703186035, "learning_rate": 8.663374205267939e-05, "loss": 1.0259, "step": 23760 }, { "epoch": 1.35, "grad_norm": 1.0031077861785889, "learning_rate": 8.662806539509537e-05, "loss": 1.0614, "step": 23770 }, { "epoch": 1.35, "grad_norm": 1.0134872198104858, "learning_rate": 8.662238873751136e-05, "loss": 1.0485, "step": 23780 }, { "epoch": 1.35, "grad_norm": 1.1182775497436523, "learning_rate": 8.661671207992734e-05, "loss": 1.0386, "step": 23790 }, { "epoch": 1.35, "grad_norm": 1.0908191204071045, "learning_rate": 8.661103542234332e-05, "loss": 1.0402, "step": 23800 }, { "epoch": 1.35, "grad_norm": 1.061642050743103, "learning_rate": 8.660535876475932e-05, "loss": 1.0588, "step": 23810 }, { "epoch": 1.35, "grad_norm": 1.101072072982788, "learning_rate": 8.65996821071753e-05, "loss": 1.0612, "step": 23820 }, { "epoch": 1.35, "grad_norm": 1.054057240486145, "learning_rate": 8.659400544959129e-05, "loss": 1.0646, "step": 23830 }, { "epoch": 1.35, "grad_norm": 1.0815752744674683, "learning_rate": 8.658832879200727e-05, "loss": 1.0611, "step": 23840 }, { "epoch": 1.35, "grad_norm": 1.0991877317428589, "learning_rate": 8.658321980018166e-05, "loss": 1.0499, "step": 23850 }, { "epoch": 1.35, "grad_norm": 1.0528414249420166, "learning_rate": 8.657754314259764e-05, "loss": 1.0456, "step": 23860 }, { "epoch": 1.35, "grad_norm": 1.073887825012207, "learning_rate": 8.657186648501363e-05, "loss": 1.077, "step": 23870 }, { "epoch": 1.35, "grad_norm": 1.0218042135238647, "learning_rate": 8.656618982742961e-05, "loss": 1.0285, "step": 23880 }, { "epoch": 1.35, "grad_norm": 1.0555557012557983, "learning_rate": 8.65605131698456e-05, "loss": 1.0401, "step": 23890 }, { "epoch": 1.36, "grad_norm": 1.0796271562576294, "learning_rate": 8.655483651226158e-05, "loss": 1.052, "step": 23900 }, { "epoch": 1.36, "grad_norm": 1.0191986560821533, "learning_rate": 8.654915985467758e-05, "loss": 1.0294, "step": 23910 }, { "epoch": 1.36, "grad_norm": 1.0782431364059448, "learning_rate": 8.654348319709356e-05, "loss": 1.0241, "step": 23920 }, { "epoch": 1.36, "grad_norm": 1.0647064447402954, "learning_rate": 8.653780653950954e-05, "loss": 1.061, "step": 23930 }, { "epoch": 1.36, "grad_norm": 1.2200767993927002, "learning_rate": 8.653212988192553e-05, "loss": 1.0483, "step": 23940 }, { "epoch": 1.36, "grad_norm": 1.0180548429489136, "learning_rate": 8.652645322434151e-05, "loss": 1.0695, "step": 23950 }, { "epoch": 1.36, "grad_norm": 1.0623596906661987, "learning_rate": 8.65207765667575e-05, "loss": 1.0732, "step": 23960 }, { "epoch": 1.36, "grad_norm": 1.0484087467193604, "learning_rate": 8.651509990917348e-05, "loss": 1.0392, "step": 23970 }, { "epoch": 1.36, "grad_norm": 0.9907153844833374, "learning_rate": 8.650942325158948e-05, "loss": 1.0027, "step": 23980 }, { "epoch": 1.36, "grad_norm": 1.0116022825241089, "learning_rate": 8.650374659400546e-05, "loss": 1.0723, "step": 23990 }, { "epoch": 1.36, "grad_norm": 1.1187827587127686, "learning_rate": 8.649806993642144e-05, "loss": 1.0167, "step": 24000 }, { "epoch": 1.36, "grad_norm": 1.0276119709014893, "learning_rate": 8.649239327883742e-05, "loss": 1.0339, "step": 24010 }, { "epoch": 1.36, "grad_norm": 1.0620925426483154, "learning_rate": 8.648671662125341e-05, "loss": 1.0342, "step": 24020 }, { "epoch": 1.36, "grad_norm": 1.0884236097335815, "learning_rate": 8.648103996366939e-05, "loss": 1.0506, "step": 24030 }, { "epoch": 1.36, "grad_norm": 1.11058509349823, "learning_rate": 8.647536330608537e-05, "loss": 1.023, "step": 24040 }, { "epoch": 1.36, "grad_norm": 1.1119128465652466, "learning_rate": 8.646968664850137e-05, "loss": 1.053, "step": 24050 }, { "epoch": 1.36, "grad_norm": 1.1085296869277954, "learning_rate": 8.646400999091735e-05, "loss": 1.032, "step": 24060 }, { "epoch": 1.36, "grad_norm": 1.1088110208511353, "learning_rate": 8.645833333333334e-05, "loss": 1.0463, "step": 24070 }, { "epoch": 1.37, "grad_norm": 1.0645087957382202, "learning_rate": 8.645265667574932e-05, "loss": 1.0512, "step": 24080 }, { "epoch": 1.37, "grad_norm": 1.0821049213409424, "learning_rate": 8.644698001816531e-05, "loss": 1.0251, "step": 24090 }, { "epoch": 1.37, "grad_norm": 1.0678443908691406, "learning_rate": 8.644130336058129e-05, "loss": 1.0329, "step": 24100 }, { "epoch": 1.37, "grad_norm": 1.0472038984298706, "learning_rate": 8.643562670299727e-05, "loss": 1.0525, "step": 24110 }, { "epoch": 1.37, "grad_norm": 1.0495973825454712, "learning_rate": 8.642995004541327e-05, "loss": 1.0335, "step": 24120 }, { "epoch": 1.37, "grad_norm": 1.138681173324585, "learning_rate": 8.642427338782925e-05, "loss": 1.0259, "step": 24130 }, { "epoch": 1.37, "grad_norm": 1.1250709295272827, "learning_rate": 8.641859673024524e-05, "loss": 1.0507, "step": 24140 }, { "epoch": 1.37, "grad_norm": 1.049135446548462, "learning_rate": 8.641292007266122e-05, "loss": 1.0204, "step": 24150 }, { "epoch": 1.37, "grad_norm": 1.0968623161315918, "learning_rate": 8.640724341507721e-05, "loss": 1.0246, "step": 24160 }, { "epoch": 1.37, "grad_norm": 1.091429591178894, "learning_rate": 8.640156675749319e-05, "loss": 1.0767, "step": 24170 }, { "epoch": 1.37, "grad_norm": 1.0619871616363525, "learning_rate": 8.639589009990919e-05, "loss": 1.041, "step": 24180 }, { "epoch": 1.37, "grad_norm": 0.99837726354599, "learning_rate": 8.639021344232517e-05, "loss": 1.0485, "step": 24190 }, { "epoch": 1.37, "grad_norm": 1.0902308225631714, "learning_rate": 8.638453678474115e-05, "loss": 1.0577, "step": 24200 }, { "epoch": 1.37, "grad_norm": 1.0127924680709839, "learning_rate": 8.637886012715714e-05, "loss": 1.019, "step": 24210 }, { "epoch": 1.37, "grad_norm": 1.0912864208221436, "learning_rate": 8.637318346957312e-05, "loss": 1.019, "step": 24220 }, { "epoch": 1.37, "grad_norm": 1.0891278982162476, "learning_rate": 8.636750681198911e-05, "loss": 1.0396, "step": 24230 }, { "epoch": 1.37, "grad_norm": 1.0470199584960938, "learning_rate": 8.636183015440509e-05, "loss": 1.0303, "step": 24240 }, { "epoch": 1.38, "grad_norm": 1.0708727836608887, "learning_rate": 8.635615349682107e-05, "loss": 1.0433, "step": 24250 }, { "epoch": 1.38, "grad_norm": 1.0360268354415894, "learning_rate": 8.635047683923707e-05, "loss": 1.0372, "step": 24260 }, { "epoch": 1.38, "grad_norm": 1.056504487991333, "learning_rate": 8.634480018165305e-05, "loss": 1.0592, "step": 24270 }, { "epoch": 1.38, "grad_norm": 1.075885534286499, "learning_rate": 8.633912352406903e-05, "loss": 1.0588, "step": 24280 }, { "epoch": 1.38, "grad_norm": 1.0299609899520874, "learning_rate": 8.6333446866485e-05, "loss": 1.0545, "step": 24290 }, { "epoch": 1.38, "grad_norm": 1.0659034252166748, "learning_rate": 8.6327770208901e-05, "loss": 1.0407, "step": 24300 }, { "epoch": 1.38, "grad_norm": 1.0878348350524902, "learning_rate": 8.632209355131698e-05, "loss": 1.0418, "step": 24310 }, { "epoch": 1.38, "grad_norm": 1.0178364515304565, "learning_rate": 8.631641689373297e-05, "loss": 1.0245, "step": 24320 }, { "epoch": 1.38, "grad_norm": 1.0715620517730713, "learning_rate": 8.631074023614895e-05, "loss": 1.0506, "step": 24330 }, { "epoch": 1.38, "grad_norm": 1.0954707860946655, "learning_rate": 8.630506357856495e-05, "loss": 1.0542, "step": 24340 }, { "epoch": 1.38, "grad_norm": 1.1221842765808105, "learning_rate": 8.629938692098093e-05, "loss": 1.0273, "step": 24350 }, { "epoch": 1.38, "grad_norm": 1.1319139003753662, "learning_rate": 8.629371026339692e-05, "loss": 1.0527, "step": 24360 }, { "epoch": 1.38, "grad_norm": 1.054721713066101, "learning_rate": 8.62880336058129e-05, "loss": 1.0354, "step": 24370 }, { "epoch": 1.38, "grad_norm": 1.026168704032898, "learning_rate": 8.628235694822888e-05, "loss": 1.0419, "step": 24380 }, { "epoch": 1.38, "grad_norm": 1.0544965267181396, "learning_rate": 8.627668029064487e-05, "loss": 1.0467, "step": 24390 }, { "epoch": 1.38, "grad_norm": 1.037162184715271, "learning_rate": 8.627100363306085e-05, "loss": 1.0434, "step": 24400 }, { "epoch": 1.38, "grad_norm": 1.0699474811553955, "learning_rate": 8.626532697547685e-05, "loss": 1.0619, "step": 24410 }, { "epoch": 1.38, "grad_norm": 1.0352730751037598, "learning_rate": 8.625965031789283e-05, "loss": 1.0108, "step": 24420 }, { "epoch": 1.39, "grad_norm": 1.105663776397705, "learning_rate": 8.625397366030882e-05, "loss": 1.0414, "step": 24430 }, { "epoch": 1.39, "grad_norm": 0.9943239688873291, "learning_rate": 8.62482970027248e-05, "loss": 1.0436, "step": 24440 }, { "epoch": 1.39, "grad_norm": 1.0763862133026123, "learning_rate": 8.62426203451408e-05, "loss": 1.0405, "step": 24450 }, { "epoch": 1.39, "grad_norm": 1.0196524858474731, "learning_rate": 8.623694368755677e-05, "loss": 1.0436, "step": 24460 }, { "epoch": 1.39, "grad_norm": 1.104831337928772, "learning_rate": 8.623126702997275e-05, "loss": 1.0388, "step": 24470 }, { "epoch": 1.39, "grad_norm": 1.1146093606948853, "learning_rate": 8.622559037238875e-05, "loss": 1.0439, "step": 24480 }, { "epoch": 1.39, "grad_norm": 1.1007367372512817, "learning_rate": 8.621991371480473e-05, "loss": 1.0211, "step": 24490 }, { "epoch": 1.39, "grad_norm": 1.083724856376648, "learning_rate": 8.621423705722072e-05, "loss": 1.0355, "step": 24500 }, { "epoch": 1.39, "grad_norm": 1.0446629524230957, "learning_rate": 8.62085603996367e-05, "loss": 1.0479, "step": 24510 }, { "epoch": 1.39, "grad_norm": 1.0864131450653076, "learning_rate": 8.620288374205268e-05, "loss": 1.0444, "step": 24520 }, { "epoch": 1.39, "grad_norm": 1.0696213245391846, "learning_rate": 8.619720708446866e-05, "loss": 1.0564, "step": 24530 }, { "epoch": 1.39, "grad_norm": 1.10694420337677, "learning_rate": 8.619153042688465e-05, "loss": 1.0322, "step": 24540 }, { "epoch": 1.39, "grad_norm": 1.0259569883346558, "learning_rate": 8.618585376930063e-05, "loss": 1.0418, "step": 24550 }, { "epoch": 1.39, "grad_norm": 1.062497615814209, "learning_rate": 8.618017711171661e-05, "loss": 1.0617, "step": 24560 }, { "epoch": 1.39, "grad_norm": 1.0407142639160156, "learning_rate": 8.617450045413261e-05, "loss": 1.0342, "step": 24570 }, { "epoch": 1.39, "grad_norm": 1.1207878589630127, "learning_rate": 8.616882379654859e-05, "loss": 1.0254, "step": 24580 }, { "epoch": 1.39, "grad_norm": 1.1011419296264648, "learning_rate": 8.616314713896458e-05, "loss": 1.0392, "step": 24590 }, { "epoch": 1.39, "grad_norm": 1.0811915397644043, "learning_rate": 8.615747048138056e-05, "loss": 1.0244, "step": 24600 }, { "epoch": 1.4, "grad_norm": 1.0631990432739258, "learning_rate": 8.615179382379655e-05, "loss": 1.0341, "step": 24610 }, { "epoch": 1.4, "grad_norm": 1.0208629369735718, "learning_rate": 8.614611716621253e-05, "loss": 1.0554, "step": 24620 }, { "epoch": 1.4, "grad_norm": 1.0833150148391724, "learning_rate": 8.614044050862853e-05, "loss": 1.0318, "step": 24630 }, { "epoch": 1.4, "grad_norm": 1.0015344619750977, "learning_rate": 8.613476385104451e-05, "loss": 1.0461, "step": 24640 }, { "epoch": 1.4, "grad_norm": 1.1032558679580688, "learning_rate": 8.612908719346049e-05, "loss": 1.039, "step": 24650 }, { "epoch": 1.4, "grad_norm": 1.074925422668457, "learning_rate": 8.612341053587648e-05, "loss": 1.033, "step": 24660 }, { "epoch": 1.4, "grad_norm": 1.0745397806167603, "learning_rate": 8.611773387829246e-05, "loss": 1.0471, "step": 24670 }, { "epoch": 1.4, "grad_norm": 1.0592844486236572, "learning_rate": 8.611205722070846e-05, "loss": 1.0277, "step": 24680 }, { "epoch": 1.4, "grad_norm": 1.0575400590896606, "learning_rate": 8.610638056312444e-05, "loss": 1.0466, "step": 24690 }, { "epoch": 1.4, "grad_norm": 1.077784776687622, "learning_rate": 8.610070390554043e-05, "loss": 1.0345, "step": 24700 }, { "epoch": 1.4, "grad_norm": 1.0648469924926758, "learning_rate": 8.609502724795641e-05, "loss": 1.0265, "step": 24710 }, { "epoch": 1.4, "grad_norm": 1.096855878829956, "learning_rate": 8.60893505903724e-05, "loss": 1.038, "step": 24720 }, { "epoch": 1.4, "grad_norm": 1.0371639728546143, "learning_rate": 8.608367393278838e-05, "loss": 1.0385, "step": 24730 }, { "epoch": 1.4, "grad_norm": 1.0803663730621338, "learning_rate": 8.607799727520436e-05, "loss": 1.0247, "step": 24740 }, { "epoch": 1.4, "grad_norm": 0.9923868775367737, "learning_rate": 8.607232061762036e-05, "loss": 1.0418, "step": 24750 }, { "epoch": 1.4, "grad_norm": 1.0715665817260742, "learning_rate": 8.606664396003634e-05, "loss": 1.0542, "step": 24760 }, { "epoch": 1.4, "grad_norm": 1.1043728590011597, "learning_rate": 8.606096730245232e-05, "loss": 1.056, "step": 24770 }, { "epoch": 1.41, "grad_norm": 1.0308653116226196, "learning_rate": 8.605529064486831e-05, "loss": 1.0461, "step": 24780 }, { "epoch": 1.41, "grad_norm": 1.1159710884094238, "learning_rate": 8.604961398728429e-05, "loss": 1.0629, "step": 24790 }, { "epoch": 1.41, "grad_norm": 1.0560646057128906, "learning_rate": 8.604393732970027e-05, "loss": 1.058, "step": 24800 }, { "epoch": 1.41, "grad_norm": 1.0605884790420532, "learning_rate": 8.603826067211626e-05, "loss": 1.0436, "step": 24810 }, { "epoch": 1.41, "grad_norm": 1.094813346862793, "learning_rate": 8.603258401453224e-05, "loss": 1.0275, "step": 24820 }, { "epoch": 1.41, "grad_norm": 1.0661004781723022, "learning_rate": 8.602690735694822e-05, "loss": 1.0283, "step": 24830 }, { "epoch": 1.41, "grad_norm": 1.1020746231079102, "learning_rate": 8.602123069936422e-05, "loss": 1.0735, "step": 24840 }, { "epoch": 1.41, "grad_norm": 1.0741585493087769, "learning_rate": 8.60155540417802e-05, "loss": 1.0287, "step": 24850 }, { "epoch": 1.41, "grad_norm": 1.0306243896484375, "learning_rate": 8.600987738419619e-05, "loss": 1.0438, "step": 24860 }, { "epoch": 1.41, "grad_norm": 1.0452895164489746, "learning_rate": 8.600420072661217e-05, "loss": 1.0579, "step": 24870 }, { "epoch": 1.41, "grad_norm": 1.0603854656219482, "learning_rate": 8.599852406902816e-05, "loss": 1.0275, "step": 24880 }, { "epoch": 1.41, "grad_norm": 1.0314439535140991, "learning_rate": 8.599284741144414e-05, "loss": 1.0691, "step": 24890 }, { "epoch": 1.41, "grad_norm": 1.054731845855713, "learning_rate": 8.598717075386014e-05, "loss": 1.0506, "step": 24900 }, { "epoch": 1.41, "grad_norm": 1.0530756711959839, "learning_rate": 8.598149409627612e-05, "loss": 1.0409, "step": 24910 }, { "epoch": 1.41, "grad_norm": 1.1173176765441895, "learning_rate": 8.59758174386921e-05, "loss": 1.0281, "step": 24920 }, { "epoch": 1.41, "grad_norm": 1.088672161102295, "learning_rate": 8.597014078110809e-05, "loss": 1.0204, "step": 24930 }, { "epoch": 1.41, "grad_norm": 1.157058835029602, "learning_rate": 8.596446412352407e-05, "loss": 1.0667, "step": 24940 }, { "epoch": 1.41, "grad_norm": 1.1032135486602783, "learning_rate": 8.595878746594006e-05, "loss": 1.0343, "step": 24950 }, { "epoch": 1.42, "grad_norm": 1.045949101448059, "learning_rate": 8.595311080835604e-05, "loss": 1.0235, "step": 24960 }, { "epoch": 1.42, "grad_norm": 0.9886520504951477, "learning_rate": 8.594743415077204e-05, "loss": 1.0309, "step": 24970 }, { "epoch": 1.42, "grad_norm": 1.1251637935638428, "learning_rate": 8.594175749318802e-05, "loss": 1.0212, "step": 24980 }, { "epoch": 1.42, "grad_norm": 1.0743234157562256, "learning_rate": 8.593608083560401e-05, "loss": 1.0168, "step": 24990 }, { "epoch": 1.42, "grad_norm": 1.0361019372940063, "learning_rate": 8.593040417801999e-05, "loss": 1.0349, "step": 25000 }, { "epoch": 1.42, "grad_norm": 1.1083567142486572, "learning_rate": 8.592472752043597e-05, "loss": 1.0533, "step": 25010 }, { "epoch": 1.42, "grad_norm": 1.116161584854126, "learning_rate": 8.591905086285196e-05, "loss": 1.0306, "step": 25020 }, { "epoch": 1.42, "grad_norm": 1.0334296226501465, "learning_rate": 8.591337420526794e-05, "loss": 1.0514, "step": 25030 }, { "epoch": 1.42, "grad_norm": 1.0784640312194824, "learning_rate": 8.590769754768392e-05, "loss": 1.0492, "step": 25040 }, { "epoch": 1.42, "grad_norm": 1.1665613651275635, "learning_rate": 8.59020208900999e-05, "loss": 1.0229, "step": 25050 }, { "epoch": 1.42, "grad_norm": 1.1276755332946777, "learning_rate": 8.58963442325159e-05, "loss": 1.0465, "step": 25060 }, { "epoch": 1.42, "grad_norm": 1.093558430671692, "learning_rate": 8.589066757493188e-05, "loss": 1.0447, "step": 25070 }, { "epoch": 1.42, "grad_norm": 1.001947045326233, "learning_rate": 8.588499091734787e-05, "loss": 1.0496, "step": 25080 }, { "epoch": 1.42, "grad_norm": 1.136443853378296, "learning_rate": 8.587931425976385e-05, "loss": 1.0508, "step": 25090 }, { "epoch": 1.42, "grad_norm": 1.0700500011444092, "learning_rate": 8.587363760217984e-05, "loss": 1.0347, "step": 25100 }, { "epoch": 1.42, "grad_norm": 1.0996134281158447, "learning_rate": 8.586796094459582e-05, "loss": 1.0425, "step": 25110 }, { "epoch": 1.42, "grad_norm": 1.011782169342041, "learning_rate": 8.58622842870118e-05, "loss": 1.0594, "step": 25120 }, { "epoch": 1.42, "grad_norm": 1.0674827098846436, "learning_rate": 8.58566076294278e-05, "loss": 1.0296, "step": 25130 }, { "epoch": 1.43, "grad_norm": 1.0751572847366333, "learning_rate": 8.585093097184378e-05, "loss": 1.0442, "step": 25140 }, { "epoch": 1.43, "grad_norm": 1.0521131753921509, "learning_rate": 8.584525431425977e-05, "loss": 1.0477, "step": 25150 }, { "epoch": 1.43, "grad_norm": 1.0603035688400269, "learning_rate": 8.583957765667575e-05, "loss": 1.0224, "step": 25160 }, { "epoch": 1.43, "grad_norm": 1.111396074295044, "learning_rate": 8.583390099909174e-05, "loss": 1.037, "step": 25170 }, { "epoch": 1.43, "grad_norm": 1.0984817743301392, "learning_rate": 8.582822434150772e-05, "loss": 1.0468, "step": 25180 }, { "epoch": 1.43, "grad_norm": 1.1039501428604126, "learning_rate": 8.582254768392372e-05, "loss": 1.0614, "step": 25190 }, { "epoch": 1.43, "grad_norm": 1.0282272100448608, "learning_rate": 8.58168710263397e-05, "loss": 1.0512, "step": 25200 }, { "epoch": 1.43, "grad_norm": 1.0528944730758667, "learning_rate": 8.581119436875568e-05, "loss": 1.0141, "step": 25210 }, { "epoch": 1.43, "grad_norm": 1.0686018466949463, "learning_rate": 8.580551771117167e-05, "loss": 1.0152, "step": 25220 }, { "epoch": 1.43, "grad_norm": 1.0636078119277954, "learning_rate": 8.579984105358765e-05, "loss": 1.0158, "step": 25230 }, { "epoch": 1.43, "grad_norm": 1.0865229368209839, "learning_rate": 8.579416439600364e-05, "loss": 1.0497, "step": 25240 }, { "epoch": 1.43, "grad_norm": 1.0036611557006836, "learning_rate": 8.578848773841962e-05, "loss": 1.0344, "step": 25250 }, { "epoch": 1.43, "grad_norm": 1.0652108192443848, "learning_rate": 8.578281108083562e-05, "loss": 1.0087, "step": 25260 }, { "epoch": 1.43, "grad_norm": 1.0784941911697388, "learning_rate": 8.57771344232516e-05, "loss": 1.0305, "step": 25270 }, { "epoch": 1.43, "grad_norm": 1.0328446626663208, "learning_rate": 8.577145776566758e-05, "loss": 1.0408, "step": 25280 }, { "epoch": 1.43, "grad_norm": 1.1399006843566895, "learning_rate": 8.576578110808356e-05, "loss": 1.0372, "step": 25290 }, { "epoch": 1.43, "grad_norm": 1.117617130279541, "learning_rate": 8.576010445049955e-05, "loss": 1.0316, "step": 25300 }, { "epoch": 1.44, "grad_norm": 1.1025259494781494, "learning_rate": 8.575442779291553e-05, "loss": 1.0474, "step": 25310 }, { "epoch": 1.44, "grad_norm": 1.0518693923950195, "learning_rate": 8.574875113533151e-05, "loss": 1.0479, "step": 25320 }, { "epoch": 1.44, "grad_norm": 1.131056547164917, "learning_rate": 8.57430744777475e-05, "loss": 1.048, "step": 25330 }, { "epoch": 1.44, "grad_norm": 1.1105297803878784, "learning_rate": 8.573739782016349e-05, "loss": 1.0295, "step": 25340 }, { "epoch": 1.44, "grad_norm": 1.0845342874526978, "learning_rate": 8.573172116257948e-05, "loss": 1.053, "step": 25350 }, { "epoch": 1.44, "grad_norm": 1.092549204826355, "learning_rate": 8.572604450499546e-05, "loss": 1.0685, "step": 25360 }, { "epoch": 1.44, "grad_norm": 1.1004682779312134, "learning_rate": 8.572036784741145e-05, "loss": 1.0375, "step": 25370 }, { "epoch": 1.44, "grad_norm": 1.0871856212615967, "learning_rate": 8.571469118982743e-05, "loss": 1.0219, "step": 25380 }, { "epoch": 1.44, "grad_norm": 1.0111656188964844, "learning_rate": 8.570901453224341e-05, "loss": 1.0362, "step": 25390 }, { "epoch": 1.44, "grad_norm": 1.089620590209961, "learning_rate": 8.57033378746594e-05, "loss": 1.0179, "step": 25400 }, { "epoch": 1.44, "grad_norm": 1.1128486394882202, "learning_rate": 8.569766121707539e-05, "loss": 1.055, "step": 25410 }, { "epoch": 1.44, "grad_norm": 1.098874807357788, "learning_rate": 8.569198455949138e-05, "loss": 1.0473, "step": 25420 }, { "epoch": 1.44, "grad_norm": 1.0749881267547607, "learning_rate": 8.568630790190736e-05, "loss": 1.0605, "step": 25430 }, { "epoch": 1.44, "grad_norm": 1.0613707304000854, "learning_rate": 8.568063124432335e-05, "loss": 1.0386, "step": 25440 }, { "epoch": 1.44, "grad_norm": 1.0713648796081543, "learning_rate": 8.567495458673933e-05, "loss": 1.0567, "step": 25450 }, { "epoch": 1.44, "grad_norm": 1.1204193830490112, "learning_rate": 8.566927792915533e-05, "loss": 1.0661, "step": 25460 }, { "epoch": 1.44, "grad_norm": 1.0670700073242188, "learning_rate": 8.56636012715713e-05, "loss": 1.043, "step": 25470 }, { "epoch": 1.44, "grad_norm": 1.051074504852295, "learning_rate": 8.565792461398729e-05, "loss": 1.0375, "step": 25480 }, { "epoch": 1.45, "grad_norm": 1.0603156089782715, "learning_rate": 8.565224795640328e-05, "loss": 1.0294, "step": 25490 }, { "epoch": 1.45, "grad_norm": 0.9811649322509766, "learning_rate": 8.564657129881926e-05, "loss": 1.0339, "step": 25500 }, { "epoch": 1.45, "grad_norm": 1.0838370323181152, "learning_rate": 8.564089464123525e-05, "loss": 1.0357, "step": 25510 }, { "epoch": 1.45, "grad_norm": 1.0551096200942993, "learning_rate": 8.563521798365123e-05, "loss": 1.0355, "step": 25520 }, { "epoch": 1.45, "grad_norm": 1.1034151315689087, "learning_rate": 8.562954132606721e-05, "loss": 1.0259, "step": 25530 }, { "epoch": 1.45, "grad_norm": 1.1441587209701538, "learning_rate": 8.56238646684832e-05, "loss": 1.0191, "step": 25540 }, { "epoch": 1.45, "grad_norm": 1.169660210609436, "learning_rate": 8.561818801089919e-05, "loss": 1.031, "step": 25550 }, { "epoch": 1.45, "grad_norm": 1.1110678911209106, "learning_rate": 8.561251135331517e-05, "loss": 1.0103, "step": 25560 }, { "epoch": 1.45, "grad_norm": 1.0623809099197388, "learning_rate": 8.560683469573115e-05, "loss": 1.034, "step": 25570 }, { "epoch": 1.45, "grad_norm": 1.107325792312622, "learning_rate": 8.560115803814714e-05, "loss": 1.0297, "step": 25580 }, { "epoch": 1.45, "grad_norm": 1.1488921642303467, "learning_rate": 8.559548138056312e-05, "loss": 1.0461, "step": 25590 }, { "epoch": 1.45, "grad_norm": 1.03333580493927, "learning_rate": 8.558980472297911e-05, "loss": 1.0209, "step": 25600 }, { "epoch": 1.45, "grad_norm": 1.111876368522644, "learning_rate": 8.55841280653951e-05, "loss": 1.0219, "step": 25610 }, { "epoch": 1.45, "grad_norm": 1.1091499328613281, "learning_rate": 8.557845140781109e-05, "loss": 1.0639, "step": 25620 }, { "epoch": 1.45, "grad_norm": 1.0577607154846191, "learning_rate": 8.557277475022707e-05, "loss": 1.0251, "step": 25630 }, { "epoch": 1.45, "grad_norm": 1.03562331199646, "learning_rate": 8.556709809264306e-05, "loss": 1.0185, "step": 25640 }, { "epoch": 1.45, "grad_norm": 1.043339490890503, "learning_rate": 8.556142143505904e-05, "loss": 1.0403, "step": 25650 }, { "epoch": 1.45, "grad_norm": 1.0786539316177368, "learning_rate": 8.555574477747502e-05, "loss": 1.0252, "step": 25660 }, { "epoch": 1.46, "grad_norm": 1.077749252319336, "learning_rate": 8.555006811989101e-05, "loss": 1.0421, "step": 25670 }, { "epoch": 1.46, "grad_norm": 1.0368069410324097, "learning_rate": 8.5544391462307e-05, "loss": 1.0275, "step": 25680 }, { "epoch": 1.46, "grad_norm": 1.0470412969589233, "learning_rate": 8.553871480472299e-05, "loss": 1.0264, "step": 25690 }, { "epoch": 1.46, "grad_norm": 1.0792256593704224, "learning_rate": 8.553303814713897e-05, "loss": 1.0616, "step": 25700 }, { "epoch": 1.46, "grad_norm": 1.1054552793502808, "learning_rate": 8.552736148955496e-05, "loss": 1.0411, "step": 25710 }, { "epoch": 1.46, "grad_norm": 1.0692039728164673, "learning_rate": 8.552168483197094e-05, "loss": 1.0174, "step": 25720 }, { "epoch": 1.46, "grad_norm": 1.0735652446746826, "learning_rate": 8.551600817438693e-05, "loss": 1.0497, "step": 25730 }, { "epoch": 1.46, "grad_norm": 1.0163389444351196, "learning_rate": 8.551033151680291e-05, "loss": 1.0283, "step": 25740 }, { "epoch": 1.46, "grad_norm": 1.0322751998901367, "learning_rate": 8.55046548592189e-05, "loss": 1.0265, "step": 25750 }, { "epoch": 1.46, "grad_norm": 1.0776638984680176, "learning_rate": 8.549897820163489e-05, "loss": 1.0376, "step": 25760 }, { "epoch": 1.46, "grad_norm": 1.1050390005111694, "learning_rate": 8.549330154405087e-05, "loss": 1.0646, "step": 25770 }, { "epoch": 1.46, "grad_norm": 1.082166075706482, "learning_rate": 8.548762488646686e-05, "loss": 1.0485, "step": 25780 }, { "epoch": 1.46, "grad_norm": 1.1230400800704956, "learning_rate": 8.548194822888284e-05, "loss": 1.0337, "step": 25790 }, { "epoch": 1.46, "grad_norm": 1.0487926006317139, "learning_rate": 8.547627157129882e-05, "loss": 1.0246, "step": 25800 }, { "epoch": 1.46, "grad_norm": 1.108508586883545, "learning_rate": 8.54705949137148e-05, "loss": 1.0341, "step": 25810 }, { "epoch": 1.46, "grad_norm": 1.0678547620773315, "learning_rate": 8.54649182561308e-05, "loss": 1.0426, "step": 25820 }, { "epoch": 1.46, "grad_norm": 1.0177972316741943, "learning_rate": 8.545924159854677e-05, "loss": 1.0295, "step": 25830 }, { "epoch": 1.47, "grad_norm": 1.0610544681549072, "learning_rate": 8.545356494096275e-05, "loss": 1.0318, "step": 25840 }, { "epoch": 1.47, "grad_norm": 1.0781772136688232, "learning_rate": 8.544788828337875e-05, "loss": 1.015, "step": 25850 }, { "epoch": 1.47, "grad_norm": 1.1089316606521606, "learning_rate": 8.544221162579473e-05, "loss": 1.0534, "step": 25860 }, { "epoch": 1.47, "grad_norm": 1.0937469005584717, "learning_rate": 8.543653496821072e-05, "loss": 1.0301, "step": 25870 }, { "epoch": 1.47, "grad_norm": 1.064944863319397, "learning_rate": 8.54308583106267e-05, "loss": 1.0195, "step": 25880 }, { "epoch": 1.47, "grad_norm": 1.1209380626678467, "learning_rate": 8.54251816530427e-05, "loss": 1.0457, "step": 25890 }, { "epoch": 1.47, "grad_norm": 1.111092448234558, "learning_rate": 8.541950499545867e-05, "loss": 1.0459, "step": 25900 }, { "epoch": 1.47, "grad_norm": 1.0203781127929688, "learning_rate": 8.541382833787467e-05, "loss": 1.0398, "step": 25910 }, { "epoch": 1.47, "grad_norm": 0.9938228130340576, "learning_rate": 8.540815168029065e-05, "loss": 1.0375, "step": 25920 }, { "epoch": 1.47, "grad_norm": 1.04469895362854, "learning_rate": 8.540247502270663e-05, "loss": 1.0335, "step": 25930 }, { "epoch": 1.47, "grad_norm": 1.1636312007904053, "learning_rate": 8.539679836512262e-05, "loss": 1.0333, "step": 25940 }, { "epoch": 1.47, "grad_norm": 1.0836819410324097, "learning_rate": 8.53911217075386e-05, "loss": 1.012, "step": 25950 }, { "epoch": 1.47, "grad_norm": 1.0723495483398438, "learning_rate": 8.53854450499546e-05, "loss": 1.0403, "step": 25960 }, { "epoch": 1.47, "grad_norm": 1.003122329711914, "learning_rate": 8.537976839237058e-05, "loss": 1.0268, "step": 25970 }, { "epoch": 1.47, "grad_norm": 1.0466291904449463, "learning_rate": 8.537409173478657e-05, "loss": 1.0372, "step": 25980 }, { "epoch": 1.47, "grad_norm": 1.0880447626113892, "learning_rate": 8.536841507720255e-05, "loss": 1.0375, "step": 25990 }, { "epoch": 1.47, "grad_norm": 1.0599682331085205, "learning_rate": 8.536273841961854e-05, "loss": 1.0408, "step": 26000 }, { "epoch": 1.47, "grad_norm": 1.0282504558563232, "learning_rate": 8.535706176203452e-05, "loss": 1.016, "step": 26010 }, { "epoch": 1.48, "grad_norm": 1.0052751302719116, "learning_rate": 8.53513851044505e-05, "loss": 1.0304, "step": 26020 }, { "epoch": 1.48, "grad_norm": 1.0633858442306519, "learning_rate": 8.53457084468665e-05, "loss": 1.0321, "step": 26030 }, { "epoch": 1.48, "grad_norm": 1.014920711517334, "learning_rate": 8.534003178928248e-05, "loss": 1.0738, "step": 26040 }, { "epoch": 1.48, "grad_norm": 1.1024792194366455, "learning_rate": 8.533435513169847e-05, "loss": 1.0522, "step": 26050 }, { "epoch": 1.48, "grad_norm": 1.071089267730713, "learning_rate": 8.532867847411445e-05, "loss": 1.0448, "step": 26060 }, { "epoch": 1.48, "grad_norm": 1.0733187198638916, "learning_rate": 8.532300181653043e-05, "loss": 1.0414, "step": 26070 }, { "epoch": 1.48, "grad_norm": 1.096829891204834, "learning_rate": 8.531732515894641e-05, "loss": 1.0513, "step": 26080 }, { "epoch": 1.48, "grad_norm": 1.0773016214370728, "learning_rate": 8.53116485013624e-05, "loss": 1.0301, "step": 26090 }, { "epoch": 1.48, "grad_norm": 1.060770869255066, "learning_rate": 8.530597184377838e-05, "loss": 1.0263, "step": 26100 }, { "epoch": 1.48, "grad_norm": 1.0543676614761353, "learning_rate": 8.530029518619436e-05, "loss": 1.0333, "step": 26110 }, { "epoch": 1.48, "grad_norm": 1.1308801174163818, "learning_rate": 8.529461852861036e-05, "loss": 1.0624, "step": 26120 }, { "epoch": 1.48, "grad_norm": 1.0684411525726318, "learning_rate": 8.528894187102634e-05, "loss": 1.0648, "step": 26130 }, { "epoch": 1.48, "grad_norm": 1.1294361352920532, "learning_rate": 8.528326521344233e-05, "loss": 1.0472, "step": 26140 }, { "epoch": 1.48, "grad_norm": 1.040653109550476, "learning_rate": 8.527758855585831e-05, "loss": 1.0307, "step": 26150 }, { "epoch": 1.48, "grad_norm": 1.045551061630249, "learning_rate": 8.52719118982743e-05, "loss": 1.0573, "step": 26160 }, { "epoch": 1.48, "grad_norm": 1.0588799715042114, "learning_rate": 8.526623524069028e-05, "loss": 1.0508, "step": 26170 }, { "epoch": 1.48, "grad_norm": 1.1037887334823608, "learning_rate": 8.526055858310628e-05, "loss": 1.0252, "step": 26180 }, { "epoch": 1.49, "grad_norm": 1.1158475875854492, "learning_rate": 8.525488192552226e-05, "loss": 1.0552, "step": 26190 }, { "epoch": 1.49, "grad_norm": 1.0452454090118408, "learning_rate": 8.524920526793824e-05, "loss": 1.0391, "step": 26200 }, { "epoch": 1.49, "grad_norm": 1.08488929271698, "learning_rate": 8.524352861035423e-05, "loss": 1.0369, "step": 26210 }, { "epoch": 1.49, "grad_norm": 1.0436139106750488, "learning_rate": 8.523785195277021e-05, "loss": 1.0399, "step": 26220 }, { "epoch": 1.49, "grad_norm": 1.0602563619613647, "learning_rate": 8.52321752951862e-05, "loss": 1.0499, "step": 26230 }, { "epoch": 1.49, "grad_norm": 1.1369328498840332, "learning_rate": 8.522649863760218e-05, "loss": 1.0219, "step": 26240 }, { "epoch": 1.49, "grad_norm": 1.0871156454086304, "learning_rate": 8.522082198001818e-05, "loss": 1.036, "step": 26250 }, { "epoch": 1.49, "grad_norm": 1.0912245512008667, "learning_rate": 8.521514532243416e-05, "loss": 1.0292, "step": 26260 }, { "epoch": 1.49, "grad_norm": 1.0980579853057861, "learning_rate": 8.520946866485015e-05, "loss": 1.0386, "step": 26270 }, { "epoch": 1.49, "grad_norm": 1.0727285146713257, "learning_rate": 8.520379200726613e-05, "loss": 1.0392, "step": 26280 }, { "epoch": 1.49, "grad_norm": 1.1261309385299683, "learning_rate": 8.519811534968211e-05, "loss": 1.0293, "step": 26290 }, { "epoch": 1.49, "grad_norm": 1.0273427963256836, "learning_rate": 8.51924386920981e-05, "loss": 1.0488, "step": 26300 }, { "epoch": 1.49, "grad_norm": 1.1424314975738525, "learning_rate": 8.518676203451408e-05, "loss": 1.0375, "step": 26310 }, { "epoch": 1.49, "grad_norm": 1.0824123620986938, "learning_rate": 8.518108537693006e-05, "loss": 1.0148, "step": 26320 }, { "epoch": 1.49, "grad_norm": 1.1016435623168945, "learning_rate": 8.517540871934604e-05, "loss": 1.0322, "step": 26330 }, { "epoch": 1.49, "grad_norm": 1.0368143320083618, "learning_rate": 8.516973206176204e-05, "loss": 1.0215, "step": 26340 }, { "epoch": 1.49, "grad_norm": 1.0397987365722656, "learning_rate": 8.516405540417802e-05, "loss": 1.0323, "step": 26350 }, { "epoch": 1.49, "grad_norm": 1.066589117050171, "learning_rate": 8.515837874659401e-05, "loss": 1.039, "step": 26360 }, { "epoch": 1.5, "grad_norm": 1.1360101699829102, "learning_rate": 8.515270208900999e-05, "loss": 1.0493, "step": 26370 }, { "epoch": 1.5, "grad_norm": 1.060848355293274, "learning_rate": 8.514702543142597e-05, "loss": 1.0189, "step": 26380 }, { "epoch": 1.5, "grad_norm": 1.096444010734558, "learning_rate": 8.514134877384196e-05, "loss": 1.0173, "step": 26390 }, { "epoch": 1.5, "grad_norm": 1.0499037504196167, "learning_rate": 8.513567211625794e-05, "loss": 1.0386, "step": 26400 }, { "epoch": 1.5, "grad_norm": 1.0968937873840332, "learning_rate": 8.512999545867394e-05, "loss": 1.0278, "step": 26410 }, { "epoch": 1.5, "grad_norm": 1.0437902212142944, "learning_rate": 8.512431880108992e-05, "loss": 1.0553, "step": 26420 }, { "epoch": 1.5, "grad_norm": 1.0839430093765259, "learning_rate": 8.511864214350591e-05, "loss": 1.0192, "step": 26430 }, { "epoch": 1.5, "grad_norm": 1.0869196653366089, "learning_rate": 8.511296548592189e-05, "loss": 1.0222, "step": 26440 }, { "epoch": 1.5, "grad_norm": 1.0736488103866577, "learning_rate": 8.510728882833788e-05, "loss": 1.022, "step": 26450 }, { "epoch": 1.5, "grad_norm": 1.08152174949646, "learning_rate": 8.510161217075386e-05, "loss": 1.0394, "step": 26460 }, { "epoch": 1.5, "grad_norm": 1.0340886116027832, "learning_rate": 8.509593551316984e-05, "loss": 1.0364, "step": 26470 }, { "epoch": 1.5, "grad_norm": 1.073075532913208, "learning_rate": 8.509025885558584e-05, "loss": 1.0174, "step": 26480 }, { "epoch": 1.5, "grad_norm": 1.0974020957946777, "learning_rate": 8.508458219800182e-05, "loss": 1.0132, "step": 26490 }, { "epoch": 1.5, "grad_norm": 1.0511151552200317, "learning_rate": 8.507890554041781e-05, "loss": 1.0748, "step": 26500 }, { "epoch": 1.5, "grad_norm": 1.1193112134933472, "learning_rate": 8.507322888283379e-05, "loss": 1.0456, "step": 26510 }, { "epoch": 1.5, "grad_norm": 1.0450776815414429, "learning_rate": 8.506755222524978e-05, "loss": 1.033, "step": 26520 }, { "epoch": 1.5, "grad_norm": 1.1171506643295288, "learning_rate": 8.506187556766576e-05, "loss": 1.0335, "step": 26530 }, { "epoch": 1.5, "grad_norm": 1.1196831464767456, "learning_rate": 8.505619891008176e-05, "loss": 1.0236, "step": 26540 }, { "epoch": 1.51, "grad_norm": 1.0600507259368896, "learning_rate": 8.505052225249774e-05, "loss": 1.034, "step": 26550 }, { "epoch": 1.51, "grad_norm": 1.1178171634674072, "learning_rate": 8.504484559491372e-05, "loss": 1.0515, "step": 26560 }, { "epoch": 1.51, "grad_norm": 1.106784462928772, "learning_rate": 8.503916893732971e-05, "loss": 1.0403, "step": 26570 }, { "epoch": 1.51, "grad_norm": 1.0396783351898193, "learning_rate": 8.503349227974569e-05, "loss": 0.9994, "step": 26580 }, { "epoch": 1.51, "grad_norm": 1.1729408502578735, "learning_rate": 8.502781562216167e-05, "loss": 1.0314, "step": 26590 }, { "epoch": 1.51, "grad_norm": 1.0512171983718872, "learning_rate": 8.502213896457765e-05, "loss": 1.0274, "step": 26600 }, { "epoch": 1.51, "grad_norm": 1.0833150148391724, "learning_rate": 8.501646230699365e-05, "loss": 1.0573, "step": 26610 }, { "epoch": 1.51, "grad_norm": 1.0688533782958984, "learning_rate": 8.501078564940963e-05, "loss": 1.0267, "step": 26620 }, { "epoch": 1.51, "grad_norm": 1.0949103832244873, "learning_rate": 8.500510899182562e-05, "loss": 1.0312, "step": 26630 }, { "epoch": 1.51, "grad_norm": 1.1213871240615845, "learning_rate": 8.49994323342416e-05, "loss": 1.0465, "step": 26640 }, { "epoch": 1.51, "grad_norm": 1.1025370359420776, "learning_rate": 8.499375567665758e-05, "loss": 1.0096, "step": 26650 }, { "epoch": 1.51, "grad_norm": 1.072756052017212, "learning_rate": 8.498807901907357e-05, "loss": 0.9919, "step": 26660 }, { "epoch": 1.51, "grad_norm": 1.0794070959091187, "learning_rate": 8.498240236148955e-05, "loss": 1.0132, "step": 26670 }, { "epoch": 1.51, "grad_norm": 1.1264245510101318, "learning_rate": 8.497672570390555e-05, "loss": 1.0254, "step": 26680 }, { "epoch": 1.51, "grad_norm": 1.1118965148925781, "learning_rate": 8.497104904632153e-05, "loss": 1.0165, "step": 26690 }, { "epoch": 1.51, "grad_norm": 1.0184201002120972, "learning_rate": 8.496537238873752e-05, "loss": 1.0188, "step": 26700 }, { "epoch": 1.51, "grad_norm": 1.0561351776123047, "learning_rate": 8.49596957311535e-05, "loss": 1.0135, "step": 26710 }, { "epoch": 1.52, "grad_norm": 1.011909008026123, "learning_rate": 8.495401907356949e-05, "loss": 1.0535, "step": 26720 }, { "epoch": 1.52, "grad_norm": 1.0310617685317993, "learning_rate": 8.494834241598547e-05, "loss": 1.0347, "step": 26730 }, { "epoch": 1.52, "grad_norm": 1.125165581703186, "learning_rate": 8.494266575840145e-05, "loss": 1.0091, "step": 26740 }, { "epoch": 1.52, "grad_norm": 1.0704227685928345, "learning_rate": 8.493698910081745e-05, "loss": 1.0529, "step": 26750 }, { "epoch": 1.52, "grad_norm": 1.0724339485168457, "learning_rate": 8.493131244323343e-05, "loss": 1.0182, "step": 26760 }, { "epoch": 1.52, "grad_norm": 1.065321683883667, "learning_rate": 8.492563578564942e-05, "loss": 1.027, "step": 26770 }, { "epoch": 1.52, "grad_norm": 1.135810375213623, "learning_rate": 8.49199591280654e-05, "loss": 1.008, "step": 26780 }, { "epoch": 1.52, "grad_norm": 1.143743634223938, "learning_rate": 8.491428247048139e-05, "loss": 1.0215, "step": 26790 }, { "epoch": 1.52, "grad_norm": 1.04877769947052, "learning_rate": 8.490860581289737e-05, "loss": 1.0202, "step": 26800 }, { "epoch": 1.52, "grad_norm": 1.0481332540512085, "learning_rate": 8.490292915531337e-05, "loss": 1.0372, "step": 26810 }, { "epoch": 1.52, "grad_norm": 1.0580120086669922, "learning_rate": 8.489725249772935e-05, "loss": 1.0144, "step": 26820 }, { "epoch": 1.52, "grad_norm": 1.123199701309204, "learning_rate": 8.489157584014533e-05, "loss": 1.06, "step": 26830 }, { "epoch": 1.52, "grad_norm": 1.058341145515442, "learning_rate": 8.48858991825613e-05, "loss": 1.0099, "step": 26840 }, { "epoch": 1.52, "grad_norm": 1.1034603118896484, "learning_rate": 8.48802225249773e-05, "loss": 1.0433, "step": 26850 }, { "epoch": 1.52, "grad_norm": 1.052620530128479, "learning_rate": 8.487454586739328e-05, "loss": 1.0375, "step": 26860 }, { "epoch": 1.52, "grad_norm": 1.113507628440857, "learning_rate": 8.486886920980926e-05, "loss": 1.0359, "step": 26870 }, { "epoch": 1.52, "grad_norm": 1.0478311777114868, "learning_rate": 8.486319255222525e-05, "loss": 1.0397, "step": 26880 }, { "epoch": 1.52, "grad_norm": 1.091139793395996, "learning_rate": 8.485751589464123e-05, "loss": 1.0312, "step": 26890 }, { "epoch": 1.53, "grad_norm": 1.049472451210022, "learning_rate": 8.485183923705723e-05, "loss": 1.0291, "step": 26900 }, { "epoch": 1.53, "grad_norm": 1.1364504098892212, "learning_rate": 8.48461625794732e-05, "loss": 1.0396, "step": 26910 }, { "epoch": 1.53, "grad_norm": 1.0754480361938477, "learning_rate": 8.48404859218892e-05, "loss": 1.0496, "step": 26920 }, { "epoch": 1.53, "grad_norm": 1.1163142919540405, "learning_rate": 8.483480926430518e-05, "loss": 1.0657, "step": 26930 }, { "epoch": 1.53, "grad_norm": 1.0453120470046997, "learning_rate": 8.482913260672116e-05, "loss": 1.0192, "step": 26940 }, { "epoch": 1.53, "grad_norm": 1.1642099618911743, "learning_rate": 8.482345594913715e-05, "loss": 1.0424, "step": 26950 }, { "epoch": 1.53, "grad_norm": 1.1426668167114258, "learning_rate": 8.481777929155313e-05, "loss": 1.0445, "step": 26960 }, { "epoch": 1.53, "grad_norm": 1.073163390159607, "learning_rate": 8.481210263396913e-05, "loss": 1.0386, "step": 26970 }, { "epoch": 1.53, "grad_norm": 1.1576958894729614, "learning_rate": 8.480642597638511e-05, "loss": 1.0595, "step": 26980 }, { "epoch": 1.53, "grad_norm": 1.0426156520843506, "learning_rate": 8.48007493188011e-05, "loss": 1.0173, "step": 26990 }, { "epoch": 1.53, "grad_norm": 1.0893619060516357, "learning_rate": 8.479507266121708e-05, "loss": 1.041, "step": 27000 }, { "epoch": 1.53, "grad_norm": 1.0882762670516968, "learning_rate": 8.478939600363307e-05, "loss": 1.0558, "step": 27010 }, { "epoch": 1.53, "grad_norm": 1.1048500537872314, "learning_rate": 8.478371934604905e-05, "loss": 1.0189, "step": 27020 }, { "epoch": 1.53, "grad_norm": 1.1902625560760498, "learning_rate": 8.477804268846503e-05, "loss": 1.0497, "step": 27030 }, { "epoch": 1.53, "grad_norm": 1.0729800462722778, "learning_rate": 8.477236603088103e-05, "loss": 1.049, "step": 27040 }, { "epoch": 1.53, "grad_norm": 1.0322216749191284, "learning_rate": 8.476668937329701e-05, "loss": 1.0547, "step": 27050 }, { "epoch": 1.53, "grad_norm": 1.02250337600708, "learning_rate": 8.4761012715713e-05, "loss": 1.0156, "step": 27060 }, { "epoch": 1.53, "grad_norm": 1.078972578048706, "learning_rate": 8.475533605812898e-05, "loss": 1.0429, "step": 27070 }, { "epoch": 1.54, "grad_norm": 1.0728557109832764, "learning_rate": 8.474965940054496e-05, "loss": 1.0305, "step": 27080 }, { "epoch": 1.54, "grad_norm": 1.0554001331329346, "learning_rate": 8.474398274296095e-05, "loss": 1.0499, "step": 27090 }, { "epoch": 1.54, "grad_norm": 1.181875228881836, "learning_rate": 8.473830608537693e-05, "loss": 1.0308, "step": 27100 }, { "epoch": 1.54, "grad_norm": 1.0925424098968506, "learning_rate": 8.473262942779291e-05, "loss": 1.0431, "step": 27110 }, { "epoch": 1.54, "grad_norm": 1.1405072212219238, "learning_rate": 8.47269527702089e-05, "loss": 1.0296, "step": 27120 }, { "epoch": 1.54, "grad_norm": 1.081164002418518, "learning_rate": 8.472127611262489e-05, "loss": 1.0405, "step": 27130 }, { "epoch": 1.54, "grad_norm": 1.0701526403427124, "learning_rate": 8.471559945504087e-05, "loss": 1.011, "step": 27140 }, { "epoch": 1.54, "grad_norm": 1.0757498741149902, "learning_rate": 8.470992279745686e-05, "loss": 1.0231, "step": 27150 }, { "epoch": 1.54, "grad_norm": 1.0740242004394531, "learning_rate": 8.470424613987284e-05, "loss": 1.0368, "step": 27160 }, { "epoch": 1.54, "grad_norm": 1.062424898147583, "learning_rate": 8.469856948228883e-05, "loss": 1.0314, "step": 27170 }, { "epoch": 1.54, "grad_norm": 1.1633119583129883, "learning_rate": 8.469289282470481e-05, "loss": 0.9972, "step": 27180 }, { "epoch": 1.54, "grad_norm": 1.0350247621536255, "learning_rate": 8.468721616712081e-05, "loss": 1.0033, "step": 27190 }, { "epoch": 1.54, "grad_norm": 1.0129618644714355, "learning_rate": 8.468153950953679e-05, "loss": 1.0349, "step": 27200 }, { "epoch": 1.54, "grad_norm": 1.122113585472107, "learning_rate": 8.467586285195277e-05, "loss": 1.0339, "step": 27210 }, { "epoch": 1.54, "grad_norm": 1.1530349254608154, "learning_rate": 8.467018619436876e-05, "loss": 1.0386, "step": 27220 }, { "epoch": 1.54, "grad_norm": 1.0684665441513062, "learning_rate": 8.466450953678474e-05, "loss": 1.041, "step": 27230 }, { "epoch": 1.54, "grad_norm": 1.1030889749526978, "learning_rate": 8.465883287920074e-05, "loss": 1.0312, "step": 27240 }, { "epoch": 1.55, "grad_norm": 1.059718132019043, "learning_rate": 8.465315622161672e-05, "loss": 1.0383, "step": 27250 }, { "epoch": 1.55, "grad_norm": 1.0456109046936035, "learning_rate": 8.464747956403271e-05, "loss": 1.0693, "step": 27260 }, { "epoch": 1.55, "grad_norm": 1.0504735708236694, "learning_rate": 8.464180290644869e-05, "loss": 1.0388, "step": 27270 }, { "epoch": 1.55, "grad_norm": 1.0719006061553955, "learning_rate": 8.463612624886468e-05, "loss": 1.0199, "step": 27280 }, { "epoch": 1.55, "grad_norm": 1.0071922540664673, "learning_rate": 8.463044959128066e-05, "loss": 1.0249, "step": 27290 }, { "epoch": 1.55, "grad_norm": 1.0407023429870605, "learning_rate": 8.462477293369664e-05, "loss": 1.038, "step": 27300 }, { "epoch": 1.55, "grad_norm": 1.086524486541748, "learning_rate": 8.461909627611264e-05, "loss": 1.0525, "step": 27310 }, { "epoch": 1.55, "grad_norm": 1.1049474477767944, "learning_rate": 8.461341961852862e-05, "loss": 1.0308, "step": 27320 }, { "epoch": 1.55, "grad_norm": 1.0957913398742676, "learning_rate": 8.460774296094461e-05, "loss": 1.06, "step": 27330 }, { "epoch": 1.55, "grad_norm": 1.0380489826202393, "learning_rate": 8.460206630336059e-05, "loss": 1.0225, "step": 27340 }, { "epoch": 1.55, "grad_norm": 1.0430588722229004, "learning_rate": 8.459638964577657e-05, "loss": 1.0292, "step": 27350 }, { "epoch": 1.55, "grad_norm": 1.0051699876785278, "learning_rate": 8.459071298819255e-05, "loss": 1.0399, "step": 27360 }, { "epoch": 1.55, "grad_norm": 1.1102490425109863, "learning_rate": 8.458503633060854e-05, "loss": 1.0267, "step": 27370 }, { "epoch": 1.55, "grad_norm": 1.1183550357818604, "learning_rate": 8.457935967302452e-05, "loss": 1.0383, "step": 27380 }, { "epoch": 1.55, "grad_norm": 1.0606646537780762, "learning_rate": 8.45736830154405e-05, "loss": 1.0285, "step": 27390 }, { "epoch": 1.55, "grad_norm": 0.9955136775970459, "learning_rate": 8.45680063578565e-05, "loss": 1.0426, "step": 27400 }, { "epoch": 1.55, "grad_norm": 1.047567367553711, "learning_rate": 8.456232970027248e-05, "loss": 1.0246, "step": 27410 }, { "epoch": 1.55, "grad_norm": 1.027348279953003, "learning_rate": 8.455665304268847e-05, "loss": 1.0469, "step": 27420 }, { "epoch": 1.56, "grad_norm": 1.013365626335144, "learning_rate": 8.455097638510445e-05, "loss": 1.0284, "step": 27430 }, { "epoch": 1.56, "grad_norm": 1.1212509870529175, "learning_rate": 8.454529972752044e-05, "loss": 0.9964, "step": 27440 }, { "epoch": 1.56, "grad_norm": 1.112927794456482, "learning_rate": 8.453962306993642e-05, "loss": 1.0311, "step": 27450 }, { "epoch": 1.56, "grad_norm": 1.1230723857879639, "learning_rate": 8.453394641235242e-05, "loss": 1.0134, "step": 27460 }, { "epoch": 1.56, "grad_norm": 1.0603512525558472, "learning_rate": 8.45282697547684e-05, "loss": 1.0587, "step": 27470 }, { "epoch": 1.56, "grad_norm": 1.0934616327285767, "learning_rate": 8.452259309718438e-05, "loss": 1.0478, "step": 27480 }, { "epoch": 1.56, "grad_norm": 1.0898276567459106, "learning_rate": 8.451691643960037e-05, "loss": 1.053, "step": 27490 }, { "epoch": 1.56, "grad_norm": 1.037590503692627, "learning_rate": 8.451123978201635e-05, "loss": 1.0149, "step": 27500 }, { "epoch": 1.56, "grad_norm": 1.0819743871688843, "learning_rate": 8.450556312443234e-05, "loss": 1.0156, "step": 27510 }, { "epoch": 1.56, "grad_norm": 1.09792959690094, "learning_rate": 8.449988646684832e-05, "loss": 1.0405, "step": 27520 }, { "epoch": 1.56, "grad_norm": 1.1121187210083008, "learning_rate": 8.449420980926432e-05, "loss": 1.0275, "step": 27530 }, { "epoch": 1.56, "grad_norm": 1.0870826244354248, "learning_rate": 8.44885331516803e-05, "loss": 1.0478, "step": 27540 }, { "epoch": 1.56, "grad_norm": 1.0661145448684692, "learning_rate": 8.448285649409629e-05, "loss": 1.002, "step": 27550 }, { "epoch": 1.56, "grad_norm": 1.0845766067504883, "learning_rate": 8.447717983651227e-05, "loss": 1.0438, "step": 27560 }, { "epoch": 1.56, "grad_norm": 1.148488998413086, "learning_rate": 8.447150317892825e-05, "loss": 1.029, "step": 27570 }, { "epoch": 1.56, "grad_norm": 1.0939689874649048, "learning_rate": 8.446582652134424e-05, "loss": 1.0101, "step": 27580 }, { "epoch": 1.56, "grad_norm": 1.135745882987976, "learning_rate": 8.446014986376022e-05, "loss": 1.0208, "step": 27590 }, { "epoch": 1.56, "grad_norm": 1.156302809715271, "learning_rate": 8.44544732061762e-05, "loss": 1.0262, "step": 27600 }, { "epoch": 1.57, "grad_norm": 1.0954949855804443, "learning_rate": 8.44487965485922e-05, "loss": 1.0369, "step": 27610 }, { "epoch": 1.57, "grad_norm": 1.090406894683838, "learning_rate": 8.444311989100818e-05, "loss": 1.0605, "step": 27620 }, { "epoch": 1.57, "grad_norm": 1.068727731704712, "learning_rate": 8.443744323342416e-05, "loss": 1.0297, "step": 27630 }, { "epoch": 1.57, "grad_norm": 1.0926753282546997, "learning_rate": 8.443176657584015e-05, "loss": 1.0556, "step": 27640 }, { "epoch": 1.57, "grad_norm": 1.0868667364120483, "learning_rate": 8.442608991825613e-05, "loss": 1.0462, "step": 27650 }, { "epoch": 1.57, "grad_norm": 1.081336259841919, "learning_rate": 8.442041326067211e-05, "loss": 1.0292, "step": 27660 }, { "epoch": 1.57, "grad_norm": 1.132210373878479, "learning_rate": 8.44147366030881e-05, "loss": 1.0264, "step": 27670 }, { "epoch": 1.57, "grad_norm": 1.079209327697754, "learning_rate": 8.440905994550408e-05, "loss": 1.0324, "step": 27680 }, { "epoch": 1.57, "grad_norm": 1.0666086673736572, "learning_rate": 8.440338328792008e-05, "loss": 1.0249, "step": 27690 }, { "epoch": 1.57, "grad_norm": 1.1295536756515503, "learning_rate": 8.439770663033606e-05, "loss": 1.0141, "step": 27700 }, { "epoch": 1.57, "grad_norm": 1.1481215953826904, "learning_rate": 8.439202997275205e-05, "loss": 1.0457, "step": 27710 }, { "epoch": 1.57, "grad_norm": 1.1097265481948853, "learning_rate": 8.438635331516803e-05, "loss": 1.0432, "step": 27720 }, { "epoch": 1.57, "grad_norm": 1.0740796327590942, "learning_rate": 8.438067665758402e-05, "loss": 1.0394, "step": 27730 }, { "epoch": 1.57, "grad_norm": 1.060360312461853, "learning_rate": 8.4375e-05, "loss": 1.046, "step": 27740 }, { "epoch": 1.57, "grad_norm": 1.0544153451919556, "learning_rate": 8.436932334241598e-05, "loss": 1.0279, "step": 27750 }, { "epoch": 1.57, "grad_norm": 1.088105320930481, "learning_rate": 8.436364668483198e-05, "loss": 1.0133, "step": 27760 }, { "epoch": 1.57, "grad_norm": 1.0419126749038696, "learning_rate": 8.435797002724796e-05, "loss": 1.0158, "step": 27770 }, { "epoch": 1.58, "grad_norm": 1.111018419265747, "learning_rate": 8.435229336966395e-05, "loss": 0.9958, "step": 27780 }, { "epoch": 1.58, "grad_norm": 1.1183429956436157, "learning_rate": 8.434661671207993e-05, "loss": 1.0118, "step": 27790 }, { "epoch": 1.58, "grad_norm": 1.0963131189346313, "learning_rate": 8.434094005449592e-05, "loss": 1.0216, "step": 27800 }, { "epoch": 1.58, "grad_norm": 1.1235411167144775, "learning_rate": 8.43352633969119e-05, "loss": 1.0224, "step": 27810 }, { "epoch": 1.58, "grad_norm": 1.1575918197631836, "learning_rate": 8.43295867393279e-05, "loss": 1.0544, "step": 27820 }, { "epoch": 1.58, "grad_norm": 1.1289623975753784, "learning_rate": 8.432391008174388e-05, "loss": 1.0181, "step": 27830 }, { "epoch": 1.58, "grad_norm": 0.9776872396469116, "learning_rate": 8.431823342415986e-05, "loss": 1.0216, "step": 27840 }, { "epoch": 1.58, "grad_norm": 1.1232506036758423, "learning_rate": 8.431255676657585e-05, "loss": 1.0339, "step": 27850 }, { "epoch": 1.58, "grad_norm": 1.0282658338546753, "learning_rate": 8.430688010899183e-05, "loss": 1.0299, "step": 27860 }, { "epoch": 1.58, "grad_norm": 1.0380496978759766, "learning_rate": 8.430120345140781e-05, "loss": 1.0129, "step": 27870 }, { "epoch": 1.58, "grad_norm": 1.084304928779602, "learning_rate": 8.429552679382379e-05, "loss": 1.057, "step": 27880 }, { "epoch": 1.58, "grad_norm": 1.0899438858032227, "learning_rate": 8.428985013623979e-05, "loss": 1.019, "step": 27890 }, { "epoch": 1.58, "grad_norm": 1.0493764877319336, "learning_rate": 8.428417347865577e-05, "loss": 1.0489, "step": 27900 }, { "epoch": 1.58, "grad_norm": 1.0809894800186157, "learning_rate": 8.427849682107176e-05, "loss": 1.0327, "step": 27910 }, { "epoch": 1.58, "grad_norm": 1.0016474723815918, "learning_rate": 8.427282016348774e-05, "loss": 1.0333, "step": 27920 }, { "epoch": 1.58, "grad_norm": 1.0859230756759644, "learning_rate": 8.426714350590372e-05, "loss": 1.0243, "step": 27930 }, { "epoch": 1.58, "grad_norm": 1.0746361017227173, "learning_rate": 8.426146684831971e-05, "loss": 1.019, "step": 27940 }, { "epoch": 1.58, "grad_norm": 1.0459810495376587, "learning_rate": 8.425579019073569e-05, "loss": 1.027, "step": 27950 }, { "epoch": 1.59, "grad_norm": 1.1703987121582031, "learning_rate": 8.425011353315169e-05, "loss": 1.0246, "step": 27960 }, { "epoch": 1.59, "grad_norm": 1.0546724796295166, "learning_rate": 8.424443687556767e-05, "loss": 1.0102, "step": 27970 }, { "epoch": 1.59, "grad_norm": 1.1224933862686157, "learning_rate": 8.423876021798366e-05, "loss": 1.0391, "step": 27980 }, { "epoch": 1.59, "grad_norm": 1.1186931133270264, "learning_rate": 8.423308356039964e-05, "loss": 1.0098, "step": 27990 }, { "epoch": 1.59, "grad_norm": 1.0564823150634766, "learning_rate": 8.422740690281563e-05, "loss": 1.0449, "step": 28000 }, { "epoch": 1.59, "grad_norm": 1.0804423093795776, "learning_rate": 8.422173024523161e-05, "loss": 1.0164, "step": 28010 }, { "epoch": 1.59, "grad_norm": 1.1685253381729126, "learning_rate": 8.421605358764759e-05, "loss": 1.0144, "step": 28020 }, { "epoch": 1.59, "grad_norm": 1.0898975133895874, "learning_rate": 8.421037693006359e-05, "loss": 1.0343, "step": 28030 }, { "epoch": 1.59, "grad_norm": 1.0865752696990967, "learning_rate": 8.420470027247957e-05, "loss": 1.0302, "step": 28040 }, { "epoch": 1.59, "grad_norm": 1.0918776988983154, "learning_rate": 8.419902361489556e-05, "loss": 1.0181, "step": 28050 }, { "epoch": 1.59, "grad_norm": 1.0837897062301636, "learning_rate": 8.419334695731154e-05, "loss": 1.0434, "step": 28060 }, { "epoch": 1.59, "grad_norm": 1.0367114543914795, "learning_rate": 8.418767029972753e-05, "loss": 1.0296, "step": 28070 }, { "epoch": 1.59, "grad_norm": 1.0577870607376099, "learning_rate": 8.418199364214351e-05, "loss": 1.0214, "step": 28080 }, { "epoch": 1.59, "grad_norm": 1.10112726688385, "learning_rate": 8.41763169845595e-05, "loss": 1.03, "step": 28090 }, { "epoch": 1.59, "grad_norm": 1.110358715057373, "learning_rate": 8.417064032697549e-05, "loss": 1.003, "step": 28100 }, { "epoch": 1.59, "grad_norm": 1.1703184843063354, "learning_rate": 8.416496366939147e-05, "loss": 1.0433, "step": 28110 }, { "epoch": 1.59, "grad_norm": 1.0849319696426392, "learning_rate": 8.415928701180745e-05, "loss": 1.0301, "step": 28120 }, { "epoch": 1.6, "grad_norm": 1.0127675533294678, "learning_rate": 8.415361035422344e-05, "loss": 1.0579, "step": 28130 }, { "epoch": 1.6, "grad_norm": 1.0895578861236572, "learning_rate": 8.414793369663942e-05, "loss": 1.0091, "step": 28140 }, { "epoch": 1.6, "grad_norm": 1.0587319135665894, "learning_rate": 8.41422570390554e-05, "loss": 1.0402, "step": 28150 }, { "epoch": 1.6, "grad_norm": 1.09025239944458, "learning_rate": 8.41365803814714e-05, "loss": 1.0164, "step": 28160 }, { "epoch": 1.6, "grad_norm": 1.148901104927063, "learning_rate": 8.413090372388737e-05, "loss": 0.9997, "step": 28170 }, { "epoch": 1.6, "grad_norm": 1.0996648073196411, "learning_rate": 8.412522706630337e-05, "loss": 1.0096, "step": 28180 }, { "epoch": 1.6, "grad_norm": 1.137698769569397, "learning_rate": 8.411955040871935e-05, "loss": 1.0381, "step": 28190 }, { "epoch": 1.6, "grad_norm": 1.0747947692871094, "learning_rate": 8.411387375113533e-05, "loss": 1.0367, "step": 28200 }, { "epoch": 1.6, "grad_norm": 1.072318196296692, "learning_rate": 8.410819709355132e-05, "loss": 1.0297, "step": 28210 }, { "epoch": 1.6, "grad_norm": 1.1393135786056519, "learning_rate": 8.41025204359673e-05, "loss": 1.0483, "step": 28220 }, { "epoch": 1.6, "grad_norm": 1.0921533107757568, "learning_rate": 8.40968437783833e-05, "loss": 1.0185, "step": 28230 }, { "epoch": 1.6, "grad_norm": 1.1180837154388428, "learning_rate": 8.409116712079927e-05, "loss": 1.0247, "step": 28240 }, { "epoch": 1.6, "grad_norm": 1.1501946449279785, "learning_rate": 8.408549046321527e-05, "loss": 1.0139, "step": 28250 }, { "epoch": 1.6, "grad_norm": 1.045985221862793, "learning_rate": 8.407981380563125e-05, "loss": 1.0331, "step": 28260 }, { "epoch": 1.6, "grad_norm": 1.008318543434143, "learning_rate": 8.407413714804724e-05, "loss": 1.07, "step": 28270 }, { "epoch": 1.6, "grad_norm": 1.048492193222046, "learning_rate": 8.406846049046322e-05, "loss": 1.0218, "step": 28280 }, { "epoch": 1.6, "grad_norm": 1.1289730072021484, "learning_rate": 8.40627838328792e-05, "loss": 1.024, "step": 28290 }, { "epoch": 1.6, "grad_norm": 1.0779582262039185, "learning_rate": 8.40571071752952e-05, "loss": 1.0259, "step": 28300 }, { "epoch": 1.61, "grad_norm": 1.0346300601959229, "learning_rate": 8.405143051771117e-05, "loss": 1.0664, "step": 28310 }, { "epoch": 1.61, "grad_norm": 1.0850094556808472, "learning_rate": 8.404575386012717e-05, "loss": 1.0358, "step": 28320 }, { "epoch": 1.61, "grad_norm": 1.069287657737732, "learning_rate": 8.404007720254315e-05, "loss": 1.0268, "step": 28330 }, { "epoch": 1.61, "grad_norm": 1.0539522171020508, "learning_rate": 8.403440054495914e-05, "loss": 1.036, "step": 28340 }, { "epoch": 1.61, "grad_norm": 1.1005871295928955, "learning_rate": 8.402872388737512e-05, "loss": 1.0457, "step": 28350 }, { "epoch": 1.61, "grad_norm": 1.078933596611023, "learning_rate": 8.40230472297911e-05, "loss": 1.033, "step": 28360 }, { "epoch": 1.61, "grad_norm": 1.0893824100494385, "learning_rate": 8.40173705722071e-05, "loss": 1.0227, "step": 28370 }, { "epoch": 1.61, "grad_norm": 1.0544462203979492, "learning_rate": 8.401169391462307e-05, "loss": 1.0103, "step": 28380 }, { "epoch": 1.61, "grad_norm": 1.1343989372253418, "learning_rate": 8.400601725703905e-05, "loss": 1.0108, "step": 28390 }, { "epoch": 1.61, "grad_norm": 1.1393245458602905, "learning_rate": 8.400034059945503e-05, "loss": 1.0279, "step": 28400 }, { "epoch": 1.61, "grad_norm": 1.076672911643982, "learning_rate": 8.399466394187103e-05, "loss": 1.0194, "step": 28410 }, { "epoch": 1.61, "grad_norm": 1.0617698431015015, "learning_rate": 8.398898728428701e-05, "loss": 1.0241, "step": 28420 }, { "epoch": 1.61, "grad_norm": 1.194355845451355, "learning_rate": 8.3983310626703e-05, "loss": 1.0436, "step": 28430 }, { "epoch": 1.61, "grad_norm": 1.0682562589645386, "learning_rate": 8.397763396911898e-05, "loss": 1.0452, "step": 28440 }, { "epoch": 1.61, "grad_norm": 1.107323408126831, "learning_rate": 8.397195731153497e-05, "loss": 1.029, "step": 28450 }, { "epoch": 1.61, "grad_norm": 1.085408329963684, "learning_rate": 8.396628065395095e-05, "loss": 1.0105, "step": 28460 }, { "epoch": 1.61, "grad_norm": 1.1003516912460327, "learning_rate": 8.396060399636693e-05, "loss": 1.0313, "step": 28470 }, { "epoch": 1.61, "grad_norm": 1.0311685800552368, "learning_rate": 8.395492733878293e-05, "loss": 1.0034, "step": 28480 }, { "epoch": 1.62, "grad_norm": 1.092213749885559, "learning_rate": 8.394925068119891e-05, "loss": 1.0225, "step": 28490 }, { "epoch": 1.62, "grad_norm": 1.0382686853408813, "learning_rate": 8.39435740236149e-05, "loss": 1.0314, "step": 28500 }, { "epoch": 1.62, "grad_norm": 1.0710628032684326, "learning_rate": 8.393789736603088e-05, "loss": 0.99, "step": 28510 }, { "epoch": 1.62, "grad_norm": 1.0550529956817627, "learning_rate": 8.393222070844688e-05, "loss": 1.0308, "step": 28520 }, { "epoch": 1.62, "grad_norm": 1.0835072994232178, "learning_rate": 8.392654405086286e-05, "loss": 1.0218, "step": 28530 }, { "epoch": 1.62, "grad_norm": 1.1051599979400635, "learning_rate": 8.392086739327885e-05, "loss": 1.016, "step": 28540 }, { "epoch": 1.62, "grad_norm": 1.0684447288513184, "learning_rate": 8.391519073569483e-05, "loss": 1.0044, "step": 28550 }, { "epoch": 1.62, "grad_norm": 1.18722665309906, "learning_rate": 8.390951407811081e-05, "loss": 1.0356, "step": 28560 }, { "epoch": 1.62, "grad_norm": 1.0854851007461548, "learning_rate": 8.39038374205268e-05, "loss": 1.0183, "step": 28570 }, { "epoch": 1.62, "grad_norm": 1.0427144765853882, "learning_rate": 8.389816076294278e-05, "loss": 1.0338, "step": 28580 }, { "epoch": 1.62, "grad_norm": 1.0765329599380493, "learning_rate": 8.389248410535878e-05, "loss": 1.0243, "step": 28590 }, { "epoch": 1.62, "grad_norm": 1.1278742551803589, "learning_rate": 8.388680744777476e-05, "loss": 1.0124, "step": 28600 }, { "epoch": 1.62, "grad_norm": 1.0679434537887573, "learning_rate": 8.388113079019075e-05, "loss": 0.9995, "step": 28610 }, { "epoch": 1.62, "grad_norm": 1.1618252992630005, "learning_rate": 8.387545413260673e-05, "loss": 1.0317, "step": 28620 }, { "epoch": 1.62, "grad_norm": 1.0903652906417847, "learning_rate": 8.386977747502271e-05, "loss": 1.0443, "step": 28630 }, { "epoch": 1.62, "grad_norm": 1.0595765113830566, "learning_rate": 8.386410081743869e-05, "loss": 1.015, "step": 28640 }, { "epoch": 1.62, "grad_norm": 1.1033047437667847, "learning_rate": 8.385842415985468e-05, "loss": 1.0332, "step": 28650 }, { "epoch": 1.63, "grad_norm": 1.070757269859314, "learning_rate": 8.385274750227066e-05, "loss": 1.0053, "step": 28660 }, { "epoch": 1.63, "grad_norm": 1.1463623046875, "learning_rate": 8.384707084468664e-05, "loss": 1.0163, "step": 28670 }, { "epoch": 1.63, "grad_norm": 1.1187554597854614, "learning_rate": 8.384139418710264e-05, "loss": 1.0419, "step": 28680 }, { "epoch": 1.63, "grad_norm": 1.1095792055130005, "learning_rate": 8.383571752951862e-05, "loss": 1.0158, "step": 28690 }, { "epoch": 1.63, "grad_norm": 1.1141018867492676, "learning_rate": 8.383004087193461e-05, "loss": 1.0205, "step": 28700 }, { "epoch": 1.63, "grad_norm": 1.076541543006897, "learning_rate": 8.382436421435059e-05, "loss": 1.034, "step": 28710 }, { "epoch": 1.63, "grad_norm": 1.0752462148666382, "learning_rate": 8.381868755676658e-05, "loss": 1.0392, "step": 28720 }, { "epoch": 1.63, "grad_norm": 1.0950939655303955, "learning_rate": 8.381301089918256e-05, "loss": 1.0168, "step": 28730 }, { "epoch": 1.63, "grad_norm": 1.1034865379333496, "learning_rate": 8.380733424159856e-05, "loss": 0.9995, "step": 28740 }, { "epoch": 1.63, "grad_norm": 1.0547419786453247, "learning_rate": 8.380165758401454e-05, "loss": 1.0389, "step": 28750 }, { "epoch": 1.63, "grad_norm": 1.0090250968933105, "learning_rate": 8.379598092643052e-05, "loss": 1.022, "step": 28760 }, { "epoch": 1.63, "grad_norm": 1.0899120569229126, "learning_rate": 8.379030426884651e-05, "loss": 1.039, "step": 28770 }, { "epoch": 1.63, "grad_norm": 1.103974461555481, "learning_rate": 8.378462761126249e-05, "loss": 1.019, "step": 28780 }, { "epoch": 1.63, "grad_norm": 1.090895175933838, "learning_rate": 8.377895095367848e-05, "loss": 1.0438, "step": 28790 }, { "epoch": 1.63, "grad_norm": 1.0462384223937988, "learning_rate": 8.377327429609446e-05, "loss": 1.0398, "step": 28800 }, { "epoch": 1.63, "grad_norm": 1.1191879510879517, "learning_rate": 8.376759763851046e-05, "loss": 1.019, "step": 28810 }, { "epoch": 1.63, "grad_norm": 1.177117109298706, "learning_rate": 8.376192098092644e-05, "loss": 1.0152, "step": 28820 }, { "epoch": 1.63, "grad_norm": 1.0619508028030396, "learning_rate": 8.375624432334243e-05, "loss": 1.0196, "step": 28830 }, { "epoch": 1.64, "grad_norm": 1.1267673969268799, "learning_rate": 8.375056766575841e-05, "loss": 1.011, "step": 28840 }, { "epoch": 1.64, "grad_norm": 1.0598678588867188, "learning_rate": 8.374489100817439e-05, "loss": 1.039, "step": 28850 }, { "epoch": 1.64, "grad_norm": 1.0668635368347168, "learning_rate": 8.373921435059038e-05, "loss": 1.0381, "step": 28860 }, { "epoch": 1.64, "grad_norm": 1.0381115674972534, "learning_rate": 8.373353769300636e-05, "loss": 0.9875, "step": 28870 }, { "epoch": 1.64, "grad_norm": 1.0314429998397827, "learning_rate": 8.372786103542234e-05, "loss": 1.0437, "step": 28880 }, { "epoch": 1.64, "grad_norm": 1.096281886100769, "learning_rate": 8.372218437783834e-05, "loss": 0.9931, "step": 28890 }, { "epoch": 1.64, "grad_norm": 1.1173741817474365, "learning_rate": 8.371650772025432e-05, "loss": 1.015, "step": 28900 }, { "epoch": 1.64, "grad_norm": 1.0777473449707031, "learning_rate": 8.37108310626703e-05, "loss": 1.0008, "step": 28910 }, { "epoch": 1.64, "grad_norm": 1.1195136308670044, "learning_rate": 8.370515440508629e-05, "loss": 0.9991, "step": 28920 }, { "epoch": 1.64, "grad_norm": 1.0451414585113525, "learning_rate": 8.369947774750227e-05, "loss": 1.0229, "step": 28930 }, { "epoch": 1.64, "grad_norm": 1.1471974849700928, "learning_rate": 8.369380108991825e-05, "loss": 1.0197, "step": 28940 }, { "epoch": 1.64, "grad_norm": 1.0669705867767334, "learning_rate": 8.368812443233424e-05, "loss": 1.0298, "step": 28950 }, { "epoch": 1.64, "grad_norm": 1.0715781450271606, "learning_rate": 8.368244777475022e-05, "loss": 1.0344, "step": 28960 }, { "epoch": 1.64, "grad_norm": 1.0268189907073975, "learning_rate": 8.367677111716622e-05, "loss": 1.027, "step": 28970 }, { "epoch": 1.64, "grad_norm": 1.0655059814453125, "learning_rate": 8.36710944595822e-05, "loss": 1.0062, "step": 28980 }, { "epoch": 1.64, "grad_norm": 1.1433197259902954, "learning_rate": 8.366541780199819e-05, "loss": 1.0066, "step": 28990 }, { "epoch": 1.64, "grad_norm": 1.1516512632369995, "learning_rate": 8.365974114441417e-05, "loss": 1.0433, "step": 29000 }, { "epoch": 1.64, "grad_norm": 1.0932490825653076, "learning_rate": 8.365406448683016e-05, "loss": 1.0527, "step": 29010 }, { "epoch": 1.65, "grad_norm": 1.0887700319290161, "learning_rate": 8.364838782924614e-05, "loss": 1.0207, "step": 29020 }, { "epoch": 1.65, "grad_norm": 1.0588399171829224, "learning_rate": 8.364271117166212e-05, "loss": 1.0167, "step": 29030 }, { "epoch": 1.65, "grad_norm": 1.0723159313201904, "learning_rate": 8.363703451407812e-05, "loss": 1.0131, "step": 29040 }, { "epoch": 1.65, "grad_norm": 1.071089744567871, "learning_rate": 8.36313578564941e-05, "loss": 1.0105, "step": 29050 }, { "epoch": 1.65, "grad_norm": 1.0450924634933472, "learning_rate": 8.362568119891009e-05, "loss": 1.0388, "step": 29060 }, { "epoch": 1.65, "grad_norm": 1.054674744606018, "learning_rate": 8.362000454132607e-05, "loss": 1.0019, "step": 29070 }, { "epoch": 1.65, "grad_norm": 1.0973517894744873, "learning_rate": 8.361432788374206e-05, "loss": 1.0278, "step": 29080 }, { "epoch": 1.65, "grad_norm": 1.1468178033828735, "learning_rate": 8.360865122615804e-05, "loss": 1.04, "step": 29090 }, { "epoch": 1.65, "grad_norm": 1.0658283233642578, "learning_rate": 8.360297456857404e-05, "loss": 1.0248, "step": 29100 }, { "epoch": 1.65, "grad_norm": 1.0205363035202026, "learning_rate": 8.359729791099002e-05, "loss": 1.0238, "step": 29110 }, { "epoch": 1.65, "grad_norm": 1.1196805238723755, "learning_rate": 8.3591621253406e-05, "loss": 1.0201, "step": 29120 }, { "epoch": 1.65, "grad_norm": 1.1259863376617432, "learning_rate": 8.358594459582199e-05, "loss": 1.0374, "step": 29130 }, { "epoch": 1.65, "grad_norm": 1.073534607887268, "learning_rate": 8.358026793823797e-05, "loss": 1.0009, "step": 29140 }, { "epoch": 1.65, "grad_norm": 1.1223376989364624, "learning_rate": 8.357459128065395e-05, "loss": 0.9983, "step": 29150 }, { "epoch": 1.65, "grad_norm": 1.101183295249939, "learning_rate": 8.356891462306993e-05, "loss": 1.0062, "step": 29160 }, { "epoch": 1.65, "grad_norm": 1.0215178728103638, "learning_rate": 8.356323796548593e-05, "loss": 1.0199, "step": 29170 }, { "epoch": 1.65, "grad_norm": 1.1100609302520752, "learning_rate": 8.35575613079019e-05, "loss": 1.0144, "step": 29180 }, { "epoch": 1.66, "grad_norm": 1.1171693801879883, "learning_rate": 8.35518846503179e-05, "loss": 1.0199, "step": 29190 }, { "epoch": 1.66, "grad_norm": 1.1023083925247192, "learning_rate": 8.354620799273388e-05, "loss": 0.9765, "step": 29200 }, { "epoch": 1.66, "grad_norm": 1.0380134582519531, "learning_rate": 8.354053133514986e-05, "loss": 1.0152, "step": 29210 }, { "epoch": 1.66, "grad_norm": 1.124502420425415, "learning_rate": 8.353485467756585e-05, "loss": 1.02, "step": 29220 }, { "epoch": 1.66, "grad_norm": 1.0958794355392456, "learning_rate": 8.352917801998183e-05, "loss": 1.0182, "step": 29230 }, { "epoch": 1.66, "grad_norm": 1.0814547538757324, "learning_rate": 8.352350136239783e-05, "loss": 1.0197, "step": 29240 }, { "epoch": 1.66, "grad_norm": 1.169853687286377, "learning_rate": 8.35178247048138e-05, "loss": 1.0055, "step": 29250 }, { "epoch": 1.66, "grad_norm": 1.1481910943984985, "learning_rate": 8.35121480472298e-05, "loss": 1.0014, "step": 29260 }, { "epoch": 1.66, "grad_norm": 1.1517927646636963, "learning_rate": 8.350647138964578e-05, "loss": 1.008, "step": 29270 }, { "epoch": 1.66, "grad_norm": 1.0996443033218384, "learning_rate": 8.350079473206177e-05, "loss": 1.0254, "step": 29280 }, { "epoch": 1.66, "grad_norm": 1.0617769956588745, "learning_rate": 8.349511807447775e-05, "loss": 1.0099, "step": 29290 }, { "epoch": 1.66, "grad_norm": 1.1103917360305786, "learning_rate": 8.348944141689373e-05, "loss": 0.9938, "step": 29300 }, { "epoch": 1.66, "grad_norm": 1.0421613454818726, "learning_rate": 8.348376475930973e-05, "loss": 1.0073, "step": 29310 }, { "epoch": 1.66, "grad_norm": 1.119416356086731, "learning_rate": 8.34780881017257e-05, "loss": 1.0159, "step": 29320 }, { "epoch": 1.66, "grad_norm": 1.0748963356018066, "learning_rate": 8.34724114441417e-05, "loss": 1.0377, "step": 29330 }, { "epoch": 1.66, "grad_norm": 1.091825008392334, "learning_rate": 8.346673478655768e-05, "loss": 1.0135, "step": 29340 }, { "epoch": 1.66, "grad_norm": 1.1980068683624268, "learning_rate": 8.346105812897367e-05, "loss": 1.0295, "step": 29350 }, { "epoch": 1.66, "grad_norm": 1.1299015283584595, "learning_rate": 8.345538147138965e-05, "loss": 1.0325, "step": 29360 }, { "epoch": 1.67, "grad_norm": 1.040940523147583, "learning_rate": 8.344970481380565e-05, "loss": 1.0317, "step": 29370 }, { "epoch": 1.67, "grad_norm": 1.144249677658081, "learning_rate": 8.344402815622163e-05, "loss": 1.0241, "step": 29380 }, { "epoch": 1.67, "grad_norm": 1.1180146932601929, "learning_rate": 8.34383514986376e-05, "loss": 1.0365, "step": 29390 }, { "epoch": 1.67, "grad_norm": 1.072290301322937, "learning_rate": 8.343267484105359e-05, "loss": 1.0283, "step": 29400 }, { "epoch": 1.67, "grad_norm": 1.116302728652954, "learning_rate": 8.342699818346958e-05, "loss": 1.03, "step": 29410 }, { "epoch": 1.67, "grad_norm": 1.0909156799316406, "learning_rate": 8.342132152588556e-05, "loss": 1.0166, "step": 29420 }, { "epoch": 1.67, "grad_norm": 1.1233201026916504, "learning_rate": 8.341564486830154e-05, "loss": 1.0178, "step": 29430 }, { "epoch": 1.67, "grad_norm": 1.2372362613677979, "learning_rate": 8.340996821071753e-05, "loss": 0.9968, "step": 29440 }, { "epoch": 1.67, "grad_norm": 1.1341452598571777, "learning_rate": 8.340429155313351e-05, "loss": 1.0243, "step": 29450 }, { "epoch": 1.67, "grad_norm": 1.0524852275848389, "learning_rate": 8.33986148955495e-05, "loss": 1.0539, "step": 29460 }, { "epoch": 1.67, "grad_norm": 1.0285815000534058, "learning_rate": 8.339293823796549e-05, "loss": 1.0308, "step": 29470 }, { "epoch": 1.67, "grad_norm": 1.091331124305725, "learning_rate": 8.338726158038147e-05, "loss": 1.0025, "step": 29480 }, { "epoch": 1.67, "grad_norm": 1.081467866897583, "learning_rate": 8.338158492279746e-05, "loss": 1.0523, "step": 29490 }, { "epoch": 1.67, "grad_norm": 1.111905574798584, "learning_rate": 8.337590826521344e-05, "loss": 1.0276, "step": 29500 }, { "epoch": 1.67, "grad_norm": 1.0808148384094238, "learning_rate": 8.337023160762943e-05, "loss": 1.0215, "step": 29510 }, { "epoch": 1.67, "grad_norm": 1.0462448596954346, "learning_rate": 8.336455495004541e-05, "loss": 1.0645, "step": 29520 }, { "epoch": 1.67, "grad_norm": 1.0299468040466309, "learning_rate": 8.335887829246141e-05, "loss": 1.0137, "step": 29530 }, { "epoch": 1.67, "grad_norm": 1.14873468875885, "learning_rate": 8.335320163487739e-05, "loss": 1.0134, "step": 29540 }, { "epoch": 1.68, "grad_norm": 1.1145169734954834, "learning_rate": 8.334752497729338e-05, "loss": 1.0112, "step": 29550 }, { "epoch": 1.68, "grad_norm": 1.0989068746566772, "learning_rate": 8.334184831970936e-05, "loss": 1.0354, "step": 29560 }, { "epoch": 1.68, "grad_norm": 1.0893616676330566, "learning_rate": 8.333617166212534e-05, "loss": 1.0176, "step": 29570 }, { "epoch": 1.68, "grad_norm": 1.0706515312194824, "learning_rate": 8.333049500454133e-05, "loss": 1.0251, "step": 29580 }, { "epoch": 1.68, "grad_norm": 1.1121983528137207, "learning_rate": 8.332481834695731e-05, "loss": 1.0254, "step": 29590 }, { "epoch": 1.68, "grad_norm": 1.0549662113189697, "learning_rate": 8.331914168937331e-05, "loss": 1.0259, "step": 29600 }, { "epoch": 1.68, "grad_norm": 1.0596963167190552, "learning_rate": 8.331346503178929e-05, "loss": 1.0071, "step": 29610 }, { "epoch": 1.68, "grad_norm": 1.1111539602279663, "learning_rate": 8.330778837420528e-05, "loss": 1.027, "step": 29620 }, { "epoch": 1.68, "grad_norm": 1.065026879310608, "learning_rate": 8.330211171662126e-05, "loss": 1.0338, "step": 29630 }, { "epoch": 1.68, "grad_norm": 1.1282345056533813, "learning_rate": 8.329643505903725e-05, "loss": 1.0421, "step": 29640 }, { "epoch": 1.68, "grad_norm": 1.105503797531128, "learning_rate": 8.329075840145323e-05, "loss": 1.003, "step": 29650 }, { "epoch": 1.68, "grad_norm": 1.123127818107605, "learning_rate": 8.328508174386921e-05, "loss": 1.0413, "step": 29660 }, { "epoch": 1.68, "grad_norm": 1.103744626045227, "learning_rate": 8.32794050862852e-05, "loss": 1.011, "step": 29670 }, { "epoch": 1.68, "grad_norm": 1.0880047082901, "learning_rate": 8.327372842870117e-05, "loss": 1.038, "step": 29680 }, { "epoch": 1.68, "grad_norm": 1.0436053276062012, "learning_rate": 8.326805177111717e-05, "loss": 1.012, "step": 29690 }, { "epoch": 1.68, "grad_norm": 1.0845444202423096, "learning_rate": 8.326237511353315e-05, "loss": 0.9955, "step": 29700 }, { "epoch": 1.68, "grad_norm": 1.0711522102355957, "learning_rate": 8.325669845594914e-05, "loss": 1.0346, "step": 29710 }, { "epoch": 1.69, "grad_norm": 1.1125798225402832, "learning_rate": 8.325102179836512e-05, "loss": 1.0475, "step": 29720 }, { "epoch": 1.69, "grad_norm": 1.100852370262146, "learning_rate": 8.324534514078111e-05, "loss": 1.0461, "step": 29730 }, { "epoch": 1.69, "grad_norm": 1.0361233949661255, "learning_rate": 8.32396684831971e-05, "loss": 1.0198, "step": 29740 }, { "epoch": 1.69, "grad_norm": 1.11830472946167, "learning_rate": 8.323399182561307e-05, "loss": 1.0191, "step": 29750 }, { "epoch": 1.69, "grad_norm": 1.0993824005126953, "learning_rate": 8.322831516802907e-05, "loss": 1.0189, "step": 29760 }, { "epoch": 1.69, "grad_norm": 1.117144227027893, "learning_rate": 8.322263851044505e-05, "loss": 1.0214, "step": 29770 }, { "epoch": 1.69, "grad_norm": 1.049229383468628, "learning_rate": 8.321696185286104e-05, "loss": 1.0127, "step": 29780 }, { "epoch": 1.69, "grad_norm": 1.1585898399353027, "learning_rate": 8.321128519527702e-05, "loss": 1.0146, "step": 29790 }, { "epoch": 1.69, "grad_norm": 1.1307878494262695, "learning_rate": 8.320560853769302e-05, "loss": 1.0203, "step": 29800 }, { "epoch": 1.69, "grad_norm": 1.0568103790283203, "learning_rate": 8.3199931880109e-05, "loss": 1.0035, "step": 29810 }, { "epoch": 1.69, "grad_norm": 1.0945969820022583, "learning_rate": 8.319425522252499e-05, "loss": 1.0287, "step": 29820 }, { "epoch": 1.69, "grad_norm": 1.065744161605835, "learning_rate": 8.318857856494097e-05, "loss": 1.053, "step": 29830 }, { "epoch": 1.69, "grad_norm": 1.027955174446106, "learning_rate": 8.318290190735695e-05, "loss": 1.0436, "step": 29840 }, { "epoch": 1.69, "grad_norm": 1.050028681755066, "learning_rate": 8.317722524977294e-05, "loss": 1.0458, "step": 29850 }, { "epoch": 1.69, "grad_norm": 1.080098271369934, "learning_rate": 8.317154859218892e-05, "loss": 1.0114, "step": 29860 }, { "epoch": 1.69, "grad_norm": 1.0238865613937378, "learning_rate": 8.316587193460492e-05, "loss": 1.0324, "step": 29870 }, { "epoch": 1.69, "grad_norm": 1.1251204013824463, "learning_rate": 8.31601952770209e-05, "loss": 1.0564, "step": 29880 }, { "epoch": 1.69, "grad_norm": 1.1141357421875, "learning_rate": 8.315451861943689e-05, "loss": 1.0012, "step": 29890 }, { "epoch": 1.7, "grad_norm": 1.0974124670028687, "learning_rate": 8.314884196185287e-05, "loss": 1.0277, "step": 29900 }, { "epoch": 1.7, "grad_norm": 1.1240901947021484, "learning_rate": 8.314316530426885e-05, "loss": 1.0205, "step": 29910 }, { "epoch": 1.7, "grad_norm": 1.1084004640579224, "learning_rate": 8.313748864668483e-05, "loss": 1.0173, "step": 29920 }, { "epoch": 1.7, "grad_norm": 1.0836172103881836, "learning_rate": 8.313181198910082e-05, "loss": 1.0075, "step": 29930 }, { "epoch": 1.7, "grad_norm": 1.1003082990646362, "learning_rate": 8.31261353315168e-05, "loss": 1.0273, "step": 29940 }, { "epoch": 1.7, "grad_norm": 1.0335389375686646, "learning_rate": 8.31210263396912e-05, "loss": 1.0035, "step": 29950 }, { "epoch": 1.7, "grad_norm": 1.0934758186340332, "learning_rate": 8.311534968210718e-05, "loss": 1.0328, "step": 29960 }, { "epoch": 1.7, "grad_norm": 1.0848125219345093, "learning_rate": 8.310967302452316e-05, "loss": 1.0445, "step": 29970 }, { "epoch": 1.7, "grad_norm": 1.1429146528244019, "learning_rate": 8.310399636693916e-05, "loss": 1.0292, "step": 29980 }, { "epoch": 1.7, "grad_norm": 1.1068086624145508, "learning_rate": 8.309831970935514e-05, "loss": 1.0202, "step": 29990 }, { "epoch": 1.7, "grad_norm": 1.1128861904144287, "learning_rate": 8.309264305177113e-05, "loss": 1.0487, "step": 30000 }, { "epoch": 1.7, "grad_norm": 1.1284259557724, "learning_rate": 8.308696639418711e-05, "loss": 1.0121, "step": 30010 }, { "epoch": 1.7, "grad_norm": 1.0923515558242798, "learning_rate": 8.308128973660309e-05, "loss": 1.0384, "step": 30020 }, { "epoch": 1.7, "grad_norm": 1.0619938373565674, "learning_rate": 8.307561307901907e-05, "loss": 1.0366, "step": 30030 }, { "epoch": 1.7, "grad_norm": 1.124550461769104, "learning_rate": 8.306993642143506e-05, "loss": 1.0308, "step": 30040 }, { "epoch": 1.7, "grad_norm": 1.0542631149291992, "learning_rate": 8.306425976385104e-05, "loss": 1.0345, "step": 30050 }, { "epoch": 1.7, "grad_norm": 1.0561270713806152, "learning_rate": 8.305858310626702e-05, "loss": 1.0216, "step": 30060 }, { "epoch": 1.71, "grad_norm": 1.006394386291504, "learning_rate": 8.305290644868302e-05, "loss": 1.0212, "step": 30070 }, { "epoch": 1.71, "grad_norm": 1.118670105934143, "learning_rate": 8.3047229791099e-05, "loss": 1.0246, "step": 30080 }, { "epoch": 1.71, "grad_norm": 1.1435573101043701, "learning_rate": 8.304155313351499e-05, "loss": 1.0149, "step": 30090 }, { "epoch": 1.71, "grad_norm": 1.1207507848739624, "learning_rate": 8.303587647593097e-05, "loss": 1.0434, "step": 30100 }, { "epoch": 1.71, "grad_norm": 1.0527719259262085, "learning_rate": 8.303019981834696e-05, "loss": 1.0097, "step": 30110 }, { "epoch": 1.71, "grad_norm": 1.1087863445281982, "learning_rate": 8.302452316076294e-05, "loss": 1.0245, "step": 30120 }, { "epoch": 1.71, "grad_norm": 1.092635154724121, "learning_rate": 8.301884650317894e-05, "loss": 1.0386, "step": 30130 }, { "epoch": 1.71, "grad_norm": 1.0808402299880981, "learning_rate": 8.301316984559492e-05, "loss": 1.024, "step": 30140 }, { "epoch": 1.71, "grad_norm": 1.1576876640319824, "learning_rate": 8.30074931880109e-05, "loss": 1.0335, "step": 30150 }, { "epoch": 1.71, "grad_norm": 1.0730072259902954, "learning_rate": 8.300181653042689e-05, "loss": 1.0101, "step": 30160 }, { "epoch": 1.71, "grad_norm": 1.0457624197006226, "learning_rate": 8.299613987284287e-05, "loss": 1.0161, "step": 30170 }, { "epoch": 1.71, "grad_norm": 1.0981661081314087, "learning_rate": 8.299046321525886e-05, "loss": 1.0072, "step": 30180 }, { "epoch": 1.71, "grad_norm": 1.1348974704742432, "learning_rate": 8.298478655767484e-05, "loss": 1.0258, "step": 30190 }, { "epoch": 1.71, "grad_norm": 1.1456252336502075, "learning_rate": 8.297910990009084e-05, "loss": 0.9827, "step": 30200 }, { "epoch": 1.71, "grad_norm": 1.1386945247650146, "learning_rate": 8.297343324250682e-05, "loss": 1.0252, "step": 30210 }, { "epoch": 1.71, "grad_norm": 1.0532327890396118, "learning_rate": 8.296775658492281e-05, "loss": 1.0363, "step": 30220 }, { "epoch": 1.71, "grad_norm": 1.0942809581756592, "learning_rate": 8.296207992733879e-05, "loss": 1.0156, "step": 30230 }, { "epoch": 1.71, "grad_norm": 1.102354645729065, "learning_rate": 8.295640326975477e-05, "loss": 1.0406, "step": 30240 }, { "epoch": 1.72, "grad_norm": 1.0735888481140137, "learning_rate": 8.295072661217076e-05, "loss": 1.008, "step": 30250 }, { "epoch": 1.72, "grad_norm": 1.0895060300827026, "learning_rate": 8.294504995458674e-05, "loss": 1.0314, "step": 30260 }, { "epoch": 1.72, "grad_norm": 1.0322163105010986, "learning_rate": 8.293937329700272e-05, "loss": 1.0034, "step": 30270 }, { "epoch": 1.72, "grad_norm": 1.0789235830307007, "learning_rate": 8.293369663941872e-05, "loss": 1.0136, "step": 30280 }, { "epoch": 1.72, "grad_norm": 1.1098060607910156, "learning_rate": 8.29280199818347e-05, "loss": 1.0135, "step": 30290 }, { "epoch": 1.72, "grad_norm": 1.0650980472564697, "learning_rate": 8.292234332425068e-05, "loss": 0.9946, "step": 30300 }, { "epoch": 1.72, "grad_norm": 1.0390585660934448, "learning_rate": 8.291666666666667e-05, "loss": 1.0236, "step": 30310 }, { "epoch": 1.72, "grad_norm": 1.063751220703125, "learning_rate": 8.291099000908265e-05, "loss": 0.9963, "step": 30320 }, { "epoch": 1.72, "grad_norm": 1.1778829097747803, "learning_rate": 8.290531335149863e-05, "loss": 1.0134, "step": 30330 }, { "epoch": 1.72, "grad_norm": 1.0738866329193115, "learning_rate": 8.289963669391463e-05, "loss": 1.0069, "step": 30340 }, { "epoch": 1.72, "grad_norm": 1.0286579132080078, "learning_rate": 8.28939600363306e-05, "loss": 1.0208, "step": 30350 }, { "epoch": 1.72, "grad_norm": 1.081158995628357, "learning_rate": 8.28882833787466e-05, "loss": 1.0329, "step": 30360 }, { "epoch": 1.72, "grad_norm": 1.0759130716323853, "learning_rate": 8.288260672116258e-05, "loss": 1.0311, "step": 30370 }, { "epoch": 1.72, "grad_norm": 1.0990709066390991, "learning_rate": 8.287693006357857e-05, "loss": 1.049, "step": 30380 }, { "epoch": 1.72, "grad_norm": 1.0538569688796997, "learning_rate": 8.287125340599455e-05, "loss": 1.0169, "step": 30390 }, { "epoch": 1.72, "grad_norm": 1.0610321760177612, "learning_rate": 8.286557674841055e-05, "loss": 1.0306, "step": 30400 }, { "epoch": 1.72, "grad_norm": 1.0753628015518188, "learning_rate": 8.285990009082653e-05, "loss": 1.0316, "step": 30410 }, { "epoch": 1.72, "grad_norm": 1.0665677785873413, "learning_rate": 8.28542234332425e-05, "loss": 1.0104, "step": 30420 }, { "epoch": 1.73, "grad_norm": 1.1057401895523071, "learning_rate": 8.28485467756585e-05, "loss": 1.0003, "step": 30430 }, { "epoch": 1.73, "grad_norm": 1.1098501682281494, "learning_rate": 8.284287011807448e-05, "loss": 1.0074, "step": 30440 }, { "epoch": 1.73, "grad_norm": 1.080500841140747, "learning_rate": 8.283719346049047e-05, "loss": 1.0209, "step": 30450 }, { "epoch": 1.73, "grad_norm": 1.050067663192749, "learning_rate": 8.283151680290645e-05, "loss": 1.0274, "step": 30460 }, { "epoch": 1.73, "grad_norm": 1.1447416543960571, "learning_rate": 8.282584014532245e-05, "loss": 1.0042, "step": 30470 }, { "epoch": 1.73, "grad_norm": 1.102830171585083, "learning_rate": 8.282016348773843e-05, "loss": 1.0478, "step": 30480 }, { "epoch": 1.73, "grad_norm": 1.0972310304641724, "learning_rate": 8.281448683015442e-05, "loss": 0.9984, "step": 30490 }, { "epoch": 1.73, "grad_norm": 1.0542577505111694, "learning_rate": 8.28088101725704e-05, "loss": 1.0168, "step": 30500 }, { "epoch": 1.73, "grad_norm": 1.0763846635818481, "learning_rate": 8.280313351498638e-05, "loss": 1.0145, "step": 30510 }, { "epoch": 1.73, "grad_norm": 1.061044692993164, "learning_rate": 8.279745685740237e-05, "loss": 1.0322, "step": 30520 }, { "epoch": 1.73, "grad_norm": 1.1793538331985474, "learning_rate": 8.279178019981835e-05, "loss": 1.0252, "step": 30530 }, { "epoch": 1.73, "grad_norm": 1.0935118198394775, "learning_rate": 8.278610354223433e-05, "loss": 0.9954, "step": 30540 }, { "epoch": 1.73, "grad_norm": 1.1018625497817993, "learning_rate": 8.278042688465031e-05, "loss": 1.0202, "step": 30550 }, { "epoch": 1.73, "grad_norm": 1.0815802812576294, "learning_rate": 8.27747502270663e-05, "loss": 1.0232, "step": 30560 }, { "epoch": 1.73, "grad_norm": 1.1204509735107422, "learning_rate": 8.276907356948229e-05, "loss": 1.0134, "step": 30570 }, { "epoch": 1.73, "grad_norm": 1.1144328117370605, "learning_rate": 8.276339691189828e-05, "loss": 1.0295, "step": 30580 }, { "epoch": 1.73, "grad_norm": 1.0918463468551636, "learning_rate": 8.275772025431426e-05, "loss": 1.0152, "step": 30590 }, { "epoch": 1.74, "grad_norm": 1.0643364191055298, "learning_rate": 8.275204359673024e-05, "loss": 1.0105, "step": 30600 }, { "epoch": 1.74, "grad_norm": 1.0850639343261719, "learning_rate": 8.274636693914623e-05, "loss": 1.0523, "step": 30610 }, { "epoch": 1.74, "grad_norm": 1.0449563264846802, "learning_rate": 8.274069028156221e-05, "loss": 1.0003, "step": 30620 }, { "epoch": 1.74, "grad_norm": 1.0810867547988892, "learning_rate": 8.273501362397821e-05, "loss": 1.0485, "step": 30630 }, { "epoch": 1.74, "grad_norm": 1.0923200845718384, "learning_rate": 8.272933696639419e-05, "loss": 1.0287, "step": 30640 }, { "epoch": 1.74, "grad_norm": 1.0453407764434814, "learning_rate": 8.272366030881018e-05, "loss": 1.0336, "step": 30650 }, { "epoch": 1.74, "grad_norm": 1.1060024499893188, "learning_rate": 8.271798365122616e-05, "loss": 1.0478, "step": 30660 }, { "epoch": 1.74, "grad_norm": 1.095049262046814, "learning_rate": 8.271230699364215e-05, "loss": 1.0183, "step": 30670 }, { "epoch": 1.74, "grad_norm": 1.053131103515625, "learning_rate": 8.270663033605813e-05, "loss": 0.9944, "step": 30680 }, { "epoch": 1.74, "grad_norm": 1.0624552965164185, "learning_rate": 8.270095367847411e-05, "loss": 1.0281, "step": 30690 }, { "epoch": 1.74, "grad_norm": 1.1315926313400269, "learning_rate": 8.269527702089011e-05, "loss": 1.0114, "step": 30700 }, { "epoch": 1.74, "grad_norm": 1.0638864040374756, "learning_rate": 8.268960036330609e-05, "loss": 1.0184, "step": 30710 }, { "epoch": 1.74, "grad_norm": 1.1797354221343994, "learning_rate": 8.268392370572208e-05, "loss": 1.0268, "step": 30720 }, { "epoch": 1.74, "grad_norm": 1.0216152667999268, "learning_rate": 8.267824704813806e-05, "loss": 1.0335, "step": 30730 }, { "epoch": 1.74, "grad_norm": 1.0937172174453735, "learning_rate": 8.267257039055405e-05, "loss": 1.0126, "step": 30740 }, { "epoch": 1.74, "grad_norm": 1.060176968574524, "learning_rate": 8.266689373297003e-05, "loss": 1.0056, "step": 30750 }, { "epoch": 1.74, "grad_norm": 1.1231201887130737, "learning_rate": 8.266121707538603e-05, "loss": 1.0059, "step": 30760 }, { "epoch": 1.74, "grad_norm": 1.138529896736145, "learning_rate": 8.265554041780201e-05, "loss": 0.9955, "step": 30770 }, { "epoch": 1.75, "grad_norm": 1.1238261461257935, "learning_rate": 8.264986376021799e-05, "loss": 1.0198, "step": 30780 }, { "epoch": 1.75, "grad_norm": 1.0379589796066284, "learning_rate": 8.264418710263397e-05, "loss": 1.0309, "step": 30790 }, { "epoch": 1.75, "grad_norm": 1.11403489112854, "learning_rate": 8.263851044504996e-05, "loss": 0.9901, "step": 30800 }, { "epoch": 1.75, "grad_norm": 1.059790015220642, "learning_rate": 8.263283378746594e-05, "loss": 1.0107, "step": 30810 }, { "epoch": 1.75, "grad_norm": 1.0873416662216187, "learning_rate": 8.262715712988192e-05, "loss": 1.0109, "step": 30820 }, { "epoch": 1.75, "grad_norm": 1.0557626485824585, "learning_rate": 8.262148047229791e-05, "loss": 0.9971, "step": 30830 }, { "epoch": 1.75, "grad_norm": 1.0838706493377686, "learning_rate": 8.26158038147139e-05, "loss": 1.0149, "step": 30840 }, { "epoch": 1.75, "grad_norm": 1.0195860862731934, "learning_rate": 8.261012715712989e-05, "loss": 1.0071, "step": 30850 }, { "epoch": 1.75, "grad_norm": 1.0815666913986206, "learning_rate": 8.260445049954587e-05, "loss": 1.0042, "step": 30860 }, { "epoch": 1.75, "grad_norm": 1.0875173807144165, "learning_rate": 8.259877384196185e-05, "loss": 1.0098, "step": 30870 }, { "epoch": 1.75, "grad_norm": 1.0442404747009277, "learning_rate": 8.259309718437784e-05, "loss": 1.0206, "step": 30880 }, { "epoch": 1.75, "grad_norm": 1.0279392004013062, "learning_rate": 8.258742052679382e-05, "loss": 1.0115, "step": 30890 }, { "epoch": 1.75, "grad_norm": 1.1318820714950562, "learning_rate": 8.258174386920981e-05, "loss": 1.0098, "step": 30900 }, { "epoch": 1.75, "grad_norm": 1.0681805610656738, "learning_rate": 8.25760672116258e-05, "loss": 1.018, "step": 30910 }, { "epoch": 1.75, "grad_norm": 1.1700522899627686, "learning_rate": 8.257039055404179e-05, "loss": 1.0044, "step": 30920 }, { "epoch": 1.75, "grad_norm": 1.0860252380371094, "learning_rate": 8.256471389645777e-05, "loss": 1.0334, "step": 30930 }, { "epoch": 1.75, "grad_norm": 1.111694574356079, "learning_rate": 8.255903723887376e-05, "loss": 1.002, "step": 30940 }, { "epoch": 1.75, "grad_norm": 1.1304547786712646, "learning_rate": 8.255336058128974e-05, "loss": 0.9757, "step": 30950 }, { "epoch": 1.76, "grad_norm": 1.1070390939712524, "learning_rate": 8.254768392370572e-05, "loss": 1.0038, "step": 30960 }, { "epoch": 1.76, "grad_norm": 1.0618045330047607, "learning_rate": 8.254200726612172e-05, "loss": 1.0081, "step": 30970 }, { "epoch": 1.76, "grad_norm": 1.0640225410461426, "learning_rate": 8.25363306085377e-05, "loss": 1.0191, "step": 30980 }, { "epoch": 1.76, "grad_norm": 1.0519434213638306, "learning_rate": 8.253065395095369e-05, "loss": 0.9986, "step": 30990 }, { "epoch": 1.76, "grad_norm": 1.0972321033477783, "learning_rate": 8.252497729336967e-05, "loss": 1.0226, "step": 31000 }, { "epoch": 1.76, "grad_norm": 1.066292643547058, "learning_rate": 8.251930063578566e-05, "loss": 1.0212, "step": 31010 }, { "epoch": 1.76, "grad_norm": 1.0386322736740112, "learning_rate": 8.251362397820164e-05, "loss": 1.0373, "step": 31020 }, { "epoch": 1.76, "grad_norm": 1.1190485954284668, "learning_rate": 8.250794732061764e-05, "loss": 1.0297, "step": 31030 }, { "epoch": 1.76, "grad_norm": 1.1167618036270142, "learning_rate": 8.250227066303362e-05, "loss": 1.0093, "step": 31040 }, { "epoch": 1.76, "grad_norm": 1.153601884841919, "learning_rate": 8.24965940054496e-05, "loss": 1.0199, "step": 31050 }, { "epoch": 1.76, "grad_norm": 1.0894988775253296, "learning_rate": 8.249091734786558e-05, "loss": 1.0231, "step": 31060 }, { "epoch": 1.76, "grad_norm": 1.1556568145751953, "learning_rate": 8.248524069028156e-05, "loss": 1.002, "step": 31070 }, { "epoch": 1.76, "grad_norm": 1.1941630840301514, "learning_rate": 8.247956403269755e-05, "loss": 1.0029, "step": 31080 }, { "epoch": 1.76, "grad_norm": 1.033735990524292, "learning_rate": 8.247388737511353e-05, "loss": 1.0049, "step": 31090 }, { "epoch": 1.76, "grad_norm": 1.113483190536499, "learning_rate": 8.246821071752952e-05, "loss": 1.0206, "step": 31100 }, { "epoch": 1.76, "grad_norm": 1.0998070240020752, "learning_rate": 8.24625340599455e-05, "loss": 1.0153, "step": 31110 }, { "epoch": 1.76, "grad_norm": 1.1605685949325562, "learning_rate": 8.24568574023615e-05, "loss": 1.0295, "step": 31120 }, { "epoch": 1.77, "grad_norm": 1.106507658958435, "learning_rate": 8.245118074477748e-05, "loss": 1.0103, "step": 31130 }, { "epoch": 1.77, "grad_norm": 1.0860917568206787, "learning_rate": 8.244550408719346e-05, "loss": 1.0055, "step": 31140 }, { "epoch": 1.77, "grad_norm": 1.116723656654358, "learning_rate": 8.243982742960945e-05, "loss": 1.025, "step": 31150 }, { "epoch": 1.77, "grad_norm": 1.0928330421447754, "learning_rate": 8.243415077202543e-05, "loss": 1.017, "step": 31160 }, { "epoch": 1.77, "grad_norm": 1.0969936847686768, "learning_rate": 8.242847411444142e-05, "loss": 1.014, "step": 31170 }, { "epoch": 1.77, "grad_norm": 1.1018942594528198, "learning_rate": 8.24227974568574e-05, "loss": 1.0302, "step": 31180 }, { "epoch": 1.77, "grad_norm": 1.0792814493179321, "learning_rate": 8.24171207992734e-05, "loss": 0.9958, "step": 31190 }, { "epoch": 1.77, "grad_norm": 1.0415551662445068, "learning_rate": 8.241144414168938e-05, "loss": 1.0422, "step": 31200 }, { "epoch": 1.77, "grad_norm": 1.1072187423706055, "learning_rate": 8.240576748410537e-05, "loss": 1.0076, "step": 31210 }, { "epoch": 1.77, "grad_norm": 1.1901006698608398, "learning_rate": 8.240009082652135e-05, "loss": 1.0087, "step": 31220 }, { "epoch": 1.77, "grad_norm": 1.047868013381958, "learning_rate": 8.239441416893733e-05, "loss": 1.0094, "step": 31230 }, { "epoch": 1.77, "grad_norm": 1.1388314962387085, "learning_rate": 8.238873751135332e-05, "loss": 1.0215, "step": 31240 }, { "epoch": 1.77, "grad_norm": 1.0980850458145142, "learning_rate": 8.23830608537693e-05, "loss": 1.0074, "step": 31250 }, { "epoch": 1.77, "grad_norm": 1.0298950672149658, "learning_rate": 8.23773841961853e-05, "loss": 0.9953, "step": 31260 }, { "epoch": 1.77, "grad_norm": 1.0794548988342285, "learning_rate": 8.237170753860128e-05, "loss": 1.0079, "step": 31270 }, { "epoch": 1.77, "grad_norm": 1.1285111904144287, "learning_rate": 8.236603088101727e-05, "loss": 1.0095, "step": 31280 }, { "epoch": 1.77, "grad_norm": 1.086379885673523, "learning_rate": 8.236035422343325e-05, "loss": 1.0222, "step": 31290 }, { "epoch": 1.77, "grad_norm": 1.095442771911621, "learning_rate": 8.235467756584923e-05, "loss": 1.0286, "step": 31300 }, { "epoch": 1.78, "grad_norm": 1.0794790983200073, "learning_rate": 8.234900090826521e-05, "loss": 1.003, "step": 31310 }, { "epoch": 1.78, "grad_norm": 1.1606311798095703, "learning_rate": 8.23433242506812e-05, "loss": 1.0044, "step": 31320 }, { "epoch": 1.78, "grad_norm": 1.0470257997512817, "learning_rate": 8.233764759309718e-05, "loss": 1.0341, "step": 31330 }, { "epoch": 1.78, "grad_norm": 1.1569746732711792, "learning_rate": 8.233197093551316e-05, "loss": 1.0225, "step": 31340 }, { "epoch": 1.78, "grad_norm": 1.1075246334075928, "learning_rate": 8.232629427792916e-05, "loss": 1.0211, "step": 31350 }, { "epoch": 1.78, "grad_norm": 1.123046636581421, "learning_rate": 8.232061762034514e-05, "loss": 0.9979, "step": 31360 }, { "epoch": 1.78, "grad_norm": 1.0192207098007202, "learning_rate": 8.231494096276113e-05, "loss": 1.02, "step": 31370 }, { "epoch": 1.78, "grad_norm": 1.0404164791107178, "learning_rate": 8.230926430517711e-05, "loss": 1.0066, "step": 31380 }, { "epoch": 1.78, "grad_norm": 1.09114408493042, "learning_rate": 8.23035876475931e-05, "loss": 1.0349, "step": 31390 }, { "epoch": 1.78, "grad_norm": 1.0527377128601074, "learning_rate": 8.229791099000908e-05, "loss": 1.0595, "step": 31400 }, { "epoch": 1.78, "grad_norm": 1.1126964092254639, "learning_rate": 8.229223433242506e-05, "loss": 1.0253, "step": 31410 }, { "epoch": 1.78, "grad_norm": 1.1214845180511475, "learning_rate": 8.228655767484106e-05, "loss": 1.0221, "step": 31420 }, { "epoch": 1.78, "grad_norm": 1.1355916261672974, "learning_rate": 8.228088101725704e-05, "loss": 1.0252, "step": 31430 }, { "epoch": 1.78, "grad_norm": 1.088579773902893, "learning_rate": 8.227520435967303e-05, "loss": 1.0046, "step": 31440 }, { "epoch": 1.78, "grad_norm": 1.1202744245529175, "learning_rate": 8.226952770208901e-05, "loss": 1.0003, "step": 31450 }, { "epoch": 1.78, "grad_norm": 1.0381220579147339, "learning_rate": 8.2263851044505e-05, "loss": 0.9796, "step": 31460 }, { "epoch": 1.78, "grad_norm": 1.1331591606140137, "learning_rate": 8.225817438692098e-05, "loss": 1.0289, "step": 31470 }, { "epoch": 1.78, "grad_norm": 1.0491106510162354, "learning_rate": 8.225249772933698e-05, "loss": 1.0037, "step": 31480 }, { "epoch": 1.79, "grad_norm": 1.1508257389068604, "learning_rate": 8.224682107175296e-05, "loss": 1.0333, "step": 31490 }, { "epoch": 1.79, "grad_norm": 1.0458945035934448, "learning_rate": 8.224114441416894e-05, "loss": 1.0077, "step": 31500 }, { "epoch": 1.79, "grad_norm": 1.0284419059753418, "learning_rate": 8.223546775658493e-05, "loss": 1.0332, "step": 31510 }, { "epoch": 1.79, "grad_norm": 1.1994080543518066, "learning_rate": 8.222979109900091e-05, "loss": 0.9976, "step": 31520 }, { "epoch": 1.79, "grad_norm": 1.0313926935195923, "learning_rate": 8.22241144414169e-05, "loss": 1.037, "step": 31530 }, { "epoch": 1.79, "grad_norm": 1.1034080982208252, "learning_rate": 8.221843778383288e-05, "loss": 1.0235, "step": 31540 }, { "epoch": 1.79, "grad_norm": 1.0595053434371948, "learning_rate": 8.221276112624888e-05, "loss": 1.0139, "step": 31550 }, { "epoch": 1.79, "grad_norm": 1.1378123760223389, "learning_rate": 8.220708446866486e-05, "loss": 1.0227, "step": 31560 }, { "epoch": 1.79, "grad_norm": 1.1198556423187256, "learning_rate": 8.220140781108084e-05, "loss": 1.0164, "step": 31570 }, { "epoch": 1.79, "grad_norm": 1.1024311780929565, "learning_rate": 8.219573115349682e-05, "loss": 1.0074, "step": 31580 }, { "epoch": 1.79, "grad_norm": 1.056549072265625, "learning_rate": 8.21900544959128e-05, "loss": 1.033, "step": 31590 }, { "epoch": 1.79, "grad_norm": 1.0275949239730835, "learning_rate": 8.218437783832879e-05, "loss": 1.0135, "step": 31600 }, { "epoch": 1.79, "grad_norm": 1.1191270351409912, "learning_rate": 8.217870118074477e-05, "loss": 1.0103, "step": 31610 }, { "epoch": 1.79, "grad_norm": 1.1007163524627686, "learning_rate": 8.217302452316077e-05, "loss": 1.0043, "step": 31620 }, { "epoch": 1.79, "grad_norm": 1.0432230234146118, "learning_rate": 8.216734786557675e-05, "loss": 1.0366, "step": 31630 }, { "epoch": 1.79, "grad_norm": 1.0098060369491577, "learning_rate": 8.216167120799274e-05, "loss": 1.0199, "step": 31640 }, { "epoch": 1.79, "grad_norm": 1.0749567747116089, "learning_rate": 8.215599455040872e-05, "loss": 1.0175, "step": 31650 }, { "epoch": 1.8, "grad_norm": 1.0700886249542236, "learning_rate": 8.215031789282471e-05, "loss": 1.0065, "step": 31660 }, { "epoch": 1.8, "grad_norm": 1.1265324354171753, "learning_rate": 8.214464123524069e-05, "loss": 1.0066, "step": 31670 }, { "epoch": 1.8, "grad_norm": 1.0365995168685913, "learning_rate": 8.213896457765669e-05, "loss": 1.0101, "step": 31680 }, { "epoch": 1.8, "grad_norm": 1.139880895614624, "learning_rate": 8.213328792007267e-05, "loss": 1.0016, "step": 31690 }, { "epoch": 1.8, "grad_norm": 1.0630877017974854, "learning_rate": 8.212761126248865e-05, "loss": 1.0025, "step": 31700 }, { "epoch": 1.8, "grad_norm": 1.0007330179214478, "learning_rate": 8.212193460490464e-05, "loss": 1.0171, "step": 31710 }, { "epoch": 1.8, "grad_norm": 1.0785242319107056, "learning_rate": 8.211625794732062e-05, "loss": 1.0292, "step": 31720 }, { "epoch": 1.8, "grad_norm": 1.1371891498565674, "learning_rate": 8.211058128973661e-05, "loss": 1.0172, "step": 31730 }, { "epoch": 1.8, "grad_norm": 1.088346242904663, "learning_rate": 8.210490463215259e-05, "loss": 1.0049, "step": 31740 }, { "epoch": 1.8, "grad_norm": 1.1117513179779053, "learning_rate": 8.209922797456859e-05, "loss": 1.0108, "step": 31750 }, { "epoch": 1.8, "grad_norm": 1.0974558591842651, "learning_rate": 8.209411898274296e-05, "loss": 1.0101, "step": 31760 }, { "epoch": 1.8, "grad_norm": 1.1296586990356445, "learning_rate": 8.208844232515895e-05, "loss": 1.0288, "step": 31770 }, { "epoch": 1.8, "grad_norm": 1.1066354513168335, "learning_rate": 8.208276566757493e-05, "loss": 1.0189, "step": 31780 }, { "epoch": 1.8, "grad_norm": 1.1051180362701416, "learning_rate": 8.207708900999093e-05, "loss": 1.0259, "step": 31790 }, { "epoch": 1.8, "grad_norm": 1.080627202987671, "learning_rate": 8.207141235240691e-05, "loss": 1.0502, "step": 31800 }, { "epoch": 1.8, "grad_norm": 1.0603878498077393, "learning_rate": 8.206573569482289e-05, "loss": 0.9885, "step": 31810 }, { "epoch": 1.8, "grad_norm": 1.1858097314834595, "learning_rate": 8.206005903723888e-05, "loss": 1.0166, "step": 31820 }, { "epoch": 1.8, "grad_norm": 1.1764699220657349, "learning_rate": 8.205438237965486e-05, "loss": 0.9994, "step": 31830 }, { "epoch": 1.81, "grad_norm": 1.1263219118118286, "learning_rate": 8.204870572207085e-05, "loss": 1.0102, "step": 31840 }, { "epoch": 1.81, "grad_norm": 1.0929338932037354, "learning_rate": 8.204302906448683e-05, "loss": 1.0096, "step": 31850 }, { "epoch": 1.81, "grad_norm": 1.0922828912734985, "learning_rate": 8.203735240690283e-05, "loss": 1.0228, "step": 31860 }, { "epoch": 1.81, "grad_norm": 1.1556732654571533, "learning_rate": 8.203167574931881e-05, "loss": 1.0292, "step": 31870 }, { "epoch": 1.81, "grad_norm": 1.1060112714767456, "learning_rate": 8.20259990917348e-05, "loss": 1.0006, "step": 31880 }, { "epoch": 1.81, "grad_norm": 1.1049818992614746, "learning_rate": 8.202032243415078e-05, "loss": 1.0158, "step": 31890 }, { "epoch": 1.81, "grad_norm": 1.0875693559646606, "learning_rate": 8.201464577656676e-05, "loss": 1.011, "step": 31900 }, { "epoch": 1.81, "grad_norm": 1.1391106843948364, "learning_rate": 8.200896911898275e-05, "loss": 1.0227, "step": 31910 }, { "epoch": 1.81, "grad_norm": 1.0303471088409424, "learning_rate": 8.200329246139873e-05, "loss": 1.0137, "step": 31920 }, { "epoch": 1.81, "grad_norm": 1.1096264123916626, "learning_rate": 8.199761580381471e-05, "loss": 1.0121, "step": 31930 }, { "epoch": 1.81, "grad_norm": 1.0119881629943848, "learning_rate": 8.19919391462307e-05, "loss": 1.0217, "step": 31940 }, { "epoch": 1.81, "grad_norm": 1.0474086999893188, "learning_rate": 8.198626248864669e-05, "loss": 1.0175, "step": 31950 }, { "epoch": 1.81, "grad_norm": 1.0994187593460083, "learning_rate": 8.198058583106267e-05, "loss": 1.0102, "step": 31960 }, { "epoch": 1.81, "grad_norm": 1.1355555057525635, "learning_rate": 8.197490917347866e-05, "loss": 1.0022, "step": 31970 }, { "epoch": 1.81, "grad_norm": 1.1861597299575806, "learning_rate": 8.196923251589464e-05, "loss": 1.0377, "step": 31980 }, { "epoch": 1.81, "grad_norm": 1.1697932481765747, "learning_rate": 8.196355585831062e-05, "loss": 1.0049, "step": 31990 }, { "epoch": 1.81, "grad_norm": 1.1130647659301758, "learning_rate": 8.195787920072661e-05, "loss": 1.017, "step": 32000 }, { "epoch": 1.82, "grad_norm": 1.0181981325149536, "learning_rate": 8.19522025431426e-05, "loss": 1.0047, "step": 32010 }, { "epoch": 1.82, "grad_norm": 1.0332461595535278, "learning_rate": 8.194652588555859e-05, "loss": 0.9851, "step": 32020 }, { "epoch": 1.82, "grad_norm": 1.1224746704101562, "learning_rate": 8.194084922797457e-05, "loss": 1.011, "step": 32030 }, { "epoch": 1.82, "grad_norm": 1.1220955848693848, "learning_rate": 8.193517257039056e-05, "loss": 1.0336, "step": 32040 }, { "epoch": 1.82, "grad_norm": 1.0511800050735474, "learning_rate": 8.192949591280654e-05, "loss": 1.0149, "step": 32050 }, { "epoch": 1.82, "grad_norm": 1.0991191864013672, "learning_rate": 8.192381925522253e-05, "loss": 1.0287, "step": 32060 }, { "epoch": 1.82, "grad_norm": 1.1679564714431763, "learning_rate": 8.191814259763851e-05, "loss": 1.0043, "step": 32070 }, { "epoch": 1.82, "grad_norm": 1.030511736869812, "learning_rate": 8.19124659400545e-05, "loss": 1.0241, "step": 32080 }, { "epoch": 1.82, "grad_norm": 1.116937279701233, "learning_rate": 8.190678928247049e-05, "loss": 1.028, "step": 32090 }, { "epoch": 1.82, "grad_norm": 1.1343973875045776, "learning_rate": 8.190111262488647e-05, "loss": 1.0194, "step": 32100 }, { "epoch": 1.82, "grad_norm": 1.085165023803711, "learning_rate": 8.189543596730246e-05, "loss": 1.0086, "step": 32110 }, { "epoch": 1.82, "grad_norm": 1.0888166427612305, "learning_rate": 8.188975930971844e-05, "loss": 1.0093, "step": 32120 }, { "epoch": 1.82, "grad_norm": 1.0974639654159546, "learning_rate": 8.188408265213444e-05, "loss": 1.0364, "step": 32130 }, { "epoch": 1.82, "grad_norm": 1.1081318855285645, "learning_rate": 8.187840599455042e-05, "loss": 1.0073, "step": 32140 }, { "epoch": 1.82, "grad_norm": 1.1072226762771606, "learning_rate": 8.187272933696641e-05, "loss": 1.018, "step": 32150 }, { "epoch": 1.82, "grad_norm": 1.1106479167938232, "learning_rate": 8.186705267938239e-05, "loss": 1.0049, "step": 32160 }, { "epoch": 1.82, "grad_norm": 1.1470611095428467, "learning_rate": 8.186137602179837e-05, "loss": 1.0227, "step": 32170 }, { "epoch": 1.82, "grad_norm": 1.0782488584518433, "learning_rate": 8.185569936421435e-05, "loss": 1.0188, "step": 32180 }, { "epoch": 1.83, "grad_norm": 1.073638677597046, "learning_rate": 8.185002270663034e-05, "loss": 1.0199, "step": 32190 }, { "epoch": 1.83, "grad_norm": 1.0946649312973022, "learning_rate": 8.184434604904632e-05, "loss": 1.0093, "step": 32200 }, { "epoch": 1.83, "grad_norm": 1.115898609161377, "learning_rate": 8.18386693914623e-05, "loss": 1.0276, "step": 32210 }, { "epoch": 1.83, "grad_norm": 1.1586062908172607, "learning_rate": 8.18329927338783e-05, "loss": 0.9958, "step": 32220 }, { "epoch": 1.83, "grad_norm": 1.0548367500305176, "learning_rate": 8.182731607629428e-05, "loss": 0.9962, "step": 32230 }, { "epoch": 1.83, "grad_norm": 1.0899231433868408, "learning_rate": 8.182163941871027e-05, "loss": 0.9979, "step": 32240 }, { "epoch": 1.83, "grad_norm": 1.1159247159957886, "learning_rate": 8.181596276112625e-05, "loss": 1.0271, "step": 32250 }, { "epoch": 1.83, "grad_norm": 1.006462574005127, "learning_rate": 8.181028610354223e-05, "loss": 0.9976, "step": 32260 }, { "epoch": 1.83, "grad_norm": 1.1431719064712524, "learning_rate": 8.180460944595822e-05, "loss": 1.0306, "step": 32270 }, { "epoch": 1.83, "grad_norm": 1.125361680984497, "learning_rate": 8.17989327883742e-05, "loss": 1.0382, "step": 32280 }, { "epoch": 1.83, "grad_norm": 1.0952574014663696, "learning_rate": 8.17932561307902e-05, "loss": 1.0367, "step": 32290 }, { "epoch": 1.83, "grad_norm": 1.0576205253601074, "learning_rate": 8.178757947320618e-05, "loss": 1.0086, "step": 32300 }, { "epoch": 1.83, "grad_norm": 1.0713385343551636, "learning_rate": 8.178190281562217e-05, "loss": 1.0451, "step": 32310 }, { "epoch": 1.83, "grad_norm": 1.075202465057373, "learning_rate": 8.177622615803815e-05, "loss": 1.0162, "step": 32320 }, { "epoch": 1.83, "grad_norm": 1.1003203392028809, "learning_rate": 8.177054950045414e-05, "loss": 1.019, "step": 32330 }, { "epoch": 1.83, "grad_norm": 1.0893020629882812, "learning_rate": 8.176487284287012e-05, "loss": 1.0173, "step": 32340 }, { "epoch": 1.83, "grad_norm": 1.1112933158874512, "learning_rate": 8.17591961852861e-05, "loss": 0.9779, "step": 32350 }, { "epoch": 1.83, "grad_norm": 1.0560053586959839, "learning_rate": 8.17535195277021e-05, "loss": 1.0077, "step": 32360 }, { "epoch": 1.84, "grad_norm": 1.1526414155960083, "learning_rate": 8.174784287011808e-05, "loss": 1.002, "step": 32370 }, { "epoch": 1.84, "grad_norm": 1.1634215116500854, "learning_rate": 8.174216621253407e-05, "loss": 1.0031, "step": 32380 }, { "epoch": 1.84, "grad_norm": 1.106748342514038, "learning_rate": 8.173648955495005e-05, "loss": 1.0062, "step": 32390 }, { "epoch": 1.84, "grad_norm": 1.1217175722122192, "learning_rate": 8.173081289736604e-05, "loss": 1.0191, "step": 32400 }, { "epoch": 1.84, "grad_norm": 1.1253718137741089, "learning_rate": 8.172513623978202e-05, "loss": 1.0191, "step": 32410 }, { "epoch": 1.84, "grad_norm": 1.0756021738052368, "learning_rate": 8.171945958219802e-05, "loss": 0.9914, "step": 32420 }, { "epoch": 1.84, "grad_norm": 1.0732388496398926, "learning_rate": 8.1713782924614e-05, "loss": 1.0322, "step": 32430 }, { "epoch": 1.84, "grad_norm": 1.0565547943115234, "learning_rate": 8.170810626702998e-05, "loss": 1.0202, "step": 32440 }, { "epoch": 1.84, "grad_norm": 1.1310817003250122, "learning_rate": 8.170242960944596e-05, "loss": 1.0114, "step": 32450 }, { "epoch": 1.84, "grad_norm": 1.0891785621643066, "learning_rate": 8.169675295186194e-05, "loss": 1.0239, "step": 32460 }, { "epoch": 1.84, "grad_norm": 1.0984665155410767, "learning_rate": 8.169107629427793e-05, "loss": 1.0132, "step": 32470 }, { "epoch": 1.84, "grad_norm": 1.0711076259613037, "learning_rate": 8.168539963669391e-05, "loss": 1.0139, "step": 32480 }, { "epoch": 1.84, "grad_norm": 1.087588906288147, "learning_rate": 8.16797229791099e-05, "loss": 0.9913, "step": 32490 }, { "epoch": 1.84, "grad_norm": 1.1359463930130005, "learning_rate": 8.167404632152588e-05, "loss": 0.993, "step": 32500 }, { "epoch": 1.84, "grad_norm": 1.0379987955093384, "learning_rate": 8.166836966394188e-05, "loss": 1.0234, "step": 32510 }, { "epoch": 1.84, "grad_norm": 1.1320888996124268, "learning_rate": 8.166269300635786e-05, "loss": 1.0122, "step": 32520 }, { "epoch": 1.84, "grad_norm": 1.1282579898834229, "learning_rate": 8.165701634877384e-05, "loss": 1.0077, "step": 32530 }, { "epoch": 1.85, "grad_norm": 1.1093038320541382, "learning_rate": 8.165133969118983e-05, "loss": 0.9864, "step": 32540 }, { "epoch": 1.85, "grad_norm": 1.0746251344680786, "learning_rate": 8.164566303360581e-05, "loss": 1.0099, "step": 32550 }, { "epoch": 1.85, "grad_norm": 1.080380916595459, "learning_rate": 8.16399863760218e-05, "loss": 1.0447, "step": 32560 }, { "epoch": 1.85, "grad_norm": 1.0855402946472168, "learning_rate": 8.163430971843778e-05, "loss": 1.0093, "step": 32570 }, { "epoch": 1.85, "grad_norm": 1.1197353601455688, "learning_rate": 8.162863306085378e-05, "loss": 1.0172, "step": 32580 }, { "epoch": 1.85, "grad_norm": 1.089258074760437, "learning_rate": 8.162295640326976e-05, "loss": 1.0297, "step": 32590 }, { "epoch": 1.85, "grad_norm": 1.0906521081924438, "learning_rate": 8.161727974568575e-05, "loss": 1.0425, "step": 32600 }, { "epoch": 1.85, "grad_norm": 1.207413911819458, "learning_rate": 8.161160308810173e-05, "loss": 1.0127, "step": 32610 }, { "epoch": 1.85, "grad_norm": 1.0671991109848022, "learning_rate": 8.160592643051771e-05, "loss": 1.0019, "step": 32620 }, { "epoch": 1.85, "grad_norm": 1.0821099281311035, "learning_rate": 8.16002497729337e-05, "loss": 1.0232, "step": 32630 }, { "epoch": 1.85, "grad_norm": 1.1224946975708008, "learning_rate": 8.159457311534968e-05, "loss": 0.9731, "step": 32640 }, { "epoch": 1.85, "grad_norm": 1.1184998750686646, "learning_rate": 8.158889645776568e-05, "loss": 1.0306, "step": 32650 }, { "epoch": 1.85, "grad_norm": 1.0642013549804688, "learning_rate": 8.158321980018166e-05, "loss": 0.9947, "step": 32660 }, { "epoch": 1.85, "grad_norm": 1.0266691446304321, "learning_rate": 8.157754314259765e-05, "loss": 1.0417, "step": 32670 }, { "epoch": 1.85, "grad_norm": 1.0565258264541626, "learning_rate": 8.157186648501363e-05, "loss": 0.9996, "step": 32680 }, { "epoch": 1.85, "grad_norm": 1.1708765029907227, "learning_rate": 8.156618982742961e-05, "loss": 1.0258, "step": 32690 }, { "epoch": 1.85, "grad_norm": 1.08760404586792, "learning_rate": 8.156051316984559e-05, "loss": 1.0168, "step": 32700 }, { "epoch": 1.85, "grad_norm": 1.063580870628357, "learning_rate": 8.155483651226158e-05, "loss": 1.0229, "step": 32710 }, { "epoch": 1.86, "grad_norm": 1.0321418046951294, "learning_rate": 8.154915985467756e-05, "loss": 1.0059, "step": 32720 }, { "epoch": 1.86, "grad_norm": 1.0650122165679932, "learning_rate": 8.154348319709354e-05, "loss": 0.9928, "step": 32730 }, { "epoch": 1.86, "grad_norm": 1.1426682472229004, "learning_rate": 8.153780653950954e-05, "loss": 1.0414, "step": 32740 }, { "epoch": 1.86, "grad_norm": 1.088510274887085, "learning_rate": 8.153212988192552e-05, "loss": 1.0108, "step": 32750 }, { "epoch": 1.86, "grad_norm": 1.0522726774215698, "learning_rate": 8.152645322434151e-05, "loss": 1.0047, "step": 32760 }, { "epoch": 1.86, "grad_norm": 1.0630825757980347, "learning_rate": 8.152077656675749e-05, "loss": 1.0322, "step": 32770 }, { "epoch": 1.86, "grad_norm": 1.068561315536499, "learning_rate": 8.151509990917349e-05, "loss": 1.0042, "step": 32780 }, { "epoch": 1.86, "grad_norm": 1.0589276552200317, "learning_rate": 8.150942325158947e-05, "loss": 1.0429, "step": 32790 }, { "epoch": 1.86, "grad_norm": 1.0897725820541382, "learning_rate": 8.150374659400546e-05, "loss": 1.0172, "step": 32800 }, { "epoch": 1.86, "grad_norm": 1.052225947380066, "learning_rate": 8.149806993642144e-05, "loss": 1.0328, "step": 32810 }, { "epoch": 1.86, "grad_norm": 1.122309684753418, "learning_rate": 8.149239327883742e-05, "loss": 1.0034, "step": 32820 }, { "epoch": 1.86, "grad_norm": 1.1308724880218506, "learning_rate": 8.148671662125341e-05, "loss": 1.0043, "step": 32830 }, { "epoch": 1.86, "grad_norm": 1.0886505842208862, "learning_rate": 8.148103996366939e-05, "loss": 0.9971, "step": 32840 }, { "epoch": 1.86, "grad_norm": 1.0172450542449951, "learning_rate": 8.147536330608539e-05, "loss": 1.0239, "step": 32850 }, { "epoch": 1.86, "grad_norm": 1.0694515705108643, "learning_rate": 8.146968664850137e-05, "loss": 1.0239, "step": 32860 }, { "epoch": 1.86, "grad_norm": 1.050222635269165, "learning_rate": 8.146400999091736e-05, "loss": 1.0212, "step": 32870 }, { "epoch": 1.86, "grad_norm": 1.0176578760147095, "learning_rate": 8.145833333333334e-05, "loss": 1.0281, "step": 32880 }, { "epoch": 1.86, "grad_norm": 1.1138941049575806, "learning_rate": 8.145265667574933e-05, "loss": 1.0094, "step": 32890 }, { "epoch": 1.87, "grad_norm": 1.1060059070587158, "learning_rate": 8.144698001816531e-05, "loss": 1.0073, "step": 32900 }, { "epoch": 1.87, "grad_norm": 1.1506617069244385, "learning_rate": 8.144130336058129e-05, "loss": 0.9889, "step": 32910 }, { "epoch": 1.87, "grad_norm": 1.110973596572876, "learning_rate": 8.143562670299729e-05, "loss": 1.024, "step": 32920 }, { "epoch": 1.87, "grad_norm": 1.1161577701568604, "learning_rate": 8.142995004541327e-05, "loss": 1.007, "step": 32930 }, { "epoch": 1.87, "grad_norm": 1.1064051389694214, "learning_rate": 8.142427338782926e-05, "loss": 1.0057, "step": 32940 }, { "epoch": 1.87, "grad_norm": 1.127477765083313, "learning_rate": 8.141859673024524e-05, "loss": 1.0266, "step": 32950 }, { "epoch": 1.87, "grad_norm": 1.0853968858718872, "learning_rate": 8.141292007266122e-05, "loss": 0.9902, "step": 32960 }, { "epoch": 1.87, "grad_norm": 1.1363043785095215, "learning_rate": 8.14072434150772e-05, "loss": 1.002, "step": 32970 }, { "epoch": 1.87, "grad_norm": 1.08796226978302, "learning_rate": 8.140156675749319e-05, "loss": 1.02, "step": 32980 }, { "epoch": 1.87, "grad_norm": 1.1258102655410767, "learning_rate": 8.139589009990917e-05, "loss": 0.9934, "step": 32990 }, { "epoch": 1.87, "grad_norm": 1.0577772855758667, "learning_rate": 8.139021344232515e-05, "loss": 1.0256, "step": 33000 }, { "epoch": 1.87, "grad_norm": 1.0537112951278687, "learning_rate": 8.138453678474115e-05, "loss": 1.0116, "step": 33010 }, { "epoch": 1.87, "grad_norm": 1.077146053314209, "learning_rate": 8.137886012715713e-05, "loss": 1.0361, "step": 33020 }, { "epoch": 1.87, "grad_norm": 1.0594955682754517, "learning_rate": 8.137318346957312e-05, "loss": 1.0376, "step": 33030 }, { "epoch": 1.87, "grad_norm": 1.1079331636428833, "learning_rate": 8.13675068119891e-05, "loss": 1.0027, "step": 33040 }, { "epoch": 1.87, "grad_norm": 1.101919412612915, "learning_rate": 8.13618301544051e-05, "loss": 1.0171, "step": 33050 }, { "epoch": 1.87, "grad_norm": 1.056222677230835, "learning_rate": 8.135615349682107e-05, "loss": 1.0158, "step": 33060 }, { "epoch": 1.88, "grad_norm": 1.0674606561660767, "learning_rate": 8.135047683923707e-05, "loss": 0.9883, "step": 33070 }, { "epoch": 1.88, "grad_norm": 1.0746431350708008, "learning_rate": 8.134480018165305e-05, "loss": 1.0081, "step": 33080 }, { "epoch": 1.88, "grad_norm": 1.0541328191757202, "learning_rate": 8.133912352406903e-05, "loss": 1.0158, "step": 33090 }, { "epoch": 1.88, "grad_norm": 1.1187669038772583, "learning_rate": 8.133344686648502e-05, "loss": 0.9981, "step": 33100 }, { "epoch": 1.88, "grad_norm": 1.0939453840255737, "learning_rate": 8.1327770208901e-05, "loss": 0.984, "step": 33110 }, { "epoch": 1.88, "grad_norm": 1.0537962913513184, "learning_rate": 8.1322093551317e-05, "loss": 1.0235, "step": 33120 }, { "epoch": 1.88, "grad_norm": 1.1216368675231934, "learning_rate": 8.131641689373297e-05, "loss": 0.9816, "step": 33130 }, { "epoch": 1.88, "grad_norm": 1.1058259010314941, "learning_rate": 8.131074023614897e-05, "loss": 0.9968, "step": 33140 }, { "epoch": 1.88, "grad_norm": 1.1152594089508057, "learning_rate": 8.130506357856495e-05, "loss": 1.0146, "step": 33150 }, { "epoch": 1.88, "grad_norm": 1.0695233345031738, "learning_rate": 8.129938692098094e-05, "loss": 1.0201, "step": 33160 }, { "epoch": 1.88, "grad_norm": 1.1106756925582886, "learning_rate": 8.129371026339692e-05, "loss": 1.0168, "step": 33170 }, { "epoch": 1.88, "grad_norm": 1.1041889190673828, "learning_rate": 8.12880336058129e-05, "loss": 0.9955, "step": 33180 }, { "epoch": 1.88, "grad_norm": 1.0619618892669678, "learning_rate": 8.12823569482289e-05, "loss": 1.0076, "step": 33190 }, { "epoch": 1.88, "grad_norm": 1.1007329225540161, "learning_rate": 8.127668029064487e-05, "loss": 0.9937, "step": 33200 }, { "epoch": 1.88, "grad_norm": 1.1032233238220215, "learning_rate": 8.127100363306085e-05, "loss": 1.0029, "step": 33210 }, { "epoch": 1.88, "grad_norm": 1.0892945528030396, "learning_rate": 8.126532697547685e-05, "loss": 1.0443, "step": 33220 }, { "epoch": 1.88, "grad_norm": 1.0738695859909058, "learning_rate": 8.125965031789283e-05, "loss": 0.9983, "step": 33230 }, { "epoch": 1.88, "grad_norm": 1.044852375984192, "learning_rate": 8.125397366030881e-05, "loss": 1.0358, "step": 33240 }, { "epoch": 1.89, "grad_norm": 1.1348459720611572, "learning_rate": 8.12482970027248e-05, "loss": 1.0061, "step": 33250 }, { "epoch": 1.89, "grad_norm": 1.1222975254058838, "learning_rate": 8.124262034514078e-05, "loss": 1.0091, "step": 33260 }, { "epoch": 1.89, "grad_norm": 1.1154011487960815, "learning_rate": 8.123694368755676e-05, "loss": 0.9981, "step": 33270 }, { "epoch": 1.89, "grad_norm": 1.1006438732147217, "learning_rate": 8.123126702997275e-05, "loss": 0.9961, "step": 33280 }, { "epoch": 1.89, "grad_norm": 1.1502100229263306, "learning_rate": 8.122559037238873e-05, "loss": 1.0227, "step": 33290 }, { "epoch": 1.89, "grad_norm": 1.1145612001419067, "learning_rate": 8.121991371480473e-05, "loss": 1.0324, "step": 33300 }, { "epoch": 1.89, "grad_norm": 1.1366780996322632, "learning_rate": 8.121423705722071e-05, "loss": 1.0244, "step": 33310 }, { "epoch": 1.89, "grad_norm": 1.0790634155273438, "learning_rate": 8.12085603996367e-05, "loss": 0.9923, "step": 33320 }, { "epoch": 1.89, "grad_norm": 1.0791027545928955, "learning_rate": 8.120288374205268e-05, "loss": 0.9953, "step": 33330 }, { "epoch": 1.89, "grad_norm": 1.0543891191482544, "learning_rate": 8.119720708446867e-05, "loss": 1.0074, "step": 33340 }, { "epoch": 1.89, "grad_norm": 1.0959115028381348, "learning_rate": 8.119153042688465e-05, "loss": 1.0199, "step": 33350 }, { "epoch": 1.89, "grad_norm": 1.1267374753952026, "learning_rate": 8.118585376930063e-05, "loss": 1.0145, "step": 33360 }, { "epoch": 1.89, "grad_norm": 1.1313340663909912, "learning_rate": 8.118017711171663e-05, "loss": 1.0274, "step": 33370 }, { "epoch": 1.89, "grad_norm": 1.0180960893630981, "learning_rate": 8.117450045413261e-05, "loss": 1.0022, "step": 33380 }, { "epoch": 1.89, "grad_norm": 1.0726594924926758, "learning_rate": 8.11688237965486e-05, "loss": 1.0131, "step": 33390 }, { "epoch": 1.89, "grad_norm": 1.1318213939666748, "learning_rate": 8.116314713896458e-05, "loss": 1.0047, "step": 33400 }, { "epoch": 1.89, "grad_norm": 1.0797613859176636, "learning_rate": 8.115747048138058e-05, "loss": 1.0224, "step": 33410 }, { "epoch": 1.89, "grad_norm": 1.093698263168335, "learning_rate": 8.115179382379656e-05, "loss": 0.9771, "step": 33420 }, { "epoch": 1.9, "grad_norm": 1.0970851182937622, "learning_rate": 8.114611716621255e-05, "loss": 0.9877, "step": 33430 }, { "epoch": 1.9, "grad_norm": 1.094175100326538, "learning_rate": 8.114044050862853e-05, "loss": 1.0029, "step": 33440 }, { "epoch": 1.9, "grad_norm": 1.0955294370651245, "learning_rate": 8.113476385104451e-05, "loss": 1.0049, "step": 33450 }, { "epoch": 1.9, "grad_norm": 1.1804425716400146, "learning_rate": 8.11290871934605e-05, "loss": 1.0176, "step": 33460 }, { "epoch": 1.9, "grad_norm": 1.1260852813720703, "learning_rate": 8.112341053587648e-05, "loss": 0.9988, "step": 33470 }, { "epoch": 1.9, "grad_norm": 1.091636061668396, "learning_rate": 8.111773387829246e-05, "loss": 0.9946, "step": 33480 }, { "epoch": 1.9, "grad_norm": 1.0679632425308228, "learning_rate": 8.111205722070844e-05, "loss": 1.0139, "step": 33490 }, { "epoch": 1.9, "grad_norm": 1.0715689659118652, "learning_rate": 8.110638056312444e-05, "loss": 1.0068, "step": 33500 }, { "epoch": 1.9, "grad_norm": 1.0553346872329712, "learning_rate": 8.110070390554042e-05, "loss": 1.0094, "step": 33510 }, { "epoch": 1.9, "grad_norm": 1.161297082901001, "learning_rate": 8.109502724795641e-05, "loss": 1.0063, "step": 33520 }, { "epoch": 1.9, "grad_norm": 1.1007587909698486, "learning_rate": 8.108935059037239e-05, "loss": 0.9855, "step": 33530 }, { "epoch": 1.9, "grad_norm": 1.1061023473739624, "learning_rate": 8.108367393278837e-05, "loss": 1.0353, "step": 33540 }, { "epoch": 1.9, "grad_norm": 1.139479398727417, "learning_rate": 8.107799727520436e-05, "loss": 1.0069, "step": 33550 }, { "epoch": 1.9, "grad_norm": 1.1113027334213257, "learning_rate": 8.107232061762034e-05, "loss": 1.0202, "step": 33560 }, { "epoch": 1.9, "grad_norm": 1.0953278541564941, "learning_rate": 8.106664396003634e-05, "loss": 1.0223, "step": 33570 }, { "epoch": 1.9, "grad_norm": 1.1178348064422607, "learning_rate": 8.106096730245232e-05, "loss": 0.962, "step": 33580 }, { "epoch": 1.9, "grad_norm": 1.1072700023651123, "learning_rate": 8.105529064486831e-05, "loss": 0.9814, "step": 33590 }, { "epoch": 1.91, "grad_norm": 1.0490320920944214, "learning_rate": 8.104961398728429e-05, "loss": 1.0105, "step": 33600 }, { "epoch": 1.91, "grad_norm": 1.1171157360076904, "learning_rate": 8.104393732970028e-05, "loss": 0.9782, "step": 33610 }, { "epoch": 1.91, "grad_norm": 1.073888897895813, "learning_rate": 8.103826067211626e-05, "loss": 1.0086, "step": 33620 }, { "epoch": 1.91, "grad_norm": 1.1486802101135254, "learning_rate": 8.103258401453224e-05, "loss": 1.0079, "step": 33630 }, { "epoch": 1.91, "grad_norm": 1.0920120477676392, "learning_rate": 8.102690735694824e-05, "loss": 1.0196, "step": 33640 }, { "epoch": 1.91, "grad_norm": 1.0485477447509766, "learning_rate": 8.102123069936422e-05, "loss": 0.9958, "step": 33650 }, { "epoch": 1.91, "grad_norm": 1.1817920207977295, "learning_rate": 8.101555404178021e-05, "loss": 1.0053, "step": 33660 }, { "epoch": 1.91, "grad_norm": 1.144073724746704, "learning_rate": 8.100987738419619e-05, "loss": 1.0299, "step": 33670 }, { "epoch": 1.91, "grad_norm": 1.1874297857284546, "learning_rate": 8.100420072661218e-05, "loss": 1.0183, "step": 33680 }, { "epoch": 1.91, "grad_norm": 1.0580432415008545, "learning_rate": 8.099852406902816e-05, "loss": 1.0195, "step": 33690 }, { "epoch": 1.91, "grad_norm": 1.1469000577926636, "learning_rate": 8.099284741144416e-05, "loss": 1.0163, "step": 33700 }, { "epoch": 1.91, "grad_norm": 1.0837923288345337, "learning_rate": 8.098717075386014e-05, "loss": 1.0414, "step": 33710 }, { "epoch": 1.91, "grad_norm": 1.1472777128219604, "learning_rate": 8.098149409627612e-05, "loss": 1.0064, "step": 33720 }, { "epoch": 1.91, "grad_norm": 1.1597740650177002, "learning_rate": 8.09758174386921e-05, "loss": 0.9972, "step": 33730 }, { "epoch": 1.91, "grad_norm": 1.193117380142212, "learning_rate": 8.097014078110809e-05, "loss": 1.0197, "step": 33740 }, { "epoch": 1.91, "grad_norm": 1.1899889707565308, "learning_rate": 8.096446412352407e-05, "loss": 0.9726, "step": 33750 }, { "epoch": 1.91, "grad_norm": 1.1270185708999634, "learning_rate": 8.095878746594005e-05, "loss": 1.0099, "step": 33760 }, { "epoch": 1.91, "grad_norm": 1.0651122331619263, "learning_rate": 8.095311080835604e-05, "loss": 1.0097, "step": 33770 }, { "epoch": 1.92, "grad_norm": 1.1770671606063843, "learning_rate": 8.094743415077202e-05, "loss": 0.9911, "step": 33780 }, { "epoch": 1.92, "grad_norm": 1.114853024482727, "learning_rate": 8.094175749318802e-05, "loss": 1.01, "step": 33790 }, { "epoch": 1.92, "grad_norm": 1.0558512210845947, "learning_rate": 8.0936080835604e-05, "loss": 1.0065, "step": 33800 }, { "epoch": 1.92, "grad_norm": 1.2111115455627441, "learning_rate": 8.093040417801998e-05, "loss": 1.0092, "step": 33810 }, { "epoch": 1.92, "grad_norm": 1.098079800605774, "learning_rate": 8.092472752043597e-05, "loss": 0.9873, "step": 33820 }, { "epoch": 1.92, "grad_norm": 1.120775580406189, "learning_rate": 8.091905086285195e-05, "loss": 1.0049, "step": 33830 }, { "epoch": 1.92, "grad_norm": 1.0827248096466064, "learning_rate": 8.091337420526794e-05, "loss": 1.0042, "step": 33840 }, { "epoch": 1.92, "grad_norm": 1.0920192003250122, "learning_rate": 8.090769754768392e-05, "loss": 1.0108, "step": 33850 }, { "epoch": 1.92, "grad_norm": 1.176983118057251, "learning_rate": 8.090202089009992e-05, "loss": 1.0123, "step": 33860 }, { "epoch": 1.92, "grad_norm": 1.112532615661621, "learning_rate": 8.08963442325159e-05, "loss": 0.998, "step": 33870 }, { "epoch": 1.92, "grad_norm": 1.1338281631469727, "learning_rate": 8.089066757493189e-05, "loss": 1.0222, "step": 33880 }, { "epoch": 1.92, "grad_norm": 1.0988776683807373, "learning_rate": 8.088499091734787e-05, "loss": 1.0032, "step": 33890 }, { "epoch": 1.92, "grad_norm": 1.1927175521850586, "learning_rate": 8.087931425976385e-05, "loss": 1.0033, "step": 33900 }, { "epoch": 1.92, "grad_norm": 1.1021801233291626, "learning_rate": 8.087363760217984e-05, "loss": 0.9924, "step": 33910 }, { "epoch": 1.92, "grad_norm": 1.1295796632766724, "learning_rate": 8.086796094459582e-05, "loss": 1.008, "step": 33920 }, { "epoch": 1.92, "grad_norm": 1.0687536001205444, "learning_rate": 8.086228428701182e-05, "loss": 1.0002, "step": 33930 }, { "epoch": 1.92, "grad_norm": 1.0836297273635864, "learning_rate": 8.08566076294278e-05, "loss": 1.0106, "step": 33940 }, { "epoch": 1.93, "grad_norm": 1.0568251609802246, "learning_rate": 8.085093097184379e-05, "loss": 1.0093, "step": 33950 }, { "epoch": 1.93, "grad_norm": 1.0922003984451294, "learning_rate": 8.084525431425977e-05, "loss": 0.9946, "step": 33960 }, { "epoch": 1.93, "grad_norm": 1.0614336729049683, "learning_rate": 8.083957765667575e-05, "loss": 1.0142, "step": 33970 }, { "epoch": 1.93, "grad_norm": 1.1606985330581665, "learning_rate": 8.083390099909174e-05, "loss": 1.0188, "step": 33980 }, { "epoch": 1.93, "grad_norm": 1.0712085962295532, "learning_rate": 8.082822434150772e-05, "loss": 1.0034, "step": 33990 }, { "epoch": 1.93, "grad_norm": 1.0696427822113037, "learning_rate": 8.08225476839237e-05, "loss": 0.9982, "step": 34000 }, { "epoch": 1.93, "grad_norm": 1.1076712608337402, "learning_rate": 8.081687102633968e-05, "loss": 1.0069, "step": 34010 }, { "epoch": 1.93, "grad_norm": 1.12317955493927, "learning_rate": 8.081119436875568e-05, "loss": 1.0122, "step": 34020 }, { "epoch": 1.93, "grad_norm": 1.1530163288116455, "learning_rate": 8.080551771117166e-05, "loss": 1.0186, "step": 34030 }, { "epoch": 1.93, "grad_norm": 1.0917056798934937, "learning_rate": 8.079984105358765e-05, "loss": 1.0102, "step": 34040 }, { "epoch": 1.93, "grad_norm": 1.122033953666687, "learning_rate": 8.079416439600363e-05, "loss": 1.0067, "step": 34050 }, { "epoch": 1.93, "grad_norm": 1.005246877670288, "learning_rate": 8.078848773841963e-05, "loss": 1.0195, "step": 34060 }, { "epoch": 1.93, "grad_norm": 1.079512596130371, "learning_rate": 8.07828110808356e-05, "loss": 1.007, "step": 34070 }, { "epoch": 1.93, "grad_norm": 1.0777698755264282, "learning_rate": 8.077713442325159e-05, "loss": 1.0086, "step": 34080 }, { "epoch": 1.93, "grad_norm": 1.1147466897964478, "learning_rate": 8.077145776566758e-05, "loss": 0.9567, "step": 34090 }, { "epoch": 1.93, "grad_norm": 1.1214940547943115, "learning_rate": 8.076578110808356e-05, "loss": 1.0011, "step": 34100 }, { "epoch": 1.93, "grad_norm": 1.0697262287139893, "learning_rate": 8.076010445049955e-05, "loss": 1.0233, "step": 34110 }, { "epoch": 1.93, "grad_norm": 1.0945382118225098, "learning_rate": 8.075442779291553e-05, "loss": 1.0043, "step": 34120 }, { "epoch": 1.94, "grad_norm": 1.0727038383483887, "learning_rate": 8.074875113533153e-05, "loss": 0.9927, "step": 34130 }, { "epoch": 1.94, "grad_norm": 1.18470299243927, "learning_rate": 8.07430744777475e-05, "loss": 1.0022, "step": 34140 }, { "epoch": 1.94, "grad_norm": 1.0915753841400146, "learning_rate": 8.07373978201635e-05, "loss": 1.0076, "step": 34150 }, { "epoch": 1.94, "grad_norm": 1.039151668548584, "learning_rate": 8.073172116257948e-05, "loss": 1.0075, "step": 34160 }, { "epoch": 1.94, "grad_norm": 1.1021548509597778, "learning_rate": 8.072604450499546e-05, "loss": 1.0202, "step": 34170 }, { "epoch": 1.94, "grad_norm": 1.0300475358963013, "learning_rate": 8.072036784741145e-05, "loss": 1.0139, "step": 34180 }, { "epoch": 1.94, "grad_norm": 1.0589317083358765, "learning_rate": 8.071469118982743e-05, "loss": 0.9995, "step": 34190 }, { "epoch": 1.94, "grad_norm": 1.0579530000686646, "learning_rate": 8.070901453224343e-05, "loss": 1.01, "step": 34200 }, { "epoch": 1.94, "grad_norm": 1.0918447971343994, "learning_rate": 8.07033378746594e-05, "loss": 1.0349, "step": 34210 }, { "epoch": 1.94, "grad_norm": 1.1338268518447876, "learning_rate": 8.06976612170754e-05, "loss": 1.0051, "step": 34220 }, { "epoch": 1.94, "grad_norm": 1.103610634803772, "learning_rate": 8.069198455949138e-05, "loss": 1.0346, "step": 34230 }, { "epoch": 1.94, "grad_norm": 1.1178876161575317, "learning_rate": 8.068630790190736e-05, "loss": 1.0054, "step": 34240 }, { "epoch": 1.94, "grad_norm": 1.091368317604065, "learning_rate": 8.068063124432334e-05, "loss": 1.0192, "step": 34250 }, { "epoch": 1.94, "grad_norm": 1.1202245950698853, "learning_rate": 8.067495458673933e-05, "loss": 1.0193, "step": 34260 }, { "epoch": 1.94, "grad_norm": 1.0537540912628174, "learning_rate": 8.066927792915531e-05, "loss": 0.9946, "step": 34270 }, { "epoch": 1.94, "grad_norm": 1.0915966033935547, "learning_rate": 8.066360127157129e-05, "loss": 1.0081, "step": 34280 }, { "epoch": 1.94, "grad_norm": 1.0724248886108398, "learning_rate": 8.065792461398729e-05, "loss": 1.0193, "step": 34290 }, { "epoch": 1.94, "grad_norm": 1.060144305229187, "learning_rate": 8.065224795640327e-05, "loss": 1.0022, "step": 34300 }, { "epoch": 1.95, "grad_norm": 1.0960886478424072, "learning_rate": 8.064657129881926e-05, "loss": 1.0155, "step": 34310 }, { "epoch": 1.95, "grad_norm": 1.075573205947876, "learning_rate": 8.064089464123524e-05, "loss": 1.0004, "step": 34320 }, { "epoch": 1.95, "grad_norm": 1.105133056640625, "learning_rate": 8.063521798365123e-05, "loss": 1.0027, "step": 34330 }, { "epoch": 1.95, "grad_norm": 1.082247257232666, "learning_rate": 8.062954132606721e-05, "loss": 0.9878, "step": 34340 }, { "epoch": 1.95, "grad_norm": 1.0931367874145508, "learning_rate": 8.06238646684832e-05, "loss": 0.9805, "step": 34350 }, { "epoch": 1.95, "grad_norm": 1.133259654045105, "learning_rate": 8.061818801089919e-05, "loss": 1.0121, "step": 34360 }, { "epoch": 1.95, "grad_norm": 1.1361985206604004, "learning_rate": 8.061251135331517e-05, "loss": 0.9839, "step": 34370 }, { "epoch": 1.95, "grad_norm": 1.121565341949463, "learning_rate": 8.060683469573116e-05, "loss": 1.0334, "step": 34380 }, { "epoch": 1.95, "grad_norm": 1.1303865909576416, "learning_rate": 8.060115803814714e-05, "loss": 0.979, "step": 34390 }, { "epoch": 1.95, "grad_norm": 1.0803173780441284, "learning_rate": 8.059548138056313e-05, "loss": 1.0013, "step": 34400 }, { "epoch": 1.95, "grad_norm": 1.0399612188339233, "learning_rate": 8.058980472297911e-05, "loss": 0.9849, "step": 34410 }, { "epoch": 1.95, "grad_norm": 1.085366129875183, "learning_rate": 8.058412806539511e-05, "loss": 0.9907, "step": 34420 }, { "epoch": 1.95, "grad_norm": 1.1854283809661865, "learning_rate": 8.057845140781109e-05, "loss": 1.0083, "step": 34430 }, { "epoch": 1.95, "grad_norm": 1.0978314876556396, "learning_rate": 8.057277475022707e-05, "loss": 0.9676, "step": 34440 }, { "epoch": 1.95, "grad_norm": 1.157835841178894, "learning_rate": 8.056709809264306e-05, "loss": 1.0064, "step": 34450 }, { "epoch": 1.95, "grad_norm": 1.0768041610717773, "learning_rate": 8.056142143505904e-05, "loss": 1.023, "step": 34460 }, { "epoch": 1.95, "grad_norm": 1.0979183912277222, "learning_rate": 8.055574477747503e-05, "loss": 0.9651, "step": 34470 }, { "epoch": 1.96, "grad_norm": 1.0424301624298096, "learning_rate": 8.055006811989101e-05, "loss": 1.018, "step": 34480 }, { "epoch": 1.96, "grad_norm": 1.0989540815353394, "learning_rate": 8.0544391462307e-05, "loss": 0.9989, "step": 34490 }, { "epoch": 1.96, "grad_norm": 1.108622670173645, "learning_rate": 8.053871480472299e-05, "loss": 0.9805, "step": 34500 }, { "epoch": 1.96, "grad_norm": 1.1738834381103516, "learning_rate": 8.053303814713897e-05, "loss": 1.0253, "step": 34510 }, { "epoch": 1.96, "grad_norm": 1.109684944152832, "learning_rate": 8.052736148955495e-05, "loss": 0.9989, "step": 34520 }, { "epoch": 1.96, "grad_norm": 1.086384654045105, "learning_rate": 8.052168483197093e-05, "loss": 0.9987, "step": 34530 }, { "epoch": 1.96, "grad_norm": 1.0835994482040405, "learning_rate": 8.051600817438692e-05, "loss": 1.0157, "step": 34540 }, { "epoch": 1.96, "grad_norm": 1.1034008264541626, "learning_rate": 8.05103315168029e-05, "loss": 1.0318, "step": 34550 }, { "epoch": 1.96, "grad_norm": 1.1671911478042603, "learning_rate": 8.05046548592189e-05, "loss": 1.0041, "step": 34560 }, { "epoch": 1.96, "grad_norm": 1.096564531326294, "learning_rate": 8.049897820163487e-05, "loss": 1.0125, "step": 34570 }, { "epoch": 1.96, "grad_norm": 1.161565899848938, "learning_rate": 8.049330154405087e-05, "loss": 0.9923, "step": 34580 }, { "epoch": 1.96, "grad_norm": 1.1509267091751099, "learning_rate": 8.048762488646685e-05, "loss": 0.9976, "step": 34590 }, { "epoch": 1.96, "grad_norm": 1.1412544250488281, "learning_rate": 8.048194822888284e-05, "loss": 0.9877, "step": 34600 }, { "epoch": 1.96, "grad_norm": 1.1171194314956665, "learning_rate": 8.047627157129882e-05, "loss": 1.0104, "step": 34610 }, { "epoch": 1.96, "grad_norm": 1.0660346746444702, "learning_rate": 8.047059491371481e-05, "loss": 1.0201, "step": 34620 }, { "epoch": 1.96, "grad_norm": 1.0966014862060547, "learning_rate": 8.04649182561308e-05, "loss": 1.0041, "step": 34630 }, { "epoch": 1.96, "grad_norm": 1.1407338380813599, "learning_rate": 8.045924159854677e-05, "loss": 1.0052, "step": 34640 }, { "epoch": 1.96, "grad_norm": 1.0959229469299316, "learning_rate": 8.045356494096277e-05, "loss": 1.0064, "step": 34650 }, { "epoch": 1.97, "grad_norm": 1.0900428295135498, "learning_rate": 8.044788828337875e-05, "loss": 1.0138, "step": 34660 }, { "epoch": 1.97, "grad_norm": 1.099420428276062, "learning_rate": 8.044221162579474e-05, "loss": 0.9881, "step": 34670 }, { "epoch": 1.97, "grad_norm": 1.1489720344543457, "learning_rate": 8.043653496821072e-05, "loss": 1.0134, "step": 34680 }, { "epoch": 1.97, "grad_norm": 1.1389869451522827, "learning_rate": 8.043085831062672e-05, "loss": 1.0073, "step": 34690 }, { "epoch": 1.97, "grad_norm": 1.1473486423492432, "learning_rate": 8.04251816530427e-05, "loss": 0.9867, "step": 34700 }, { "epoch": 1.97, "grad_norm": 1.1374011039733887, "learning_rate": 8.041950499545869e-05, "loss": 0.9784, "step": 34710 }, { "epoch": 1.97, "grad_norm": 1.1268433332443237, "learning_rate": 8.041382833787467e-05, "loss": 1.006, "step": 34720 }, { "epoch": 1.97, "grad_norm": 1.1033672094345093, "learning_rate": 8.040815168029065e-05, "loss": 1.0002, "step": 34730 }, { "epoch": 1.97, "grad_norm": 1.102721095085144, "learning_rate": 8.040247502270664e-05, "loss": 1.006, "step": 34740 }, { "epoch": 1.97, "grad_norm": 1.1406208276748657, "learning_rate": 8.039679836512262e-05, "loss": 1.0081, "step": 34750 }, { "epoch": 1.97, "grad_norm": 1.0777230262756348, "learning_rate": 8.03911217075386e-05, "loss": 1.0308, "step": 34760 }, { "epoch": 1.97, "grad_norm": 1.1328567266464233, "learning_rate": 8.038544504995458e-05, "loss": 1.0007, "step": 34770 }, { "epoch": 1.97, "grad_norm": 1.0706348419189453, "learning_rate": 8.037976839237058e-05, "loss": 1.0232, "step": 34780 }, { "epoch": 1.97, "grad_norm": 1.18440580368042, "learning_rate": 8.037409173478656e-05, "loss": 1.0172, "step": 34790 }, { "epoch": 1.97, "grad_norm": 1.1011769771575928, "learning_rate": 8.036841507720255e-05, "loss": 0.9967, "step": 34800 }, { "epoch": 1.97, "grad_norm": 1.0871354341506958, "learning_rate": 8.036273841961853e-05, "loss": 0.9696, "step": 34810 }, { "epoch": 1.97, "grad_norm": 1.0890824794769287, "learning_rate": 8.035706176203451e-05, "loss": 1.0241, "step": 34820 }, { "epoch": 1.97, "grad_norm": 1.0780296325683594, "learning_rate": 8.03513851044505e-05, "loss": 1.0197, "step": 34830 }, { "epoch": 1.98, "grad_norm": 1.1419790983200073, "learning_rate": 8.034570844686648e-05, "loss": 0.9943, "step": 34840 }, { "epoch": 1.98, "grad_norm": 1.134312629699707, "learning_rate": 8.034003178928248e-05, "loss": 0.9902, "step": 34850 }, { "epoch": 1.98, "grad_norm": 1.1375572681427002, "learning_rate": 8.033435513169846e-05, "loss": 0.9911, "step": 34860 }, { "epoch": 1.98, "grad_norm": 1.0990984439849854, "learning_rate": 8.032867847411445e-05, "loss": 1.0118, "step": 34870 }, { "epoch": 1.98, "grad_norm": 1.0979222059249878, "learning_rate": 8.032300181653043e-05, "loss": 0.9574, "step": 34880 }, { "epoch": 1.98, "grad_norm": 1.110902190208435, "learning_rate": 8.031732515894642e-05, "loss": 1.0414, "step": 34890 }, { "epoch": 1.98, "grad_norm": 1.0733470916748047, "learning_rate": 8.03116485013624e-05, "loss": 0.9993, "step": 34900 }, { "epoch": 1.98, "grad_norm": 1.1892988681793213, "learning_rate": 8.030597184377838e-05, "loss": 0.9953, "step": 34910 }, { "epoch": 1.98, "grad_norm": 1.1297696828842163, "learning_rate": 8.030029518619438e-05, "loss": 1.011, "step": 34920 }, { "epoch": 1.98, "grad_norm": 1.1115585565567017, "learning_rate": 8.029461852861036e-05, "loss": 0.9983, "step": 34930 }, { "epoch": 1.98, "grad_norm": 1.215113878250122, "learning_rate": 8.028894187102635e-05, "loss": 1.0199, "step": 34940 }, { "epoch": 1.98, "grad_norm": 1.078534722328186, "learning_rate": 8.028326521344233e-05, "loss": 1.0031, "step": 34950 }, { "epoch": 1.98, "grad_norm": 1.0607690811157227, "learning_rate": 8.027758855585832e-05, "loss": 0.9984, "step": 34960 }, { "epoch": 1.98, "grad_norm": 1.0363714694976807, "learning_rate": 8.02719118982743e-05, "loss": 1.018, "step": 34970 }, { "epoch": 1.98, "grad_norm": 1.1174211502075195, "learning_rate": 8.02662352406903e-05, "loss": 1.0058, "step": 34980 }, { "epoch": 1.98, "grad_norm": 1.0994765758514404, "learning_rate": 8.026055858310628e-05, "loss": 0.9731, "step": 34990 }, { "epoch": 1.98, "grad_norm": 1.1533819437026978, "learning_rate": 8.025488192552226e-05, "loss": 1.049, "step": 35000 }, { "epoch": 1.99, "grad_norm": 1.0445717573165894, "learning_rate": 8.024920526793824e-05, "loss": 1.0052, "step": 35010 }, { "epoch": 1.99, "grad_norm": 1.1207653284072876, "learning_rate": 8.024352861035423e-05, "loss": 0.9875, "step": 35020 }, { "epoch": 1.99, "grad_norm": 1.1164246797561646, "learning_rate": 8.023785195277021e-05, "loss": 1.0092, "step": 35030 }, { "epoch": 1.99, "grad_norm": 1.145721673965454, "learning_rate": 8.023217529518619e-05, "loss": 1.0151, "step": 35040 }, { "epoch": 1.99, "grad_norm": 1.071134328842163, "learning_rate": 8.022649863760218e-05, "loss": 1.028, "step": 35050 }, { "epoch": 1.99, "grad_norm": 1.1236205101013184, "learning_rate": 8.022082198001816e-05, "loss": 0.9921, "step": 35060 }, { "epoch": 1.99, "grad_norm": 1.0894601345062256, "learning_rate": 8.021514532243416e-05, "loss": 0.9783, "step": 35070 }, { "epoch": 1.99, "grad_norm": 1.14150071144104, "learning_rate": 8.020946866485014e-05, "loss": 1.0212, "step": 35080 }, { "epoch": 1.99, "grad_norm": 1.0685398578643799, "learning_rate": 8.020379200726612e-05, "loss": 1.0007, "step": 35090 }, { "epoch": 1.99, "grad_norm": 1.1256561279296875, "learning_rate": 8.019811534968211e-05, "loss": 1.0257, "step": 35100 }, { "epoch": 1.99, "grad_norm": 1.136267900466919, "learning_rate": 8.019243869209809e-05, "loss": 1.0033, "step": 35110 }, { "epoch": 1.99, "grad_norm": 1.0899463891983032, "learning_rate": 8.018676203451408e-05, "loss": 0.9946, "step": 35120 }, { "epoch": 1.99, "grad_norm": 1.1323151588439941, "learning_rate": 8.018108537693006e-05, "loss": 0.9851, "step": 35130 }, { "epoch": 1.99, "grad_norm": 1.036213755607605, "learning_rate": 8.017540871934606e-05, "loss": 1.0214, "step": 35140 }, { "epoch": 1.99, "grad_norm": 1.1139520406723022, "learning_rate": 8.016973206176204e-05, "loss": 0.9972, "step": 35150 }, { "epoch": 1.99, "grad_norm": 1.1060576438903809, "learning_rate": 8.016405540417803e-05, "loss": 0.9885, "step": 35160 }, { "epoch": 1.99, "grad_norm": 1.0693747997283936, "learning_rate": 8.015837874659401e-05, "loss": 1.0152, "step": 35170 }, { "epoch": 1.99, "grad_norm": 1.065463662147522, "learning_rate": 8.015270208900999e-05, "loss": 1.0095, "step": 35180 }, { "epoch": 2.0, "grad_norm": 1.153450846672058, "learning_rate": 8.014702543142598e-05, "loss": 0.9952, "step": 35190 }, { "epoch": 2.0, "grad_norm": 1.1063895225524902, "learning_rate": 8.014134877384196e-05, "loss": 1.0221, "step": 35200 }, { "epoch": 2.0, "grad_norm": 1.1109532117843628, "learning_rate": 8.013567211625796e-05, "loss": 0.9939, "step": 35210 }, { "epoch": 2.0, "grad_norm": 1.1770817041397095, "learning_rate": 8.012999545867394e-05, "loss": 0.999, "step": 35220 }, { "epoch": 2.0, "grad_norm": 1.111732840538025, "learning_rate": 8.012431880108993e-05, "loss": 1.016, "step": 35230 }, { "epoch": 2.0, "grad_norm": 1.0318434238433838, "learning_rate": 8.011864214350591e-05, "loss": 0.9728, "step": 35240 }, { "epoch": 2.0, "grad_norm": 1.1686722040176392, "learning_rate": 8.011296548592189e-05, "loss": 1.0101, "step": 35250 }, { "epoch": 2.0, "grad_norm": 1.1736102104187012, "learning_rate": 8.010728882833788e-05, "loss": 1.022, "step": 35260 }, { "epoch": 2.0, "grad_norm": 1.153889536857605, "learning_rate": 8.010161217075386e-05, "loss": 1.0257, "step": 35270 }, { "epoch": 2.0, "eval_loss": 1.337753415107727, "eval_runtime": 6199.8154, "eval_samples_per_second": 267.804, "eval_steps_per_second": 4.184, "step": 35272 } ], "logging_steps": 10, "max_steps": 176360, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.5043420985543885e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }